[Bioperl-l] wu-blast and bioperl

Tuan A. Tran tuantran167 at gmail.com
Mon May 23 18:26:34 EDT 2005


Hi,

I tried to run wublast with bioperl. However, I got an error

-------------------- WARNING ---------------------
MSG: cannot find path to wublast
---------------------------------------------------
Can't call method "next_result" on an undefined value at testanno.pl line 225, 
<GEN0> line 1.

Is there any method which is equivalent to "next_result" method? I
really appreciate if anyone can help.

My code lousy code is quoted below. I tested it with blastall from
ncbi. It works fine.
I am using debian sarge and I have wu-blast install in
/usr/local/WU-BLAST2.0. Furthermore, I also installed new bioperl-1.5.

Thanks in advance
TAT




=================
#!/usr/local/lib/perl -w

BEGIN {$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0/';};

use Bio::Seq;
use Bio::SeqIO;
use Bio::SearchIO;
use Bio::AlignIO;
use Bio::SimpleAlign;
use Bio::LocatableSeq;
use Bio::Tools::Run::StandAloneBlast;
use Getopt::Long;
use Bio::DB::GenBank;
use Bio::DB::Flat::BDB;
#use Bio::Index::GenBank;
use Bio::Index::Fasta;
use Bio::SeqFeature::Generic;
use DBI;


#########
# Declare database names
#########

my $db_core_fly = 'fly_core';

############
#
#  Define variables for tables in database drosophila_melanogaster_core_30_3d
#
###########

my $tattrib = 'attrib_type';
my $tseq_region = 'seq_region';
my $tseq_region_attrib = 'seq_region_attrib';
my $texon = 'exon';
my $texon_stable_id = 'exon_stable_id';
my $texon_transcript = 'exon_transcript';
my $tgene = 'gene';
my $tgene_stable_id = 'gene_stable_id';
my $ttranscript = 'transcript';
my $ttranscript_stable_id = 'transcript_stable_id';
my $txref='xref';
my $texternal_db = 'external_db';
my $texternal_synonym = 'external_synonym';

###########
# This subroutine is to tell Perl to communicate with my MySQL database
# Source: got it after googling
###########

sub start_InputDB {
   my $database = $_[0];
   use DBI;
   my $DBD = 'mysql';
   my $host ='localhost';
   my $user = 'tuan';
   my $passwd = '';
   my $inputdb = DBI->connect("DBI:$DBD:$database:$host","$user","",
                                   {RaiseError => 1, AutoCommit => 1});
   return $inputdb;
}

######################

sub feature2string {
   my $f = shift;
   #my $stable_id = $f->stable_id();
   my $seq_region = $f->seq_region_name();
   my $start = $f->start();
   my $end = $f->end();
   my $strand = $f->strand();
   my $seq = $f->seq();
   return ">$seq_region:$start - $end ($strand)\n$seq";
}

sub getGene {
   my $f = shift;
   #my $stable_id = $f->stable_id();
   my $seq_region = $f->seq_region_name();
   my $start = $f->start();
   my $end = $f->end();
   my $strand = $f->strand();
   my $seq = $f->seq();
   return ">$seq_region:$start - $end ($strand)\n$seq";
}

############################################################################
# Change number of organisms according to your problem
# BE SURE TO CHANGE AS EXAMPLE BELOW
############################################################################

my (@organism,$org_idx);

my $max_number_organism = 3;

@organism = ('fly');

my $fly_dna = 'Drosophila_melanogaster.BDGP3.2.1.may.dna_rm.';

my %fly_chromosome = (
                fly => [$fly_dna.$org_chro.'.2h',$fly_dna.$org_chro.'.2L',
$fly_dna.$org_chro.'.2R',$fly_dna.$org_chro.'.3h',$fly_dna.$org_chro.'.3L',
$fly_dna.$org_chro.'.3R',$fly_dna.$org_chro.'.4', $fly_dna.$org_chro.'.4h',
$fly_dna.$org_chro.'.U',$fly_dna.$org_chro.'.X', $fly_dna.$org_chro.'.Xh',
			$fly_dna.$org_chro.'.Yh']);

my %fly_mysqldb = ( fly => ['fly_core']);



############################################################################

my $meta='local';

my $infile = shift;

my $in  = Bio::SeqIO->new ( -file   => $infile,
	 		      -format => 'fasta' );

my $prefix="summary_";
my $sumout = $prefix . $infile;

$prefix="out_";
$allout = $prefix . $infile;

$datastatistics = "stat_" . $infile . ".dat";


my $out = Bio::SeqIO->new ( -file   => '>seq_out.fa',
			      -format => 'fasta' );
my $fout = Bio::SeqIO->new(-fh => \*STDOUT , -format => 'fasta');

#1111111111
while(my $query=$in->next_seq()) 
{ 

   print ALLOUT ">",$query->id," ",$query->desc;
   print ALLOUT "\t",$query->seq,"\n";
   print SUMOUT ">",$query->id," ",$query->desc;
   print SUMOUT "\t",$query->seq,"\n";
#   print ">",$query->id," ",$query->desc;
#   print "\t",$query->seq,"\n";
  
#2222222222
foreach $organism (@organism)
{
	my $dir = '/Crick/' . $organism . '/';


#########################################            
# Go through each mysql database (mostly core and est)
########################################
#333333333333
foreach my $each_db (@{$fly_mysqldb{$organism}})
{
   my $inputdb = &start_InputDB($each_db);

#######################################
# Go through each chromosome of a species
#######################################
#4444444444444		
foreach my $org_chro (@{$fly_chromosome{$organism}})
{
	   	
	# database for blast 
	my($local_blastdb,$db,$fmt);
	$local_blastdb = $dir . 'dna/' . $org_chro . '.fa';

#print "is it here ", $local_blastdb, "\n";

	$db = $dir . 'dna/' . $organism . '.index';
	$fmt='Fasta';
 my $blastout = "blast.out." . $infile;
	
	my @param = ('program'  => 'wublastn','database' => $local_blastdb,
		     'E'=>10,'o'=>$blastout);
	my $factory = Bio::Tools::Run::StandAloneBlast->new(@param,
_READMETHOD => "Blast");
	$wublast_path = $factory->program_path();
	print $wublast_path, "\n";
	$factory->q(-10000.0);
	$factory->g(F);

#my $blastout = "blast.out." . $infile . $organisms . "." . $org_chro
. "." . $each_db;
  	my $blast_report = $factory->wublast($query);
  
	##########
	# Look at every hits in blast output
	#
	###########
   #55555555555
  	while(my $result = $blast_report->next_result) 
 	{
print "number of hits: ",$result->num_hits,"\n";

     	if($result->num_hits > 0) 
     	{
    
        	my $match_count=0; 
			my $hit_count=0;
        	my @myhits =$result->hits();
         	
			foreach my $myhit (@myhits) 
        	{
           	# Count the number of high scoring pair
           	my $hsp_count=0;
     			my @hsps = $myhit->hsps();
            
     			foreach my $hsp (@hsps) 
     			{
print "hsp_length:", $hsp->length, " and query_length:";
print $query->length, "\n";
						
					#########
					# Counting the number of hits with the same length as query sequence
					########
        			if($hsp->length eq $query->length) 
        			{ 
           			$match_count++;  
						$hsp_count++;
        			}
     			} # end of foreach my $hsp (@hsps)

			} # end of foreach my $myhit (@myhits)

			##########
     		# Pick the top 4 of high scoring pair
     		########### 
     		if($match_count>0 && $match_count <= 4) 
     		{

				my $temp_organism;
				if($temp_organism ne $organism) {
print        $organism,": ",$match_count," full length hsps\n";
				}
				$temp_organism = $organism;

 				foreach my $hit (@myhits) 
        		{
          		my $hsp_count=0;
          		my @hsps = $hit->hsps();
          		foreach my $hsp (@hsps) 
          		{
            		# if the length of the high scoring pair is equal to
            		# the length of input sequence, then this is a good hit
            		if($hsp->length eq $query->length) 
						{ 
							$hsp_count++; 
						} 
            	}
          
print "hsp_count: $hsp_count.\n"; 
	  
          		if($hsp_count!=0) 
          		{  

print $hit->name, " : $hsp_count full length hsps\n";
          		}
  
          		if($hsp_count>0 ) 
          		{
            		my $id = $hit->name;
            		my($dbobj,$seqio);
            	

            		foreach my $hsp (@hsps) 
            		{
              			$hsp_start = $hsp->query->start; 
              			my $start = $hsp->hit->start(); my $end = $hsp->hit->end();
              			if($hsp->length eq $query->length) 
              			{
print "HIT: ",$hsp->hit_string,"  ",$hit->name;

           				my $flank_start=$start-100;
            			if($flank_start<1) 
            			{ 
              				$flank_start=1; 
            			}

            			my $flank_end=$end+100;
            			if($flank_end>$hit->length) 
            			{ 
              				$flank_end=$hit->length; 
            			}
            			my $flank_len=$flank_start-$flank_end+1;


						

############################################################

							} #end of if($hsp->length eq $query->length)

							} # end of foreach my $hsp (@hsps) 
						} # end of if($hsp_count>0 && $hsp_count<4)
					} # end of foreach my $hit (@myhits)
				} # end of if($match_count>0 && $match_count<4) 
			} # end of if($result->num_hits>0) 

   #55555555555
  	} # end of while(my $result = $blast_report->next_result)
   #55555555555

#444444444444
} # end of for $each_chromosome (keys %insect_chromosome)
#444444444444


#333333333333
} # end of foreach my $each_db (@{$insect_mysqldb{$organism}})
#333333333333

#2222222222  
} # end of for($org_idx=0; $org_idx < max_number_org; org_idx++)
#2222222222

#1111111111
} # end of while(my $query=$in->next_seq())
#1111111111



More information about the Bioperl-l mailing list