[Bioperl-l] wu-blast and bioperl
Jason Stajich
jason.stajich at duke.edu
Mon May 23 20:38:55 EDT 2005
it is still not finding wublast hence your message, you don't want an
equivalenced next_result method, you want to tell the module how to
find the executable it needs.
I think you need to set the WUBLAST variable after the use
Bio::Tools::Run::StandAloneBlast.
I'm not really looking at the rest of the code below though so I
don't know if you've made other mistakes.
You can always just write a simple script to test that the basics work
use strict;
use Bio::Tools::Run::StandAloneBlast;
$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0';
my $factory = Bio::Tools::Run::StandAloneBlast->new();
print $factory->program_path();
-jason
On May 23, 2005, at 6:26 PM, Tuan A. Tran wrote:
> Hi,
>
> I tried to run wublast with bioperl. However, I got an error
>
> -------------------- WARNING ---------------------
> MSG: cannot find path to wublast
> ---------------------------------------------------
> Can't call method "next_result" on an undefined value at
> testanno.pl line 225,
> <GEN0> line 1.
>
> Is there any method which is equivalent to "next_result" method? I
> really appreciate if anyone can help.
>
> My code lousy code is quoted below. I tested it with blastall from
> ncbi. It works fine.
> I am using debian sarge and I have wu-blast install in
> /usr/local/WU-BLAST2.0. Furthermore, I also installed new bioperl-1.5.
>
> Thanks in advance
> TAT
>
>
>
>
> =================
> #!/usr/local/lib/perl -w
>
> BEGIN {$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0/';};
>
> use Bio::Seq;
> use Bio::SeqIO;
> use Bio::SearchIO;
> use Bio::AlignIO;
> use Bio::SimpleAlign;
> use Bio::LocatableSeq;
> use Bio::Tools::Run::StandAloneBlast;
> use Getopt::Long;
> use Bio::DB::GenBank;
> use Bio::DB::Flat::BDB;
> #use Bio::Index::GenBank;
> use Bio::Index::Fasta;
> use Bio::SeqFeature::Generic;
> use DBI;
>
>
> #########
> # Declare database names
> #########
>
> my $db_core_fly = 'fly_core';
>
> ############
> #
> # Define variables for tables in database
> drosophila_melanogaster_core_30_3d
> #
> ###########
>
> my $tattrib = 'attrib_type';
> my $tseq_region = 'seq_region';
> my $tseq_region_attrib = 'seq_region_attrib';
> my $texon = 'exon';
> my $texon_stable_id = 'exon_stable_id';
> my $texon_transcript = 'exon_transcript';
> my $tgene = 'gene';
> my $tgene_stable_id = 'gene_stable_id';
> my $ttranscript = 'transcript';
> my $ttranscript_stable_id = 'transcript_stable_id';
> my $txref='xref';
> my $texternal_db = 'external_db';
> my $texternal_synonym = 'external_synonym';
>
> ###########
> # This subroutine is to tell Perl to communicate with my MySQL
> database
> # Source: got it after googling
> ###########
>
> sub start_InputDB {
> my $database = $_[0];
> use DBI;
> my $DBD = 'mysql';
> my $host ='localhost';
> my $user = 'tuan';
> my $passwd = '';
> my $inputdb = DBI->connect("DBI:$DBD:$database:$host","$user","",
> {RaiseError => 1, AutoCommit =>
> 1});
> return $inputdb;
> }
>
> ######################
>
> sub feature2string {
> my $f = shift;
> #my $stable_id = $f->stable_id();
> my $seq_region = $f->seq_region_name();
> my $start = $f->start();
> my $end = $f->end();
> my $strand = $f->strand();
> my $seq = $f->seq();
> return ">$seq_region:$start - $end ($strand)\n$seq";
> }
>
> sub getGene {
> my $f = shift;
> #my $stable_id = $f->stable_id();
> my $seq_region = $f->seq_region_name();
> my $start = $f->start();
> my $end = $f->end();
> my $strand = $f->strand();
> my $seq = $f->seq();
> return ">$seq_region:$start - $end ($strand)\n$seq";
> }
>
> ######################################################################
> ######
> # Change number of organisms according to your problem
> # BE SURE TO CHANGE AS EXAMPLE BELOW
> ######################################################################
> ######
>
> my (@organism,$org_idx);
>
> my $max_number_organism = 3;
>
> @organism = ('fly');
>
> my $fly_dna = 'Drosophila_melanogaster.BDGP3.2.1.may.dna_rm.';
>
> my %fly_chromosome = (
> fly => [$fly_dna.$org_chro.'.2h',$fly_dna.
> $org_chro.'.2L',
> $fly_dna.$org_chro.'.2R',$fly_dna.$org_chro.'.3h',$fly_dna.
> $org_chro.'.3L',
> $fly_dna.$org_chro.'.3R',$fly_dna.$org_chro.'.4', $fly_dna.
> $org_chro.'.4h',
> $fly_dna.$org_chro.'.U',$fly_dna.$org_chro.'.X', $fly_dna.
> $org_chro.'.Xh',
> $fly_dna.$org_chro.'.Yh']);
>
> my %fly_mysqldb = ( fly => ['fly_core']);
>
>
>
> ######################################################################
> ######
>
> my $meta='local';
>
> my $infile = shift;
>
> my $in = Bio::SeqIO->new ( -file => $infile,
> -format => 'fasta' );
>
> my $prefix="summary_";
> my $sumout = $prefix . $infile;
>
> $prefix="out_";
> $allout = $prefix . $infile;
>
> $datastatistics = "stat_" . $infile . ".dat";
>
>
> my $out = Bio::SeqIO->new ( -file => '>seq_out.fa',
> -format => 'fasta' );
> my $fout = Bio::SeqIO->new(-fh => \*STDOUT , -format => 'fasta');
>
> #1111111111
> while(my $query=$in->next_seq())
> {
>
> print ALLOUT ">",$query->id," ",$query->desc;
> print ALLOUT "\t",$query->seq,"\n";
> print SUMOUT ">",$query->id," ",$query->desc;
> print SUMOUT "\t",$query->seq,"\n";
> # print ">",$query->id," ",$query->desc;
> # print "\t",$query->seq,"\n";
>
> #2222222222
> foreach $organism (@organism)
> {
> my $dir = '/Crick/' . $organism . '/';
>
>
> #########################################
> # Go through each mysql database (mostly core and est)
> ########################################
> #333333333333
> foreach my $each_db (@{$fly_mysqldb{$organism}})
> {
> my $inputdb = &start_InputDB($each_db);
>
> #######################################
> # Go through each chromosome of a species
> #######################################
> #4444444444444
> foreach my $org_chro (@{$fly_chromosome{$organism}})
> {
>
> # database for blast
> my($local_blastdb,$db,$fmt);
> $local_blastdb = $dir . 'dna/' . $org_chro . '.fa';
>
> #print "is it here ", $local_blastdb, "\n";
>
> $db = $dir . 'dna/' . $organism . '.index';
> $fmt='Fasta';
> my $blastout = "blast.out." . $infile;
>
> my @param = ('program' => 'wublastn','database' =>
> $local_blastdb,
> 'E'=>10,'o'=>$blastout);
> my $factory = Bio::Tools::Run::StandAloneBlast->new(@param,
> _READMETHOD => "Blast");
> $wublast_path = $factory->program_path();
> print $wublast_path, "\n";
> $factory->q(-10000.0);
> $factory->g(F);
>
> #my $blastout = "blast.out." . $infile . $organisms . "." . $org_chro
> . "." . $each_db;
> my $blast_report = $factory->wublast($query);
>
> ##########
> # Look at every hits in blast output
> #
> ###########
> #55555555555
> while(my $result = $blast_report->next_result)
> {
> print "number of hits: ",$result->num_hits,"\n";
>
> if($result->num_hits > 0)
> {
>
> my $match_count=0;
> my $hit_count=0;
> my @myhits =$result->hits();
>
> foreach my $myhit (@myhits)
> {
> # Count the number of high scoring pair
> my $hsp_count=0;
> my @hsps = $myhit->hsps();
>
> foreach my $hsp (@hsps)
> {
> print "hsp_length:", $hsp->length, " and query_length:";
> print $query->length, "\n";
>
> #########
> # Counting the number of hits with the same
> length as query sequence
> ########
> if($hsp->length eq $query->length)
> {
> $match_count++;
> $hsp_count++;
> }
> } # end of foreach my $hsp (@hsps)
>
> } # end of foreach my $myhit (@myhits)
>
> ##########
> # Pick the top 4 of high scoring pair
> ###########
> if($match_count>0 && $match_count <= 4)
> {
>
> my $temp_organism;
> if($temp_organism ne $organism) {
> print $organism,": ",$match_count," full length hsps\n";
> }
> $temp_organism = $organism;
>
> foreach my $hit (@myhits)
> {
> my $hsp_count=0;
> my @hsps = $hit->hsps();
> foreach my $hsp (@hsps)
> {
> # if the length of the high scoring pair is
> equal to
> # the length of input sequence, then this is a
> good hit
> if($hsp->length eq $query->length)
> {
> $hsp_count++;
> }
> }
>
> print "hsp_count: $hsp_count.\n";
>
> if($hsp_count!=0)
> {
>
> print $hit->name, " : $hsp_count full length hsps\n";
> }
>
> if($hsp_count>0 )
> {
> my $id = $hit->name;
> my($dbobj,$seqio);
>
>
> foreach my $hsp (@hsps)
> {
> $hsp_start = $hsp->query->start;
> my $start = $hsp->hit->start(); my $end =
> $hsp->hit->end();
> if($hsp->length eq $query->length)
> {
> print "HIT: ",$hsp->hit_string," ",$hit->name;
>
> my $flank_start=$start-100;
> if($flank_start<1)
> {
> $flank_start=1;
> }
>
> my $flank_end=$end+100;
> if($flank_end>$hit->length)
> {
> $flank_end=$hit->length;
> }
> my $flank_len=$flank_start-$flank_end+1;
>
>
>
>
> ############################################################
>
> } #end of if($hsp->length eq $query-
> >length)
>
> } # end of foreach my $hsp (@hsps)
> } # end of if($hsp_count>0 && $hsp_count<4)
> } # end of foreach my $hit (@myhits)
> } # end of if($match_count>0 && $match_count<4)
> } # end of if($result->num_hits>0)
>
> #55555555555
> } # end of while(my $result = $blast_report->next_result)
> #55555555555
>
> #444444444444
> } # end of for $each_chromosome (keys %insect_chromosome)
> #444444444444
>
>
> #333333333333
> } # end of foreach my $each_db (@{$insect_mysqldb{$organism}})
> #333333333333
>
> #2222222222
> } # end of for($org_idx=0; $org_idx < max_number_org; org_idx++)
> #2222222222
>
> #1111111111
> } # end of while(my $query=$in->next_seq())
> #1111111111
>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l at portal.open-bio.org
> http://portal.open-bio.org/mailman/listinfo/bioperl-l
>
--
Jason Stajich
Duke University
http://www.duke.edu/~jes12/
More information about the Bioperl-l
mailing list