[Bioperl-l] Remote Blast - same script but different results
Roy
roychu at gmail.com
Tue Nov 24 23:00:58 UTC 2009
Hi bioperl community,
I've tried searching the old lists to see if this topic has been
covered, and perhaps this question arises from my own lack of
familiarity with BLAST, but (from my perl script listed below) I get
different results with remote blast when I call my script (that is, I
will either get hits or no hits at all). I'll call the script one
time, and get no hits. Then call the script again (with the same
parameters), and get the same several hits that I may have before
after having gotten no hits. I use a subroutine to parse the blast
report information, and then I use a boolean to indicate whether
results are returned or not. Any insight into what I may have missed
would be appreciated. Short question, is this behavior typical? My
understanding of how BLAST works is that it shouldn'tl...
Thanks in advance,
Roy
#!/usr/bin/perl -w
use strict;
use warnings;
use Carp;
use Bio::Perl;
use CGI;
use Bio::SeqIO;
use Bio::SearchIO;
use Bio::SeqFeature::Generic;
use Bio::Restriction::Analysis;
use Bio::Tools::Run::RemoteBlast;
use Bio::SimpleAlign;
use Bio::AlignIO;
use Bio::LocatableSeq;
my $five_seqobj = Bio::Seq->new(
-seq => 'ATTCCCACCGGGACCTGCGGGGCTGAGTGCCCTTCTCGGTTGCTGCCGCTGAGGAGCCCGCCCAGCCAGCCAGGGCCGCGAGGCCGAGGCCAGGCCGCAGCCCAGGAGCCGCCCCACCGCAGCTGGCGATGGACCCGCCGAGGCCCGCGCTGCTGGCGCTGCTGGCGCTGCCTGCGCTGCTGCTGCTGCTGCTGGCGGGCGCCAGGGCCG',
-display_id => 'genomic_a',
-alphabet => 'dna',
);
my $three_seqobj = Bio::Seq->new(
-seq => 'GTGAGTGCGCGGCCGCTCTGCGGGCGCAGAGGGAGCGGGAGGGAGCCGGCGGCACGAGGTTGGCCGGGGCAGCCTGGGCCTAGGCCAGAGGGAGGGCAGCCACAGGGTCCAGGGCGAGTGGGGGGATTGGACCAGCTGGCGGCCCCTGCAGGCTCAGGATGGGGGGCGCGGGATGGAGGGGCTGAGGAGGGGGTCTCCGGAGCCTGCCTC',
-display_id => 'genomic_b',
-alphabet => 'dna',
);
my @params = (
'-program' => 'blastn',
'-database' => 'refseq_genomic',
'-expect' => '10',
'-readmethod' => 'blastxml'
);
$Bio::Tools::Run::RemoteBlast::HEADER{'MEGABLAST'} = 'YES';
$Bio::Tools::Run::RemoteBlast::HEADER{'PERC_IDENT'} = 75;
$Bio::Tools::Run::RemoteBlast::HEADER{'FORMAT_TYPE'} = 'XML';
$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
$Bio::Tools::Run::RemoteBlast::HEADER{'HITLIST_SIZE'} = 100; # Put:
limit number of hits
my $factory_a = Bio::Tools::Run::RemoteBlast->new(@params);
$factory_a->retrieve_parameter('FORMAT_TYPE', 'XML');
my $hits_a;
my $hits_b;
my $r;
my $bool_hit;
print "Submitting BLAST query - 5' end (MEGABLAST = YES)\n";
$Bio::Tools::Run::RemoteBlast::HEADER{'MEGABLAST'} = 'YES';
$r = $factory_a->submit_blast($a_seqobj);
$bool_hit = fetch_blast_report($factory_a);
unless ($bool_hit) {
print "\nNo hits\n";
print "Re-submitting BLAST query - 5' end (MEGABLAST = NO)\n";
sleep 5;
$Bio::Tools::Run::RemoteBlast::HEADER{'MEGABLAST'} = 'NO';
$r = $factory_a->submit_blast($a_seqobj);
($bool_hit, $hits_a) = fetch_blast_report($factory_a);
if ($bool_hit == 0) { print "No hits\n"; }
sleep 5;
}
my $factory_b = Bio::Tools::Run::RemoteBlast->new(@params);
print "\n--------------------------------------------------\n\n";
print "Submitting BLAST query - 3' end (MEGABLAST = YES)\n";
$Bio::Tools::Run::RemoteBlast::HEADER{'MEGABLAST'} = 'YES';
$r = $remote_blast_three->submit_blast($b_seqobj);
$bool_hit = fetch_blast_report($factory_b);
unless ($bool_hit) {
print " No hits\n";
print "Re-submitting BLAST query - 3' end (MEGABLAST = NO)\n";
sleep 5;
$Bio::Tools::Run::RemoteBlast::HEADER{'MEGABLAST'} = 'NO';
$r = $factory_b->submit_blast($b_seqobj);
($bool_hit, $hits_b) = fetch_blast_report($factory_b);
if ($bool_hit == 0) { print " No hits\n"; }
sleep 5;
}
print "\nbye\n\n";
print "$hits_a\n$hits_b\n";
exit;
sub fetch_blast_report {
my ($factory) = @_;
my $v = 1;
my $bool_hit = 0;
my $hits = '';
print STDERR "waiting...";
while (my @rids = $factory->each_rid) {
foreach my $rid (@rids) {
print STDERR ".";
my $rc = $factory->retrieve_blast($rid);
# retrieves blast report from remote blast queue,
# returns -1 on error, 0 on 'job not finished', Bio::SearchIO object
# args, remote blast id (rid)
if (!ref($rc)) {
# if not empty string, ref EXPR returns a non-empty string if EXPR
is a reference
if ($rc < 0) {
$factory->remove_rid($rid);
}
print STDERR "." if ($v > 0);
#####################################################################################
is this printing out as multiple dots? when and why?
sleep 5;
} else {
$bool_hit = 1;
my $result = $rc->next_result();
unless ($result->num_hits > 0) {
$bool_hit = 0;
}
# returns: Bio::Search::Result::ResultI object
$factory->remove_rid($rid);
print "\ndatabase:\t", $result->database_name,"\n";
print "query name:\t", $result->query_name,"\n";
print "query length\t", $result->query_length,"\n";
print "num hits\t", $result->num_hits,"\n";
if ($result->num_hits) {
# $result->hits returns an array of hits
# $results->no_hits_found, boolean vs $#{@hits} ie. filtering\
while (my $hit = $result->next_hit) {
print "\nhit name:\t", $hit->name,"\n";
print "description:\t", $hit->description,"\n";
print "locus:\t", $hit->locus,"\n";
print "algorithm: ", $hit->algorithm,"\thit length: ",
$hit->length,"\thit ranking: ", $hit->rank,"\n";
while (my $hsp = $hit->next_hsp) {
print "evalue: ", $hsp->evalue,"\tscore: ",
$hsp->score,"\tpercent_id: ", $hsp->percent_identity,"\n";
print "query_start: ", $hsp->query->start,"\tquery_end: ",
$hsp->query->end;
print "\tquery_length: ", $hsp->query->length,"\tquery_strand:
", $hsp->strand('query'), "\n";
print "subject_start: ", $hsp->subject->start,"\tsubject_end: ",
$hsp->subject->end;
print "\tsubject_length: ",
$hsp->subject->length,"\tsubject_strand: ", $hsp->strand('subject'),
"\n\n";
my $aln = $hsp->get_aln;
if ($aln->is_flush) {
foreach my $seq ($aln->each_seq) {
print $seq->seq,"\n";
}
print $aln->gap_line, "\n";
print $aln->consensus_string(95), "\n\n";
}
$hits .= $hit->name."\t".$hsp->subject->start."\t".$hsp->subject->end."\t".$hsp->strand('subject')."\n";
}
}
}
}
}
return ($bool_hit, $hits);
}
}
More information about the Bioperl-l
mailing list