[Bioperl-l] wu-blast and bioperl
Tuan A. Tran
tuantran167 at gmail.com
Mon May 23 18:26:34 EDT 2005
Hi,
I tried to run wublast with bioperl. However, I got an error
-------------------- WARNING ---------------------
MSG: cannot find path to wublast
---------------------------------------------------
Can't call method "next_result" on an undefined value at testanno.pl line 225,
<GEN0> line 1.
Is there any method which is equivalent to "next_result" method? I
really appreciate if anyone can help.
My code lousy code is quoted below. I tested it with blastall from
ncbi. It works fine.
I am using debian sarge and I have wu-blast install in
/usr/local/WU-BLAST2.0. Furthermore, I also installed new bioperl-1.5.
Thanks in advance
TAT
=================
#!/usr/local/lib/perl -w
BEGIN {$ENV{WUBLASTDIR} = '/usr/local/WU-BLAST2.0/';};
use Bio::Seq;
use Bio::SeqIO;
use Bio::SearchIO;
use Bio::AlignIO;
use Bio::SimpleAlign;
use Bio::LocatableSeq;
use Bio::Tools::Run::StandAloneBlast;
use Getopt::Long;
use Bio::DB::GenBank;
use Bio::DB::Flat::BDB;
#use Bio::Index::GenBank;
use Bio::Index::Fasta;
use Bio::SeqFeature::Generic;
use DBI;
#########
# Declare database names
#########
my $db_core_fly = 'fly_core';
############
#
# Define variables for tables in database drosophila_melanogaster_core_30_3d
#
###########
my $tattrib = 'attrib_type';
my $tseq_region = 'seq_region';
my $tseq_region_attrib = 'seq_region_attrib';
my $texon = 'exon';
my $texon_stable_id = 'exon_stable_id';
my $texon_transcript = 'exon_transcript';
my $tgene = 'gene';
my $tgene_stable_id = 'gene_stable_id';
my $ttranscript = 'transcript';
my $ttranscript_stable_id = 'transcript_stable_id';
my $txref='xref';
my $texternal_db = 'external_db';
my $texternal_synonym = 'external_synonym';
###########
# This subroutine is to tell Perl to communicate with my MySQL database
# Source: got it after googling
###########
sub start_InputDB {
my $database = $_[0];
use DBI;
my $DBD = 'mysql';
my $host ='localhost';
my $user = 'tuan';
my $passwd = '';
my $inputdb = DBI->connect("DBI:$DBD:$database:$host","$user","",
{RaiseError => 1, AutoCommit => 1});
return $inputdb;
}
######################
sub feature2string {
my $f = shift;
#my $stable_id = $f->stable_id();
my $seq_region = $f->seq_region_name();
my $start = $f->start();
my $end = $f->end();
my $strand = $f->strand();
my $seq = $f->seq();
return ">$seq_region:$start - $end ($strand)\n$seq";
}
sub getGene {
my $f = shift;
#my $stable_id = $f->stable_id();
my $seq_region = $f->seq_region_name();
my $start = $f->start();
my $end = $f->end();
my $strand = $f->strand();
my $seq = $f->seq();
return ">$seq_region:$start - $end ($strand)\n$seq";
}
############################################################################
# Change number of organisms according to your problem
# BE SURE TO CHANGE AS EXAMPLE BELOW
############################################################################
my (@organism,$org_idx);
my $max_number_organism = 3;
@organism = ('fly');
my $fly_dna = 'Drosophila_melanogaster.BDGP3.2.1.may.dna_rm.';
my %fly_chromosome = (
fly => [$fly_dna.$org_chro.'.2h',$fly_dna.$org_chro.'.2L',
$fly_dna.$org_chro.'.2R',$fly_dna.$org_chro.'.3h',$fly_dna.$org_chro.'.3L',
$fly_dna.$org_chro.'.3R',$fly_dna.$org_chro.'.4', $fly_dna.$org_chro.'.4h',
$fly_dna.$org_chro.'.U',$fly_dna.$org_chro.'.X', $fly_dna.$org_chro.'.Xh',
$fly_dna.$org_chro.'.Yh']);
my %fly_mysqldb = ( fly => ['fly_core']);
############################################################################
my $meta='local';
my $infile = shift;
my $in = Bio::SeqIO->new ( -file => $infile,
-format => 'fasta' );
my $prefix="summary_";
my $sumout = $prefix . $infile;
$prefix="out_";
$allout = $prefix . $infile;
$datastatistics = "stat_" . $infile . ".dat";
my $out = Bio::SeqIO->new ( -file => '>seq_out.fa',
-format => 'fasta' );
my $fout = Bio::SeqIO->new(-fh => \*STDOUT , -format => 'fasta');
#1111111111
while(my $query=$in->next_seq())
{
print ALLOUT ">",$query->id," ",$query->desc;
print ALLOUT "\t",$query->seq,"\n";
print SUMOUT ">",$query->id," ",$query->desc;
print SUMOUT "\t",$query->seq,"\n";
# print ">",$query->id," ",$query->desc;
# print "\t",$query->seq,"\n";
#2222222222
foreach $organism (@organism)
{
my $dir = '/Crick/' . $organism . '/';
#########################################
# Go through each mysql database (mostly core and est)
########################################
#333333333333
foreach my $each_db (@{$fly_mysqldb{$organism}})
{
my $inputdb = &start_InputDB($each_db);
#######################################
# Go through each chromosome of a species
#######################################
#4444444444444
foreach my $org_chro (@{$fly_chromosome{$organism}})
{
# database for blast
my($local_blastdb,$db,$fmt);
$local_blastdb = $dir . 'dna/' . $org_chro . '.fa';
#print "is it here ", $local_blastdb, "\n";
$db = $dir . 'dna/' . $organism . '.index';
$fmt='Fasta';
my $blastout = "blast.out." . $infile;
my @param = ('program' => 'wublastn','database' => $local_blastdb,
'E'=>10,'o'=>$blastout);
my $factory = Bio::Tools::Run::StandAloneBlast->new(@param,
_READMETHOD => "Blast");
$wublast_path = $factory->program_path();
print $wublast_path, "\n";
$factory->q(-10000.0);
$factory->g(F);
#my $blastout = "blast.out." . $infile . $organisms . "." . $org_chro
. "." . $each_db;
my $blast_report = $factory->wublast($query);
##########
# Look at every hits in blast output
#
###########
#55555555555
while(my $result = $blast_report->next_result)
{
print "number of hits: ",$result->num_hits,"\n";
if($result->num_hits > 0)
{
my $match_count=0;
my $hit_count=0;
my @myhits =$result->hits();
foreach my $myhit (@myhits)
{
# Count the number of high scoring pair
my $hsp_count=0;
my @hsps = $myhit->hsps();
foreach my $hsp (@hsps)
{
print "hsp_length:", $hsp->length, " and query_length:";
print $query->length, "\n";
#########
# Counting the number of hits with the same length as query sequence
########
if($hsp->length eq $query->length)
{
$match_count++;
$hsp_count++;
}
} # end of foreach my $hsp (@hsps)
} # end of foreach my $myhit (@myhits)
##########
# Pick the top 4 of high scoring pair
###########
if($match_count>0 && $match_count <= 4)
{
my $temp_organism;
if($temp_organism ne $organism) {
print $organism,": ",$match_count," full length hsps\n";
}
$temp_organism = $organism;
foreach my $hit (@myhits)
{
my $hsp_count=0;
my @hsps = $hit->hsps();
foreach my $hsp (@hsps)
{
# if the length of the high scoring pair is equal to
# the length of input sequence, then this is a good hit
if($hsp->length eq $query->length)
{
$hsp_count++;
}
}
print "hsp_count: $hsp_count.\n";
if($hsp_count!=0)
{
print $hit->name, " : $hsp_count full length hsps\n";
}
if($hsp_count>0 )
{
my $id = $hit->name;
my($dbobj,$seqio);
foreach my $hsp (@hsps)
{
$hsp_start = $hsp->query->start;
my $start = $hsp->hit->start(); my $end = $hsp->hit->end();
if($hsp->length eq $query->length)
{
print "HIT: ",$hsp->hit_string," ",$hit->name;
my $flank_start=$start-100;
if($flank_start<1)
{
$flank_start=1;
}
my $flank_end=$end+100;
if($flank_end>$hit->length)
{
$flank_end=$hit->length;
}
my $flank_len=$flank_start-$flank_end+1;
############################################################
} #end of if($hsp->length eq $query->length)
} # end of foreach my $hsp (@hsps)
} # end of if($hsp_count>0 && $hsp_count<4)
} # end of foreach my $hit (@myhits)
} # end of if($match_count>0 && $match_count<4)
} # end of if($result->num_hits>0)
#55555555555
} # end of while(my $result = $blast_report->next_result)
#55555555555
#444444444444
} # end of for $each_chromosome (keys %insect_chromosome)
#444444444444
#333333333333
} # end of foreach my $each_db (@{$insect_mysqldb{$organism}})
#333333333333
#2222222222
} # end of for($org_idx=0; $org_idx < max_number_org; org_idx++)
#2222222222
#1111111111
} # end of while(my $query=$in->next_seq())
#1111111111
More information about the Bioperl-l
mailing list