[Bioperl-l] Bug in Bio::Tools::BPlite.pm ?
Leonardo Marino-Ramirez
marino@tofu.tamu.edu
Tue, 25 Sep 2001 11:24:13 -0500 (CDT)
This message is in MIME format. The first part should be readable text,
while the remaining parts are likely unreadable without MIME-aware tools.
Send mail to mime@docserver.cac.washington.edu for more info.
--1537571378-215628967-999667291=:15741
Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
Content-ID: <Pine.LNX.4.30.0109251111522.19445@tofu.tamu.edu>
Hello,
I am trying to get subject start and end coordinates from blast reports
using the Bio::Tools::BPlite module.
I am using a standard script to parse blast reports:
#!/usr/bin/perl
# parse_blast
# Reads and parses blast reports
# usage parse_blast -d <directory> > <Name_of_parse_output>
# Output format
#
use strict;
use Bio::Tools::BPlite;
use Getopt::Std;
use vars qw($opt_d
$file
@blast_reports
$fname
$report
@tmp
$qn
$gi
$sc
$ev
$pid
$qfr
$qstr
$qsta
$qend
$ssta
$send
$sbjct
$type
$db
$anal1
$anal2
$anal3
$anal4
$anal
);
## Main driver: read the -d <directory> option, collect every blast report
## in that directory, and hand each one to parse_blast.
getopt('d');
die "usage: parse_blast -d <directory> > <Name_of_parse_output>\n"
    unless defined $opt_d && length $opt_d;
my $dirname = "$opt_d";
## Get a working directory containing blast reports
opendir(DIR, $dirname) or die "Cannot open directory '$dirname': $!\n";
## Test definedness explicitly so an entry named "0" cannot end the loop early.
while (defined($file = readdir(DIR))) {
    ## blast reports must contain the .br suffix.
    ## readdir returns bare names, so keep the directory prefix; otherwise the
    ## reports can only be opened when the script is run from inside $dirname.
    push(@blast_reports, "$dirname/$file") if ($file =~ /\.br$/);
}
closedir(DIR);
## $fname is the package variable declared with "use vars"; it holds the full
## path of the current report and is also passed explicitly to parse_blast.
foreach $fname (@blast_reports) {
    parse_blast($fname);
}
## parse_blast subroutine
##
## Parses one blast report with Bio::Tools::BPlite and prints one
## tab-separated line per HSP to STDOUT, with the columns:
##   query start, query end, bit score, query strand, query frame,
##   analysis code, query name, subject start, subject end, hit name,
##   gi number, value of $hsp->P, percent identity.
##
## Argument: path of the report file. When no (defined, non-empty) argument
## is given, the package variable $fname is used instead, so callers that
## only set $fname keep working.
## Side effects: the package variables declared with "use vars" ($qn, $db,
## $gi, $sc, ...) are overwritten and keep the values of the last HSP seen.
##
## NOTE(review): for Plus/Minus hits the subject start/end printed here may be
## normalised by BPlite to start <= end, with the orientation carried by
## $hsp->subject->strand -- confirm against the Bio::Tools::BPlite::HSP docs
## before relying on the column order to infer orientation.
sub parse_blast {
    my ($path) = @_;
    ## Fall back to the package variable for callers that only set $fname.
    $path = $fname unless defined $path && length $path;
    my $report = Bio::Tools::BPlite->new(-file => $path);
    my $query  = $report->query;
    ## Get only the clone id from all the query name
    @tmp = split ' ', $query;
    $qn  = $tmp[0];
    $db  = $report->database;
    ## get analysis code for database; slots that do not apply stay empty
    ## strings (not undef) so that building $anal below never interpolates
    ## an undefined value
    $anal1 = $anal2 = $anal3 = $anal4 = '';
    if ($db eq "ecoli.aa") {
        $anal1 = "1";
    } elsif ($db eq "pdbaa") {
        $anal2 = "2";
    } elsif ($db eq "ecoli.na") {
        $anal3 = "3";
    } else {
        $anal1 = $anal2 = $anal3 = "0";
    }
    while (my $sbjct = $report->nextSbjct) {
        my $type = $sbjct->report_type();
        ## get analysis code for blast type
        if ($type eq "BLASTX") {
            $anal4 = "1";
        } elsif ($type eq "BLASTN") {
            $anal4 = "2";
        } else {
            $anal4 = "0";
        }
        my $blast_hit = $sbjct->name;
        ## get gi's from blast report: second '|'-separated field of the hit
        ## name, or an empty string when the name has no such field
        @tmp = split /\|/, $blast_hit;
        $gi  = defined $tmp[1] ? $tmp[1] : '';
        while (my $hsp = $sbjct->nextHSP) {
            $sc   = $hsp->bits;
            $ev   = $hsp->P;
            $pid  = $hsp->percent;
            $qfr  = $hsp->query->frame;
            $qstr = $hsp->query->strand;
            $qsta = $hsp->query->start;
            $qend = $hsp->query->end;
            $ssta = $hsp->subject->start;
            $send = $hsp->subject->end;
            ## blast-type code first, then the three database slots
            $anal = "$anal4$anal1$anal2$anal3";
            print join("\t",
                $qsta, $qend, $sc, $qstr, $qfr, $anal, $qn,
                $ssta, $send, $blast_hit, $gi, $ev, $pid), "\n";
        }
    }
    return;
}
The problem is that when I am reading a blast report (see attachment) the
start and end coordinates are inverted!
My output of the script above looks like this:
102 613 944 1 23 EB10001G04.Seq 3503126
3503640gi|6626251|gb|U00096.1|U00096 Escherichia coli K-12 MG1655 complete
genome 6626251 0.0 98.6
What is the problem? Note that there is also a tab missing between the
subject end and the query name.
uname -a
Linux tofu.tamu.edu 2.2.12-20smp #1 SMP Mon Sep 27 10:34:45 EDT 1999 i686 unknown
perl 5.005_03 built for i386-linux
$Id: BPlite.pm,v 1.25 2001/09/05 11:38:57 heikki Exp
Thanks, Leonardo
--1537571378-215628967-999667291=:15741
Content-Type: TEXT/PLAIN; CHARSET=US-ASCII; NAME="EB10001G04.seq.ecoli.na.br"
Content-Transfer-Encoding: BASE64
Content-ID: <Pine.LNX.4.30.0109251115050.19445@tofu.tamu.edu>
Content-Description:
Content-Disposition: ATTACHMENT; FILENAME="EB10001G04.seq.ecoli.na.br"
QkxBU1ROIDIuMi4xIFtBcHItMTMtMjAwMV0NCg0KDQpSZWZlcmVuY2U6IEFs
dHNjaHVsLCBTdGVwaGVuIEYuLCBUaG9tYXMgTC4gTWFkZGVuLCBBbGVqYW5k
cm8gQS4gU2NoYWZmZXIsIA0KSmluZ2h1aSBaaGFuZywgWmhlbmcgWmhhbmcs
IFdlYmIgTWlsbGVyLCBhbmQgRGF2aWQgSi4gTGlwbWFuICgxOTk3KSwgDQoi
R2FwcGVkIEJMQVNUIGFuZCBQU0ktQkxBU1Q6IGEgbmV3IGdlbmVyYXRpb24g
b2YgcHJvdGVpbiBkYXRhYmFzZSBzZWFyY2gNCnByb2dyYW1zIiwgIE51Y2xl
aWMgQWNpZHMgUmVzLiAyNTozMzg5LTM0MDIuDQoNClF1ZXJ5PSBFQjEwMDAx
RzA0LlNlcSAgICA2MTMgICAgICAwICAgIDYxMyAgQUJJDQogICAgICAgICAo
NjEzIGxldHRlcnMpDQoNCkRhdGFiYXNlOiBlY29saS5uYQ0KICAgICAgICAg
ICAxIHNlcXVlbmNlczsgNCw2MzksMjIxIHRvdGFsIGxldHRlcnMNCg0KU2Vh
cmNoaW5nLmRvbmUNCg0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFNjb3JlICAg
ICBFDQpTZXF1ZW5jZXMgcHJvZHVjaW5nIHNpZ25pZmljYW50IGFsaWdubWVu
dHM6ICAgICAgICAgICAgICAgICAgICAgICAgKGJpdHMpICBWYWx1ZQ0KDQpn
aXw2NjI2MjUxfGdifFUwMDA5Ni4xfFUwMDA5NiBFc2NoZXJpY2hpYSBjb2xp
IEstMTIgTUcxNjU1IGNvbXBsLi4uICAgOTQ0ICAwLjANCg0KPmdpfDY2MjYy
NTF8Z2J8VTAwMDk2LjF8VTAwMDk2IEVzY2hlcmljaGlhIGNvbGkgSy0xMiBN
RzE2NTUgY29tcGxldGUgZ2Vub21lDQogICAgICAgICAgTGVuZ3RoID0gNDYz
OTIyMQ0KDQogU2NvcmUgPSAgOTQ0IGJpdHMgKDQ3NiksIEV4cGVjdCA9IDAu
MA0KIElkZW50aXRpZXMgPSA1MDgvNTE1ICg5OCUpLCBHYXBzID0gMy81MTUg
KDAlKQ0KIFN0cmFuZCA9IFBsdXMgLyBNaW51cw0KDQogICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICANClF1ZXJ5OiAxMDIgICAgIGNhYWFnY2N0Z2Fj
Z2F0dGF0dGdhZ2djZ2NhZ2NhYWFhZ2NhZ3R0dGdjZ2N0Z2d0Z2dhdGFnY2F0
dHRndCAxNjENCiAgICAgICAgICAgICAgIHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fA0KU2Jq
Y3Q6IDM1MDM2NDAgY2FhYWdjY3RnYWNnYXR0YXR0Z2FnZ2NnY2FnY2FhYWFn
Y2FndHR0Z2NnY3RnZ3RnZ2F0YWdjYXR0dGd0IDM1MDM1ODENCg0KICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgDQpRdWVyeTogMTYyICAgICBjZ2Nj
YXR0dGNjY2NnZ2NhZ2NnYWd0dHRjdHRhY3RnZ2NnZ3RnYXR0dGFnZ2N0dGFh
Y2djY2FnZ2FjdGcgMjIxDQogICAgICAgICAgICAgICB8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHwNClNiamN0OiAzNTAzNTgwIGNnY2NhdHR0Y2NjY2dnY2FnY2dhZ3R0dGN0
dGFjdGdnY2dndGdhdHR0YWdnY3R0YWFjZ2NjYWdnYWN0ZyAzNTAzNTIxDQoN
CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIA0KUXVlcnk6IDIyMiAg
ICAgYWF0Y2FhY2NnY2djZ3R0YWNjY2FnY2d0Z3RnZ2FhY2FnZ3RnY3RnZ2N0
Z2F0Z2NhdHR0Y2FjZ2NhY2FnIDI4MQ0KICAgICAgICAgICAgICAgfHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8DQpTYmpjdDogMzUwMzUyMCBhYXRjYWFjY2djZ2NndHRhY2Nj
YWdjZ3RndGdnYWFjYWdndGdjdGdnY3RnYXRnY2F0dHRjYWNnY2FjYWcgMzUw
MzQ2MQ0KDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICANClF1ZXJ5
OiAyODIgICAgIGdjdGdjZ2djZ2N0Z2d0Z2NhZ2dnY2djZ2dnZ2FjdGdnY2dj
Z2F0dGNnY2djY2dnZ2N0Z2djZ2djdHR0ZyAzNDENCiAgICAgICAgICAgICAg
IHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fA0KU2JqY3Q6IDM1MDM0NjAgZ2N0Z2NnZ2NnY3Rn
Z3RnY2FnZ2djZ2NnZ2dnYWN0Z2djZ2NnYXR0Y2djZ2NjZ2dnY3RnZ2NnZ2N0
dHRnIDM1MDM0MDENCg0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
DQpRdWVyeTogMzQyICAgICBjdGNhYWFjY2dnZ2djYWdjZ3RjdHRjdGdndGdj
YXRnYWNnY2djY3RndHR0YWNjY2dhY2dhY2FjZ2dndHQgNDAxDQogICAgICAg
ICAgICAgICB8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHwNClNiamN0OiAzNTAzNDAwIGN0Y2Fh
YWNjZ2dnZ2NhZ2NndGN0dGN0Z2d0Z2NhdGdhY2djZ2NjdGd0dHRhY2NjZ2Fj
Z2FjYWNnZ2d0dCAzNTAzMzQxDQoNCiAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgIA0KUXVlcnk6IDQwMiAgICAgYXR0YXR0Z2FnY2FnYXRnZ2dnY3Rn
YWNnY3R0YXR0YWN0Z3R0Z2F0dHRjYWF0Z2FjY3RndGNnZ2NhY3RnIDQ2MQ0K
ICAgICAgICAgICAgICAgfHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8DQpTYmpjdDogMzUwMzM0
MCBhdHRhdHRnYWdjYWdhdGdnZ2djdGdhY2djdHRhdHRhY3RndHRnYXR0dGNh
YXRnYWNjdGd0Y2dnY2FjdGcgMzUwMzI4MQ0KDQogICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICANClF1ZXJ5OiA0NjIgICAgIGFhZ2NhZ2d0Y2d0Y2dh
Y2dhYWdjYWNhYWNjZ2dhdGdjZ2djZ2N0Z2d0Z2NhZ2NhdGFjZ2NnY2NhZ2Nh
ZyA1MjENCiAgICAgICAgICAgICAgIHx8fHx8fHx8fHx8fHx8fHx8ICAgfHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fA0KU2JqY3Q6
IDM1MDMyODAgYWFnY2FnZ3RjZ3RjZ2FjZ2FnY2FhY2FhY2NnZ2F0Z2NnZ2Nn
Y3RnZ3RnY2FnY2F0YWNnY2djY2FnY2FnIDM1MDMyMjENCg0KICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgDQpRdWVyeTogNTIyICAgICBjY2djYWdn
YWNhZ2N0YWNndGdjdGdnY2FnYXRndGdjdGdnY2FhY2d0dGdjZ2NnY2dnY2Fn
Z3RndHQtY2EgNTgwDQogICAgICAgICAgICAgICB8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHwgfHwN
ClNiamN0OiAzNTAzMjIwIGNjZ2NhZ2dhY2FnY3RhY2d0Z2N0Z2djYWdhdGd0
Z2N0Z2djYWFjZ3R0Z2NnY2djZ2djYWdndGd0dGNjYSAzNTAzMTYxDQoNCiAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgDQpRdWVyeTogNTgxICAgICBnY2ctdGFhY2NnYXRnYWMtYWN0YXRnY25n
dGdhdGdhYWdndCA2MTMNCiAgICAgICAgICAgICAgIHx8fCB8fHx8fHx8fHx8
fCB8fHx8fHx8IHx8fHx8fHx8fHx8DQpTYmpjdDogMzUwMzE2MCBnY2d0dGFh
Y2NnYXRnYWNhYWN0YXRnY2dndGdhdGdhYWdndCAzNTAzMTI2DQoNCg0KICBE
YXRhYmFzZTogZWNvbGkubmENCiAgICBQb3N0ZWQgZGF0ZTogIEF1ZyAyMSwg
MjAwMSAgMjo1MCBQTQ0KICBOdW1iZXIgb2YgbGV0dGVycyBpbiBkYXRhYmFz
ZTogNCw2MzksMjIxDQogIE51bWJlciBvZiBzZXF1ZW5jZXMgaW4gZGF0YWJh
c2U6ICAxDQogIA0KTGFtYmRhICAgICBLICAgICAgSA0KICAgIDEuMzcgICAg
MC43MTEgICAgIDEuMzEgDQoNCkdhcHBlZA0KTGFtYmRhICAgICBLICAgICAg
SA0KICAgIDEuMzcgICAgMC43MTEgICAgIDEuMzEgDQoNCg0KTWF0cml4OiBi
bGFzdG4gbWF0cml4OjEgLTMNCkdhcCBQZW5hbHRpZXM6IEV4aXN0ZW5jZTog
NSwgRXh0ZW5zaW9uOiAyDQpOdW1iZXIgb2YgSGl0cyB0byBEQjogMjkxNQ0K
TnVtYmVyIG9mIFNlcXVlbmNlczogMQ0KTnVtYmVyIG9mIGV4dGVuc2lvbnM6
IDI5MTUNCk51bWJlciBvZiBzdWNjZXNzZnVsIGV4dGVuc2lvbnM6IDMNCk51
bWJlciBvZiBzZXF1ZW5jZXMgYmV0dGVyIHRoYW4gMS4wZS0wNTogMQ0KbGVu
Z3RoIG9mIHF1ZXJ5OiA2MTMNCmxlbmd0aCBvZiBkYXRhYmFzZTogNCw2Mzks
MjIxDQplZmZlY3RpdmUgSFNQIGxlbmd0aDogMTYNCmVmZmVjdGl2ZSBsZW5n
dGggb2YgcXVlcnk6IDU5Nw0KZWZmZWN0aXZlIGxlbmd0aCBvZiBkYXRhYmFz
ZTogNCw2MzksMjA1DQplZmZlY3RpdmUgc2VhcmNoIHNwYWNlOiAyNzY5NjA1
Mzg1DQplZmZlY3RpdmUgc2VhcmNoIHNwYWNlIHVzZWQ6IDI3Njk2MDUzODUN
ClQ6IDANCkE6IDQwDQpYMTogNiAoMTEuOSBiaXRzKQ0KWDI6IDE1ICgyOS43
IGJpdHMpDQpTMTogMTIgKDI0LjMgYml0cykNClMyOiAyNCAoNDguMSBiaXRz
KQ0K
--1537571378-215628967-999667291=:15741--