[Bioperl-l] Bug in Bio::Tools::BPlite.pm ?

Leonardo Marino-Ramirez marino@tofu.tamu.edu
Tue, 25 Sep 2001 11:24:13 -0500 (CDT)


  This message is in MIME format.  The first part should be readable text,
  while the remaining parts are likely unreadable without MIME-aware tools.
  Send mail to mime@docserver.cac.washington.edu for more info.

--1537571378-215628967-999667291=:15741
Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
Content-ID: <Pine.LNX.4.30.0109251111522.19445@tofu.tamu.edu>

Hello,

I am trying to get subject start and end coordinates from blast reports
using the Bio::Tools::BPlite module.

I am using a standard script to parse blast reports:

#!/usr/bin/perl
# parse_blast
# Reads and parses blast reports
# usage parse_blast -d <directory>  >  <Name_of_parse_output>
# Output format
#

use strict;
use Bio::Tools::BPlite;
use Getopt::Std;

use vars qw($opt_d
            $file
            @blast_reports
            $fname
            $report
            @tmp
            $qn
            $gi
            $sc
            $ev
            $pid
            $qfr
            $qstr
            $qsta
            $qend
            $ssta
            $send
            $sbjct
            $type
            $db
            $anal1
            $anal2
            $anal3
            $anal4
            $anal
            );

## Read the -d <directory> option into $opt_d.
getopt('d');

## Stop early with a usage message instead of silently scanning "".
die "usage: parse_blast -d <directory>  >  <Name_of_parse_output>\n"
    unless defined $opt_d && length $opt_d;

my $dirname = $opt_d;

## Get a working directory containing blast reports
opendir(DIR, $dirname)
    or die "Cannot open directory '$dirname': $!\n";

## Explicit defined() so that a file literally named "0" does not end
## the scan early.
while (defined($file = readdir(DIR))) {
    ## blast reports must contain the .br suffix; readdir returns bare
    ## names, so keep the directory prefix to make the report openable
    ## from any current working directory
    push(@blast_reports, "$dirname/$file") if $file =~ /\.br$/;
}

## Only reached when opendir succeeded, so the handle is always valid.
closedir(DIR);

## Keep the package variable $fname as the loop variable: parse_blast
## may read it directly in addition to the argument passed here.
foreach $fname (@blast_reports) {

    parse_blast($fname);

}


## parse_blast subroutine
##
## Parses one BLAST report with Bio::Tools::BPlite and prints one
## tab-separated line per HSP to STDOUT, with the columns:
##   query start, query end, bit score, query strand, query frame,
##   analysis code, query name, subject start, subject end, hit name,
##   gi, value of $hsp->P, percent identity
##
## Arguments:
##   $path - (optional) path of the report file; when omitted or undef
##           the package variable $fname is used instead.
## Returns: nothing.
sub parse_blast {
    my ($path) = @_;
    $path = $fname unless defined $path;

    my $report = Bio::Tools::BPlite->new(-file => $path);

    ## Get only the clone id (first whitespace-separated word) from the
    ## full query name
    my ($qn) = split ' ', $report->query;
    $qn = '' unless defined $qn;

    my $db = $report->database;
    $db = '' unless defined $db;

    ## get analysis code for database: a single digit for a known
    ## database, "000" for anything else
    my %db_code = (
        'ecoli.aa' => '1',
        'pdbaa'    => '2',
        'ecoli.na' => '3',
    );
    my $db_part = exists $db_code{$db} ? $db_code{$db} : '000';

    ## get analysis code for blast type; anything unlisted maps to "0"
    my %type_code = (
        'BLASTX' => '1',
        'BLASTN' => '2',
    );

    while (my $sbjct = $report->nextSbjct) {
        my $type = $sbjct->report_type();
        $type = '' unless defined $type;
        my $type_part = exists $type_code{$type} ? $type_code{$type} : '0';

        ## blast-type digit first, then the database part
        my $anal = $type_part . $db_part;

        my $blast_hit = $sbjct->name;

        ## get gi's from blast report: second '|'-separated field of the
        ## hit name, or empty when the name has no such field
        my @fields = split /\|/, $blast_hit;
        my $gi = defined $fields[1] ? $fields[1] : '';

        while (my $hsp = $sbjct->nextHSP) {
            my $sc   = $hsp->bits;
            my $ev   = $hsp->P;
            my $pid  = $hsp->percent;
            my $qfr  = $hsp->query->frame;
            my $qstr = $hsp->query->strand;
            my $qsta = $hsp->query->start;
            my $qend = $hsp->query->end;

            ## NOTE(review): for a Plus/Minus hit these two values were
            ## observed to come back ordered start <= end, i.e. swapped
            ## relative to the Sbjct lines of the report text.
            ## Presumably $hsp->subject->strand carries the orientation
            ## -- confirm against the BPlite documentation.
            my $ssta = $hsp->subject->start;
            my $send = $hsp->subject->end;

            ## Undefined fields print as empty columns.
            my @columns = (
                $qsta, $qend, $sc, $qstr, $qfr, $anal, $qn,
                $ssta, $send, $blast_hit, $gi, $ev, $pid,
            );
            print join("\t", map { defined $_ ? $_ : '' } @columns), "\n";
        }
    }

    return;
}


The problem is that when I am reading a blast report (see attachment) the
start and end coordinates are inverted!

My output of the script above looks like this:

102     613     944     1               23      EB10001G04.Seq  3503126
3503640gi|6626251|gb|U00096.1|U00096 Escherichia coli K-12 MG1655 complete
genome      6626251  0.0     98.6

What is the problem? Note that there is also a tab missing between the
subject end and the subject (hit) name.

uname -a
Linux tofu.tamu.edu 2.2.12-20smp #1 SMP Mon Sep 27 10:34:45 EDT 1999 i686
unknown

perl 5.005_03 built for i386-linux

$Id: BPlite.pm,v 1.25 2001/09/05 11:38:57 heikki Exp

Thanks, Leonardo

--1537571378-215628967-999667291=:15741
Content-Type: TEXT/PLAIN; CHARSET=US-ASCII; NAME="EB10001G04.seq.ecoli.na.br"
Content-Transfer-Encoding: BASE64
Content-ID: <Pine.LNX.4.30.0109251115050.19445@tofu.tamu.edu>
Content-Description: 
Content-Disposition: ATTACHMENT; FILENAME="EB10001G04.seq.ecoli.na.br"

QkxBU1ROIDIuMi4xIFtBcHItMTMtMjAwMV0NCg0KDQpSZWZlcmVuY2U6IEFs
dHNjaHVsLCBTdGVwaGVuIEYuLCBUaG9tYXMgTC4gTWFkZGVuLCBBbGVqYW5k
cm8gQS4gU2NoYWZmZXIsIA0KSmluZ2h1aSBaaGFuZywgWmhlbmcgWmhhbmcs
IFdlYmIgTWlsbGVyLCBhbmQgRGF2aWQgSi4gTGlwbWFuICgxOTk3KSwgDQoi
R2FwcGVkIEJMQVNUIGFuZCBQU0ktQkxBU1Q6IGEgbmV3IGdlbmVyYXRpb24g
b2YgcHJvdGVpbiBkYXRhYmFzZSBzZWFyY2gNCnByb2dyYW1zIiwgIE51Y2xl
aWMgQWNpZHMgUmVzLiAyNTozMzg5LTM0MDIuDQoNClF1ZXJ5PSBFQjEwMDAx
RzA0LlNlcSAgICA2MTMgICAgICAwICAgIDYxMyAgQUJJDQogICAgICAgICAo
NjEzIGxldHRlcnMpDQoNCkRhdGFiYXNlOiBlY29saS5uYQ0KICAgICAgICAg
ICAxIHNlcXVlbmNlczsgNCw2MzksMjIxIHRvdGFsIGxldHRlcnMNCg0KU2Vh
cmNoaW5nLmRvbmUNCg0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFNjb3JlICAg
ICBFDQpTZXF1ZW5jZXMgcHJvZHVjaW5nIHNpZ25pZmljYW50IGFsaWdubWVu
dHM6ICAgICAgICAgICAgICAgICAgICAgICAgKGJpdHMpICBWYWx1ZQ0KDQpn
aXw2NjI2MjUxfGdifFUwMDA5Ni4xfFUwMDA5NiBFc2NoZXJpY2hpYSBjb2xp
IEstMTIgTUcxNjU1IGNvbXBsLi4uICAgOTQ0ICAwLjANCg0KPmdpfDY2MjYy
NTF8Z2J8VTAwMDk2LjF8VTAwMDk2IEVzY2hlcmljaGlhIGNvbGkgSy0xMiBN
RzE2NTUgY29tcGxldGUgZ2Vub21lDQogICAgICAgICAgTGVuZ3RoID0gNDYz
OTIyMQ0KDQogU2NvcmUgPSAgOTQ0IGJpdHMgKDQ3NiksIEV4cGVjdCA9IDAu
MA0KIElkZW50aXRpZXMgPSA1MDgvNTE1ICg5OCUpLCBHYXBzID0gMy81MTUg
KDAlKQ0KIFN0cmFuZCA9IFBsdXMgLyBNaW51cw0KDQogICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICANClF1ZXJ5OiAxMDIgICAgIGNhYWFnY2N0Z2Fj
Z2F0dGF0dGdhZ2djZ2NhZ2NhYWFhZ2NhZ3R0dGdjZ2N0Z2d0Z2dhdGFnY2F0
dHRndCAxNjENCiAgICAgICAgICAgICAgIHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fA0KU2Jq
Y3Q6IDM1MDM2NDAgY2FhYWdjY3RnYWNnYXR0YXR0Z2FnZ2NnY2FnY2FhYWFn
Y2FndHR0Z2NnY3RnZ3RnZ2F0YWdjYXR0dGd0IDM1MDM1ODENCg0KICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgDQpRdWVyeTogMTYyICAgICBjZ2Nj
YXR0dGNjY2NnZ2NhZ2NnYWd0dHRjdHRhY3RnZ2NnZ3RnYXR0dGFnZ2N0dGFh
Y2djY2FnZ2FjdGcgMjIxDQogICAgICAgICAgICAgICB8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHwNClNiamN0OiAzNTAzNTgwIGNnY2NhdHR0Y2NjY2dnY2FnY2dhZ3R0dGN0
dGFjdGdnY2dndGdhdHR0YWdnY3R0YWFjZ2NjYWdnYWN0ZyAzNTAzNTIxDQoN
CiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIA0KUXVlcnk6IDIyMiAg
ICAgYWF0Y2FhY2NnY2djZ3R0YWNjY2FnY2d0Z3RnZ2FhY2FnZ3RnY3RnZ2N0
Z2F0Z2NhdHR0Y2FjZ2NhY2FnIDI4MQ0KICAgICAgICAgICAgICAgfHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8DQpTYmpjdDogMzUwMzUyMCBhYXRjYWFjY2djZ2NndHRhY2Nj
YWdjZ3RndGdnYWFjYWdndGdjdGdnY3RnYXRnY2F0dHRjYWNnY2FjYWcgMzUw
MzQ2MQ0KDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICANClF1ZXJ5
OiAyODIgICAgIGdjdGdjZ2djZ2N0Z2d0Z2NhZ2dnY2djZ2dnZ2FjdGdnY2dj
Z2F0dGNnY2djY2dnZ2N0Z2djZ2djdHR0ZyAzNDENCiAgICAgICAgICAgICAg
IHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fA0KU2JqY3Q6IDM1MDM0NjAgZ2N0Z2NnZ2NnY3Rn
Z3RnY2FnZ2djZ2NnZ2dnYWN0Z2djZ2NnYXR0Y2djZ2NjZ2dnY3RnZ2NnZ2N0
dHRnIDM1MDM0MDENCg0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
DQpRdWVyeTogMzQyICAgICBjdGNhYWFjY2dnZ2djYWdjZ3RjdHRjdGdndGdj
YXRnYWNnY2djY3RndHR0YWNjY2dhY2dhY2FjZ2dndHQgNDAxDQogICAgICAg
ICAgICAgICB8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHwNClNiamN0OiAzNTAzNDAwIGN0Y2Fh
YWNjZ2dnZ2NhZ2NndGN0dGN0Z2d0Z2NhdGdhY2djZ2NjdGd0dHRhY2NjZ2Fj
Z2FjYWNnZ2d0dCAzNTAzMzQxDQoNCiAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgIA0KUXVlcnk6IDQwMiAgICAgYXR0YXR0Z2FnY2FnYXRnZ2dnY3Rn
YWNnY3R0YXR0YWN0Z3R0Z2F0dHRjYWF0Z2FjY3RndGNnZ2NhY3RnIDQ2MQ0K
ICAgICAgICAgICAgICAgfHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8DQpTYmpjdDogMzUwMzM0
MCBhdHRhdHRnYWdjYWdhdGdnZ2djdGdhY2djdHRhdHRhY3RndHRnYXR0dGNh
YXRnYWNjdGd0Y2dnY2FjdGcgMzUwMzI4MQ0KDQogICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICANClF1ZXJ5OiA0NjIgICAgIGFhZ2NhZ2d0Y2d0Y2dh
Y2dhYWdjYWNhYWNjZ2dhdGdjZ2djZ2N0Z2d0Z2NhZ2NhdGFjZ2NnY2NhZ2Nh
ZyA1MjENCiAgICAgICAgICAgICAgIHx8fHx8fHx8fHx8fHx8fHx8ICAgfHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fA0KU2JqY3Q6
IDM1MDMyODAgYWFnY2FnZ3RjZ3RjZ2FjZ2FnY2FhY2FhY2NnZ2F0Z2NnZ2Nn
Y3RnZ3RnY2FnY2F0YWNnY2djY2FnY2FnIDM1MDMyMjENCg0KICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICAgICAgICAgDQpRdWVyeTogNTIyICAgICBjY2djYWdn
YWNhZ2N0YWNndGdjdGdnY2FnYXRndGdjdGdnY2FhY2d0dGdjZ2NnY2dnY2Fn
Z3RndHQtY2EgNTgwDQogICAgICAgICAgICAgICB8fHx8fHx8fHx8fHx8fHx8
fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHwgfHwN
ClNiamN0OiAzNTAzMjIwIGNjZ2NhZ2dhY2FnY3RhY2d0Z2N0Z2djYWdhdGd0
Z2N0Z2djYWFjZ3R0Z2NnY2djZ2djYWdndGd0dGNjYSAzNTAzMTYxDQoNCiAg
ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgDQpRdWVyeTogNTgxICAgICBnY2ctdGFhY2NnYXRnYWMtYWN0YXRnY25n
dGdhdGdhYWdndCA2MTMNCiAgICAgICAgICAgICAgIHx8fCB8fHx8fHx8fHx8
fCB8fHx8fHx8IHx8fHx8fHx8fHx8DQpTYmpjdDogMzUwMzE2MCBnY2d0dGFh
Y2NnYXRnYWNhYWN0YXRnY2dndGdhdGdhYWdndCAzNTAzMTI2DQoNCg0KICBE
YXRhYmFzZTogZWNvbGkubmENCiAgICBQb3N0ZWQgZGF0ZTogIEF1ZyAyMSwg
MjAwMSAgMjo1MCBQTQ0KICBOdW1iZXIgb2YgbGV0dGVycyBpbiBkYXRhYmFz
ZTogNCw2MzksMjIxDQogIE51bWJlciBvZiBzZXF1ZW5jZXMgaW4gZGF0YWJh
c2U6ICAxDQogIA0KTGFtYmRhICAgICBLICAgICAgSA0KICAgIDEuMzcgICAg
MC43MTEgICAgIDEuMzEgDQoNCkdhcHBlZA0KTGFtYmRhICAgICBLICAgICAg
SA0KICAgIDEuMzcgICAgMC43MTEgICAgIDEuMzEgDQoNCg0KTWF0cml4OiBi
bGFzdG4gbWF0cml4OjEgLTMNCkdhcCBQZW5hbHRpZXM6IEV4aXN0ZW5jZTog
NSwgRXh0ZW5zaW9uOiAyDQpOdW1iZXIgb2YgSGl0cyB0byBEQjogMjkxNQ0K
TnVtYmVyIG9mIFNlcXVlbmNlczogMQ0KTnVtYmVyIG9mIGV4dGVuc2lvbnM6
IDI5MTUNCk51bWJlciBvZiBzdWNjZXNzZnVsIGV4dGVuc2lvbnM6IDMNCk51
bWJlciBvZiBzZXF1ZW5jZXMgYmV0dGVyIHRoYW4gMS4wZS0wNTogMQ0KbGVu
Z3RoIG9mIHF1ZXJ5OiA2MTMNCmxlbmd0aCBvZiBkYXRhYmFzZTogNCw2Mzks
MjIxDQplZmZlY3RpdmUgSFNQIGxlbmd0aDogMTYNCmVmZmVjdGl2ZSBsZW5n
dGggb2YgcXVlcnk6IDU5Nw0KZWZmZWN0aXZlIGxlbmd0aCBvZiBkYXRhYmFz
ZTogNCw2MzksMjA1DQplZmZlY3RpdmUgc2VhcmNoIHNwYWNlOiAyNzY5NjA1
Mzg1DQplZmZlY3RpdmUgc2VhcmNoIHNwYWNlIHVzZWQ6IDI3Njk2MDUzODUN
ClQ6IDANCkE6IDQwDQpYMTogNiAoMTEuOSBiaXRzKQ0KWDI6IDE1ICgyOS43
IGJpdHMpDQpTMTogMTIgKDI0LjMgYml0cykNClMyOiAyNCAoNDguMSBiaXRz
KQ0K
--1537571378-215628967-999667291=:15741--