[Bioperl-l] Re: [Bioperl-guts-l] Notification: incoming/974
Roger Hall
Roger Hall" <roger@iosea.com
Sat, 9 Jun 2001 19:17:08 -0700
This is a multi-part message in MIME format.
------=_NextPart_000_008C_01C0F118.C6A95870
Content-Type: text/plain;
charset="iso-8859-1"
Content-Transfer-Encoding: 7bit
Can some test this? I added a C float switch to P in HSP.pm for this
case...stupid decision it is.
Please rename/backup your working HSP.pm first.
Thanks!
Roger
----- Original Message -----
From: "Saurabh D. Patel" <patels02@doc.mssm.edu>
To: "Hilmar Lapp" <hlapp@gmx.net>; <bioperl-l@bioperl.org>
Sent: Saturday, June 09, 2001 2:52 PM
Subject: [Bioperl-l] Re: [Bioperl-guts-l] Notification: incoming/974
> your perl one liner prints cool on my system too. I looked a little in
> HSP.pm under the BPlite directory and noticed the lines:
>
> my ($p) = $scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/;
> if (not defined $p) {($p) = $scoreline =~ /Expect = (\S+)/}
>
> compare with HSP.pm under the Blast directory:
>
> $p = "1$p" if defined $p and $p=~ /^e/i;
>
> I think adding this line would catch this case.
>
> Saurabh.
>
> Hilmar Lapp wrote:
>
> > Have you checked what $hsp->P() really returns?
> >
> > perl -e '$x = "e-108"; if($x < 0.01) { print "cool\n"; } else {
> > print "uncool\n"; }'
> >
> > prints "cool" on my system (perl 5.005003). I think this is a
> > known parsing bug, but I'm not sure.
> >
> > Hilmar
> > --
> > -----------------------------------------------------------------
> > Hilmar Lapp email: hilmarl@yahoo.com
> > GNF, San Diego, Ca. 92122 phone: +1 858 812 1757
> > -----------------------------------------------------------------
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l@bioperl.org
> http://bioperl.org/mailman/listinfo/bioperl-l
------=_NextPart_000_008C_01C0F118.C6A95870
Content-Type: application/octet-stream;
name="HSP.pm"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="HSP.pm"
#########################################################################=
######
# Bio::Tools::BPlite::HSP
#########################################################################=
######
# HSP =3D High Scoring Pair (to all non-experts as I am)
#
# The original BPlite.pm module has been written by Ian Korf !
# see http://sapiens.wustl.edu/~ikorf
#
# You may distribute this module under the same terms as perl itself
package Bio::Tools::BPlite::HSP;
use vars qw(@ISA);
use strict;
# to disable overloading comment this out:
#use overload '""' =3D> '_overload';
# Object preamble - inheriets from Bio::SeqFeature::SimilarityPair
use Bio::SeqFeature::SimilarityPair;
use Bio::SeqFeature::Similarity;
@ISA =3D qw(Bio::SeqFeature::SimilarityPair);
sub new {
my ($class, @args) =3D @_;
# workaround to make sure frame is not set before strand is
# interpreted from query/subject info=20
# this workaround removes the key from the hash
# so the superclass does not try and work with it
# we'll take care of setting it in this module later on
my %newargs =3D @args;
foreach ( keys %newargs ) {
if( /frame$/i ) {
delete $newargs{$_};
}=20
}
# done with workaround
my $self =3D $class->SUPER::new(%newargs);
=20
my ($score,$bits,$match,$positive,$p,$qb,$qe,$sb,$se,$qs,
$ss,$hs,$qname,$sname,$qlength,$slength, $frame) =3D=20
$self->_rearrange([qw(SCORE
BITS
MATCH
POSITIVE
P
QUERYBEGIN
QUERYEND
SBJCTBEGIN
SBJCTEND
QUERYSEQ
SBJCTSEQ
HOMOLOGYSEQ
QUERYNAME
SBJCTNAME =20
QUERYLENGTH
SBJCTLENGTH
FRAME
)],@args);
=20
# Store the aligned query as sequence feature
if ($qe > $qb) { # normal query: start < end
$self->query( Bio::SeqFeature::Similarity->new
(-start=3D>$qb, -end=3D>$qe, -strand=3D>1,=20
-source=3D>"BLAST" ) ) }
else { # reverse query (i dont know if this is possible, but feel =
free to correct)
$self->query( Bio::SeqFeature::Similarity->new
(-start=3D>$qe, -end=3D>$qb, -strand=3D>-1,
-source=3D>"BLAST" ) ) }
# store the aligned subject as sequence feature
if ($se > $sb) { # normal subject
$self->subject( Bio::SeqFeature::Similarity->new
(-start=3D>$sb, -end=3D>$se, -strand=3D>1,
-source=3D>"BLAST" ) ) }
else { # reverse subject: start bigger than end
$self->subject( Bio::SeqFeature::Similarity->new
(-start=3D>$se, -end=3D>$sb, -strand=3D>-1,=20
-source=3D>"BLAST" ) ) }
# name the sequences
$self->query->seqname($qname); # query
$self->subject->seqname($sname); # subject
# set lengths
$self->query->seqlength($qlength); # query
$self->subject->seqlength($slength); # subject
# set object vars
$self->score($score);
$self->bits($bits);
$self->significance($p);
$self->query->frac_identical($match);
$self->subject->frac_identical($match);
$self->{'PERCENT'} =3D int((1000 * $match)/
$self->query->length)/10;
$self->{'POSITIVE'} =3D $positive;
$self->{'QS'} =3D $qs;
$self->{'SS'} =3D $ss;
$self->{'HS'} =3D $hs;
=20
defined $frame && $self->frame($frame);
return $self; # success - we hope!
}
# to disable overloading comment this out:
sub _overload {
my $self =3D shift;
return $self->start."..".$self->end." ".$self->bits;
}
=3Dhead2 P
Title : P
Usage : $hsp->P();
Function : returns the P (significance) value for a HSP=20
Example :=20
Returns : (double) significance value
Args :
=3Dcut
sub P {
my $float =3D shift->significance(@_);
my $match =3D '([+-]?)(?=3D\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # =
Perl Cookbook 2.1
if ($float =3D~ /^$match$/) {
# Is a C float
return $float;
} elsif ("1$float" =3D~ /^$match$/) {
# Almost C float, Jitterbug 974
return "1$float";
# Handle another odd number format
# } elsif ("HACK" =3D~ /^HACKMATCH$/) {
# # Almost C float, Jitterbug X
# return "HACK";
} else {
print "Warning: $float is not a known number format. Returning zero. =
\n";
return 0;
}
}
=3Dhead2 percent
Title : percent
Usage : $hsp->percent();
Function : returns the percent matching=20
Returns : (double) percent matching
Args : none
=3Dcut
sub percent {shift->{'PERCENT'}}
=3Dhead2 match
Title : match
Usage : $hsp->match();
Function : returns the match
Example :=20
Returns : (double) frac_identical=20
Args :
=3Dcut
sub match {shift->query->frac_identical(@_)}
=3Dhead2 positive
Title : positive
Usage : $hsp->positive();
Function : returns the number of positive hits=20
Returns : (int) number of positive residue hits=20
Args : none
=3Dcut
sub positive {shift->{'POSITIVE'}}
=3Dhead2 querySeq
Title : querySeq
Usage : $hsp->querySeq();
Function : returns the query sequence
Returns : (string) the Query Sequence=20
Args : none
=3Dcut
sub querySeq {shift->{'QS'}}
=3Dhead2 sbjctSeq
Title : sbjctSeq
Usage : $hsp->sbjctSeq();
Function : returns the Sbjct sequence=20
Returns : (string) the Sbjct Sequence=20
Args : none
=3Dcut
sub sbjctSeq {shift->{'SS'}}
=3Dhead2 homologySeq
Title : homologySeq
Usage : $hsp->homologySeq();
Function : returns the homologous sequence=20
Returns : (string) homologous sequence=20
Args : none
=3Dcut
sub homologySeq {shift->{'HS'}}
=3Dhead2 qs
Title : qs
Usage : $hsp->qs();
Function : returns the Query Sequence (same as querySeq)
Returns : (string) query Sequence=20
Args : none
=3Dcut
sub qs {shift->{'QS'}}
=3Dhead2 ss
Title : ss
Usage : $hsp->ss();
Function : returns the subject sequence ( same as sbjctSeq)=20
Returns : (string) Sbjct Sequence
Args : none
=3Dcut
sub ss {shift->{'SS'}}
=3Dhead2 hs
Title : hs
Usage : $hsp->hs();
Function : returns the Homologous Sequence (same as homologySeq )=20
Returns : (string) Homologous Sequence
Args : none
=3Dcut
sub hs {shift->{'HS'}}
sub frame {
my ($self, $frame) =3D @_;
if( defined $frame ) {
if( $frame =3D=3D 0 ) {
$frame =3D undef;
} elsif( $frame !~ /^([+-])?([1-3])/ ) { =20
$self->warn("Specifying an invalid frame ($frame)");
$frame =3D undef;
} else {=20
if( ($1 eq '-' && $self->subject->strand >=3D 0) ||
($1 eq '+' && $self->subject->strand <=3D 0) ) {
$self->warn("Frame ($frame) did not match strand of query match (".
$self->subject->strand().")");
}
$frame =3D $2 - 1;
}
}
return $self->SUPER::frame($frame);
}
1;
------=_NextPart_000_008C_01C0F118.C6A95870--