[Bioperl-l] 65% speedup for Bio::SeqIO::fasta::next_seq test
Tim Bunce
Tim.Bunce@pobox.com
Mon, 30 Sep 2002 14:39:57 +0100
The appended patch over the latest repository version introduces
caching of valid alphabets into Bio::SeqIO::alphabet. This saves
the overhead of using Bio::PrimarySeq::new to validate an alphabet.
The code path for a typical Bio::SeqIO::fasta::next_seq call is reduced from:
Bio::SeqIO::fasta::next_seq
Bio::Root::IO::_readline
File::Temp::BEGIN
Bio::SeqIO::alphabet
Bio::SeqIO::sequence_factory
Bio::Seq::SeqFastaSpeedFactory::create
Bio::PrimarySeq::_guess_alphabet
Bio::PrimarySeq::seq
Bio::Root::RootI::_rearrange
Bio::PrimarySeq::alphabet
Bio::Seq::alphabet
Bio::Seq::primary_seq
Bio::PrimarySeq::alphabet
Bio::SeqIO::alphabet
Bio::PrimarySeq::new
Bio::Root::Root::BEGIN
Bio::Root::Root::verbose
Bio::Root::RootI::_rearrange
Bio::PrimarySeq::alphabet
Bio::Root::RootI::DESTROY
Bio::Root::Root::_cleanup_methods
UNIVERSAL::isa
Bio::Seq::DESTROY
Bio::Root::RootI::DESTROY
Bio::Root::Root::_cleanup_methods
UNIVERSAL::isa
to just:
Bio::SeqIO::fasta::next_seq
Bio::Root::IO::_readline
File::Temp::BEGIN
Bio::SeqIO::alphabet
Bio::SeqIO::sequence_factory
Bio::Seq::SeqFastaSpeedFactory::create
Bio::PrimarySeq::_guess_alphabet
Bio::PrimarySeq::seq
Bio::Root::RootI::_rearrange
Bio::PrimarySeq::alphabet
Bio::Seq::alphabet
Bio::Seq::primary_seq
Bio::PrimarySeq::alphabet
Bio::SeqIO::alphabet
Bio::Seq::DESTROY
Bio::Root::RootI::DESTROY
Bio::Root::Root::_cleanup_methods
UNIVERSAL::isa
The patch also fine-tunes the verbose and alphabet methods.
Overall the sp_protein.lseg test data is now read 65% faster.
Tim.
p.s. The File::Temp::BEGIN shown by Devel::DProf is actually
Bio::Root::IO::_fh which has reused the CODE address originally
used by File::Temp::BEGIN before it was discarded.
diff -r -du bioperl/core/Bio/Root/Root.pm bioperl-core-timbo/Bio/Root/Root.pm
--- bioperl/core/Bio/Root/Root.pm Mon Sep 30 11:26:36 2002
+++ bioperl-core-timbo/Bio/Root/Root.pm Mon Sep 30 13:26:34 2002
@@ -227,13 +227,13 @@
sub verbose {
my ($self,$value) = @_;
# allow one to set global verbosity flag
- if( $DEBUG ) { return $DEBUG }
+ return $DEBUG if $DEBUG;
+ return $VERBOSITY unless ref $self;
- if(ref($self) && (defined $value || ! defined $self->{'_root_verbose'}) ) {
- $value = 0 unless defined $value;
- $self->{'_root_verbose'} = $value;
- }
- return (ref($self) ? $self->{'_root_verbose'} : $VERBOSITY);
+ if (defined $value || ! defined $self->{'_root_verbose'}) {
+ $self->{'_root_verbose'} = $value || 0;
+ }
+ return $self->{'_root_verbose'};
}
sub _register_for_cleanup {
diff -r -du bioperl/core/Bio/Seq.pm bioperl-core-timbo/Bio/Seq.pm
--- bioperl/core/Bio/Seq.pm Mon Sep 30 11:25:24 2002
+++ bioperl-core-timbo/Bio/Seq.pm Mon Sep 30 13:56:01 2002
@@ -790,10 +790,8 @@
=cut
sub alphabet {
- my ($self,$value) = @_;
- if( defined $value ) {
- return $self->primary_seq->alphabet($value);
- }
+ my $self = shift;
+ return $self->primary_seq->alphabet(shift) if @_ && defined $_[0];
return $self->primary_seq->alphabet();
}
diff -r -du bioperl/core/Bio/SeqIO.pm bioperl-core-timbo/Bio/SeqIO.pm
--- bioperl/core/Bio/SeqIO.pm Mon Aug 12 09:40:04 2002
+++ bioperl-core-timbo/Bio/SeqIO.pm Mon Sep 30 14:05:07 2002
@@ -321,6 +321,8 @@
eval { require Bio::SeqIO::staden::read; };
}
+my %valid_alphabet_cache;
+
=head2 new
Title : new
@@ -483,16 +485,21 @@
my ($self, $value) = @_;
if ( defined $value) {
- # instead of hard-coding the allowed values once more, we check by
- # creating a dummy sequence object
- eval {
- require Bio::PrimarySeq;
- my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
- };
- if($@) {
- $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
+ $value = lc $value;
+ unless ($valid_alphabet_cache{$value}) {
+ # instead of hard-coding the allowed values once more, we check by
+ # creating a dummy sequence object
+ eval {
+ require Bio::PrimarySeq;
+ my $seq = Bio::PrimarySeq->new('-alphabet' => $value);
+
+ };
+ if ($@) {
+ $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
+ }
+ $valid_alphabet_cache{$value} = 1;
}
- $self->{'alphabet'} = "\L$value";
+ $self->{'alphabet'} = $value;
}
return $self->{'alphabet'};
}