[Bioperl-l] RandomSeqIO

Jean-Marc Frigerio Frigerio at pierroton.inra.fr
Tue Oct 19 07:18:18 EDT 2004


Hi,

 I wrote and use the following module, which inherits from Bio::SeqIO and adds 
2 methods : get_by_id, and first_seq.

The idea is to index the sequence file 'in memory' to gain random access to 
the sequences.

I wonder if  it could be of any use for other bioperl users.

			--- Jean-Marc

# BioPerl module for RandomSeqIO
#
# Cared for by Jean-Marc Frigerio <Frigerio at pierroton.inra.fr>
#       
#
# Copyright Jean-Marc Frigerio
#
# You may distribute this module under the same terms as perl itself
#
# _history
# October 18, 2004  
# POD documentation - main docs before the code

=head1 NAME

RandomSeqIO - Bio::SeqIO subclass that indexes sequence positions in memory to give random access to the sequences

=head1 SYNOPSIS

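	use IO::File;
	use RandomSeqIO;
	my $file = shift;
	my $fh = IO::File->new($file) or die "open failed: $!\n";

	my $seqio  = RandomSeqIO->new(-format => 'fasta', -fh => $fh);

	my $first = $seqio->RandomSeqIO::first_seq;
	my $seq   = $seqio->RandomSeqIO::get_by_id('077F05');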

=head1 DESCRIPTION

	RandomSeqIO adds the get_by_id and first_seq methods to Bio::SeqIO

=cut

package RandomSeqIO;


use strict;
# Surface mistakes such as seeking to an undefined offset or comparing
# against an undefined id instead of letting them pass silently.
use warnings;
use Bio::SeqIO;

# Inherit from Bio::SeqIO.  Declared with `our` rather than the obsolete
# `use vars` pragma; the resulting package variable is the same.
our @ISA = qw(Bio::SeqIO);


=head1 OVERRIDDEN METHOD


=head1 next_seq

	Title		: next_seq
	Usage		: $seqio->RandomSeqIO::next_seq;
	Args		: none
	Returns		: A Bio::Seq

=cut
# Read the next sequence and remember the file offset at which the read
# started, so that get_by_id can seek straight back to it later.
#
# Intended to be called fully qualified ($seqio->RandomSeqIO::next_seq).
# The unqualified $self->next_seq call below is resolved through the class
# of $self.
# NOTE(review): if $self were blessed directly into RandomSeqIO, that call
# would resolve to this very sub and recurse without end -- confirm which
# class Bio::SeqIO->new actually returns for the chosen format.
#
# Args    : none
# Returns : the sequence object, or nothing (undef in scalar context, the
#           empty list in list context) when no further sequence is read.
sub next_seq
{
	my $self = shift;

	# Offset of the handle *before* the read; stored as the seek target
	# for the record that is about to be consumed.
	my $pos = $self->RandomSeqIO::_position;

	my $next = $self->next_seq;

	# Explicit bare return rather than falling off the end of the sub, so
	# the end-of-input result is well defined in every calling context.
	return unless $next;

	$self->RandomSeqIO::_push($next->id, $pos);
	return $next;
}

=head1 NEW METHODS

=head1 first_seq

	Title		: first_seq
	Usage		: $seqio->RandomSeqIO::first_seq;
	Functions	: "Rewind" the filehandle to the beginning of the file
	Args		: none
	Returns		: A Bio::Seq

=cut
# Rewind the underlying filehandle to offset 0 and return the first
# sequence, indexing it via RandomSeqIO::next_seq.
#
# Args    : none
# Returns : the first sequence object, or nothing if the handle could not
#           be rewound (for example a handle that does not support seek).
sub first_seq
{
	my $self = shift;

	# A failed seek leaves the handle wherever it was; reading on would
	# hand back some arbitrary sequence as "the first", so report the
	# problem and return nothing instead.
	unless (seek($self->_fh, 0, 0))
	{
		warn "RandomSeqIO::first_seq: cannot rewind filehandle: $!\n";
		return;
	}

	return $self->RandomSeqIO::next_seq;
}

=head1 get_by_id

	Title		: get_by_id
	Usage		: $seqio->RandomSeqIO::get_by_id('077F05');
	Args		: EST id
	Returns		: A Bio::Seq

=cut
# Fetch a sequence by its id.
#
# If the id is already in the in-memory index ($self->{LIST}), seek to the
# recorded offset and read one sequence.  Otherwise read forward from the
# *current* file position with RandomSeqIO::next_seq (which indexes every
# record it passes) until a sequence with a matching id turns up.
#
# Because the scan starts at the current position, a record that lies
# before it and was never indexed will not be found by this call.
#
# Args    : sequence id
# Returns : the matching sequence object, or nothing (undef / empty list)
#           when the id is undefined or no matching record is found.
sub get_by_id
{
	my $self = shift;
	my $id = shift;

	# Nothing sensible to look up; avoid scanning the whole file for it.
	return unless defined $id;

	if (defined $self->{LIST}{$id})
	{
		$self->RandomSeqIO::_position($id);
		return $self->next_seq;
	}

	while (my $seq = $self->RandomSeqIO::next_seq)
	{
		return $seq if $seq->id eq $id;
	}

	# Input exhausted without a match: make the "not found" result
	# explicit instead of relying on the value left behind by the loop.
	return;
}


=head1 PRIVATE METHODS
	
=head1 _position

	Title		: _position
	Usage		: $self->RandomSeqIO::_position();
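	Function	: Get the current position in the file; when an id is given, first move to the position recorded for that id
	Example		: $self->RandomSeqIO::_position($id);
	Args		: id of an already indexed sequence (optional)
	Returns		: the current position in the file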

=cut

# Get, and optionally set, the position of the underlying filehandle.
#
# With no argument: report the current offset.
# With an id: seek to the offset recorded for that id in $self->{LIST},
# then report the resulting offset.  An id with no recorded offset leaves
# the handle where it is (previously the undefined offset was treated as 0
# and the file was silently rewound).
#
# Args    : id (optional)
# Returns : the current position in the file, as given by tell
sub _position
{
	my $self = shift;
	my $fh = $self->_fh;
	if (@_)
	{
		my $id = shift;
		my $offset = defined $id ? $self->{LIST}{$id} : undef;

		# Only move when there really is a recorded offset for this id.
		seek($fh, $offset, 0) if defined $offset;
	}
	return tell $fh;
}

=head1 _push

	Title		: _push
	Usage		: $self->RandomSeqIO::_push($id,$pos);
	Function	: Stores id and position in the hash $self->{LIST}
	Example		: $self->RandomSeqIO::_push($next->id,$pos);
	Args		: sequence id, position in the file
	Returns		: none

=cut

# Record the file offset of a sequence in the in-memory index.
#
# Args : sequence id, file offset
# The index is the hash $self->{LIST}, keyed by id; an existing entry for
# the same id is overwritten.
sub _push
{
	my ($self, $id, $pos) = @_;
	return $self->{LIST}{$id} = $pos;
}

=head1 _pop_all

	Title		: _pop_all
	Usage		: $self->RandomSeqIO::_pop_all;
	Function	: Prints the content of $self->{LIST}, one "id : position" line per entry (despite the name, nothing is removed)
	Example		: $self->RandomSeqIO::_pop_all;
	Args		: none
	Returns		: none
	
=cut
# Dump the in-memory index to the currently selected output handle, one
# "id : offset" line per entry, in hash order.  Despite the name, no entry
# is removed from $self->{LIST}.
sub _pop_all
{
	my ($self) = @_;
	print "$_ : $self->{LIST}{$_}\n" for keys %{ $self->{LIST} };
}
1;



More information about the Bioperl-l mailing list