[Bioperl-l] EST Alignment questions

Aaron J Mackey Aaron J. Mackey" <amackey@virginia.edu
Fri, 1 Nov 2002 19:17:26 -0500 (EST)


Methinks you should investigate Bio::Seq::EncodedSeq, which is a
LocatableSeq that has the ability to handled encoded gaps ...

-Aaron

On Fri, 1 Nov 2002, Jamie Hatfield wrote:

> > From: Steven Lembark
> > > I should probably just extend it to do what I want, but I'm
> > not quite
> > > *that* familiar with bioperl and ooperl yet.
> >
> > What you are trying to do is not all that complicated,
> > mainly just deriving a class from the original BioPerl
> > object that inserts the gaps (e.g., $myobj->withgaps($seq)
> > or something similar).
>
> Something along these lines??  (main changes are in add_seq)
>
> This is the true alignment, so the real sequence and gaps are given and
> GappedAlign does the rest.
>
> cons      AAGGCCTGTA
> est1    GCAA   C GTAAC
> est2         GCC GTATG
>
> =====================BEGIN GappedAlign.pm==============================
> =head1 NAME
>
> GappedAlign - Multiple (gapped) alignments held as a set of sequences
>
> =head1 SYNOPSIS
>
>   use Bio::GappedAlign;
>   use Bio::AlignIO;
>   use strict;
>
>   my $seq;
>   my $Align = new Bio::GappedAlign();
>
>   $seq = new Bio::LocatableSeq( -seq => 'AAGGCCTGTA', -id => 'cons',
>                                 -start => 1, -end => 10);
>   $Align->add_seq(-seq => $seq);
>
>   $seq = new Bio::LocatableSeq( -seq => 'GCAACGTAAC', -id => 'est1',
>                                 -start => 1, -end => 12);
>   $Align->add_seq(-seq => $seq, -offset => -2, -gaps => "4 4 4 5");
>
>   $seq = new Bio::LocatableSeq( -seq => 'GCCGTATG', -id => 'est2',
>                                 -start => 1, -end => 8);
>   $Align->add_seq(-seq => $seq, -offset => 3,  -gaps => "3");
>
>   my $out   = Bio::AlignIO->new(-fh => \*STDOUT, -format => 'clustalw');
>   $out->write_aln($Align);
>
> =cut
>
> # Let the code begin...
>
> package Bio::GappedAlign;
> use vars qw(@ISA);
> use strict;
>
> use Bio::Root::Root;
> use Bio::SimpleAlign;
>
> @ISA = qw(Bio::SimpleAlign);
>
> =head2 new
>
>  Title     : new
>  Usage     : my $aln = new Bio::GappedAlign();
>  Function  : Creates a new simple align object
>  Returns   : Bio::GappedAlign
>  Args      : -source => string representing the source program
>                         where this alignment came from
>
> See L<Bio::SimpleAlign> for more information
>
> =cut
>
> sub new {
>   my($class,@args) = @_;
>   my $self = $class->SUPER::new(@args);
>
>   $self->{'_far_left'} = 0;
>
>   return $self;
> }
>
> =head2 add_seq
>
>  Title     : add_seq
>  Usage     : $myalign->add_seq();
>  Function  : Adds another sequence to the alignment. *Does not* align
>              it - just adds it to the hashes.
>  Returns   : nothing
>  Args      : -seq    => a Bio::LocatableSeq object
>              -offset => integer offset from "origin"
> 	     -gaps   => space seperated string of integer positions
> 	       where there is are gaps
>              -order  => which row in the alignment to put the seq in
> (optional)
>
> See L<Bio::LocatableSeq> for more information
>
> =cut
>
> sub add_seq {
>     my ($self,@args) = @_;
>     my ($seq, $offset, $gaps, $order) =
>          $self->_rearrange([qw(SEQ OFFSET GAPS ORDER)], @args);
>     my $gap = $self->gap_char;
>     my $s;
>
>     $offset || ($offset = 0);
>     $gaps   || ($gaps   = "");
>
>     if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
> 	$self->throw("Unable to process non locatable sequences [",
> ref($seq), "]");
>     }
>
>     $s = $seq->seq;
>     map { substr($s, $_, 0) = $gap; }
>        sort {$b<=>$a} split(/ /, $gaps);
>
>     if ($offset < $self->{'_far_left'}) {
>       ## if we add a seq that is farther left than everybody else,
>       ##  we need to add gaps to other sequences at their start
>
>       ## if you always start adding with the leftmost sequence, you
>       ## don't have to worry about this additional overhead.
>
>       my $odiff = $self->{'_far_left'} - $offset;
>       my $other;
>
>       foreach $other ( $self->each_seq() ) {
>         $other->seq(($gap x $odiff) . $other->seq);
>       }
>
>       $self->{'_far_left'} = $offset;
>     }
>     elsif ($offset > $self->{'_far_left'}) {
>       ## we've already seen a sequence farther left than this, so pad
> this one
>
>       my $odiff = $offset - $self->{'_far_left'};
>
>       $s = ($gap x $odiff) . $s;
>
>     }
>
>     $seq->seq($s);
>
>     ## let SimpleAlign do the rest of the work
>
>     $self->SUPER::add_seq($seq, $order);
> }
>
> 1;
> ======================END GappedAlign.pm===============================
>
> ----------------------------------------------------------------------
> Jamie Hatfield                              Room 541H, Marley Building
> Systems Programmer                          University of Arizona
> Arizona Genomics Computational              Tucson, AZ  85721
>   Laboratory (AGCoL)                        (520) 626-9598
>
> _______________________________________________
> Bioperl-l mailing list
> Bioperl-l@bioperl.org
> http://bioperl.org/mailman/listinfo/bioperl-l
>

-- 
 Aaron J Mackey
 Pearson Laboratory
 University of Virginia
 (434) 924-2821
 amackey@virginia.edu