[Bioperl-l] EST Alignment questions
Jamie Hatfield
jamie@genome.arizona.edu
Fri, 1 Nov 2002 13:17:13 -0700
> From: Steven Lembark
> > I should probably just extend it to do what I want, but I'm
> not quite
> > *that* familiar with bioperl and ooperl yet.
>
> What you are trying to do is not all that complicated,
> mainly just deriving a class from the original BioPerl
> object that inserts the gaps (e.g., $myobj->withgaps($seq)
> or something similar).
Something along these lines?? (main changes are in add_seq)
This is the true alignment, so the real sequence and gaps are given and
GappedAlign does the rest.
cons AAGGCCTGTA
est1 GCAA C GTAAC
est2 GCC GTATG
=====================BEGIN GappedAlign.pm==============================
=head1 NAME
GappedAlign - Multiple (gapped) alignments held as a set of sequences
=head1 SYNOPSIS
use Bio::GappedAlign;
use Bio::AlignIO;
use strict;
my $seq;
my $Align = new Bio::GappedAlign();
$seq = new Bio::LocatableSeq( -seq => 'AAGGCCTGTA', -id => 'cons',
-start => 1, -end => 10);
$Align->add_seq(-seq => $seq);
$seq = new Bio::LocatableSeq( -seq => 'GCAACGTAAC', -id => 'est1',
-start => 1, -end => 12);
$Align->add_seq(-seq => $seq, -offset => -2, -gaps => "4 4 4 5");
$seq = new Bio::LocatableSeq( -seq => 'GCCGTATG', -id => 'est2',
-start => 1, -end => 8);
$Align->add_seq(-seq => $seq, -offset => 3, -gaps => "3");
my $out = Bio::AlignIO->new(-fh => \*STDOUT, -format => 'clustalw');
$out->write_aln($Align);
=cut
# Let the code begin...
package Bio::GappedAlign;
use vars qw(@ISA);
use strict;
use Bio::Root::Root;
use Bio::SimpleAlign;
@ISA = qw(Bio::SimpleAlign);
=head2 new
Title : new
Usage : my $aln = new Bio::GappedAlign();
Function : Creates a new simple align object
Returns : Bio::GappedAlign
Args : -source => string representing the source program
where this alignment came from
See L<Bio::SimpleAlign> for more information
=cut
sub new {
my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_far_left'} = 0;
return $self;
}
=head2 add_seq
Title : add_seq
Usage : $myalign->add_seq();
Function : Adds another sequence to the alignment. *Does not* align
it - just adds it to the hashes.
Returns : nothing
Args : -seq => a Bio::LocatableSeq object
-offset => integer offset from "origin"
-gaps => space seperated string of integer positions
where there is are gaps
-order => which row in the alignment to put the seq in
(optional)
See L<Bio::LocatableSeq> for more information
=cut
sub add_seq {
my ($self,@args) = @_;
my ($seq, $offset, $gaps, $order) =
$self->_rearrange([qw(SEQ OFFSET GAPS ORDER)], @args);
my $gap = $self->gap_char;
my $s;
$offset || ($offset = 0);
$gaps || ($gaps = "");
if( !ref $seq || ! $seq->isa('Bio::LocatableSeq') ) {
$self->throw("Unable to process non locatable sequences [",
ref($seq), "]");
}
$s = $seq->seq;
map { substr($s, $_, 0) = $gap; }
sort {$b<=>$a} split(/ /, $gaps);
if ($offset < $self->{'_far_left'}) {
## if we add a seq that is farther left than everybody else,
## we need to add gaps to other sequences at their start
## if you always start adding with the leftmost sequence, you
## don't have to worry about this additional overhead.
my $odiff = $self->{'_far_left'} - $offset;
my $other;
foreach $other ( $self->each_seq() ) {
$other->seq(($gap x $odiff) . $other->seq);
}
$self->{'_far_left'} = $offset;
}
elsif ($offset > $self->{'_far_left'}) {
## we've already seen a sequence farther left than this, so pad
this one
my $odiff = $offset - $self->{'_far_left'};
$s = ($gap x $odiff) . $s;
}
$seq->seq($s);
## let SimpleAlign do the rest of the work
$self->SUPER::add_seq($seq, $order);
}
1;
======================END GappedAlign.pm===============================
----------------------------------------------------------------------
Jamie Hatfield Room 541H, Marley Building
Systems Programmer University of Arizona
Arizona Genomics Computational Tucson, AZ 85721
Laboratory (AGCoL) (520) 626-9598