[Bioperl-l] How do you create a Bio::Align::AlignI object?

Wes Barris wes.barris at csiro.au
Fri Oct 31 00:58:43 EST 2003


Hi,

I need to convert the contents of an ACE file into a series of MSF files
(one for each contig).  Since BioPerl does not offer a straight-forward
conversion process for this task, I have to manually read each contig
section of the ACE file and manually create the structures for an
AlignI object.  The only way I have found to create an AlignI object
was by opening an input stream to an existing file.  As a result, I
had to write the consensus to a file just so that I could open it again
and gain access to an AlignI object.

Here is my code.  Is there a slicker way to do this?

acetomsf.pl:

#!/usr/local/bin/perl -w
#
#
use strict;
use Bio::Assembly::IO;
use Bio::AlignIO;
use Bio::SeqIO;
#
my $usage = "Usage: $0 <infile.ace>\n";
my $infile = shift or die $usage;
my $prefix = 'btcn';
my $io = new Bio::Assembly::IO(-file=>$infile, -format=>'ace');
my $assembly = $io->next_assembly;

foreach my $contig ($assembly->all_contigs()) {
    my $contigName = $prefix.$contig->id;
#  $contig->match();    # not yet implemented
#
# Write the consensus to a file.
#
    my $consensusSeq = new Bio::Seq(
                         -seq=>$contig->get_consensus_sequence->seq,
                         -id=>$prefix.$contig->id);
    my $seqout = new Bio::SeqIO(-file=>">$contigName.fa", -format=>'fasta');
    $seqout->write_seq($consensusSeq);
#
# Snarf the consensus back in so that we can create a Bio::Align::AlignI object.
#
    my $instream  = new Bio::AlignIO(-format=>'fasta', -file=>"$contigName.fa");
    my $outstream = new Bio::AlignIO(-format=>'msf', -file=>">$contigName.msf");
    my $aln = $instream->next_aln();
#
# Loop through each sequence in the contig adding it to the new alignment.
#
    foreach my $seq ($contig->each_seq) {
#
# Some of the ACE starting locations are negative.
#
       if ($contig->get_seq_coord($seq)->start <= 0) {
          my $offset = -$contig->get_seq_coord($seq)->start + 2;
          $seq->seq($seq->subseq($offset,$seq->length));
          print($seq->primary_id," was clipped at the beginning by $offset\n");
          }
#
# Pad each sequence so it aligns with the consensus.
#
       my $before = $contig->get_seq_coord($seq)->start - 1;
       my $after = $contig->get_consensus_length - $contig->get_seq_coord($seq)->end;
       my $alignedSequence = '-' x $before . $seq->seq . '-' x $after;
#
# Some of the ACE ending locations go beyond the consensus.
#
       if (length($alignedSequence) > $contig->get_consensus_length) {
          $alignedSequence = substr($alignedSequence, 1 ,$contig->get_consensus_length);
          print($seq->primary_id," was clipped at the end\n");
          }
       $seq->seq($alignedSequence);
       $aln->add_seq($seq);
       }
    $aln->set_displayname_flat;
    $outstream->write_aln($aln);
    undef $outstream;
    }


-- 
Wes Barris
E-Mail: Wes.Barris at csiro.au




More information about the Bioperl-l mailing list