[Bioperl-l] How do you create a Bio::Align::AlignI object?
Wes Barris
wes.barris at csiro.au
Fri Oct 31 00:58:43 EST 2003
Hi,
I need to convert the contents of an ACE file into a series of MSF files
(one for each contig). Since BioPerl does not offer a straight-forward
conversion process for this task, I have to manually read each contig
section of the ACE file and manually create the structures for an
AlignI object. The only way I have found to create an AlignI object
was by opening an input stream to an existing file. As a result, I
had to write the consensus to a file just so that I could open it again
and gain access to an AlignI object.
Here is my code. Is there a slicker way to do this?
acetomsf.pl:
#!/usr/local/bin/perl -w
#
#
use strict;
use Bio::Assembly::IO;
use Bio::AlignIO;
use Bio::SeqIO;
#
my $usage = "Usage: $0 <infile.ace>\n";
my $infile = shift or die $usage;
my $prefix = 'btcn';
my $io = new Bio::Assembly::IO(-file=>$infile, -format=>'ace');
my $assembly = $io->next_assembly;
foreach my $contig ($assembly->all_contigs()) {
my $contigName = $prefix.$contig->id;
# $contig->match(); # not yet implemented
#
# Write the consensus to a file.
#
my $consensusSeq = new Bio::Seq(
-seq=>$contig->get_consensus_sequence->seq,
-id=>$prefix.$contig->id);
my $seqout = new Bio::SeqIO(-file=>">$contigName.fa", -format=>'fasta');
$seqout->write_seq($consensusSeq);
#
# Snarf the consensus back in so that we can create a Bio::Align::AlignI object.
#
my $instream = new Bio::AlignIO(-format=>'fasta', -file=>"$contigName.fa");
my $outstream = new Bio::AlignIO(-format=>'msf', -file=>">$contigName.msf");
my $aln = $instream->next_aln();
#
# Loop through each sequence in the contig adding it to the new alignment.
#
foreach my $seq ($contig->each_seq) {
#
# Some of the ACE starting locations are negative.
#
if ($contig->get_seq_coord($seq)->start <= 0) {
my $offset = -$contig->get_seq_coord($seq)->start + 2;
$seq->seq($seq->subseq($offset,$seq->length));
print($seq->primary_id," was clipped at the beginning by $offset\n");
}
#
# Pad each sequence so it aligns with the consensus.
#
my $before = $contig->get_seq_coord($seq)->start - 1;
my $after = $contig->get_consensus_length - $contig->get_seq_coord($seq)->end;
my $alignedSequence = '-' x $before . $seq->seq . '-' x $after;
#
# Some of the ACE ending locations go beyond the consensus.
#
if (length($alignedSequence) > $contig->get_consensus_length) {
$alignedSequence = substr($alignedSequence, 1 ,$contig->get_consensus_length);
print($seq->primary_id," was clipped at the end\n");
}
$seq->seq($alignedSequence);
$aln->add_seq($seq);
}
$aln->set_displayname_flat;
$outstream->write_aln($aln);
undef $outstream;
}
--
Wes Barris
E-Mail: Wes.Barris at csiro.au
More information about the Bioperl-l
mailing list