[Bioperl-l] counting gaps in a column of alignment
subha kalyanamoorthy
sksweety24 at gmail.com
Thu Jul 25 04:37:02 UTC 2013
Hi there,
I am a new BioPerl user. I wrote a script to count the nucleotide
composition of each column of an alignment. The following program gives me
the counts for the nucleotides, but not for the gap characters.
I would greatly appreciate any suggestions to correct this program.
Thanks.
#***************************My Program**********************************
#!/bin/perl -w
use strict;
use warnings;
use List::Util 'max';
use Bio::SimpleAlign;
use Bio::Align::AlignI;
use Bio::AlignIO;
use Bio::SeqIO;
# Count, for every column of the first alignment in seq.fst, how many
# sequences carry A, T, C, G, N or a gap at that position, and print one
# tab-separated row per column.

my $in = Bio::AlignIO->new( -file => "seq.fst", -format => "fasta" );
my $align = $in->next_aln()
    or die "No alignment could be read from seq.fst\n";

# The gap symbol is a property of the alignment; look it up once.
my $gap_char = $align->gap_char();

print "column\tA's\tT's\tC's\tG's\tN's\tgaps\n";

for my $i ( 1 .. $align->length ) {
    my %count = ( A => 0, T => 0, C => 0, G => 0, N => 0, gap => 0 );

    # The third argument must be true: by default slice() drops every
    # sequence that consists only of gaps within the requested range, so in
    # a one-column slice the gapped sequences would vanish before they
    # could be counted.
    my $column = $align->slice( $i, $i, 1 );

    for my $seq ( $column->each_seq ) {
        # Upper-case so that lower-case (e.g. soft-masked) residues are
        # counted together with their upper-case form.
        my $residue = uc $seq->seq;
        my $key = $residue eq $gap_char ? 'gap' : $residue;

        # Symbols outside A/T/C/G/N/gap (e.g. other IUPAC codes) are ignored.
        $count{$key}++ if exists $count{$key};
    }

    # Column order matches the header line above.
    print join( "\t", $i, @count{qw(A T C G N gap)} ), "\n";
}
#***********************************************************************
More information about the Bioperl-l
mailing list