[Bioperl-l] counting gaps in a column of alignment
    subha kalyanamoorthy 
    sksweety24 at gmail.com
       
    Thu Jul 25 04:37:02 UTC 2013
    
    
  
Hi there,
I am a new bioperl user.  I made a script to count the nucleotide
composition in each column of the alignment. I am able to get the count for
the nucleotides, but not for the gap characters from the following program.
I would greatly appreciate any suggestions to correct this program.
Thanks.
#***************************My Program**********************************
#!/bin/perl -w
use strict;
use warnings;
use List::Util 'max';
use Bio::SimpleAlign;
use Bio::Align::AlignI;
use Bio::AlignIO;
use Bio::SeqIO;
my $in= Bio::AlignIO->new( -file => "seq.fst", -format => "fasta");
my $align = $in->next_aln();
print "column\tA's\tT's\C's\G's\n";
for (my $i = 1; $i <= $align->length; $i++) {
 my %count;
 my $seqs = $align->slice($i,$i);
my $gap_char = $seqs->gap_char();
    my $count_A=0;
    my $count_C=0;
    my $count_T=0;
    my $count_G=0;
    my $count_N=0;
    my $count_gap=0;
    foreach my $seq ($seqs->each_seq) {
    my $col=$seq->seq;
        if ($col eq 'A'){
        $count_A++;
        }elsif ($col eq 'C'){
        $count_C++;
        }elsif ($col eq 'T'){
        $count_T++;
        }elsif ($col eq 'G'){
        $count_G++;
        }elsif ( $col eq 'N'){
        $count_N++;
        }elsif ($col =~ m/^\Q$gap_char\E$/){
        $count_gap++;
}
      $count{$seq->seq} += 1;
 }
print"$i\t$count_A\t$count_T\t$count_C\t$count_G\n";
}
#***********************************************************************`
    
    
More information about the Bioperl-l
mailing list