[Bioperl-l] counting gaps in a column of alignment

subha kalyanamoorthy sksweety24 at gmail.com
Thu Jul 25 04:37:02 UTC 2013


Hi there,

I am a new bioperl user.  I made a script to count the nucleotide
composition in each column of the alignment. I am able to get the count for
the nucleotides, but not for the gap characters from the following program.
I would greatly appreciate any suggestions to correct this program.

Thanks.

#***************************My Program**********************************

#!/bin/perl -w
use strict;
use warnings;

use List::Util 'max';
use Bio::SimpleAlign;
use Bio::Align::AlignI;
use Bio::AlignIO;
use Bio::SeqIO;

my $in= Bio::AlignIO->new( -file => "seq.fst", -format => "fasta");

my $align = $in->next_aln();

print "column\tA's\tT's\C's\G's\n";

for (my $i = 1; $i <= $align->length; $i++) {

 my %count;

 my $seqs = $align->slice($i,$i);

my $gap_char = $seqs->gap_char();

    my $count_A=0;
    my $count_C=0;
    my $count_T=0;
    my $count_G=0;
    my $count_N=0;
    my $count_gap=0;

    foreach my $seq ($seqs->each_seq) {

    my $col=$seq->seq;

        if ($col eq 'A'){
        $count_A++;
        }elsif ($col eq 'C'){
        $count_C++;
        }elsif ($col eq 'T'){
        $count_T++;
        }elsif ($col eq 'G'){
        $count_G++;
        }elsif ( $col eq 'N'){
        $count_N++;
        }elsif ($col =~ m/^\Q$gap_char\E$/){
        $count_gap++;
}
      $count{$seq->seq} += 1;
 }

print"$i\t$count_A\t$count_T\t$count_C\t$count_G\n";

}

#***********************************************************************`



More information about the Bioperl-l mailing list