[Bioperl-l] Problem with paml+codeml
subarna thakur
bubli_thakur at rediffmail.com
Wed Apr 20 08:38:07 UTC 2011
HiI am using the bioperl script to calculate the ka/ks ratio for a no. of sequence using PAML. As the script runs, a tmp file is generated and mlc file is also generated in tmp folder. I have checked the mlc file and it seems to be ok. But the align_output file comes as-SEQ1 SEQ2 Ka Ks Ka/Ks PROT_PERCENTIDThe ouput file is blank without any ka-ks value.The script I am using is follows-------------------------------------------------------
#!/usr/bin/perl -w
use strict;
use Bio::Tools::Run::Phylo::PAML::Codeml;
use Bio::Tools::Run::Alignment::Clustalw;
# for projecting alignments from protein to R/DNA space
use Bio::Align::Utilities qw(aa_to_dna_aln);
# for input of the sequence data
use Bio::SeqIO;
use Bio::AlignIO;
BEGIN { $ENV{CLUSTALDIR} = '/usr/local/bin' }
BEGIN { $ENV{PAMLDIR} = '/root/Desktop/paml44/bin' }
BEGIN { $ENV{TMPDIR} = '/tmp' }
my $aln_factory = Bio::Tools::Run::Alignment::Clustalw->new;
my $seqdata = shift || 'nt.fasta';
my $seqio = new Bio::SeqIO(-file => $seqdata,
-format => 'fasta');
my %seqs;
my @prots;
# process each sequence
while ( my $seq = $seqio->next_seq ) {
$seqs{$seq->display_id} = $seq;
# translate them into protein
my $protein = $seq->translate();
my $pseq = $protein->seq();
if( $pseq =~ /\*/ &&
$pseq !~ /\*$/ ) {
warn("provided a CDS sequence with a stop codon, PAML will choke!");
exit(0);
}
# Tcoffee can't handle '*' even if it is trailing
$pseq =~ s/\*//g;
$protein->seq($pseq);
push @prots, $protein;
}
if( @prots < 2 ) {
warn("Need at least 2 CDS sequences to proceed");
exit(0);
}
open(OUT, ">align_output.txt") || die("cannot open output align_output for writing");
# Align the sequences with clustalw
my $aa_aln = $aln_factory->align(\@prots);
# project the protein alignment back to CDS coordinates
my $dna_aln = aa_to_dna_aln($aa_aln, \%seqs);
my @each = $dna_aln->each_seq();
my $kaks_factory = Bio::Tools::Run::Phylo::PAML::Codeml->new(-save_tempfiles => 1);
( -params => { 'runmode' => -2,
'seqtype' => 1,
} );
# set the alignment object
$kaks_factory->alignment($dna_aln);
my ($rc,$parser) = $kaks_factory->run();
my $result = $parser->next_result();
my $MLmatrix = $result->get_MLmatrix();
my @otus = $result->get_seqs();
my @pos = map {
my $c= 1;
foreach my $s ( @each ) {
last if( $s->display_id eq $_->display_id );
$c++;
}
$c;
} @otus;
print OUT join("\t", qw(SEQ1 SEQ2 Ka Ks Ka/Ks PROT_PERCENTID CDNA_PERCENTID)), "\n";
for( my $i = 0; $i < (scalar @otus -1) ; $i++) {
for( my $j = $i+1; $j < (scalar @otus); $j++ ) {
my $sub_aa_aln = $aa_aln->select_noncont($pos[$i],$pos[$j]);
my $sub_dna_aln = $dna_aln->select_noncont($pos[$i],$pos[$j]);
print OUT join("\t",
$otus[$i]->display_id,
$otus[$j]->display_id,$MLmatrix->[$i]->[$j]->{'dN'},
$MLmatrix->[$i]->[$j]->{'dS'},
$MLmatrix->[$i]->[$j]->{'omega'},
sprintf("%.2f",$sub_aa_aln->percentage_identity),
sprintf("%.2f",$sub_dna_aln->percentage_identity),
), "\n";
}
}
-----------------------------------------
Can anybody please suggest what is wrong with the script? I think something is wrong in the end section particularly the counter. RegardsSubarna
More information about the Bioperl-l
mailing list