[Bioperl-l] BioPerl and NHX tree

Laurence Amilhat Laurence.Amilhat at toulouse.inra.fr
Tue May 6 09:32:53 UTC 2008


Hello,


I am trying to convert a Newick tree file to an NHX file with species tags 
in order to visualize it with the ATV viewer.
The script runs, but I think there is an error because ATV 
returns this error message:
" Failed to read gene tree from 
"BX881913.1.p.om.4.tfa_prot.tfa.taxid.alltree.cons_outtree.rooted.long.nhx" 
[Error in NHX format: More than one distance to parent:"0.0"]"

When comparing the intree and outtree, they seem to be different, for 
example the intree file begins with (((((( and the outtree begins with (((

Do you have an idea of what I am doing wrong?

Here is my code:

use strict;
use Bio::TreeIO;
use Bio::Tree::NodeNHX;
use Getopt::Long;


# Command-line options:
#   -f / --file   input tree file (read by the tree reader below)
#   -o / --out    output file the relabelled tree is written to
#   -c / --code   tab-separated correspondence file
my $tree_file;
my $outfile;
my $codefile;
# Correspondence table filled from the code file, keyed by sequence code.
my %corresp;

my $usage = "Usage: $0 -f <tree file> -o <output file> -c <correspondence file>\n";

# GetOptions returns false when it meets an unknown or malformed option;
# stop here instead of carrying on with half-parsed arguments.
GetOptions('f|file:s' =>\$tree_file, 'o|out:s' =>\$outfile, 'c|code:s' =>\$codefile)
        or die $usage;

# All three file names are used further down, and the ':s' specifiers allow
# an option to be given without a value, so reject undefined or empty names.
foreach my $required ($tree_file, $outfile, $codefile)
{
        die $usage unless defined $required && length $required;
}

# Read the correspondence file (tab-separated, one sequence per line).
# The first column is the sequence code used as key; the second column is
# not used. For each sequence the remaining columns give:
#       - the TAXID
#       - the species name
#       - the species name (with no space)
#       - the complete fasta header
# Three-argument open with a lexical handle, checked so that a missing or
# unreadable file stops the script instead of silently yielding an empty table.
open(my $code_fh, '<', $codefile)
        or die "Cannot open correspondence file '$codefile': $!\n";
while (my $line = <$code_fh>)
{
        chomp $line;
        # Skip blank lines; they would otherwise create an entry with an empty key.
        next if $line !~ /\S/;
        # Named fields instead of $a/$b, which are reserved for sort().
        my ($code, undef, $taxid, $name, $species, $header) = split(/\t/, $line);
        $corresp{$code}{"taxid"}=$taxid;
        $corresp{$code}{"species"}=$species;
        $corresp{$code}{"header"}=$header;
        $corresp{$code}{"nom"}=$name;
}
close($code_fh);

# Reader for the input tree file and writer for the output file, both using
# the 'nhx' format.
# NOTE(review): the input is described as plain Newick; the 'nhx' reader is
# presumably used so that nodes support nhx_tag() -- confirm before changing.
# Direct method calls replace the indirect-object form "new Bio::TreeIO (...)".
my $treeio = Bio::TreeIO->new(-format => 'nhx', -file => $tree_file);
#my $treeout= new Bio::TreeIO (-format => 'nhx', -file =>">$outfile", -binary=>"1");
my $treeout = Bio::TreeIO->new(-format => 'nhx', -file => ">$outfile");

# Read each tree, replace every leaf label by the complete fasta header and
# add an NHX "S" tag holding the species name, then write the tree out.
# NOTE(review): the output posted with this script appears to lack the
# outermost pair of parentheses present in the input; nothing in this loop
# removes nodes, so check the installed BioPerl NHX writer.
while (my $tree= $treeio->next_tree)
{
        foreach my $nd ($tree->get_nodes())
        {
                # Only leaves carry sequence codes; internal nodes are left untouched.
                next unless $nd->is_Leaf();

                my $id=$nd->id();
                $id = '' unless defined $id;
                print STDOUT "ID $id\n";

                # Plain lookup (no nested subscript) so that unknown codes do
                # not autovivify empty entries in %corresp.
                my $entry=$corresp{$id};
                unless (defined $entry && defined $entry->{"header"})
                {
                        # Keep the original label rather than overwriting it with undef.
                        warn "No correspondence entry for leaf '$id'; leaving it unchanged\n";
                        next;
                }

                # add a NHX tag to the node which is the species name
                $nd->nhx_tag({S=>$entry->{"nom"}}) if defined $entry->{"nom"};
                # change the sequence code by its complete fasta header
                $nd->id($entry->{"header"});
        }
        $treeout->write_tree($tree);
}


Here is the infile:

((((((20:3.0,21:3.0):2.0,(((17:3.0,18:3.0):2.0,19:3.0):3.0,(15:3.0,16:3.0):3.0):1.0):2.0,
14:3.0):3.0,22:3.0):3.0,((13:3.0,(11:3.0,(10:3.0,12:3.0):1.0):3.0):3.0,(2:3.0,
1:3.0):3.0):3.0):0.0,((5:3.0,4:3.0):3.0,(3:3.0,((8:3.0,6:3.0):3.0,(9:3.0,7:6.0):3.0):3.0):2.0):3.0);


Here is the output file:

(((lcl|Fam_018802_Contig1_2_TAXID=8022_:3.0[&&NHX:S=Oncorhynchus 
mykiss],BX881913.1.p.om.4_1_1_-_501_TAXID=8022_:3.0[&&NHX:S=Oncorhynchus my
kiss]):3.0[&&NHX],(lcl|Fam_013546_Contig1_PIMPR_6_TAXID=90988_:3.0[&&NHX:S=Pimephales 
promelas],(lcl|ENSDARP00000087648_pep_known_chromosome
_ZFISH7_13_51517919_51522668_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio 
rerio],(lcl|ENSDARP00000087661_pep_novel_chromosome_ZFISH7_13_51
517919_51522668_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio 
rerio],lcl|ENSDARP00000087654_pep_known_chromosome_ZFISH7_13_51517544_5152273
9_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio 
rerio]):1.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX],(lcl|Fam_012588_Contig3090_GADMO_2_T
AXID=8049_:3.0[&&NHX:S=Gadus 
morhua],(lcl|GSTENP00018428001_pep_known_chromosome_TETRAODON7_14_8497414_8500061_-1_gene_GSTENG00018428001_t:3
.0[&&NHX:S=Tetraodon 
nigroviridis],((lcl|ENSORLP00000013438_pep_novel_chromosome_MEDAKA1_24_3589482_3594915_-1_gene_ENSORLG00000010721_tr:3.
0[&&NHX:S=Oryzias 
latipes],lcl|ENSGACP00000006915_pep_novel_group_BROADS1_groupXVIII_2150130_2155380_1_gene_ENSGACG00000005224_:3.0[&&NHX:S=
Gasterosteus 
aculeatus]):2.0[&&NHX],((lcl|ENSDARP00000074838_pep_novel_chromosome_ZFISH7_20_12837032_12851267_1_gene_ENSDARG00000011000_tr:3
.0[&&NHX:S=Danio 
rerio],lcl|ENSDARP00000015974_pep_known_chromosome_ZFISH7_20_12836852_12852683_1_gene_ENSDARG00000011000_tr:3.0[&&NHX:S=Dan
io 
rerio]):3.0[&&NHX],(lcl|Contig618_HIPHI_5_TAXID=8267_:3.0[&&NHX:S=Hippoglossus 
hippoglossus],(lcl|Fam_023545_Contig2_2_TAXID=8022_:3.0[&&
NHX:S=Oncorhynchus 
mykiss],lcl|ENSTRUP00000046040_pep_novel_scaffold_FUGU4_scaffold_185_27966_32394_1_gene_ENSTRUG00000017961_t:3.0[&&NHX:S=
Takifugu 
rubripes]):2.0[&&NHX]):3.0[&&NHX]):1.0[&&NHX]):2.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX]):0.0[&&NHX],((lcl|ENSORLP00000013701_pep_novel_ch
romosome_MEDAKA1_15_25438171_25450498_-1_gene_ENSORLG00000010924_:3.0[&&NHX:S=Oryzias 
latipes],lcl|ENSGACP00000007323_pep_novel_group_BROADS
1_groupVI_6476613_6485834_1_gene_ENSGACG00000005527_tra:3.0[&&NHX:S=Gasterosteus 
aculeatus]):3.0[&&NHX],(lcl|GSTENP00030753001_pep_known_chr
omosome_TETRAODON7_17_3400689_3407671_1_gene_GSTENG00030753001_tr:3.0[&&NHX:S=Tetraodon 
nigroviridis],((lcl|ENSTRUP00000035694_pep_novel_sca
ffold_FUGU4_scaffold_125_722763_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu 
rubripes],lcl|ENSTRUP00000035693_pep_novel_scaffold_FU
GU4_scaffold_125_722763_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu 
rubripes]):3.0[&&NHX],(lcl|ENSTRUP00000035695_pep_novel_scaffo
ld_FUGU4_scaffold_125_722853_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu 
rubripes],lcl|ENSTRUP00000035691_pep_novel_scaffold_FUGU4
_scaffold_125_718572_725332_1_gene_ENSTRUG00000013959:6.0[&&NHX:S=Takifugu 
rubripes]):3.0[&&NHX]):3.0[&&NHX]):2.0[&&NHX]):3.0[&&NHX];






-- 
====================================================================
= Laurence Amilhat    INRA Toulouse 31326 Castanet-Tolosan     	   = 
= Tel: 33 5 61 28 57 08   Email: laurence.amilhat at toulouse.inra.fr =
====================================================================






More information about the Bioperl-l mailing list