[Bioperl-l] BioPerl and NHX tree
Laurence Amilhat
Laurence.Amilhat at toulouse.inra.fr
Tue May 6 09:32:53 UTC 2008
Hello,
I am trying to convert a Newick tree file to an NHX file with species tags
in order to visualize it with the ATV viewer.
The script runs, but I think there is an error, because ATV
returns this error message:
" Failed to read gene tree from
"BX881913.1.p.om.4.tfa_prot.tfa.taxid.alltree.cons_outtree.rooted.long.nhx"
[Error in NHX format: More than one distance to parent:"0.0"]"
When comparing the intree and outtree, they seem to be different, for
example the intree file begins with (((((( and the outtree begins with (((
Do you have an idea of what I am doing wrong?
Here is my code:
use strict;
use warnings;
use Bio::TreeIO;
use Bio::Tree::NodeNHX;
use Getopt::Long;

# Re-write a tree file in NHX format: every leaf gets an NHX "S" tag taken
# from a tab-separated correspondence file, and the leaf id (a short
# sequence code) is replaced by the complete fasta header.
#
# Options:
#   -f | --file   input tree file
#   -o | --out    output NHX file
#   -c | --code   tab-separated correspondence file

my $tree_file;
my $outfile;
my $codefile;
my %corresp;

my $usage = "Usage: $0 -f TREEFILE -o OUTFILE -c CODEFILE\n";

GetOptions(
    'f|file:s' => \$tree_file,
    'o|out:s'  => \$outfile,
    'c|code:s' => \$codefile,
) or die $usage;

# All three options are needed; fail early with a readable message instead
# of an obscure error later on.
for my $required ($tree_file, $outfile, $codefile) {
    die $usage unless defined $required && length $required;
}

# Read the correspondence file.
# Columns (tab separated): sequence code, an unused column, then the values
# stored under the keys "taxid", "nom", "species" and "header".
# According to the original notes these are the TAXID, the species name,
# the species name with no space, and the complete fasta header.
open(my $code_fh, '<', $codefile)
    or die "Cannot open correspondence file '$codefile': $!\n";
while (my $line = <$code_fh>) {
    chomp $line;
    next unless $line =~ /\S/;    # ignore blank lines

    my ($code, undef, $taxid, $nom, $species, $header) = split /\t/, $line;
    $corresp{$code} = {
        taxid   => $taxid,
        species => $species,
        header  => $header,
        nom     => $nom,
    };
}
close $code_fh;

# The input is read with the 'nhx' format because the loop below calls
# nhx_tag() on the nodes it gets back.
my $treeio = Bio::TreeIO->new(-format => 'nhx', -file => $tree_file);

# Alternative that was tried and left disabled:
#my $treeout = Bio::TreeIO->new(-format => 'nhx', -file => ">$outfile",
#                               -binary => "1");
my $treeout = Bio::TreeIO->new(-format => 'nhx', -file => ">$outfile");

# Read each tree, change the sequence code of every leaf to its fasta
# header and add an NHX tag that specifies the species.
while (my $tree = $treeio->next_tree) {
    for my $node ($tree->get_nodes()) {
        next unless $node->is_Leaf();

        my $id = $node->id();
        if (!defined $id) {
            warn "Leaf without an id, left unchanged\n";
            next;
        }
        print STDOUT "ID $id\n";

        # exists() avoids autovivifying an empty entry for unknown codes.
        if (!exists $corresp{$id}) {
            warn "No correspondence entry for '$id', leaf left unchanged\n";
            next;
        }
        my $info = $corresp{$id};

        # Add an NHX tag to the node which is the species name.
        $node->nhx_tag({ S => $info->{nom} }) if defined $info->{nom};

        # Change the sequence code to its complete fasta header.
        $node->id($info->{header}) if defined $info->{header};
    }
    $treeout->write_tree($tree);
}
Here is the infile:
((((((20:3.0,21:3.0):2.0,(((17:3.0,18:3.0):2.0,19:3.0):3.0,(15:3.0,16:3.0):3.0):1.0):2.0,
14:3.0):3.0,22:3.0):3.0,((13:3.0,(11:3.0,(10:3.0,12:3.0):1.0):3.0):3.0,(2:3.0,
1:3.0):3.0):3.0):0.0,((5:3.0,4:3.0):3.0,(3:3.0,((8:3.0,6:3.0):3.0,(9:3.0,7:6.0):3.0):3.0):2.0):3.0);
Here is the output file:
(((lcl|Fam_018802_Contig1_2_TAXID=8022_:3.0[&&NHX:S=Oncorhynchus
mykiss],BX881913.1.p.om.4_1_1_-_501_TAXID=8022_:3.0[&&NHX:S=Oncorhynchus my
kiss]):3.0[&&NHX],(lcl|Fam_013546_Contig1_PIMPR_6_TAXID=90988_:3.0[&&NHX:S=Pimephales
promelas],(lcl|ENSDARP00000087648_pep_known_chromosome
_ZFISH7_13_51517919_51522668_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio
rerio],(lcl|ENSDARP00000087661_pep_novel_chromosome_ZFISH7_13_51
517919_51522668_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio
rerio],lcl|ENSDARP00000087654_pep_known_chromosome_ZFISH7_13_51517544_5152273
9_-1_gene_ENSDARG00000063670_t:3.0[&&NHX:S=Danio
rerio]):1.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX],(lcl|Fam_012588_Contig3090_GADMO_2_T
AXID=8049_:3.0[&&NHX:S=Gadus
morhua],(lcl|GSTENP00018428001_pep_known_chromosome_TETRAODON7_14_8497414_8500061_-1_gene_GSTENG00018428001_t:3
.0[&&NHX:S=Tetraodon
nigroviridis],((lcl|ENSORLP00000013438_pep_novel_chromosome_MEDAKA1_24_3589482_3594915_-1_gene_ENSORLG00000010721_tr:3.
0[&&NHX:S=Oryzias
latipes],lcl|ENSGACP00000006915_pep_novel_group_BROADS1_groupXVIII_2150130_2155380_1_gene_ENSGACG00000005224_:3.0[&&NHX:S=
Gasterosteus
aculeatus]):2.0[&&NHX],((lcl|ENSDARP00000074838_pep_novel_chromosome_ZFISH7_20_12837032_12851267_1_gene_ENSDARG00000011000_tr:3
.0[&&NHX:S=Danio
rerio],lcl|ENSDARP00000015974_pep_known_chromosome_ZFISH7_20_12836852_12852683_1_gene_ENSDARG00000011000_tr:3.0[&&NHX:S=Dan
io
rerio]):3.0[&&NHX],(lcl|Contig618_HIPHI_5_TAXID=8267_:3.0[&&NHX:S=Hippoglossus
hippoglossus],(lcl|Fam_023545_Contig2_2_TAXID=8022_:3.0[&&
NHX:S=Oncorhynchus
mykiss],lcl|ENSTRUP00000046040_pep_novel_scaffold_FUGU4_scaffold_185_27966_32394_1_gene_ENSTRUG00000017961_t:3.0[&&NHX:S=
Takifugu
rubripes]):2.0[&&NHX]):3.0[&&NHX]):1.0[&&NHX]):2.0[&&NHX]):3.0[&&NHX]):3.0[&&NHX]):0.0[&&NHX],((lcl|ENSORLP00000013701_pep_novel_ch
romosome_MEDAKA1_15_25438171_25450498_-1_gene_ENSORLG00000010924_:3.0[&&NHX:S=Oryzias
latipes],lcl|ENSGACP00000007323_pep_novel_group_BROADS
1_groupVI_6476613_6485834_1_gene_ENSGACG00000005527_tra:3.0[&&NHX:S=Gasterosteus
aculeatus]):3.0[&&NHX],(lcl|GSTENP00030753001_pep_known_chr
omosome_TETRAODON7_17_3400689_3407671_1_gene_GSTENG00030753001_tr:3.0[&&NHX:S=Tetraodon
nigroviridis],((lcl|ENSTRUP00000035694_pep_novel_sca
ffold_FUGU4_scaffold_125_722763_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu
rubripes],lcl|ENSTRUP00000035693_pep_novel_scaffold_FU
GU4_scaffold_125_722763_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu
rubripes]):3.0[&&NHX],(lcl|ENSTRUP00000035695_pep_novel_scaffo
ld_FUGU4_scaffold_125_722853_725332_1_gene_ENSTRUG00000013959:3.0[&&NHX:S=Takifugu
rubripes],lcl|ENSTRUP00000035691_pep_novel_scaffold_FUGU4
_scaffold_125_718572_725332_1_gene_ENSTRUG00000013959:6.0[&&NHX:S=Takifugu
rubripes]):3.0[&&NHX]):3.0[&&NHX]):2.0[&&NHX]):3.0[&&NHX];
--
====================================================================
= Laurence Amilhat INRA Toulouse 31326 Castanet-Tolosan =
= Tel: 33 5 61 28 57 08 Email: laurence.amilhat at toulouse.inra.fr =
====================================================================
More information about the Bioperl-l
mailing list