[Bioperl-l] Need help for implementing a new TreeIO module
Guillaume Rousse
rousse at ccr.jussieu.fr
Tue Jan 4 08:11:36 EST 2005
Jason Stajich wrote:
> If you want to build a node with two leaves, first you have to start
> with a 'tree' section to tell the handler that this is nested data.
> Start a 'tree' event, build the node (like the section just above), then
> build two leaf nodes (like the leaf node section above), then end the
> 'tree' event. 'tree' is an unfortunate name for the event but don't
> feel like changing it - a throwback from when I thought I'd only need an
> initial 'tree' an just 'node' events.
>
> $self->_eventHandler->start_document;
> $self->_eventHandler->start_element({'Name' => 'tree'});
> # do internal node
> # do leaf node
> # do leaf node
> $self->_eventHandler->end_element({'Name' => 'tree'});
> return $self->_eventHandler->end_document;
OK, done, but I still have an issue with each internal node connecting
two leaves, producing a third intermediate leaf. I don't know if the
problems comes from me or from bioperl. Here is my code, along with a
test script.
I you don't want to install Algorithm::Cluster to test, the input data
is something as:
-1: 5 4 0.000
-2: 7 6 0.000
-3: 10 11 0.010
-4: 2 0 0.090
-5: -3 12 0.095
-6: 1 -4 0.115
-7: -5 9 0.143
-8: -1 3 0.250
-9: -2 -7 0.618
-10: -8 -6 0.639
-11: 8 -10 5.805
-12: -9 -11 28.056
Where the first column is internal node id, the second and third one the
children id for each node, and the fourth one the distance between the
children.
I also patched svggraph to use parameters instead of hard-coded values,
and also to allow some normalisation for the branches lengths, in such a
way that it would be easy to add new normalisation functions, including
arbitrary code. Patch attached too.
--
No flight ever leaves on time unless you are running late and need the
delay to make the flight
-- Murphy's Laws for Frequent Flyers n°1
-------------- next part --------------
#!/usr/bin/perl
use Algorithm::Cluster;
use Bio::TreeIO;
use strict;
my $weight = [ 1,1 ];
my $data = [
[ 1.1, 1.2 ],
[ 1.4, 1.3 ],
[ 1.1, 1.5 ],
[ 2.0, 1.5 ],
[ 1.7, 1.9 ],
[ 1.7, 1.9 ],
[ 5.7, 5.9 ],
[ 5.7, 5.9 ],
[ 3.1, 3.3 ],
[ 5.4, 5.3 ],
[ 5.1, 5.5 ],
[ 5.0, 5.5 ],
[ 5.1, 5.2 ],
];
my $mask = [
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
[ 1, 1 ],
];
my $labels = [ qw/a b c d e f g h i j k l m/ ];
my %params = (
applyscale => 0,
transpose => 0,
method => 'a',
dist => 'e',
data => $data,
mask => $mask,
weight => $weight,
);
my ($result, $linkdist);
my ($i,$j);
($result, $linkdist) = Algorithm::Cluster::treecluster(%params);
$i=0;
foreach(@{$result}) {
printf("%3d: %3d %3d %7.3f\n",-1-$i,$_->[0],$_->[1],$linkdist->[$i]);
++$i;
}
my $in = new Bio::TreeIO(
-format => 'cluster',
-result => $result,
-linkdist => $linkdist,
-labels => $labels,
);
my $out = new Bio::TreeIO(
-format => 'svggraph',
-file => '>output.svg'
);
$out->write_tree($in->next_tree());
-------------- next part --------------
# $Id: nexus.pm,v 1.2 2003/12/06 18:10:26 jason Exp $
#
# BioPerl module for Bio::TreeIO::cluster
#
# Contributed by Guillaume Rousse <Guillaume-dot-Rousse-at-inria-dot-fr>
#
# Copyright INRIA
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::TreeIO::cluster - A TreeIO driver module for parsing Algorithm::Cluster::treecluster output
=head1 SYNOPSIS
# do not use this module directly
use Bio::TreeIO;
use Algorithm::Cluster::treecluster;
my ($result, $linkdist) = Algorithm::Cluster::treecluster(
distances => $matrix
);
my $treeio = new Bio::TreeIO(
-format => 'cluster',
-result => $result,
-linkdist => $linkdist,
-labels => $labels
);
my $tree = $treeio->next_tree;
=head1 DESCRIPTION
This is a driver module for parsing Algorithm::Cluster::treecluster output.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l at bioperl.org - General discussion
http://bioperl.org/MailList.shtml - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
the web:
http://bugzilla.bioperl.org/
=head1 AUTHOR - Guillaume Rousse
Email Guillaume-dot-Rousse-at-inria-dot-fr
Describe contact details here
=head1 CONTRIBUTORS
Additional contributors names and emails here
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::TreeIO::cluster;
use vars qw(@ISA);
use strict;
use Bio::TreeIO;
use Bio::Event::EventGeneratorI;
use IO::String;
@ISA = qw(Bio::TreeIO);
sub _initialize {
my ($self, %args) = @_;
$self->{_result} = $args{'-result'};
$self->{_linkdist} = $args{'-linkdist'};
$self->{_labels} = $args{'-labels'};
$self->SUPER::_initialize(%args);
}
=head2 next_tree
Title : next_tree
Usage : my $tree = $treeio->next_tree
Function: Gets the next tree in the stream
Returns : Bio::Tree::TreeI
Args : none
=cut
sub next_tree {
my ($self) = @_;
$self->_eventHandler->start_document();
# build tree from the root
$self->_eventHandler->start_element({Name => 'tree'});
$self->_recurse(-1, 0);
$self->_recurse(-1, 1);
$self->_eventHandler->end_element({Name => 'tree'});
return $self->_eventHandler->end_document;
}
sub _recurse {
my ($self, $line, $column) = @_;
my $id = $self->{_result}->[$line]->[$column];
if ($id >= 0) {
# leaf
$self->debug("leaf $id\n");
$self->debug("distance $self->{_linkdist}->[$line]\n");
$self->debug("label $self->{_labels}->[$id]\n");
$self->_eventHandler->start_element({Name => 'node'});
$self->_eventHandler->start_element({Name => 'branch_length'});
$self->_eventHandler->characters($self->{_linkdist}->[$line]);
$self->_eventHandler->end_element({Name => 'branch_length'});
$self->_eventHandler->start_element({Name => 'id'});
$self->_eventHandler->characters($self->{_labels}->[$id]);
$self->_eventHandler->end_element({Name => 'id'});
$self->_eventHandler->start_element({Name => 'leaf'});
$self->_eventHandler->characters(1);
$self->_eventHandler->end_element({Name => 'leaf'});
$self->_eventHandler->end_element({Name => 'node'});
} else {
# internal node
$self->debug("internal node $id\n");
$self->debug("distance $self->{_linkdist}->[$line]\n");
$self->_eventHandler->start_element({Name => 'node'});
$self->_eventHandler->start_element({Name => 'branch_length'});
$self->_eventHandler->characters($self->{_linkdist}->[$line]);
$self->_eventHandler->end_element({Name => 'branch_length'});
$self->_eventHandler->start_element({Name => 'leaf'});
$self->_eventHandler->characters(0);
$self->_eventHandler->end_element({Name => 'leaf'});
$self->_eventHandler->start_element({Name => 'tree'});
my $child_id = - ($id + 1);
$self->_recurse($child_id, 0);
$self->_recurse($child_id, 1);
$self->_eventHandler->end_element({Name => 'tree'});
$self->_eventHandler->end_element({Name => 'node'});
}
}
=head2 write_tree
Title : write_tree
Usage :
Function: Sorry not possible with this format
Returns : none
Args : none
=cut
sub write_tree{
$_[0]->throw("Sorry the format 'cluster' can only be used as an input format");
}
1;
-------------- next part --------------
--- /usr/lib/perl5/vendor_perl/5.8.6/Bio/TreeIO/svggraph.pm 2003-11-28 07:27:16.000000000 +0100
+++ Bio/TreeIO/svggraph.pm 2005-01-04 13:57:14.265334869 +0100
@@ -86,22 +86,16 @@
@ISA = qw(Bio::TreeIO );
-=head2 new
-
- Title : new
- Usage : my $obj = new Bio::TreeIO::svggraph();
- Function: Builds a new Bio::TreeIO::svggraph object
- Returns : Bio::TreeIO::svggraph
- Args :
-
-
-=cut
-
-sub new {
- my($class, at args) = @_;
-
- my $self = $class->SUPER::new(@args);
-
+sub _initialize {
+ my ($self, %args) = @_;
+ $self->{_width} = $args{'-width'} || 1600;
+ $self->{_height} = $args{'-height'} || 1000;
+ $self->{_margin} = $args{'-margin'} || 30;
+ $self->{_stroke} = $args{'-stroke'} || 'black';
+ $self->{_stroke_width} = $args{'-stroke_width'} || 2;
+ $self->{_font_size} = $args{'-font_size'} || '10px';
+ $self->{_normalize} = $args{'-normalize'};
+ $self->SUPER::_initialize(%args);
}
=head2 write_tree
@@ -116,28 +110,35 @@
sub write_tree{
my ($self,$tree) = @_;
- my $line = _write_tree_Helper($tree->get_root_node);
+ my $line = $self->_write_tree_Helper($tree->get_root_node);
$self->_print($line. "\n");
$self->flush if $self->_flush_on_write && defined $self->_fh;
return;
}
sub _write_tree_Helper {
- my ($node) = @_;
+ my ($self,$node) = @_;
- #this needs to be parameterized
- my $graph = SVG::Graph->new(width=>1600,height=>1000,margin=>30);
+ my $graph = SVG::Graph->new(
+ width => $self->{_width},
+ height => $self->{_height},
+ margin => $self->{_margin}
+ );
my $group0 = $graph->add_frame;
my $tree = SVG::Graph::Data::Tree->new;
my $root = SVG::Graph::Data::Node->new;
$root->name($node->id);
- _decorateRoot($root, $node->each_Descendent());
+ $self->_decorateRoot($root, $node->each_Descendent());
$tree->root($root);
$group0->add_data($tree);
- #this needs to be parameterized
- $group0->add_glyph('tree', stroke=>'black','stroke-width'=>2,'font-size'=>'10px');
+ $group0->add_glyph(
+ 'tree',
+ 'stroke' => $self->{_stroke},
+ 'stroke-width' => $self->{_stroke_width},
+ 'font-size' => $self->{_font_size}
+ );
return($graph->draw);
}
@@ -156,16 +157,21 @@
=cut
sub _decorateRoot{
- my $previousNode = shift;
- my @children = @_;
- foreach my $child (@children)
- {
- my $currNode = SVG::Graph::Data::Node->new;
- $currNode->branch_label($child->id);
- $currNode->branch_length($child->branch_length);
- $previousNode->add_daughter($currNode);
- _decorateRoot($currNode, $child->each_Descendent());
- }
+ my ($self,$previousNode, at children) = @_;
+ foreach my $child (@children) {
+ my $currNode = SVG::Graph::Data::Node->new;
+ $currNode->branch_label($child->id);
+ my $length = $child->branch_length;
+ CASE: {
+ if ($self->{_normalize} eq 'log') {
+ $length = log($length + 1);
+ last CASE;
+ }
+ }
+ $currNode->branch_length($length);
+ $previousNode->add_daughter($currNode);
+ $self->_decorateRoot($currNode, $child->each_Descendent());
+ }
}
=head2 next_tree
More information about the Bioperl-l
mailing list