[Bioperl-l] Re: Bioperl tree class
Richard Copley
copley@embl-heidelberg.de
Wed, 10 Oct 2001 18:23:17 +0200
This is a multi-part message in MIME format.
--------------080505080607050108010303
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit
>>Did you (or anyone else) get around to writing a bioperl object that can
>>read in PHYLIP format tree files? I am on the verge of writing one; it
>>wouldn't take long, but there's no sense duplicating effort.
>>
>
> No, I haven't, neither has my group in Singapore, just mailing bioperl, to
> check if Jason or others are working on it.
I started writing one ages ago but didn't finish - I attach it on the off
chance it's useful - realistically, it would take a lot of work. This uses
recursion - not sure if this is the best way to do it. I also put in a
function for drawing the tree using GD. Again this works in a limited fashion
but is far from a production product.
The one thing I want to plead for any bioperl Tree class is that any
implementation can deal with multifurcating trees. (Does that make them graphs?).
--
Richard Copley
EMBL
Meyerhofstr.1
69012 Heidelberg
Germany
Tel: +49 6221 387 534
FAX: +49 6221 387 517
--------------080505080607050108010303
Content-Type: type=audio/x-pn-realaudio-plugin;
name="Tree.pm"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="Tree.pm"
=head1 NAME
Tree - An object for parsing Newick style tree files
=head1 SYNOPSIS
use Tree;
$tree_string = '(((fly,worm,mosquito),(human,ape)),yeast);';
$tree = Tree->new();
$tree->parse_string($tree_string);
Make a png:
$png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);
Wish list - not implemented:
Collapse branches not supported by bootstrap values:
$tree->collapse_unsupported(750);
implement by removing node, and replacing with kids using splice
--- then require some trickery with n_kids counter.
Get the highest node which has a given set of leaves under it:
$node = $tree->highest_node_for_clade('cerevisiae','pombe');
(should work for a single species)
Rerooting a tree with a given outgroup:
$tree->outgroup($node);
=head1 DESCRIPTION
Parses Newick style phylogenies (e.g. clustal *.ph, *.phb, *.dnd files)
into a tree object. Supports branch lengths and bootstrap values. The tree
can be mulitfurcating - i.e. multiple branches per node are supported.
To create an image from a tree object:
$png = $tree->GD_tree($height_per_leaf,$width_per_node,$border);
where dimensions are in pixels. width_per_node is actually a scaling
factor, multiplied by the distance to the node - for trees without
distances, this will be equivalent to the width in pixels.
=head1 AUTHOR
Richard Copley - Email copley@embl-heidelberg.de
=head1 APPENDIX
The rest of the documentation details each of the object methods.
=cut
package Tree;
use strict;
use Carp;
use GD;
my $depth = 0;
my $pos = -1;
my $Node = 'node';
my $Leaf = 'leaf';
sub new {
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
bless ($self,$class);
$self->{n_kids} = 0;
$self->{name} = '';
$self->{parent} = undef;
return $self;
}
sub parse_string {
my $self = shift;
my $tree_string = shift;
$depth = 0;
$pos = -1; # reset string index
my @tree = split(//,$tree_string);
$self->read_tree(\@tree);
return;
}
sub read_tree {
my $tree = shift;
my $string = shift;
$depth++;
$pos++;
# print "Entering: Depth: $depth, position: $pos\n";
while(1){
my $kid;
if($string->[$pos] eq '('){
$kid = $tree->new_kid();
$kid->read_tree($string);
$pos++;
if($pos >= $#{$string}){ return }
}
my $buffer = '';
while(($string->[$pos] ne ')') && ($string->[$pos] ne '(' ) ){
$buffer .= $string->[$pos];
$pos++;
# a little defensiveness to stop if something goes wrong
if(!defined($string->[$pos])){
croak "Problem parsing tree file - check format";
}
}
# print "Buffer contents: $buffer\n";
my @leaves = split(/,/,$buffer);
foreach my $leaf (@leaves){
next if ($leaf eq '');
my $dist = 1;
my $bootstrap = undef;
if($leaf =~ /:(\d+\.\d+)/){
$dist = $1;
}
if($leaf =~ /\[(\d+)\]/){
$bootstrap = $1;
}
if($leaf =~ /^:/){
# just a distance for the current node - not a leaf
$kid->{dist} = $dist;
$kid->{boot} = $bootstrap;
next;
} else {
$leaf =~ s/:.*//;
}
$tree->new_kid( 'name' => $leaf,
'boot' => $bootstrap,
'dist' => $dist,
'type' => $Leaf,
);
}
if($string->[$pos] eq ')'){
# print "Leaving: Depth: $depth, position: $pos\n";
$depth--;
return;
}
if($pos >= $#{$string}){
die "Fell through - problem parsing tree\n";
}
# print "Looping: Depth: $depth, position: $pos\n";
# if we fall through to here, we must have a '('
# the while loop will take us straight back to a read_tree call
}
}
sub new_kid {
my $parent = shift;
my %params = @_;
my $class = ref($parent) || $parent;
my $kid = {};
bless ($kid,$class);
my $n_kids = $parent->{n_kids};
$parent->{n_kids}++;
$kid->{parent} = $parent;
$kid->{n_kids} = 0;
$kid->{type} = $Node; # everything a node until changed into a leaf
$kid->{boot} = undef;
$kid->{dist} = 1;
$parent->{kid}[$n_kids] = $kid;
while(( my $key, my $value) = each %params){
$kid->{$key} = $value;
}
return $kid;
}
=head2
Title : leaf_count
Usage : $no_of_leaves = $tree->leaf_count()
Function: calculates the number of leaves under each node in a tree -
useful for working out how to display things
Returns : the number of leaves under a node
Args : none
=cut
sub leaf_count {
my $self = shift;
if (@_) { $self->{n_leaves} = shift }
if(defined($self->{n_leaves})){
return $self->{n_leaves};
}
my $leaf_count = 0;
foreach my $kid (@{$self->{kid}}){
if($kid->{type} eq $Node){
$leaf_count += $kid->leaf_count;
} elsif($kid->{type} eq $Leaf){
$kid->{n_leaves} = 1;
$leaf_count++;
} else {
croak "Unknown type in count_leaves";
}
}
$self->{n_leaves} = $leaf_count;
return $leaf_count;
}
sub leaves {
my $self = shift;
my $leaves = shift;
foreach my $kid (@{$self->{kid}}){
if($kid->{type} eq $Node){
$kid->leaves($leaves);
} elsif($kid->{type} eq $Leaf){
push(@{$leaves},$kid->{name});
} else {
croak "Unknown type in count_leaves";
}
}
return;
}
sub kid_count {
my $self = shift;
if (@_) { $self->{n_kids} = shift }
return $self->{n_kids};
}
sub name {
my $self = shift;
if (@_) { $self->{name} = shift }
return $self->{name};
}
sub type {
my $self = shift;
if (@_) { $self->{type} = shift }
return $self->{name};
}
=head2
Title : depth
Usage : $depth = $tree->max_depth($current_depth,$$max_depth);
Function: calculates the maximum depth of the tree
Returns : the current depth - the maximum depth under the node
is in the scalar ref of the second argument
Args : none
=cut
sub depth {
my $self = shift;
my $depth = shift;
my $deepest = shift;
$depth++;
foreach my $kid (@{$self->{kid}}){
$depth = $kid->depth($depth,$deepest);
$depth--;
if($depth > $$deepest){
$$deepest = $depth;
}
print "depth: [$depth], deepest, ",$$deepest,"\n";
}
return $depth;
}
=head2
Title : reroot
Usage : $tree->reroot(leaf_name);
Function: reroot the tree with the 'leaf_name' as the leftmost leaf
Returns : ??
Args : none
=cut
sub reroot {
my $self = shift;
my $root_leaf = shift;
my $root = $self->get_leaf($root_leaf);
}
=head2
Title : get_leaf
Usage : $tree->get_leaf('human');
Function: get the object corresponding to the named leaf
Returns : the leaf object
Args : the wanted leaf
=cut
sub get_leaf {
my $self = shift;
my $wanted_leaf = shift;
foreach my $kid (@{$self->{kid}}){
my $leaf = $self->get_leaf($wanted_leaf);
if($leaf eq $wanted_leaf){
return $leaf;
}
}
return undef;
}
=head2
Title : GD_tree
Usage : $tree->GD_tree(height_per_leaf,width_per_node);
Function: Make a png of the tree
Returns : the binary image
Args : none
=cut
sub GD_tree {
my $self = shift;
my $height_per_leaf = shift;
my $width_per_node = shift;
my $border = shift;
my $height = (( $self->leaf_count - 1 ) * $height_per_leaf );
my $depth = 0;
$self->depth(0,\$depth);
my $length = $width_per_node * $depth;
print "Height per leaf: [$height_per_leaf pixels]\n";
my $im = new GD::Image($length+($border*3),$height+($border*2));
my $colours = {};
$colours->{white} = $im->colorAllocate(255,255,255);
$colours->{black} = $im->colorAllocate(0,0,0);
$colours->{red} = $im->colorAllocate(255,0,0);
$colours->{blue} = $im->colorAllocate(0,0,255);
$im->transparent($colours->{white});
my $x = $border;
$self->_draw_nodes($im,0,$border,$height_per_leaf,
$width_per_node,$border,$colours);
# $self->{kid}[0]->_draw_nodes($im,0,$border,$height_per_leaf,
# $width_per_node,$border,$colours);
return $im;
}
sub _draw_nodes {
my $self = shift;
my $im = shift;
my $top = shift;
my $x = shift;
my $height_per_leaf = shift;
my $width_per_node = shift;
my $border = shift;
my $colours = shift;
print "Kids: ",$self->kid_count,"\n";
my $last_y;
foreach my $kid (@{$self->{kid}}){
my $leaf_count = $kid->leaf_count;
my $bottom = (($leaf_count-1) * $height_per_leaf) + $top;
my $node_height = $top + (($bottom - $top) / 2);
my $leaf_height = $top;
print "top: $top bottom: $bottom leaves: $leaf_count\n";
my $x2 = $x+$kid->{dist}*$width_per_node;
printf("Base: $bottom\n");
if($kid->{type} eq $Node){
print "node drawing at: $x,$node_height\n";
$im->filledRectangle($x,($border+$node_height),
$x2,($border+$node_height+3),
$colours->{black});
$kid->_draw_nodes($im,$top,$x2,$height_per_leaf,
$width_per_node,$border,$colours);
if(defined($last_y)){
$im->filledRectangle($x,($border+$last_y),
$x+3,($border+$node_height),
$colours->{black});
}
$last_y = $node_height;
$top = $bottom + $height_per_leaf;
} else {
# leaf drawing code
print "leaf drawing: x: [$x] y: [$leaf_height] ",$kid->{name},"\n";
$im->filledRectangle($x,($border+$leaf_height),
$x2,($border+$leaf_height+3),
$colours->{blue});
if(defined($last_y)){
$im->filledRectangle($x,($border+$last_y),
$x+3,($border+$leaf_height),
$colours->{black});
}
$last_y = $leaf_height;
$im->string(gdMediumBoldFont,
$x2+5,($border+$leaf_height-5),
$kid->{name},
$colours->{red});
$top += $height_per_leaf;
}
}
return;
}
1;
--------------080505080607050108010303--