[BioRuby-cvs] bioruby/lib/bio/db newick.rb,NONE,1.1
Naohisa Goto
ngoto at dev.open-bio.org
Thu Oct 5 13:38:24 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv12262/lib/bio/db
Added Files:
newick.rb
Log Message:
* lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree
data structure class.
* lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka.
New Hampshire Format) phylogenetic tree parser. Some methods for
formatting Newick output also exists in this file.
--- NEW FILE: newick.rb ---
#
# = bio/db/newick.rb - Newick Standard phylogenetic tree parser / formatter
#
# Copyright:: Copyright (C) 2004-2006
# Naohisa Goto <ng at bioruby.org>
# Daniel Amelang <dan at amelang.net>
# License:: Ruby's
#
# $Id: newick.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $
#
module Bio
class PhylogeneticTree
#---
# newick output
#+++
def __get_option(key, options)
options[key] or (@options ? @options[key] : nil)
end
private :__get_option
# formats leaf
def __to_newick_format_leaf(node, edge, options)
label = get_node_name(node).to_s
dist = get_edge_distance_string(edge)
bs = get_node_bootstrap_string(node)
if __get_option(:branch_length_style, options) == :disabled
dist = nil
end
case __get_option(:bootstrap_style, options)
when :disabled
label + (dist ? ":#{dist}" : '')
when :molphy
label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
when :traditional
label + (bs ? bs : '') + (dist ? ":#{dist}" : '')
else
# default: same as molphy style
label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
end
end
private :__to_newick_format_leaf
#
def __to_newick(parents, source, depth, options)
result = []
indent0 = ' ' * depth
indent = ' ' * (depth + 1)
self.each_out_edge(source) do |src, tgt, edge|
if parents.include?(tgt) then
;;
elsif self.out_degree(tgt) == 1 then
result << indent + __to_newick_format_leaf(tgt, edge, options)
else
result <<
__to_newick([ src ].concat(parents), tgt, depth + 1, options) +
__to_newick_format_leaf(tgt, edge, options)
end
end
indent0 + "(\n" + result.join(",\n") +
(result.size > 0 ? "\n" : '') + indent0 + ')'
end
private :__to_newick
# Returns a newick formatted string.
def newick(options = {})
root = @root
root ||= self.nodes.first
return '();' unless root
__to_newick([], root, 0, options) +
__to_newick_format_leaf(root, Edge.new, options) +
";\n"
end
end #class PhylogeneticTree
#---
# newick parser
#+++
# Newick standard phylogenetic tree parser class.
#
# This is alpha version. Incompatible changes may be made frequently.
class Newick
# delemiter of the entry
DELIMITER = RS = ";"
# parse error class
class ParseError < RuntimeError; end
# same as Bio::PhylogeneticTree::Edge
Edge = Bio::PhylogeneticTree::Edge
# same as Bio::PhylogeneticTree::Node
Node = Bio::PhylogeneticTree::Node
# Creates a new Newick object.
# _options_ for parsing can be set.
#
# Note: molphy-style bootstrap values are always parsed, even if
# the options[:bootstrap_style] is set to :traditional or :disabled.
# Note: By default, if all of the internal node's names are numeric
# and there are no molphy-style boostrap values,
# the names are regarded as bootstrap values.
# options[:bootstrap_style] = :disabled or :molphy to disable the feature.
def initialize(str, options = nil)
str = str.sub(/\;(.*)/m, ';')
@original_string = str
@entry_overrun = $1
@options = (options or {})
end
# parser options
# (in some cases, options can be automatically set by the parser)
attr_reader :options
# original string before parsing
attr_reader :original_string
# string after this entry
attr_reader :entry_overrun
# Gets the tree.
# Returns a Bio::PhylogeneticTree object.
def tree
if !defined?(@tree)
@tree = __parse_newick(@original_string, @options)
else
@tree
end
end
# Re-parses the tree from the original string.
# Returns self.
# This method is useful after changing parser options.
def reparse
remove_instance_variable(:tree)
self.tree
self
end
private
# gets a option
def __get_option(key, options)
options[key] or (@options ? @options[key] : nil)
end
# Parses newick formatted leaf (or internal node) name.
def __parse_newick_leaf(str, node, edge)
case str
when /(.*)\:(.*)\[(.*)\]/
node.name = $1
edge.distance_string = $2 if $2 and !($2.strip.empty?)
node.bootstrap_string = $3 if $3 and !($3.strip.empty?)
when /(.*)\[(.*)\]/
node.name = $1
node.bootstrap_string = $2 if $2 and !($2.strip.empty?)
when /(.*)\:(.*)/
node.name = $1
edge.distance_string = $2 if $2 and !($2.strip.empty?)
else
node.name = str
end
true
end
# Parses newick formatted string.
def __parse_newick(str, options = {})
# initializing
root = Node.new
cur_node = root
edges = []
nodes = [ root ]
internal_nodes = []
node_stack = []
# preparation of tokens
str = str.chop if str[-1..-1] == ';'
ary = str.split(/([\(\)\,])/)
ary.collect! { |x| x.strip!; x.empty? ? nil : x }
ary.compact!
previous_token = nil
# main loop
while token = ary.shift
#p token
case token
when ','
if previous_token == ',' or previous_token == '(' then
# there is a leaf whose name is empty.
ary.unshift(token)
ary.unshift('')
token = nil
end
when '('
node = Node.new
nodes << node
internal_nodes << node
node_stack.push(cur_node)
cur_node = node
when ')'
if previous_token == ',' or previous_token == '(' then
# there is a leaf whose name is empty.
ary.unshift(token)
ary.unshift('')
token = nil
else
edge = Edge.new
next_token = ary[0]
if next_token and next_token != ',' and next_token != ')' then
__parse_newick_leaf(next_token, cur_node, edge)
ary.shift
end
parent = node_stack.pop
raise ParseError, 'unmatched parentheses' unless parent
edges << Bio::Relation.new(parent, cur_node, edge)
cur_node = parent
end
else
leaf = Node.new
edge = Edge.new
__parse_newick_leaf(token, leaf, edge)
nodes << leaf
edges << Bio::Relation.new(cur_node, leaf, edge)
end #case
previous_token = token
end #while
raise ParseError, 'unmatched parentheses' unless node_stack.empty?
bsopt = __get_option(:bootstrap_style, options)
unless bsopt == :disabled or bsopt == :molphy then
# If all of the internal node's names are numeric
# and there are no molphy-style boostrap values,
# the names are regarded as bootstrap values.
flag = false
internal_nodes.each do |node|
if node.bootstrap
unless __get_option(:bootstrap_style, options) == :traditional
@options[:bootstrap_style] = :molphy
end
flag = false
break
end
if node.name and !node.name.to_s.strip.empty? then
if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name
flag = true
else
flag = false
break
end
end
end
if flag then
@options[:bootstrap_style] = :traditional
internal_nodes.each do |node|
if node.name then
node.bootstrap_string = node.name
node.name = nil
end
end
end
end
# If the root implicitly prepared by the program is a leaf and
# there are no additional information for the edge from the root to
# the first internal node, the root is removed.
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and
rel.relation.instance_eval { !defined?(@distance) } and
edges.find_all { |x| x.node.include?(root) }.size == 1
nodes.shift
edges.pop
end
# Let the tree into instance variables
tree = Bio::PhylogeneticTree.new
tree.instance_eval {
@pathway.relations.concat(edges)
@pathway.to_list
}
tree.root = nodes[0]
tree.options.update(@options)
tree
end
end #class Newick
end #module Bio
More information about the bioruby-cvs
mailing list