[BioRuby-cvs] bioruby/lib/bio/db newick.rb,1.1,1.2
Naohisa Goto
ngoto at dev.open-bio.org
Wed Dec 13 15:46:30 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv8064/db
Modified Files:
newick.rb
Log Message:
NHX (New Hampshire eXtended) input is supported by Bio::Newick class.
Bio::PhylogeneticTree supports NHX output (as a string) by #output(:NHX).
When outputting a tree, the indentation can be specified by options.
Many attributes are added to Bio::PhylogeneticTree::Node and
Bio::PhylogeneticTree::Edge to support NHX.
The node order in the original Newick data is stored in
Bio::PhylogeneticTree::Node#order_number.
Index: newick.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/newick.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** newick.rb 5 Oct 2006 13:38:22 -0000 1.1
--- newick.rb 13 Dec 2006 15:46:28 -0000 1.2
***************
*** 17,22 ****
#+++
def __get_option(key, options)
! options[key] or (@options ? @options[key] : nil)
end
private :__get_option
--- 17,31 ----
#+++
+ DEFAULT_OPTIONS =
+ { :indent => ' ' }
+
def __get_option(key, options)
! if (r = options[key]) != nil then
! r
! elsif @options && (r = @options[key]) != nil then
! r
! else
! DEFAULT_OPTIONS[key]
! end
end
private :__get_option
***************
*** 49,82 ****
private :__to_newick_format_leaf
#
! def __to_newick(parents, source, depth, options)
result = []
! indent0 = ' ' * depth
! indent = ' ' * (depth + 1)
! self.each_out_edge(source) do |src, tgt, edge|
if parents.include?(tgt) then
;;
elsif self.out_degree(tgt) == 1 then
! result << indent + __to_newick_format_leaf(tgt, edge, options)
else
result <<
! __to_newick([ src ].concat(parents), tgt, depth + 1, options) +
! __to_newick_format_leaf(tgt, edge, options)
end
end
! indent0 + "(\n" + result.join(",\n") +
! (result.size > 0 ? "\n" : '') + indent0 + ')'
end
private :__to_newick
# Returns a newick formatted string.
! def newick(options = {})
root = @root
root ||= self.nodes.first
return '();' unless root
! __to_newick([], root, 0, options) +
__to_newick_format_leaf(root, Edge.new, options) +
";\n"
end
end #class PhylogeneticTree
--- 58,212 ----
private :__to_newick_format_leaf
+ # formats leaf for NHX
+ def __to_newick_format_leaf_NHX(node, edge, options)
+
+ label = get_node_name(node).to_s
+
+ dist = get_edge_distance_string(edge)
+
+ bs = get_node_bootstrap_string(node)
+
+ if __get_option(:branch_length_style, options) == :disabled
+ dist = nil
+ end
+
+ nhx = {}
+
+ # bootstrap
+ nhx[:B] = bs if bs and !(bs.empty?)
+ # EC number
+ nhx[:E] = node.ec_number if node.instance_eval {
+ defined?(@ec_number) && self.ec_number
+ }
+ # scientific name
+ nhx[:S] = node.scientific_name if node.instance_eval {
+ defined?(@scientific_name) && self.scientific_name
+ }
+ # taxonomy id
+ nhx[:T] = node.taxonomy_id if node.instance_eval {
+ defined?(@taxonomy_id) && self.taxonomy_id
+ }
+
+ # :D (gene duplication or speciation)
+ if node.instance_eval { defined?(@events) && !(self.events.empty?) } then
+ if node.events.include?(:gene_duplication)
+ nhx[:D] = 'Y'
+ elsif node.events.include?(:speciation)
+ nhx[:D] = 'N'
+ end
+ end
+
+ # log likelihood
+ nhx[:L] = edge.log_likelihood if edge.instance_eval {
+ defined?(@log_likelihood) && self.log_likelihood }
+ # width
+ nhx[:W] = edge.width if edge.instance_eval {
+ defined?(@width) && self.width }
+
+ # merges other parameters
+ flag = node.instance_eval { defined? @nhx_parameters }
+ nhx.merge!(node.nhx_parameters) if flag
+ flag = edge.instance_eval { defined? @nhx_parameters }
+ nhx.merge!(edge.nhx_parameters) if flag
+
+ nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key|
+ "#{key.to_s}=#{nhx[key].to_s}"
+ end.join(':')
+ nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty?
+
+ label + (dist ? ":#{dist}" : '') + nhx_string
+ end
+ private :__to_newick_format_leaf_NHX
+
#
! def __to_newick(parents, source, depth, format_leaf,
! options, &block)
result = []
! if indent_string = __get_option(:indent, options) then
! indent0 = indent_string * depth
! indent = indent_string * (depth + 1)
! newline = "\n"
! else
! indent0 = indent = newline = ''
! end
! out_edges = self.out_edges(source)
! if block_given? then
! out_edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) }
! else
! out_edges.sort! do |edge1, edge2|
! o1 = edge1[1].order_number
! o2 = edge2[1].order_number
! if o1 and o2 then
! o1 <=> o2
! else
! edge1[1].name.to_s <=> edge2[1].name.to_s
! end
! end
! end
! out_edges.each do |src, tgt, edge|
if parents.include?(tgt) then
;;
elsif self.out_degree(tgt) == 1 then
! result << indent + __send__(format_leaf, tgt, edge, options)
else
result <<
! __to_newick([ src ].concat(parents), tgt, depth + 1,
! format_leaf, options) +
! __send__(format_leaf, tgt, edge, options)
end
end
! indent0 + "(" + newline + result.join(',' + newline) +
! (result.size > 0 ? newline : '') + indent0 + ')'
end
private :__to_newick
# Returns a newick formatted string.
! # If block is given, the order of the node is sorted
! # (as the same manner as Enumerable#sort).
! # Description about options.
! # :indent : indent string; set false to disable (default: ' ')
! # :bootstrap_style : :disabled disables bootstrap representations
! # :traditional traditional style
! # :molphy Molphy style (default)
! def output_newick(options = {}, &block) #:yields: node1, node2
root = @root
root ||= self.nodes.first
return '();' unless root
! __to_newick([], root, 0, :__to_newick_format_leaf, options, &block) +
__to_newick_format_leaf(root, Edge.new, options) +
";\n"
end
+
+ alias newick output_newick
+
+
+ # Returns a NHX (New Hampshire eXtended) formatted string.
+ # If block is given, the order of the node is sorted
+ # (as the same manner as Enumerable#sort).
+ # Description about options.
+ # :indent : indent string; set false to disable (default: ' ')
+ def output_nhx(options = {}, &block) #:yields: node1, node2
+ root = @root
+ root ||= self.nodes.first
+ return '();' unless root
+ __to_newick([], root, 0,
+ :__to_newick_format_leaf_NHX, options, &block) +
+ __to_newick_format_leaf_NHX(root, Edge.new, options) +
+ ";\n"
+ end
+
+ # Returns formatted text (or something) of the tree
+ # Currently supported format is: :newick, :NHX
+ def output(format, *arg, &block)
+ case format
+ when :newick
+ output_newick(*arg, &block)
+ when :NHX
+ output_nhx(*arg, &block)
+ else
+ raise 'Unknown format'
+ end
+ end
+
end #class PhylogeneticTree
***************
*** 105,114 ****
# _options_ for parsing can be set.
#
! # Note: molphy-style bootstrap values are always parsed, even if
# the options[:bootstrap_style] is set to :traditional or :disabled.
# Note: By default, if all of the internal node's names are numeric
! # and there are no molphy-style boostrap values,
! # the names are regarded as bootstrap values.
! # options[:bootstrap_style] = :disabled or :molphy to disable the feature.
def initialize(str, options = nil)
str = str.sub(/\;(.*)/m, ';')
--- 235,245 ----
# _options_ for parsing can be set.
#
! # Note: molphy-style bootstrap values may be parsed, even if
# the options[:bootstrap_style] is set to :traditional or :disabled.
# Note: By default, if all of the internal node's names are numeric
! # and there are no NHX and no molphy-style boostrap values,
! # the names of internal nodes are regarded as bootstrap values.
! # options[:bootstrap_style] = :disabled or :molphy to disable the feature
! # (or at least one NHX tag exists).
def initialize(str, options = nil)
str = str.sub(/\;(.*)/m, ';')
***************
*** 155,167 ****
# Parses newick formatted leaf (or internal node) name.
! def __parse_newick_leaf(str, node, edge)
case str
when /(.*)\:(.*)\[(.*)\]/
node.name = $1
edge.distance_string = $2 if $2 and !($2.strip.empty?)
! node.bootstrap_string = $3 if $3 and !($3.strip.empty?)
when /(.*)\[(.*)\]/
node.name = $1
! node.bootstrap_string = $2 if $2 and !($2.strip.empty?)
when /(.*)\:(.*)/
node.name = $1
--- 286,300 ----
# Parses newick formatted leaf (or internal node) name.
! def __parse_newick_leaf(str, node, edge, options)
case str
when /(.*)\:(.*)\[(.*)\]/
node.name = $1
edge.distance_string = $2 if $2 and !($2.strip.empty?)
! # bracketted string into bstr
! bstr = $3
when /(.*)\[(.*)\]/
node.name = $1
! # bracketted string into bstr
! bstr = $2
when /(.*)\:(.*)/
node.name = $1
***************
*** 170,173 ****
--- 303,369 ----
node.name = str
end
+
+ # determines NHX or Molphy-style bootstrap
+ if bstr and !(bstr.strip.empty?)
+ case __get_option(:original_format, options)
+ when :nhx
+ # regarded as NHX string which might be broken
+ __parse_nhx(bstr, node, edge)
+ when :traditional
+ # simply ignored
+ else
+ case bstr
+ when /\A\&\&NHX/
+ # NHX string
+ # force to set NHX mode
+ @options[:original_format] = :nhx
+ __parse_nhx(bstr, node, edge)
+ else
+ # Molphy-style boostrap values
+ # let molphy mode if nothing determined
+ @options[:original_format] ||= :molphy
+ node.bootstrap_string = bstr
+ end #case bstr
+ end
+ end
+
+ # returns true
+ true
+ end
+
+ # Parses NHX (New Hampshire eXtended) string
+ def __parse_nhx(bstr, node, edge)
+ a = bstr.split(/\:/)
+ a.shift if a[0] == '&&NHX'
+ a.each do |str|
+ tag, val = str.split(/\=/, 2)
+ case tag
+ when 'B'
+ node.bootstrap_string = val
+ when 'D'
+ case val
+ when 'Y'
+ node.events.push :gene_duplication
+ when 'N'
+ node.events.push :speciation
+ end
+ when 'E'
+ node.ec_number = val
+ when 'L'
+ edge.log_likelihood = val.to_f
+ when 'S'
+ node.scientific_name = val
+ when 'T'
+ node.taxonomy_id = val
+ when 'W'
+ edge.width = val.to_i
+ when 'XB'
+ edge.nhx_parameters[:XB] = val
+ when 'O', 'SO'
+ node.nhx_parameters[tag.to_sym] = val.to_i
+ else # :Co, :SN, :Sw, :XN, and others
+ node.nhx_parameters[tag.to_sym] = val
+ end
+ end #each
true
end
***************
*** 215,219 ****
next_token = ary[0]
if next_token and next_token != ',' and next_token != ')' then
! __parse_newick_leaf(next_token, cur_node, edge)
ary.shift
end
--- 411,415 ----
next_token = ary[0]
if next_token and next_token != ',' and next_token != ')' then
! __parse_newick_leaf(next_token, cur_node, edge, options)
ary.shift
end
***************
*** 226,230 ****
leaf = Node.new
edge = Edge.new
! __parse_newick_leaf(token, leaf, edge)
nodes << leaf
edges << Bio::Relation.new(cur_node, leaf, edge)
--- 422,426 ----
leaf = Node.new
edge = Edge.new
! __parse_newick_leaf(token, leaf, edge, options)
nodes << leaf
edges << Bio::Relation.new(cur_node, leaf, edge)
***************
*** 234,250 ****
raise ParseError, 'unmatched parentheses' unless node_stack.empty?
bsopt = __get_option(:bootstrap_style, options)
! unless bsopt == :disabled or bsopt == :molphy then
! # If all of the internal node's names are numeric
! # and there are no molphy-style boostrap values,
# the names are regarded as bootstrap values.
flag = false
internal_nodes.each do |node|
- if node.bootstrap
- unless __get_option(:bootstrap_style, options) == :traditional
- @options[:bootstrap_style] = :molphy
- end
- flag = false
- break
- end
if node.name and !node.name.to_s.strip.empty? then
if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name
--- 430,440 ----
raise ParseError, 'unmatched parentheses' unless node_stack.empty?
bsopt = __get_option(:bootstrap_style, options)
! ofmt = __get_option(:original_format, options)
! unless bsopt == :disabled or bsopt == :molphy or
! ofmt == :nhx or ofmt == :molphy then
! # If all of the internal node's names are numeric,
# the names are regarded as bootstrap values.
flag = false
internal_nodes.each do |node|
if node.name and !node.name.to_s.strip.empty? then
if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name
***************
*** 258,261 ****
--- 448,452 ----
if flag then
@options[:bootstrap_style] = :traditional
+ @options[:original_format] = :traditional
internal_nodes.each do |node|
if node.name then
***************
*** 266,274 ****
end
end
# If the root implicitly prepared by the program is a leaf and
# there are no additional information for the edge from the root to
# the first internal node, the root is removed.
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and
! rel.relation.instance_eval { !defined?(@distance) } and
edges.find_all { |x| x.node.include?(root) }.size == 1
nodes.shift
--- 457,471 ----
end
end
+ # Sets nodes order numbers
+ nodes.each_with_index do |node, i|
+ node.order_number = i
+ end
# If the root implicitly prepared by the program is a leaf and
# there are no additional information for the edge from the root to
# the first internal node, the root is removed.
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and
! rel.relation.instance_eval {
! !defined?(@distance) and !defined?(@log_likelihood) and
! !defined?(@width) and !defined?(@nhx_parameters) } and
edges.find_all { |x| x.node.include?(root) }.size == 1
nodes.shift
More information about the bioruby-cvs
mailing list