From nakao at dev.open-bio.org Thu Oct 5 03:39:32 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 05 Oct 2006 07:39:32 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.34,1.35 Message-ID: <200610050739.k957dWIZ011096@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory dev.open-bio.org:/tmp/cvs-serv11054/lib/bio/db/embl Modified Files: sptr.rb Log Message: * Fixed Bio::SPTR bugs reported by SONDEREGGER Bernhard. - Now a leading uncapital letter OS line is acceptable. (cf. uniprot:Q32725). - Fixed regexp for parsing the OS line from uniprot:O63147. Index: sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** sptr.rb 15 Jul 2006 15:29:26 -0000 1.34 --- sptr.rb 5 Oct 2006 07:39:29 -0000 1.35 *************** *** 293,301 **** # OS Genus species (name0), G s0 (name0), and G s (name0) (name1). # OS Homo sapiens (Human), and Rarrus norveticus (Rat) def os(num = nil) unless @data['OS'] os = Array.new fetch('OS').split(/, and|, /).each do |tmp| ! if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/ org = $1 tmp =~ /(\(.+\))/ --- 293,303 ---- # OS Genus species (name0), G s0 (name0), and G s (name0) (name1). # OS Homo sapiens (Human), and Rarrus norveticus (Rat) + # OS Hippotis sp. Clark and Watts 825. + # OS unknown cyperaceous sp. def os(num = nil) unless @data['OS'] os = Array.new fetch('OS').split(/, and|, /).each do |tmp| ! if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/ org = $1 tmp =~ /(\(.+\))/ From nakao at dev.open-bio.org Thu Oct 5 03:39:32 2006 From: nakao at dev.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu, 05 Oct 2006 07:39:32 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db/embl test_sptr.rb,1.4,1.5 Message-ID: <200610050739.k957dWg0011101@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db/embl In directory dev.open-bio.org:/tmp/cvs-serv11054/test/unit/bio/db/embl Modified Files: test_sptr.rb Log Message: * Fixed Bio::SPTR bugs reported by SONDEREGGER Bernhard. - Now a leading uncapital letter OS line is acceptable. (cf. uniprot:Q32725). - Fixed regexp for parsing the OS line from uniprot:O63147. Index: test_sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/db/embl/test_sptr.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** test_sptr.rb 15 Jul 2006 15:29:26 -0000 1.4 --- test_sptr.rb 5 Oct 2006 07:39:30 -0000 1.5 *************** *** 1758,1761 **** --- 1758,1775 ---- end + class TestOSLine < Test::Unit::TestCase + def test_uncapitalized_letter_Q32725_9POAL + data = "OS unknown cyperaceous sp.\n" + sp = SPTR.new(data) + assert_equal('unknown cyperaceous sp.', sp.os.first['os']) + end + + def test_period_trancation_O63147 + data = "OS Hippotis sp. Clark and Watts 825.\n" + sp = SPTR.new(data) + assert_equal('Hippotis sp. Clark and Watts 825.', sp.os.first['os']) + end + end + end # module Bio From ngoto at dev.open-bio.org Thu Oct 5 09:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio phylogenetictree.rb,NONE,1.1 Message-ID: <200610051338.k95DcOfV012288@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv12262/lib/bio Added Files: phylogenetictree.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. 
New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: phylogenetictree.rb --- # # = bio/phylogenetictree.rb - phylogenetic tree data structure class # # Copyright:: Copyright (C) 2006 # Naohisa Goto # License:: Ruby's # # $Id: phylogenetictree.rb,v 1.1 2006/10/05 13:38:21 ngoto Exp $ # require 'matrix' require 'bio/pathway' module Bio # This is the class for phylogenetic tree. # It stores a phylogenetic tree. # # Internally, it is based on Bio::Pathway class. # However, users cannot handle Bio::Pathway object directly. # # This is alpha version. Incompatible changes may be made frequently. class PhylogeneticTree # Error when there are no path between specified nodes class NoPathError < RuntimeError; end # Error when the two nodes are not adjacent. class NotAdjacentNodesError < RuntimeError; end # Edge object of each node. # By default, the object doesn't contain any node information. class Edge # creates a new edge. def initialize(distance = nil) if distance.kind_of?(Numeric) self.distance = distance elsif distance self.distance_string = distance end end # evolutionary distance attr_reader :distance # evolutionary distance represented as a string attr_reader :distance_string # set evolutionary distance value def distance=(num) @distance = num @distance_string = (num ? num.to_s : num) end # set evolutionary distance value from a string def distance_string=(str) if str.to_s.strip.empty? @distance = nil @distance_string = str else @distance = str.to_f @distance_string = str end end # visualization of this object def inspect "" end # string representation of this object def to_s @distance_string.to_s end end #class Edge # Gets distance value from the given edge. # Returns float or any other numeric value or nil. def get_edge_distance(edge) begin dist = edge.distance rescue NoMethodError dist = edge end dist end # Gets distance string from the given edge. # Returns a string or nil. 
def get_edge_distance_string(edge)
  # Edge-like objects report their own string form; any other truthy
  # value (a bare number, a string, ...) is stringified; nil/false give nil.
  # respond_to? is used instead of rescuing NoMethodError so that an error
  # raised inside an edge's own accessor is not silently swallowed.
  if edge.respond_to?(:distance_string)
    edge.distance_string
  else
    edge ? edge.to_s : nil
  end
end

# Returns edge1 + edge2
#
# Builds a new Edge whose distance is the sum of the two distances.
# When only one of the edges has a distance, that distance is used alone;
# when neither has one, an Edge without distance is returned.
def get_edge_merged(edge1, edge2)
  dist1 = get_edge_distance(edge1)
  dist2 = get_edge_distance(edge2)
  if dist1 and dist2 then
    Edge.new(dist1 + dist2)
  elsif dist1 then
    Edge.new(dist1)
  elsif dist2 then
    Edge.new(dist2)
  else
    Edge.new
  end
end

# Node object.
class Node

  # Creates a new node.
  # The name is stored only when a (truthy) name is given.
  def initialize(name = nil)
    @name = name if name
  end

  # name of the node
  attr_accessor :name

  # bootstrap value
  attr_reader :bootstrap

  # bootstrap value as a string
  attr_reader :bootstrap_string

  # sets a bootstrap value
  # (the string form is kept in sync; nil/false are stored as given)
  def bootstrap=(num)
    @bootstrap_string = (num ? num.to_s : num)
    @bootstrap = num
  end

  # sets a bootstrap value from a string
  #
  # A blank string (or nil) clears the numeric value.
  # Otherwise the numeric value is an Integer when the string denotes
  # a whole number, and a Float when it does not.
  def bootstrap_string=(str)
    if str.to_s.strip.empty?
      @bootstrap = nil
      @bootstrap_string = str
    else
      i = str.to_i
      f = str.to_f
      @bootstrap = (i == f ? i : f)
      @bootstrap_string = str
    end
  end

  # visualization of this object
  # (unnamed nodes are shown with a hexadecimal number derived from __id__)
  def inspect
    if @name and !@name.empty? then
      str = "(Node:#{@name.inspect}"
    else
      str = sprintf('(Node:%x', (self.__id__ << 1) & 0xffffffff)
    end
    str += " bootstrap=#{@bootstrap.inspect}" if @bootstrap
    str += ")"
    str
  end

  # string representation of this object
  def to_s
    @name.to_s
  end
end #class Node

# Gets node name
# Objects without a "name" method are converted with to_s.
def get_node_name(node)
  node.respond_to?(:name) ? node.name : node.to_s
end

# Gets the bootstrap value of the node.
# Returns nil for objects without a "bootstrap" method.
def get_node_bootstrap(node)
  node.respond_to?(:bootstrap) ? node.bootstrap : nil
end

# Gets the bootstrap value of the node as a string.
# Returns nil for objects without a "bootstrap_string" method.
def get_node_bootstrap_string(node)
  node.respond_to?(:bootstrap_string) ? node.bootstrap_string : nil
end

# Creates a new phylogenetic tree.
# When no arguments are given, it creates a new empty tree.
# When a PhylogeneticTree object is given, it copies the tree.
# Note that the new tree shares Node and Edge objects
# with the given tree.
def initialize(tree = nil)
  # creates an undirected adjacency list graph
  @pathway = Bio::Pathway.new([], true)
  @root = nil
  @options = {}
  # copies nodes and edges of the given tree (objects are shared)
  self.concat(tree) if tree
end

# root node of this tree
# (even if unrooted tree, it is used by some methods)
attr_accessor :root

# tree options; mainly used for tree output
attr_accessor :options

# Clears all nodes and edges.
# Returns self.
# Note that options and root are also cleared.
def clear
  # re-running the constructor replaces @pathway, @root and @options
  initialize
  self
end

# Returns all nodes as an array.
def nodes
  @pathway.graph.keys
end

# Number of nodes.
def number_of_nodes
  @pathway.nodes
end

# Iterates over each node of this tree.
# Returns self.
def each_node(&x) #:yields: node
  @pathway.graph.each_key(&x)
  self
end

# Iterates over each edges of this tree.
# Returns self.
def each_edge #:yields: source, target, edge
  @pathway.relations.each do |rel|
    yield rel.node[0], rel.node[1], rel.relation
  end
  self
end

# Returns all edges an array of [ node0, node1, edge ]
def edges
  @pathway.relations.collect do |rel|
    [ rel.node[0], rel.node[1], rel.relation ]
  end
end

# Returns number of edges in the tree.
def number_of_edges
  @pathway.relations.size
end

# Returns an array of adjacent nodes of the given node.
# Returns an empty array when the node is not in the tree.
def adjacent_nodes(node)
  h = @pathway.graph[node]
  h ? h.keys : []
end

# Returns all connected edges with adjacent nodes.
# Returns an array of the array [ source, target, edge ].
# Returns an empty array when the node is not in the tree.
#
# The reason why the method name is "out_edges" is that
# it comes from the Boost Graph Library.
def out_edges(source)
  h = @pathway.graph[source]
  if h
    h.collect { |key, val| [ source, key, val ] }
  else
    []
  end
end

# Iterates over each connected edges of the given node.
# Returns self.
#
# The reason why the method name is "each_out_edge" is that
# it comes from the Boost Graph Library.
def each_out_edge(source) #:yields: source, target, edge
  h = @pathway.graph[source]
  h.each { |key, val| yield source, key, val } if h
  self
end

# Returns number of edges in the given node.
#
# The reason why the method name is "out_degree" is that
# it comes from the Boost Graph Library.
def out_degree(source)
  h = @pathway.graph[source]
  h ? h.size : 0
end

# Returns an edge from source to target.
# If source and target are not adjacent nodes, returns nil.
def get_edge(source, target)
  h = @pathway.graph[source]
  h ? h[target] : nil
end

# Adds a new edge to the tree.
# Returns the newly added edge.
# If the edge already exists, it is overwritten with new one.
def add_edge(source, target, edge = Edge.new)
  @pathway.append(Bio::Relation.new(source, target, edge))
  edge
end

# Adds a node to the tree.
# Returns self.
# If the node already exists, it does nothing.
def add_node(node)
  @pathway.graph[node] ||= {}
  self
end

# If the node exists, returns true.
# Otherwise, returns false.
def include?(node)
  @pathway.graph[node] ? true : false
end

# Removes all edges connected with the node.
# Returns self.
# If the node does not exist, raises IndexError.
def clear_node(node)
  unless include?(node)
    raise IndexError, 'the node does not exist'
  end
  @pathway.relations.delete_if do |rel|
    rel.node.include?(node)
  end
  @pathway.graph[node].each_key do |k|
    @pathway.graph[k].delete(node)
  end
  # The node's own adjacency list must be emptied as well; otherwise
  # adjacent_nodes/out_degree keep reporting the edges just removed.
  @pathway.graph[node].clear
  self
end

# Removes the given node from the tree.
# All edges connected with the node are also removed.
# Returns self.
# If the node does not exist, raises IndexError.
def remove_node(node)
  clear_node(node)
  @pathway.graph.delete(node)
  self
end

# Removes each node if the block returns not nil.
# All edges connected with the removed nodes are also removed.
# Returns self.
def remove_node_if
  # iterates over a snapshot of the nodes, because nodes are deleted
  # from the graph while looping
  all = nodes
  all.each do |node|
    if yield node then
      clear_node(node)
      @pathway.graph.delete(node)
    end
  end
  self
end

# Removes an edge between source and target.
#---
# If two or more edges exists between source and target,
# all of them are removed.
#+++
def remove_edge(source, target)
  fwd = [ source, target ]
  rev = [ target, source ]
  # the graph is undirected, so relations in both directions are dropped
  @pathway.relations.delete_if do |rel|
    rel.node == fwd or rel.node == rev
  end
  h = @pathway.graph[source]
  h.delete(target) if h
  h = @pathway.graph[target]
  h.delete(source) if h
  self
end

# Removes each edge if the block returns not nil.
# Returns self.
def remove_edge_if #:yields: source, target, edge
  removed_rel = []
  @pathway.relations.delete_if do |rel|
    # the edge object of a relation is rel.relation, as in each_edge
    if yield rel.node[0], rel.node[1], rel.relation then
      removed_rel << rel
      true
    end
  end
  # keeps the adjacency list in sync with the removed relations
  removed_rel.each do |rel|
    source = rel.node[0]
    target = rel.node[1]
    h = @pathway.graph[source]
    h.delete(target) if h
    h = @pathway.graph[target]
    h.delete(source) if h
  end
  self
end

# Replaces each node by each block's return value.
# Returns self.
def collect_node! #:yields: node
  # translation table: old node => new node
  tr = {}
  self.each_node do |node|
    tr[node] = yield node
  end
  # replaces nodes in @pathway.relations
  @pathway.relations.each do |rel|
    rel.node.collect! { |node| tr[node] }
  end
  # re-generates @pathway from relations
  @pathway.to_list
  # adds orphan nodes
  tr.each_value do |newnode|
    @pathway.graph[newnode] ||= {}
  end
  self
end

# Replaces each edge by each block's return value.
# Returns self.
def collect_edge! #:yields: source, target, edge
  @pathway.relations.each do |rel|
    newedge = yield rel.node[0], rel.node[1], rel.relation
    # NOTE(review): relies on the relation object providing a
    # "relation=" writer -- confirm against Bio::Relation.
    rel.relation = newedge
    @pathway.append(rel, false)
  end
  self
end

# Gets the sub-tree consisted of given nodes.
# _nodes_ must be an array of nodes.
# Nodes that do not exist in the original tree are ignored.
# Returns a PhylogeneticTree object.
# Note that the sub-tree shares Node and Edge objects
# with the original tree.
def subtree(nodes)
  nodes = nodes.find_all do |x|
    @pathway.graph[x]
  end
  return self.class.new if nodes.empty?
  # creates subtree
  new_tree = self.class.new
  nodes.each do |x|
    new_tree.add_node(x)
  end
  # copies only the edges whose both ends are in the sub-tree
  self.each_edge do |node1, node2, edge|
    if new_tree.include?(node1) and new_tree.include?(node2) then
      new_tree.add_edge(node1, node2, edge)
    end
  end
  return new_tree
end

# Gets the sub-tree consisted of given nodes and
# all internal nodes connected between given nodes.
# _nodes_ must be an array of nodes.
# Nodes that do not exist in the original tree are ignored.
# Returns a PhylogeneticTree object.
# The result is unspecified for cyclic trees.
# Note that the sub-tree shares Node and Edge objects
# with the original tree.
def subtree_with_all_paths(nodes)
  # hash is used as a set of nodes to be included
  hash = {}
  nodes.each { |x| hash[x] = true }
  nodes.each_index do |i|
    node1 = nodes[i]
    (0...i).each do |j|
      node2 = nodes[j]
      unless node1 == node2 then
        begin
          path = self.path(node1, node2)
        rescue IndexError, NoPathError
          # unknown or unconnected nodes contribute no internal nodes
          path = []
        end
        path.each { |x| hash[x] = true }
      end
    end
  end
  self.subtree(hash.keys)
end

# Concatenates the other tree.
# If the same edge exists, the edge in _other_ is used.
# Returns self.
# The result is unspecified if _other_ isn't a PhylogeneticTree object.
# Note that the Node and Edge objects in the _other_ tree are
# shared in the concatenated tree.
def concat(other)
  #raise TypeError unless other.kind_of?(self.class)
  other.each_node do |node|
    self.add_node(node)
  end
  other.each_edge do |node1, node2, edge|
    self.add_edge(node1, node2, edge)
  end
  self
end

# Gets path from node1 to node2.
# Returns an array of nodes, including node1 and node2.
# If node1 and/or node2 do not exist, IndexError is raised.
# If node1 and node2 are not connected, NoPathError is raised.
# The result is unspecified for cyclic trees.
def path(node1, node2)
  raise IndexError, 'node1 not found' unless @pathway.graph[node1]
  raise IndexError, 'node2 not found' unless @pathway.graph[node2]
  return [ node1 ] if node1 == node2
  # the number of steps returned by the search is not needed here
  _step, found = @pathway.bfs_shortest_path(node1, node2)
  unless found[0] == node1 and found[-1] == node2 then
    raise NoPathError, 'node1 and node2 are not connected'
  end
  found
end

# Iterates over each edge from node1 to node2.
# Returns self.
# The result is unspecified for cyclic trees.
def each_edge_in_path(node1, node2) #:yields: source, target, edge
  nodes_on_path = path(node1, node2)
  source = nodes_on_path.shift
  nodes_on_path.each do |target|
    edge = get_edge(source, target)
    yield source, target, edge
    source = target
  end
  self
end

# Returns distance between node1 and node2.
# It would raise error if the edges didn't contain distance values.
# The result is unspecified for cyclic trees.
def distance(node1, node2)
  total = 0
  each_edge_in_path(node1, node2) do |source, target, edge|
    total += get_edge_distance(edge)
  end
  total
end

# Gets the parent node of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an Node object or nil.
# The result is unspecified for cyclic trees.
def parent(node, root = nil)
  root ||= @root
  # the parent is the node just before _node_ on the path from the root
  path(root, node)[-2]
end

# Gets the adjacent children nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def children(node, root = nil)
  root ||= @root
  from_root = path(root, node)
  # adjacent nodes that are not on the path from the root are children
  result = adjacent_nodes(node)
  result -= from_root
  result
end

# Gets all descendent nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def descendents(node, root = nil)
  root ||= @root
  # distance: node => number of steps from the root
  # route:    node => previous node on the way from the root
  distance, route = @pathway.breadth_first_search(root)
  d = distance[node]
  result = []
  distance.each do |key, val|
    if val > d then
      # walks back towards the root; key is a descendent only if
      # _node_ is met before reaching the depth of _node_
      x = key
      while x = route[x]
        if x == node then
          result << key
          break
        end
        break if distance[x] <= d
      end
    end
  end
  result
end

# If _node_ is nil, returns an array of
# all leaves (nodes connected with one edge).
# Otherwise, gets all descendent leaf nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def leaves(node = nil, root = nil)
  unless node then
    nodes = []
    each_node do |x|
      nodes << x if out_degree(x) == 1
    end
    return nodes
  else
    root ||= @root
    descendents(node, root).find_all do |x|
      adjacent_nodes(x).size == 1
    end
  end
end

# Gets all ancestral nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def ancestors(node, root = nil)
  root ||= @root
  # nearest ancestor first, root last
  (path(root, node) - [ node ]).reverse
end

# Gets the lowest common ancestor of the two nodes.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns a Node object or nil.
# The result is unspecified for cyclic trees.
def lowest_common_ancestor(node1, node2, root = nil)
  root ||= @root
  _distance, route = @pathway.breadth_first_search(root)
  # r1 and r2: the nodes themselves followed by their ancestors up to
  # the root (each list always contains at least the starting node)
  r1 = []
  x = node1
  loop do
    r1 << x
    x = route[x]
    break unless x
  end
  r2 = []
  x = node2
  loop do
    r2 << x
    x = route[x]
    break unless x
  end
  # the first node of r1 that is also in r2
  return (r1 & r2).first
end

# Calculates distance matrix of given nodes.
# If _nodes_ is nil, or is omitted, it acts the same as
# tree.distance_matrix(tree.leaves).
# Returns a matrix object.
# The result is unspecified for cyclic trees.
# Note 1: The diagonal values of the matrix are 0.
# Note 2: If the distance cannot be calculated, nil will be set.
def distance_matrix(nodes = nil)
  nodes ||= leaves
  matrix = []
  nodes.each_index do |i|
    row = []
    nodes.each_index do |j|
      if i == j then
        dist = 0
      elsif r = matrix[j] and val = r[i] then
        # the matrix is symmetric: reuses the value already computed
        dist = val
      else
        begin
          dist = distance(nodes[i], nodes[j])
        rescue StandardError
          # the distance cannot be calculated
          dist = nil
        end
      end
      row << dist
    end
    matrix << row
  end
  Matrix.rows(matrix, false)
end

# Shows the adjacency matrix representation of the tree.
# It shows matrix only for given nodes.
# If _nodes_ is nil or is omitted,
# it acts the same as tree.adjacency_matrix(tree.nodes).
# If a block is given, for each edge,
# it yields _source_, _target_, and _edge_, and
# uses the returned value of the block.
# Without blocks, it uses edge.
# _default_value_ is used for the pairs of nodes without edges, and
# _diagonal_value_ is used for the diagonal elements.
# Returns a matrix object.
def adjacency_matrix(nodes = nil,
                     default_value = nil,
                     diagonal_value = nil) #:yields: source, target, edge
  nodes ||= self.nodes
  size = nodes.size
  # node => index in the matrix
  hash = {}
  nodes.each_with_index { |x, i| hash[x] = i }
  # prepares an matrix
  matrix = Array.new(size) { Array.new(size, default_value) }
  (0...size).each { |i| matrix[i][i] = diagonal_value }
  # fills the matrix from each edge
  each_edge do |source, target, edge|
    i_source = hash[source]
    i_target = hash[target]
    # edges whose ends are not both in _nodes_ are ignored
    if i_source and i_target then
      val = block_given? ? (yield source, target, edge) : edge
      matrix[i_source][i_target] = val
      matrix[i_target][i_source] = val
    end
  end
  Matrix.rows(matrix, false)
end

# Removes all nodes that are not branches nor leaves.
# That is, removes nodes connected with exactly two edges.
# For each removed node, two adjacent edges are merged and
# a new edge are created.
# Returns removed nodes.
# Note that orphan nodes are still kept unchanged.
def remove_nonsense_nodes hash = {} self.each_node do |node| hash[node] = true if @pathway.graph[node].size == 2 end hash.each_key do |node| adjs = @pathway.graph[node].keys edges = @pathway.graph[node].values new_edge = get_edge_merged(edges[0], edges[1]) @pathway.graph[adjs[0]].delete(node) @pathway.graph[adjs[1]].delete(node) @pathway.graph.delete(node) @pathway.append(Bio::Relation.new(adjs[0], adjs[1], new_edge)) end #@pathway.to_relations @pathway.relations.reject! do |rel| hash[rel.node[0]] or hash[rel.node[1]] end return hash.keys end # Insert a new node between adjacent nodes node1 and node2. # The old edge between node1 and node2 are changed to the edge # between new_node and node2. # The edge between node1 and new_node is newly created. # # If new_distance is specified, the distance between # node1 and new_node is set to new_distance, and # distance between new_node and node2 is set to # tree.get_edge(node1, node2).distance - new_distance. # # Returns self. # If node1 and node2 are not adjacent, raises NotAdjacentNodesError. # # If new_node already exists in the tree, the tree would become # circular. In addition, if the edge between new_node and # node1 (or node2) already exists, it will be erased. def insert_node(node1, node2, new_node, new_distance = nil) unless edge = self.get_edge(node1, node2) then raise NotAdjacentNodesError, 'node1 and node2 are not adjacent.' 
end new_edge = Edge.new(new_distance) self.remove_edge(node1, node2) self.add_edge(node1, new_node, new_edge) if new_distance and old_distance = get_edge_distance(edge) then old_distance -= new_distance begin edge.distance = old_distance rescue NoMethodError edge = old_distance end end self.add_edge(new_node, node2, edge) self end end #class PhylogeneticTree end #module Bio #--- # temporary added #+++ require 'bio/db/newick' From ngoto at dev.open-bio.org Thu Oct 5 09:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,NONE,1.1 Message-ID: <200610051338.k95DcOAx012291@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv12262/lib/bio/db Added Files: newick.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: newick.rb --- # # = bio/db/newick.rb - Newick Standard phylogenetic tree parser / formatter # # Copyright:: Copyright (C) 2004-2006 # Naohisa Goto # Daniel Amelang # License:: Ruby's # # $Id: newick.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # module Bio class PhylogeneticTree #--- # newick output #+++ def __get_option(key, options) options[key] or (@options ? @options[key] : nil) end private :__get_option # formats leaf def __to_newick_format_leaf(node, edge, options) label = get_node_name(node).to_s dist = get_edge_distance_string(edge) bs = get_node_bootstrap_string(node) if __get_option(:branch_length_style, options) == :disabled dist = nil end case __get_option(:bootstrap_style, options) when :disabled label + (dist ? ":#{dist}" : '') when :molphy label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '') when :traditional label + (bs ? bs : '') + (dist ? 
":#{dist}" : '') else # default: same as molphy style label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '') end end private :__to_newick_format_leaf # def __to_newick(parents, source, depth, options) result = [] indent0 = ' ' * depth indent = ' ' * (depth + 1) self.each_out_edge(source) do |src, tgt, edge| if parents.include?(tgt) then ;; elsif self.out_degree(tgt) == 1 then result << indent + __to_newick_format_leaf(tgt, edge, options) else result << __to_newick([ src ].concat(parents), tgt, depth + 1, options) + __to_newick_format_leaf(tgt, edge, options) end end indent0 + "(\n" + result.join(",\n") + (result.size > 0 ? "\n" : '') + indent0 + ')' end private :__to_newick # Returns a newick formatted string. def newick(options = {}) root = @root root ||= self.nodes.first return '();' unless root __to_newick([], root, 0, options) + __to_newick_format_leaf(root, Edge.new, options) + ";\n" end end #class PhylogeneticTree #--- # newick parser #+++ # Newick standard phylogenetic tree parser class. # # This is alpha version. Incompatible changes may be made frequently. class Newick # delemiter of the entry DELIMITER = RS = ";" # parse error class class ParseError < RuntimeError; end # same as Bio::PhylogeneticTree::Edge Edge = Bio::PhylogeneticTree::Edge # same as Bio::PhylogeneticTree::Node Node = Bio::PhylogeneticTree::Node # Creates a new Newick object. # _options_ for parsing can be set. # # Note: molphy-style bootstrap values are always parsed, even if # the options[:bootstrap_style] is set to :traditional or :disabled. # Note: By default, if all of the internal node's names are numeric # and there are no molphy-style boostrap values, # the names are regarded as bootstrap values. # options[:bootstrap_style] = :disabled or :molphy to disable the feature. 
def initialize(str, options = nil) str = str.sub(/\;(.*)/m, ';') @original_string = str @entry_overrun = $1 @options = (options or {}) end # parser options # (in some cases, options can be automatically set by the parser) attr_reader :options # original string before parsing attr_reader :original_string # string after this entry attr_reader :entry_overrun # Gets the tree. # Returns a Bio::PhylogeneticTree object. def tree if !defined?(@tree) @tree = __parse_newick(@original_string, @options) else @tree end end # Re-parses the tree from the original string. # Returns self. # This method is useful after changing parser options. def reparse remove_instance_variable(:tree) self.tree self end private # gets a option def __get_option(key, options) options[key] or (@options ? @options[key] : nil) end # Parses newick formatted leaf (or internal node) name. def __parse_newick_leaf(str, node, edge) case str when /(.*)\:(.*)\[(.*)\]/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) node.bootstrap_string = $3 if $3 and !($3.strip.empty?) when /(.*)\[(.*)\]/ node.name = $1 node.bootstrap_string = $2 if $2 and !($2.strip.empty?) when /(.*)\:(.*)/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) else node.name = str end true end # Parses newick formatted string. def __parse_newick(str, options = {}) # initializing root = Node.new cur_node = root edges = [] nodes = [ root ] internal_nodes = [] node_stack = [] # preparation of tokens str = str.chop if str[-1..-1] == ';' ary = str.split(/([\(\)\,])/) ary.collect! { |x| x.strip!; x.empty? ? nil : x } ary.compact! previous_token = nil # main loop while token = ary.shift #p token case token when ',' if previous_token == ',' or previous_token == '(' then # there is a leaf whose name is empty. 
ary.unshift(token) ary.unshift('') token = nil end when '(' node = Node.new nodes << node internal_nodes << node node_stack.push(cur_node) cur_node = node when ')' if previous_token == ',' or previous_token == '(' then # there is a leaf whose name is empty. ary.unshift(token) ary.unshift('') token = nil else edge = Edge.new next_token = ary[0] if next_token and next_token != ',' and next_token != ')' then __parse_newick_leaf(next_token, cur_node, edge) ary.shift end parent = node_stack.pop raise ParseError, 'unmatched parentheses' unless parent edges << Bio::Relation.new(parent, cur_node, edge) cur_node = parent end else leaf = Node.new edge = Edge.new __parse_newick_leaf(token, leaf, edge) nodes << leaf edges << Bio::Relation.new(cur_node, leaf, edge) end #case previous_token = token end #while raise ParseError, 'unmatched parentheses' unless node_stack.empty? bsopt = __get_option(:bootstrap_style, options) unless bsopt == :disabled or bsopt == :molphy then # If all of the internal node's names are numeric # and there are no molphy-style boostrap values, # the names are regarded as bootstrap values. flag = false internal_nodes.each do |node| if node.bootstrap unless __get_option(:bootstrap_style, options) == :traditional @options[:bootstrap_style] = :molphy end flag = false break end if node.name and !node.name.to_s.strip.empty? then if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name flag = true else flag = false break end end end if flag then @options[:bootstrap_style] = :traditional internal_nodes.each do |node| if node.name then node.bootstrap_string = node.name node.name = nil end end end end # If the root implicitly prepared by the program is a leaf and # there are no additional information for the edge from the root to # the first internal node, the root is removed. 
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and rel.relation.instance_eval { !defined?(@distance) } and edges.find_all { |x| x.node.include?(root) }.size == 1 nodes.shift edges.pop end # Let the tree into instance variables tree = Bio::PhylogeneticTree.new tree.instance_eval { @pathway.relations.concat(edges) @pathway.to_list } tree.root = nodes[0] tree.options.update(@options) tree end end #class Newick end #module Bio From ngoto at dev.open-bio.org Thu Oct 5 09:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_phylogenetictree.rb, NONE, 1.1 Message-ID: <200610051338.k95DcOLR012298@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv12262/test/unit/bio Added Files: test_phylogenetictree.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. 
--- NEW FILE: test_phylogenetictree.rb --- # # = test/bio/test_phylogenetictree.rb - unit test for Bio::PhylogeneticTree # # Copyright:: Copyright (C) 2006 # Naohisa Goto # License:: Ruby's # # $Id: test_phylogenetictree.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # require 'test/unit' require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), [".."] * 3, "lib")).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'bio' require 'bio/phylogenetictree' module Bio class TestPhylogeneticTreeEdge < Test::Unit::TestCase def setup @obj = Bio::PhylogeneticTree::Edge.new(123.45) end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree::Edge.new } assert_equal(1.23, Bio::PhylogeneticTree::Edge.new(1.23).distance) assert_equal(12.3, Bio::PhylogeneticTree::Edge.new('12.3').distance) end def test_distance assert_equal(123.45, @obj.distance) end def test_distance_string assert_equal("123.45", @obj.distance_string) end def test_distance=() @obj.distance = 678.9 assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_distance_string=() @obj.distance_string = "678.9" assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance_string = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_inspect assert_equal("", @obj.inspect) end def test_to_s assert_equal("123.45", @obj.to_s) end end #class TestPhylogeneticTreeEdge class TestPhylogeneticTreeNode < Test::Unit::TestCase def setup @obj = Bio::PhylogeneticTree::Node.new end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree::Node.new } a = nil assert_nothing_raised { a = Bio::PhylogeneticTree::Node.new('mouse') } assert_equal('mouse', a.name) end def test_name assert_equal(nil, @obj.name) @obj.name = 'human' assert_equal('human', @obj.name) end def test_bootstrap assert_equal(nil, 
@obj.bootstrap) end def test_bootstrap_string assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap=() @obj.bootstrap = 98 assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap_string=() @obj.bootstrap_string = '98' assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap_string = '99.98' assert_equal(99.98, @obj.bootstrap) assert_equal('99.98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_inspect @obj.name = 'human' assert_equal('(Node:"human")', @obj.inspect) @obj.bootstrap = 99.98 assert_equal('(Node:"human" bootstrap=99.98)', @obj.inspect) end def test_to_s @obj.name = 'human' assert_equal('human', @obj.to_s) end end #class TestPhylogeneticTreeNode class TestPhylogeneticTree < Test::Unit::TestCase def setup @tree = Bio::PhylogeneticTree.new end def test_get_edge_distance edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance(edge)) edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal(12.34, @tree.get_edge_distance(edge)) assert_equal(12.34, @tree.get_edge_distance(12.34)) end def test_get_edge_distance_string edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance_string(edge)) edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal("12.34", @tree.get_edge_distance_string(edge)) assert_equal("12.34", @tree.get_edge_distance_string(12.34)) end def test_get_node_name node = Bio::PhylogeneticTree::Node.new assert_equal(nil, @tree.get_node_name(node)) node.name = 'human' assert_equal('human', @tree.get_node_name(node)) end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree.new } assert_nothing_raised { Bio::PhylogeneticTree.new(@tree) } end def test_root assert_equal(nil, @tree.root) end def test_root=() assert_equal(nil, 
@tree.root) node = Bio::PhylogeneticTree::Node.new @tree.root = node assert_equal(node, @tree.root) end def test_options assert_equal({}, @tree.options) @tree.options[:bootstrap_style] = :traditional assert_equal(:traditional, @tree.options[:bootstrap_style]) end end #class TestPhylogeneticTree class TestPhylogeneticTree2 < Test::Unit::TestCase def setup # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new mouse = Bio::PhylogeneticTree::Node.new('mouse') rat = Bio::PhylogeneticTree::Node.new('rat') rodents = Bio::PhylogeneticTree::Node.new('rodents') human = Bio::PhylogeneticTree::Node.new('human') chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') primates = Bio::PhylogeneticTree::Node.new('primates') mammals = Bio::PhylogeneticTree::Node.new('mammals') @tree.add_edge(rodents, mouse, Bio::PhylogeneticTree::Edge.new(0.0968)) @tree.add_edge(rodents, rat, Bio::PhylogeneticTree::Edge.new(0.1125)) @tree.add_edge(mammals, rodents, Bio::PhylogeneticTree::Edge.new(0.2560)) @tree.add_edge(primates, human, Bio::PhylogeneticTree::Edge.new(0.0386)) @tree.add_edge(primates, chimpanzee, Bio::PhylogeneticTree::Edge.new(0.0503)) @tree.add_edge(mammals, primates, Bio::PhylogeneticTree::Edge.new(0.2235)) @nodes = [ mouse, rat, rodents, human, chimpanzee, primates, mammals ] end def test_clear assert_nothing_raised { @tree.clear } assert_equal(0, @tree.number_of_nodes) assert_equal(0, @tree.number_of_edges) end def test_nodes nodes = @nodes.sort { |a, b| a.__id__ <=> b.__id__ } assert_equal(nodes, @tree.nodes.sort { |a, b| a.__id__ <=> b.__id__ }) end def test_number_of_nodes assert_equal(7, @tree.number_of_nodes) end def test_each_node end end #class TestPhylogeneticTree2 end #module Bio From ngoto at dev.open-bio.org Thu Oct 5 09:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_newick.rb,NONE,1.1 Message-ID: 
<200610051338.k95DcOjV012303@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory dev.open-bio.org:/tmp/cvs-serv12262/test/unit/bio/db Added Files: test_newick.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: test_newick.rb --- # # = test/bio/db/newick.rb - Unit test for Bio::Newick # # Copyright:: Copyright (C) 2004-2006 # Daniel Amelang # Naohisa Goto # License:: Ruby's # # $Id: test_newick.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # require 'test/unit' require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), [".."] * 3, "lib")).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'bio' require 'bio/phylogenetictree' require 'bio/db/newick' module Bio class TestNewick < Test::Unit::TestCase TREE_STRING = <<-END_OF_TREE_STRING ( ( HexLEZ35:0.00263, HexMCZ42:0.00788 ):0.00854, ( HexFLZ48:0.00457, ( HexFLZ83:0.00217, HexFLZ13:0.00574 ):0.00100 ):0.04692, HexLEZ73:0.00268 )[0.1250]; END_OF_TREE_STRING def test_string_tree newick = Bio::Newick.new(TREE_STRING) tree = newick.tree assert_equal(3, tree.children(tree.root).size) assert_equal(9, tree.descendents(tree.root).size) assert_equal(6, tree.leaves.size) leaf = tree.nodes.find { |x| x.name == 'HexFLZ83' } assert_equal(3, tree.ancestors(leaf).size) assert_equal(tree.path(tree.root, leaf)[1], tree.ancestors(leaf)[1]) assert_equal(0.00217, tree.get_edge(leaf, tree.parent(leaf)).distance) assert_equal("HexFLZ83", leaf.name) end end #class TestNewick end #module Bio From ngoto at dev.open-bio.org Fri Oct 6 05:53:40 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Fri, 06 Oct 2006 09:53:40 +0000 Subject: [BioRuby-cvs] bioruby ChangeLog,1.53,1.54 Message-ID: 
<200610060953.k969remd015631@dev.open-bio.org> Update of /home/repository/bioruby/bioruby In directory dev.open-bio.org:/tmp/cvs-serv15611 Modified Files: ChangeLog Log Message: Added Changelog (made by N.Goto) Index: ChangeLog =================================================================== RCS file: /home/repository/bioruby/bioruby/ChangeLog,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** ChangeLog 27 Jun 2006 05:44:57 -0000 1.53 --- ChangeLog 6 Oct 2006 09:53:38 -0000 1.54 *************** *** 1,2 **** --- 1,38 ---- + 2006-10-05 Naohisa Goto + + * lib/bio/db/newick.rb + + Bio::Newick for Newick standard phylogenetic tree parser is + newly added (contributed by Daniel Amelang). + + * lib/bio/phylogenetictree.rb + + Bio::PhylogeneticTree for phylogenetic tree data structure + is newly added. + + 2006-07-14 Naohisa Goto + + * lib/bio/command.rb + + Bio::Command::Tools and Bio::Command::NetTools are combined + and re-constructed into a new Bio::Command module. + + lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb, + lib/bio/appl/emboss.rb, lib/bio/appl/psort.rb, + lib/bio/appl/hmmer.rb, lib/bio/db/fantom.rb, + lib/bio/io/fastacmd.rb, lib/bio/io/fetch.rb, + lib/bio/io/keggapi.rb, lib/bio/io/pubmed.rb, and + lib/bio/io/registry.rb are changed to use the new Bio::Command + instead of old Bio::Command or Net::HTTP. + + 2006-06-29 Naohisa Goto + + * lib/bio/appl/blat/report.rb + + Bio::BLAT::Report::Hit#milli_bad, #percent_identity, #protein?, + #score, and #psl_version methods/attributes are newly added, + and psl files without headers are supported (discussed in + bioruby-ja ML). + 2006-06-27 Naohisa Goto *************** *** 8,11 **** --- 44,60 ---- is empty. + * lib/bio/db/pdb/pdb.rb + + Bio::PDB::ATOM#name, #resName, #iCode, #chaarge, #segID, and + #element are changed to strip whitespaces when initializing. + Bio::PDB::HETATM is also subject to the above changes. 
+ (suggested by Mikael Borg) + + 2006-06-12 Naohisa Goto + + * lib/bio/io/flatfile.rb + + Bug fix: Bio::FlatFile.open(klass, filename) didn't work. + 2006-05-02 Mitsuteru Nakao *************** *** 14,17 **** --- 63,74 ---- Bio::PTS1 first commit. + 2006-04-30 Naohisa Goto + + * lib/bio/appl/blast/format0.rb + + Bug fix: parse error for hits whose database sequence names + contain 'Score', and subsequent hits after them would lost + (reported by Tomoaki NISHIYAMA). + 2006-04-14 Mitsuteru Nakao *************** *** 21,24 **** --- 78,92 ---- Browser. + 2006-03-22 Naohisa Goto + + * lib/bio/io/flatfile.rb + + Bug fix: Bio::FlatFile raises error for pipes, ARGF, etc. + The bug also affects bio/appl/mafft.rb, bio/appl/clustalw.rb, + bio/appl/blast.rb, bio/io/fastacmd.rb, and so on. + + Bio::FlatFile#entry_start_pos and #entry_ended_pos are + changed to be enabled only when Bio::FlatFile#entry_pos_flag + is true. 2006-02-27 Toshiaki Katayama From ngoto at dev.open-bio.org Fri Oct 6 10:18:53 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Fri, 06 Oct 2006 14:18:53 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_phylogenetictree.rb, 1.1, 1.2 Message-ID: <200610061418.k96EIrPw016007@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv15987/test/unit/bio Modified Files: test_phylogenetictree.rb Log Message: added tests Index: test_phylogenetictree.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/test_phylogenetictree.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_phylogenetictree.rb 5 Oct 2006 13:38:22 -0000 1.1 --- test_phylogenetictree.rb 6 Oct 2006 14:18:51 -0000 1.2 *************** *** 181,205 **** # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new ! mouse = Bio::PhylogeneticTree::Node.new('mouse') ! 
rat = Bio::PhylogeneticTree::Node.new('rat') ! rodents = Bio::PhylogeneticTree::Node.new('rodents') ! human = Bio::PhylogeneticTree::Node.new('human') ! chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') ! primates = Bio::PhylogeneticTree::Node.new('primates') ! mammals = Bio::PhylogeneticTree::Node.new('mammals') ! @tree.add_edge(rodents, mouse, ! Bio::PhylogeneticTree::Edge.new(0.0968)) ! @tree.add_edge(rodents, rat, ! Bio::PhylogeneticTree::Edge.new(0.1125)) ! @tree.add_edge(mammals, rodents, ! Bio::PhylogeneticTree::Edge.new(0.2560)) ! @tree.add_edge(primates, human, ! Bio::PhylogeneticTree::Edge.new(0.0386)) ! @tree.add_edge(primates, chimpanzee, ! Bio::PhylogeneticTree::Edge.new(0.0503)) ! @tree.add_edge(mammals, primates, ! Bio::PhylogeneticTree::Edge.new(0.2235)) @nodes = ! [ mouse, rat, rodents, human, chimpanzee, primates, mammals ] end --- 181,212 ---- # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new ! @mouse = Bio::PhylogeneticTree::Node.new('mouse') ! @rat = Bio::PhylogeneticTree::Node.new('rat') ! @rodents = Bio::PhylogeneticTree::Node.new('rodents') ! @human = Bio::PhylogeneticTree::Node.new('human') ! @chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') ! @primates = Bio::PhylogeneticTree::Node.new('primates') ! @mammals = Bio::PhylogeneticTree::Node.new('mammals') @nodes = ! [ @mouse, @rat, @rodents, @human, @chimpanzee, @primates, @mammals ] ! @edge_rodents_mouse = Bio::PhylogeneticTree::Edge.new(0.0968) ! @edge_rodents_rat = Bio::PhylogeneticTree::Edge.new(0.1125) ! @edge_mammals_rodents = Bio::PhylogeneticTree::Edge.new(0.2560) ! @edge_primates_human = Bio::PhylogeneticTree::Edge.new(0.0386) ! @edge_primates_chimpanzee = Bio::PhylogeneticTree::Edge.new(0.0503) ! @edge_mammals_primates = Bio::PhylogeneticTree::Edge.new(0.2235) ! @edges = [ ! [ @rodents, @mouse, @edge_rodents_mouse ], ! [ @rodents, @rat, @edge_rodents_rat ], ! [ @mammals, @rodents, @edge_mammals_rodents ], ! 
[ @primates, @human, @edge_primates_human ], ! [ @primates, @chimpanzee, @edge_primates_chimpanzee ], ! [ @mammals, @primates, @edge_mammals_primates ] ! ] ! @edges.each do |a| ! @tree.add_edge(*a) ! end ! ! @by_id = Proc.new { |a, b| a.__id__ <=> b.__id__ } end *************** *** 211,216 **** def test_nodes ! nodes = @nodes.sort { |a, b| a.__id__ <=> b.__id__ } ! assert_equal(nodes, @tree.nodes.sort { |a, b| a.__id__ <=> b.__id__ }) end --- 218,223 ---- def test_nodes ! nodes = @nodes.sort(&@by_id) ! assert_equal(nodes, @tree.nodes.sort(&@by_id)) end *************** *** 220,223 **** --- 227,406 ---- def test_each_node + @tree.each_node do |x| + assert_not_nil(@nodes.delete(x)) + end + assert_equal(true, @nodes.empty?) + end + + def test_each_edge + @tree.each_edge do |source, target, edge| + assert_not_nil(@edges.delete([ source, target, edge ])) + end + assert_equal(true, @edges.empty?) + end + + def test_edges + edges = @edges.sort { |a, b| a[-1].distance <=> b[-1].distance } + assert_equal(edges, + @tree.edges.sort { + |a, b| a[-1].distance <=> b[-1].distance }) + end + + def test_number_of_edges + assert_equal(@edges.size, @tree.number_of_edges) + end + + def test_adjacent_nodes + assert_equal([ @rodents ], @tree.adjacent_nodes(@mouse)) + assert_equal([ @rodents ], @tree.adjacent_nodes(@rat)) + assert_equal([ @primates ], @tree.adjacent_nodes(@human)) + assert_equal([ @primates ], @tree.adjacent_nodes(@chimpanzee)) + assert_equal([ @mouse, @rat, @mammals ].sort(&@by_id), + @tree.adjacent_nodes(@rodents).sort(&@by_id)) + assert_equal([ @human, @chimpanzee, @mammals ].sort(&@by_id), + @tree.adjacent_nodes(@primates).sort(&@by_id)) + assert_equal([ @rodents, @primates ].sort(&@by_id), + @tree.adjacent_nodes(@mammals).sort(&@by_id)) + # test for not existed nodes + assert_equal([], @tree.adjacent_nodes(Bio::PhylogeneticTree::Node.new)) + end + + def test_out_edges + assert_equal([[ @mouse, @rodents, @edge_rodents_mouse ]], + @tree.out_edges(@mouse)) + 
assert_equal([[ @rat, @rodents, @edge_rodents_rat ]], + @tree.out_edges(@rat)) + assert_equal([[ @human, @primates, @edge_primates_human ]], + @tree.out_edges(@human)) + assert_equal([[ @chimpanzee, @primates, @edge_primates_chimpanzee ]], + @tree.out_edges(@chimpanzee)) + + adjacents = [ @mouse, @rat, @mammals ] + edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] + @tree.out_edges(@rodents).each do |a| + assert_equal(@rodents, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @human, @chimpanzee, @mammals ] + edges = [ @edge_primates_human, @edge_primates_chimpanzee, + @edge_mammals_primates ] + @tree.out_edges(@primates).each do |a| + assert_equal(@primates, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @rodents, @primates ] + edges = [ @edge_mammals_rodents, @edge_mammals_primates ] + @tree.out_edges(@mammals).each do |a| + assert_equal(@mammals, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) 
+ + # test for not existed nodes + assert_equal([], @tree.out_edges(Bio::PhylogeneticTree::Node.new)) + end + + def test_each_out_edge + flag = nil + r = @tree.each_out_edge(@mouse) do |src, tgt, edge| + assert_equal(@mouse, src) + assert_equal(@rodents, tgt) + assert_equal(@edge_rodents_mouse, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@rat) do |src, tgt, edge| + assert_equal(@rat, src) + assert_equal(@rodents, tgt) + assert_equal(@edge_rodents_rat, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@human) do |src, tgt, edge| + assert_equal(@human, src) + assert_equal(@primates, tgt) + assert_equal(@edge_primates_human, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@chimpanzee) do |src, tgt, edge| + assert_equal(@chimpanzee, src) + assert_equal(@primates, tgt) + assert_equal(@edge_primates_chimpanzee, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + adjacents = [ @mouse, @rat, @mammals ] + edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] + @tree.each_out_edge(@rodents) do |src, tgt, edge| + assert_equal(@rodents, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @human, @chimpanzee, @mammals ] + edges = [ @edge_primates_human, @edge_primates_chimpanzee, + @edge_mammals_primates ] + @tree.each_out_edge(@primates) do |src, tgt, edge| + assert_equal(@primates, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) 
+ + adjacents = [ @rodents, @primates ] + edges = [ @edge_mammals_rodents, @edge_mammals_primates ] + @tree.each_out_edge(@mammals) do |src, tgt, edge| + assert_equal(@mammals, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + # test for not existed nodes + flag = nil + node = Bio::PhylogeneticTree::Node.new + r = @tree.each_out_edge(node) do |src, tgt, edge| + flag = true + end + assert_equal(@tree, r) + assert_equal(nil, flag) end From nakao at dev.open-bio.org Thu Oct 5 07:39:32 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 05 Oct 2006 07:39:32 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.34,1.35 Message-ID: <200610050739.k957dWIZ011096@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory dev.open-bio.org:/tmp/cvs-serv11054/lib/bio/db/embl Modified Files: sptr.rb Log Message: * Fixed Bio::SPTR bugs reported by SONDEREGGER Bernhard. - Now a leading uncapital letter OS line is acceptable. (cf. uniprot:Q32725). - Fixed regexp for parsing the OS line from uniprot:O63147. Index: sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v retrieving revision 1.34 retrieving revision 1.35 diff -C2 -d -r1.34 -r1.35 *** sptr.rb 15 Jul 2006 15:29:26 -0000 1.34 --- sptr.rb 5 Oct 2006 07:39:29 -0000 1.35 *************** *** 293,301 **** # OS Genus species (name0), G s0 (name0), and G s (name0) (name1). # OS Homo sapiens (Human), and Rarrus norveticus (Rat) def os(num = nil) unless @data['OS'] os = Array.new fetch('OS').split(/, and|, /).each do |tmp| ! if tmp =~ /([A-Z][a-z]* *[\w\d \:\'\+\-]+[\w\d])/ org = $1 tmp =~ /(\(.+\))/ --- 293,303 ---- # OS Genus species (name0), G s0 (name0), and G s (name0) (name1). 
# OS Homo sapiens (Human), and Rarrus norveticus (Rat) + # OS Hippotis sp. Clark and Watts 825. + # OS unknown cyperaceous sp. def os(num = nil) unless @data['OS'] os = Array.new fetch('OS').split(/, and|, /).each do |tmp| ! if tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/ org = $1 tmp =~ /(\(.+\))/ From nakao at dev.open-bio.org Thu Oct 5 07:39:32 2006 From: nakao at dev.open-bio.org (Mitsuteru C. Nakao) Date: Thu, 05 Oct 2006 07:39:32 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db/embl test_sptr.rb,1.4,1.5 Message-ID: <200610050739.k957dWg0011101@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db/embl In directory dev.open-bio.org:/tmp/cvs-serv11054/test/unit/bio/db/embl Modified Files: test_sptr.rb Log Message: * Fixed Bio::SPTR bugs reported by SONDEREGGER Bernhard. - Now a leading uncapital letter OS line is acceptable. (cf. uniprot:Q32725). - Fixed regexp for parsing the OS line from uniprot:O63147. Index: test_sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/db/embl/test_sptr.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** test_sptr.rb 15 Jul 2006 15:29:26 -0000 1.4 --- test_sptr.rb 5 Oct 2006 07:39:30 -0000 1.5 *************** *** 1758,1761 **** --- 1758,1775 ---- end + class TestOSLine < Test::Unit::TestCase + def test_uncapitalized_letter_Q32725_9POAL + data = "OS unknown cyperaceous sp.\n" + sp = SPTR.new(data) + assert_equal('unknown cyperaceous sp.', sp.os.first['os']) + end + + def test_period_trancation_O63147 + data = "OS Hippotis sp. Clark and Watts 825.\n" + sp = SPTR.new(data) + assert_equal('Hippotis sp. 
Clark and Watts 825.', sp.os.first['os']) + end + end + end # module Bio From ngoto at dev.open-bio.org Thu Oct 5 13:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio phylogenetictree.rb,NONE,1.1 Message-ID: <200610051338.k95DcOfV012288@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory dev.open-bio.org:/tmp/cvs-serv12262/lib/bio Added Files: phylogenetictree.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: phylogenetictree.rb --- # # = bio/phylogenetictree.rb - phylogenetic tree data structure class # # Copyright:: Copyright (C) 2006 # Naohisa Goto # License:: Ruby's # # $Id: phylogenetictree.rb,v 1.1 2006/10/05 13:38:21 ngoto Exp $ # require 'matrix' require 'bio/pathway' module Bio # This is the class for phylogenetic tree. # It stores a phylogenetic tree. # # Internally, it is based on Bio::Pathway class. # However, users cannot handle Bio::Pathway object directly. # # This is alpha version. Incompatible changes may be made frequently. class PhylogeneticTree # Error when there are no path between specified nodes class NoPathError < RuntimeError; end # Error when the two nodes are not adjacent. class NotAdjacentNodesError < RuntimeError; end # Edge object of each node. # By default, the object doesn't contain any node information. class Edge # creates a new edge. 
def initialize(distance = nil) if distance.kind_of?(Numeric) self.distance = distance elsif distance self.distance_string = distance end end # evolutionary distance attr_reader :distance # evolutionary distance represented as a string attr_reader :distance_string # set evolutionary distance value def distance=(num) @distance = num @distance_string = (num ? num.to_s : num) end # set evolutionary distance value from a string def distance_string=(str) if str.to_s.strip.empty? @distance = nil @distance_string = str else @distance = str.to_f @distance_string = str end end # visualization of this object def inspect "" end # string representation of this object def to_s @distance_string.to_s end end #class Edge # Gets distance value from the given edge. # Returns float or any other numeric value or nil. def get_edge_distance(edge) begin dist = edge.distance rescue NoMethodError dist = edge end dist end # Gets distance string from the given edge. # Returns a string or nil. def get_edge_distance_string(edge) begin dist = edge.distance_string rescue NoMethodError dist = (edge ? edge.to_s : nil) end dist end # Returns edge1 + edge2 def get_edge_merged(edge1, edge2) dist1 = get_edge_distance(edge1) dist2 = get_edge_distance(edge2) if dist1 and dist2 then Edge.new(dist1 + dist2) elsif dist1 then Edge.new(dist1) elsif dist2 then Edge.new(dist2) else Edge.new end end # Node object. class Node # Creates a new node. def initialize(name = nil) @name = name if name end # name of the node attr_accessor :name # bootstrap value attr_reader :bootstrap # bootstrap value as a string attr_reader :bootstrap_string # sets a bootstrap value def bootstrap=(num) @bootstrap_string = (num ? num.to_s : num) @bootstrap = num end # sets a bootstrap value from a string def bootstrap_string=(str) if str.to_s.strip.empty? @bootstrap = nil @bootstrap_string = str else i = str.to_i f = str.to_f @bootstrap = (i == f ? 
i : f) @bootstrap_string = str end end # visualization of this object def inspect if @name and !@name.empty? then str = "(Node:#{@name.inspect}" else str = sprintf('(Node:%x', (self.__id__ << 1) & 0xffffffff) end str += " bootstrap=#{@bootstrap.inspect}" if @bootstrap str += ")" str end # string representation of this object def to_s @name.to_s end end #class Node # Gets node name def get_node_name(node) begin node.name rescue NoMethodError node.to_s end end def get_node_bootstrap(node) begin node.bootstrap rescue NoMethodError nil end end def get_node_bootstrap_string(node) begin node.bootstrap_string rescue NoMethodError nil end end # Creates a new phylogenetic tree. # When no arguments are given, it creates a new empty tree. # When a PhylogeneticTree object is given, it copies the tree. # Note that the new tree shares Node and Edge objects # with the given tree. def initialize(tree = nil) # creates an undirected adjacency list graph @pathway = Bio::Pathway.new([], true) @root = nil @options = {} self.concat(tree) if tree end # root node of this tree # (even if unrooted tree, it is used by some methods) attr_accessor :root # tree options; mainly used for tree output attr_accessor :options # Clears all nodes and edges. # Returns self. # Note that options and root are also cleared. def clear initialize self end # Returns all nodes as an array. def nodes @pathway.graph.keys end # Number of nodes. def number_of_nodes @pathway.nodes end # Iterates over each node of this tree. def each_node(&x) #:yields: node @pathway.graph.each_key(&x) self end # Iterates over each edges of this tree. def each_edge #:yields: source, target, edge @pathway.relations.each do |rel| yield rel.node[0], rel.node[1], rel.relation end self end # Returns all edges an array of [ node0, node1, edge ] def edges @pathway.relations.collect do |rel| [ rel.node[0], rel.node[1], rel.relation ] end end # Returns number of edges in the tree. 
def number_of_edges
  @pathway.relations.size
end

# Returns an array of adjacent nodes of the given node.
# Returns an empty array when the node is not in the tree.
def adjacent_nodes(node)
  neighbours = @pathway.graph[node]
  neighbours ? neighbours.keys : []
end

# Returns all connected edges with adjacent nodes.
# Returns an array of the array [ source, target, edge ]
# (an empty array when _source_ is not in the tree).
#
# The reason why the method name is "out_edges" is that
# it comes from the Boost Graph Library.
def out_edges(source)
  neighbours = @pathway.graph[source]
  return [] unless neighbours
  neighbours.collect { |target, edge| [ source, target, edge ] }
end

# Iterates over each connected edges of the given node.
# Returns self.
# Nothing is yielded when _source_ is not in the tree.
#
# The reason why the method name is "each_out_edge" is that
# it comes from the Boost Graph Library.
def each_out_edge(source) #:yields: source, target, edge
  neighbours = @pathway.graph[source]
  neighbours.each { |target, edge| yield source, target, edge } if neighbours
  self
end

# Returns number of edges in the given node
# (0 when the node is not in the tree).
#
# The reason why the method name is "out_degree" is that
# it comes from the Boost Graph Library.
def out_degree(source)
  neighbours = @pathway.graph[source]
  neighbours ? neighbours.size : 0
end

# Returns an edge from source to target.
# If source and target are not adjacent nodes, returns nil.
def get_edge(source, target)
  neighbours = @pathway.graph[source]
  neighbours ? neighbours[target] : nil
end

# Adds a new edge to the tree.
# Returns the newly added edge.
# If the edge already exists, it is overwritten with new one.
#
# NOTE(review): whether the old entry in the relation list is replaced
# as well depends on Bio::Pathway#append, which is not visible here --
# confirm that number_of_edges stays correct after overwriting.
def add_edge(source, target, edge = Edge.new)
  @pathway.append(Bio::Relation.new(source, target, edge))
  edge
end

# Adds a node to the tree.
# Returns self.
# If the node already exists, it does nothing.
def add_node(node)
  @pathway.graph[node] ||= {}
  self
end

# If the node exists, returns true.
# Otherwise, returns false.
def include?(node)
  @pathway.graph[node] ? true : false
end

# Removes all edges connected with the node.
# Returns self.
# If the node does not exist, raises IndexError.
# After this call the node itself is still in the tree, but it has
# no adjacent nodes.
def clear_node(node)
  raise IndexError, 'the node does not exist' unless include?(node)
  @pathway.relations.delete_if { |rel| rel.node.include?(node) }
  adjacency = @pathway.graph[node]
  # detaches the node from the adjacency list of every neighbour ...
  adjacency.each_key { |neighbour| @pathway.graph[neighbour].delete(node) }
  # ... and forgets the neighbours on the node's own side as well,
  # so that both directions of the graph stay consistent
  adjacency.clear
  self
end

# Removes the given node from the tree.
# All edges connected with the node are also removed.
# Returns self.
# If the node does not exist, raises IndexError.
def remove_node(node)
  clear_node(node)
  @pathway.graph.delete(node)
  self
end

# Removes each node if the block returns a true value.
# All edges connected with the removed nodes are also removed.
# Returns self.
def remove_node_if #:yields: node
  # iterates over the array returned by nodes, not over the graph
  # itself, because nodes are deleted from the graph on the way
  nodes.each do |node|
    next unless yield node
    clear_node(node)
    @pathway.graph.delete(node)
  end
  self
end

# Removes an edge between source and target.
# Returns self.
#---
# If two or more edges exists between source and target,
# all of them are removed.
#+++
def remove_edge(source, target)
  fwd = [ source, target ]
  rev = [ target, source ]
  @pathway.relations.delete_if do |rel|
    rel.node == fwd || rel.node == rev
  end
  neighbours = @pathway.graph[source]
  neighbours.delete(target) if neighbours
  neighbours = @pathway.graph[target]
  neighbours.delete(source) if neighbours
  self
end

# Removes each edge if the block returns a true value.
# Returns self.
def remove_edge_if #:yields: source, target, edge
  removed_rel = []
  @pathway.relations.delete_if do |rel|
    # the edge object is read from the relation in the same way as
    # each_edge does
    if yield rel.node[0], rel.node[1], rel.relation
      removed_rel << rel
      true
    end
  end
  # removes the matching entries from the adjacency lists as well
  removed_rel.each do |rel|
    source = rel.node[0]
    target = rel.node[1]
    neighbours = @pathway.graph[source]
    neighbours.delete(target) if neighbours
    neighbours = @pathway.graph[target]
    neighbours.delete(source) if neighbours
  end
  self
end

# Replaces each node by each block's return value.
# Returns self.
def collect_node! #:yields: node
  tr = {}
  each_node { |node| tr[node] = yield node }
  # replaces nodes in @pathway.relations
  @pathway.relations.each do |rel|
    rel.node.collect! { |node| tr[node] }
  end
  # re-generates @pathway from relations
  @pathway.to_list
  # adds orphan nodes
  tr.each_value { |newnode| @pathway.graph[newnode] ||= {} }
  self
end

# Replaces each edge by each block's return value.
# Returns self.
#
# NOTE(review): this assumes that the relation object provides a
# "relation=" writer; its class is not visible here -- confirm.
def collect_edge! #:yields: source, target, edge
  @pathway.relations.each do |rel|
    newedge = yield rel.node[0], rel.node[1], rel.relation
    rel.relation = newedge
    @pathway.append(rel, false)
  end
  self
end

# Gets the sub-tree consisted of given nodes.
# _nodes_ must be an array of nodes.
# Nodes that do not exist in the original tree are ignored.
# Returns a PhylogeneticTree object.
# Note that the sub-tree shares Node and Edge objects
# with the original tree.
def subtree(nodes)
  nodes = nodes.find_all { |x| @pathway.graph[x] }
  return self.class.new if nodes.empty?
  # creates subtree
  new_tree = self.class.new
  nodes.each { |x| new_tree.add_node(x) }
  # copies only the edges whose both ends were kept
  each_edge do |node1, node2, edge|
    if new_tree.include?(node1) && new_tree.include?(node2)
      new_tree.add_edge(node1, node2, edge)
    end
  end
  new_tree
end

# Gets the sub-tree consisted of given nodes and
# all internal nodes connected between given nodes.
# _nodes_ must be an array of nodes.
# Nodes that do not exist in the original tree are ignored.
# Returns a PhylogeneticTree object.
# The result is unspecified for cyclic trees.
# Note that the sub-tree shares Node and Edge objects
# with the original tree.
def subtree_with_all_paths(nodes)
  hash = {}
  nodes.each { |x| hash[x] = true }
  # collects the nodes on the path between every pair of given nodes
  nodes.each_index do |i|
    node1 = nodes[i]
    (0...i).each do |j|
      node2 = nodes[j]
      next if node1 == node2
      begin
        route = path(node1, node2)
      rescue IndexError, NoPathError
        # unknown or unconnected nodes contribute nothing
        route = []
      end
      route.each { |x| hash[x] = true }
    end
  end
  subtree(hash.keys)
end

# Concatenates the other tree.
# If the same edge exists, the edge in _other_ is used.
# Returns self.
# The result is unspecified if _other_ isn't a PhylogeneticTree object.
# Note that the Node and Edge objects in the _other_ tree are
# shared in the concatenated tree.
def concat(other)
  #raise TypeError unless other.kind_of?(self.class)
  other.each_node { |node| add_node(node) }
  other.each_edge { |node1, node2, edge| add_edge(node1, node2, edge) }
  self
end

# Gets path from node1 to node2.
# Returns an array of nodes, including node1 and node2.
# If node1 and/or node2 do not exist, IndexError is raised.
# If node1 and node2 are not connected, NoPathError is raised.
# The result is unspecified for cyclic trees.
def path(node1, node2)
  raise IndexError, 'node1 not found' unless @pathway.graph[node1]
  raise IndexError, 'node2 not found' unless @pathway.graph[node2]
  return [ node1 ] if node1 == node2
  # only the second value returned by the search (the list of nodes)
  # is used here
  _step, route = @pathway.bfs_shortest_path(node1, node2)
  unless route[0] == node1 && route[-1] == node2
    raise NoPathError, 'node1 and node2 are not connected'
  end
  route
end

# Iterates over each edge from node1 to node2.
# Returns self.
# The result is unspecified for cyclic trees.
def each_edge_in_path(node1, node2) #:yields: source, target, edge
  nodes_on_path = path(node1, node2)
  source = nodes_on_path.shift
  nodes_on_path.each do |target|
    yield source, target, get_edge(source, target)
    source = target
  end
  self
end

# Returns distance between node1 and node2
# (the sum of the distances of all edges on the path; 0 when
# node1 and node2 are the same node).
# It would raise error if the edges didn't contain distance values.
# The result is unspecified for cyclic trees.
def distance(node1, node2)
  total = 0
  each_edge_in_path(node1, node2) do |_source, _target, edge|
    total += get_edge_distance(edge)
  end
  total
end

# Gets the parent node of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns a Node object or nil (nil when _node_ is the root itself).
# The result is unspecified for cyclic trees.
def parent(node, root = nil)
  root ||= @root
  # the parent is the last node before _node_ on the path from the root
  path(root, node)[-2]
end

# Gets the adjacent children nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def children(node, root = nil)
  root ||= @root
  route = path(root, node)
  # Everything adjacent to _node_ except the nodes lying on the way
  # from the root (which removes the parent).
  adjacent_nodes(node) - route
end

# Gets all descendent nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# Raises IndexError if _node_ cannot be reached from the root.
# The result is unspecified for cyclic trees.
def descendents(node, root = nil)
  root ||= @root
  depth, parent_of = @pathway.breadth_first_search(root)
  base = depth[node]
  # Without this guard an unreachable node made the depth comparison below
  # fail with an unrelated "comparison with nil" error.
  raise IndexError, 'node not found' if base.nil?
  found = []
  depth.each do |candidate, level|
    # only nodes deeper than _node_ can be its descendents
    next unless level > base
    ancestor = candidate
    # climb towards the root until _node_ is met or its depth is passed
    while (ancestor = parent_of[ancestor])
      if ancestor == node
        found << candidate
        break
      end
      break if depth[ancestor] <= base
    end
  end
  found
end

# If _node_ is nil, returns an array of
# all leaves (nodes connected with one edge).
# Otherwise, gets all descendent leaf nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes.
# The result is unspecified for cyclic trees.
def leaves(node = nil, root = nil)
  unless node
    found = []
    each_node { |x| found << x if out_degree(x) == 1 }
    return found
  end
  root ||= @root
  descendents(node, root).find_all { |x| adjacent_nodes(x).size == 1 }
end

# Gets all ancestral nodes of the _node_.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns an array of Nodes, nearest ancestor first.
# The result is unspecified for cyclic trees.
def ancestors(node, root = nil)
  root ||= @root
  (path(root, node) - [ node ]).reverse
end

# Gets the lowest common ancestor of the two nodes.
# If _root_ isn't specified or _root_ is nil, @root is used.
# Returns a Node object or nil.
# The result is unspecified for cyclic trees.
def lowest_common_ancestor(node1, node2, root = nil)
  root ||= @root
  _distance, route = @pathway.breadth_first_search(root)
  # Builds the chain start, predecessor, predecessor's predecessor, ...
  lineage = lambda do |start|
    chain = [ start ]
    current = start
    while (current = route[current])
      chain << current
    end
    chain
  end
  # The first shared node, seen from node1, is the lowest common ancestor.
  (lineage.call(node1) & lineage.call(node2)).first
end

# Calculates distance matrix of given nodes.
# If _nodes_ is nil, or is omitted, it acts the same as
# tree.distance_matrix(tree.leaves).
# Returns a matrix object.
# The result is unspecified for cyclic trees.
# Note 1: The diagonal values of the matrix are 0.
# Note 2: If the distance cannot be calculated, nil will be set.
def distance_matrix(nodes = nil)
  nodes ||= leaves
  rows = []
  nodes.each_with_index do |node_i, i|
    row = []
    nodes.each_with_index do |node_j, j|
      # value already computed for the mirrored cell (j, i), if any
      mirrored = rows[j] && rows[j][i]
      value =
        if i == j
          0
        elsif mirrored
          mirrored
        else
          begin
            distance(node_i, node_j)
          rescue StandardError
            # see Note 2: an incalculable distance is stored as nil
            nil
          end
        end
      row << value
    end
    rows << row
  end
  Matrix.rows(rows, false)
end

# Shows the adjacency matrix representation of the tree.
# It shows matrix only for given nodes.
# If _nodes_ is nil or is omitted,
# it acts the same as tree.adjacency_matrix(tree.nodes).
# Cells without an edge hold _default_value_ and
# the diagonal cells hold _diagonal_value_.
# If a block is given, for each edge,
# it yields _source_, _target_, and _edge_, and
# uses the returned value of the block.
# Without blocks, it uses edge.
# Returns a matrix object.
def adjacency_matrix(nodes = nil, default_value = nil, diagonal_value = nil) #:yields: source, target, edge
  nodes ||= self.nodes
  size = nodes.size
  # node => row/column index
  index_of = {}
  nodes.each_with_index { |node, i| index_of[node] = i }
  # prepares a matrix
  rows = Array.new(size) { Array.new(size, default_value) }
  size.times { |i| rows[i][i] = diagonal_value }
  # fills the matrix from each edge
  each_edge do |source, target, edge|
    i_source = index_of[source]
    i_target = index_of[target]
    # edges touching a node outside of _nodes_ are ignored
    next unless i_source && i_target
    value = block_given? ? yield(source, target, edge) : edge
    rows[i_source][i_target] = value
    rows[i_target][i_source] = value
  end
  Matrix.rows(rows, false)
end

# Removes all nodes that are not branches nor leaves.
# That is, removes nodes connected with exactly two edges.
# For each removed node, two adjacent edges are merged and
# a new edge is created.
# Returns removed nodes.
# Note that orphan nodes are still kept unchanged.
def remove_nonsense_nodes hash = {} self.each_node do |node| hash[node] = true if @pathway.graph[node].size == 2 end hash.each_key do |node| adjs = @pathway.graph[node].keys edges = @pathway.graph[node].values new_edge = get_edge_merged(edges[0], edges[1]) @pathway.graph[adjs[0]].delete(node) @pathway.graph[adjs[1]].delete(node) @pathway.graph.delete(node) @pathway.append(Bio::Relation.new(adjs[0], adjs[1], new_edge)) end #@pathway.to_relations @pathway.relations.reject! do |rel| hash[rel.node[0]] or hash[rel.node[1]] end return hash.keys end # Insert a new node between adjacent nodes node1 and node2. # The old edge between node1 and node2 are changed to the edge # between new_node and node2. # The edge between node1 and new_node is newly created. # # If new_distance is specified, the distance between # node1 and new_node is set to new_distance, and # distance between new_node and node2 is set to # tree.get_edge(node1, node2).distance - new_distance. # # Returns self. # If node1 and node2 are not adjacent, raises NotAdjacentNodesError. # # If new_node already exists in the tree, the tree would become # circular. In addition, if the edge between new_node and # node1 (or node2) already exists, it will be erased. def insert_node(node1, node2, new_node, new_distance = nil) unless edge = self.get_edge(node1, node2) then raise NotAdjacentNodesError, 'node1 and node2 are not adjacent.' 
end new_edge = Edge.new(new_distance) self.remove_edge(node1, node2) self.add_edge(node1, new_node, new_edge) if new_distance and old_distance = get_edge_distance(edge) then old_distance -= new_distance begin edge.distance = old_distance rescue NoMethodError edge = old_distance end end self.add_edge(new_node, node2, edge) self end end #class PhylogeneticTree end #module Bio #--- # temporary added #+++ require 'bio/db/newick' From ngoto at dev.open-bio.org Thu Oct 5 13:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db newick.rb,NONE,1.1 Message-ID: <200610051338.k95DcOAx012291@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory dev.open-bio.org:/tmp/cvs-serv12262/lib/bio/db Added Files: newick.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: newick.rb --- # # = bio/db/newick.rb - Newick Standard phylogenetic tree parser / formatter # # Copyright:: Copyright (C) 2004-2006 # Naohisa Goto # Daniel Amelang # License:: Ruby's # # $Id: newick.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # module Bio class PhylogeneticTree #--- # newick output #+++ def __get_option(key, options) options[key] or (@options ? @options[key] : nil) end private :__get_option # formats leaf def __to_newick_format_leaf(node, edge, options) label = get_node_name(node).to_s dist = get_edge_distance_string(edge) bs = get_node_bootstrap_string(node) if __get_option(:branch_length_style, options) == :disabled dist = nil end case __get_option(:bootstrap_style, options) when :disabled label + (dist ? ":#{dist}" : '') when :molphy label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '') when :traditional label + (bs ? bs : '') + (dist ? 
":#{dist}" : '') else # default: same as molphy style label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '') end end private :__to_newick_format_leaf # def __to_newick(parents, source, depth, options) result = [] indent0 = ' ' * depth indent = ' ' * (depth + 1) self.each_out_edge(source) do |src, tgt, edge| if parents.include?(tgt) then ;; elsif self.out_degree(tgt) == 1 then result << indent + __to_newick_format_leaf(tgt, edge, options) else result << __to_newick([ src ].concat(parents), tgt, depth + 1, options) + __to_newick_format_leaf(tgt, edge, options) end end indent0 + "(\n" + result.join(",\n") + (result.size > 0 ? "\n" : '') + indent0 + ')' end private :__to_newick # Returns a newick formatted string. def newick(options = {}) root = @root root ||= self.nodes.first return '();' unless root __to_newick([], root, 0, options) + __to_newick_format_leaf(root, Edge.new, options) + ";\n" end end #class PhylogeneticTree #--- # newick parser #+++ # Newick standard phylogenetic tree parser class. # # This is alpha version. Incompatible changes may be made frequently. class Newick # delemiter of the entry DELIMITER = RS = ";" # parse error class class ParseError < RuntimeError; end # same as Bio::PhylogeneticTree::Edge Edge = Bio::PhylogeneticTree::Edge # same as Bio::PhylogeneticTree::Node Node = Bio::PhylogeneticTree::Node # Creates a new Newick object. # _options_ for parsing can be set. # # Note: molphy-style bootstrap values are always parsed, even if # the options[:bootstrap_style] is set to :traditional or :disabled. # Note: By default, if all of the internal node's names are numeric # and there are no molphy-style boostrap values, # the names are regarded as bootstrap values. # options[:bootstrap_style] = :disabled or :molphy to disable the feature. 
def initialize(str, options = nil) str = str.sub(/\;(.*)/m, ';') @original_string = str @entry_overrun = $1 @options = (options or {}) end # parser options # (in some cases, options can be automatically set by the parser) attr_reader :options # original string before parsing attr_reader :original_string # string after this entry attr_reader :entry_overrun # Gets the tree. # Returns a Bio::PhylogeneticTree object. def tree if !defined?(@tree) @tree = __parse_newick(@original_string, @options) else @tree end end # Re-parses the tree from the original string. # Returns self. # This method is useful after changing parser options. def reparse remove_instance_variable(:tree) self.tree self end private # gets a option def __get_option(key, options) options[key] or (@options ? @options[key] : nil) end # Parses newick formatted leaf (or internal node) name. def __parse_newick_leaf(str, node, edge) case str when /(.*)\:(.*)\[(.*)\]/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) node.bootstrap_string = $3 if $3 and !($3.strip.empty?) when /(.*)\[(.*)\]/ node.name = $1 node.bootstrap_string = $2 if $2 and !($2.strip.empty?) when /(.*)\:(.*)/ node.name = $1 edge.distance_string = $2 if $2 and !($2.strip.empty?) else node.name = str end true end # Parses newick formatted string. def __parse_newick(str, options = {}) # initializing root = Node.new cur_node = root edges = [] nodes = [ root ] internal_nodes = [] node_stack = [] # preparation of tokens str = str.chop if str[-1..-1] == ';' ary = str.split(/([\(\)\,])/) ary.collect! { |x| x.strip!; x.empty? ? nil : x } ary.compact! previous_token = nil # main loop while token = ary.shift #p token case token when ',' if previous_token == ',' or previous_token == '(' then # there is a leaf whose name is empty. 
ary.unshift(token) ary.unshift('') token = nil end when '(' node = Node.new nodes << node internal_nodes << node node_stack.push(cur_node) cur_node = node when ')' if previous_token == ',' or previous_token == '(' then # there is a leaf whose name is empty. ary.unshift(token) ary.unshift('') token = nil else edge = Edge.new next_token = ary[0] if next_token and next_token != ',' and next_token != ')' then __parse_newick_leaf(next_token, cur_node, edge) ary.shift end parent = node_stack.pop raise ParseError, 'unmatched parentheses' unless parent edges << Bio::Relation.new(parent, cur_node, edge) cur_node = parent end else leaf = Node.new edge = Edge.new __parse_newick_leaf(token, leaf, edge) nodes << leaf edges << Bio::Relation.new(cur_node, leaf, edge) end #case previous_token = token end #while raise ParseError, 'unmatched parentheses' unless node_stack.empty? bsopt = __get_option(:bootstrap_style, options) unless bsopt == :disabled or bsopt == :molphy then # If all of the internal node's names are numeric # and there are no molphy-style boostrap values, # the names are regarded as bootstrap values. flag = false internal_nodes.each do |node| if node.bootstrap unless __get_option(:bootstrap_style, options) == :traditional @options[:bootstrap_style] = :molphy end flag = false break end if node.name and !node.name.to_s.strip.empty? then if /\A[\+\-]?\d*\.?\d*\z/ =~ node.name flag = true else flag = false break end end end if flag then @options[:bootstrap_style] = :traditional internal_nodes.each do |node| if node.name then node.bootstrap_string = node.name node.name = nil end end end end # If the root implicitly prepared by the program is a leaf and # there are no additional information for the edge from the root to # the first internal node, the root is removed. 
if rel = edges[-1] and rel.node == [ root, internal_nodes[0] ] and rel.relation.instance_eval { !defined?(@distance) } and edges.find_all { |x| x.node.include?(root) }.size == 1 nodes.shift edges.pop end # Let the tree into instance variables tree = Bio::PhylogeneticTree.new tree.instance_eval { @pathway.relations.concat(edges) @pathway.to_list } tree.root = nodes[0] tree.options.update(@options) tree end end #class Newick end #module Bio From ngoto at dev.open-bio.org Thu Oct 5 13:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_phylogenetictree.rb, NONE, 1.1 Message-ID: <200610051338.k95DcOLR012298@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv12262/test/unit/bio Added Files: test_phylogenetictree.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. 
--- NEW FILE: test_phylogenetictree.rb --- # # = test/bio/test_phylogenetictree.rb - unit test for Bio::PhylogeneticTree # # Copyright:: Copyright (C) 2006 # Naohisa Goto # License:: Ruby's # # $Id: test_phylogenetictree.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # require 'test/unit' require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), [".."] * 3, "lib")).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'bio' require 'bio/phylogenetictree' module Bio class TestPhylogeneticTreeEdge < Test::Unit::TestCase def setup @obj = Bio::PhylogeneticTree::Edge.new(123.45) end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree::Edge.new } assert_equal(1.23, Bio::PhylogeneticTree::Edge.new(1.23).distance) assert_equal(12.3, Bio::PhylogeneticTree::Edge.new('12.3').distance) end def test_distance assert_equal(123.45, @obj.distance) end def test_distance_string assert_equal("123.45", @obj.distance_string) end def test_distance=() @obj.distance = 678.9 assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_distance_string=() @obj.distance_string = "678.9" assert_equal(678.9, @obj.distance) assert_equal("678.9", @obj.distance_string) @obj.distance_string = nil assert_equal(nil, @obj.distance) assert_equal(nil, @obj.distance_string) end def test_inspect assert_equal("", @obj.inspect) end def test_to_s assert_equal("123.45", @obj.to_s) end end #class TestPhylogeneticTreeEdge class TestPhylogeneticTreeNode < Test::Unit::TestCase def setup @obj = Bio::PhylogeneticTree::Node.new end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree::Node.new } a = nil assert_nothing_raised { a = Bio::PhylogeneticTree::Node.new('mouse') } assert_equal('mouse', a.name) end def test_name assert_equal(nil, @obj.name) @obj.name = 'human' assert_equal('human', @obj.name) end def test_bootstrap assert_equal(nil, 
@obj.bootstrap) end def test_bootstrap_string assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap=() @obj.bootstrap = 98 assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_bootstrap_string=() @obj.bootstrap_string = '98' assert_equal(98, @obj.bootstrap) assert_equal('98', @obj.bootstrap_string) @obj.bootstrap_string = '99.98' assert_equal(99.98, @obj.bootstrap) assert_equal('99.98', @obj.bootstrap_string) @obj.bootstrap = nil assert_equal(nil, @obj.bootstrap) assert_equal(nil, @obj.bootstrap_string) end def test_inspect @obj.name = 'human' assert_equal('(Node:"human")', @obj.inspect) @obj.bootstrap = 99.98 assert_equal('(Node:"human" bootstrap=99.98)', @obj.inspect) end def test_to_s @obj.name = 'human' assert_equal('human', @obj.to_s) end end #class TestPhylogeneticTreeNode class TestPhylogeneticTree < Test::Unit::TestCase def setup @tree = Bio::PhylogeneticTree.new end def test_get_edge_distance edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance(edge)) edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal(12.34, @tree.get_edge_distance(edge)) assert_equal(12.34, @tree.get_edge_distance(12.34)) end def test_get_edge_distance_string edge = Bio::PhylogeneticTree::Edge.new assert_equal(nil, @tree.get_edge_distance_string(edge)) edge = Bio::PhylogeneticTree::Edge.new(12.34) assert_equal("12.34", @tree.get_edge_distance_string(edge)) assert_equal("12.34", @tree.get_edge_distance_string(12.34)) end def test_get_node_name node = Bio::PhylogeneticTree::Node.new assert_equal(nil, @tree.get_node_name(node)) node.name = 'human' assert_equal('human', @tree.get_node_name(node)) end def test_initialize assert_nothing_raised { Bio::PhylogeneticTree.new } assert_nothing_raised { Bio::PhylogeneticTree.new(@tree) } end def test_root assert_equal(nil, @tree.root) end def test_root=() assert_equal(nil, 
@tree.root) node = Bio::PhylogeneticTree::Node.new @tree.root = node assert_equal(node, @tree.root) end def test_options assert_equal({}, @tree.options) @tree.options[:bootstrap_style] = :traditional assert_equal(:traditional, @tree.options[:bootstrap_style]) end end #class TestPhylogeneticTree class TestPhylogeneticTree2 < Test::Unit::TestCase def setup # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new mouse = Bio::PhylogeneticTree::Node.new('mouse') rat = Bio::PhylogeneticTree::Node.new('rat') rodents = Bio::PhylogeneticTree::Node.new('rodents') human = Bio::PhylogeneticTree::Node.new('human') chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') primates = Bio::PhylogeneticTree::Node.new('primates') mammals = Bio::PhylogeneticTree::Node.new('mammals') @tree.add_edge(rodents, mouse, Bio::PhylogeneticTree::Edge.new(0.0968)) @tree.add_edge(rodents, rat, Bio::PhylogeneticTree::Edge.new(0.1125)) @tree.add_edge(mammals, rodents, Bio::PhylogeneticTree::Edge.new(0.2560)) @tree.add_edge(primates, human, Bio::PhylogeneticTree::Edge.new(0.0386)) @tree.add_edge(primates, chimpanzee, Bio::PhylogeneticTree::Edge.new(0.0503)) @tree.add_edge(mammals, primates, Bio::PhylogeneticTree::Edge.new(0.2235)) @nodes = [ mouse, rat, rodents, human, chimpanzee, primates, mammals ] end def test_clear assert_nothing_raised { @tree.clear } assert_equal(0, @tree.number_of_nodes) assert_equal(0, @tree.number_of_edges) end def test_nodes nodes = @nodes.sort { |a, b| a.__id__ <=> b.__id__ } assert_equal(nodes, @tree.nodes.sort { |a, b| a.__id__ <=> b.__id__ }) end def test_number_of_nodes assert_equal(7, @tree.number_of_nodes) end def test_each_node end end #class TestPhylogeneticTree2 end #module Bio From ngoto at dev.open-bio.org Thu Oct 5 13:38:24 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Thu, 05 Oct 2006 13:38:24 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_newick.rb,NONE,1.1 Message-ID: 
<200610051338.k95DcOjV012303@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory dev.open-bio.org:/tmp/cvs-serv12262/test/unit/bio/db Added Files: test_newick.rb Log Message: * lib/bio/phylogenetictree.rb: Bio::PhylogeneticTree is phylogenetic tree data structure class. * lib/bio/db/newick.rb: Bio::Newick is the Newick Standard (aka. New Hampshire Format) phylogenetic tree parser. Some methods for formatting Newick output also exists in this file. --- NEW FILE: test_newick.rb --- # # = test/bio/db/newick.rb - Unit test for Bio::Newick # # Copyright:: Copyright (C) 2004-2006 # Daniel Amelang # Naohisa Goto # License:: Ruby's # # $Id: test_newick.rb,v 1.1 2006/10/05 13:38:22 ngoto Exp $ # require 'test/unit' require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), [".."] * 3, "lib")).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'bio' require 'bio/phylogenetictree' require 'bio/db/newick' module Bio class TestNewick < Test::Unit::TestCase TREE_STRING = <<-END_OF_TREE_STRING ( ( HexLEZ35:0.00263, HexMCZ42:0.00788 ):0.00854, ( HexFLZ48:0.00457, ( HexFLZ83:0.00217, HexFLZ13:0.00574 ):0.00100 ):0.04692, HexLEZ73:0.00268 )[0.1250]; END_OF_TREE_STRING def test_string_tree newick = Bio::Newick.new(TREE_STRING) tree = newick.tree assert_equal(3, tree.children(tree.root).size) assert_equal(9, tree.descendents(tree.root).size) assert_equal(6, tree.leaves.size) leaf = tree.nodes.find { |x| x.name == 'HexFLZ83' } assert_equal(3, tree.ancestors(leaf).size) assert_equal(tree.path(tree.root, leaf)[1], tree.ancestors(leaf)[1]) assert_equal(0.00217, tree.get_edge(leaf, tree.parent(leaf)).distance) assert_equal("HexFLZ83", leaf.name) end end #class TestNewick end #module Bio From ngoto at dev.open-bio.org Fri Oct 6 09:53:40 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Fri, 06 Oct 2006 09:53:40 +0000 Subject: [BioRuby-cvs] bioruby ChangeLog,1.53,1.54 Message-ID: 
<200610060953.k969remd015631@dev.open-bio.org> Update of /home/repository/bioruby/bioruby In directory dev.open-bio.org:/tmp/cvs-serv15611 Modified Files: ChangeLog Log Message: Added Changelog (made by N.Goto) Index: ChangeLog =================================================================== RCS file: /home/repository/bioruby/bioruby/ChangeLog,v retrieving revision 1.53 retrieving revision 1.54 diff -C2 -d -r1.53 -r1.54 *** ChangeLog 27 Jun 2006 05:44:57 -0000 1.53 --- ChangeLog 6 Oct 2006 09:53:38 -0000 1.54 *************** *** 1,2 **** --- 1,38 ---- + 2006-10-05 Naohisa Goto + + * lib/bio/db/newick.rb + + Bio::Newick for Newick standard phylogenetic tree parser is + newly added (contributed by Daniel Amelang). + + * lib/bio/phylogenetictree.rb + + Bio::PhylogeneticTree for phylogenetic tree data structure + is newly added. + + 2006-07-14 Naohisa Goto + + * lib/bio/command.rb + + Bio::Command::Tools and Bio::Command::NetTools are combined + and re-constructed into a new Bio::Command module. + + lib/bio/appl/blast.rb, lib/bio/appl/fasta.rb, + lib/bio/appl/emboss.rb, lib/bio/appl/psort.rb, + lib/bio/appl/hmmer.rb, lib/bio/db/fantom.rb, + lib/bio/io/fastacmd.rb, lib/bio/io/fetch.rb, + lib/bio/io/keggapi.rb, lib/bio/io/pubmed.rb, and + lib/bio/io/registry.rb are changed to use the new Bio::Command + instead of old Bio::Command or Net::HTTP. + + 2006-06-29 Naohisa Goto + + * lib/bio/appl/blat/report.rb + + Bio::BLAT::Report::Hit#milli_bad, #percent_identity, #protein?, + #score, and #psl_version methods/attributes are newly added, + and psl files without headers are supported (discussed in + bioruby-ja ML). + 2006-06-27 Naohisa Goto *************** *** 8,11 **** --- 44,60 ---- is empty. + * lib/bio/db/pdb/pdb.rb + + Bio::PDB::ATOM#name, #resName, #iCode, #chaarge, #segID, and + #element are changed to strip whitespaces when initializing. + Bio::PDB::HETATM is also subject to the above changes. 
+ (suggested by Mikael Borg) + + 2006-06-12 Naohisa Goto + + * lib/bio/io/flatfile.rb + + Bug fix: Bio::FlatFile.open(klass, filename) didn't work. + 2006-05-02 Mitsuteru Nakao *************** *** 14,17 **** --- 63,74 ---- Bio::PTS1 first commit. + 2006-04-30 Naohisa Goto + + * lib/bio/appl/blast/format0.rb + + Bug fix: parse error for hits whose database sequence names + contain 'Score', and subsequent hits after them would lost + (reported by Tomoaki NISHIYAMA). + 2006-04-14 Mitsuteru Nakao *************** *** 21,24 **** --- 78,92 ---- Browser. + 2006-03-22 Naohisa Goto + + * lib/bio/io/flatfile.rb + + Bug fix: Bio::FlatFile raises error for pipes, ARGF, etc. + The bug also affects bio/appl/mafft.rb, bio/appl/clustalw.rb, + bio/appl/blast.rb, bio/io/fastacmd.rb, and so on. + + Bio::FlatFile#entry_start_pos and #entry_ended_pos are + changed to be enabled only when Bio::FlatFile#entry_pos_flag + is true. 2006-02-27 Toshiaki Katayama From ngoto at dev.open-bio.org Fri Oct 6 14:18:53 2006 From: ngoto at dev.open-bio.org (Naohisa Goto) Date: Fri, 06 Oct 2006 14:18:53 +0000 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_phylogenetictree.rb, 1.1, 1.2 Message-ID: <200610061418.k96EIrPw016007@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory dev.open-bio.org:/tmp/cvs-serv15987/test/unit/bio Modified Files: test_phylogenetictree.rb Log Message: added tests Index: test_phylogenetictree.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/test_phylogenetictree.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_phylogenetictree.rb 5 Oct 2006 13:38:22 -0000 1.1 --- test_phylogenetictree.rb 6 Oct 2006 14:18:51 -0000 1.2 *************** *** 181,205 **** # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new ! mouse = Bio::PhylogeneticTree::Node.new('mouse') ! 
rat = Bio::PhylogeneticTree::Node.new('rat') ! rodents = Bio::PhylogeneticTree::Node.new('rodents') ! human = Bio::PhylogeneticTree::Node.new('human') ! chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') ! primates = Bio::PhylogeneticTree::Node.new('primates') ! mammals = Bio::PhylogeneticTree::Node.new('mammals') ! @tree.add_edge(rodents, mouse, ! Bio::PhylogeneticTree::Edge.new(0.0968)) ! @tree.add_edge(rodents, rat, ! Bio::PhylogeneticTree::Edge.new(0.1125)) ! @tree.add_edge(mammals, rodents, ! Bio::PhylogeneticTree::Edge.new(0.2560)) ! @tree.add_edge(primates, human, ! Bio::PhylogeneticTree::Edge.new(0.0386)) ! @tree.add_edge(primates, chimpanzee, ! Bio::PhylogeneticTree::Edge.new(0.0503)) ! @tree.add_edge(mammals, primates, ! Bio::PhylogeneticTree::Edge.new(0.2235)) @nodes = ! [ mouse, rat, rodents, human, chimpanzee, primates, mammals ] end --- 181,212 ---- # Note that below data is NOT real. The distances are random. @tree = Bio::PhylogeneticTree.new ! @mouse = Bio::PhylogeneticTree::Node.new('mouse') ! @rat = Bio::PhylogeneticTree::Node.new('rat') ! @rodents = Bio::PhylogeneticTree::Node.new('rodents') ! @human = Bio::PhylogeneticTree::Node.new('human') ! @chimpanzee = Bio::PhylogeneticTree::Node.new('chimpanzee') ! @primates = Bio::PhylogeneticTree::Node.new('primates') ! @mammals = Bio::PhylogeneticTree::Node.new('mammals') @nodes = ! [ @mouse, @rat, @rodents, @human, @chimpanzee, @primates, @mammals ] ! @edge_rodents_mouse = Bio::PhylogeneticTree::Edge.new(0.0968) ! @edge_rodents_rat = Bio::PhylogeneticTree::Edge.new(0.1125) ! @edge_mammals_rodents = Bio::PhylogeneticTree::Edge.new(0.2560) ! @edge_primates_human = Bio::PhylogeneticTree::Edge.new(0.0386) ! @edge_primates_chimpanzee = Bio::PhylogeneticTree::Edge.new(0.0503) ! @edge_mammals_primates = Bio::PhylogeneticTree::Edge.new(0.2235) ! @edges = [ ! [ @rodents, @mouse, @edge_rodents_mouse ], ! [ @rodents, @rat, @edge_rodents_rat ], ! [ @mammals, @rodents, @edge_mammals_rodents ], ! 
[ @primates, @human, @edge_primates_human ], ! [ @primates, @chimpanzee, @edge_primates_chimpanzee ], ! [ @mammals, @primates, @edge_mammals_primates ] ! ] ! @edges.each do |a| ! @tree.add_edge(*a) ! end ! ! @by_id = Proc.new { |a, b| a.__id__ <=> b.__id__ } end *************** *** 211,216 **** def test_nodes ! nodes = @nodes.sort { |a, b| a.__id__ <=> b.__id__ } ! assert_equal(nodes, @tree.nodes.sort { |a, b| a.__id__ <=> b.__id__ }) end --- 218,223 ---- def test_nodes ! nodes = @nodes.sort(&@by_id) ! assert_equal(nodes, @tree.nodes.sort(&@by_id)) end *************** *** 220,223 **** --- 227,406 ---- def test_each_node + @tree.each_node do |x| + assert_not_nil(@nodes.delete(x)) + end + assert_equal(true, @nodes.empty?) + end + + def test_each_edge + @tree.each_edge do |source, target, edge| + assert_not_nil(@edges.delete([ source, target, edge ])) + end + assert_equal(true, @edges.empty?) + end + + def test_edges + edges = @edges.sort { |a, b| a[-1].distance <=> b[-1].distance } + assert_equal(edges, + @tree.edges.sort { + |a, b| a[-1].distance <=> b[-1].distance }) + end + + def test_number_of_edges + assert_equal(@edges.size, @tree.number_of_edges) + end + + def test_adjacent_nodes + assert_equal([ @rodents ], @tree.adjacent_nodes(@mouse)) + assert_equal([ @rodents ], @tree.adjacent_nodes(@rat)) + assert_equal([ @primates ], @tree.adjacent_nodes(@human)) + assert_equal([ @primates ], @tree.adjacent_nodes(@chimpanzee)) + assert_equal([ @mouse, @rat, @mammals ].sort(&@by_id), + @tree.adjacent_nodes(@rodents).sort(&@by_id)) + assert_equal([ @human, @chimpanzee, @mammals ].sort(&@by_id), + @tree.adjacent_nodes(@primates).sort(&@by_id)) + assert_equal([ @rodents, @primates ].sort(&@by_id), + @tree.adjacent_nodes(@mammals).sort(&@by_id)) + # test for not existed nodes + assert_equal([], @tree.adjacent_nodes(Bio::PhylogeneticTree::Node.new)) + end + + def test_out_edges + assert_equal([[ @mouse, @rodents, @edge_rodents_mouse ]], + @tree.out_edges(@mouse)) + 
assert_equal([[ @rat, @rodents, @edge_rodents_rat ]], + @tree.out_edges(@rat)) + assert_equal([[ @human, @primates, @edge_primates_human ]], + @tree.out_edges(@human)) + assert_equal([[ @chimpanzee, @primates, @edge_primates_chimpanzee ]], + @tree.out_edges(@chimpanzee)) + + adjacents = [ @mouse, @rat, @mammals ] + edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] + @tree.out_edges(@rodents).each do |a| + assert_equal(@rodents, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @human, @chimpanzee, @mammals ] + edges = [ @edge_primates_human, @edge_primates_chimpanzee, + @edge_mammals_primates ] + @tree.out_edges(@primates).each do |a| + assert_equal(@primates, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @rodents, @primates ] + edges = [ @edge_mammals_rodents, @edge_mammals_primates ] + @tree.out_edges(@mammals).each do |a| + assert_equal(@mammals, a[0]) + assert_not_nil(i = adjacents.index(a[1])) + assert_equal(edges[i], a[2]) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) 
+ + # test for not existed nodes + assert_equal([], @tree.out_edges(Bio::PhylogeneticTree::Node.new)) + end + + def test_each_out_edge + flag = nil + r = @tree.each_out_edge(@mouse) do |src, tgt, edge| + assert_equal(@mouse, src) + assert_equal(@rodents, tgt) + assert_equal(@edge_rodents_mouse, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@rat) do |src, tgt, edge| + assert_equal(@rat, src) + assert_equal(@rodents, tgt) + assert_equal(@edge_rodents_rat, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@human) do |src, tgt, edge| + assert_equal(@human, src) + assert_equal(@primates, tgt) + assert_equal(@edge_primates_human, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + flag = nil + r = @tree.each_out_edge(@chimpanzee) do |src, tgt, edge| + assert_equal(@chimpanzee, src) + assert_equal(@primates, tgt) + assert_equal(@edge_primates_chimpanzee, edge) + flag = true + end + assert_equal(@tree, r) + assert_equal(true, flag) + + adjacents = [ @mouse, @rat, @mammals ] + edges = [ @edge_rodents_mouse, @edge_rodents_rat, @edge_mammals_rodents ] + @tree.each_out_edge(@rodents) do |src, tgt, edge| + assert_equal(@rodents, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + adjacents = [ @human, @chimpanzee, @mammals ] + edges = [ @edge_primates_human, @edge_primates_chimpanzee, + @edge_mammals_primates ] + @tree.each_out_edge(@primates) do |src, tgt, edge| + assert_equal(@primates, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) 
+ + adjacents = [ @rodents, @primates ] + edges = [ @edge_mammals_rodents, @edge_mammals_primates ] + @tree.each_out_edge(@mammals) do |src, tgt, edge| + assert_equal(@mammals, src) + assert_not_nil(i = adjacents.index(tgt)) + assert_equal(edges[i], edge) + adjacents.delete_at(i) + edges.delete_at(i) + end + assert_equal(true, adjacents.empty?) + assert_equal(true, edges.empty?) + + # test for not existed nodes + flag = nil + node = Bio::PhylogeneticTree::Node.new + r = @tree.each_out_edge(node) do |src, tgt, edge| + flag = true + end + assert_equal(@tree, r) + assert_equal(nil, flag) end