[BioRuby-cvs] bioruby/lib/bio/db/kegg taxonomy.rb,1.1,1.2

Katayama Toshiaki k at dev.open-bio.org
Mon Jul 9 10:29:18 UTC 2007


Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv29953

Modified Files:
	taxonomy.rb 
Log Message:
* Comments translated into English.


Index: taxonomy.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/taxonomy.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** taxonomy.rb	9 Jul 2007 08:48:03 -0000	1.1
--- taxonomy.rb	9 Jul 2007 10:29:16 -0000	1.2
***************
*** 25,33 ****
  
    def initialize(filename, orgs = [])
      @tree = Hash.new
      @path = Array.new
      @leaves = Hash.new
  
!     # ¥ë¡¼¥È¥Î¡¼¥É¤ò Genes ¤È¤¹¤ë
      @root = 'Genes'
  
--- 25,39 ----
  
    def initialize(filename, orgs = [])
+     # Stores the taxonomic tree as a linked list (implemented in Hash), so
+     # every node needs to have a unique name (key) to work correctly
      @tree = Hash.new
+ 
+     # Also stores the taxonomic tree as a list of arrays (full path)
      @path = Array.new
+ 
+     # Also stores all leaf nodes (organism codes) of every intermediate node
      @leaves = Hash.new
  
!     # tentative name for the root node (use accessor to change)
      @root = 'Genes'
  
***************
*** 39,43 ****
        next if line.strip.empty?
  
!       # ¥¿¥¯¥½¥Î¥ß¡¼³¬ÁØ¹Ô - # ¤Î¸Ä¿ô¤Ç³¬Áؤò¥¤¥ó¥Ç¥ó¥È¤¹¤ë½èÍý
        if line[/^#/]
  	level = line[/^#+/].length
--- 45,49 ----
        next if line.strip.empty?
  
!       # line for taxonomic hierarchy (indent according to the number of # marks)
        if line[/^#/]
  	level = line[/^#+/].length
***************
*** 45,69 ****
  	hier[level] = sanitize(label)
  
!       # À¸Êª¼ï¥ê¥¹¥È¹Ô - À¸Êª¼ï¥³¡¼¥É¤È¥¹¥È¥ì¥¤¥ó°ã¤¤¤ò¤Þ¤È¤á¤ë½èÍý
        else
  	tax, org, name, desc = line.chomp.split("\t")
          if orgs.nil? or orgs.empty? or orgs.include?(org)
            species, strain, = name.split('_')
!           # (0) species ̾¤¬Ä¾Á°¤Î¹Ô¤Î¤â¤Î¤ÈƱ¤¸¾ì¹ç¡¢¤½¤Î¥°¥ë¡¼¥×¤ËÄɲÃ
!           #  Gamma/enterobacteria ¤Ê¤É³ç¤ê¤¬Âç¤Þ¤«¤Ç¼ï¿ô¤Î¿¤¤¥°¥ë¡¼¥×¤ò
!           #  Ʊ¤¸¼ï̾¡Ê¥¹¥È¥ì¥¤¥ó°ã¤¤¡Ë¤´¤È¤Ë¥µ¥Ö¥°¥ë¡¼¥×²½¤¹¤ë¤Î¤¬ÌÜŪ
!           #   ex. E.coli, H.influenzae ¤Ê¤É
!           # ¥È¥ê¥Ã¥­¡¼¤ÊÉôʬ¡§
!           #  ¤â¤· species ̾¤¬¾å°ÌÃæ´Ö¥Î¡¼¥É¡Ê### ¹Ô¤Ê¤É¡Ë¤ÈƱ¤¸¡Ê´û½Ð¡Ë¤Ç¤¢¤ì¤Ð
!           #  Tree ¤ò Hash ¤Ç»ý¤Ä»ÅÍͤȥ³¥ó¥Õ¥ê¥¯¥È¤¹¤ë¤Î¤ÇÊÌ̾¤¬É¬Í×
!           # (1) species ̾¤¬·ÏÅý¤Î°Û¤Ê¤ë¾å°ÌÃæ´Ö¥Î¡¼¥É̾¤ÈƱ¤¸¾ì¹ç
!           #   ¢ª ¤È¤ê¤¢¤¨¤º species ̾¤Ë _sp ¤ò¤Ä¤±¤Æ¥³¥ó¥Õ¥ê¥¯¥È¤òÈò¤±¤ë (1-1)
!           #      ¤¹¤Ç¤Ë _sp ¤â»È¤ï¤ì¤Æ¤¤¤ë¾ì¹ç¤Ï strain ̾¤ò»È¤¦ (1-2)
!           #   ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd ¤È
            #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
!           #    -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd ¤È
            #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
!           # (2) species ̾¤¬¾åµ­Ãæ´Ö¥Î¡¼¥É̾¤ÈƱ¤¸¾ì¹ç
!           #   ¢ª ¤È¤ê¤¢¤¨¤º species ̾¤Ë _sp ¤ò¤Ä¤±¤Æ¥³¥ó¥Õ¥ê¥¯¥È¤òÈò¤±¤ë
            #   ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
            #       Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
--- 51,79 ----
  	hier[level] = sanitize(label)
  
!       # line for organism name (unify different strains of a species)
        else
  	tax, org, name, desc = line.chomp.split("\t")
          if orgs.nil? or orgs.empty? or orgs.include?(org)
            species, strain, = name.split('_')
!           # (0) Grouping of the strains of the same species.
!           #  If the name of species is the same as the previous line,
!           #  add the species to the same species group.
!           #   ex. Gamma/enterobacteria has a large number of organisms,
!           #       so sub grouping of strains is needed for E.coli strains etc.
!           #
!           # However, if the species name is already used, need to avoid
!           # collision of species name as the current implementation stores
!           # the tree as a Hash, which may cause an infinite loop.
!           #
!           # (1) If species name == the intermediate node of other lineage
!           #  Add '_sp' to the species name to avoid the conflict (1-1), and if
!           #  'species_sp' is already taken, use 'species_strain' instead (1-2).
!           #   ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
            #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
!           #    -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
            #       Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
!           #
!           # (2) If species name == the intermediate node of the same lineage
!           #  Add '_sp' to the species name to avoid the conflict.
            #   ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
            #       Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
***************
*** 90,97 ****
              end
            end
!           # hier ¤Ï [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] ¤Ë
!           # species ¤È org ¤Î [S_cerevisiae, sce] ¤ò²Ã¤¨¤¿·Á¼°
!           hier[level+1] = species
!           #hier[level+1] = sanitize(species)
            hier[level+2] = org
            ary = hier[1, level+2]
--- 100,107 ----
              end
            end
!           # 'hier' is an array of the taxonomic tree + species and strain name.
!           #  ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
!           #      [S_cerevisiae, sce]
!           hier[level+1] = species	# sanitize(species)
            hier[level+2] = org
            ary = hier[1, level+2]
***************
*** 115,120 ****
    end
  
!   # root ¥Î¡¼¥É¤Î²¼¤Ë [node, subnode, subsubnode, ..., leaf] ¤Ê¥Ñ¥¹¤òÄɲÃ
!   # ³ÆÃæ´Ö¥Î¡¼¥É¤¬»ÒÍ×ÁǤò¥Ï¥Ã¥·¥å¤ÇÊÝ»ý
    def add_to_tree(ary)
      parent = @root
--- 125,130 ----
    end
  
!   # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
!   # and every intermediate nodes stores their child nodes as a Hash.
    def add_to_tree(ary)
      parent = @root
***************
*** 126,130 ****
    end
  
!   # ³ÆÃæ´Ö¥Î¡¼¥É¤ËÂбþ¤¹¤ë¥ê¡¼¥Õ¤Î¥ê¥¹¥È¤òÊÝ»ý
    def add_to_leaves(ary)
      leaf = ary.last
--- 136,141 ----
    end
  
!   # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
!   # and stores leaf nodes to the every intermediate nodes as an Array.
    def add_to_leaves(ary)
      leaf = ary.last
***************
*** 135,173 ****
    end
  
!   # ³ÆÃæ´Ö¥Î¡¼¥É¤Þ¤Ç¤Î¥Ñ¥¹¤òÊÝ»ý
    def add_to_path(ary)
      @path << ary
    end
  
!   # ¿Æ¥Î¡¼¥É¤«¤é¸«¤Æ»Ò¥Î¡¼¥É¤¬Â¹¥Î¡¼¥É¤ò£±¤Ä¤·¤«»ý¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¡¢
!   # ¹¥Î¡¼¥É¤Î»Ò¶¡¡Ê¤Ò¹¡Ë¤ò¡¢»Ò¥Î¡¼¥É¤Î»Ò¡Ê¹¡Ë¤È¤¹¤ë
!   #
!   # ex.
!   #  Plants / Monocotyledons / grass family / osa --> Plants / Monocotyledons / osa
    #
    def compact(node = root)
!     # »Ò¥Î¡¼¥É¤¬¤¢¤ê
      if subnodes = @tree[node]
!       # ¤½¤ì¤¾¤ì¤Î»Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ
        subnodes.keys.each do |subnode|
-         # ¹¥Î¡¼¥É¤ò¼èÆÀ
          if subsubnodes = @tree[subnode]
!           # ¹¥Î¡¼¥É¤Î¿ô¤¬ 1 ¤Ä¤Î¾ì¹ç
            if subsubnodes.keys.size == 1
!             # ¹¥Î¡¼¥É¤Î̾Á°¤ò¼èÆÀ
              subsubnode = subsubnodes.keys.first
!             # ¹¥Î¡¼¥É¤Î»Ò¶¡¤ò¼èÆÀ
              if subsubsubnodes = @tree[subsubnode]
!               # ¹¥Î¡¼¥É¤Î»Ò¶¡¤ò»Ò¥Î¡¼¥É¤Î»Ò¶¡¤Ë¤¹¤²¤«¤¨
                @tree[subnode] = subsubsubnodes
!               # ¹¥Î¡¼¥É¤òºï½ü
                @tree[subnode].delete(subsubnode)
                warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
!               # ¿·¤·¤¤Â¹¥Î¡¼¥É¤Ç¤â compact ¤¬É¬Íפ«¤â¤·¤ì¤Ê¤¤¤¿¤á·«¤êÊÖ¤¹
                retry
              end
            end
          end
!         # »Ò¥Î¡¼¥É¤ò¿Æ¥Î¡¼¥É¤È¤·¤ÆºÆµ¢
          compact(subnode)
        end
--- 146,185 ----
    end
  
!   # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
!   # and stores the path itself in an Array.
    def add_to_path(ary)
      @path << ary
    end
  
!   # Compaction of intermediate nodes of the resulted taxonomic tree.
!   #  - If child node has only one child node (grandchild), make the child of
!   #    grandchild as a grandchild.
!   #  ex.
!   #    Plants / Monocotyledons / grass family / osa
!   #    --> Plants / Monocotyledons / osa
    #
    def compact(node = root)
!     # if the node has children
      if subnodes = @tree[node]
!       # obtain grandchildren for each child
        subnodes.keys.each do |subnode|
          if subsubnodes = @tree[subnode]
!           # if the number of grandchild node is 1
            if subsubnodes.keys.size == 1
!             # obtain the name of the grandchild node
              subsubnode = subsubnodes.keys.first
!             # obtain the child of the grandchild node
              if subsubsubnodes = @tree[subsubnode]
!               # make the child of the grandchild node a child of the child node
                @tree[subnode] = subsubsubnodes
!               # delete grandchild node
                @tree[subnode].delete(subsubnode)
                warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
!               # retry because the new grandchild may also need to be compacted.
                retry
              end
            end
          end
!         # repeat recursively
          compact(subnode)
        end
***************
*** 175,199 ****
    end
  
!   # ¥ê¡¼¥Õ¥Î¡¼¥É¤¬£±¤Ä¤Î¾ì¹ç¡¢¿Æ¥Î¡¼¥É¤ò¥ê¡¼¥Õ¥Î¡¼¥É¤Ë¤¹¤²¤«¤¨¤ë
!   #
!   # ex.
!   #  Plants / Monocotyledons / osa --> Plants / osa
    #
    def reduce(node = root)
!     # »Ò¥Î¡¼¥É¤¬¤¢¤ê
      if subnodes = @tree[node]
!       # ¤½¤ì¤¾¤ì¤Î»Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ
        subnodes.keys.each do |subnode|
-         # ¹¥Î¡¼¥É¤ò¼èÆÀ
          if subsubnodes = @tree[subnode]
!           # ¹¥Î¡¼¥É¤Î¿ô¤¬ 1 ¤Ä¤Î¾ì¹ç
            if subsubnodes.keys.size == 1
!             # ¹¥Î¡¼¥É¤Î̾Á°¤ò¼èÆÀ
              subsubnode = subsubnodes.keys.first
!             # ¹¥Î¡¼¥É¤¬¥ê¡¼¥Õ¤Î¾ì¹ç
              unless @tree[subsubnode]
!               # ¹¥Î¡¼¥É¤ò»Ò¥Î¡¼¥É¤Ë¤¹¤²¤«¤¨
                @tree[node].update(subsubnodes)
!               # »Ò¥Î¡¼¥É¤òºï½ü
                @tree[node].delete(subnode)
                warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
--- 187,212 ----
    end
  
!   # Reduction of the leaf node of the resulted taxonomic tree.
!   #  - If the parent node has only one leaf node, replace parent node
!   #    with the leaf node.
!   #  ex.
!   #   Plants / Monocotyledons / osa
!   #   --> Plants / osa
    #
    def reduce(node = root)
!     # if the node has children
      if subnodes = @tree[node]
!       # obtain grandchildren for each child
        subnodes.keys.each do |subnode|
          if subsubnodes = @tree[subnode]
!           # if the number of grandchild node is 1
            if subsubnodes.keys.size == 1
!             # obtain the name of the grandchild node
              subsubnode = subsubnodes.keys.first
!             # if the grandchild node is a leaf node
              unless @tree[subsubnode]
!               # make the grandchild node as a child node
                @tree[node].update(subsubnodes)
!               # delete child node
                @tree[node].delete(subnode)
                warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
***************
*** 201,205 ****
            end
          end
!         # »Ò¥Î¡¼¥É¤ò¿Æ¥Î¡¼¥É¤È¤·¤ÆºÆµ¢
          reduce(subnode)
        end
--- 214,218 ----
            end
          end
!         # repeat recursively
          reduce(subnode)
        end
***************
*** 207,212 ****
    end
  
!   # Í¿¤¨¤é¤ì¤¿¥Î¡¼¥É¤È¡¢»Ò¥Î¡¼¥É¤Î¥ê¥¹¥È¡ÊHash¡Ë¤ò¤¦¤±¤È¤ê¡¢
!   # »Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ¥¤¥Æ¥ì¡¼¥·¥ç¥ó¤¹¤ë
    def dfs(parent, &block)
      if children = @tree[parent]
--- 220,225 ----
    end
  
!   # Traverse the taxonomic tree by the depth first search method
!   # under the given (root or intermediate) node.
    def dfs(parent, &block)
      if children = @tree[parent]
***************
*** 218,222 ****
    end
  
!   # ¸½ºß¤Î³¬Áؤο¼¤µ¤â¥¤¥Æ¥ì¡¼¥·¥ç¥ó¤ËÅϤ¹
    def dfs_with_level(parent, &block)
      @level ||= 0
--- 231,236 ----
    end
  
!   # Similar to the dfs method but also passes the current level of the nest
!   # to the iterator.
    def dfs_with_level(parent, &block)
      @level ||= 0
***************
*** 231,239 ****
    end
  
!   # ¥Ä¥ê¡¼¹½Â¤¤ò¥¢¥¹¥­¡¼¥¢¡¼¥È¤Çɽ¼¨¤¹¤ë
    def to_s
      result = "#{@root}\n"
      @tree[@root].keys.each do |node|
!       result += subtree(node, "  ")
      end
      return result
--- 245,253 ----
    end
  
!   # Convert the taxonomic tree structure to a simple ascii art.
    def to_s
      result = "#{@root}\n"
      @tree[@root].keys.each do |node|
!       result += ascii_tree(node, "  ")
      end
      return result
***************
*** 242,252 ****
    private
  
!   # ¾åµ­ to_s ÍѤβ¼ÀÁ¤±¥á¥½¥Ã¥É
!   def subtree(node, indent)
      result = "#{indent}+- #{node}\n"
      indent += "  "
      @tree[node].keys.each do |child|
        if @tree[child]
!         result += subtree(child, indent)
        else
          result += "#{indent}+- #{child}\n"
--- 256,266 ----
    private
  
!   # Helper method for the to_s method.
!   def ascii_tree(node, indent)
      result = "#{indent}+- #{node}\n"
      indent += "  "
      @tree[node].keys.each do |child|
        if @tree[child]
!         result += ascii_tree(child, indent)
        else
          result += "#{indent}+- #{child}\n"




More information about the bioruby-cvs mailing list