[BioRuby-cvs] bioruby/lib/bio/db/kegg taxonomy.rb,1.1,1.2
Katayama Toshiaki
k at dev.open-bio.org
Mon Jul 9 10:29:18 UTC 2007
- Previous message: [BioRuby-cvs] bioruby/lib bio.rb,1.84,1.85
- Next message: [BioRuby-cvs] bioruby/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates bg.gif, NONE, 1.1 console.png, NONE, 1.1 _classes.rhtml, 1.1, 1.2 _log.rhtml, 1.1, 1.2 _methods.rhtml, 1.2, 1.3 _modules.rhtml, 1.1, 1.2 bioruby.css, 1.4, 1.5 bioruby.rhtml, 1.3, 1.4 bioruby_controller.rb, 1.5, 1.6 commands.rhtml, 1.1, 1.2 index.rhtml, 1.3, 1.4
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv29953
Modified Files:
taxonomy.rb
Log Message:
* Comments translated into English.
Index: taxonomy.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/taxonomy.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** taxonomy.rb 9 Jul 2007 08:48:03 -0000 1.1
--- taxonomy.rb 9 Jul 2007 10:29:16 -0000 1.2
***************
*** 25,33 ****
def initialize(filename, orgs = [])
@tree = Hash.new
@path = Array.new
@leaves = Hash.new
! # ルートノードを Genes とする
@root = 'Genes'
--- 25,39 ----
def initialize(filename, orgs = [])
+ # Stores the taxonomic tree as a linked list (implemented in Hash), so
+ # every node needs to have a unique name (key) to work correctly
@tree = Hash.new
+
+ # Also stores the taxonomic tree as a list of arrays (full path)
@path = Array.new
+
+ # Also stores all leaf nodes (organism codes) of every intermediate node
@leaves = Hash.new
! # tentative name for the root node (use accessor to change)
@root = 'Genes'
***************
*** 39,43 ****
next if line.strip.empty?
! # ¥¿¥¯¥½¥Î¥ß¡¼³¬ÁØ¹Ô - # ¤Î¸Ä¿ô¤Ç³¬Áؤò¥¤¥ó¥Ç¥ó¥È¤¹¤ë½èÍý
if line[/^#/]
level = line[/^#+/].length
--- 45,49 ----
next if line.strip.empty?
! # line for taxonomic hierarchy (indent according to the number of # marks)
if line[/^#/]
level = line[/^#+/].length
***************
*** 45,69 ****
hier[level] = sanitize(label)
! # À¸Êª¼ï¥ê¥¹¥È¹Ô - À¸Êª¼ï¥³¡¼¥É¤È¥¹¥È¥ì¥¤¥ó°ã¤¤¤ò¤Þ¤È¤á¤ë½èÍý
else
tax, org, name, desc = line.chomp.split("\t")
if orgs.nil? or orgs.empty? or orgs.include?(org)
species, strain, = name.split('_')
! # (0) species ̾¤¬Ä¾Á°¤Î¹Ô¤Î¤â¤Î¤ÈƱ¤¸¾ì¹ç¡¢¤½¤Î¥°¥ë¡¼¥×¤ËÄɲÃ
! # Gamma/enterobacteria ¤Ê¤É³ç¤ê¤¬Âç¤Þ¤«¤Ç¼ï¿ô¤Î¿¤¤¥°¥ë¡¼¥×¤ò
! # Ʊ¤¸¼ï̾¡Ê¥¹¥È¥ì¥¤¥ó°ã¤¤¡Ë¤´¤È¤Ë¥µ¥Ö¥°¥ë¡¼¥×²½¤¹¤ë¤Î¤¬ÌÜŪ
! # ex. E.coli, H.influenzae ¤Ê¤É
! # ¥È¥ê¥Ã¥¡¼¤ÊÉôʬ¡§
! # ¤â¤· species ̾¤¬¾å°ÌÃæ´Ö¥Î¡¼¥É¡Ê### ¹Ô¤Ê¤É¡Ë¤ÈƱ¤¸¡Ê´û½Ð¡Ë¤Ç¤¢¤ì¤Ð
! # Tree ¤ò Hash ¤Ç»ý¤Ä»ÅÍͤȥ³¥ó¥Õ¥ê¥¯¥È¤¹¤ë¤Î¤ÇÊÌ̾¤¬É¬Í×
! # (1) species ̾¤¬·ÏÅý¤Î°Û¤Ê¤ë¾å°ÌÃæ´Ö¥Î¡¼¥É̾¤ÈƱ¤¸¾ì¹ç
! # ¢ª ¤È¤ê¤¢¤¨¤º species ̾¤Ë _sp ¤ò¤Ä¤±¤Æ¥³¥ó¥Õ¥ê¥¯¥È¤òÈò¤±¤ë (1-1)
! # ¤¹¤Ç¤Ë _sp ¤â»È¤ï¤ì¤Æ¤¤¤ë¾ì¹ç¤Ï strain ̾¤ò»È¤¦ (1-2)
! # ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd ¤È
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
! # -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd ¤È
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
! # (2) species ̾¤¬¾åµÃæ´Ö¥Î¡¼¥É̾¤ÈƱ¤¸¾ì¹ç
! # ¢ª ¤È¤ê¤¢¤¨¤º species ̾¤Ë _sp ¤ò¤Ä¤±¤Æ¥³¥ó¥Õ¥ê¥¯¥È¤òÈò¤±¤ë
# ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
# Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
--- 51,79 ----
hier[level] = sanitize(label)
! # line for organism name (unify different strains of a species)
else
tax, org, name, desc = line.chomp.split("\t")
if orgs.nil? or orgs.empty? or orgs.include?(org)
species, strain, = name.split('_')
! # (0) Grouping of the strains of the same species.
! # If the name of species is the same as the previous line,
! # add the species to the same species group.
! # ex. Gamma/enterobacteria has a large number of organisms,
! # so sub grouping of strains is needed for E.coli strains etc.
! #
! # However, if the species name is already used, need to avoid
! # collision of species name as the current implementation stores
! # the tree as a Hash, which may cause an infinite loop.
! #
! # (1) If species name == the intermediate node of other lineage
! # Add '_sp' to the species name to avoid the conflict (1-1), and if
! # 'species_sp' is already taken, use 'species_strain' instead (1-2).
! # ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
! # -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
! #
! # (2) If species name == the intermediate node of the same lineage
! # Add '_sp' to the species name to avoid the conflict.
# ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
# Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
***************
*** 90,97 ****
end
end
! # hier ¤Ï [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] ¤Ë
! # species ¤È org ¤Î [S_cerevisiae, sce] ¤ò²Ã¤¨¤¿·Á¼°
! hier[level+1] = species
! #hier[level+1] = sanitize(species)
hier[level+2] = org
ary = hier[1, level+2]
--- 100,107 ----
end
end
! # 'hier' is an array of the taxonomic tree + species name and organism code.
! # ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
! # [S_cerevisiae, sce]
! hier[level+1] = species # sanitize(species)
hier[level+2] = org
ary = hier[1, level+2]
***************
*** 115,120 ****
end
! # root ¥Î¡¼¥É¤Î²¼¤Ë [node, subnode, subsubnode, ..., leaf] ¤Ê¥Ñ¥¹¤òÄɲÃ
! # ³ÆÃæ´Ö¥Î¡¼¥É¤¬»ÒÍ×ÁǤò¥Ï¥Ã¥·¥å¤ÇÊÝ»ý
def add_to_tree(ary)
parent = @root
--- 125,130 ----
end
! # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node;
! # every intermediate node stores its child nodes as a Hash.
def add_to_tree(ary)
parent = @root
***************
*** 126,130 ****
end
! # 各中間ノードに対応するリーフのリストを保持
def add_to_leaves(ary)
leaf = ary.last
--- 136,141 ----
end
! # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
! # and stores the leaf node for every intermediate node as an Array.
def add_to_leaves(ary)
leaf = ary.last
***************
*** 135,173 ****
end
! # 各中間ノードまでのパスを保持
def add_to_path(ary)
@path << ary
end
! # ¿Æ¥Î¡¼¥É¤«¤é¸«¤Æ»Ò¥Î¡¼¥É¤¬Â¹¥Î¡¼¥É¤ò£±¤Ä¤·¤«»ý¤Ã¤Æ¤¤¤Ê¤¤¾ì¹ç¡¢
! # ¹¥Î¡¼¥É¤Î»Ò¶¡¡Ê¤Ò¹¡Ë¤ò¡¢»Ò¥Î¡¼¥É¤Î»Ò¡Ê¹¡Ë¤È¤¹¤ë
! #
! # ex.
! # Plants / Monocotyledons / grass family / osa --> Plants / Monocotyledons / osa
#
def compact(node = root)
! # »Ò¥Î¡¼¥É¤¬¤¢¤ê
if subnodes = @tree[node]
! # ¤½¤ì¤¾¤ì¤Î»Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ
subnodes.keys.each do |subnode|
- # ¹¥Î¡¼¥É¤ò¼èÆÀ
if subsubnodes = @tree[subnode]
! # ¹¥Î¡¼¥É¤Î¿ô¤¬ 1 ¤Ä¤Î¾ì¹ç
if subsubnodes.keys.size == 1
! # ¹¥Î¡¼¥É¤Î̾Á°¤ò¼èÆÀ
subsubnode = subsubnodes.keys.first
! # ¹¥Î¡¼¥É¤Î»Ò¶¡¤ò¼èÆÀ
if subsubsubnodes = @tree[subsubnode]
! # ¹¥Î¡¼¥É¤Î»Ò¶¡¤ò»Ò¥Î¡¼¥É¤Î»Ò¶¡¤Ë¤¹¤²¤«¤¨
@tree[subnode] = subsubsubnodes
! # ¹¥Î¡¼¥É¤òºï½ü
@tree[subnode].delete(subsubnode)
warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
! # ¿·¤·¤¤Â¹¥Î¡¼¥É¤Ç¤â compact ¤¬É¬Íפ«¤â¤·¤ì¤Ê¤¤¤¿¤á·«¤êÊÖ¤¹
retry
end
end
end
! # »Ò¥Î¡¼¥É¤ò¿Æ¥Î¡¼¥É¤È¤·¤ÆºÆµ¢
compact(subnode)
end
--- 146,185 ----
end
! # Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
! # and stores the path itself in an Array.
def add_to_path(ary)
@path << ary
end
! # Compaction of intermediate nodes of the resulting taxonomic tree.
! # - If a child node has only one child node (grandchild), make the children
! # of that grandchild the children of the child node.
! # ex.
! # Plants / Monocotyledons / grass family / osa
! # --> Plants / Monocotyledons / osa
#
def compact(node = root)
! # if the node has children
if subnodes = @tree[node]
! # obtain grandchildren for each child
subnodes.keys.each do |subnode|
if subsubnodes = @tree[subnode]
! # if the number of grandchild nodes is 1
if subsubnodes.keys.size == 1
! # obtain the name of the grandchild node
subsubnode = subsubnodes.keys.first
! # obtain the children of the grandchild node
if subsubsubnodes = @tree[subsubnode]
! # make the children of the grandchild node children of the child node
@tree[subnode] = subsubsubnodes
! # delete grandchild node
@tree[subnode].delete(subsubnode)
warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
! # retry, because the new grandchild may also need to be compacted.
retry
end
end
end
! # repeat recursively
compact(subnode)
end
***************
*** 175,199 ****
end
! # ¥ê¡¼¥Õ¥Î¡¼¥É¤¬£±¤Ä¤Î¾ì¹ç¡¢¿Æ¥Î¡¼¥É¤ò¥ê¡¼¥Õ¥Î¡¼¥É¤Ë¤¹¤²¤«¤¨¤ë
! #
! # ex.
! # Plants / Monocotyledons / osa --> Plants / osa
#
def reduce(node = root)
! # »Ò¥Î¡¼¥É¤¬¤¢¤ê
if subnodes = @tree[node]
! # ¤½¤ì¤¾¤ì¤Î»Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ
subnodes.keys.each do |subnode|
- # ¹¥Î¡¼¥É¤ò¼èÆÀ
if subsubnodes = @tree[subnode]
! # ¹¥Î¡¼¥É¤Î¿ô¤¬ 1 ¤Ä¤Î¾ì¹ç
if subsubnodes.keys.size == 1
! # ¹¥Î¡¼¥É¤Î̾Á°¤ò¼èÆÀ
subsubnode = subsubnodes.keys.first
! # ¹¥Î¡¼¥É¤¬¥ê¡¼¥Õ¤Î¾ì¹ç
unless @tree[subsubnode]
! # ¹¥Î¡¼¥É¤ò»Ò¥Î¡¼¥É¤Ë¤¹¤²¤«¤¨
@tree[node].update(subsubnodes)
! # »Ò¥Î¡¼¥É¤òºï½ü
@tree[node].delete(subnode)
warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
--- 187,212 ----
end
! # Reduction of the leaf node of the resulting taxonomic tree.
! # - If the parent node has only one leaf node, replace parent node
! # with the leaf node.
! # ex.
! # Plants / Monocotyledons / osa
! # --> Plants / osa
#
def reduce(node = root)
! # if the node has children
if subnodes = @tree[node]
! # obtain grandchildren for each child
subnodes.keys.each do |subnode|
if subsubnodes = @tree[subnode]
! # if the number of grandchild nodes is 1
if subsubnodes.keys.size == 1
! # obtain the name of the grandchild node
subsubnode = subsubnodes.keys.first
! # if the grandchild node is a leaf node
unless @tree[subsubnode]
! # make the grandchild node as a child node
@tree[node].update(subsubnodes)
! # delete child node
@tree[node].delete(subnode)
warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
***************
*** 201,205 ****
end
end
! # »Ò¥Î¡¼¥É¤ò¿Æ¥Î¡¼¥É¤È¤·¤ÆºÆµ¢
reduce(subnode)
end
--- 214,218 ----
end
end
! # repeat recursively
reduce(subnode)
end
***************
*** 207,212 ****
end
! # Í¿¤¨¤é¤ì¤¿¥Î¡¼¥É¤È¡¢»Ò¥Î¡¼¥É¤Î¥ê¥¹¥È¡ÊHash¡Ë¤ò¤¦¤±¤È¤ê¡¢
! # »Ò¥Î¡¼¥É¤Ë¤Ä¤¤¤Æ¥¤¥Æ¥ì¡¼¥·¥ç¥ó¤¹¤ë
def dfs(parent, &block)
if children = @tree[parent]
--- 220,225 ----
end
! # Traverse the taxonomic tree by the depth first search method
! # under the given (root or intermediate) node.
def dfs(parent, &block)
if children = @tree[parent]
***************
*** 218,222 ****
end
! # ¸½ºß¤Î³¬Áؤο¼¤µ¤â¥¤¥Æ¥ì¡¼¥·¥ç¥ó¤ËÅϤ¹
def dfs_with_level(parent, &block)
@level ||= 0
--- 231,236 ----
end
! # Similar to the dfs method but also passes the current level of the nest
! # to the iterator.
def dfs_with_level(parent, &block)
@level ||= 0
***************
*** 231,239 ****
end
! # ツリー構造をアスキーアートで表示する
def to_s
result = "#{@root}\n"
@tree[@root].keys.each do |node|
! result += subtree(node, " ")
end
return result
--- 245,253 ----
end
! # Convert the taxonomic tree structure to a simple ascii art.
def to_s
result = "#{@root}\n"
@tree[@root].keys.each do |node|
! result += ascii_tree(node, " ")
end
return result
***************
*** 242,252 ****
private
! # ¾åµ to_s ÍѤβ¼ÀÁ¤±¥á¥½¥Ã¥É
! def subtree(node, indent)
result = "#{indent}+- #{node}\n"
indent += " "
@tree[node].keys.each do |child|
if @tree[child]
! result += subtree(child, indent)
else
result += "#{indent}+- #{child}\n"
--- 256,266 ----
private
! # Helper method for the to_s method.
! def ascii_tree(node, indent)
result = "#{indent}+- #{node}\n"
indent += " "
@tree[node].keys.each do |child|
if @tree[child]
! result += ascii_tree(child, indent)
else
result += "#{indent}+- #{child}\n"
- Previous message: [BioRuby-cvs] bioruby/lib bio.rb,1.84,1.85
- Next message: [BioRuby-cvs] bioruby/lib/bio/shell/rails/vendor/plugins/generators/bioruby/templates bg.gif, NONE, 1.1 console.png, NONE, 1.1 _classes.rhtml, 1.1, 1.2 _log.rhtml, 1.1, 1.2 _methods.rhtml, 1.2, 1.3 _modules.rhtml, 1.1, 1.2 bioruby.css, 1.4, 1.5 bioruby.rhtml, 1.3, 1.4 bioruby_controller.rb, 1.5, 1.6 commands.rhtml, 1.1, 1.2 index.rhtml, 1.3, 1.4
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the bioruby-cvs
mailing list