[BioRuby-cvs] bioruby/lib/bio/db/kegg genes.rb,0.22,0.23
Katayama Toshiaki
k at dev.open-bio.org
Tue Jul 25 19:12:34 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv27986/lib/bio/db/kegg
Modified Files:
genes.rb
Log Message:
* changed to RDoc
* changed to use autoload
* 'keggclass' method is renamed to the 'pathway' method (this field was
renamed in the original database)
* removed splinks method (this field is obsoleted in the original database)
* chromosome method is slightly improved
* locations method is added to return Bio::Locations object when possible
* motif method is added (this field is added in the original database)
* codon_usage method is renamed to cu_list method, which returns an Array of
codon usages (the codon argument is disabled - use the hash for this purpose)
* cu method is renamed to codon_usage which returns a Hash of codon usage
* aalen and ntlen methods are changed to return the numbers written in the
entry (not the numbers calculated from the sequence length - use seq.length
for this purpose)
Index: genes.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/genes.rb,v
retrieving revision 0.22
retrieving revision 0.23
diff -C2 -d -r0.22 -r0.23
*** genes.rb 9 Nov 2005 12:30:07 -0000 0.22
--- genes.rb 25 Jul 2006 19:12:32 -0000 0.23
***************
*** 1,293 ****
#
! # bio/db/kegg/genes.rb - KEGG/GENES database class
#
! # Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k at bioruby.org>
#
! # This library is free software; you can redistribute it and/or
! # modify it under the terms of the GNU Lesser General Public
! # License as published by the Free Software Foundation; either
! # version 2 of the License, or (at your option) any later version.
#
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
#
! # You should have received a copy of the GNU Lesser General Public
! # License along with this library; if not, write to the Free Software
! # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
!
! require 'bio/db'
module Bio
! class KEGG
!
! class GENES < KEGGDB
! DELIMITER = RS = "\n///\n"
! TAGSIZE = 12
! def initialize(entry)
! super(entry, TAGSIZE)
! end
! def entry
! unless @data['ENTRY']
! hash = Hash.new('')
! if get('ENTRY').length > 30
! e = get('ENTRY')
! hash['id'] = e[12..29].strip
! hash['division'] = e[30..39].strip
! hash['organism'] = e[40..80].strip
! end
! @data['ENTRY'] = hash
! end
! @data['ENTRY']
! end
- def entry_id
- entry['id']
- end
! def division
! entry['division'] # CDS, tRNA etc.
end
! def organism
! entry['organism'] # H.sapiens etc.
! end
! def name
! field_fetch('NAME')
! end
! def genes
! name.split(', ')
! end
! def gene
! genes.first
! end
! def definition
! field_fetch('DEFINITION')
! end
! def eclinks
! # definition.slice(/\[EC:(.*?)\]/, 1) # ruby >= 1.7
! # definition.scan(/\[EC:(.*?)\]/).flatten
! if /\[EC:(.*?)\]/.match(definition)
! $1.split(/\s+/)
! else
! []
! end
! end
! def splinks
! # definition.slice(/\[SP:(.*?)\]/, 1) # ruby >= 1.7
! # definition.scan(/\[SP:(.*?)\]/).flatten
! if /\[SP:(.*?)\]/.match(definition)
! $1.split(/\s+/)
! else
! []
! end
! end
! def keggclass
! field_fetch('CLASS')
! end
! def pathways
! keggclass.scan(/\[PATH:(.*?)\]/).flatten
! end
! def position
! unless @data['POSITION']
! @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
! end
! @data['POSITION']
! end
! def gbposition
! position.sub(/.*?:/, '')
! end
! def chromosome
! if position =~ /:/
! position.sub(/:.*/, '')
! else
! nil
! end
! end
! def dblinks
! unless @data['DBLINKS']
! hash = {}
! get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
! id_array = str.strip.split(/\s+/)
! hash[db] = id_array
! end
! @data['DBLINKS'] = hash
! end
! @data['DBLINKS'] # Hash of Array of DB IDs in DBLINKS
! end
! def codon_usage(codon = nil)
! unless @data['CODON_USAGE']
! ary = []
! get('CODON_USAGE').sub(/.*/,'').each_line do |line| # cut 1st line
! line.chomp.sub(/^.{11}/, '').scan(/..../) do |cu|
! ary.push(cu.to_i)
! end
! end
! @data['CODON_USAGE'] = ary
! end
! if codon
! h = { 't' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
! x, y, z = codon.downcase.scan(/\w/)
! codon_num = h[x] * 16 + h[y] * 4 + h[z]
! @data['CODON_USAGE'][codon_num] # CODON_USAGE of the codon
else
! return @data['CODON_USAGE'] # Array of CODON_USAGE (default)
! end
! end
!
! def cu
! hash = Hash.new
! list = codon_usage
! base = %w(t c a g)
! base.each_with_index do |x, i|
! base.each_with_index do |y, j|
! base.each_with_index do |z, k|
! hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
! end
! end
! end
! return hash
! end
!
! def aaseq
! unless @data['AASEQ']
! @data['AASEQ'] = Sequence::AA.new(fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
end
! @data['AASEQ']
end
! def aalen
! @data['AALEN'] = aaseq.length
end
! def ntseq
! unless @data['NTSEQ']
! @data['NTSEQ'] = Sequence::NA.new(fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
end
- @data['NTSEQ']
end
! alias naseq ntseq
! def ntlen
! @data['NTLEN'] = ntseq.length
end
- alias nalen ntlen
-
end
!
end
! end
!
!
!
! if __FILE__ == $0
! require 'bio/io/fetch'
! e = Bio::Fetch.query('genes', 'b0002')
! g = Bio::KEGG::GENES.new(e)
! p g.entry
! p g.entry_id
! p g.division
! p g.name
! p g.gene
! p g.definition
! p g.keggclass
! p g.position
! p g.dblinks
! p g.codon_usage
! p g.cu
! p g.aaseq
! p g.aalen
! p g.naseq
! p g.nalen
! p g.eclinks
! p g.splinks
! p g.pathways
end
- =begin
-
- = Bio::KEGG::GENES
-
- === Initialize
-
- --- Bio::KEGG::GENES.new
-
- === ENTRY
-
- --- Bio::KEGG::GENES#entry -> Hash
- --- Bio::KEGG::GENES#entry_id -> String
- --- Bio::KEGG::GENES#division -> String
- --- Bio::KEGG::GENES#organism -> String
-
- === NAME
-
- --- Bio::KEGG::GENES#name -> String
- --- Bio::KEGG::GENES#genes -> Array
- --- Bio::KEGG::GENES#gene -> String
-
- === DEFINITION
-
- --- Bio::KEGG::GENES#definition -> String
- --- Bio::KEGG::GENES#eclinks -> Array
- --- Bio::KEGG::GENES#splinks -> Array
-
- === CLASS
-
- --- Bio::KEGG::GENES#keggclass -> String
- --- Bio::KEGG::GENES#pathways -> Array
-
- === POSITION
-
- --- Bio::KEGG::GENES#position -> String
-
- === DBLINKS
-
- --- Bio::KEGG::GENES#dblinks -> Hash
-
- === CODON_USAGE
-
- --- Bio::KEGG::GENES#codon_usage(codon = nil) -> Array or Fixnum
- --- Bio::KEGG::GENES#cu -> Hash
-
- === AASEQ
- --- Bio::KEGG::GENES#aaseq -> Bio::Sequence::AA
- --- Bio::KEGG::GENES#aalen -> Fixnum
-
- === NTSEQ
- --- Bio::KEGG::GENES#ntseq -> Bio::Sequence::NA
- --- Bio::KEGG::GENES#naseq -> Bio::Sequence::NA
- --- Bio::KEGG::GENES#ntlen -> Fixnum
- --- Bio::KEGG::GENES#nalen -> Fixnum
-
- =end
--- 1,259 ----
#
! # = bio/db/kegg/genes.rb - KEGG/GENES database class
#
! # Copyright:: Copyright (C) 2001, 2002, 2006
! # Toshiaki Katayama <k at bioruby.org>
! # License:: Ruby's
#
! # $Id$
#
#
! # == KEGG GENES parser
#
! # See http://www.genome.jp/kegg/genes.html
#
! #
! # === Examples
! #
! # require 'bio/io/fetch'
! # entry_string = Bio::Fetch.query('genes', 'b0002')
! #
! # entry = Bio::KEGG::GENES.new(entry_string)
! #
! # # ENTRY
! # p entry.entry # => Hash
! #
! # p entry.entry_id # => String
! # p entry.division # => String
! # p entry.organism # => String
! #
! # # NAME
! # p entry.name # => String
! # p entry.genes # => Array
! # p entry.gene # => String
! #
! # # DEFINITION
! # p entry.definition # => String
! # p entry.eclinks # => Array
! #
! # # PATHWAY
! # p entry.pathway # => String
! # p entry.pathways # => Array
! #
! # # POSITION
! # p entry.position # => String
! # p entry.chromosome # => String
! # p entry.gbposition # => String
! # p entry.locations # => Bio::Locations
! #
! # # MOTIF
! # p entry.motif # => Hash of Array
! #
! # # DBLINKS
! # p entry.dblinks # => Hash of Array
! #
! # # CODON_USAGE
! # p entry.codon_usage # => Hash
! # p entry.cu_list # => Array
! #
! # # AASEQ
! # p entry.aaseq # => Bio::Sequence::AA
! # p entry.aalen # => Fixnum
! #
! # # NTSEQ
! # p entry.ntseq # => Bio::Sequence::NA
! # p entry.naseq # => Bio::Sequence::NA
! # p entry.ntlen # => Fixnum
! # p entry.nalen # => Fixnum
! #
module Bio
! autoload :KEGGDB, 'bio/db'
! autoload :Locations, 'bio/location'
! autoload :Sequence, 'bio/sequence'
! class KEGG
! class GENES < KEGGDB
+ DELIMITER = RS = "\n///\n"
+ TAGSIZE = 12
! def initialize(entry)
! super(entry, TAGSIZE)
! end
! def entry
! unless @data['ENTRY']
! hash = Hash.new('')
! if get('ENTRY').length > 30
! e = get('ENTRY')
! hash['id'] = e[12..29].strip
! hash['division'] = e[30..39].strip
! hash['organism'] = e[40..80].strip
end
+ @data['ENTRY'] = hash
+ end
+ @data['ENTRY']
+ end
! def entry_id
! entry['id']
! end
! def division
! entry['division'] # CDS, tRNA etc.
! end
! def organism
! entry['organism'] # H.sapiens etc.
! end
! def name
! field_fetch('NAME')
! end
! def genes
! name.split(', ')
! end
! def gene
! genes.first
! end
! def definition
! field_fetch('DEFINITION')
! end
! def eclinks
! ec_list = definition.slice(/\[EC:(.*?)\]/, 1)
! if ec_list
! ec_list.strip.split(/\s+/)
! else
! []
! end
! end
! def pathway
! field_fetch('PATHWAY')
! end
! def pathways
! pathway.scan(/\[PATH:(.*?)\]/).flatten
! end
! def position
! unless @data['POSITION']
! @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
! end
! @data['POSITION']
! end
! def chromosome
! if position[/:/]
! position.sub(/:.*/, '')
! elsif ! position[/\.\./]
! position
! else
! nil
! end
! end
! def gbposition
! position.sub(/.*?:/, '')
! end
! def locations
! Bio::Locations.new(gbposition)
! end
! def motif
! unless @data['MOTIF']
! hash = {}
! db = nil
! lines_fetch('MOTIF').each do |line|
! if line[/^\S+:/]
! db, str = line.split(/:/)
else
! str = line
end
! hash[db] ||= []
! hash[db] += str.strip.split(/\s+/)
end
+ @data['MOTIF'] = hash
+ end
+ @data['MOTIF'] # Hash of Array of IDs in MOTIF
+ end
! def dblinks
! unless @data['DBLINKS']
! hash = {}
! get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
! id_array = str.strip.split(/\s+/)
! hash[db] = id_array
end
+ @data['DBLINKS'] = hash
+ end
+ @data['DBLINKS'] # Hash of Array of IDs in DBLINKS
+ end
! def codon_usage(codon = nil)
! unless @data['CODON_USAGE']
! hash = Hash.new
! list = cu_list
! base = %w(t c a g)
! base.each_with_index do |x, i|
! base.each_with_index do |y, j|
! base.each_with_index do |z, k|
! hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
! end
end
end
! @data['CODON_USAGE'] = hash
! end
! @data['CODON_USAGE']
! end
! def cu_list
! ary = []
! get('CODON_USAGE').sub(/.*/,'').each_line do |line| # cut 1st line
! line.chomp.sub(/^.{11}/, '').scan(/..../) do |cu|
! ary.push(cu.to_i)
end
end
! return ary
end
! def aaseq
! unless @data['AASEQ']
! @data['AASEQ'] = Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
! end
! @data['AASEQ']
! end
! def aalen
! fetch('AASEQ')[/\d+/].to_i
! end
! def ntseq
! unless @data['NTSEQ']
! @data['NTSEQ'] = Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
! end
! @data['NTSEQ']
! end
! alias naseq ntseq
! def ntlen
! fetch('NTSEQ')[/\d+/].to_i
! end
! alias nalen ntlen
end
+ end # KEGG
+ end # Bio
More information about the bioruby-cvs
mailing list