[BioRuby-cvs] bioruby/lib/bio/db/kegg genes.rb,0.22,0.23

Tue Jul 25 19:12:34 UTC 2006

Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv27986/lib/bio/db/kegg

Modified Files:
	genes.rb 
Log Message:
* changed to RDoc
* changed to use autoload
* 'keggclass' method is changed to the 'pathway' method (this field is
  renamed in the original database)
* removed splinks method (this field is obsoleted in the original database)
* chromosome method is slightly improved
* locations method is added to return Bio::Locations object when possible
* motif method is added (this field is added in the original database)
* codon_usage method is renamed to cu_list method, which returns an Array of
  codon usages (the codon argument is disabled - use the Hash returned by the
  new codon_usage method for this purpose)
* cu method is renamed to codon_usage, which returns a Hash of codon usage
* aalen and ntlen methods are changed to return the numbers written in the
  entry (not the numbers calculated from the sequence length - use seq.length
  for this purpose)

Index: genes.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/genes.rb,v
retrieving revision 0.22
retrieving revision 0.23
diff -C2 -d -r0.22 -r0.23
*** genes.rb	9 Nov 2005 12:30:07 -0000	0.22
--- genes.rb	25 Jul 2006 19:12:32 -0000	0.23
***************
*** 1,293 ****
  #
! # bio/db/kegg/genes.rb - KEGG/GENES database class
  #
! #   Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k at bioruby.org>
  #
! #  This library is free software; you can redistribute it and/or
! #  modify it under the terms of the GNU Lesser General Public
! #  License as published by the Free Software Foundation; either
! #  version 2 of the License, or (at your option) any later version.
  #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
  #
! #  You should have received a copy of the GNU Lesser General Public
! #  License along with this library; if not, write to the Free Software
! #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #  $Id$
  #
! 
! require 'bio/db'

  module Bio

!   class KEGG
! 
!     class GENES < KEGGDB

!       DELIMITER	= RS = "\n///\n"
!       TAGSIZE	= 12

!       def initialize(entry)
!         super(entry, TAGSIZE)
!       end

!       def entry
!         unless @data['ENTRY']
!           hash = Hash.new('')
!           if get('ENTRY').length > 30
!             e = get('ENTRY')
!             hash['id']       = e[12..29].strip
!             hash['division'] = e[30..39].strip
!             hash['organism'] = e[40..80].strip
!           end
!           @data['ENTRY'] = hash
!         end
!         @data['ENTRY']
!       end

-       def entry_id
-         entry['id']
-       end

!       def division
!         entry['division']			# CDS, tRNA etc.
        end

!       def organism
!         entry['organism']			# H.sapiens etc.
!       end

!       def name
!         field_fetch('NAME')
!       end

!       def genes
!         name.split(', ')
!       end

!       def gene
!         genes.first
!       end

!       def definition
!         field_fetch('DEFINITION')
!       end

!       def eclinks
! #       definition.slice(/\[EC:(.*?)\]/, 1)	# ruby >= 1.7
! #       definition.scan(/\[EC:(.*?)\]/).flatten
!         if /\[EC:(.*?)\]/.match(definition)
!           $1.split(/\s+/)
!         else
!           []
!         end
!       end

!       def splinks
! #       definition.slice(/\[SP:(.*?)\]/, 1)	# ruby >= 1.7
! #       definition.scan(/\[SP:(.*?)\]/).flatten
!         if /\[SP:(.*?)\]/.match(definition)
!           $1.split(/\s+/)
!         else
!           []
!         end
!       end

!       def keggclass
!         field_fetch('CLASS')
!       end

!       def pathways
!         keggclass.scan(/\[PATH:(.*?)\]/).flatten
!       end

!       def position
!         unless @data['POSITION']
!           @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
!         end
!         @data['POSITION']
!       end

!       def gbposition
!         position.sub(/.*?:/, '')
!       end

!       def chromosome
!         if position =~ /:/
!           position.sub(/:.*/, '')
!         else
!           nil
!         end
!       end

!       def dblinks
!         unless @data['DBLINKS']
!           hash = {}
!           get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
!             id_array = str.strip.split(/\s+/)
!             hash[db] = id_array
!           end
!           @data['DBLINKS'] = hash
!         end
!         @data['DBLINKS']		# Hash of Array of DB IDs in DBLINKS
!       end

!       def codon_usage(codon = nil)
!         unless @data['CODON_USAGE']
!           ary = []
!           get('CODON_USAGE').sub(/.*/,'').each_line do |line|	# cut 1st line
!             line.chomp.sub(/^.{11}/, '').scan(/..../) do |cu|
!               ary.push(cu.to_i)
!             end
!           end
!           @data['CODON_USAGE'] = ary
!         end

!         if codon
!           h = { 't' => 0, 'c' => 1, 'a' => 2, 'g' => 3 }
!           x, y, z = codon.downcase.scan(/\w/)
!           codon_num = h[x] * 16 + h[y] * 4 + h[z]
!           @data['CODON_USAGE'][codon_num]	# CODON_USAGE of the codon
          else
!           return @data['CODON_USAGE']	# Array of CODON_USAGE (default)
!         end
!       end
! 
!       def cu
!         hash = Hash.new
!         list = codon_usage
!         base = %w(t c a g)
!         base.each_with_index do |x, i|
!           base.each_with_index do |y, j|
!             base.each_with_index do |z, k|
!               hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
!             end
!           end
!         end
!         return hash
!       end
! 
!       def aaseq
!         unless @data['AASEQ']
!           @data['AASEQ'] = Sequence::AA.new(fetch('AASEQ').gsub(/[\s\d\/]+/, ''))
          end
!         @data['AASEQ']
        end

!       def aalen
!         @data['AALEN'] = aaseq.length
        end

!       def ntseq
!         unless @data['NTSEQ']
!           @data['NTSEQ'] = Sequence::NA.new(fetch('NTSEQ').gsub(/[\s\d\/]+/, ''))
          end
-         @data['NTSEQ']
        end
!       alias naseq ntseq

!       def ntlen
!         @data['NTLEN'] = ntseq.length
        end
-       alias nalen ntlen
- 
      end
! 
    end

! end
! 
! 
! 
! if __FILE__ == $0

!   require 'bio/io/fetch'

!   e = Bio::Fetch.query('genes', 'b0002')
!   g = Bio::KEGG::GENES.new(e)

!   p g.entry
!   p g.entry_id
!   p g.division
!   p g.name
!   p g.gene
!   p g.definition
!   p g.keggclass
!   p g.position
!   p g.dblinks
!   p g.codon_usage
!   p g.cu
!   p g.aaseq
!   p g.aalen
!   p g.naseq
!   p g.nalen
!   p g.eclinks
!   p g.splinks
!   p g.pathways

  end

- =begin
- 
- = Bio::KEGG::GENES
- 
- === Initialize
- 
- --- Bio::KEGG::GENES.new
- 
- === ENTRY
- 
- --- Bio::KEGG::GENES#entry -> Hash
- --- Bio::KEGG::GENES#entry_id -> String
- --- Bio::KEGG::GENES#division -> String
- --- Bio::KEGG::GENES#organism -> String
- 
- === NAME
- 
- --- Bio::KEGG::GENES#name -> String
- --- Bio::KEGG::GENES#genes -> Array
- --- Bio::KEGG::GENES#gene -> String
- 
- === DEFINITION
- 
- --- Bio::KEGG::GENES#definition -> String
- --- Bio::KEGG::GENES#eclinks -> Array
- --- Bio::KEGG::GENES#splinks -> Array
- 
- === CLASS
- 
- --- Bio::KEGG::GENES#keggclass -> String
- --- Bio::KEGG::GENES#pathways -> Array
- 
- === POSITION
- 
- --- Bio::KEGG::GENES#position -> String
- 
- === DBLINKS
- 
- --- Bio::KEGG::GENES#dblinks -> Hash
- 
- === CODON_USAGE
- 
- --- Bio::KEGG::GENES#codon_usage(codon = nil) -> Array or Fixnum
- --- Bio::KEGG::GENES#cu -> Hash
- 
- === AASEQ

- --- Bio::KEGG::GENES#aaseq -> Bio::Sequence::AA
- --- Bio::KEGG::GENES#aalen -> Fixnum
- 
- === NTSEQ

- --- Bio::KEGG::GENES#ntseq -> Bio::Sequence::NA
- --- Bio::KEGG::GENES#naseq -> Bio::Sequence::NA
- --- Bio::KEGG::GENES#ntlen -> Fixnum
- --- Bio::KEGG::GENES#nalen -> Fixnum
- 
- =end
--- 1,259 ----
  #
! # = bio/db/kegg/genes.rb - KEGG/GENES database class
  #
! # Copyright::   Copyright (C) 2001, 2002, 2006
! #               Toshiaki Katayama <k at bioruby.org>
! # License::     Ruby's
  #
! # $Id$
  #
  #
! # == KEGG GENES parser
  #
! # See http://www.genome.jp/kegg/genes.html
  #
! #
! # === Examples
! # 
! #  require 'bio/io/fetch'
! #  entry_string = Bio::Fetch.query('genes', 'b0002')
! # 
! #  entry = Bio::KEGG::GENES.new(entry_string)
! # 
! #  # ENTRY
! #  p entry.entry       # => Hash
! #
! #  p entry.entry_id    # => String
! #  p entry.division    # => String
! #  p entry.organism    # => String
! # 
! #  # NAME
! #  p entry.name        # => String
! #  p entry.genes       # => Array
! #  p entry.gene        # => String
! # 
! #  # DEFINITION
! #  p entry.definition  # => String
! #  p entry.eclinks     # => Array
! # 
! #  # PATHWAY
! #  p entry.pathway     # => String
! #  p entry.pathways    # => Array
! # 
! #  # POSITION
! #  p entry.position    # => String
! #  p entry.chromosome  # => String
! #  p entry.gbposition  # => String
! #  p entry.locations   # => Bio::Locations
! #
! #  # MOTIF
! #  p entry.motif       # => Hash of Array
! #
! #  # DBLINKS
! #  p entry.dblinks     # => Hash of Array
! # 
! #  # CODON_USAGE
! #  p entry.codon_usage # => Hash
! #  p entry.cu_list     # => Array
! # 
! #  # AASEQ
! #  p entry.aaseq       # => Bio::Sequence::AA
! #  p entry.aalen       # => Fixnum
! # 
! #  # NTSEQ
! #  p entry.ntseq       # => Bio::Sequence::NA
! #  p entry.naseq       # => Bio::Sequence::NA
! #  p entry.ntlen       # => Fixnum
! #  p entry.nalen       # => Fixnum
! # 

  module Bio

!   autoload :KEGGDB,    'bio/db'
!   autoload :Locations, 'bio/location'
!   autoload :Sequence,  'bio/sequence'

! class KEGG

! class GENES < KEGGDB

+   DELIMITER	= RS = "\n///\n"
+   TAGSIZE	= 12

!   def initialize(entry)
!     super(entry, TAGSIZE)
!   end

!   def entry
!     unless @data['ENTRY']
!       hash = Hash.new('')
!       if get('ENTRY').length > 30
!         e = get('ENTRY')
!         hash['id']       = e[12..29].strip
!         hash['division'] = e[30..39].strip
!         hash['organism'] = e[40..80].strip
        end
+       @data['ENTRY'] = hash
+     end
+     @data['ENTRY']
+   end

!   def entry_id
!     entry['id']
!   end

!   def division
!     entry['division']			# CDS, tRNA etc.
!   end

!   def organism
!     entry['organism']			# H.sapiens etc.
!   end

!   def name
!     field_fetch('NAME')
!   end

!   def genes
!     name.split(', ')
!   end

!   def gene
!     genes.first
!   end

!   def definition
!     field_fetch('DEFINITION')
!   end

!   def eclinks
!     ec_list = definition.slice(/\[EC:(.*?)\]/, 1)
!     if ec_list
!       ec_list.strip.split(/\s+/)
!     else
!       []
!     end
!   end

!   def pathway
!     field_fetch('PATHWAY')
!   end

!   def pathways
!     pathway.scan(/\[PATH:(.*?)\]/).flatten
!   end

!   def position
!     unless @data['POSITION']
!       @data['POSITION'] = fetch('POSITION').gsub(/\s/, '')
!     end
!     @data['POSITION']
!   end

!   def chromosome
!     if position[/:/]
!       position.sub(/:.*/, '')
!     elsif ! position[/\.\./]
!       position
!     else
!       nil
!     end
!   end

!   def gbposition
!     position.sub(/.*?:/, '')
!   end

!   def locations
!     Bio::Locations.new(gbposition)
!   end

!   def motif
!     unless @data['MOTIF']
!       hash = {}
!       db = nil
!       lines_fetch('MOTIF').each do |line|
!         if line[/^\S+:/]
!           db, str = line.split(/:/)
          else
!           str = line
          end
!         hash[db] ||= []
!         hash[db] += str.strip.split(/\s+/)
        end
+       @data['MOTIF'] = hash
+     end
+     @data['MOTIF']		# Hash of Array of IDs in MOTIF
+   end

!   def dblinks
!     unless @data['DBLINKS']
!       hash = {}
!       get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
!         id_array = str.strip.split(/\s+/)
!         hash[db] = id_array
        end
+       @data['DBLINKS'] = hash
+     end
+     @data['DBLINKS']		# Hash of Array of IDs in DBLINKS
+   end

!   def codon_usage(codon = nil)
!     unless @data['CODON_USAGE']
!       hash = Hash.new
!       list = cu_list
!       base = %w(t c a g)
!       base.each_with_index do |x, i|
!         base.each_with_index do |y, j|
!           base.each_with_index do |z, k|
!             hash["#{x}#{y}#{z}"] = list[i*16 + j*4 + k]
!           end
          end
        end
!       @data['CODON_USAGE'] = hash
!     end
!     @data['CODON_USAGE']
!   end

!   def cu_list
!     ary = []
!     get('CODON_USAGE').sub(/.*/,'').each_line do |line|	# cut 1st line
!       line.chomp.sub(/^.{11}/, '').scan(/..../) do |cu|
!         ary.push(cu.to_i)
        end
      end
!     return ary
    end

!   def aaseq
!     unless @data['AASEQ']
!       @data['AASEQ'] = Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
!     end
!     @data['AASEQ']
!   end

!   def aalen
!     fetch('AASEQ')[/\d+/].to_i
!   end

!   def ntseq
!     unless @data['NTSEQ']
!       @data['NTSEQ'] = Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
!     end
!     @data['NTSEQ']
!   end
!   alias naseq ntseq

!   def ntlen
!     fetch('NTSEQ')[/\d+/].to_i
!   end
!   alias nalen ntlen

  end

+ end # KEGG
+ end # Bio