[BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.21,1.22

Mitsuteru C. Nakao nakao at pub.open-bio.org
Sun Oct 23 05:59:50 EDT 2005


Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory pub.open-bio.org:/tmp/cvs-serv18980/bio/db/embl

Modified Files:
	embl.rb 
Log Message:
* Changed to RDoc format.
* Bio::EMBL::Common changed to Bio::EMBLDB::Common.


Index: embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** embl.rb	24 Sep 2005 01:15:56 -0000	1.21
--- embl.rb	23 Oct 2005 09:59:48 -0000	1.22
***************
*** 1,6 ****
  #
! # bio/db/embl/embl.rb - EMBL database class
  #
! #   Copyright (C) 2001, 2002 Mitsuteru C. Nakao <n at bioruby.org>
  #
  #  This library is free software; you can redistribute it and/or
--- 1,26 ----
  #
! # = bio/db/embl/embl.rb - EMBL database class
  #
! # 
! # Author::      Mitsuteru C. Nakao <n at bioruby.org>
! # Copyright::   Copyright (C) 2001-2005 BioRuby Project
! # License::     LGPL
! #
! # $Id$
! #
! # == EMBL database entry
! #
! #
! #
! # == Example
! # 
! # emb = Bio::EMBL.new($<.read)
! # emb.entry_id
! # emb.each_cds do |cds|
! #   cds
! # end
! # emb.seq
! #
! #--
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 18,22 ****
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #  $Id$
  #
  
--- 38,42 ----
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #++
  #
  
***************
*** 25,34 ****
  
  module Bio
! class EMBL
!   include Bio::EMBL::Common
  
!   ##
    # ID Line
!   # "ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
    #
    # DATA_CLASS = ['standard']
--- 45,59 ----
  
  module Bio
! class EMBL < EMBLDB
!   include Bio::EMBLDB::Common
  
!   # returns contents in the ID line.
!   # * Bio::EMBL#id_line -> <ID Hash>
!   # where <ID Hash> is:
!   #  {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
!   #   'SEQUENCE_LENGTH' => Int}
!   #
    # ID Line
!   #  "ID  ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
    #
    # DATA_CLASS = ['standard']
***************
*** 59,75 ****
      unless @data['ID']
        tmp = Hash.new
!       idline = @orig['ID'].split(/ +/)         
!       tmp['ENTRY_NAME']      = idline[1]
!       tmp['DATA_CLASS']      = idline[2].sub(/;/,'')  
!       tmp['MOLECULE_TYPE']   = idline[3].sub(/;/,'')  # "cyclic DNA"
!       tmp['DVISION']         = idline[4].sub(/;/,'')
!       tmp['SEQUENCE_LENGTH'] = idline[5].to_i
        @data['ID'] = tmp
      end
!     if block_given?
!       @data['ID'].each do |k,v|
!         yield(k,v)
!       end
!     elsif key
        @data['ID'][key]
      else
--- 84,97 ----
      unless @data['ID']
        tmp = Hash.new
!       idline = fetch('ID').split(/; +/)         
!       tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/)
!       tmp['MOLECULE_TYPE'] = idline[1]
!       tmp['DIVISION'] = idline[2]
!       tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i
! 
        @data['ID'] = tmp
      end
!     
!     if key
        @data['ID'][key]
      else
***************
*** 78,84 ****
    end
  
!   ##
!   # Bio::EMBL#entry -> String
!   #          #entry_name -> String
    def entry
      id_line('ENTRY_NAME')
--- 100,105 ----
    end
  
!   # returns ENTRY_NAME in the ID line.
!   # * Bio::EMBL#entry -> String
    def entry
      id_line('ENTRY_NAME')
***************
*** 87,93 ****
    alias entry_id entry
  
!   ##
!   # Bio::EMBL#molecule -> String
!   # 
    def molecule
      id_line('MOLECULE_TYPE')
--- 108,113 ----
    alias entry_id entry
  
!   # returns MOLECULE_TYPE in the ID line.
!   # * Bio::EMBL#molecule -> String
    def molecule
      id_line('MOLECULE_TYPE')
***************
*** 95,108 ****
    alias molecule_type molecule
  
!   ##
!   # Bio::EMBL#division -> String
!   # 
    def division
      id_line('DIVISION')
    end
  
!   ##
!   # Bio::EMBL#sequencelength -> String
!   # 
    def sequence_length
      id_line('SEQUENCE_LENGTH')
--- 115,126 ----
    alias molecule_type molecule
  
!   # returns DIVISION in the ID line.
!   # * Bio::EMBL#division -> String
    def division
      id_line('DIVISION')
    end
  
!   # returns SEQUENCE_LENGTH in the ID line.
!   # * Bio::EMBL#sequence_length -> Int
    def sequence_length
      id_line('SEQUENCE_LENGTH')
***************
*** 110,128 ****
    alias seqlen sequence_length
    
!   ##
    # AC Line
    # "AC   A12345; B23456;"
-   #
-   # Bio::EMBLDB#ac  -> Array
-   #            #accessions  -> Array
  
  
!   ##
!   # SV Line; sequence version (1/entry)
!   # "SV    Accession.Version"
!   #
!   # Bio::EMBL#sv -> String
!   # Bio::EMBL#version -> Int
    #
    def sv
      field_fetch('SV').sub(/;/,'')
--- 128,142 ----
    alias seqlen sequence_length
    
! 
    # AC Line
    # "AC   A12345; B23456;"
  
  
!   # returns the version information in the sequence version (SV) line.
!   # * Bio::EMBL#sv -> Accession.Version in String
!   # * Bio::EMBL#version -> version number in Int
    #
+   # SV Line; sequence version (1/entry)
+   #  SV    Accession.Version
    def sv
      field_fetch('SV').sub(/;/,'')
***************
*** 131,141 ****
      sv.split(".")[1].to_i
    end
    
!   ##
!   # DT Line; date (2/entry)
!   # Bio::EMBL#dt  -> Hash
!   # Bio::EMBL#dt(key)  -> String
!   #   key = (created|updated)
    #
    def dt(key=nil)
      unless @data['DT']
--- 145,158 ----
      sv.split(".")[1].to_i
    end
+ 
    
!   # returns contents in the date (DT) line.
!   # * Bio::EMBL#dt  -> <DT Hash>
!   # where <DT Hash> is:
!   #  {}
!   # * Bio::EMBL#dt(key)  -> String
!   # keys: 'created' and 'updated'
    #
+   # DT Line; date (2/entry)
    def dt(key=nil)
      unless @data['DT']
***************
*** 146,154 ****
        @data['DT'] = tmp
      end
!     if block_given?
!       @data['DT'].each do |k,v|
!         yield(k,v)
!       end
!     elsif key
        @data['DT'][key]
      else
--- 163,167 ----
        @data['DT'] = tmp
      end
!     if key
        @data['DT'][key]
      else
***************
*** 208,223 ****
  
  
!   ##
!   # FH Line; feature table header (0 or 2)
!   # FT Line; feature table data (>=0)
!   #
!   # Bio::EMBL#ft -> Array
!   # Bio::EMBL#ft {} -> {|Hash| }
!   # Bio::EMBL#ft(Int) -> Hash
    #
    def fh
!     get('FH')
    end
    # same as features method in bio/db/genbank.rb 
    def ft(num = nil)
      unless @data['FT']
--- 221,239 ----
  
  
!   # returns feature table header (String) in the feature header (FH) line.
    #
+   # FH Line; feature table header (0 or 2)
    def fh
!     fetch('FH')
    end
+ 
+   # returns contents in the feature table (FT) lines.
+   # * Bio::EMBL#ft -> [ <FT Hash>* ]
+   # * Bio::EMBL#ft {} -> {|<FT Hash>| }
+   # * Bio::EMBL#ft(Int) -> Hash
+   #
    # same as features method in bio/db/genbank.rb 
+   #
+   # FT Line; feature table data (>=0)
    def ft(num = nil)
      unless @data['FT']
***************
*** 255,260 ****
      end
      if block_given?
!       @data['FT'].each do |f|
!         yield f
        end
      else
--- 271,276 ----
      end
      if block_given?
!       @data['FT'].each do |feature_table|
!         yield feature_table
        end
      else
***************
*** 264,279 ****
    alias features ft
  
    def each_cds
!     ft.each do |feature|
!       if feature.feature == 'CDS'
!         yield feature
        end
      end
    end
  
    def each_gene
!     ft.each do |feature|
!       if feature.feature == 'gene'
!         yield feature
        end
      end
--- 280,297 ----
    alias features ft
  
+   # iterates on CDS features in the FT lines.
    def each_cds
!     ft.each do |cds_feature|
!       if cds_feature.feature == 'CDS'
!         yield cds_feature
        end
      end
    end
  
+   # iterates on gene features in the FT lines.
    def each_gene
!     ft.each do |gene_feature|
!       if gene_feature.feature == 'gene'
!         yield gene_feature
        end
      end
***************
*** 281,287 ****
  
  
!   ##
!   # CC Line; comments of notes (>=0)
    #
    def cc
      get('CC')
--- 299,305 ----
  
  
!   # returns comment text in the comments (CC) line.
    #
+   # CC Line; comments or notes (>=0)
    def cc
      get('CC')
***************
*** 295,319 ****
  
  
!   ##
!   # SQ Line; sequence header (1/entry)
!   # "SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;"
!   # Bio::EMBL#sq  -> Hash
!   # Bio::EMBL#sq(base)  -> Int
!   #          #sq[base]  -> Int
    #
    def sq(base = nil)
      unless @data['SQ']
        fetch('SQ') =~ \
               /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
!       @data['SQ']={'ntlen'=>$1.to_i, 'other'=>$6.to_i,
!                    'a'=>$2.to_i,'c'=>$3.to_i,'g'=>$4.to_i,'t'=>$5.to_i}
      else
        @data['SQ']
      end
!     if block_given?
!       @data['SQ'].each do |k,v|
!         yield(k,v)
!       end
!     elsif base
        @data['SQ'][base.downcase]
      else
--- 313,337 ----
  
  
!   # returns sequence header information in the sequence header (SQ) line.
!   # * Bio::EMBL#sq  -> <SQ Hash>
!   # where <SQ Hash> is:
!   #     {'ntlen' => Int, 'other' => Int,
!   #      'a' => Int, 'c' => Int, 'g' => Int, 't' => Int}
!   # * Bio::EMBL#sq(base)  -> <base content in Int>
!   # * Bio::EMBL#sq[base]  -> <base content in Int>
    #
+   # SQ Line; sequence header (1/entry)
+   #  SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
    def sq(base = nil)
      unless @data['SQ']
        fetch('SQ') =~ \
               /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/
!       @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i,
!                      'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i}
      else
        @data['SQ']
      end
! 
!     if base
        @data['SQ'][base.downcase]
      else
***************
*** 321,335 ****
      end
    end
-   # Bio::EMBL#gc  -> Float
-   def gc
-     ( sq('g') + sq('c') ) / sq('ntlen').to_f * 100
-   end
    
  
!   ##
    # @orig[''] as sequence
    # bb Line; (blanks) sequence data (>=1)
-   # Bio::EMBL#seq  -> Bio::Sequence::NA
-   #
    def seq
      Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
--- 339,349 ----
      end
    end
    
  
!   # returns the nucleotide sequence in this entry.
!   # * Bio::EMBL#seq  -> Bio::Sequence::NA
!   #
    # @orig[''] as sequence
    # bb Line; (blanks) sequence data (>=1)
    def seq
      Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') )
***************
*** 341,345 ****
  
  
- 
    ### private methods
  
--- 355,358 ----
***************
*** 434,517 ****
  
  
- =begin
- 
- = Bio::EMBL
- 
- === Initialize
- 
- --- Bio::EMBL#new(an_embl_entry)
- 
- === ID line (Identification)
- 
- --- Bio::EMBL#id_line -> Hash
- --- Bio::EMBL#id_line(key) -> String
- 
-       key = (entryname|molecule|division|sequencelength)
- 
- --- Bio::EMBL#entry -> String
- --- Bio::EMBL#entryname -> String
- --- Bio::EMBL#molecule -> String
- --- Bio::EMBL#division -> String
- --- Bio::EMBL#sequencelength -> Int
- 
- === AC lines (Accession number)
- 
- --- Bio::EMBL#ac -> Array
-  
- === SV line (Sequence version)
- 
- --- Bio::EMBL#sv -> String
- 
- === DT lines (Date) 
- 
- --- Bio::EMBL#dt -> Hash
- --- Bio::EMBL#dt(key) -> String
- 
-       key = (created|updated)
- 
- === DE lines (Description)
- 
- --- Bio::EMBL#de -> String
- 
- === KW lines (Keyword)
- 
- --- Bio::EMBL#kw -> Array
- 
- === OS lines (Organism species)
- 
- --- Bio::EMBL#os -> Hash
- 
- === OC lines (organism classification)
- 
- --- Bio::EMBL#oc -> Array
- 
- === OG line (Organella)
- 
- --- Bio::EMBL#og -> String
- 
- === RN RC RP RX RA RT RL lines (Reference)
-       
- --- Bio::EMBL#ref -> String 
- 
- === DR lines (Database cross-reference)
- 
- --- Bio::EMBL#dr -> Array
- 
- === FH FT lines (Feature table header and data)
- 
- --- Bio::EMBL#ft -> Bio::Features
- --- Bio::EMBL#each_cds -> Array
- --- Bio::EMBL#each_gene -> Array
- 
- 
- === SQ Lines (Sequence header and data)
- 
- --- Bio::EMBL#sq -> Hash
- --- Bio::EMBL#sq(base) -> Int
- 
-       base = (a|c|g|t|u|other)
- 
- --- Bio::EMBL#gc -> Float
- --- Bio::EMBL#seq -> Bio::Sequece::NA
- 
- =end
--- 447,448 ----



More information about the bioruby-cvs mailing list