[BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.27,1.28

Tue Mar 13 17:03:57 UTC 2007

Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv7300/lib/bio/db/embl

Modified Files:
	embl.rb 
Log Message:
* Fixed a bug in parsing the ID line (id_line) in the EMBL release 89 format,
  reported by Michael Han.
* Added a unit test and data files for the EMBL release 89 format.


Index: embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** embl.rb	14 Apr 2006 05:49:30 -0000	1.27
--- embl.rb	13 Mar 2007 17:03:55 -0000	1.28
***************
*** 3,7 ****
  #
  # 
! # Copyright::   Copyright (C) 2001-2006 Mitsuteru C. Nakao <n at bioruby.org>
  # License::     Ruby's
  #
--- 3,7 ----
  #
  # 
! # Copyright::   Copyright (C) 2001-2007 Mitsuteru C. Nakao <n at bioruby.org>
  # License::     Ruby's
  #
***************
*** 41,45 ****
    # where <ID Hash> is:
    #  {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
!   #   'SEQUENCE_LENGTH' => Int}
    #
    # ID Line
--- 41,45 ----
    # where <ID Hash> is:
    #  {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
!   #   'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => Int}
    #
    # ID Line
***************
*** 70,81 ****
    #  VRL (Viruses)
    #
    def id_line(key=nil)
      unless @data['ID']
        tmp = Hash.new
        idline = fetch('ID').split(/; +/)         
!       tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/)
!       tmp['MOLECULE_TYPE'] = idline[1]
!       tmp['DIVISION'] = idline[2]
!       tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i
  
        @data['ID'] = tmp
--- 70,98 ----
    #  VRL (Viruses)
    #
+   # Rel 89-
+   # ID   CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
+   # ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
+   # 1. Primary accession number
+   # 2. Sequence version number
+   # 3. Topology: 'circular' or 'linear'
+   # 4. Molecule type (see note 1 below)
+   # 5. Data class (see section 3.1)
+   # 6. Taxonomic division (see section 3.2)
+   # 7. Sequence length (see note 2 below)
    def id_line(key=nil)
      unless @data['ID']
        tmp = Hash.new
        idline = fetch('ID').split(/; +/)         
!       tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
!       if idline.first =~ /^SV/
!         tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
!         tmp['TOPOLOGY'] = idline.shift
!         tmp['MOLECULE_TYPE'] = idline.shift
!         tmp['DATA_CLASS'] = idline.shift
!       else
!         tmp['MOLECULE_TYPE'] = idline.shift
!       end
!       tmp['DIVISION'] = idline.shift
!       tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
  
        @data['ID'] = tmp
***************
*** 129,136 ****
    #  SV    Accession.Version
    def sv
!     field_fetch('SV').sub(/;/,'')
    end
    def version
!     sv.split(".")[1].to_i
    end
  
--- 146,157 ----
    #  SV    Accession.Version
    def sv
!     if (v = field_fetch('SV').sub(/;/,'')) == ""
!       [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.') 
!     else
!       v
!     end  
    end
    def version
!     (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i
    end