[BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.27,1.28
Mitsuteru C. Nakao
nakao at dev.open-bio.org
Tue Mar 13 17:03:57 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv7300/lib/bio/db/embl
Modified Files:
embl.rb
Log Message:
* Fixed a bug in parsing id_line in the EMBL release 89 format,
reported by Michael Han.
* Added a unit test and data files for the EMBL release 89 format.
Index: embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -d -r1.27 -r1.28
*** embl.rb 14 Apr 2006 05:49:30 -0000 1.27
--- embl.rb 13 Mar 2007 17:03:55 -0000 1.28
***************
*** 3,7 ****
#
#
! # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n at bioruby.org>
# License:: Ruby's
#
--- 3,7 ----
#
#
! # Copyright:: Copyright (C) 2001-2007 Mitsuteru C. Nakao <n at bioruby.org>
# License:: Ruby's
#
***************
*** 41,45 ****
# where <ID Hash> is:
# {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
! # 'SEQUENCE_LENGTH' => Int}
#
# ID Line
--- 41,45 ----
# where <ID Hash> is:
# {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
! # 'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => Int}
#
# ID Line
***************
*** 70,81 ****
# VRL (Viruses)
#
def id_line(key=nil)
unless @data['ID']
tmp = Hash.new
idline = fetch('ID').split(/; +/)
! tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/)
! tmp['MOLECULE_TYPE'] = idline[1]
! tmp['DIVISION'] = idline[2]
! tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i
@data['ID'] = tmp
--- 70,98 ----
# VRL (Viruses)
#
+ # Rel 89-
+ # ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
+ # ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
+ # 1. Primary accession number
+ # 2. Sequence version number
+ # 3. Topology: 'circular' or 'linear'
+ # 4. Molecule type (see note 1 below)
+ # 5. Data class (see section 3.1)
+ # 6. Taxonomic division (see section 3.2)
+ # 7. Sequence length (see note 2 below)
def id_line(key=nil)
unless @data['ID']
tmp = Hash.new
idline = fetch('ID').split(/; +/)
! tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
! if idline.first =~ /^SV/
! tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
! tmp['TOPOLOGY'] = idline.shift
! tmp['MOLECULE_TYPE'] = idline.shift
! tmp['DATA_CLASS'] = idline.shift
! else
! tmp['MOLECULE_TYPE'] = idline.shift
! end
! tmp['DIVISION'] = idline.shift
! tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
@data['ID'] = tmp
***************
*** 129,136 ****
# SV Accession.Version
def sv
! field_fetch('SV').sub(/;/,'')
end
def version
! sv.split(".")[1].to_i
end
--- 146,157 ----
# SV Accession.Version
def sv
! if (v = field_fetch('SV').sub(/;/,'')) == ""
! [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
! else
! v
! end
end
def version
! (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i
end
More information about the bioruby-cvs
mailing list