[BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.29.2.6,1.29.2.7

Naohisa Goto ngoto at dev.open-bio.org
Tue Jun 17 16:04:38 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv21250/lib/bio/db/embl

Modified Files:
      Tag: BRANCH-biohackathon2008
	embl.rb 
Log Message:
* Bio::EMBL#cc is changed to strip the leading "CC   " tag from each line.
* Bio::EMBL#to_biosequence is changed to improve support for sequence output
  and data exchange.
* To make the parsed contents of DT lines easier to access, the
  Bio::EMBL#date_modified, date_created, release_modified, release_created,
  and entry_version methods are added.


Index: embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v
retrieving revision 1.29.2.6
retrieving revision 1.29.2.7
diff -C2 -d -r1.29.2.6 -r1.29.2.7
*** embl.rb	28 May 2008 13:09:03 -0000	1.29.2.6
--- embl.rb	17 Jun 2008 16:04:36 -0000	1.29.2.7
***************
*** 32,39 ****
--- 32,42 ----
  #
  
+ require 'date'
  require 'bio/db'
  require 'bio/db/embl/common'
  require 'bio/compat/features'
  require 'bio/compat/references'
+ require 'bio/sequence'
+ require 'bio/sequence/dblink'
  
  module Bio
***************
*** 323,329 ****
    # CC Line; comments of notes (>=0)
    def cc
!     get('CC')
    end
! 
  
    ##
--- 326,332 ----
    # CC Line; comments of notes (>=0)
    def cc
!     get('CC').to_s.gsub(/^CC   /, '')
    end
!   alias comment cc
  
    ##
***************
*** 376,379 ****
--- 379,436 ----
    #++
  
+   # modified date. Returns Date object, String or nil.
+   def date_modified
+     parse_date(self.dt['updated'])
+   end
+ 
+   # created date. Returns Date object, String or nil.
+   def date_created
+     parse_date(self.dt['created'])
+   end
+ 
+   # release number when last updated
+   def release_modified
+     parse_release_version(self.dt['updated'])[0]
+   end
+ 
+   # release number when created
+   def release_created
+     parse_release_version(self.dt['created'])[0]
+   end
+ 
+   # entry version number numbered by EMBL
+   def entry_version
+     parse_release_version(self.dt['updated'])[1]
+   end
+ 
+   # parse date string. Returns Date object.
+   def parse_date(str)
+     begin
+       Date.parse(str)
+     rescue ArgumentError, TypeError, NoMethodError, NameError
+       str
+     end
+   end
+   private :parse_date
+ 
+   # extracts release and version numbers from DT line
+   def parse_release_version(str)
+     return [ nil, nil ] unless str
+     a = str.split(/[\(\,\)]/)
+     dstr = a.shift
+     rel = nil
+     ver = nil
+     a.each do |x|
+       case x
+       when /Rel\.\s*(.+)/
+         rel = $1.strip
+       when /Version\s*(.+)/
+         ver = $1.strip
+       end
+     end
+     [ rel, ver ]
+   end
+   private :parse_release_version
+ 
    # converts the entry to Bio::Sequence object
    # ---
***************
*** 382,385 ****
--- 439,444 ----
    def to_biosequence
      bio_seq = Bio::Sequence.new(self.seq)
+ 
+     bio_seq.id_namespace = 'EMBL'
      bio_seq.entry_id = self.entry_id
      bio_seq.primary_accession = self.accessions[0]
***************
*** 389,394 ****
      bio_seq.definition = self.description
      bio_seq.topology = self.topology
!     bio_seq.date_created = self.dt['created']
!     bio_seq.date_modified = self.dt['updated']
      bio_seq.division = self.division
      bio_seq.sequence_version = self.version
--- 448,456 ----
      bio_seq.definition = self.description
      bio_seq.topology = self.topology
!     bio_seq.date_created = self.date_created
!     bio_seq.date_modified = self.date_modified
!     bio_seq.release_created = self.release_created
!     bio_seq.release_modified = self.release_modified
!     bio_seq.entry_version = self.entry_version
      bio_seq.division = self.division
      bio_seq.sequence_version = self.version
***************
*** 396,402 ****
      bio_seq.species = self.fetch('OS')
      bio_seq.classification = self.oc
      bio_seq.references = self.references
      bio_seq.features = self.ft
!     
      return bio_seq
    end
--- 458,469 ----
      bio_seq.species = self.fetch('OS')
      bio_seq.classification = self.oc
+     # bio_seq.organelle = self.fetch('OG') # unsupported yet
      bio_seq.references = self.references
      bio_seq.features = self.ft
!     bio_seq.comments = self.cc
!     bio_seq.dblinks = get('DR').split(/\n/).collect { |x|
!       Bio::Sequence::DBLink.parse_embl_DR_line(x)
!     }
! 
      return bio_seq
    end




More information about the bioruby-cvs mailing list