[BioRuby-cvs] bioruby/lib/bio/db/embl format_embl.rb, 1.1.2.2, 1.1.2.3 common.rb, 1.12.2.3, 1.12.2.4

Naohisa Goto ngoto at dev.open-bio.org
Wed Apr 23 18:52:20 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv13059/lib/bio/db/embl

Modified Files:
      Tag: BRANCH-biohackathon2008
	format_embl.rb common.rb 
Log Message:
* lib/bio/reference.rb
  * New methods: Bio::Reference#comments, Bio::Reference#doi
  * Code of Bio::Reference#embl is moved to lib/bio/db/embl/format_embl.rb
    to improve tolerance for various data (e.g. references with no
    record numbers or with duplicated record numbers).
* lib/bio/db/embl/common.rb
  * Changes to support Bio::Reference#comments.
* lib/bio/db/embl/format_embl.rb
  * Bio::Sequence::Format::NucFormatter::Embl#reference_format_embl
    (private method) is added based on Bio::Reference#embl.
  * Changes to improve tolerance for various data.


Index: format_embl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/Attic/format_embl.rb,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -C2 -d -r1.1.2.2 -r1.1.2.3
*** format_embl.rb	27 Mar 2008 13:38:31 -0000	1.1.2.2
--- format_embl.rb	23 Apr 2008 18:52:18 -0000	1.1.2.3
***************
*** 25,28 ****
--- 25,76 ----
      end
  
+     # format reference
+     # ref:: Bio::Reference object
+     # hash:: (optional) a hash for RN (reference number) administration
+     def reference_format_embl(ref, hash = nil)
+       lines = Array.new
+       if ref.embl_gb_record_number or hash then
+         refno = ref.embl_gb_record_number.to_i
+         hash ||= {}
+         if refno <= 0 or hash[refno] then
+           refno = hash.keys.sort[-1].to_i + 1
+           hash[refno] = true
+         end
+         lines << embl_wrap("RN   ", "[#{refno}]")
+       end
+       if ref.comments then
+         ref.comments.each do |cmnt|
+           lines << embl_wrap("RC   ", cmnt)
+         end
+       end
+       unless ref.sequence_position.to_s.empty? then
+         lines << embl_wrap("RP   ",   "#{ref.sequence_position}")
+       end
+       unless ref.doi.to_s.empty? then
+         lines << embl_wrap("RX   ",   "DOI; #{ref.doi}.")
+       end
+       unless ref.pubmed.to_s.empty? then
+         lines << embl_wrap("RX   ",   "PUBMED; #{ref.pubmed}.")
+       end
+       unless ref.authors.empty?
+         lines << embl_wrap('RA   ', ref.authors.join(', ') + ';')
+       end
+       lines << embl_wrap('RT   ',
+                          (ref.title.to_s.empty? ? '' :
+                           "\"#{ref.title}\"") + ';')
+       unless ref.journal.to_s.empty? then
+         volissue = "#{ref.volume.to_s}"
+         volissue = "#{volissue}(#{ref.issue})" unless ref.issue.to_s.empty? 
+         rl = "#{ref.journal}"
+         rl += " #{volissue}" unless volissue.empty? 
+         rl += ":#{ref.pages}" unless ref.pages.to_s.empty?
+         rl += "(#{ref.year})" unless ref.year.to_s.empty?
+         rl += '.'
+         lines << embl_wrap('RL   ', rl)
+       end
+       lines << "XX"
+       return lines.join("\n")
+     end
+ 
      def seq_format_embl(seq)
        output_lines = Array.new
***************
*** 43,64 ****
      erb_template <<'__END_OF_TEMPLATE__'
  ID   <%= entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= molecule_type %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
! XX
  <%= embl_wrap('AC   ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
! XX
  DT   <%= date_created %>
  DT   <%= date_modified %>
! XX
  <%= embl_wrap('DE   ', definition) %>
! XX
  <%= embl_wrap('KW   ', keywords.join('; ') + '.') %>
! XX
  OS   <%= species %>
  <%= embl_wrap('OC   ', classification.join('; ') + '.') %>
  XX   
! <%= (references || []).collect{|ref| ref.format('embl')}.join("\n") %>
! XX
! FH   Key             Location/Qualifiers
! FH
! <%= format_features_embl(features || []) %>XX
  SQ   Sequence <%= seq.length %> BP; <%= seq.composition.collect{|k,v| "#{v} #{k.upcase}"}.join('; ') + '; ' + (seq.gsub(/[ACTGactg]/, '').length.to_s ) + ' other;' %>
  <%= seq_format_embl(seq) %>
--- 91,111 ----
      erb_template <<'__END_OF_TEMPLATE__'
  ID   <%= entry_id %>; SV <%= sequence_version %>; <%= topology %>; <%= molecule_type %>; <%= data_class %>; <%= division %>; <%= seq.length %> BP.
! XX   
  <%= embl_wrap('AC   ', accessions.reject{|a| a.nil?}.join('; ') + ';') %>
! XX   
  DT   <%= date_created %>
  DT   <%= date_modified %>
! XX   
  <%= embl_wrap('DE   ', definition) %>
! XX   
  <%= embl_wrap('KW   ', keywords.join('; ') + '.') %>
! XX   
  OS   <%= species %>
  <%= embl_wrap('OC   ', classification.join('; ') + '.') %>
  XX   
! <% hash = {}; (references || []).each do |ref| %><%= reference_format_embl(ref, hash) %>
! <% end %>FH   Key             Location/Qualifiers
! FH   
! <%= format_features_embl(features || []) %>XX   
  SQ   Sequence <%= seq.length %> BP; <%= seq.composition.collect{|k,v| "#{v} #{k.upcase}"}.join('; ') + '; ' + (seq.gsub(/[ACTGactg]/, '').length.to_s ) + ' other;' %>
  <%= seq_format_embl(seq) %>

Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v
retrieving revision 1.12.2.3
retrieving revision 1.12.2.4
diff -C2 -d -r1.12.2.3 -r1.12.2.4
*** common.rb	23 Apr 2008 18:04:51 -0000	1.12.2.3
--- common.rb	23 Apr 2008 18:52:18 -0000	1.12.2.4
***************
*** 280,284 ****
              end
            when 'RC'
!             hash['comment'] = value
            when 'RP'
              hash['sequence_position'] = value
--- 280,287 ----
              end
            when 'RC'
!             unless value.to_s.strip.empty?
!               hash['comments'] ||= []
!               hash['comments'].push value
!             end
            when 'RP'
              hash['sequence_position'] = value




More information about the bioruby-cvs mailing list