[BioRuby-cvs] bioruby/lib/bio/db/genbank format_genbank.rb, 1.1.2.2, 1.1.2.3

Naohisa Goto ngoto at dev.open-bio.org
Wed May 7 12:28:58 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/db/genbank
In directory dev.open-bio.org:/tmp/cvs-serv7845/lib/bio/db/genbank

Modified Files:
      Tag: BRANCH-biohackathon2008
	format_genbank.rb 
Log Message:
* added support for 'REMARK' (comment in reference).
* Bug Fix: an author's name should not be separated into two lines.


Index: format_genbank.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/Attic/format_genbank.rb,v
retrieving revision 1.1.2.2
retrieving revision 1.1.2.3
diff -C2 -d -r1.1.2.2 -r1.1.2.3
*** format_genbank.rb	7 May 2008 06:17:52 -0000	1.1.2.2
--- format_genbank.rb	7 May 2008 12:28:56 -0000	1.1.2.3
***************
*** 33,36 ****
--- 33,104 ----
      end
  
+     # Given words (an Array of String) are wrapped in EMBL style.
+     # A word is never split across lines.
+     def genbank_wrap_words(array)
+       width = 67
+       result = []
+       str = nil
+       array.each do |x|
+         if str then
+           if str.length + 1 + x.length > width then
+             str = nil
+           else
+             str.concat ' '
+             str.concat x
+           end
+         end
+         unless str then
+           str = "#{x}"
+           result.push str
+         end
+       end
+       result.join("\n" + " " * 12)
+     end
+ 
+     # formats references
+     def reference_format_genbank(ref, num)
+       pos = ref.sequence_position.to_s.gsub(/\s/, '')
+       pos.gsub!(/(\d+)\-(\d+)/, "\\1 to \\2")
+       pos.gsub!(/\s*\,\s*/, '; ')
+       if pos.empty?
+         pos = ''
+       else
+         pos = " (bases #{pos})"
+       end
+       volissue = "#{ref.volume.to_s}"
+       volissue += " (#{ref.issue})" unless ref.issue.to_s.empty? 
+       journal = "#{ref.journal.to_s}"
+       journal += " #{volissue}" unless volissue.empty? 
+       journal += ", #{ref.pages}" unless ref.pages.to_s.empty?
+       journal += " (#{ref.year})" unless ref.year.to_s.empty?
+ 
+       alist = ref.authors.collect do |x|
+         y = x.to_s.strip.split(/\, *([^\,]+)\z/)
+         y[1].gsub!(/\. +/, '.') if y[1]
+         y.join(',')
+       end
+       lastauthor = alist.pop
+       last2author = alist.pop
+       alist.each { |x| x.concat ',' }
+       alist.push last2author if last2author
+       alist.push "and" unless alist.empty?
+       alist.push lastauthor.to_s
+       result = <<__END_OF_REFERENCE__
+ REFERENCE   #{ genbank_wrap(sprintf('%-2d%s', num, pos))}
+   AUTHORS   #{ genbank_wrap_words(alist) }
+   TITLE     #{ genbank_wrap(ref.title.to_s) }
+   JOURNAL   #{ genbank_wrap(journal) }
+ __END_OF_REFERENCE__
+       unless ref.pubmed.to_s.empty? then
+         result.concat "   PUBMED   #{ genbank_wrap(ref.pubmed) }\n"
+       end
+       if ref.comments and !(ref.comments.empty?) then
+         ref.comments.each do |c|
+           result.concat "  REMARK    #{ genbank_wrap(c) }\n"
+         end
+       end
+       result
+     end
+ 
      # formats sequence lines as GenBank
      def each_genbank_seqline(str) #:yields: counter, seqline
***************
*** 56,87 ****
      (references or []).each do |ref|
        n += 1
!       pos = ref.sequence_position.to_s.gsub(/\s/, '')
!       pos.gsub!(/(\d+)\-(\d+)/, "\\1 to \\2")
!       pos.gsub!(/\s*\,\s*/, '; ')
!       if pos.empty?
!         pos = ''
!       else
!         pos = " (bases #{pos})"
!       end
!       volissue = "#{ref.volume.to_s}"
!       volissue += " (#{ref.issue})" unless ref.issue.to_s.empty? 
!       journal = "#{ref.journal.to_s}"
!       journal += " #{volissue}" unless volissue.empty? 
!       journal += ", #{ref.pages}" unless ref.pages.to_s.empty?
!       journal += " (#{ref.year})" unless ref.year.to_s.empty?
! 
!       alist = ref.authors.collect { |x| x.gsub(/\, /, ',') }
!       lastauthor = alist.pop
!       authorsline = alist.join(', ')
!       authorsline.concat(" and ") unless alist.empty?
!       authorsline.concat lastauthor.to_s
!       
! %>REFERENCE   <%= genbank_wrap(sprintf('%-2d%s', n, pos)) %>
!   AUTHORS   <%= genbank_wrap(authorsline) %>
!   TITLE     <%= genbank_wrap(ref.title.to_s) %>
!   JOURNAL   <%= genbank_wrap(journal) %>
! <%   unless ref.pubmed.to_s.empty?
!  %>  PUBMED    <%= ref.pubmed %>
! <%   end
      end
  %>FEATURES             Location/Qualifiers
--- 124,128 ----
      (references or []).each do |ref|
        n += 1
! %><%= reference_format_genbank(ref, n) %><%
      end
  %>FEATURES             Location/Qualifiers




More information about the bioruby-cvs mailing list