[BioRuby-cvs] bioruby/lib/bio/db/genbank format_genbank.rb, 1.1.2.4, 1.1.2.5

Naohisa Goto ngoto at dev.open-bio.org
Tue Jun 17 15:59:26 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/db/genbank
In directory dev.open-bio.org:/tmp/cvs-serv21201/lib/bio/db/genbank

Modified Files:
      Tag: BRANCH-biohackathon2008
	format_genbank.rb 
Log Message:
* Added support for COMMENT.
* Added support for GI number output.
* Many improvements are added.


Index: format_genbank.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/Attic/format_genbank.rb,v
retrieving revision 1.1.2.4
retrieving revision 1.1.2.5
diff -C2 -d -r1.1.2.4 -r1.1.2.5
*** format_genbank.rb	28 May 2008 13:26:33 -0000	1.1.2.4
--- format_genbank.rb	17 Jun 2008 15:59:24 -0000	1.1.2.5
***************
*** 101,104 ****
--- 101,115 ----
      end
  
+     # Formats comment(s) as GenBank COMMENT lines; takes one item or an Array, returns a String.
+     def comments_format_genbank(cmnts)
+       return '' if !cmnts or cmnts.empty?  # nil/false or empty input produces no COMMENT output
+       cmnts = [ cmnts ] unless cmnts.kind_of?(Array)  # wrap a lone item so the loop below is uniform
+       a = []
+       cmnts.each do |str|
+         a.push "COMMENT     #{ genbank_wrap(str) }\n"
+       end
+       a.join('')  # one COMMENT record per input item, concatenated in order
+     end
+ 
      # formats sequence lines as GenBank
      def seq_format_genbank(str)
***************
*** 113,122 ****
      end
  
      # Erb template of GenBank format for Bio::Sequence
      erb_template <<'__END_OF_TEMPLATE__'
! LOCUS       <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", '') %><%= sprintf("%-6s", molecule_type) %>  <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= sprintf("%-11s", date_modified) %>
  DEFINITION  <%= genbank_wrap_dot(definition.to_s) %>
  ACCESSION   <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %>
! VERSION     <%= primary_accession %>.<%= sequence_version %><% unless true or gi_number.to_s.empty? %>GI:<%= gi_number %><% end %>
  KEYWORDS    <%= genbank_wrap_dot((keywords or []).join('; ')) %>
  SOURCE      <%= genbank_wrap(species) %>
--- 124,168 ----
      end
  
+     # Date shown on the LOCUS line: date_modified, else date_created, else null_date.
+     def date_format_genbank
+       date_modified || date_created || null_date  # first value that is neither nil nor false wins
+     end
+ 
+     # Molecule type for the LOCUS line: the DNA/RNA token found in molecule_type, or NA if none.
+     def mol_type_genbank
+       if /(DNA|(t|r|m|u|sn|sno)?RNA)/i =~ molecule_type.to_s then  # case-insensitive search; $1 is the whole matched token
+         $1.sub(/[DR]NA/) { |x| x.upcase }  # NOTE(review): this pattern is case-sensitive, so a lowercase match (dna, mrna) is returned unchanged
+       else
+         'NA'
+       end
+     end
+ 
+     # NCBI GI number: id of the first other_seqids entry whose database is GI, or nil if absent.
+     def ncbi_gi_number
+       ids = other_seqids
+       if ids and r = ids.find { |x| x.database == 'GI' } then  # r is assigned inside the condition: first matching entry, or nil
+         r.id
+       else
+         nil
+       end
+     end
+ 
+     # Strandedness prefix for the LOCUS line: ss-, ds- or ms- for single/double/mixed, else nil.
+     def strandedness_genbank
+       return nil unless strandedness  # nothing to map when strandedness is nil or false
+       case strandedness
+       when 'single'; 'ss-'; 
+       when 'double'; 'ds-'; 
+       when 'mixed';  'ms-'; 
+       else; nil
+       end
+     end
+ 
      # Erb template of GenBank format for Bio::Sequence
      erb_template <<'__END_OF_TEMPLATE__'
! LOCUS       <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", strandedness_genbank) %><%= sprintf("%-6s", mol_type_genbank) %>  <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= date_format_genbank %>
  DEFINITION  <%= genbank_wrap_dot(definition.to_s) %>
  ACCESSION   <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %>
! VERSION     <%= primary_accession %>.<%= sequence_version %><% if gi = ncbi_gi_number then %>  GI:<%= gi %><% end %>
  KEYWORDS    <%= genbank_wrap_dot((keywords or []).join('; ')) %>
  SOURCE      <%= genbank_wrap(species) %>
***************
*** 129,132 ****
--- 175,179 ----
  %><%= reference_format_genbank(ref, n) %><%
      end
+ %><%= comments_format_genbank(comments)
  %>FEATURES             Location/Qualifiers
  <%= format_features_genbank(features || [])




More information about the bioruby-cvs mailing list