[BioRuby-cvs] bioruby/lib/bio/db/genbank format_genbank.rb, 1.1.2.4, 1.1.2.5
Naohisa Goto
ngoto at dev.open-bio.org
Tue Jun 17 15:59:26 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/db/genbank
In directory dev.open-bio.org:/tmp/cvs-serv21201/lib/bio/db/genbank
Modified Files:
Tag: BRANCH-biohackathon2008
format_genbank.rb
Log Message:
* Added support for COMMENT.
* Added support for GI number output.
* Many improvements are added.
Index: format_genbank.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/Attic/format_genbank.rb,v
retrieving revision 1.1.2.4
retrieving revision 1.1.2.5
diff -C2 -d -r1.1.2.4 -r1.1.2.5
*** format_genbank.rb 28 May 2008 13:26:33 -0000 1.1.2.4
--- format_genbank.rb 17 Jun 2008 15:59:24 -0000 1.1.2.5
***************
*** 101,104 ****
--- 101,115 ----
end
+ # formats comment lines as GenBank
+ def comments_format_genbank(cmnts)
+ return '' if !cmnts or cmnts.empty?
+ cmnts = [ cmnts ] unless cmnts.kind_of?(Array)
+ a = []
+ cmnts.each do |str|
+ a.push "COMMENT #{ genbank_wrap(str) }\n"
+ end
+ a.join('')
+ end
+
# formats sequence lines as GenBank
def seq_format_genbank(str)
***************
*** 113,122 ****
end
# Erb template of GenBank format for Bio::Sequence
erb_template <<'__END_OF_TEMPLATE__'
! LOCUS <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", '') %><%= sprintf("%-6s", molecule_type) %> <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= sprintf("%-11s", date_modified) %>
DEFINITION <%= genbank_wrap_dot(definition.to_s) %>
ACCESSION <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %>
! VERSION <%= primary_accession %>.<%= sequence_version %><% unless true or gi_number.to_s.empty? %>GI:<%= gi_number %><% end %>
KEYWORDS <%= genbank_wrap_dot((keywords or []).join('; ')) %>
SOURCE <%= genbank_wrap(species) %>
--- 124,168 ----
end
+ # formats date
+ def date_format_genbank
+ date_modified || date_created || null_date
+ end
+
+ # molecule type
+ def mol_type_genbank
+ if /(DNA|(t|r|m|u|sn|sno)?RNA)/i =~ molecule_type.to_s then
+ $1.sub(/[DR]NA/) { |x| x.upcase }
+ else
+ 'NA'
+ end
+ end
+
+ # NCBI GI number
+ def ncbi_gi_number
+ ids = other_seqids
+ if ids and r = ids.find { |x| x.database == 'GI' } then
+ r.id
+ else
+ nil
+ end
+ end
+
+ # strandedness
+ def strandedness_genbank
+ return nil unless strandedness
+ case strandedness
+ when 'single'; 'ss-';
+ when 'double'; 'ds-';
+ when 'mixed'; 'ms-';
+ else; nil
+ end
+ end
+
# Erb template of GenBank format for Bio::Sequence
erb_template <<'__END_OF_TEMPLATE__'
! LOCUS <%= sprintf("%-16s", entry_id) %> <%= sprintf("%11d", length) %> bp <%= sprintf("%3s", strandedness_genbank) %><%= sprintf("%-6s", mol_type_genbank) %> <%= sprintf("%-8s", topology) %><%= sprintf("%4s", division) %> <%= date_format_genbank %>
DEFINITION <%= genbank_wrap_dot(definition.to_s) %>
ACCESSION <%= genbank_wrap(([ primary_accession ] + (secondary_accessions or [])).join(" ")) %>
! VERSION <%= primary_accession %>.<%= sequence_version %><% if gi = ncbi_gi_number then %> GI:<%= gi %><% end %>
KEYWORDS <%= genbank_wrap_dot((keywords or []).join('; ')) %>
SOURCE <%= genbank_wrap(species) %>
***************
*** 129,132 ****
--- 175,179 ----
%><%= reference_format_genbank(ref, n) %><%
end
+ %><%= comments_format_genbank(comments)
%>FEATURES Location/Qualifiers
<%= format_features_genbank(features || [])
More information about the bioruby-cvs mailing list