[BioRuby-cvs] bioruby/lib/bio/sequence format.rb,1.4.2.6,1.4.2.7
Naohisa Goto
ngoto at dev.open-bio.org
Tue Mar 4 11:10:30 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv7656/lib/bio/sequence
Modified Files:
Tag: BRANCH-biohackathon2008
format.rb
Log Message:
* lib/bio/sequence.rb
Bio::Sequence#output is moved to lib/bio/sequence/format.rb.
* lib/bio/sequence/format.rb
* Bio::Sequence#output no longer reads the erb file directly.
* Bio::Sequence::Format::FormatterBase class, a base class for formatters,
is newly added.
* Bio::Sequence::Format::Formatter, NucFormatter, AminoFormatter are
newly added to store formatter classes.
* Bio::Sequence#list_output_formats is added.
* (The names of the above classes/modules/methods may change if more
appropriate names are proposed.)
Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.4.2.6
retrieving revision 1.4.2.7
diff -C2 -d -r1.4.2.6 -r1.4.2.7
*** format.rb 22 Feb 2008 14:30:44 -0000 1.4.2.6
--- format.rb 4 Mar 2008 11:10:28 -0000 1.4.2.7
***************
*** 2,9 ****
# = bio/sequence/format.rb - various output format of the biological sequence
#
! # Copyright:: Copyright (C) 2006
# Toshiaki Katayama <k at bioruby.org>,
# Naohisa Goto <ng at bioruby.org>,
! # Ryan Raaum <ryan at raaum.org>
# License:: The Ruby License
#
--- 2,10 ----
# = bio/sequence/format.rb - various output format of the biological sequence
#
! # Copyright:: Copyright (C) 2006-2008
# Toshiaki Katayama <k at bioruby.org>,
# Naohisa Goto <ng at bioruby.org>,
! # Ryan Raaum <ryan at raaum.org>,
! # Jan Aerts <jan.aerts at bbsrc.ac.uk>
# License:: The Ruby License
#
***************
*** 15,18 ****
--- 16,20 ----
#
+ require 'erb'
module Bio
***************
*** 32,62 ****
module Format
! # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
! # case, it would be difficult to successfully call this method outside
! # its expected context).
! #
! # Output the FASTA format string of the sequence.
! #
! # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of
! # using either the header or width arguments. So something needs to be
! # changed...
#
! # Currently, this method is used in Bio::Sequence#output like so,
#
# s = Bio::Sequence.new('atgc')
# puts s.output(:fasta) #=> "> \natgc\n"
# ---
! # *Arguments*:
! # * (optional) _header_: String (default nil)
! # * (optional) _width_: Fixnum (default nil)
# *Returns*:: String object
! def format_fasta(header = nil, width = nil)
! header ||= "#{@entry_id} #{@definition}"
! ">#{header}\n" +
! if width
! @seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
else
! @seq.to_s + "\n"
end
end
--- 34,181 ----
module Format
! # Repository of generic (or both nucleotide and protein) sequence
! # formatter classes
! module Formatter
!
! # Raw format generator
! autoload :Raw, 'bio/sequence/format_raw'
!
! # Fasta format generator
! autoload :Fasta, 'bio/db/fasta/format_fasta'
!
! # NCBI-style Fasta format generator
! # (resembles the EMBOSS "ncbi" format)
! autoload :Fasta_ncbi, 'bio/db/fasta/format_fasta'
!
! end #module Formatter
!
! # Repository of nucleotide sequence formatter classes
! module NucFormatter
!
! # GenBank format generator
! # Note that the name is 'Genbank' and NOT 'GenBank'
! autoload :Genbank, 'bio/db/genbank/format_genbank'
!
! # EMBL format generator
! # Note that the name is 'Embl' and NOT 'EMBL'
! autoload :Embl, 'bio/db/embl/format_embl'
!
! end #module NucFormatter
!
! # Repository of protein sequence formatter classes
! module AminoFormatter
! # currently no formats available
! end #module AminoFormatter
!
! # Formatter base class.
! # Any formatter class should inherit this class.
! class FormatterBase
!
! # Returns a formatted string of the given sequence
! # ---
! # *Arguments*:
! # * (required) _sequence_: Bio::Sequence object
! # * (optional) _options_: a Hash object
! # *Returns*:: String object
! def self.output(sequence, options = {})
! self.new(sequence, options).output
! end
!
! # register new Erb template
! def self.erb_template(str)
! erb = ERB.new(str)
! erb.def_method(self, 'output')
! true
! end
! private_class_method :erb_template
!
! # generates output data
! # ---
! # *Returns*:: String object
! def output
! raise NotImplementedError, 'should be implemented in subclass'
! end
!
! # creates a new formatter object for output
! def initialize(sequence, options = {})
! @sequence = sequence
! @options = options
! end
!
! private
!
! # any unknown methods are delegated to the sequence object
! def method_missing(sym, *args, &block) #:nodoc:
! begin
! @sequence.__send__(sym, *args, &block)
! rescue NoMethodError => evar
! lineno = __LINE__ - 2
! file = __FILE__
! bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
! "#{file}:#{lineno}:in \`method_missing\'"
! ]
! if bt_here == evar.backtrace[0, 2] then
! bt = evar.backtrace[2..-1]
! evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
! evar.set_backtrace(bt)
! end
! raise(evar)
! end
! end
! end #class FormatterBase
!
! # Using Bio::Sequence::Format, return a String with the Bio::Sequence
! # object formatted in the given style.
#
! # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
#
# s = Bio::Sequence.new('atgc')
# puts s.output(:fasta) #=> "> \natgc\n"
+ #
+ # The style argument is given as a Ruby
+ # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
# ---
! # *Arguments*:
! # * (required) _format_: :fasta, :genbank, *or* :embl
# *Returns*:: String object
! def output(format = :fasta, options = {})
! formatter_const = format.to_s.capitalize.intern
! formatter_class = nil
! get_formatter_repositories.each do |mod|
! begin
! formatter_class = mod.const_get(formatter_const)
! rescue NameError
! end
! break if formatter_class
! end
! unless formatter_class then
! raise "unknown format name #{format.inspect}"
! end
!
! formatter_class.output(self, options)
! end
!
! # Returns a list of available output formats for the sequence
! # ---
! # *Arguments*:
! # *Returns*:: Array of Symbols
! def list_output_formats
! a = get_formatter_repositories.collect { |mod| mod.constants }
! a.flatten!
! a.collect! { |x| x.to_s.downcase.intern }
! a
! end
!
! private
!
! # returns formatter repository modules
! def get_formatter_repositories
! if self.moltype == Bio::Sequence::NA then
! [ NucFormatter, Formatter ]
! elsif self.moltype == Bio::Sequence::AA then
! [ AminoFormatter, Formatter ]
else
! [ NucFormatter, AminoFormatter, Formatter ]
end
end
***************
*** 72,90 ****
#end
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
# case, it would be difficult to successfully call this method outside
# its expected context).
#
! # Output the Genbank format string of the sequence.
# Used in Bio::Sequence#output.
# ---
# *Returns*:: String object
! #def format_genbank
! # prefix = ' ' * 5
! # indent = prefix + ' ' * 16
! # fwidth = 79 - indent.length
! #
! # format_features(prefix, indent, fwidth)
! #end
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
--- 191,215 ----
#end
+ #+++
+
+ # Formatting helper methods for INSD (NCBI, EMBL, DDBJ) feature table
+ module INSDFeatureHelper
+ private
+
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
# case, it would be difficult to successfully call this method outside
# its expected context).
#
! # Output the Genbank feature format string of the sequence.
# Used in Bio::Sequence#output.
# ---
# *Returns*:: String object
! def format_features_genbank(features)
! prefix = ' ' * 5
! indent = prefix + ' ' * 16
! fwidth = 79 - indent.length
!
! format_features(features, prefix, indent, fwidth)
! end
# INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
***************
*** 92,130 ****
# its expected context).
#
! # Output the EMBL format string of the sequence.
# Used in Bio::Sequence#output.
# ---
# *Returns*:: String object
! #def format_embl
! # prefix = 'FT '
! # indent = prefix + ' ' * 16
! # fwidth = 80 - indent.length
! #
! # format_features(prefix, indent, fwidth)
! #end
!
! #+++
!
! private
! def format_features(prefix, indent, width)
! result = ''
! @features.each do |feature|
! result << prefix + sprintf("%-16s", feature.feature)
! position = feature.position
! #position = feature.locations.to_s
! head = ''
! wrap(position, width).each_line do |line|
! result << head << line
! head = indent
! end
! result << format_qualifiers(feature.qualifiers, indent, width)
! end
return result
end
def format_qualifiers(qualifiers, indent, width)
qualifiers.collect do |qualifier|
--- 217,255 ----
# its expected context).
#
! # Output the EMBL feature format string of the sequence.
# Used in Bio::Sequence#output.
# ---
# *Returns*:: String object
! def format_features_embl(features)
! prefix = 'FT '
! indent = prefix + ' ' * 16
! fwidth = 80 - indent.length
!
! format_features(features, prefix, indent, fwidth)
! end
! # format INSD features
! def format_features(features, prefix, indent, width)
! result = []
! features.each do |feature|
! result.push format_feature(feature, prefix, indent, width)
! end
! return result.join('')
! end
! # format an INSD feature
! def format_feature(feature, prefix, indent, width)
! result = prefix + sprintf("%-16s", feature.feature)
! position = feature.position
! #position = feature.locations.to_s
! result << wrap_and_split_lines(position, width).join("\n" + indent)
! result << "\n"
! result << format_qualifiers(feature.qualifiers, indent, width)
return result
end
+ # format qualifiers
def format_qualifiers(qualifiers, indent, width)
qualifiers.collect do |qualifier|
***************
*** 133,137 ****
if v == true
! lines = wrap('/' + q, width)
elsif q == 'translation'
lines = fold("/#{q}=\"#{v}\"", width)
--- 258,262 ----
if v == true
! lines = wrap_with_newline('/' + q, width)
elsif q == 'translation'
lines = fold("/#{q}=\"#{v}\"", width)
***************
*** 142,146 ****
v = '"' + v + '"'
end
! lines = wrap('/' + q + '=' + v, width)
end
--- 267,271 ----
v = '"' + v + '"'
end
! lines = wrap_with_newline('/' + q + '=' + v, width)
end
***************
*** 154,158 ****
end
! def wrap(str, width)
result = []
left = str.dup
--- 279,287 ----
end
! def fold_and_split_lines(str, width)
! str.scan(Regexp.new(".{1,#{width}}"))
! end
!
! def wrap_and_split_lines(str, width)
result = []
left = str.dup
***************
*** 172,176 ****
result << line
end
! result << left if left
result_string = result.join("\n")
result_string << "\n" unless result_string.empty?
--- 301,310 ----
result << line
end
! result << left if left and !(left.to_s.empty?)
! return result
! end
!
! def wrap_with_newline(str, width)
! result = wrap_and_split_lines(str, width)
result_string = result.join("\n")
result_string << "\n" unless result_string.empty?
***************
*** 178,185 ****
end
! end # Format
! end # Sequence
! end # Bio
--- 312,329 ----
end
! def wrap(str, width = 80, prefix = '')
! actual_width = width - prefix.length
! result = wrap_and_split_lines(str, actual_width)
! result_string = result.join("\n#{prefix}")
! result_string = prefix + result_string unless result_string.empty?
! return result_string
! end
! end #module INSDFeatureHelper
! end #module Format
!
! end #class Sequence
!
! end #module Bio
More information about the bioruby-cvs
mailing list