[BioRuby-cvs] bioruby/lib/bio/sequence format.rb,1.4.2.6,1.4.2.7

Naohisa Goto ngoto at dev.open-bio.org
Tue Mar 4 11:10:30 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv7656/lib/bio/sequence

Modified Files:
      Tag: BRANCH-biohackathon2008
	format.rb 
Log Message:
* lib/bio/sequence.rb
  Bio::Sequence#output is moved to lib/bio/sequence/format.rb.
* lib/bio/sequence/format.rb
 * Bio::Sequence#output is changed so that it no longer reads the erb file
   directly.
 * Bio::Sequence::Format::FormatterBase class, a base class for formatters,
   is newly added.
 * Bio::Sequence::Format::Formatter, NucFormatter, AminoFormatter are
   newly added to store formatter classes.
 * Bio::Sequence#list_output_formats is added.
 * (The names of the above classes/modules/methods may be changed if more
   appropriate names are proposed.)


Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.4.2.6
retrieving revision 1.4.2.7
diff -C2 -d -r1.4.2.6 -r1.4.2.7
*** format.rb	22 Feb 2008 14:30:44 -0000	1.4.2.6
--- format.rb	4 Mar 2008 11:10:28 -0000	1.4.2.7
***************
*** 2,9 ****
  # = bio/sequence/format.rb - various output format of the biological sequence
  #
! # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     The Ruby License
  #
--- 2,10 ----
  # = bio/sequence/format.rb - various output format of the biological sequence
  #
! # Copyright::   Copyright (C) 2006-2008
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>,
! #               Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::     The Ruby License
  #
***************
*** 15,18 ****
--- 16,20 ----
  #
  
+ require 'erb'
  
  module Bio
***************
*** 32,62 ****
  module Format
  
!   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
!   # case, it would be difficult to successfully call this method outside
!   # its expected context).
!   #
!   # Output the FASTA format string of the sequence.  
!   #
!   # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of 
!   # using either the header or width arguments.  So something needs to be
!   # changed...
    #
!   # Currently, this method is used in Bio::Sequence#output like so,
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.output(:fasta)                   #=> "> \natgc\n"
    # ---
!   # *Arguments*:
!   # * (optional) _header_: String (default nil)
!   # * (optional) _width_: Fixnum (default nil)
    # *Returns*:: String object
!   def format_fasta(header = nil, width = nil)
!     header ||= "#{@entry_id} #{@definition}"
  
!     ">#{header}\n" +
!     if width
!       @seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
      else
!       @seq.to_s + "\n"
      end
    end
--- 34,181 ----
  module Format
  
!   # Repository of generic (or both nucleotide and protein) sequence
!   # formatter classes
!   module Formatter
! 
!     # Raw format generatar
!     autoload :Raw, 'bio/sequence/format_raw'
! 
!     # Fasta format generater
!     autoload :Fasta, 'bio/db/fasta/format_fasta'
! 
!     # NCBI-style Fasta format generatar
!     # (resemble to EMBOSS "ncbi" format)
!     autoload :Fasta_ncbi, 'bio/db/fasta/format_fasta'
! 
!   end #module Formatter
! 
!   # Repository of nucleotide sequence formatter classes
!   module NucFormatter
! 
!     # GenBank format generater
!     # Note that the name is 'Genbank' and NOT 'GenBank'
!     autoload :Genbank, 'bio/db/genbank/format_genbank'
! 
!     # EMBL format generater
!     # Note that the name is 'Embl' and NOT 'EMBL'
!     autoload :Embl, 'bio/db/embl/format_embl'
! 
!   end #module NucFormatter
! 
!   # Repository of protein sequence formatter classes
!   module AminoFormatter
!     # currently no formats available
!   end #module AminoFormatter
! 
!   # Formatter base class.
!   # Any formatter class should inherit this class.
!   class FormatterBase
! 
!     # Returns a formatterd string of the given sequence
!     # ---
!     # *Arguments*:
!     # * (required) _sequence_: Bio::Sequence object
!     # * (optional) _options_: a Hash object
!     # *Returns*:: String object
!     def self.output(sequence, options = {})
!       self.new(sequence, options).output
!     end
! 
!     # register new Erb template
!     def self.erb_template(str)
!       erb = ERB.new(str)
!       erb.def_method(self, 'output')
!       true
!     end
!     private_class_method :erb_template
! 
!     # generates output data
!     # ---
!     # *Returns*:: String object
!     def output
!       raise NotImplementedError, 'should be implemented in subclass'
!     end
! 
!     # creates a new formatter object for output
!     def initialize(sequence, options = {})
!       @sequence = sequence
!       @options = options
!     end
! 
!     private
! 
!     # any unknown methods are delegated to the sequence object
!     def method_missing(sym, *args, &block) #:nodoc:
!       begin
!         @sequence.__send__(sym, *args, &block)
!       rescue NoMethodError => evar
!         lineno = __LINE__ - 2
!         file = __FILE__
!         bt_here = [ "#{file}:#{lineno}:in \`__send__\'",
!                     "#{file}:#{lineno}:in \`method_missing\'"
!                   ]
!         if bt_here == evar.backtrace[0, 2] then
!           bt = evar.backtrace[2..-1]
!           evar = evar.class.new("undefined method \`#{sym.to_s}\' for #{self.inspect}")
!           evar.set_backtrace(bt)
!         end
!         raise(evar)
!       end
!     end
!   end #class FormatterBase
! 
!   # Using Bio::Sequence::Format, return a String with the Bio::Sequence
!   # object formatted in the given style.
    #
!   # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
    #
    #   s = Bio::Sequence.new('atgc')
    #   puts s.output(:fasta)                   #=> "> \natgc\n"
+   #
+   # The style argument is given as a Ruby 
+   # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
    # ---
!   # *Arguments*: 
!   # * (required) _format_: :fasta, :genbank, *or* :embl
    # *Returns*:: String object
!   def output(format = :fasta, options = {})
!     formatter_const = format.to_s.capitalize.intern
  
!     formatter_class = nil
!     get_formatter_repositories.each do |mod|
!       begin
!         formatter_class = mod.const_get(formatter_const)
!       rescue NameError
!       end
!       break if formatter_class
!     end
!     unless formatter_class then
!       raise "unknown format name #{format.inspect}"
!     end
! 
!     formatter_class.output(self, options)
!   end
! 
!   # Returns a list of available output formats for the sequence
!   # ---
!   # *Arguments*: 
!   # *Returns*:: Array of Symbols
!   def list_output_formats
!     a = get_formatter_repositories.collect { |mod| mod.constants }
!     a.flatten!
!     a.collect! { |x| x.to_s.downcase.intern }
!     a
!   end
! 
!   private
! 
!   # returns formatter repository modules
!   def get_formatter_repositories
!     if self.moltype == Bio::Sequence::NA then
!       [ NucFormatter, Formatter ]
!     elsif self.moltype == Bio::Sequence::AA then
!       [ AminoFormatter, Formatter ]
      else
!       [ NucFormatter, AminoFormatter, Formatter ]
      end
    end
***************
*** 72,90 ****
    #end
  
    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
    # case, it would be difficult to successfully call this method outside
    # its expected context).
    #
!   # Output the Genbank format string of the sequence.  
    # Used in Bio::Sequence#output.
    # ---
    # *Returns*:: String object
!   #def format_genbank
!   #  prefix = ' ' * 5
!   #  indent = prefix + ' ' * 16
!   #  fwidth = 79 - indent.length
!   #
!   #  format_features(prefix, indent, fwidth)
!   #end
  
    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
--- 191,215 ----
    #end
  
+   #+++
+ 
+ # Formatting helper methods for INSD (NCBI, EMBL, DDBJ) feature table
+ module INSDFeatureHelper
+   private
+ 
    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
    # case, it would be difficult to successfully call this method outside
    # its expected context).
    #
!   # Output the Genbank feature format string of the sequence.
    # Used in Bio::Sequence#output.
    # ---
    # *Returns*:: String object
!   def format_features_genbank(features)
!     prefix = ' ' * 5
!     indent = prefix + ' ' * 16
!     fwidth = 79 - indent.length
!   
!     format_features(features, prefix, indent, fwidth)
!   end
  
    # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
***************
*** 92,130 ****
    # its expected context).
    #
!   # Output the EMBL format string of the sequence.  
    # Used in Bio::Sequence#output.
    # ---
    # *Returns*:: String object
!   #def format_embl
!   #  prefix = 'FT   '
!   #  indent = prefix + ' ' * 16
!   #  fwidth = 80 - indent.length
!   #
!   #  format_features(prefix, indent, fwidth)
!   #end
! 
!   #+++
! 
!   private
  
!   def format_features(prefix, indent, width)
!     result = ''
!     @features.each do |feature|
!       result << prefix + sprintf("%-16s", feature.feature)
  
!       position = feature.position
!       #position = feature.locations.to_s
  
!       head = ''
!       wrap(position, width).each_line do |line|
!         result << head << line
!         head = indent
!       end
  
!       result << format_qualifiers(feature.qualifiers, indent, width)
!     end
      return result
    end
  
    def format_qualifiers(qualifiers, indent, width)
      qualifiers.collect do |qualifier|
--- 217,255 ----
    # its expected context).
    #
!   # Output the EMBL feature format string of the sequence.
    # Used in Bio::Sequence#output.
    # ---
    # *Returns*:: String object
!   def format_features_embl(features)
!     prefix = 'FT   '
!     indent = prefix + ' ' * 16
!     fwidth = 80 - indent.length
!   
!     format_features(features, prefix, indent, fwidth)
!   end
  
!   # format INSD featurs
!   def format_features(features, prefix, indent, width)
!     result = []
!     features.each do |feature|
!       result.push format_feature(feature, prefix, indent, width)
!     end
!     return result.join('')
!   end
  
!   # format an INSD feature
!   def format_feature(feature, prefix, indent, width)
!     result = prefix + sprintf("%-16s", feature.feature)
  
!     position = feature.position
!     #position = feature.locations.to_s
  
!     result << wrap_and_split_lines(position, width).join("\n" + indent)
!     result << "\n"
!     result << format_qualifiers(feature.qualifiers, indent, width)
      return result
    end
  
+   # format qualifiers
    def format_qualifiers(qualifiers, indent, width)
      qualifiers.collect do |qualifier|
***************
*** 133,137 ****
  
        if v == true
!         lines = wrap('/' + q, width)
        elsif q == 'translation'
          lines = fold("/#{q}=\"#{v}\"", width)
--- 258,262 ----
  
        if v == true
!         lines = wrap_with_newline('/' + q, width)
        elsif q == 'translation'
          lines = fold("/#{q}=\"#{v}\"", width)
***************
*** 142,146 ****
            v = '"' + v + '"'
          end
!         lines = wrap('/' + q + '=' + v, width)
        end
  
--- 267,271 ----
            v = '"' + v + '"'
          end
!         lines = wrap_with_newline('/' + q + '=' + v, width)
        end
  
***************
*** 154,158 ****
    end
  
!   def wrap(str, width)
      result = []
      left = str.dup
--- 279,287 ----
    end
  
!   def fold_and_split_lines(str, width)
!     str.scan(Regexp.new(".{1,#{width}}"))
!   end
! 
!   def wrap_and_split_lines(str, width)
      result = []
      left = str.dup
***************
*** 172,176 ****
        result << line
      end
!     result << left if left
      result_string = result.join("\n")
      result_string << "\n" unless result_string.empty?
--- 301,310 ----
        result << line
      end
!     result << left if left and !(left.to_s.empty?)
!     return result
!   end
! 
!   def wrap_with_newline(str, width)
!     result = wrap_and_split_lines(str, width)
      result_string = result.join("\n")
      result_string << "\n" unless result_string.empty?
***************
*** 178,185 ****
    end
  
! end # Format
  
! end # Sequence
  
! end # Bio
  
--- 312,329 ----
    end
  
!   def wrap(str, width = 80, prefix = '')
!     actual_width = width - prefix.length
!     result = wrap_and_split_lines(str, actual_width)
!     result_string = result.join("\n#{prefix}")
!     result_string = prefix + result_string unless result_string.empty?
!     return result_string
!   end
  
! end #module INSDFeatureHelper
  
! end #module Format
! 
! end #class Sequence
! 
! end #module Bio
  




More information about the bioruby-cvs mailing list