[BioRuby-cvs] bioruby/lib/bio/appl/mafft report.rb,1.12,1.13

Naohisa Goto ngoto at dev.open-bio.org
Mon Jul 16 12:21:41 UTC 2007


Update of /home/repository/bioruby/bioruby/lib/bio/appl/mafft
In directory dev.open-bio.org:/tmp/cvs-serv19932/lib/bio/appl/mafft

Modified Files:
	report.rb 
Log Message:
* lib/bio/appl/mafft/report.rb
  For generic multi-fasta formatted sequence alignment,
  Bio::Alignment::MultiFastaFormat is newly added based on
  Bio::MAFFT::Report class, and Bio::MAFFT::Report is
  changed to inherit the new class.
  Tests are added in test/unit/bio/appl/mafft/test_report.rb.
* lib/bio/alignment.rb
  added autoload of Bio::Alignment::MultiFastaFormat.
  New modules and classes Bio::Alignment::FactoryTemplate::* are added.


Index: report.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft/report.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** report.rb	5 Apr 2007 23:35:40 -0000	1.12
--- report.rb	16 Jul 2007 12:21:39 -0000	1.13
***************
*** 2,6 ****
  # = bio/appl/mafft/report.rb - MAFFT report class
  #
! # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>
  # License::   The Ruby License
  #
--- 2,6 ----
  # = bio/appl/mafft/report.rb - MAFFT report class
  #
! # Copyright:: Copyright (C) 2003, 2007  Naohisa Goto <ng at bioruby.org>
  # License::   The Ruby License
  #
***************
*** 14,17 ****
--- 14,21 ----
  # interface between Bio::ClustalW::Report.
  #
+ # Bio::Alignment::MultiFastaFormat is a generic data class for
+ # fasta-formatted multiple sequence alignment data.
+ # Bio::MAFFT::Report inherits Bio::Alignment::MultiFastaFormat.
+ #
  # == References
  #
***************
*** 26,32 ****
--- 30,121 ----
  require 'bio/db/fasta'
  require 'bio/io/flatfile'
+ require 'bio/alignment'
  require 'bio/appl/mafft'
  
  module Bio
+   module Alignment
+     # Data class for fasta-formatted multiple sequence alignment data,
+     # which is simply multiple entries of fasta formatted sequences.
+     class MultiFastaFormat
+ 
+       # delimiter for flatfile
+       DELIMITER = RS = nil
+ 
+       # Creates a new data object.
+       # +str+ should be a (multi-)fasta formatted string.
+       def initialize(str)
+         ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
+         @data = ff.to_a
+         @alignment = nil
+         @seq_method = nil
+       end
+ 
+       # Gets a multiple alignment.
+       # Returns a Bio::Alignment object.
+       # +method+ should be one of :naseq, :aaseq, :seq, or nil (default).
+       # nil means to automatically determine nucleotide or amino acid.
+       #
+       # This method returns previously parsed object
+       # if the same method is given (or guessed method is the same).
+       def alignment(method = nil)
+         m = determine_seq_method(@data, method)
+         if !@alignment or m != @seq_method then
+           @seq_method = m
+           @alignment = do_parse(@data, @seq_method)
+         end
+         @alignment
+       end
+ 
+       # Gets an array of the fasta formatted sequence objects.
+       # Returns an array of Bio::FastaFormat objects.
+       def entries
+         @data
+       end
+ 
+       private
+       # determines seqtype.
+       # if nil is given, try to guess DNA or protein.
+       def determine_seq_method(data, m = nil)
+         case m
+         when :aaseq
+           :aaseq
+         when :naseq
+           :naseq
+         when :seq
+           :seq
+         when nil
+           # auto-detection
+           score = 0
+           data[0, 3].each do |e|
+             k = e.to_seq.guess
+             if k == Bio::Sequence::NA then
+               score += 1
+             elsif k == Bio::Sequence::AA then
+               score -= 1
+             end
+           end
+           if score > 0 then
+             :naseq
+           elsif score < 0 then
+             :aaseq
+           else
+             :seq
+           end
+         else
+           raise 'one of :naseq, :aaseq, :seq, or nil should be given'
+         end
+       end
+ 
+       # Parses a result.
+       def do_parse(ary, seqmethod)
+         a = Bio::Alignment.new
+         a.add_sequences(ary) do |x|
+           [ x.__send__(seqmethod), x.definition ]
+         end
+         a
+       end
+     end #class MultiFastaFormat
+   end #module Alignment
+ 
    class MAFFT
  
***************
*** 37,50 ****
      # the significance of this class is to keep standard form and
      # interface between Bio::ClustalW::Report.
!     class Report
  
        # Creates a new Report object.
        # +str+ should be multi-fasta formatted text as a string.
-       # +seqclass+ should on of following:
-       # Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
-       # String: 'PROTEIN', 'DNA', ...
        #
        # Compatibility Note: the old usage (to get array of Bio::FastaFormat
        # objects) is deprecated.
        def initialize(str, seqclass = nil)
          if str.is_a?(Array) then
--- 126,143 ----
      # the significance of this class is to keep standard form and
      # interface between Bio::ClustalW::Report.
!     class Report < Bio::Alignment::MultiFastaFormat
  
        # Creates a new Report object.
        # +str+ should be multi-fasta formatted text as a string.
        #
        # Compatibility Note: the old usage (to get array of Bio::FastaFormat
        # objects) is deprecated.
+       #
+       # Compatibility Note 2: the argument +seqclass+ is deprecated.
+       #
+       # +seqclass+ should be one of following:
+       # Class:  Bio::Sequence::AA, Bio::Sequence::NA, ...
+       # String: 'PROTEIN', 'DNA', ...
+       #
        def initialize(str, seqclass = nil)
          if str.is_a?(Array) then
***************
*** 52,69 ****
            @data = str
          else
!           ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
!           @data = ff.to_a
          end
!         @align = nil
!         case seqclass
!         when /PROTEIN/i
!           @seqclass = Bio::Sequence::AA
!         when /[DR]NA/i
!           @seqclass = Bio::Sequence::NA
!         else
!           if seqclass.is_a?(Module) then
!             @seqclass = seqclass
            else
!             @seqclass = Bio::Sequence
            end
          end
--- 145,164 ----
            @data = str
          else
!           super(str)
          end
! 
!         if seqclass then
!           warn "the 2nd argument (seqclass) will be no deprecated."
!           case seqclass
!           when /PROTEIN/i
!             @seqclass = Bio::Sequence::AA
!           when /[DR]NA/i
!             @seqclass = Bio::Sequence::NA
            else
!             if seqclass.is_a?(Module) then
!               @seqclass = seqclass
!             else
!               @seqclass = nil
!             end
            end
          end
***************
*** 74,103 ****
  
        # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
        attr_reader :seqclass
  
        # Gets an multiple alignment.
        # Returns a Bio::Alignment object.
!       def alignment
!         do_parse() unless @align
!         @align
        end
  
!       # This will be deprecated. Instead, please use alignment.
        #
        # Gets an multiple alignment.
        # Returns a Bio::Alignment object.
        def align
!         warn "align method will be deprecated. Please use \'alignment\'."
          alignment
        end
  
        # Gets an fasta-format string of the sequences.
        # Returns a string.
        # Same as align.to_fasta.
!       # Please refer to Bio::Alignment#to_fasta for arguments.
        def to_fasta(*arg)
!         alignment.to_fasta(*arg)
        end
  
        # Gets an array of the sequences.
        # Returns an array of Bio::FastaFormat instances.
--- 169,205 ----
  
        # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
+       #
+       # Compatibility note: This method will be removed in the future.
        attr_reader :seqclass
  
        # Gets an multiple alignment.
        # Returns a Bio::Alignment object.
!       def alignment(method = nil)
!         super
        end
  
!       # This method will be deprecated. Instead, please use alignment.
        #
        # Gets an multiple alignment.
        # Returns a Bio::Alignment object.
        def align
!         warn "Bio::MAFFT::Report#align is deprecated. Please use \'alignment\'."
          alignment
        end
  
+       # This will be deprecated. Instead, please use alignment.output_fasta.
+       #
        # Gets an fasta-format string of the sequences.
        # Returns a string.
        # Same as align.to_fasta.
!       # Please refer to Bio::Alignment#output_fasta for arguments.
        def to_fasta(*arg)
!         warn "Bio::MAFFT::report#to_fasta is deprecated. Please use \'alignment.output_fasta\'"
!         alignment.output_fasta(*arg)
        end
  
+       # Compatibility note: Behavior of the method will be changed
+       # in the future.
+       #
        # Gets an array of the sequences.
        # Returns an array of Bio::FastaFormat instances.
***************
*** 108,117 ****
        private
        # Parsing a result.
!       def do_parse
!         return nil if @align
!         @align = Bio::Alignment.new(@data) do |x|
!           [ @seqclass.new(x.seq), x.definition ]
          end
-         nil
        end
  
--- 210,222 ----
        private
        # Parsing a result.
!       def do_parse(ary, seqmethod)
!         if @seqclass then
!           a = Bio::Alignment.new
!           a.add_sequences(ary) do |x|
!             [ @seqclass.new(x.seq), x.definition ]
!           end
!         else
!           super(ary, seqmethod)
          end
        end
  




More information about the bioruby-cvs mailing list