[BioRuby-cvs] bioruby/lib/bio/appl/mafft report.rb,1.12,1.13
Naohisa Goto
ngoto at dev.open-bio.org
Mon Jul 16 12:21:41 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/appl/mafft
In directory dev.open-bio.org:/tmp/cvs-serv19932/lib/bio/appl/mafft
Modified Files:
report.rb
Log Message:
* lib/bio/appl/mafft/report.rb
For generic multi-fasta formatted sequence alignment,
Bio::Alignment::MultiFastaFormat is newly added based on
Bio::MAFFT::Report class, and Bio::MAFFT::Report is
changed to inherit the new class.
Tests are added in test/unit/bio/appl/mafft/test_report.rb.
* lib/bio/alignment.rb
added autoload of Bio::Alignment::MultiFastaFormat.
New modules and classes Bio::Alignment::FactoryTemplate::* are added.
Index: report.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft/report.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** report.rb 5 Apr 2007 23:35:40 -0000 1.12
--- report.rb 16 Jul 2007 12:21:39 -0000 1.13
***************
*** 2,6 ****
# = bio/appl/mafft/report.rb - MAFFT report class
#
! # Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto at gen-info.osaka-u.ac.jp>
# License:: The Ruby License
#
--- 2,6 ----
# = bio/appl/mafft/report.rb - MAFFT report class
#
! # Copyright:: Copyright (C) 2003, 2007 Naohisa Goto <ng at bioruby.org>
# License:: The Ruby License
#
***************
*** 14,17 ****
--- 14,21 ----
# interface between Bio::ClustalW::Report.
#
+ # Bio::Alignment::MultiFastaFormat is a generic data class for
+ # fasta-formatted multiple sequence alignment data.
+ # Bio::MAFFT::Report inherits Bio::Alignment::MultiFastaFormat.
+ #
# == References
#
***************
*** 26,32 ****
--- 30,121 ----
require 'bio/db/fasta'
require 'bio/io/flatfile'
+ require 'bio/alignment'
require 'bio/appl/mafft'
module Bio
+ module Alignment
+ # Data class for fasta-formatted multiple sequence alignment data,
+ # which is simply multiple entries of fasta formatted sequences.
+ class MultiFastaFormat
+
+ # delimiter for flatfile
+ DELIMITER = RS = nil
+
+ # Creates a new data object.
+ # +str+ should be a (multi-)fasta formatted string.
+ def initialize(str)
+ ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
+ @data = ff.to_a
+ @alignment = nil
+ @seq_method = nil
+ end
+
+ # Gets a multiple alignment.
+ # Returns a Bio::Alignment object.
+ # +method+ should be one of :naseq, :aaseq, :seq, or nil (default).
+ # nil means to automatically determine nucleotide or amino acid.
+ #
+ # This method returns previously parsed object
+ # if the same method is given (or guessed method is the same).
+ def alignment(method = nil)
+ m = determine_seq_method(@data, method)
+ if !@alignment or m != @seq_method then
+ @seq_method = m
+ @alignment = do_parse(@data, @seq_method)
+ end
+ @alignment
+ end
+
+ # Gets an array of the fasta formatted sequence objects.
+ # Returns an array of Bio::FastaFormat objects.
+ def entries
+ @data
+ end
+
+ private
+ # determines seqtype.
+ # if nil is given, try to guess DNA or protein.
+ def determine_seq_method(data, m = nil)
+ case m
+ when :aaseq
+ :aaseq
+ when :naseq
+ :naseq
+ when :seq
+ :seq
+ when nil
+ # auto-detection
+ score = 0
+ data[0, 3].each do |e|
+ k = e.to_seq.guess
+ if k == Bio::Sequence::NA then
+ score += 1
+ elsif k == Bio::Sequence::AA then
+ score -= 1
+ end
+ end
+ if score > 0 then
+ :naseq
+ elsif score < 0 then
+ :aaseq
+ else
+ :seq
+ end
+ else
+ raise 'one of :naseq, :aaseq, :seq, or nil should be given'
+ end
+ end
+
+ # Parses a result.
+ def do_parse(ary, seqmethod)
+ a = Bio::Alignment.new
+ a.add_sequences(ary) do |x|
+ [ x.__send__(seqmethod), x.definition ]
+ end
+ a
+ end
+ end #class MultiFastaFormat
+ end #module Alignment
+
class MAFFT
***************
*** 37,50 ****
# the significance of this class is to keep standard form and
# interface between Bio::ClustalW::Report.
! class Report
# Creates a new Report object.
# +str+ should be multi-fasta formatted text as a string.
- # +seqclass+ should on of following:
- # Class: Bio::Sequence::AA, Bio::Sequence::NA, ...
- # String: 'PROTEIN', 'DNA', ...
#
# Compatibility Note: the old usage (to get array of Bio::FastaFormat
# objects) is deprecated.
def initialize(str, seqclass = nil)
if str.is_a?(Array) then
--- 126,143 ----
# the significance of this class is to keep standard form and
# interface between Bio::ClustalW::Report.
! class Report < Bio::Alignment::MultiFastaFormat
# Creates a new Report object.
# +str+ should be multi-fasta formatted text as a string.
#
# Compatibility Note: the old usage (to get array of Bio::FastaFormat
# objects) is deprecated.
+ #
+ # Compatibility Note 2: the argument +seqclass+ is deprecated.
+ #
+ # +seqclass+ should be one of the following:
+ # Class: Bio::Sequence::AA, Bio::Sequence::NA, ...
+ # String: 'PROTEIN', 'DNA', ...
+ #
def initialize(str, seqclass = nil)
if str.is_a?(Array) then
***************
*** 52,69 ****
@data = str
else
! ff = Bio::FlatFile.new(Bio::FastaFormat, StringIO.new(str))
! @data = ff.to_a
end
! @align = nil
! case seqclass
! when /PROTEIN/i
! @seqclass = Bio::Sequence::AA
! when /[DR]NA/i
! @seqclass = Bio::Sequence::NA
! else
! if seqclass.is_a?(Module) then
! @seqclass = seqclass
else
! @seqclass = Bio::Sequence
end
end
--- 145,164 ----
@data = str
else
! super(str)
end
!
! if seqclass then
! warn "the 2nd argument (seqclass) will be no deprecated."
! case seqclass
! when /PROTEIN/i
! @seqclass = Bio::Sequence::AA
! when /[DR]NA/i
! @seqclass = Bio::Sequence::NA
else
! if seqclass.is_a?(Module) then
! @seqclass = seqclass
! else
! @seqclass = nil
! end
end
end
***************
*** 74,103 ****
# Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
attr_reader :seqclass
# Gets an multiple alignment.
# Returns a Bio::Alignment object.
! def alignment
! do_parse() unless @align
! @align
end
! # This will be deprecated. Instead, please use alignment.
#
# Gets an multiple alignment.
# Returns a Bio::Alignment object.
def align
! warn "align method will be deprecated. Please use \'alignment\'."
alignment
end
# Gets an fasta-format string of the sequences.
# Returns a string.
# Same as align.to_fasta.
! # Please refer to Bio::Alignment#to_fasta for arguments.
def to_fasta(*arg)
! alignment.to_fasta(*arg)
end
# Gets an array of the sequences.
# Returns an array of Bio::FastaFormat instances.
--- 169,205 ----
# Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...)
+ #
+ # Compatibility note: This method will be removed in the future.
attr_reader :seqclass
# Gets an multiple alignment.
# Returns a Bio::Alignment object.
! def alignment(method = nil)
! super
end
! # This method will be deprecated. Instead, please use alignment.
#
# Gets an multiple alignment.
# Returns a Bio::Alignment object.
def align
! warn "Bio::MAFFT::Report#align is deprecated. Please use \'alignment\'."
alignment
end
+ # This will be deprecated. Instead, please use alignment.output_fasta.
+ #
# Gets an fasta-format string of the sequences.
# Returns a string.
# Same as align.to_fasta.
! # Please refer to Bio::Alignment#output_fasta for arguments.
def to_fasta(*arg)
! warn "Bio::MAFFT::report#to_fasta is deprecated. Please use \'alignment.output_fasta\'"
! alignment.output_fasta(*arg)
end
+ # Compatibility note: Behavior of the method will be changed
+ # in the future.
+ #
# Gets an array of the sequences.
# Returns an array of Bio::FastaFormat instances.
***************
*** 108,117 ****
private
# Parsing a result.
! def do_parse
! return nil if @align
! @align = Bio::Alignment.new(@data) do |x|
! [ @seqclass.new(x.seq), x.definition ]
end
- nil
end
--- 210,222 ----
private
# Parsing a result.
! def do_parse(ary, seqmethod)
! if @seqclass then
! a = Bio::Alignment.new
! a.add_sequences(ary) do |x|
! [ @seqclass.new(x.seq), x.definition ]
! end
! else
! super(ary, seqmethod)
end
end
More information about the bioruby-cvs
mailing list