[BioRuby-cvs] bioruby/lib/bio alignment.rb,1.10,1.11
Naohisa Goto
ngoto at pub.open-bio.org
Fri Nov 25 10:36:45 EST 2005
Update of /home/repository/bioruby/bioruby/lib/bio
In directory pub.open-bio.org:/tmp/cvs-serv24169
Modified Files:
alignment.rb
Log Message:
many changes
Bio::Alignment is changed to a module instead of class.
GeneralExtension is changed to EnumerableExtension.
CharacterProperty is changed to PropertyMethods.
Many methods are changed.
Index: alignment.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/alignment.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** alignment.rb 24 Nov 2005 16:21:00 -0000 1.10
--- alignment.rb 25 Nov 2005 15:36:43 -0000 1.11
***************
*** 25,31 ****
#++
#
! # = About Bio::Alignment class
#
! # Bio::Alignment is a multiple alignment container class.
#
# = References
--- 25,31 ----
#++
#
! # = About Bio::Alignment
#
! # Please refer to the documentation of the Bio::Alignment module.
#
# = References
***************
*** 33,52 ****
# * Bio::Align::AlignI class of the BioPerl.
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
! #
# * Bio::SimpleAlign class of the BioPerl.
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
#
- #
require 'bio/sequence'
module Bio
- class Alignment
! # Bio::Alignment::CharacterProperty is a module to store
# the gap character and so on.
! module CharacterProperty
GAP_REGEXP = /[^a-zA-Z]/
GAP_CHAR = '-'.freeze
MISSING_CHAR = '?'.freeze
--- 33,95 ----
# * Bio::Align::AlignI class of the BioPerl.
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/Align/AlignI.html
! #
# * Bio::SimpleAlign class of the BioPerl.
# http://doc.bioperl.org/releases/bioperl-1.4/Bio/SimpleAlign.html
#
require 'bio/sequence'
module Bio
! =begin rdoc
!
! = About Bio::Alignment
!
! Bio::Alignment is a namespace of classes/modules for multiple sequence
! alignment.
!
! = Multiple alignment container classes
!
! == Bio::Alignment::OriginalAlignment
!
! == Bio::Alignment::SequenceArray
!
! == Bio::Alignment::SequenceHash
!
! = Bio::Alignment::Site
!
! = Modules
!
! == Bio::Alignment::EnumerableExtension
!
! Mix-in for classes that include Enumerable.
!
! == Bio::Alignment::ArrayExtension
!
! Mix-in for Array or Array-like classes.
!
! == Bio::Alignment::HashExtension
!
! Mix-in for Hash or Hash-like classes.
!
! == Bio::Alignment::SiteMethods
!
! == Bio::Alignment::PropertyMethods
!
! = Bio::Alignment::GAP
!
! = Compatibility from older BioRuby
!
! =end
! module Alignment
!
! # Bio::Alignment::PropertyMethods is a set of methods to treat
# the gap character and so on.
! module PropertyMethods
! # regular expression for detecting gaps.
GAP_REGEXP = /[^a-zA-Z]/
+ # gap character
GAP_CHAR = '-'.freeze
+ # missing character
MISSING_CHAR = '?'.freeze
***************
*** 79,84 ****
attr_writer :missing_char
! # Returns character property defined in the object as an hash.
! def get_character_property
ret = {}
if defined? @gap_regexp
--- 122,140 ----
attr_writer :missing_char
! # Returns the class of the sequence.
! # If the instance variable @seqclass (which can be
! # set by the 'seqclass=' method) is set, simply returns its value.
! # Otherwise (when @seqclass is unset, nil or false),
! # returns String.
! def seqclass
! @seqclass or String
! end
!
! # The class of the sequence.
! # The value must be String or its derivatives.
! attr_writer :seqclass
!
! # Returns properties defined in the object as a hash.
! def get_all_property
ret = {}
if defined? @gap_regexp
***************
*** 91,106 ****
ret[:missing_char] = @missing_char
end
ret
end
! # Sets character property from given hash.
# <em>hash</em> would be a return value of <tt>get_character</tt> method.
! def set_character_property(hash)
@gap_regexp = hash[:gap_regexp] if hash.has_key?(:gap_regexp)
@gap_char = hash[:gap_char] if hash.has_key?(:gap_char)
@missing_char = hash[:missing_char] if hash.has_key?(:missing_char)
self
end
! end #module ChacaterProperty
# Bio::Alignment::SiteMethods is a set of methods for
--- 147,166 ----
ret[:missing_char] = @missing_char
end
+ if defined? @seqclass
+ ret[:seqclass] = @seqclass
+ end
ret
end
! # Sets properties from given hash.
# <em>hash</em> would be a return value of <tt>get_character</tt> method.
! def set_all_property(hash)
@gap_regexp = hash[:gap_regexp] if hash.has_key?(:gap_regexp)
@gap_char = hash[:gap_char] if hash.has_key?(:gap_char)
@missing_char = hash[:missing_char] if hash.has_key?(:missing_char)
+ @seqclass = hash[:seqclass] if hash.has_key?(:seqclass)
self
end
! end #module PropertyMethods
# Bio::Alignment::SiteMethods is a set of methods for
***************
*** 108,112 ****
# It can also be used for extending an array of single-letter strings.
module SiteMethods
! include CharacterProperty
# If there are gaps, returns true. Otherwise, returns false.
--- 168,172 ----
# It can also be used for extending an array of single-letter strings.
module SiteMethods
! include PropertyMethods
# If there are gaps, returns true. Otherwise, returns false.
***************
*** 245,258 ****
end #module Site
! # The module Bio::Alignment::GeneralExtension is a set of useful
# methods for multiple sequence alignment.
! # It can be included to any classes or can be extended to any objects.
! #
# The classes or objects must have methods defined in Enumerable,
! # and must have 'each_seq' method
# which iterates over each sequence (or string) and yields
! # each sequence (or string) object.
! # Note that the 'each_seq' method would be called multiple times,
! # 'break' would be used in the given block and
# destructive methods would be used to the sequences.
#
--- 305,324 ----
end #module Site
! # The module Bio::Alignment::EnumerableExtension is a set of useful
# methods for multiple sequence alignment.
! # It can be included by any classes or can be extended to any objects.
# The classes or objects must have methods defined in Enumerable,
! # and must have the <tt>each</tt> method
# which iterates over each sequence (or string) and yields
! # a sequence (or string) object.
! #
! # Optionally, if <tt>each_seq</tt> method is defined,
! # which iterates over each sequence (or string) and yields
! # each sequence (or string) object, it is used instead of <tt>each</tt>.
! #
! # Note that the <tt>each</tt> or <tt>each_seq</tt> method would be
! # called multiple times.
! # This means that the module is not suitable for IO objects.
! # In addition, <tt>break</tt> would be used in the given block and
# destructive methods would be used to the sequences.
#
***************
*** 261,266 ****
# They have built-in 'each_seq' method.
#
! module GeneralExtension
! include CharacterProperty
# Returns class of the sequence.
--- 327,341 ----
# They have built-in 'each_seq' method.
#
! module EnumerableExtension
! include PropertyMethods
!
! # Iterates over each sequence.
! # Yields a sequence.
! # It acts the same as Enumerable#each.
! #
! # Redefine this method as appropriate for the class/object.
! def each_seq(&block) #:yields: seq
! each(&block)
! end
# Returns class of the sequence.
***************
*** 284,291 ****
end
- # The class of the sequence.
- # The class is expected to be String or its derivatives.
- attr_writer :seqclass
-
# Returns the alignment length.
# Returns the longest length of the sequence in the alignment.
--- 359,362 ----
***************
*** 305,311 ****
# If the position is out of range, it returns the site
# of which all are gaps.
! def alignment_site(position)
site = Site.new
- site.set_character_property(get_character_property)
each_seq do |s|
c = s[position, 1]
--- 376,381 ----
# If the position is out of range, it returns the site
# of which all are gaps.
! def _alignment_site(position)
site = Site.new
each_seq do |s|
c = s[position, 1]
***************
*** 317,320 ****
--- 387,402 ----
site
end
+ private :_alignment_site
+
+ # Gets a site of the position.
+ # Returns a Bio::Alignment::Site object.
+ #
+ # If the position is out of range, it returns the site
+ # of which all are gaps.
+ def alignment_site(position)
+ site = _alignment_site(position)
+ site.set_all_property(get_all_property)
+ site
+ end
# Iterates over each site of the alignment.
***************
*** 322,327 ****
# It returns self.
def each_site
(0...alignment_length).each do |i|
! site = alignment_site(i)
yield(site)
end
--- 404,411 ----
# It returns self.
def each_site
+ cp = get_all_property
(0...alignment_length).each do |i|
! site = _alignment_site(i)
! site.set_all_property(cp)
yield(site)
end
***************
*** 336,341 ****
# <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
def each_site_step(start, stop, step = 1)
start.step(stop, step) do |i|
! site = alignment_site(i)
yield(site)
end
--- 420,427 ----
# <tt>start.step(stop, step) { |i| yield alignment_site(i) }</tt>.
def each_site_step(start, stop, step = 1)
+ cp = get_all_property
start.step(stop, step) do |i|
! site = _alignment_site(i)
! site.set_all_property(cp)
yield(site)
end
***************
*** 351,356 ****
#
def alignment_collect
! a = Bio::Alignment::SequenceArray.new
! a.set_character_property(get_character_property)
each_seq do |str|
a << yield(str)
--- 437,442 ----
#
def alignment_collect
! a = SequenceArray.new
! a.set_all_property(get_all_property)
each_seq do |str|
a << yield(str)
***************
*** 461,465 ****
end
end
- alias consensus consensus_string
# Returns the IUPAC consensus string of the alignment
--- 547,550 ----
***************
*** 737,742 ****
alias subseq alignment_subseq
! # Concatinates the given alignment.
! # The given alignment must have 'each_seq' method.
# Returns self.
#
--- 822,829 ----
alias subseq alignment_subseq
! # Concatenates the given alignment.
! # <em>align</em> must have <tt>each_seq</tt>
! # or <tt>each</tt> method.
! #
# Returns self.
#
***************
*** 748,761 ****
#
def alignment_concat(align)
a = []
each_seq { |s| a << s }
i = 0
! align.each_seq do |seq|
! a[i].concat(seq) if a[i]
i += 1
end
self
end
! end #module GeneralExtension
# ClustalWFormatter is a module to create ClustalW-formatted text
--- 835,859 ----
#
def alignment_concat(align)
+ flag = nil
a = []
each_seq { |s| a << s }
i = 0
! begin
! align.each_seq do |seq|
! flag = true
! a[i].concat(seq) if a[i] and seq
! i += 1
! end
! return self
! rescue NoMethodError, ArgumentError => evar
! raise evar if flag
! end
! align.each do |seq|
! a[i].concat(seq) if a[i] and seq
i += 1
end
self
end
! end #module EnumerableExtension
# ClustalWFormatter is a module to create ClustalW-formatted text
***************
*** 838,847 ****
aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
len = seqs.seq_length
!
! if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
! sn = avoid_same_name(names)
! else
! sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
! end
if options[:replace_space]
sn.collect! { |x| x.gsub(/\s/, '_') }
--- 936,940 ----
aln = [ "CLUSTAL (0.00) multiple sequence alignment\n\n" ]
len = seqs.seq_length
! sn = names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
if options[:replace_space]
sn.collect! { |x| x.gsub(/\s/, '_') }
***************
*** 853,856 ****
--- 946,952 ----
sn.collect! { |x| x.split(/\s/)[0].to_s }
end
+ if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
+ sn = avoid_same_name(sn)
+ end
if sn.find { |x| x.length > 10 } then
***************
*** 921,930 ****
# (It can also be included in Array, though not recommended.)
#
! # It possesses all methods defined in GeneralExtension.
! # For usage of methods, please refer to GeneralExtension.
module ArrayExtension
! include GeneralExtension
! def each_seq(&block)
each(&block)
end
--- 1017,1030 ----
# (It can also be included in Array, though not recommended.)
#
! # It possesses all methods defined in EnumerableExtension.
! # For usage of methods, please refer to EnumerableExtension.
module ArrayExtension
! include EnumerableExtension
! # Iterates over each sequence.
! # Yields a sequence.
! #
! # It works the same as Array#each.
! def each_seq(&block) #:yields: seq
each(&block)
end
***************
*** 943,961 ****
# (It can also be included in Hash, though not recommended.)
#
! # It possesses all methods defined in GeneralExtension.
! # For usage of methods, please refer to GeneralExtension.
#
# Because the order of the objects in a hash is inconstant,
# some methods strictly affected with the order of objects
# might not work correctly,
! # e.g. GeneralExtension#convert_match, #convert_unmatch
! # and #alignment_concat.
module HashExtension
! include GeneralExtension
! def each_seq(&block)
each_value(&block)
end
include ClustalWFormatter
# Returns a string of Clustal W formatted text of the alignment.
--- 1043,1127 ----
# (It can also be included in Hash, though not recommended.)
#
! # It possesses all methods defined in EnumerableExtension.
! # For usage of methods, please refer to EnumerableExtension.
! #
! # Because SequenceHash#alignment_collect is redefined,
! # the class of some methods' return values is changed to
! # SequenceHash instead of SequenceArray.
#
# Because the order of the objects in a hash is inconstant,
# some methods strictly affected with the order of objects
# might not work correctly,
! # e.g. EnumerableExtension#convert_match and #convert_unmatch.
module HashExtension
! include EnumerableExtension
! # Iterates over each sequence.
! # Yields a sequence.
! #
! # It works the same as Hash#each_value.
! def each_seq(&block) #:yields: seq
each_value(&block)
end
+ # Iterates over each sequence, collects the result of running the
+ # block on each of them, and returns a new alignment as a
+ # Bio::Alignment::SequenceHash object.
+ #
+ # Note that it should be redefined if you want to change
+ # the class of the return value.
+ #
+ def alignment_collect
+ a = SequenceHash.new
+ a.set_all_property(get_all_property)
+ each_pair do |key, str|
+ a.store(key, yield(str))
+ end
+ a
+ end
+
+ # Concatenates the given alignment.
+ # If <em>align</em> is a Hash (or SequenceHash),
+ # sequences of same keys are concatenated.
+ # Otherwise, <em>align</em> must have <tt>each_seq</tt>
+ # or <tt>each</tt> method and
+ # works same as EnumerableExtension#alignment_concat.
+ #
+ # Returns self.
+ #
+ # Note that it is a destructive method.
+ #
+ def alignment_concat(align)
+ flag = nil
+ begin
+ align.each_pair do |key, seq|
+ flag = true
+ if origseq = self[key]
+ origseq.concat(seq)
+ end
+ end
+ return self
+ rescue NoMethodError, ArgumentError =>evar
+ raise evar if flag
+ end
+ a = values
+ i = 0
+ begin
+ align.each_seq do |seq|
+ flag = true
+ a[i].concat(seq) if a[i] and seq
+ i += 1
+ end
+ return self
+ rescue NoMethodError, ArgumentError => evar
+ raise evar if flag
+ end
+ align.each do |seq|
+ a[i].concat(seq) if a[i] and seq
+ i += 1
+ end
+ self
+ end
+
include ClustalWFormatter
# Returns a string of Clustal W formatted text of the alignment.
***************
*** 973,977 ****
# multiple sequence alignment.
# Since it inherits Array, it acts completely same as Array.
! # In addition, methods defined in ArrayExtension and GeneralExtension
# can be used.
class SequenceArray < Array
--- 1139,1143 ----
# multiple sequence alignment.
# Since it inherits Array, it acts completely same as Array.
! # In addition, methods defined in ArrayExtension and EnumerableExtension
# can be used.
class SequenceArray < Array
***************
*** 982,986 ****
# multiple sequence alignment.
# Since it inherits Hash, it acts completely same as Hash.
! # In addition, methods defined in HashExtension and GeneralExtension
# can be used.
class SequenceHash < Hash
--- 1148,1152 ----
# multiple sequence alignment.
# Since it inherits Hash, it acts completely same as Hash.
! # In addition, methods defined in HashExtension and EnumerableExtension
# can be used.
class SequenceHash < Hash
***************
*** 1030,1036 ****
# Bio::Alignment::OriginalAlignment is
# the BioRuby original multiple sequence alignment container class.
! # It includes GeneralExtension.
#
! # It is recommended only to use methods defined in GeneralExtension
# (and the each_seq method).
# The method only defined in this class might be obsoleted in the future.
--- 1196,1202 ----
# Bio::Alignment::OriginalAlignment is
# the BioRuby original multiple sequence alignment container class.
! # It includes HashExtension.
#
! # It is recommended only to use methods defined in EnumerableExtension
# (and the each_seq method).
# The method only defined in this class might be obsoleted in the future.
***************
*** 1038,1053 ****
class OriginalAlignment
- # default value of gap regular expression
- GAP_REGEXP = CharacterProperty::GAP_REGEXP
- # default value of gap character
- GAP_CHAR = CharacterProperty::GAP_CHAR
- # default value of missing character
- MISSING_CHAR = CharacterProperty::MISSING_CHAR
-
include Enumerable
! include GeneralExtension
include OriginalPrivate
# Read files and creates a new alignment object.
def self.readfiles(*files)
require 'bio/io/flatfile'
--- 1204,1214 ----
class OriginalAlignment
include Enumerable
! include HashExtension
include OriginalPrivate
# Read files and creates a new alignment object.
+ #
+ # It will be obsoleted.
def self.readfiles(*files)
require 'bio/io/flatfile'
***************
*** 1062,1065 ****
--- 1223,1228 ----
# Creates a new alignment object from given arguments.
+ #
+ # It will be obsoleted.
def self.new2(*arg)
self.new(arg)
***************
*** 1067,1071 ****
# Creates a new alignment object.
! # _seqs_ may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
--- 1230,1234 ----
# Creates a new alignment object.
! # <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
***************
*** 1077,1081 ****
end
! # compares object
def ==(x)
#(original)
--- 1240,1245 ----
end
! # If <em>x</em> is the same value, returns true.
! # Otherwise, returns false.
def ==(x)
#(original)
***************
*** 1093,1098 ****
end
! # Adds sequences to the alignment. _seqs_ may be
! # _seqs_ may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
--- 1257,1262 ----
end
! # Adds sequences to the alignment.
! # <em>seqs</em> may be one of follows:
# an array of sequences (or strings),
# an array of sequence database objects,
***************
*** 1125,1129 ****
end
! # sequence names
attr_reader :keys
--- 1289,1293 ----
end
! # identifiers (or definitions or names) of the sequences
attr_reader :keys
***************
*** 1141,1145 ****
# stores a sequence with <em>key</em>
# (name or definition of the sequence).
! # Unlike <em>__store__</em> method, the method doesn't allow
# same keys.
# If the key is already used, returns nil.
--- 1305,1309 ----
# stores a sequence with <em>key</em>
# (name or definition of the sequence).
! # Unlike <tt>__store__</tt> method, the method doesn't allow
# same keys.
# If the key is already used, returns nil.
***************
*** 1259,1263 ****
end
end
! alias :each_seq :each
# Iterates over each key and sequence.
--- 1423,1427 ----
end
end
! alias each_seq each
# Iterates over each key and sequence.
***************
*** 1288,1303 ****
def new(*arg)
na = self.class.new(*arg)
! if defined?(@seqclass)
! na.seqclass = @seqclass
! end
! if defined?(@gap_char)
! na.gap_char = @gap_char
! end
! if defined?(@gap_regexp)
! na.gap_regexp = @gap_regexp
! end
! if defined?(@missing_char)
! na.missing_char = @missing_char
! end
na
end
--- 1452,1456 ----
def new(*arg)
na = self.class.new(*arg)
! na.set_all_property(get_all_property)
na
end
***************
*** 1366,1369 ****
--- 1519,1524 ----
# If keys are given to the argument, sequences of given keys are
# duplicated.
+ #
+ # It will be obsoleted.
def isolate(*arg)
#(original)
***************
*** 1388,1394 ****
# The method name 'collect_align' will be obsoleted.
# Please use 'alignment_collect' instead.
! def collect_align
#(original)
! na = self.new
self.each_pair do |k, s|
na.store(k, yield(s))
--- 1543,1550 ----
# The method name 'collect_align' will be obsoleted.
# Please use 'alignment_collect' instead.
! def alignment_collect
#(original)
! na = self.class.new
! na.set_all_property(get_all_property)
self.each_pair do |k, s|
na.store(k, yield(s))
***************
*** 1396,1402 ****
na
end
! alias alignment_collect collect_align
# Removes empty sequences or nil in the alignment.
def compact!
#(Array-like)
--- 1552,1559 ----
na
end
! alias collect_align alignment_collect
# Removes empty sequences or nil in the alignment.
+ # (Like Array#compact!)
def compact!
#(Array-like)
***************
*** 1414,1417 ****
--- 1571,1575 ----
# Removes empty sequences or nil and returns new alignment.
+ # (Like Array#compact)
def compact
#(Array-like)
***************
*** 1466,1474 ****
# If block is given, it acts like Array#select (Enumerable#select).
# Returns a new alignment containing all sequences of the alignment
! # for which return value of given block is not false.
#
# If no block is given, it acts like the BioPerl's AlignI::select.
# Returns a new alignment containing sequences of given keys.
#
def select(*arg)
#(original)
--- 1624,1633 ----
# If block is given, it acts like Array#select (Enumerable#select).
# Returns a new alignment containing all sequences of the alignment
! # for which return value of given block is not false nor nil.
#
# If no block is given, it acts like the BioPerl's AlignI::select.
# Returns a new alignment containing sequences of given keys.
#
+ # The BioPerl's AlignI::select-like action will be obsoleted.
def select(*arg)
#(original)
***************
*** 1491,1495 ****
--- 1650,1659 ----
end
+ # The method name <tt>slice</tt> will be obsoleted.
+ # Please use <tt>alignment_slice</tt> instead.
alias slice alignment_slice
+
+ # The method name <tt>subseq</tt> will be obsoleted.
+ # Please use <tt>alignment_subseq</tt> instead.
alias subseq alignment_subseq
***************
*** 1565,1593 ****
end
- # Concatenates given alignment.
- def alignment_concat(aln)
- if aln.is_a?(self.class) then
- aln.each_pair do |k, s|
- self[k] << s
- end
- elsif aln.respond_to?(:each_seq) then
- i = 0
- aln.each_seq do |s|
- self.order(i) << s
- i += 1
- end
- else
- i = 0
- aln.each do |s|
- self.order(i) << s
- i += 1
- end
- end
- self
- end
-
# Replace the specified region of the alignment to aln.
# aln:: String or Bio::Alignment object
# arg:: same format as String#slice
def replace_slice(aln, *arg)
#(original)
--- 1729,1737 ----
end
# Replace the specified region of the alignment to aln.
# aln:: String or Bio::Alignment object
# arg:: same format as String#slice
+ #
+ # It will be obsoleted.
def replace_slice(aln, *arg)
#(original)
***************
*** 1677,1684 ****
include ClustalWFormatter
def to_clustal(options = {})
clustalw_formatter(self, self.keys, options)
end
! end #module Original
# Bio::Alignment::GAP is a set of class methods for
--- 1821,1833 ----
include ClustalWFormatter
+ # Returns a string of Clustal W formatted text of the alignment.
def to_clustal(options = {})
clustalw_formatter(self, self.keys, options)
end
!
! # The method name <tt>consensus</tt> will be obsoleted.
! # Please use <tt>consensus_string</tt> instead.
! alias consensus consensus_string
! end #class OriginalAlignment
# Bio::Alignment::GAP is a set of class methods for
***************
*** 1735,1739 ****
OriginalAlignment.readfiles(*files)
end
! end #class Alignment
end #module Bio
--- 1884,1888 ----
OriginalAlignment.readfiles(*files)
end
! end #module Alignment
end #module Bio
More information about the bioruby-cvs
mailing list