[BioRuby-cvs] bioruby/lib/bio/appl/phylip alignment.rb,NONE,1.1
Naohisa Goto
ngoto at dev.open-bio.org
Thu Dec 14 22:38:55 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/appl/phylip
In directory dev.open-bio.org:/tmp/cvs-serv16512
Added Files:
alignment.rb
Log Message:
Phylip format multiple sequence alignment parser class
Bio::Phylip::PhylipFormat is added.
--- NEW FILE: alignment.rb ---
#
# = bio/appl/phylip/alignment.rb - phylip multiple alignment format parser
#
# Copyright:: Copyright (C) 2006
# GOTO Naohisa <ng at bioruby.org>
#
# License:: Ruby's
#
# $Id: alignment.rb,v 1.1 2006/12/14 22:38:53 ngoto Exp $
#
# = About Bio::Phylip::PhylipFormat
#
# Please refer to the documentation of the Bio::Phylip::PhylipFormat class.
#
module Bio
module Phylip
# This is a parser for the PHYLIP multiple sequence alignment format.
# The two layouts, interleaved and non-interleaved, are
# detected automatically.
#
class PhylipFormat

  # Creates a new object from a string holding a whole phylip-format
  # alignment.
  #
  # The first line is taken as the header and is expected to contain two
  # integers: the number of sequences and the alignment length.
  # The remaining lines are kept and parsed lazily, the first time
  # #alignment is called.
  #
  # ---
  # *Arguments*:
  # * (required) _str_: String
  def initialize(str)
    @data = str.strip.split(/(?:\r\n|\r|\n)/)
    @first_line = @data.shift
    @number_of_sequences, @alignment_length =
      @first_line.to_s.strip.split(/\s+/).collect { |x| x.to_i }
  end

  # Number of sequences, as written in the header line
  # (nil when the header does not contain it).
  attr_reader :number_of_sequences

  # Alignment length, as written in the header line
  # (nil when the header does not contain it).
  attr_reader :alignment_length

  # If the alignment format is "interleaved", returns true.
  # If not, returns false.
  #
  # The decision is made only from the second line after the header:
  # when it begins with a space the data is regarded as non-interleaved.
  # The result is cached. Very short alignments may be misjudged.
  def interleaved?
    unless defined? @interleaved_flag
      @interleaved_flag = (/\A +/ !~ @data[1].to_s)
    end
    @interleaved_flag
  end

  # Gets the alignment. Returns a Bio::Alignment object.
  #
  # The data is parsed on the first call and the resulting object is
  # cached, so later calls return the same object.
  # When the header declares no sequences (e.g. empty input), an empty
  # Bio::Alignment is returned.
  def alignment
    unless defined? @alignment
      do_parse
      a = Bio::Alignment.new
      (0...sequence_count).each do |i|
        a.add_seq(@sequences[i], @sequence_names[i])
      end
      @alignment = a
    end
    @alignment
  end

  private

  # Number of sequences to parse: the header value, with a missing or
  # negative count treated as zero.
  def sequence_count
    [@number_of_sequences.to_i, 0].max
  end

  # Alignment length to use for placeholders: the header value, with a
  # missing or negative length treated as zero.
  def alignment_width
    [@alignment_length.to_i, 0].max
  end

  # Returns a copy of the given text with all whitespace removed.
  # nil (a line carrying a name but no residues) becomes an empty string.
  def remove_whitespace(text)
    text.to_s.gsub(/\s+/, '')
  end

  # Allocates the name and sequence buffers. Each sequence starts as a
  # run of spaces of the alignment length and is overwritten when its
  # record is found.
  def init_buffers
    width = alignment_width
    @sequence_names = Array.new(sequence_count) { '' }
    @sequences = Array.new(sequence_count) { ' ' * width }
  end

  # Parses @data with the parser matching the detected layout.
  # Always leaves @data empty and returns true.
  def do_parse
    # Decide the layout first, while @data is still intact, so that the
    # cached answer of #interleaved? reflects the real data.
    interleaved_layout = interleaved?
    if sequence_count <= 0
      # Nothing to parse; avoids modulo-by-zero and nil buffer access.
      init_buffers
      @data.clear
      return true
    end
    if interleaved_layout
      do_parse_interleaved
    else
      do_parse_noninterleaved
    end
  end

  # Parses interleaved data: the first block has one "name residues"
  # line per sequence; the following lines carry residues only and are
  # assigned to the sequences in rotation.
  def do_parse_interleaved
    init_buffers
    count = sequence_count
    first_block = @data.slice!(0, count)
    first_block.each_with_index do |line, i|
      name, residues = line.split(/ +/, 2)
      @sequence_names[i] = name.to_s
      @sequences[i].replace(remove_whitespace(residues))
    end
    i = 0
    @data.each do |line|
      if line.strip.empty?
        # A blank line separates blocks; restart from the first sequence.
        i = 0
      else
        @sequences[i] << remove_whitespace(line)
        i = (i + 1) % count
      end
    end
    @data.clear
    true
  end

  # Parses non-interleaved data: each record starts with a
  # "name residues" line and may continue on following lines that begin
  # with whitespace.
  #
  # NOTE(review): a file containing more records than the header
  # declares is not handled; @sequences[i] is nil for the surplus record.
  def do_parse_noninterleaved
    init_buffers
    width = alignment_width
    curseq = nil
    i = 0
    @data.each do |line|
      next if line.strip.empty?
      # NOTE(review): with ">" a sequence exactly as long as the
      # alignment is not yet regarded as finished -- confirm whether
      # ">=" was intended. The length test is skipped when the header
      # gives no usable alignment length.
      overlong = (width > 0 && !curseq.nil? && curseq.length > width)
      if curseq.nil? || overlong || /\A\s/ !~ line
        name, residues = line.strip.split(/ +/, 2)
        @sequence_names[i] = name
        curseq = @sequences[i]
        curseq.replace(remove_whitespace(residues))
        i += 1
      else
        curseq << remove_whitespace(line)
      end
    end
    @data.clear
    true
  end

end #class PhylipFormat
end #module Phylip
end #module Bio
More information about the bioruby-cvs
mailing list