[BioRuby-cvs] bioruby/lib/bio/appl/fasta format10.rb,1.6,1.7
Katayama Toshiaki
k at dev.open-bio.org
Fri Apr 6 12:04:07 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/appl/fasta
In directory dev.open-bio.org:/tmp/cvs-serv22095/lib/bio/appl/fasta
Modified Files:
format10.rb
Log Message:
* Licensed under the same terms as Ruby
* converted to RDoc
Index: format10.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/fasta/format10.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** format10.rb 26 Sep 2005 13:00:05 -0000 1.6
--- format10.rb 6 Apr 2007 12:04:05 -0000 1.7
***************
*** 1,22 ****
#
! # bio/appl/fasta/format10.rb - FASTA output (-m 10) parser
#
! # Copyright (C) 2002 KATAYAMA Toshiaki <k at bioruby.org>
! #
! # This library is free software; you can redistribute it and/or
! # modify it under the terms of the GNU Lesser General Public
! # License as published by the Free Software Foundation; either
! # version 2 of the License, or (at your option) any later version.
! #
! # This library is distributed in the hope that it will be useful,
! # but WITHOUT ANY WARRANTY; without even the implied warranty of
! # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! # Lesser General Public License for more details.
! #
! # You should have received a copy of the GNU Lesser General Public
! # License along with this library; if not, write to the Free Software
! # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
--- 1,9 ----
#
! # = bio/appl/fasta/format10.rb - FASTA output (-m 10) parser
#
! # Copyright:: Copyright (C) 2002 Toshiaki Katayama <k at bioruby.org>
! # License:: The Ruby License
#
! # $Id$
#
***************
*** 24,267 ****
module Bio
! class Fasta
! class Report
! def initialize(data)
! # header lines - brief list of the hits
! if data.sub!(/.*\nThe best scores are/m, '')
! data.sub!(/(.*)\n\n>>>/m, '')
! @list = "The best scores are" + $1
! else
! data.sub!(/.*\n!!\s+/m, '')
! data.sub!(/.*/) { |x| @list = x; '' }
! end
! # body lines - fasta execution result
! program, *hits = data.split(/\n>>/)
! # trailing lines - log messages of the execution
! @log = hits.pop
! @log.sub!(/.*<\n/m, '')
! @log.strip!
! # parse results
! @program = Program.new(program)
! @hits = []
! hits.each do |x|
! @hits.push(Hit.new(x))
! end
! end
! attr_reader :list, :log, :program, :hits
! def each
! @hits.each do |x|
! yield x
! end
! end
! def threshold(evalue_max = 0.1)
! list = []
! @hits.each do |x|
! list.push(x) if x.evalue < evalue_max
! end
! return list
! end
! def lap_over(length_min = 0)
! list = []
! @hits.each do |x|
! list.push(x) if x.overlap > length_min
! end
! return list
! end
! class Program
! def initialize(data)
! @definition, *program = data.split(/\n/)
! @program = {}
! pat = /;\s+([^:]+):\s+(.*)/
! program.each do |x|
! if pat.match(x)
! @program[$1] = $2
! end
! end
end
- attr_reader :definition, :program
end
- class Hit
- def initialize(data)
- score, query, target = data.split(/\n>/)
! @definition, *score = score.split(/\n/)
! @score = {}
! pat = /;\s+([^:]+):\s+(.*)/
! score.each do |x|
! if pat.match(x)
! @score[$1] = $2
! end
! end
! @query = Query.new(query)
! @target = Target.new(target)
end
! attr_reader :definition, :score, :query, :target
! def evalue
! if @score['fa_expect']
! @score['fa_expect'].to_f
! elsif @score['sw_expect']
! @score['sw_expect'].to_f
! elsif @score['fx_expect']
! @score['fx_expect'].to_f
! elsif @score['tx_expect']
! @score['tx_expect'].to_f
! end
! end
! def bit_score
! if @score['fa_bits']
! @score['fa_bits'].to_f
! elsif @score['sw_bits']
! @score['sw_bits'].to_f
! elsif @score['fx_bits']
! @score['fx_bits'].to_f
! elsif @score['tx_bits']
! @score['tx_bits'].to_f
! end
! end
! def direction
! @score['fa_frame'] || @score['sw_frame'] || @score['fx_frame'] || @score['tx_frame']
! end
! def sw
! @score['sw_score'].to_i
! end
! def identity
! @score['sw_ident'].to_f
! end
! def overlap
! @score['sw_overlap'].to_i
! end
! def query_id
! @query.entry_id
! end
! def target_id
! @target.entry_id
! end
! def query_def
! @query.definition
! end
! def target_def
! @target.definition
! end
! def query_len
! @query.length
! end
! def target_len
! @target.length
! end
! def query_seq
! @query.sequence
! end
! def target_seq
! @target.sequence
! end
! def query_type
! @query.moltype
! end
! def target_type
! @target.moltype
! end
! def query_start
! @query.start
! end
! def query_end
! @query.stop
! end
! def target_start
! @target.start
! end
! def target_end
! @target.stop
! end
! def lap_at
! [ query_start, query_end, target_start, target_end ]
! end
! class Query
! def initialize(data)
! @definition, *data = data.split(/\n/)
! @data = {}
! @sequence = ''
! pat = /;\s+([^:]+):\s+(.*)/
! data.each do |x|
! if pat.match(x)
! @data[$1] = $2
! else
! @sequence += x
! end
! end
! end
! attr_reader :definition, :data, :sequence
- def entry_id
- @definition[/\S+/]
- end
! def length
! @data['sq_len'].to_i
! end
! def moltype
! @data['sq_type']
! end
! def start
! @data['al_start'].to_i
end
! def stop
! @data['al_stop'].to_i
! end
! end
! class Target < Query; end
end
end
end
! end
--- 11,312 ----
module Bio
! class Fasta
! # Summarized results of the fasta execution hits.
! class Report
! def initialize(data)
! # header lines - brief list of the hits
! if data.sub!(/.*\nThe best scores are/m, '')
! data.sub!(/(.*)\n\n>>>/m, '')
! @list = "The best scores are" + $1
! else
! data.sub!(/.*\n!!\s+/m, '')
! data.sub!(/.*/) { |x| @list = x; '' }
! end
! # body lines - fasta execution result
! program, *hits = data.split(/\n>>/)
! # trailing lines - log messages of the execution
! @log = hits.pop
! @log.sub!(/.*<\n/m, '')
! @log.strip!
! # parse results
! @program = Program.new(program)
! @hits = []
! hits.each do |x|
! @hits.push(Hit.new(x))
! end
! end
!
! # Returns the 'The best scores are' lines as a String.
! attr_reader :list
! # Returns the trailing lines including library size, execution date,
! # fasta function used, and fasta versions as a String.
! attr_reader :log
! # Returns a Bio::Fasta::Report::Program object.
! attr_reader :program
! # Returns an Array of Bio::Fasta::Report::Hit objects.
! attr_reader :hits
+ # Iterates on each Bio::Fasta::Report::Hit object.
+ def each
+ @hits.each do |x|
+ yield x
+ end
+ end
! # Returns an Array of Bio::Fasta::Report::Hit objects having
! # better evalue than 'evalue_max'.
! def threshold(evalue_max = 0.1)
! list = []
! @hits.each do |x|
! list.push(x) if x.evalue < evalue_max
! end
! return list
! end
! # Returns an Array of Bio::Fasta::Report::Hit objects having
! # longer overlap length than 'length_min'.
! def lap_over(length_min = 0)
! list = []
! @hits.each do |x|
! list.push(x) if x.overlap > length_min
! end
! return list
! end
! # Log of the fasta execution environments.
! class Program
! def initialize(data)
! @definition, *program = data.split(/\n/)
! @program = {}
!
! pat = /;\s+([^:]+):\s+(.*)/
!
! program.each do |x|
! if pat.match(x)
! @program[$1] = $2
end
end
+ end
+
+ # Returns a String containing query and library filenames.
+ attr_reader :definition
+ # Accessor for a Hash containing 'mp_name', 'mp_ver', 'mp_argv',
+ # 'pg_name', 'pg_ver', 'pg_matrix', 'pg_gap-pen', 'pg_ktup',
+ # 'pg_optcut', 'pg_cgap', 'mp_extrap', 'mp_stats', and 'mp_KS' values.
+ attr_reader :program
+ end
! class Hit
! def initialize(data)
! score, query, target = data.split(/\n>/)
! @definition, *score = score.split(/\n/)
! @score = {}
! pat = /;\s+([^:]+):\s+(.*)/
! score.each do |x|
! if pat.match(x)
! @score[$1] = $2
end
! end
! @query = Query.new(query)
! @target = Target.new(target)
! end
! attr_reader :definition, :score, :query, :target
! # E-value score
! def evalue
! if @score['fa_expect']
! @score['fa_expect'].to_f
! elsif @score['sw_expect']
! @score['sw_expect'].to_f
! elsif @score['fx_expect']
! @score['fx_expect'].to_f
! elsif @score['tx_expect']
! @score['tx_expect'].to_f
! end
! end
! # Bit score
! def bit_score
! if @score['fa_bits']
! @score['fa_bits'].to_f
! elsif @score['sw_bits']
! @score['sw_bits'].to_f
! elsif @score['fx_bits']
! @score['fx_bits'].to_f
! elsif @score['tx_bits']
! @score['tx_bits'].to_f
! end
! end
! def direction
! @score['fa_frame'] || @score['sw_frame'] || @score['fx_frame'] || @score['tx_frame']
! end
! # Smith-Waterman score
! def sw
! @score['sw_score'].to_i
! end
! # percent identity
! def identity
! @score['sw_ident'].to_f
! end
! # overlap length
! def overlap
! @score['sw_overlap'].to_i
! end
! # Shortcuts for the methods of Bio::Fasta::Report::Hit::Query
! def query_id
! @query.entry_id
! end
! def target_id
! @target.entry_id
! end
! def query_def
! @query.definition
! end
! def target_def
! @target.definition
! end
! def query_len
! @query.length
! end
! # Shortcuts for the methods of Bio::Fasta::Report::Hit::Target
! def target_len
! @target.length
! end
! def query_seq
! @query.sequence
! end
! def target_seq
! @target.sequence
! end
! def query_type
! @query.moltype
! end
! def target_type
! @target.moltype
! end
! # Information on matching region
! def query_start
! @query.start
! end
+ def query_end
+ @query.stop
+ end
! def target_start
! @target.start
! end
! def target_end
! @target.stop
! end
! def lap_at
! [ query_start, query_end, target_start, target_end ]
! end
! class Query
! def initialize(data)
! @definition, *data = data.split(/\n/)
! @data = {}
! @sequence = ''
! pat = /;\s+([^:]+):\s+(.*)/
! data.each do |x|
! if pat.match(x)
! @data[$1] = $2
! else
! @sequence += x
end
+ end
+ end
! # Returns the definition of the entry as a String.
! # You can access this value by Report::Hit#query_def method.
! attr_reader :definition
! # Returns a Hash containing 'sq_len', 'sq_offset', 'sq_type',
! # 'al_start', 'al_stop', and 'al_display_start' values.
! # You can access most of these values by Report::Hit#query_* methods.
! attr_reader :data
! # Returns the sequence (with gaps) as a String.
! # You can access this value by the Report::Hit#query_seq method.
! attr_reader :sequence
!
! # Returns the first word in the definition as a String.
! # You can get this value by Report::Hit#query_id method.
! def entry_id
! @definition[/\S+/]
! end
!
! # Returns the sequence length.
! # You can access this value by the Report::Hit#query_len method.
! def length
! @data['sq_len'].to_i
! end
!
! # Returns 'p' for protein sequence, 'D' for nucleotide sequence.
! def moltype
! @data['sq_type']
! end
!
! # Returns alignment start position. You can also access this value
! # via the Report::Hit#query_start shortcut method.
! def start
! @data['al_start'].to_i
! end
!
! # Returns alignment end position. You can also access this value
! # via the Report::Hit#query_end shortcut method.
! def stop
! @data['al_stop'].to_i
end
end
+ # Same as Bio::Fasta::Report::Hit::Query but for Target.
+ class Target < Query; end
end
!
! end # Report
!
! end # Fasta
! end # Bio
***************
*** 279,428 ****
- =begin
-
- = Bio::Fasta::Report
-
- Summarized results of the fasta execution hits.
-
- --- Bio::Fasta::Report.new(data)
- --- Bio::Fasta::Report#each
-
- Iterates on each Bio::Fasta::Report::Hit object.
-
- --- Bio::Fasta::Report#hits
-
- Returns an Array of Bio::Fasta::Report::Hit objects.
-
- --- Bio::Fasta::Report#threshold(evalue_max = 0.1)
-
- Returns an Array of Bio::Fasta::Report::Hit objects having
- better evalue than 'evalue_max'.
-
- --- Bio::Fasta::Report#lap_over(length_min = 0)
-
- Returns an Array of Bio::Fasta::Report::Hit objects having
- longer overlap length than 'length_min'.
-
- --- Bio::Fasta::Report#program
-
- Returns a Bio::Fasta::Report::Program object.
-
- --- Bio::Fasta::Report#list
-
- Returns the 'The best scores are' lines as a String.
-
- --- Bio::Fasta::Report#log
-
- Returns the trailing lines including library size, execution date,
- fasta function used, and fasta versions as a String.
-
-
- == Bio::Fasta::Report::Program
-
- Log of the fasta execution environments.
-
- --- Bio::Fasta::Report::Program#definition
-
- Returns a String containing query and library filenames.
-
- --- Bio::Fasta::Report::Program#program
-
- Accessor for a Hash containing 'mp_name', 'mp_ver', 'mp_argv',
- 'pg_name', 'pg_ver, 'pg_matrix', 'pg_gap-pen', 'pg_ktup',
- 'pg_optcut', 'pg_cgap', 'mp_extrap', 'mp_stats', and 'mp_KS' values.
-
-
- == Bio::Fasta::Report::Hit
-
- --- Bio::Fasta::Report::Hit#definition
- --- Bio::Fasta::Report::Hit#score
- --- Bio::Fasta::Report::Hit#query
- --- Bio::Fasta::Report::Hit#target
-
- Accessors for the internal structures.
-
- --- Bio::Fasta::Report::Hit#evalue
- --- Bio::Fasta::Report::Hit#bit_score
- --- Bio::Fasta::Report::Hit#sw
- --- Bio::Fasta::Report::Hit#identity
-
- Matching scores.
-
- --- Bio::Fasta::Report::Hit#query_id
- --- Bio::Fasta::Report::Hit#query_def
- --- Bio::Fasta::Report::Hit#query_len
- --- Bio::Fasta::Report::Hit#query_seq
- --- Bio::Fasta::Report::Hit#query_type
- --- Bio::Fasta::Report::Hit#target_id
- --- Bio::Fasta::Report::Hit#target_def
- --- Bio::Fasta::Report::Hit#target_len
- --- Bio::Fasta::Report::Hit#target_seq
- --- Bio::Fasta::Report::Hit#target_type
-
- Matching subjects.
- Shortcuts for the methods of Hit::Query and the Hit::Target.
-
- --- Bio::Fasta::Report::Hit#query_start
- --- Bio::Fasta::Report::Hit#query_end
- --- Bio::Fasta::Report::Hit#target_start
- --- Bio::Fasta::Report::Hit#target_end
- --- Bio::Fasta::Report::Hit#overlap
- --- Bio::Fasta::Report::Hit#lap_at
- --- Bio::Fasta::Report::Hit#direction
-
- Matching regions.
-
-
- == Bio::Fasta::Report::Hit::Query
-
- --- Bio::Fasta::Report::Hit::Query#entry_id
-
- Returns the first word in the definition as a String.
- You can get this value by Report::Hit#query_id method.
-
- --- Bio::Fasta::Report::Hit::Query#definition
-
- Returns the definition of the entry as a String.
- You can access this value by Report::Hit#query_def method.
-
- --- Bio::Fasta::Report::Hit::Query#sequence
-
- Returns the sequence (with gaps) as a String.
- You can access this value by the Report::Hit#query_seq method.
-
- --- Bio::Fasta::Report::Hit::Query#length
-
- Returns the sequence length.
- You can access this value by the Report::Hit#query_len method.
-
- --- Bio::Fasta::Report::Hit::Query#moltype
-
- Returns 'p' for protein sequence, 'D' for nucleotide sequence.
-
- --- Bio::Fasta::Report::Hit::Query#start
- --- Bio::Fasta::Report::Hit::Query#stop
-
- Returns alignment start and stop position.
- You can access these values by Report::Hit#query_start and
- Report::Hit#query_end methods.
-
- --- Bio::Fasta::Report::Hit::Query#data
-
- Returns a Hash containing 'sq_len', 'sq_offset', 'sq_type',
- 'al_start', 'al_stop', and 'al_display_start' values.
- You can access most of these values by Report::Hit#query_* methods.
-
-
- == Bio::Fasta::Report::Hit::Target
-
- --- Bio::Fasta::Report::Hit::Target#entry_id
- --- Bio::Fasta::Report::Hit::Target#definition
- --- Bio::Fasta::Report::Hit::Target#data
- --- Bio::Fasta::Report::Hit::Target#sequence
- --- Bio::Fasta::Report::Hit::Target#length
- --- Bio::Fasta::Report::Hit::Target#start
- --- Bio::Fasta::Report::Hit::Target#stop
-
- Same as Bio::Fasta::Report::Hit::Query but for Target.
-
- =end
--- 324,325 ----
More information about the bioruby-cvs
mailing list