[BioRuby-cvs] bioruby/lib/bio/appl/blast report.rb,1.9,1.10
Katayama Toshiaki
k at dev.open-bio.org
Tue Sep 19 06:09:22 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/appl/blast
In directory dev.open-bio.org:/tmp/cvs-serv32637/lib/bio/appl/blast
Modified Files:
report.rb
Log Message:
* license is changed from LGPL to Ruby's
* document is changed to RDoc
Index: report.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blast/report.rb,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** report.rb 26 Sep 2005 13:00:04 -0000 1.9
--- report.rb 19 Sep 2006 06:09:20 -0000 1.10
***************
*** 1,22 ****
#
! # bio/appl/blast/report.rb - BLAST Report class
#
! # Copyright (C) 2003 KATAYAMA Toshiaki <k at bioruby.org>
! #
! # This library is free software; you can redistribute it and/or
! # modify it under the terms of the GNU Lesser General Public
! # License as published by the Free Software Foundation; either
! # version 2 of the License, or (at your option) any later version.
! #
! # This library is distributed in the hope that it will be useful,
! # but WITHOUT ANY WARRANTY; without even the implied warranty of
! # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! # Lesser General Public License for more details.
! #
! # You should have received a copy of the GNU Lesser General Public
! # License along with this library; if not, write to the Free Software
! # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
--- 1,9 ----
#
! # = bio/appl/blast/report.rb - BLAST Report class
#
! # Copyright:: Copyright (C) 2003 Toshiaki Katayama <k at bioruby.org>
! # License:: Ruby's
#
! # $Id$
#
***************
*** 27,206 ****
module Bio
! class Blast
!
! class Report
! # for Bio::FlatFile support (only for XML data)
! DELIMITER = RS = "</BlastOutput>\n"
! def self.xmlparser(data)
! self.new(data, :xmlparser)
! end
! def self.rexml(data)
! self.new(data, :rexml)
! end
! def self.tab(data)
! self.new(data, :tab)
! end
! def auto_parse(data)
! if /<?xml/.match(data[/.*/])
! if defined?(XMLParser)
! xmlparser_parse(data)
! else
! rexml_parse(data)
! end
! else
! tab_parse(data)
! end
! end
! private :auto_parse
! def initialize(data, parser = nil)
! @iterations = []
! @parameters = {}
! case parser
! when :xmlparser # format 7
! xmlparser_parse(data)
! when :rexml # format 7
! rexml_parse(data)
! when :tab # format 8
! tab_parse(data)
! else
! auto_parse(data)
! end
! end
! attr_reader :iterations, :parameters,
! :program, :version, :reference, :db, :query_id, :query_def, :query_len
! # shortcut for @parameters
! def matrix; @parameters['matrix']; end
! def expect; @parameters['expect'].to_i; end
! def inclusion; @parameters['include'].to_i; end
! def sc_match; @parameters['sc-match'].to_i; end
! def sc_mismatch; @parameters['sc-mismatch'].to_i; end
! def gap_open; @parameters['gap-open'].to_i; end
! def gap_extend; @parameters['gap-extend'].to_i; end
! def filter; @parameters['filter']; end
! def pattern; @parameters['pattern']; end
! def entrez_query; @parameters['entrez-query']; end
! # <for blastpgp>
! def each_iteration
! @iterations.each do |x|
! yield x
! end
end
! # <for blastall> shortcut for the last iteration's hits
! def each_hit
! @iterations.last.each do |x|
! yield x
! end
! end
! alias each each_hit
! # shortcut for the last iteration's hits
! def hits
! @iterations.last.hits
! end
! # shortcut for the last iteration's statistics
! def statistics
! @iterations.last.statistics
! end
! def db_num; statistics['db-num']; end
! def db_len; statistics['db-len']; end
! def hsp_len; statistics['hsp-len']; end
! def eff_space; statistics['eff-space']; end
! def kappa; statistics['kappa']; end
! def lambda; statistics['lambda']; end
! def entropy; statistics['entropy']; end
! # shortcut for the last iteration's message (for checking 'CONVERGED')
! def message
! @iterations.last.message
! end
! # Bio::Blast::Report::Iteration
! class Iteration
! def initialize
! @message = nil
! @statistics = {}
! @num = 1
! @hits = []
! end
! attr_reader :hits, :statistics
! attr_accessor :num, :message
! def each
! @hits.each do |x|
! yield x
! end
! end
! end
! # Bio::Blast::Report::Hit
! class Hit
! def initialize
! @hsps = []
! end
! attr_reader :hsps
! attr_accessor :query_id, :query_def, :query_len,
! :num, :hit_id, :len, :definition, :accession
! def each
! @hsps.each do |x|
! yield x
! end
! end
! # Compatible with Bio::Fasta::Report::Hit
- alias target_id accession
- alias target_def definition
- alias target_len len
! # Shortcut methods for the best Hsp
! def evalue; @hsps.first.evalue; end
! def bit_score; @hsps.first.bit_score; end
! def identity; @hsps.first.identity; end
! def percent_identity; @hsps.first.percent_identity; end
! def overlap; @hsps.first.align_len; end
! def query_seq; @hsps.first.qseq; end
! def target_seq; @hsps.first.hseq; end
! def midline; @hsps.first.midline; end
! def query_start; @hsps.first.query_from; end
! def query_end; @hsps.first.query_to; end
! def target_start; @hsps.first.hit_from; end
! def target_end; @hsps.first.hit_to; end
! def lap_at
! [ query_start, query_end, target_start, target_end ]
! end
end
! # Bio::Blast::Report::Hsp
! class Hsp
! def initialize
! @hsp = {}
! end
! attr_reader :hsp
! attr_accessor :num, :bit_score, :score, :evalue,
! :query_from, :query_to, :hit_from, :hit_to,
! :pattern_from, :pattern_to, :query_frame, :hit_frame,
! :identity, :positive, :gaps, :align_len, :density,
! :qseq, :hseq, :midline,
! :percent_identity, :mismatch_count # only for '-m 8'
end
end
end
! end
--- 14,331 ----
module Bio
! class Blast
! # = Bio::Blast::Report
! #
! # Parsed results of the blast execution for Tab-delimited and XML output
! # format. Tab-delimited reports consist of
! #
! # Query id,
! # Subject id,
! # percent of identity,
! # alignment length,
! # number of mismatches (not including gaps),
! # number of gap openings,
! # start of alignment in query,
! # end of alignment in query,
! # start of alignment in subject,
! # end of alignment in subject,
! # expected value,
! # bit score.
! #
! # according to the MEGABLAST document (README.mbl). As for XML output,
! # see the following DTDs.
! #
! # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
! # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod
! # * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod
! #
! class Report
! # for Bio::FlatFile support (only for XML data)
! DELIMITER = RS = "</BlastOutput>\n"
! # Specify to use XMLParser to parse XML (-m 7) output.
! def self.xmlparser(data)
! self.new(data, :xmlparser)
! end
! # Specify to use REXML to parse XML (-m 7) output.
! def self.rexml(data)
! self.new(data, :rexml)
! end
! # Specify to use tab delimited output parser.
! def self.tab(data)
! self.new(data, :tab)
! end
! def auto_parse(data)
! if /<?xml/.match(data[/.*/])
! if defined?(XMLParser)
! xmlparser_parse(data)
! else
! rexml_parse(data)
end
+ else
+ tab_parse(data)
+ end
+ end
+ private :auto_parse
! # Passing a BLAST output from 'blastall -m 7' or '-m 8' as a String.
! # Formats are auto detected.
! def initialize(data, parser = nil)
! @iterations = []
! @parameters = {}
! case parser
! when :xmlparser # format 7
! xmlparser_parse(data)
! when :rexml # format 7
! rexml_parse(data)
! when :tab # format 8
! tab_parse(data)
! else
! auto_parse(data)
! end
! end
! # Returns an Array of Bio::Blast::Report::Iteration objects.
! attr_reader :iterations
! # Returns a Hash containing execution parameters. Valid keys are:
! # 'matrix', 'expect', 'include', 'sc-match', 'sc-mismatch',
! # 'gap-open', 'gap-extend', 'filter'
! attr_reader :parameters
! # Shortcut for BlastOutput values.
! attr_reader :program, :version, :reference, :db, :query_id, :query_def, :query_len
+ # Matrix used (-M) : shortcuts for @parameters
+ def matrix; @parameters['matrix']; end
+ # Expectation threshold (-e) : shortcuts for @parameters
+ def expect; @parameters['expect'].to_i; end
+ # Inclusion threshold (-h) : shortcuts for @parameters
+ def inclusion; @parameters['include'].to_i; end
+ # Match score for NT (-r) : shortcuts for @parameters
+ def sc_match; @parameters['sc-match'].to_i; end
+ # Mismatch score for NT (-q) : shortcuts for @parameters
+ def sc_mismatch; @parameters['sc-mismatch'].to_i; end
+ # Gap opening cost (-G) : shortcuts for @parameters
+ def gap_open; @parameters['gap-open'].to_i; end
+ # Gap extension cost (-E) : shortcuts for @parameters
+ def gap_extend; @parameters['gap-extend'].to_i; end
+ # Filtering options (-F) : shortcuts for @parameters
+ def filter; @parameters['filter']; end
+ # PHI-BLAST pattern : shortcuts for @parameters
+ def pattern; @parameters['pattern']; end
+ # Limit of request to Entrez : shortcuts for @parameters
+ def entrez_query; @parameters['entrez-query']; end
! # Iterates on each Bio::Blast::Report::Iteration object. (for blastpgp)
! def each_iteration
! @iterations.each do |x|
! yield x
! end
! end
! # Iterates on each Bio::Blast::Report::Hit object of the last Iteration.
! # Shortcut for the last iteration's hits (for blastall)
! def each_hit
! @iterations.last.each do |x|
! yield x
! end
! end
! alias each each_hit
+ # Returns an Array of Bio::Blast::Report::Hits of the last iteration.
+ # Shortcut for the last iteration's hits
+ def hits
+ @iterations.last.hits
+ end
! # Returns a Hash containing execution statistics of the last iteration.
! # Valid keys are:
! # 'db-num', 'db-len', 'hsp-len', 'eff-space', 'kappa', 'lambda', 'entropy'
! # Shortcut for the last iteration's statistics.
! def statistics
! @iterations.last.statistics
! end
! # Number of sequences in BLAST db
! def db_num; statistics['db-num']; end
! # Length of BLAST db
! def db_len; statistics['db-len']; end
! # Effective HSP length
! def hsp_len; statistics['hsp-len']; end
! # Effective search space
! def eff_space; statistics['eff-space']; end
! # Karlin-Altschul parameter K
! def kappa; statistics['kappa']; end
! # Karlin-Altschul parameter Lambda
! def lambda; statistics['lambda']; end
! # Karlin-Altschul parameter H
! def entropy; statistics['entropy']; end
! # Returns a String (or nil) containing execution message of the last
! # iteration (typically "CONVERGED").
! # Shortcut for the last iteration's message (for checking 'CONVERGED')
! def message
! @iterations.last.message
! end
! # Bio::Blast::Report::Iteration
! class Iteration
! def initialize
! @message = nil
! @statistics = {}
! @num = 1
! @hits = []
! end
! # Returns an Array of Bio::Blast::Report::Hit objects.
! attr_reader :hits
! # Returns a Hash containing execution statistics.
! # Valid keys are:
! # 'db-len', 'db-num', 'eff-space', 'entropy', 'hsp-len', 'kappa', 'lambda'
! attr_reader :statistics
! # Returns the number of iteration counts.
! attr_accessor :num
! # Returns a String (or nil) containing execution message (typically
! # "CONVERGED").
! attr_accessor :message
!
! # Iterates on each Bio::Blast::Report::Hit object.
! def each
! @hits.each do |x|
! yield x
end
+ end
+ end
! # Bio::Blast::Report::Hit
! class Hit
! def initialize
! @hsps = []
! end
!
! # Returns an Array of Bio::Blast::Report::Hsp objects.
! attr_reader :hsps
!
! # Hit number
! attr_accessor :num
! # SeqId of subject
! attr_accessor :hit_id
! # Length of subject
! attr_accessor :len
! # Definition line of subject
! attr_accessor :definition
! # Accession
! attr_accessor :accession
!
! # Iterates on each Hsp object.
! def each
! @hsps.each do |x|
! yield x
end
+ end
+
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ attr_accessor :query_id
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ attr_accessor :query_def
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ attr_accessor :query_len
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ alias target_id accession
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ alias target_def definition
+ # Compatible method with Bio::Fasta::Report::Hit class.
+ alias target_len len
+
+ # Shortcut methods for the best Hsp, some are also compatible with
+ # Bio::Fasta::Report::Hit class.
+ def evalue; @hsps.first.evalue; end
+ def bit_score; @hsps.first.bit_score; end
+ def identity; @hsps.first.identity; end
+ def percent_identity; @hsps.first.percent_identity; end
+ def overlap; @hsps.first.align_len; end
+
+ def query_seq; @hsps.first.qseq; end
+ def target_seq; @hsps.first.hseq; end
+ def midline; @hsps.first.midline; end
+
+ def query_start; @hsps.first.query_from; end
+ def query_end; @hsps.first.query_to; end
+ def target_start; @hsps.first.hit_from; end
+ def target_end; @hsps.first.hit_to; end
+ def lap_at
+ [ query_start, query_end, target_start, target_end ]
end
end
!
!
! # Bio::Blast::Report::Hsp
! class Hsp
! def initialize
! @hsp = {}
! end
! attr_reader :hsp
!
! # HSP number
! attr_accessor :num
! # Score (in bits) of HSP
! attr_accessor :bit_score
! # Score of HSP
! attr_accessor :score
! # E-value of HSP
! attr_accessor :evalue
! # Start of HSP in query
! attr_accessor :query_from
! # End of HSP in query
! attr_accessor :query_to
! # Start of HSP in subject
! attr_accessor :hit_from
! # End of HSP in subject
! attr_accessor :hit_to
! # Start of PHI-BLAST pattern
! attr_accessor :pattern_from
! # End of PHI-BLAST pattern
! attr_accessor :pattern_to
! # Translation frame of query
! attr_accessor :query_frame
! # Translation frame of subject
! attr_accessor :hit_frame
! # Number of identities in HSP
! attr_accessor :identity
! # Number of positives in HSP
! attr_accessor :positive
! # Number of gaps in HSP
! attr_accessor :gaps
! # Length of the alignment used
! attr_accessor :align_len
! # Score density
! attr_accessor :density
! # Alignment string for the query (with gaps)
! attr_accessor :qseq
! # Alignment string for subject (with gaps)
! attr_accessor :hseq
! # Formatting middle line
! attr_accessor :midline
! # Available only for '-m 8' format outputs.
! attr_accessor :percent_identity
! # Available only for '-m 8' format outputs.
! attr_accessor :mismatch_count
! end
!
! end # Report
!
! end # Blast
! end # Bio
***************
*** 390,652 ****
- =begin
-
- = Bio::Blast::Report
-
- Parsed results of the blast execution for Tab-delimited and XML output
- format. Tab-delimited reports are consists of
-
- Query id,
- Subject id,
- percent of identity,
- alignment length,
- number of mismatches (not including gaps),
- number of gap openings,
- start of alignment in query,
- end of alignment in query,
- start of alignment in subject,
- end of alignment in subject,
- expected value,
- bit score.
-
- according to the MEGABLAST document (README.mbl). As for XML output,
- see the following DTDs.
-
- * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.dtd
- * http://www.ncbi.nlm.nih.gov/dtd/NCBI_BlastOutput.mod
- * http://www.ncbi.nlm.nih.gov/dtd/NCBI_Entity.mod
-
-
- --- Bio::Blast::Report.new(data)
-
- Passing a BLAST output from 'blastall -m 7' or '-m 8' as a String.
- Formats are auto detected.
-
- --- Bio::Blast::Report.xmlparaser(xml)
-
- Specify to use XMLParser to parse XML (-m 7) output.
-
- --- Bio::Blast::Report.rexml(xml)
-
- Specify to use REXML to parse XML (-m 7) output.
-
- --- Bio::Blast::Report.tab(data)
-
- Specify to use tab delimited output parser.
-
- --- Bio::Blast::Report#program
- --- Bio::Blast::Report#version
- --- Bio::Blast::Report#reference
- --- Bio::Blast::Report#db
- --- Bio::Blast::Report#query_id
- --- Bio::Blast::Report#query_def
- --- Bio::Blast::Report#query_len
-
- Shortcut for BlastOutput values.
-
- --- Bio::Blast::Report#parameters
-
- Returns a Hash containing execution parameters. Valid keys are:
- 'matrix', 'expect', 'include', 'sc-match', 'sc-mismatch',
- 'gap-open', 'gap-extend', 'filter'
-
- --- Bio::Blast::Report#matrix
- * Matrix used (-M)
- --- Bio::Blast::Report#expect
- * Expectation threshold (-e)
- --- Bio::Blast::Report#inclusion
- * Inclusion threshold (-h)
- --- Bio::Blast::Report#sc_match
- * Match score for NT (-r)
- --- Bio::Blast::Report#sc_mismatch
- * Mismatch score for NT (-q)
- --- Bio::Blast::Report#gap_open
- * Gap opening cost (-G)
- --- Bio::Blast::Report#gap_extend
- * Gap extension cost (-E)
- --- Bio::Blast::Report#filter
- * Filtering options (-F)
- --- Bio::Blast::Report#pattern
- * PHI-BLAST pattern
- --- Bio::Blast::Report#entrez_query
- * Limit of request to Entrez
-
- These are shortcuts for parameters.
-
-
- --- Bio::Blast::Report#iterations
-
- Returns an Array of Bio::Blast::Report::Iteration objects.
-
- --- Bio::Blast::Report#each_iteration
-
- Iterates on each Bio::Blast::Report::Iteration object.
-
- --- Bio::Blast::Report#each_hit
- --- Bio::Blast::Report#each
-
- Iterates on each Bio::Blast::Report::Hit object of the the
- last Iteration.
-
- --- Bio::Blast::Report#statistics
-
- Returns a Hash containing execution statistics of the last iteration.
- Valid keys are:
- 'db-num', 'db-len', 'hsp-len', 'eff-space', 'kappa',
- 'lambda', 'entropy'
-
- --- Bio::Blast::Report#db_num
- * Number of sequences in BLAST db
- --- Bio::Blast::Report#db_len
- * Length of BLAST db
- --- Bio::Blast::Report#hsp_len
- * Effective HSP length
- --- Bio::Blast::Report#eff_space
- * Effective search space
- --- Bio::Blast::Report#kappa
- * Karlin-Altschul parameter K
- --- Bio::Blast::Report#lambda
- * Karlin-Altschul parameter Lamba
- --- Bio::Blast::Report#entropy
- * Karlin-Altschul parameter H
-
- These are shortcuts for statistics.
-
-
- --- Bio::Blast::Report#message
-
- Returns a String (or nil) containing execution message of the last
- iteration (typically "CONVERGED").
-
- --- Bio::Blast::Report#hits
-
- Returns a Array of Bio::Blast::Report::Hits of the last iteration.
-
-
- == Bio::Blast::Report::Iteration
-
- --- Bio::Blast::Report::Iteration#num
-
- Returns the number of iteration counts.
-
- --- Bio::Blast::Report::Iteration#hits
-
- Returns an Array of Bio::Blast::Report::Hit objects.
-
- --- Bio::Blast::Report::Iteration#each
-
- Iterates on each Bio::Blast::Report::Hit object.
-
- --- Bio::Blast::Report::Iteration#statistics
-
- Returns a Hash containing execution statistics.
- Valid keys are:
- 'db-len', 'db-num', 'eff-space', 'entropy', 'hsp-len',
- 'kappa', 'lambda'
-
- --- Bio::Blast::Report::Iteration#message
-
- Returns a String (or nil) containing execution message (typically
- "CONVERGED").
-
-
- == Bio::Blast::Report::Hit
-
- --- Bio::Blast::Report::Hit#each
-
- Iterates on each Hsp object.
-
- --- Bio::Blast::Report::Hit#hsps
-
- Returns an Array of Bio::Blast::Report::Hsp objects.
-
- --- Bio::Blast::Report::Hit#num
- * hit number
- --- Bio::Blast::Report::Hit#hit_id
- * SeqId of subject
- --- Bio::Blast::Report::Hit#len
- * length of subject
- --- Bio::Blast::Report::Hit#definition
- * definition line of subject
- --- Bio::Blast::Report::Hit#accession
- * accession
-
- Accessors for the Hit values.
-
- --- Bio::Blast::Report::Hit#query_id
- --- Bio::Blast::Report::Hit#query_def
- --- Bio::Blast::Report::Hit#query_len
- --- Bio::Blast::Report::Hit#target_id
- --- Bio::Blast::Report::Hit#target_def
- --- Bio::Blast::Report::Hit#target_len
-
- Compatible methods with Bio::Fasta::Report::Hit class.
-
- --- Bio::Blast::Report::Hit#evalue
- --- Bio::Blast::Report::Hit#bit_score
- --- Bio::Blast::Report::Hit#identity
- --- Bio::Blast::Report::Hit#overlap
-
- --- Bio::Blast::Report::Hit#query_seq
- --- Bio::Blast::Report::Hit#midline
- --- Bio::Blast::Report::Hit#target_seq
-
- --- Bio::Blast::Report::Hit#query_start
- --- Bio::Blast::Report::Hit#query_end
- --- Bio::Blast::Report::Hit#target_start
- --- Bio::Blast::Report::Hit#target_end
- --- Bio::Blast::Report::Hit#lap_at
-
- Shortcut methods for the best Hsp, some are also compatible with
- Bio::Fasta::Report::Hit class.
-
-
- == Bio::Blast::Report::Hsp
-
- --- Bio::Blast::Report::Hsp#num
- * HSP number
- --- Bio::Blast::Report::Hsp#bit_score
- * score (in bits) of HSP
- --- Bio::Blast::Report::Hsp#score
- * score of HSP
- --- Bio::Blast::Report::Hsp#evalue
- * e-value of HSP
- --- Bio::Blast::Report::Hsp#query_from
- * start of HSP in query
- --- Bio::Blast::Report::Hsp#query_to
- * end of HSP
- --- Bio::Blast::Report::Hsp#hit_from
- * start of HSP in subject
- --- Bio::Blast::Report::Hsp#hit_to
- * end of HSP
- --- Bio::Blast::Report::Hsp#pattern_from
- * start of PHI-BLAST pattern
- --- Bio::Blast::Report::Hsp#pattern_to
- * end of PHI-BLAST pattern
- --- Bio::Blast::Report::Hsp#query_frame
- * translation frame of query
- --- Bio::Blast::Report::Hsp#hit_frame
- * translation frame of subject
- --- Bio::Blast::Report::Hsp#identity
- * number of identities in HSP
- --- Bio::Blast::Report::Hsp#positive
- * number of positives in HSP
- --- Bio::Blast::Report::Hsp#gaps
- * number of gaps in HSP
- --- Bio::Blast::Report::Hsp#align_len
- * length of the alignment used
- --- Bio::Blast::Report::Hsp#density
- * score density
- --- Bio::Blast::Report::Hsp#qseq
- * alignment string for the query (with gaps)
- --- Bio::Blast::Report::Hsp#hseq
- * alignment string for subject (with gaps)
- --- Bio::Blast::Report::Hsp#midline
- * formating middle line
-
- --- Bio::Blast::Report::Hsp#percent_identity
- --- Bio::Blast::Report::Hsp#mismatch_count
-
- Available only for '-m 8' format outputs.
-
- =end
--- 515,516 ----
More information about the bioruby-cvs
mailing list