[BioRuby-cvs] bioruby/lib/bio/appl/psort report.rb,1.7,1.8
Katayama Toshiaki
k at pub.open-bio.org
Wed Sep 7 21:22:12 EDT 2005
Update of /home/repository/bioruby/bioruby/lib/bio/appl/psort
In directory pub.open-bio.org:/tmp/cvs-serv9021/lib/bio/appl/psort
Modified Files:
report.rb
Log Message:
* expanded tabs at the head of lines
Index: report.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/psort/report.rb,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** report.rb 13 Jun 2004 09:26:11 -0000 1.7
--- report.rb 8 Sep 2005 01:22:10 -0000 1.8
***************
*** 32,78 ****
class Report
! def self.parser(str)
! self.default_parser(str)
! end
!
! def self.default_parser(str)
! rpt = self.new
! rpt.raw = str
! query_info = str.scan(/^Query Information\n\n(.+?)\n\n/m)[0][0].split(/\n/)
! result_info = str.scan(/^Result Information\n\n(.+?)\n\n\*/m)[0][0]
! step1 = str.scan(/^\*\*\* Reasoning Step: 1\n\n(.+?)\n\n/m)[0][0]
! step2 = str.scan(/^\*\*\* Reasoning Step: 2\n\n(.+?)\n\n/m)[0][0]
! final_result = str.scan(/\n\n----- Final Results -----\n\n(.+?)\n\n\n/m)[0][0]
! rpt.entry_id = query_info[2].scan(/^>(\S+) */).to_s
! rpt.origin = query_info[0].scan(/ORIGIN (\w+)/).to_s
! rpt.sequence = Bio::Sequence::AA.new(query_info[3..query_info.size].to_s)
! # rpt.reasoning
! rpt.final_result = final_result.split(/\n/).map {|x|
! x = x.strip.split(/---/).map {|y| y.strip }
! { 'prediction' => x[0],
! 'certainty' => x[1].scan(/Certainty= (\d\.\d{3})/).to_s,
! 'comment' => x[1].scan(/\((\w+)\)/).to_s
! }
! }
! return rpt
! end
! # new
! def initialize(entry_id = '', origin = '', title = '', sequence = '',
! result_info = '', reasoning = {}, final_result = [])
! @entry_id = entry_id
! @origin = origin
! @title = title
! @sequence = sequence
! @result_info = result_info
! @reasoning = reasoning
! @final_result = final_result
! @raw = ''
! end
! attr_accessor :entry_id, :origin, :title, :sequence,
! :result_info, :reasoning,:final_result, :raw
--- 32,78 ----
class Report
! def self.parser(str)
! self.default_parser(str)
! end
!
! def self.default_parser(str)
! rpt = self.new
! rpt.raw = str
! query_info = str.scan(/^Query Information\n\n(.+?)\n\n/m)[0][0].split(/\n/)
! result_info = str.scan(/^Result Information\n\n(.+?)\n\n\*/m)[0][0]
! step1 = str.scan(/^\*\*\* Reasoning Step: 1\n\n(.+?)\n\n/m)[0][0]
! step2 = str.scan(/^\*\*\* Reasoning Step: 2\n\n(.+?)\n\n/m)[0][0]
! final_result = str.scan(/\n\n----- Final Results -----\n\n(.+?)\n\n\n/m)[0][0]
! rpt.entry_id = query_info[2].scan(/^>(\S+) */).to_s
! rpt.origin = query_info[0].scan(/ORIGIN (\w+)/).to_s
! rpt.sequence = Bio::Sequence::AA.new(query_info[3..query_info.size].to_s)
! # rpt.reasoning
! rpt.final_result = final_result.split(/\n/).map {|x|
! x = x.strip.split(/---/).map {|y| y.strip }
! { 'prediction' => x[0],
! 'certainty' => x[1].scan(/Certainty= (\d\.\d{3})/).to_s,
! 'comment' => x[1].scan(/\((\w+)\)/).to_s
! }
! }
! return rpt
! end
! # new
! def initialize(entry_id = '', origin = '', title = '', sequence = '',
! result_info = '', reasoning = {}, final_result = [])
! @entry_id = entry_id
! @origin = origin
! @title = title
! @sequence = sequence
! @result_info = result_info
! @reasoning = reasoning
! @final_result = final_result
! @raw = ''
! end
! attr_accessor :entry_id, :origin, :title, :sequence,
! :result_info, :reasoning,:final_result, :raw
***************
*** 86,176 ****
SclNames = {
! 'csk' => 'cytoskeletal',
! 'cyt' => 'cytoplasmic',
! 'nuc' => 'nuclear',
! 'mit' => 'mitochondrial',
! 'ves' => 'vesicles of secretory system',
! 'end' => 'endoplasmic reticulum',
! 'gol' => 'Golgi',
! 'vac' => 'vacuolar',
! 'pla' => 'plasma membrane',
! 'pox' => 'peroxisomal',
! 'exc' => 'extracellular, including cell wall',
! '---' => 'other'
}
Features = [
! 'psg', # PSG: PSG score
! 'gvh', # GvH: GvH score
! 'alm', # ALOM: $xmax
! 'tms', # ALOM: $count
! 'top', # MTOP: Charge difference: $mtopscr
! 'mit', # MITDISC: Score: $score
! 'mip', # Gavel: motif at $isite
! 'nuc', # NUCDISC: NLS Score: $score
! 'erl', # KDEL: ($seg|none)
! 'erm', # ER Membrane Retention Signals: ($cseg|none) $scr
! 'pox', # SKL: ($pat|none) $scr
! 'px2', # PTS2: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'vac', # VAC: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'rnp', # RNA-binding motif: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'act', # Actinin-type actin-binding motif: (found|none) $hit
! 'caa', # Prenylation motif: (2|1|0) CaaX,CXC,CC,nil
! 'yqr', # memYQRL: (found|none) $scr
! 'tyr', # Tyrosines in the tail: (none|\S+[,])
! # 10 * scalar(@ylist) / ($end - $start + 1);
! 'leu', # Dileucine motif in the tail: (none|found) $scr
! 'gpi', # >>> Seem to be GPI anchored
! 'myr', # NMYR: (none|\w) $scr
! 'dna', # checking 63 PROSITE DNA binding motifs: $hit
! 'rib', # checking 71 PROSITE ribosomal protein motifs: $hit
! 'bac', # checking 33 PROSITE prokaryotic DNA binding motifs: $hit
! 'm1a', # $mtype eq '1a'
! 'm1b', # $mtype eq '1b'
! 'm2', # $mtype eq '2 '
! 'mNt', # $mtype eq 'Nt'
! 'm3a', # $mtype eq '3a'
! 'm3b', # $mtype eq '3b'
! 'm_', # $mtype eq '__' tms == 0
! 'ncn', # NNCN: ($NetOutput[1] > $NetOutput[0]) ? $output : (-$output);
! 'lps', # COIL: $count
! 'len' # $leng
]
FeaturesLong = {
! 'psg' => 'PSG',
! 'gvh' => 'GvH',
! 'tms' => 'ALOM',
! 'alm' => 'ALOM',
! 'top' => 'MTOP',
! 'mit' => 'MITDISC',
! 'mip' => 'Gavel',
! 'nuc' => 'NUCDISC',
! 'erl' => 'KDEL',
! 'erm' => 'ER Membrane Retention Signals',
! 'pox' => 'SKL',
! 'px2' => 'PTS2',
! 'vac' => 'VAC',
! 'rnp' => 'RNA-binding motif',
! 'act' => 'Actinin-type actin-binding motif',
! 'caa' => 'Prenylation motif',
! 'yqr' => 'memYQRL',
! 'tyr' => 'Tyrosines in the tail',
! 'leu' => 'Dileucine motif in the tail',
! 'gpi' => '>>> Seems to be GPI anchored',
! 'myr' => 'NMYR',
! 'dna' => 'checking 63 PROSITE DNA binding motifs',
! 'rib' => 'checking 71 PROSITE ribosomal protein motifs',
! 'bac' => 'ochecking 33 PROSITE prokaryotic DNA binding motifs:',
! 'm1a' => '',
! 'm1b' => '',
! 'm2' => '',
! 'mNt' => '',
! 'm3a' => '',
! 'm3b' => '',
! 'm_' => '',
! 'ncn' => 'NNCN',
! 'lps' => 'COIL',
! 'len' => 'AA' # length of input sequence
}
--- 86,176 ----
SclNames = {
! 'csk' => 'cytoskeletal',
! 'cyt' => 'cytoplasmic',
! 'nuc' => 'nuclear',
! 'mit' => 'mitochondrial',
! 'ves' => 'vesicles of secretory system',
! 'end' => 'endoplasmic reticulum',
! 'gol' => 'Golgi',
! 'vac' => 'vacuolar',
! 'pla' => 'plasma membrane',
! 'pox' => 'peroxisomal',
! 'exc' => 'extracellular, including cell wall',
! '---' => 'other'
}
Features = [
! 'psg', # PSG: PSG score
! 'gvh', # GvH: GvH score
! 'alm', # ALOM: $xmax
! 'tms', # ALOM: $count
! 'top', # MTOP: Charge difference: $mtopscr
! 'mit', # MITDISC: Score: $score
! 'mip', # Gavel: motif at $isite
! 'nuc', # NUCDISC: NLS Score: $score
! 'erl', # KDEL: ($seg|none)
! 'erm', # ER Membrane Retention Signals: ($cseg|none) $scr
! 'pox', # SKL: ($pat|none) $scr
! 'px2', # PTS2: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'vac', # VAC: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'rnp', # RNA-binding motif: (found|none) ($#match < 0) ? 0 : ($#match+1);
! 'act', # Actinin-type actin-binding motif: (found|none) $hit
! 'caa', # Prenylation motif: (2|1|0) CaaX,CXC,CC,nil
! 'yqr', # memYQRL: (found|none) $scr
! 'tyr', # Tyrosines in the tail: (none|\S+[,])
! # 10 * scalar(@ylist) / ($end - $start + 1);
! 'leu', # Dileucine motif in the tail: (none|found) $scr
! 'gpi', # >>> Seem to be GPI anchored
! 'myr', # NMYR: (none|\w) $scr
! 'dna', # checking 63 PROSITE DNA binding motifs: $hit
! 'rib', # checking 71 PROSITE ribosomal protein motifs: $hit
! 'bac', # checking 33 PROSITE prokaryotic DNA binding motifs: $hit
! 'm1a', # $mtype eq '1a'
! 'm1b', # $mtype eq '1b'
! 'm2', # $mtype eq '2 '
! 'mNt', # $mtype eq 'Nt'
! 'm3a', # $mtype eq '3a'
! 'm3b', # $mtype eq '3b'
! 'm_', # $mtype eq '__' tms == 0
! 'ncn', # NNCN: ($NetOutput[1] > $NetOutput[0]) ? $output : (-$output);
! 'lps', # COIL: $count
! 'len' # $leng
]
FeaturesLong = {
! 'psg' => 'PSG',
! 'gvh' => 'GvH',
! 'tms' => 'ALOM',
! 'alm' => 'ALOM',
! 'top' => 'MTOP',
! 'mit' => 'MITDISC',
! 'mip' => 'Gavel',
! 'nuc' => 'NUCDISC',
! 'erl' => 'KDEL',
! 'erm' => 'ER Membrane Retention Signals',
! 'pox' => 'SKL',
! 'px2' => 'PTS2',
! 'vac' => 'VAC',
! 'rnp' => 'RNA-binding motif',
! 'act' => 'Actinin-type actin-binding motif',
! 'caa' => 'Prenylation motif',
! 'yqr' => 'memYQRL',
! 'tyr' => 'Tyrosines in the tail',
! 'leu' => 'Dileucine motif in the tail',
! 'gpi' => '>>> Seems to be GPI anchored',
! 'myr' => 'NMYR',
! 'dna' => 'checking 63 PROSITE DNA binding motifs',
! 'rib' => 'checking 71 PROSITE ribosomal protein motifs',
! 'bac' => 'ochecking 33 PROSITE prokaryotic DNA binding motifs:',
! 'm1a' => '',
! 'm1b' => '',
! 'm2' => '',
! 'mNt' => '',
! 'm3a' => '',
! 'm3b' => '',
! 'm_' => '',
! 'ncn' => 'NNCN',
! 'lps' => 'COIL',
! 'len' => 'AA' # length of input sequence
}
***************
*** 178,364 ****
class Report
! BOUNDARY = '-' * 75
! RS = DELIMITER = "\)\n\n#{BOUNDARY}"
! def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
! seq = nil, k = nil, features = {}, prob = {}, pred = nil)
! @entry_id = entry_id
! @scl = scl
! @definition = definition
! @seq = seq
! @features = features
! @prob = prob
! @pred = pred
! @k = k
! @raw = raw
! end
! attr_accessor :entry_id, :scl, :definition, :seq,
! :k, :features, :prob, :pred, :raw
!
! # report format to be auto detection
! def self.parser(str, entry_id)
! case str
! when /^ psg:/ # default report
! self.default_parser(str, entry_id)
! when /^PSG:/ # -v report
! self.v_parser(str, entry_id)
! when /: too short length /
! self.too_short_parser(str, entry_id)
! when /PSORT II server/
! tmp = self.new(ent, entry_id)
! else
! raise ArgumentError, "invalid format\n[#{str}]"
! end
! end
! # $id: too short length ($leng), skipped\n";
! def self.too_short_parser(ent, entry_id = nil)
! report = self.new(ent)
! report.entry_id = entry_id
! if ent =~ /^(.+)?: too short length/
! report.entry_id = $1 unless report.entry_id
! report.scl = '---'
! end
! report
! end
! # default report
! # ``psort test.faa'' output
! def self.default_parser(ent, entry_id = nil)
! report = self.new(ent, entry_id)
! ent = ent.split(/\n\n/).map {|e| e.chomp }
! report.set_header_line(ent[0])
! # feature matrix
! ent[1].gsub(/\n/,' ').strip.split(/ /).map {|fe|
! pair = fe.split(/: /)
! report.features[pair[0].strip] = pair[1].strip.to_f
! }
! report.prob = self.set_kNN_prob(ent[2])
! report.set_prediction(ent[3])
! return report
! end
! def set_header_line(str)
! str.sub!(/^-+\n/,'')
! tmp = str.split(/\t| /)
! @entry_id = tmp.shift.sub(/^-+/,'').strip unless @entry_id
! case tmp.join(' ').chomp
! when /\(\d+ aa\) (.+)$/
! @definition = $1
! else
! @definition = tmp.join(' ').chomp
! end
! scl = @definition.split(' ')[0]
! @scl = scl if SclNames.keys.index(scl)
! end
! def self.set_kNN_prob(str)
! prob = Hash.new
! Bio::PSORT::PSORT2::SclNames.keys.each {|a|
! prob.update( {a => 0.0} )
! }
! str.gsub(/\t/,'').split(/\n/).each {|a|
! val,scl = a.strip.split(/ %: /)
! key = Bio::PSORT::PSORT2::SclNames.index(scl)
! prob[key] = val.to_f
! }
! return prob
! end
! def set_prediction(str)
! case str
! when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
! @entry_id ||= $1 unless @entry_id
! @pred = $2
! @k = $3
! else
! raise ArgumentError,
! "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
! end
! end
! # ``psort -v report'' and WWW server output
! def self.v_parser(ent, entry_id = nil)
! report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)
! ent = ent.split(/\n\n/).map {|e| e.chomp }
! ent.each_with_index {|e, i|
! unless /^(\w|-|\>|\t)/ =~ e
! j = self.__send__(:search_j, i, ent)
! ent[i - j] += e
! ent[i] = nil
! end
! if /^none/ =~ e # for psort output bug
! j = self.__send__(:search_j, i, ent)
! ent[i - j] += e
! ent[i] = nil
! end
! }
! ent.compact!
! # ent.each_with_index {|e,i| p [i.to_s.ljust(2), e] }
! if /^ PSORT II server/ =~ ent[0] # for WWW version
! ent.shift
! delline = ''
! ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
! i = ent.index(delline)
! ent.delete(delline)
! ent.delete_at(i - 1)
! end
! report.set_header_line(ent.shift)
! report.seq = Bio::Sequence::AA.new(ent.shift)
! fent, pent = self.divent(ent)
! report.set_features(fent)
! report.prob = self.set_kNN_prob(pent[0].strip)
! report.set_prediction(pent[1].strip)
! return report
! end
! def self.search_j(i, ent)
! j = 1
! 1.upto(ent.size) {|x|
! if ent[i - x]
! j = x
! break
! end
! }
! return j
! end
! private_class_method :search_j
! # divide entry body
! def self.divent(ent)
! boundary = ent.index(BOUNDARY)
! return ent[0..(boundary - 1)], ent[(boundary + 2)..ent.length]
! end
! def set_features(fary)
! fary.each {|fent|
! key = fent.split(/\:( |\n)/)[0].strip
! self.features[key] = fent # unless /^\>/ =~ key
! }
! self.features['AA'] = self.seq.length
! end
!
end # class Report
--- 178,364 ----
class Report
! BOUNDARY = '-' * 75
! RS = DELIMITER = "\)\n\n#{BOUNDARY}"
! def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
! seq = nil, k = nil, features = {}, prob = {}, pred = nil)
! @entry_id = entry_id
! @scl = scl
! @definition = definition
! @seq = seq
! @features = features
! @prob = prob
! @pred = pred
! @k = k
! @raw = raw
! end
! attr_accessor :entry_id, :scl, :definition, :seq,
! :k, :features, :prob, :pred, :raw
!
! # report format to be auto detection
! def self.parser(str, entry_id)
! case str
! when /^ psg:/ # default report
! self.default_parser(str, entry_id)
! when /^PSG:/ # -v report
! self.v_parser(str, entry_id)
! when /: too short length /
! self.too_short_parser(str, entry_id)
! when /PSORT II server/
! tmp = self.new(ent, entry_id)
! else
! raise ArgumentError, "invalid format\n[#{str}]"
! end
! end
! # $id: too short length ($leng), skipped\n";
! def self.too_short_parser(ent, entry_id = nil)
! report = self.new(ent)
! report.entry_id = entry_id
! if ent =~ /^(.+)?: too short length/
! report.entry_id = $1 unless report.entry_id
! report.scl = '---'
! end
! report
! end
! # default report
! # ``psort test.faa'' output
! def self.default_parser(ent, entry_id = nil)
! report = self.new(ent, entry_id)
! ent = ent.split(/\n\n/).map {|e| e.chomp }
! report.set_header_line(ent[0])
! # feature matrix
! ent[1].gsub(/\n/,' ').strip.split(/ /).map {|fe|
! pair = fe.split(/: /)
! report.features[pair[0].strip] = pair[1].strip.to_f
! }
! report.prob = self.set_kNN_prob(ent[2])
! report.set_prediction(ent[3])
! return report
! end
! def set_header_line(str)
! str.sub!(/^-+\n/,'')
! tmp = str.split(/\t| /)
! @entry_id = tmp.shift.sub(/^-+/,'').strip unless @entry_id
! case tmp.join(' ').chomp
! when /\(\d+ aa\) (.+)$/
! @definition = $1
! else
! @definition = tmp.join(' ').chomp
! end
! scl = @definition.split(' ')[0]
! @scl = scl if SclNames.keys.index(scl)
! end
! def self.set_kNN_prob(str)
! prob = Hash.new
! Bio::PSORT::PSORT2::SclNames.keys.each {|a|
! prob.update( {a => 0.0} )
! }
! str.gsub(/\t/,'').split(/\n/).each {|a|
! val,scl = a.strip.split(/ %: /)
! key = Bio::PSORT::PSORT2::SclNames.index(scl)
! prob[key] = val.to_f
! }
! return prob
! end
! def set_prediction(str)
! case str
! when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
! @entry_id ||= $1 unless @entry_id
! @pred = $2
! @k = $3
! else
! raise ArgumentError,
! "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
! end
! end
! # ``psort -v report'' and WWW server output
! def self.v_parser(ent, entry_id = nil)
! report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)
! ent = ent.split(/\n\n/).map {|e| e.chomp }
! ent.each_with_index {|e, i|
! unless /^(\w|-|\>|\t)/ =~ e
! j = self.__send__(:search_j, i, ent)
! ent[i - j] += e
! ent[i] = nil
! end
! if /^none/ =~ e # for psort output bug
! j = self.__send__(:search_j, i, ent)
! ent[i - j] += e
! ent[i] = nil
! end
! }
! ent.compact!
! # ent.each_with_index {|e,i| p [i.to_s.ljust(2), e] }
! if /^ PSORT II server/ =~ ent[0] # for WWW version
! ent.shift
! delline = ''
! ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
! i = ent.index(delline)
! ent.delete(delline)
! ent.delete_at(i - 1)
! end
! report.set_header_line(ent.shift)
! report.seq = Bio::Sequence::AA.new(ent.shift)
! fent, pent = self.divent(ent)
! report.set_features(fent)
! report.prob = self.set_kNN_prob(pent[0].strip)
! report.set_prediction(pent[1].strip)
! return report
! end
! def self.search_j(i, ent)
! j = 1
! 1.upto(ent.size) {|x|
! if ent[i - x]
! j = x
! break
! end
! }
! return j
! end
! private_class_method :search_j
! # divide entry body
! def self.divent(ent)
! boundary = ent.index(BOUNDARY)
! return ent[0..(boundary - 1)], ent[(boundary + 2)..ent.length]
! end
! def set_features(fary)
! fary.each {|fent|
! key = fent.split(/\:( |\n)/)[0].strip
! self.features[key] = fent # unless /^\>/ =~ key
! }
! self.features['AA'] = self.seq.length
! end
!
end # class Report
More information about the bioruby-cvs
mailing list