From k at pub.open-bio.org Wed Oct 5 03:09:04 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Wed Oct 5 03:59:30 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/shell/plugin midi.rb,NONE,1.1 Message-ID: <200510050709.j95794dZ032759@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv32752 Added Files: midi.rb Log Message: * sequence to midi converter contributed by Natsuhiro Ichinose http://www.genome.ist.i.kyoto-u.ac.jp/~ichinose/bio/biomidi/ --- NEW FILE: midi.rb --- # # bio/shell/plugin/midi.rb - Sequence to MIDI converter # # Copyright (C) 2004 Natsuhiro Ichinose # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: midi.rb,v 1.1 2005/10/05 07:09:02 k Exp $ # require 'bio/sequence' module Bio class Sequence class NA class MidiTrack def initialize(channel=0,program=0,base=60,range=2,scale=[0,2,4,5,7,9,11]) @base=base @code=[] @tune=0 @time=0 @tunes=[] @channel=channel&0xff range.times do |i| scale.each do |c| @tunes.push c+i*12 end end @ttype={'aa'=>1,'at'=>0,'ac'=>3,'ag'=>-1, 'ta'=>0,'tt'=>-1,'tc'=>1,'tg'=>-2, 'ca'=>2,'ct'=>1,'cc'=>2,'cg'=>6, 'ga'=>-1,'gt'=>-3,'gc'=>0,'gg'=>-2} @dtype=[{'aa'=>2,'at'=>4,'ac'=>4,'ag'=>2, 'ta'=>2,'tt'=>4,'tc'=>4,'tg'=>2, 'ca'=>2,'ct'=>3,'cc'=>1,'cg'=>2, 'ga'=>1,'gt'=>2,'gc'=>2,'gg'=>3}, {'aa'=>3,'at'=>3,'ac'=>2,'ag'=>3, 'ta'=>3,'tt'=>3,'tc'=>2,'tg'=>2, 'ca'=>3,'ct'=>2,'cc'=>1,'cg'=>1, 'ga'=>1,'gt'=>1,'gc'=>1,'gg'=>1}, {'aa'=>2,'at'=>2,'ac'=>2,'ag'=>2, 'ta'=>1,'tt'=>1,'tc'=>2,'tg'=>2, 'ca'=>2,'ct'=>2,'cc'=>2,'cg'=>3, 'ga'=>2,'gt'=>2,'gc'=>3,'gg'=>1}, {'aa'=>1,'at'=>1,'ac'=>1,'ag'=>1, 'ta'=>1,'tt'=>1,'tc'=>1,'tg'=>1, 'ca'=>1,'ct'=>1,'cc'=>1,'cg'=>3, 'ga'=>1,'gt'=>1,'gc'=>1,'gg'=>1} ] @code.concat [0x00,0xc0|(@channel&0xff)] @code.concat icode(program&0xff,1) end def icode(num,n) code=[] n.times do |i| code.push num&0xff num>>=8 end code.reverse end def rcode(num) code=[] code.push num&0x7f while num>0x7f num>>=7 code.push num&0x7f|0x80 end code.reverse end def c2s(code) ans="" code.each do |c| ans+=c.chr end ans end def push(s) tt=@time%4 t=@ttype[s[0,2]] d=@dtype[tt][s[2,2]] if !t.nil? && !d.nil? 
@tune+=t @tune%=@tunes.length if tt==0 vel=90 elsif tt==1 && d>1 vel=100 elsif tt==2 vel=60 else vel=50 end @code.concat rcode(1) @code.concat [0x90|@channel,@tunes[@tune]+@base,vel] @code.concat rcode(240*d) @code.concat [0x80|@channel,@tunes[@tune]+@base,0] @time+=d end end def push_silent(d) @code.concat rcode(1) @code.concat [0x90|@channel,0,0] @code.concat rcode(240*d) @code.concat [0x80|@channel,0,0] @time+=d; end def get_time @time end def encode ans="MTrk" ans+=c2s(icode(@code.length+4,4)) ans+=c2s(@code) ans+=c2s([0x00,0xff,0x2f,0x00]) ans end def header(num,tempo=120) ans="MThd" ans+=c2s(icode(6,4)) ans+=c2s(icode(1,2)) ans+=c2s(icode(num+1,2)) ans+=c2s(icode(480,2)) ans+="MTrk" ans+=c2s(icode(11,4)) ans+=c2s([0x00,0xff,0x51,0x03]) ans+=c2s(icode(60000000/tempo,3)) ans+=c2s([0x00,0xff,0x2f,0x00]) ans end end def to_midi(tempo=120,track_info=[[9,60,2],[13,48,2],[41,48,2],[44,36,2]], drum=false,scale=[0,2,4,5,7,9,11]) track=[] track_info.each_with_index do |i,j| k=j k+=1 if(j>=9) track.push MidiTrack.new(k,i[0],i[1],i[2],scale) end track.push(MidiTrack.new(9,0,35,2,[0,1,2,3,4,5,6,7,8,9,10,11])) if drum cur=0 window_search(4) do |s| track[cur%track.length].push(s) cur+=1 end track.each do |t| t.push_silent(12) end ans=track[0].header(track.length,tempo) track.each do |t| ans+=t.encode end ans end end end end if $0==__FILE__ require 'bio/io/flatfile' ff=Bio::FlatFile.open(nil,ARGV[0]) ff.each do |f| puts f.naseq[1..1000].to_midi end end From k at pub.open-bio.org Wed Oct 5 04:58:35 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Wed Oct 5 05:54:35 2005 Subject: [BioRuby-cvs] bioruby/lib/bio shell.rb,1.2,1.3 Message-ID: <200510050858.j958wZdZ000943@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv937 Modified Files: shell.rb Log Message: * midi plugin is refined and officially added Index: shell.rb =================================================================== RCS file: 
/home/repository/bioruby/bioruby/lib/bio/shell.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** shell.rb 24 Sep 2005 12:33:07 -0000 1.2 --- shell.rb 5 Oct 2005 08:58:33 -0000 1.3 *************** *** 35,38 **** --- 35,39 ---- require 'bio/shell/plugin/flatfile' require 'bio/shell/plugin/obda' + require 'bio/shell/plugin/midi' extend Core From k at pub.open-bio.org Wed Oct 5 04:58:35 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Wed Oct 5 05:55:33 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/shell/plugin midi.rb,1.1,1.2 Message-ID: <200510050858.j958wZdZ000947@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv937/shell/plugin Modified Files: midi.rb Log Message: * midi plugin is refined and officially added Index: midi.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/plugin/midi.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** midi.rb 5 Oct 2005 07:09:02 -0000 1.1 --- midi.rb 5 Oct 2005 08:58:33 -0000 1.2 *************** *** 2,6 **** # bio/shell/plugin/midi.rb - Sequence to MIDI converter # ! # Copyright (C) 2004 Natsuhiro Ichinose # # This library is free software; you can redistribute it and/or --- 2,6 ---- # bio/shell/plugin/midi.rb - Sequence to MIDI converter # ! # Copyright (C) 2003 Natsuhiro Ichinose # # This library is free software; you can redistribute it and/or *************** *** 21,178 **** # ! require 'bio/sequence' ! module Bio ! class Sequence ! class NA ! class MidiTrack ! def initialize(channel=0,program=0,base=60,range=2,scale=[0,2,4,5,7,9,11]) ! @base=base ! @code=[] ! @tune=0 ! @time=0 ! @tunes=[] ! @channel=channel&0xff ! range.times do |i| ! scale.each do |c| ! @tunes.push c+i*12 ! end ! end ! @ttype={'aa'=>1,'at'=>0,'ac'=>3,'ag'=>-1, ! 'ta'=>0,'tt'=>-1,'tc'=>1,'tg'=>-2, ! 'ca'=>2,'ct'=>1,'cc'=>2,'cg'=>6, ! 
'ga'=>-1,'gt'=>-3,'gc'=>0,'gg'=>-2} ! @dtype=[{'aa'=>2,'at'=>4,'ac'=>4,'ag'=>2, ! 'ta'=>2,'tt'=>4,'tc'=>4,'tg'=>2, ! 'ca'=>2,'ct'=>3,'cc'=>1,'cg'=>2, ! 'ga'=>1,'gt'=>2,'gc'=>2,'gg'=>3}, ! {'aa'=>3,'at'=>3,'ac'=>2,'ag'=>3, ! 'ta'=>3,'tt'=>3,'tc'=>2,'tg'=>2, ! 'ca'=>3,'ct'=>2,'cc'=>1,'cg'=>1, ! 'ga'=>1,'gt'=>1,'gc'=>1,'gg'=>1}, ! {'aa'=>2,'at'=>2,'ac'=>2,'ag'=>2, ! 'ta'=>1,'tt'=>1,'tc'=>2,'tg'=>2, ! 'ca'=>2,'ct'=>2,'cc'=>2,'cg'=>3, ! 'ga'=>2,'gt'=>2,'gc'=>3,'gg'=>1}, ! {'aa'=>1,'at'=>1,'ac'=>1,'ag'=>1, ! 'ta'=>1,'tt'=>1,'tc'=>1,'tg'=>1, ! 'ca'=>1,'ct'=>1,'cc'=>1,'cg'=>3, ! 'ga'=>1,'gt'=>1,'gc'=>1,'gg'=>1} ! ] ! @code.concat [0x00,0xc0|(@channel&0xff)] ! @code.concat icode(program&0xff,1) ! end ! def icode(num,n) ! code=[] ! n.times do |i| ! code.push num&0xff ! num>>=8 ! end ! code.reverse ! end ! def rcode(num) ! code=[] ! code.push num&0x7f ! while num>0x7f ! num>>=7 ! code.push num&0x7f|0x80 ! end ! code.reverse ! end ! def c2s(code) ! ans="" ! code.each do |c| ! ans+=c.chr ! end ! ans ! end ! def push(s) ! tt=@time%4 ! t=@ttype[s[0,2]] ! d=@dtype[tt][s[2,2]] ! if !t.nil? && !d.nil? ! @tune+=t ! @tune%=@tunes.length ! if tt==0 ! vel=90 ! elsif tt==1 && d>1 ! vel=100 ! elsif tt==2 ! vel=60 ! else ! vel=50 ! end ! @code.concat rcode(1) ! @code.concat [0x90|@channel,@tunes[@tune]+@base,vel] ! @code.concat rcode(240*d) ! @code.concat [0x80|@channel,@tunes[@tune]+@base,0] ! @time+=d ! end ! end ! def push_silent(d) ! @code.concat rcode(1) ! @code.concat [0x90|@channel,0,0] ! @code.concat rcode(240*d) ! @code.concat [0x80|@channel,0,0] ! @time+=d; ! end ! def get_time ! @time ! end ! def encode ! ans="MTrk" ! ans+=c2s(icode(@code.length+4,4)) ! ans+=c2s(@code) ! ans+=c2s([0x00,0xff,0x2f,0x00]) ! ans ! end ! def header(num,tempo=120) ! ans="MThd" ! ans+=c2s(icode(6,4)) ! ans+=c2s(icode(1,2)) ! ans+=c2s(icode(num+1,2)) ! ans+=c2s(icode(480,2)) ! ans+="MTrk" ! ans+=c2s(icode(11,4)) ! ans+=c2s([0x00,0xff,0x51,0x03]) ! ans+=c2s(icode(60000000/tempo,3)) ! 
ans+=c2s([0x00,0xff,0x2f,0x00]) ! ans end end ! def to_midi(tempo=120,track_info=[[9,60,2],[13,48,2],[41,48,2],[44,36,2]], ! drum=false,scale=[0,2,4,5,7,9,11]) ! track=[] ! track_info.each_with_index do |i,j| ! k=j ! k+=1 if(j>=9) ! track.push MidiTrack.new(k,i[0],i[1],i[2],scale) ! end ! track.push(MidiTrack.new(9,0,35,2,[0,1,2,3,4,5,6,7,8,9,10,11])) if drum ! cur=0 ! window_search(4) do |s| ! track[cur%track.length].push(s) ! cur+=1 ! end ! track.each do |t| ! t.push_silent(12) ! end ! ans=track[0].header(track.length,tempo) ! track.each do |t| ! ans+=t.encode end ! ans end end end end ! if $0==__FILE__ ! require 'bio/io/flatfile' ! ff=Bio::FlatFile.open(nil,ARGV[0]) ff.each do |f| ! puts f.naseq[1..1000].to_midi end end --- 21,236 ---- # ! class Bio::Sequence::NA ! class MidiTrack ! def initialize(channel = 0, program = nil, base = nil, range = nil, scale = nil) ! @channel = channel & 0xff ! @program = program || 0 ! @base = base || 60 ! @range = range || 2 ! @scale = scale || [0, 2, 4, 5, 7, 9, 11] ! ! @tunes = [] ! @tune = 0 ! @code = [] ! @time = 0 ! ! @range.times do |i| ! @scale.each do |c| ! @tunes.push c + i * 12 end end ! @ttype = { ! 'aa' => 1, 'at' => 0, 'ac' => 3, 'ag' => -1, ! 'ta' => 0, 'tt' => -1, 'tc' => 1, 'tg' => -2, ! 'ca' => 2, 'ct' => 1, 'cc' => 2, 'cg' => 6, ! 'ga' => -1, 'gt' => -3, 'gc' => 0, 'gg' => -2, ! } ! @dtype = [ ! { 'aa' => 2, 'at' => 4, 'ac' => 4, 'ag' => 2, ! 'ta' => 2, 'tt' => 4, 'tc' => 4, 'tg' => 2, ! 'ca' => 2, 'ct' => 3, 'cc' => 1, 'cg' => 2, ! 'ga' => 1, 'gt' => 2, 'gc' => 2, 'gg' => 3, ! }, ! { 'aa' => 3, 'at' => 3, 'ac' => 2, 'ag' => 3, ! 'ta' => 3, 'tt' => 3, 'tc' => 2, 'tg' => 2, ! 'ca' => 3, 'ct' => 2, 'cc' => 1, 'cg' => 1, ! 'ga' => 1, 'gt' => 1, 'gc' => 1, 'gg' => 1, ! }, ! { 'aa' => 2, 'at' => 2, 'ac' => 2, 'ag' => 2, ! 'ta' => 1, 'tt' => 1, 'tc' => 2, 'tg' => 2, ! 'ca' => 2, 'ct' => 2, 'cc' => 2, 'cg' => 3, ! 'ga' => 2, 'gt' => 2, 'gc' => 3, 'gg' => 1, ! }, ! { 'aa' => 1, 'at' => 1, 'ac' => 1, 'ag' => 1, ! 
'ta' => 1, 'tt' => 1, 'tc' => 1, 'tg' => 1, ! 'ca' => 1, 'ct' => 1, 'cc' => 1, 'cg' => 3, ! 'ga' => 1, 'gt' => 1, 'gc' => 1, 'gg' => 1, ! }, ! ] ! ! @code.concat [0x00, 0xc0 | (@channel & 0xff)] ! @code.concat icode(@program & 0xff, 1) ! end ! ! def icode(num, n) ! code = [] ! n.times do |i| ! code.push num & 0xff ! num >>= 8 ! end ! code.reverse ! end ! ! def rcode(num) ! code = [] ! code.push num & 0x7f ! while num > 0x7f ! num >>= 7 ! code.push num & 0x7f | 0x80 ! end ! code.reverse ! end ! ! def c2s(code) ! ans = "" ! code.each do |c| ! ans += c.chr ! end ! ans ! end ! ! def push(s) ! tt = @time % 4 ! t = @ttype[s[0, 2]] ! d = @dtype[tt][s[2, 2]] ! if !t.nil? && !d.nil? ! @tune += t ! @tune %= @tunes.length ! if tt == 0 ! vel = 90 ! elsif tt == 1 && d > 1 ! vel = 100 ! elsif tt == 2 ! vel = 60 ! else ! vel = 50 end ! @code.concat rcode(1) ! @code.concat [0x90 | @channel, @tunes[@tune] + @base, vel] ! @code.concat rcode(240 * d) ! @code.concat [0x80 | @channel, @tunes[@tune] + @base, 0] ! @time += d ! end ! end ! ! def push_silent(d) ! @code.concat rcode(1) ! @code.concat [0x90 | @channel, 0, 0] ! @code.concat rcode(240 * d) ! @code.concat [0x80 | @channel, 0, 0] ! @time += d; ! end ! ! def get_time ! @time ! end ! ! def encode ! ans ="MTrk" ! ans += c2s(icode(@code.length + 4, 4)) ! ans += c2s(@code) ! ans += c2s([0x00, 0xff, 0x2f, 0x00]) ! ans ! end ! ! def header(num, tempo = 120) ! ans = "MThd" ! ans += c2s(icode(6, 4)) ! ans += c2s(icode(1, 2)) ! ans += c2s(icode(num + 1, 2)) ! ans += c2s(icode(480, 2)) ! ans += "MTrk" ! ans += c2s(icode(11, 4)) ! ans += c2s([0x00, 0xff, 0x51, 0x03]) ! ans += c2s(icode(60000000 / tempo, 3)) ! ans += c2s([0x00, 0xff, 0x2f, 0x00]) ! ans ! end ! ! end # MidiTrack ! ! ! ! # drum: ! # true (with rhythm part), false (without rhythm part) ! # scale: ! # C C# D D# E F F# G G# A A# B ! # 0 1 2 3 4 5 6 7 8 9 10 11 ! def to_midi(tempo = 120, drum = true, scale = nil, track_info = nil) ! scale ||= [0, 2, 4, 5, 7, 9, 11] ! 
track_info ||= [[9, 60, 2], [13, 48, 2], [41, 48, 2], [44, 36, 2]] ! ! track = [] ! ! track_info.each_with_index do |i, j| ! k = j ! k += 1 if j >= 9 ! track.push MidiTrack.new(k, i[0], i[1], i[2], scale) ! end ! ! if drum ! rhythm = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] ! track.push(MidiTrack.new(9, 0, 35, 2, rhythm)) ! end ! ! cur = 0 ! window_search(4) do |s| ! track[cur % track.length].push(s) ! cur += 1 ! end ! ! track.each do |t| ! t.push_silent(12) ! end ! ! ans = track[0].header(track.length, tempo) ! track.each do |t| ! ans += t.encode ! end ! return ans ! end ! ! end ! ! ! module Bio::Shell ! ! def midi(seq, filename, *args) ! begin ! print "Saving MIDI file (#{filename}) ... " ! File.open(filename, "w") do |file| ! file.puts seq.to_midi(*args) end + puts "done" + rescue + raise "Failed to save (#{filename}) : #{$!}" end end + end ! ! if $0 == __FILE__ ! include Bio::Shell ! ! seq_file = ARGV.shift ! mid_file = ARGV.shift ! ! ff = Bio::FlatFile.auto(seq_file) ff.each do |f| ! midi(f.naseq[1..1000], save_file) end end From k at pub.open-bio.org Wed Oct 5 04:56:16 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Wed Oct 5 05:59:13 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/shell/plugin flatfile.rb,1.2,1.3 Message-ID: <200510050856.j958uGdZ000914@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv910/shell/plugin Modified Files: flatfile.rb Log Message: * added flatauto method to open and retrieve one entry from a flatfile, or iterates on all entries of a flatfile. 
Index: flatfile.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/plugin/flatfile.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** flatfile.rb 25 Sep 2005 05:25:14 -0000 1.2 --- flatfile.rb 5 Oct 2005 08:56:14 -0000 1.3 *************** *** 25,28 **** --- 25,43 ---- module Bio::Shell + def flatauto(filename) + if block_given? + Bio::FlatFile.auto(filename) do |flat| + flat.each do |entry| + yield entry + end + end + else + flat = Bio::FlatFile.auto(filename) + entry = flat.next_entry + flat.close + return entry + end + end + def convert_to_fasta(fastafile, *flatfiles) puts "Saving fasta file (#{fastafile}) ... " From k at pub.open-bio.org Sun Oct 23 03:00:18 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 03:48:50 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db genbank.rb,0.33,0.34 Message-ID: <200510230700.j9N70IdZ008553@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory pub.open-bio.org:/tmp/cvs-serv8539 Modified Files: genbank.rb Log Message: * changed to use autoload instead of require Index: genbank.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank.rb,v retrieving revision 0.33 retrieving revision 0.34 diff -C2 -d -r0.33 -r0.34 *** genbank.rb 23 Aug 2004 23:40:35 -0000 0.33 --- genbank.rb 23 Oct 2005 07:00:15 -0000 0.34 *************** *** 21,26 **** # ! require 'bio/db/genbank/genbank' ! require 'bio/db/genbank/genpept' ! require 'bio/db/genbank/refseq' ! require 'bio/db/genbank/ddbj' --- 21,32 ---- # ! module Bio ! autoload :NCBIDB, 'bio/db' ! class NCBIDB ! autoload :Common, 'bio/db/genbank/common' ! end ! autoload :GenBank, 'bio/db/genbank/genbank' ! autoload :GenPept, 'bio/db/genbank/genpept' ! autoload :RefSeq, 'bio/db/genbank/refseq' ! autoload :DDBJ, 'bio/db/genbank/ddbj' ! 
end From k at pub.open-bio.org Sun Oct 23 03:11:14 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 03:59:39 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db genbank.rb,0.34,NONE Message-ID: <200510230711.j9N7BEdZ008635@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory pub.open-bio.org:/tmp/cvs-serv8631 Removed Files: genbank.rb Log Message: * This file is obsoleted. Use "require 'bio'" instead of "require 'bio/db/genbank'" --- genbank.rb DELETED --- From k at pub.open-bio.org Sun Oct 23 03:20:39 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 04:09:08 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/genbank common.rb, 1.6, 1.7 genbank.rb, 0.35, 0.36 genpept.rb, 1.9, 1.10 Message-ID: <200510230720.j9N7KddZ008761@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/genbank In directory pub.open-bio.org:/tmp/cvs-serv8757 Modified Files: common.rb genbank.rb genpept.rb Log Message: * Bio::GenBank::Common is changed to Bio::NCBIDB::Common Index: genpept.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/genpept.rb,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** genpept.rb 26 Sep 2005 13:24:25 -0000 1.9 --- genpept.rb 23 Oct 2005 07:20:37 -0000 1.10 *************** *** 21,24 **** --- 21,25 ---- # + require 'bio/db/genbank/common' require 'bio/db/genbank/genbank' *************** *** 26,31 **** class GenPept < NCBIDB ! require 'bio/db/genbank/common' ! include Bio::GenBank::Common # LOCUS --- 27,31 ---- class GenPept < NCBIDB ! 
include Bio::NCBIDB::Common # LOCUS Index: genbank.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/genbank.rb,v retrieving revision 0.35 retrieving revision 0.36 diff -C2 -d -r0.35 -r0.36 *** genbank.rb 26 Sep 2005 13:24:25 -0000 0.35 --- genbank.rb 23 Oct 2005 07:20:37 -0000 0.36 *************** *** 25,30 **** module Bio ! class GenBank ! include Bio::GenBank::Common # LOCUS --- 25,31 ---- module Bio ! class GenBank < NCBIDB ! ! include Bio::NCBIDB::Common # LOCUS Index: common.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/genbank/common.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** common.rb 26 Sep 2005 13:00:07 -0000 1.6 --- common.rb 23 Oct 2005 07:20:36 -0000 1.7 *************** *** 24,28 **** module Bio ! class GenBank < NCBIDB module Common --- 24,28 ---- module Bio ! class NCBIDB module Common From k at pub.open-bio.org Sun Oct 23 03:21:03 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 04:09:34 2005 Subject: [BioRuby-cvs] bioruby/lib/bio feature.rb,1.6,1.7 Message-ID: <200510230721.j9N7L3dZ008790@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv8786 Modified Files: feature.rb Log Message: * Changed to RDoc format Index: feature.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/feature.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** feature.rb 8 Sep 2005 01:22:08 -0000 1.6 --- feature.rb 23 Oct 2005 07:21:00 -0000 1.7 *************** *** 1,6 **** # ! # bio/feature.rb - Features/Feature class (GenBank Feature table) # ! # Copyright (C) 2002 KATAYAMA Toshiaki # # This library is free software; you can redistribute it and/or --- 1,43 ---- # ! 
# = bio/feature.rb - Features/Feature class (GenBank Feature table) # ! # Author:: Toshiaki Katayama ! # Copyright:: Copyright (c) 2002, 2005 BioRuby project ! # License:: LGPL ! # ! # $Id$ ! # ! # == INSD Feature table definition ! # ! # See http://www.ddbj.nig.ac.jp/FT/full_index.html for the INSD ! # (GenBank/EMBL/DDBJ) Feature table definition. ! # ! # === Example ! # ! # # suppose features is a Bio::Features object ! # features.each do |feature| ! # f_name = feature.feature ! # f_pos = feature.position ! # puts "#{f_name}:\t#{f_pos}" ! # feature.each do |qualifier| ! # q_name = qualifier.qualifier ! # q_val = qualifier.value ! # puts "- #{q_name}:\t#{q_val}" ! # end ! # end ! # ! # # Iterates only on CDS features and extracts translated amino acid sequences ! # features.each("CDS") do |feature| ! # hash = feature.assoc ! # name = hash["gene"] || hash["product"] || hash["note"] ! # seq = hash["translation"] ! # pos = feature.position ! # if name and seq ! # puts ">#{name} #{feature.position}" ! # puts seq ! # end ! # end ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,22 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 55,59 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # *************** *** 25,93 **** module Bio ! class Feature ! ! def initialize(feature = '', position = '', qualifiers = []) ! @feature, @position, @qualifiers = feature, position, qualifiers ! end ! attr_accessor :feature, :position, :qualifiers ! ! def locations ! Locations.new(@position) ! end ! def append(a) ! @qualifiers.push(a) if a.is_a? Qualifier ! return self ! end ! def each ! @qualifiers.each do |x| ! yield x ! end ! end ! def assoc ! hash = Hash.new ! @qualifiers.each do |x| ! hash[x.qualifier] = x.value ! end ! return hash ! end ! class Qualifier ! def initialize(key, value) ! @qualifier, @value = key, value ! end ! 
attr_reader :qualifier, :value end - end - class Features ! def initialize(ary = []) ! @features = ary ! end ! attr_accessor :features ! def append(a) ! @features.push(a) if a.is_a? Feature ! return self end ! def each(arg = nil) ! @features.each do |x| ! next if arg and x.feature != arg ! yield x ! end ! end ! def [](*arg) ! @features[*arg] ! end end --- 62,125 ---- module Bio ! # Container for the sequence annotation. ! class Feature ! def initialize(feature = '', position = '', qualifiers = []) ! @feature, @position, @qualifiers = feature, position, qualifiers ! end ! # Returns type of feature in String (e.g 'CDS', 'gene') ! attr_accessor :feature ! # Returns position of the feature in String (e.g. 'complement(123..146)') ! attr_accessor :position + # Returns an Array of Qualifier objects. + attr_accessor :qualifiers ! # Returns a Bio::Locations object translated from the position string. ! def locations ! Locations.new(@position) ! end ! # Appends a Qualifier object to the Feature. ! # ! # * Returns an Array of Qualifier objects. ! # * If the argument is not a Qualifier object, returns nil. ! # ! def append(a) ! @qualifiers.push(a) if a.is_a? Qualifier ! return self ! end + # Iterates on each qualifier. + def each + @qualifiers.each do |x| + yield x end end + # Returns a Hash constructed from qualifier objects. + def assoc + hash = Hash.new + @qualifiers.each do |x| + hash[x.qualifier] = x.value + end + return hash + end ! # Container for the qualifier-value pair. ! class Qualifier ! def initialize(key, value) ! @qualifier, @value = key, value end ! # Qualifier name in String ! attr_reader :qualifier ! # Qualifier value in String ! attr_reader :value end *************** *** 96,129 **** ! =begin ! ! = Bio::Feature ! ! --- Bio::Feature.new(feature = '', position = '', qualifiers = []) ! ! --- Bio::Feature#feature -> String ! --- Bio::Feature#position -> String ! --- Bio::Feature#qualifiers -> Array ! ! --- Bio::Feature#locations -> Bio::Locations ! 
--- Bio::Feature#append -> Bio::Feature ! --- Bio::Feature#each -> Array ! == Bio::Feature::Qualifier ! --- Bio::Feature::Qualifier.new(key, value) ! --- Bio::Feature::Qualifier#qualifier -> String ! --- Bio::Feature::Qualifier#value -> String ! = Bio::Features ! --- Bio::Features.new(ary = []) ! --- Bio::Features#features -> Array ! --- Bio::Features#append(a) -> Bio::Features ! --- Bio::Features#each -> Array ! =end --- 128,164 ---- ! # Container for the list of Feature objects. ! class Features ! def initialize(ary = []) ! @features = ary ! end ! # Returns an Array of Feature objects. ! attr_accessor :features ! # Appends a Feature object to Features. ! def append(a) ! @features.push(a) if a.is_a? Feature ! return self ! end ! # Iterates on each feature. If a feature name is given as an argument, ! # only iterates on each feature belongs to the name (e.g. 'CDS' etc.) ! def each(arg = nil) ! @features.each do |x| ! next if arg and x.feature != arg ! yield x ! end ! end ! # Short cut for the Features#features[n] ! def [](*arg) ! @features[*arg] ! end ! end ! end # Bio From k at pub.open-bio.org Sun Oct 23 04:27:24 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:15:52 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg kgml.rb,NONE,1.1 Message-ID: <200510230827.j9N8ROdZ018154@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg In directory pub.open-bio.org:/tmp/cvs-serv18150/db/kegg Added Files: kgml.rb Log Message: * Newly added KEGG KGML parser --- NEW FILE: kgml.rb --- # # = bio/db/kegg/kgml.rb - KEGG KGML parser class # # Author:: Toshiaki Katayama # Copyright:: Copyright (C) 2005 BioRuby project # License:: LGPL # # $Id: kgml.rb,v 1.1 2005/10/23 08:27:22 k Exp $ # # == KGML (KEGG XML) parser # # See http://www.genome.jp/kegg/xml/ for more details on KGML. 
# # === Examples # # file = ARGF.read # kgml = Bio::KEGG::KGML.new(file) # # # attributes # puts kgml.name # puts kgml.org # puts kgml.number # puts kgml.title # puts kgml.image # puts kgml.link # # kgml.entries.each do |entry| # # attributes # puts entry.id # puts entry.name # puts entry.type # puts entry.link # puts entry.reaction # puts entry.map # # attributes # puts entry.label # This is an accessor for # puts entry.x # puts entry.y # puts entry.type # puts entry.width # puts entry.height # puts entry.fgcolor # puts entry.bgcolor # end # # kgml.relations.each do |relation| # # attributes # puts relation.entry1 # puts relation.entry2 # puts relation.type # # attributes # relation.subtype.each do |value, name| # puts value, name # end # end # # kgml.reactions.each do |reaction| # # attributes # puts reaction.name # puts reaction.type # # attributes # reaction.substrates.each do |name| # puts name # # attributes # altnames = reaction.alt[name] # altnames.each do |altname| # puts altname # end # end # # attributes # reaction.products.each do |name| # puts name # # attributes # altnames = reaction.alt[name] # altnames.each do |altname| # puts altname # end # end # end # #-- # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # #++ # require 'rexml/document' module Bio class KEGG class KGML def initialize(xml) @dom = REXML::Document.new(xml) parse_root parse_entry parse_relation parse_reaction end attr_reader :name, :org, :number, :title, :image, :link attr_reader :entries, :relations, :reactions # Array class Entry attr_accessor :id, :name, :type, :link, :reaction, :map attr_accessor :label, :x, :y, :type, :width, :height, :fgcolor, :bgcolor end class Relation attr_accessor :entry1, :entry2, :type attr_accessor :subtype # Hash end class Reaction attr_accessor :name, :type attr_accessor :substrates, :products # Array attr_accessor :alt # Hash end def parse_root root = @dom.root.attributes @name = root["name"] @org = root["org"] @number = root["number"] @title = root["title"] @image = root["image"] @link = root["link"] end def parse_entry @entries = Array.new @dom.elements.each("/pathway/entry") { |node| attr = node.attributes entry = Entry.new entry.id = attr["id"].to_i entry.map = attr["map"] entry.name = attr["name"] entry.type = attr["type"] entry.link = attr["link"] entry.reaction = attr["reaction"] node.elements.each("graphics") { |graphics| attr = graphics.attributes entry.x = attr["x"].to_i entry.y = attr["y"].to_i entry.type = attr["type"] entry.label = attr["label"] # name entry.width = attr["width"].to_i entry.height = attr["height"].to_i entry.fgcolor = attr["fgcolor"] entry.bgcolor = attr["bgcolor"] } @entries << entry } end def parse_relation @relations = Array.new @dom.elements.each("/pathway/relation") { |node| attr = node.attributes relation = Relation.new relation.entry1 = attr["entry1"] relation.entry2 = attr["entry2"] relation.type = attr["type"] hash = Hash.new node.elements.each("subtype") { |subtype| attr = subtype.attributes name = attr["name"] e_id = 
attr["value"].to_i hash[e_id] = name } relation.subtype = hash } end def parse_reaction @reactions = Array.new @dom.elements.each("/pathway/reaction") { |node| attr = node.attributes reaction = Reaction.new reaction.name = attr["name"] reaction.type = attr["type"] substrates = Array.new products = Array.new hash = Hash.new node.elements.each("substrate") { |substrate| name = substrate.attributes["name"] substrates << name substrate.elements.each("alt") { |alt| hash[name] ||= Array.new hash[name] << alt.attributes["name"] } } node.elements.each("product") { |product| name = product.attributes["name"] products << name product.elements.each("alt") { |alt| hash[name] ||= Array.new hash[name] << alt.attributes["name"] } } reaction.substrates = substrates reaction.products = products reaction.alt = hash } end end # KGML end # KEGG end # Bio if __FILE__ == $0 require 'pp' xml = ARGF.read pp Bio::KEGG::KGML.new(xml) end =begin # This is a test implementation which reflects original KGML data structure. 
class KGML class Pathway attr_accessor :name, :org, :number, :title, :image, :link attr_accessor :entries, :relations, :reactions class Entry attr_accessor :id, :name, :type, :link, :reaction, :map attr_accessor :components, :graphics class Component attr_accessor :id end class Graphics attr_accessor :name, :x, :y, :type, :width, :height, :fgcolor, :bgcolor end end class Relation attr_accessor :entry1, :entry2, :type attr_accessor : class Subtype attr_accessor :name, :value end end class Reaction attr_accessor :name, :type class Substrate attr_accessor :name end class Product attr_accessor :name end class Alt attr_accessor :name end end end end =end From k at pub.open-bio.org Sun Oct 23 04:33:28 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:21:52 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/util/color_scheme - New directory Message-ID: <200510230833.j9N8XSdZ018218@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/util/color_scheme In directory pub.open-bio.org:/tmp/cvs-serv18214/lib/bio/util/color_scheme Log Message: Directory /home/repository/bioruby/bioruby/lib/bio/util/color_scheme added to the repository From k at pub.open-bio.org Sun Oct 23 04:37:59 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:26:24 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/util - New directory Message-ID: <200510230837.j9N8bxdZ018421@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/util In directory pub.open-bio.org:/tmp/cvs-serv18417/test/unit/bio/util Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/util added to the repository From k at pub.open-bio.org Sun Oct 23 04:40:43 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:29:17 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/util color_scheme.rb,NONE,1.1 Message-ID: <200510230840.j9N8ehdZ018497@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/util In directory 
pub.open-bio.org:/tmp/cvs-serv18484/lib/bio/util Added Files: color_scheme.rb Log Message: * Newly added Bio::ColorScheme module contributed by Trevor Wennblom --- NEW FILE: color_scheme.rb --- # # bio/util/color_scheme.rb - Popular color codings for nucleic and amino acids # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: color_scheme.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # =begin rdoc == Synopsis The Bio::ColorScheme module contains classes that return popular color codings for nucleic and amino acids in RGB hex format suitable for HTML code. The current schemes supported are: * Buried - Buried index * Helix - Helix propensity * Hydropathy - Hydrophobicity * Nucleotide - Nucleotide color coding * Strand - Strand propensity * Taylor - Taylor color coding * Turn - Turn propensity * Zappo - Zappo color coding Planned color schemes include: * BLOSUM62 * ClustalX * Percentage Identity (PID) Color schemes BLOSUM62, ClustalX, and Percentage Identity are all dependent on the alignment consensus. This data is currently referenced from the JalView alignment editor. Clamp, M., Cuff, J., Searle, S. M. and Barton, G. J.
(2004), "The Jalview Java Alignment Editor," Bioinformatics, 12, 426-7 http://www.jalview.org Currently the score data for things such as hydropathy, helix, turn, etc. are contained here but should be moved to bio/data/aa once a good reference is found for these values. == Usage require 'bio/util/color_scheme' puts Bio::ColorScheme::Buried['A'] # 00DC22 puts Bio::ColorScheme::Buried[:c] # 00BF3F puts Bio::ColorScheme::Buried[nil] # nil puts Bio::ColorScheme::Buried['-'] # FFFFFF puts Bio::ColorScheme::Buried[7] # FFFFFF puts Bio::ColorScheme::Buried['junk'] # FFFFFF puts Bio::ColorScheme::Buried['t'] # 00CC32 seq = 'gattaca' scheme = Bio::ColorScheme::Zappo postfix = '' html = '' seq.each_byte do |c| color = scheme[c.chr] prefix = %Q() html += prefix + c.chr + postfix end puts html == Author Trevor Wennblom == Copyright Copyright (C) 2005 Trevor Wennblom Licensed under the same terms as BioRuby. =end module Bio module ColorScheme cs_location = 'bio/util/color_scheme' # Score sub-classes autoload :Buried, "#{cs_location}/buried" autoload :Helix, "#{cs_location}/helix" autoload :Hydropathy, "#{cs_location}/hydropathy" autoload :Strand, "#{cs_location}/strand" autoload :Turn, "#{cs_location}/turn" # Simple sub-classes autoload :Nucleotide, "#{cs_location}/nucleotide" autoload :Taylor, "#{cs_location}/taylor" autoload :Zappo, "#{cs_location}/zappo" # Consensus sub-classes # NOTE todo # BLOSUM62 # ClustalX # PID # A very basic class template for color code referencing. class Simple def self.[](x) return if x.nil? # accept symbols and any case @colors[x.to_s.upcase] end def self.colors() @colors end ####### private ####### # Example @colors = { 'A' => '64F73F', } @colors.default = 'FFFFFF' # return white by default end # A class template for color code referencing of color schemes # that are score based. This template is expected to change # when the scores are moved into bio/data/aa class Score def self.[](x) return if x.nil? 
# accept symbols and any case @colors[x.to_s.upcase] end def self.min(x) @min end def self.max(x) @max end def self.scores() @scores end def self.colors() @colors end ######### protected ######### def self.percent_to_hex(percent) percent = percent.to_f if percent.is_a?(String) if (percent > 1.0) or (percent < 0.0) or percent.nil? raise 'Percentage must be between 0.0 and 1.0' end "%02X" % (percent * 255.0) end def self.rgb_percent_to_hex(red, green, blue) percent_to_hex(red) + percent_to_hex(green) + percent_to_hex(blue) end def self.score_to_percent(score, min, max) # .to_f to ensure every operation is float-aware percent = (score.to_f - min) / (max.to_f - min) percent = 1.0 if percent > 1.0 percent = 0.0 if percent < 0.0 percent end ####### private ####### # Example def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(percent, 0.0, 1.0-percent) end @colors = {} @scores = { 'A' => 0.83, } @min = 0.37 @max = 1.7 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end # NOTE todo class Consensus end end # module ColorScheme end # module Bio From k at pub.open-bio.org Sun Oct 23 04:40:43 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:30:40 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/util test_color_scheme.rb, NONE, 1.1 Message-ID: <200510230840.j9N8ehdZ018510@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/util In directory pub.open-bio.org:/tmp/cvs-serv18484/test/unit/bio/util Added Files: test_color_scheme.rb Log Message: * Newly added Bio::ColorScheme module contributed by Trevor Wennblom --- NEW FILE: test_color_scheme.rb --- # # test/unit/bio/util/test_color_scheme.rb - Unit test for Bio::ColorScheme # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as 
published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_color_scheme.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4 , 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/util/color_scheme' module Bio class TestColorScheme < Test::Unit::TestCase def test_buried s = Bio::ColorScheme::Buried assert_equal('00DC22', s['A']) assert_equal('00BF3F', s[:c]) assert_equal(nil, s[nil]) assert_equal('FFFFFF', s['-']) assert_equal('FFFFFF', s[7]) assert_equal('FFFFFF', s['junk']) assert_equal('00CC32', s['t']) end end end From k at pub.open-bio.org Sun Oct 23 04:40:43 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 05:30:41 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/util/color_scheme buried.rb, NONE, 1.1 helix.rb, NONE, 1.1 hydropathy.rb, NONE, 1.1 nucleotide.rb, NONE, 1.1 strand.rb, NONE, 1.1 taylor.rb, NONE, 1.1 turn.rb, NONE, 1.1 zappo.rb, NONE, 1.1 Message-ID: <200510230840.j9N8ehdZ018501@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/util/color_scheme In directory pub.open-bio.org:/tmp/cvs-serv18484/lib/bio/util/color_scheme Added Files: buried.rb helix.rb hydropathy.rb nucleotide.rb strand.rb taylor.rb turn.rb zappo.rb Log Message: * Newly added Bio::ColorScheme module contributed by Trevor Wennblom --- NEW FILE: helix.rb --- # # bio/util/color_scheme/helix.rb - Color 
codings for helix propensity # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: helix.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Helix < Score ######### protected ######### def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(percent, 1.0-percent, percent) end @colors = {} @scores = { 'A' => 1.42, 'C' => 0.7, 'D' => 1.01, 'E' => 1.51, 'F' => 1.13, 'G' => 0.57, 'H' => 1.0, 'I' => 1.08, 'K' => 1.16, 'L' => 1.21, 'M' => 1.45, 'N' => 0.67, 'P' => 0.57, 'Q' => 1.11, 'R' => 0.98, 'S' => 0.77, 'T' => 0.83, 'U' => 0.0, 'V' => 1.06, 'W' => 1.08, 'Y' => 0.69, 'B' => 0.84, 'X' => 1.0, 'Z' => 1.31, } @min = 0.57 @max = 1.51 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: strand.rb --- # # bio/util/color_scheme/strand.rb - Color codings for strand propensity # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: strand.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Strand < Score ######### protected ######### def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(percent, 0.0, 1.0-percent) end @colors = {} @scores = { 'A' => 0.83, 'C' => 1.19, 'D' => 0.54, 'E' => 0.37, 'F' => 1.38, 'G' => 0.75, 'H' => 0.87, 'I' => 1.6, 'K' => 0.74, 'L' => 1.3, 'M' => 1.05, 'N' => 0.89, 'P' => 0.55, 'Q' => 1.1, 'R' => 0.93, 'S' => 0.75, 'T' => 1.19, 'U' => 0.0, 'V' => 1.7, 'W' => 1.37, 'Y' => 1.47, 'B' => 0.72, 'X' => 1.0, 'Z' => 0.74, } @min = 0.37 @max = 1.7 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: turn.rb --- # # bio/util/color_scheme/turn.rb - Color codings for turn propensity # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: turn.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Turn < Score ######### protected ######### def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(percent, 1.0-percent, 1.0-percent) end @colors = {} @scores = { 'A' => 0.66, 'C' => 1.19, 'D' => 1.46, 'E' => 0.74, 'F' => 0.6, 'G' => 1.56, 'H' => 0.95, 'I' => 0.47, 'K' => 1.01, 'L' => 0.59, 'M' => 0.6, 'N' => 1.56, 'P' => 1.52, 'Q' => 0.98, 'R' => 0.95, 'S' => 1.43, 'T' => 0.96, 'U' => 0, 'V' => 0.5, 'W' => 0.96, 'Y' => 1.14, 'B' => 1.51, 'X' => 1.0, 'Z' => 0.86, } @min = 0.47 @max = 1.56 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: taylor.rb --- # # bio/util/color_scheme/taylor.rb - Taylor color codings for amino acids # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: taylor.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Taylor < Simple ######### protected ######### @colors = { 'A' => 'CCFF00', 'C' => 'FFFF00', 'D' => 'FF0000', 'E' => 'FF0066', 'F' => '00FF66', 'G' => 'FF9900', 'H' => '0066FF', 'I' => '66FF00', 'K' => '6600FF', 'L' => '33FF00', 'M' => '00FF00', 'N' => 'CC00FF', 'P' => 'FFCC00', 'Q' => 'FF00CC', 'R' => '0000FF', 'S' => 'FF3300', 'T' => 'FF6600', 'U' => 'FFFFFF', 'V' => '99FF00', 'W' => '00CCFF', 'Y' => '00FFCC', 'B' => 'FFFFFF', 'X' => 'FFFFFF', 'Z' => 'FFFFFF', } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: hydropathy.rb --- # # bio/util/color_scheme/hydropathy.rb - Color codings for hydrophobicity # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: hydropathy.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' # Hydropathy index # Kyte, J., and Doolittle, R.F., J. Mol. Biol. 
# 1157, 105-132, 1982 module Bio::ColorScheme class Hydropathy < Score ######### protected ######### def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(percent, 0.0, 1.0-percent) end @colors = {} @scores = { 'A' => 1.8, 'C' => 2.5, 'D' => -3.5, 'E' => -3.5, 'F' => 2.8, 'G' => -0.4, 'H' => -3.2, 'I' => 4.5, 'K' => -3.9, 'L' => 3.8, 'M' => 1.9, 'N' => -3.5, 'P' => -1.6, 'Q' => -3.5, 'R' => -4.5, 'S' => -0.8, 'T' => -0.7, 'U' => 0.0, 'V' => 4.2, 'W' => -0.9, 'Y' => -1.3, 'B' => -3.5, 'X' => -0.49, 'Z' => -3.5, } @min = -3.9 @max = 4.5 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: zappo.rb --- # # bio/util/color_scheme/zappo.rb - Zappo color codings for amino acids # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: zappo.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Zappo < Simple ######### protected ######### @colors = { 'A' => 'FFAFAF', 'C' => 'FFFF00', 'D' => 'FF0000', 'E' => 'FF0000', 'F' => 'FFC800', 'G' => 'FF00FF', 'H' => 'FF0000', 'I' => 'FFAFAF', 'K' => '6464FF', 'L' => 'FFAFAF', 'M' => 'FFAFAF', 'N' => '00FF00', 'P' => 'FF00FF', 'Q' => '00FF00', 'R' => '6464FF', 'S' => '00FF00', 'T' => '00FF00', 'U' => 'FFFFFF', 'V' => 'FFAFAF', 'W' => 'FFC800', 'Y' => 'FFC800', 'B' => 'FFFFFF', 'X' => 'FFFFFF', 'Z' => 'FFFFFF', } @colors.default = 'FFFFFF' # return white by default end end --- NEW FILE: nucleotide.rb --- # # bio/util/color_scheme/nucleotide.rb - Color codings for nucleotides # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: nucleotide.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Nucleotide < Simple ######### protected ######### @colors = { 'A' => '64F73F', 'C' => 'FFB340', 'G' => 'EB413C', 'T' => '3C88EE', 'U' => '3C88EE', } @colors.default = 'FFFFFF' # return white by default end NA = Nuc = Nucleotide end --- NEW FILE: buried.rb --- # # bio/util/color_scheme/buried.rb - Color codings for buried amino acids # # Copyright (C) 2005 Trevor Wennblom # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: buried.rb,v 1.1 2005/10/23 08:40:41 k Exp $ # require 'bio/util/color_scheme' module Bio::ColorScheme class Buried < Score ######### protected ######### def self.score_to_rgb_hex(score, min, max) percent = score_to_percent(score, min, max) rgb_percent_to_hex(0.0, 1.0-percent, percent) end @colors = {} @scores = { 'A' => 0.66, 'C' => 1.19, 'D' => 1.46, 'E' => 0.74, 'F' => 0.6, 'G' => 1.56, 'H' => 0.95, 'I' => 0.47, 'K' => 1.01, 'L' => 0.59, 'M' => 0.6, 'N' => 1.56, 'P' => 1.52, 'Q' => 0.98, 'R' => 0.95, 'S' => 1.43, 'T' => 0.96, 'U' => 0, 'V' => 0.5, 'W' => 0.96, 'Y' => 1.14, 'B' => 1.51, 'X' => 1.0, 'Z' => 0.86, } @min = 0.05 @max = 4.6 @scores.each { |k,s| @colors[k] = score_to_rgb_hex(s, @min, @max) } @colors.default = 'FFFFFF' # return white by default end end From k at pub.open-bio.org Sun Oct 23 05:11:30 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 06:00:21 2005 Subject: [BioRuby-cvs] bioruby/lib/bio pathway.rb,1.30,1.31 Message-ID: <200510230911.j9N9BTdZ018751@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv18742 Modified Files: pathway.rb Log Message: * nodes method returns list of nodes intead of number of nodes * edges method returns list of edges using @relations instead of @graph * use nodes.length and edges.length instead of nodes and edges method * removed clear_relations!, to_relations to ensure edges method works * removed accessor for @relations to ensure user can't change it Index: pathway.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/pathway.rb,v retrieving revision 1.30 retrieving revision 1.31 diff -C2 -d -r1.30 -r1.31 *** pathway.rb 8 Sep 2005 01:22:08 -0000 1.30 
--- pathway.rb 23 Oct 2005 09:11:27 -0000 1.31 *************** *** 28,31 **** --- 28,34 ---- class Pathway + #require 'chem' + #include Chem::Graph + # Initial graph (adjacency list) generation from the list of Relation def initialize(relations, undirected = false) *************** *** 37,41 **** self.to_list # generate adjacency list end ! attr_reader :relations, :graph, :index attr_accessor :label --- 40,44 ---- self.to_list # generate adjacency list end ! attr_reader :graph, :index attr_accessor :label *************** *** 62,81 **** end - # clear @relations to reduce the memory usage - def clear_relations! - @relations.clear - end - - # reconstruct @relations from the adjacency list @graph - def to_relations - @relations.clear - @graph.each_key do |from| - @graph[from].each do |to, w| - @relations << Relation.new(from, to, w) - end - end - return @relations - end - # Graph (adjacency list) generation from the Relations --- 65,68 ---- *************** *** 108,120 **** def nodes ! @graph.keys.length end def edges ! edges = 0 ! @graph.each_value do |v| ! edges += v.size ! end ! edges end --- 95,103 ---- def nodes ! [ @graph.keys + @graph.values ].sort.uniq end def edges ! @relations end *************** *** 131,140 **** matrix = Array.new ! nodes.times do ! matrix.push(Array.new(nodes, default_value)) end if diagonal_value ! nodes.times do |i| matrix[i][i] = diagonal_value end --- 114,123 ---- matrix = Array.new ! nodes.length.times do ! matrix.push(Array.new(nodes.length, default_value)) end if diagonal_value ! nodes.length.times do |i| matrix[i][i] = diagonal_value end *************** *** 224,230 **** sg = subgraph(neighbors) if sg.graph.size != 0 ! edges = sg.edges / 2.0 ! nodes = sg.nodes ! complete = (nodes * (nodes - 1)) / 2.0 return edges/complete else --- 207,213 ---- sg = subgraph(neighbors) if sg.graph.size != 0 ! edges = sg.edges.length / 2.0 ! nodes = sg.nodes.length ! 
complete = (nodes.length * (nodes.length - 1)) / 2.0 return edges/complete else *************** *** 374,378 **** def bellman_ford(root) distance, predecessor = initialize_single_source(root) ! for i in 1 ..(self.nodes - 1) do @graph.each_key do |u| @graph[u].each do |v, w| --- 357,361 ---- def bellman_ford(root) distance, predecessor = initialize_single_source(root) ! for i in 1 ..(self.nodes.length - 1) do @graph.each_key do |u| @graph[u].each do |v, w| *************** *** 404,408 **** m = self.to_matrix(inf, 0) d = m.dup ! n = self.nodes for k in 0 .. n - 1 do for i in 0 .. n - 1 do --- 387,391 ---- m = self.to_matrix(inf, 0) d = m.dup ! n = self.nodes.length for k in 0 .. n - 1 do for i in 0 .. n - 1 do *************** *** 498,501 **** --- 481,488 ---- attr_accessor :node, :edge + def [](n) + [@node, @edge].flatten[n] + end + def from @node[0] *************** *** 730,736 **** instance variable @relations) redundantly. ! Note: you can clear the @relations list by calling clear_relations! method to ! reduce the memory usage, and the content of the @relations can be re-generated ! from the @graph by to_relations method. --- Bio::Pathway.new(list, undirected = false) --- 717,724 ---- instance variable @relations) redundantly. ! # *** OBSOLETED *** ! #Note: you can clear the @relations list by calling clear_relations! method to ! #reduce the memory usage, and the content of the @relations can be re-generated ! #from the @graph by to_relations method. --- Bio::Pathway.new(list, undirected = false) *************** *** 745,752 **** g = Bio::Pathway.new(list, 'undirected') ! --- Bio::Pathway#relations ! ! Read-only accessor for the internal list of the Bio::Relation objects ! '@relations'. --- Bio::Pathway#graph --- 733,741 ---- g = Bio::Pathway.new(list, 'undirected') ! # *** OBSOLETED *** ! #--- Bio::Pathway#relations ! # ! # Read-only accessor for the internal list of the Bio::Relation objects ! # '@relations'. 
--- Bio::Pathway#graph *************** *** 788,796 **** to_relations first. ! --- Bio::Pathway#clear_relations! ! --- Bio::Pathway#to_relations ! ! Clear @relations array and re-generate @relations from @graph. ! Useful when you want to reduce the memory usage of the object. --- Bio::Pathway#to_list --- 777,786 ---- to_relations first. ! # *** OBSOLETED *** ! #--- Bio::Pathway#clear_relations! ! #--- Bio::Pathway#to_relations ! # ! # Clear @relations array and re-generate @relations from @graph. ! # Useful when you want to reduce the memory usage of the object. --- Bio::Pathway#to_list *************** *** 813,817 **** --- Bio::Pathway#edges ! Returns the number of the nodes or edges in the graph. --- Bio::Pathway#to_matrix(default_value = nil, diagonal_value = nil) --- 803,808 ---- --- Bio::Pathway#edges ! Returns the nodes or edges in the graph. Use Bio::Pathway#nodes.length ! and Bio::Pathway#edges.length to have numbers of nodes and edges. --- Bio::Pathway#to_matrix(default_value = nil, diagonal_value = nil) From k at pub.open-bio.org Sun Oct 23 05:49:00 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 06:37:29 2005 Subject: [BioRuby-cvs] bioruby/lib/bio pathway.rb,1.31,1.32 Message-ID: <200510230949.j9N9n0dZ018911@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv18907 Modified Files: pathway.rb Log Message: * Changed to RDoc format Index: pathway.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/pathway.rb,v retrieving revision 1.31 retrieving revision 1.32 diff -C2 -d -r1.31 -r1.32 *** pathway.rb 23 Oct 2005 09:11:27 -0000 1.31 --- pathway.rb 23 Oct 2005 09:48:58 -0000 1.32 *************** *** 1,7 **** # ! # bio/pathway.rb - Binary relations and Graph algorithms # ! # Copyright (C) 2001 KATAYAMA Toshiaki ! 
# KAWASHIMA Shuichi # # This library is free software; you can redistribute it and/or --- 1,21 ---- # ! # = bio/pathway.rb - Binary relations and Graph algorithms [...1486 lines suppressed...] - --- Bio::Relation#eql?(rel) - --- Bio::Relation#hash - - Method eql? is an alias of the === method and is used with hash method - to make uniq arry of the Bio::Relation objects. - - a1 = Bio::Relation.new('a', 'b', 1) - a2 = Bio::Relation.new('b', 'a', 1) - a3 = Bio::Relation.new('b', 'c', 1) - p [ a1, a2, a3 ].uniq - - --- Bio::Relation#<=>(rel) - - Used by the each method to compare with another Bio::Relation object. - This method is only usable when the edge objects have the property of - the module Comparable. - - =end - --- 883,884 ---- From k at pub.open-bio.org Sun Oct 23 06:41:02 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Sun Oct 23 07:29:27 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/io soapwsdl.rb,1.1,1.2 Message-ID: <200510231041.j9NAf2dZ019195@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory pub.open-bio.org:/tmp/cvs-serv19173/io Modified Files: soapwsdl.rb Log Message: * Fixed to run with Ruby 1.8.3 (API of the SOAP4R is changed between 1.8.2 and 1.8.3) Index: soapwsdl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/soapwsdl.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** soapwsdl.rb 23 Jun 2004 14:32:15 -0000 1.1 --- soapwsdl.rb 23 Oct 2005 10:41:00 -0000 1.2 *************** *** 38,42 **** def create_driver ! @driver = SOAP::WSDLDriverFactory.new(@wsdl).create_driver @driver.generate_explicit_type = true # Ruby obj <-> SOAP obj end --- 38,46 ---- def create_driver ! if RUBY_VERSION > "1.8.2" ! @driver = SOAP::WSDLDriverFactory.new(@wsdl).create_rpc_driver ! else ! @driver = SOAP::WSDLDriverFactory.new(@wsdl).create_driver ! 
end @driver.generate_explicit_type = true # Ruby obj <-> SOAP obj end *************** *** 59,61 **** --- 63,80 ---- end # Bio + + =begin + + To use HTTP proxy, you need to set following two environmental variables + (case might be insensitive) as required by SOAP4R. + + --- soap_use_proxy + + Set the value of this variable to 'on'. + + --- http_proxy + + Set the URL of your proxy server (http://myproxy.com:8080 etc.). + + =end From pjotr at pub.open-bio.org Thu Oct 27 07:56:00 2005 From: pjotr at pub.open-bio.org (Pjotr Prins) Date: Thu Oct 27 10:06:54 2005 Subject: [BioRuby-cvs] bioruby/doc Tutorial.rd,1.7,1.8 Message-ID: <200510271156.j9RBu0VL004503@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/doc In directory pub.open-bio.org:/tmp/cvs-serv4489/doc Modified Files: Tutorial.rd Log Message: Documenting bioruby shell Index: Tutorial.rd =================================================================== RCS file: /home/repository/bioruby/bioruby/doc/Tutorial.rd,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** Tutorial.rd 24 Sep 2005 14:58:21 -0000 1.7 --- Tutorial.rd 27 Oct 2005 11:55:58 -0000 1.8 *************** *** 39,42 **** --- 39,61 ---- + == Trying Bioruby + + Bioruby comes with its own shell. After unpacking the sources run the + following command + + $BIORUBY/bin/bioruby + + and you should see a prompt + + bioruby> + + Now test the following: + + bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa") + bioruby> puts seq + atgcatgcaaaa + bioruby> puts seq.complement + ttttgcatgcat + == Working with nucleic / amino acid sequences (Bio::Sequence class) *************** *** 80,84 **** --- 99,105 ---- Windows). 
For example + % ri puts % ri p + % ri File.open Nucleic acid sequence is an object of +Bio::Sequence::NA+ class, and From pjotr at pub.open-bio.org Thu Oct 27 07:56:00 2005 From: pjotr at pub.open-bio.org (Pjotr Prins) Date: Thu Oct 27 10:06:55 2005 Subject: [BioRuby-cvs] bioruby/bin bioruby,1.2,1.3 Message-ID: <200510271156.j9RBu0VL004501@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/bin In directory pub.open-bio.org:/tmp/cvs-serv4489/bin Modified Files: bioruby Log Message: Documenting bioruby shell Index: bioruby =================================================================== RCS file: /home/repository/bioruby/bioruby/bin/bioruby,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** bioruby 24 Sep 2005 12:33:07 -0000 1.2 --- bioruby 27 Oct 2005 11:55:58 -0000 1.3 *************** *** 22,25 **** --- 22,27 ---- # + $: << File.dirname(__FILE__)+'/../lib' + require 'bio/shell' From ngoto at pub.open-bio.org Tue Oct 11 12:08:05 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Thu Oct 27 10:09:35 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/spidey report.rb,1.3,1.4 Message-ID: <200510111518.j9BFINdZ009941@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/spidey In directory pub.open-bio.org:/tmp/cvs-serv9931 Modified Files: report.rb Log Message: lib/bio/appl/spidey/report.rb: Bio::Spidey::Report::Hit#query_len, query_id, query_def, target_len, target_id, target_def did not work correctly. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/spidey/report.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** report.rb 8 Sep 2005 01:22:10 -0000 1.3 --- report.rb 11 Oct 2005 15:18:21 -0000 1.4 *************** *** 343,353 **** # Bio::BLAST::*::Report::Hit compatible methods ! def query_len; @mrna.len; end ! def query_id; @mrna.entry_id; end ! def query_def; @mrna.definition; end ! 
def target_len; @genomic.len; end ! def target_id; @genomic.entry_id; end ! def target_def; @genomic.definition; end alias hit_id target_id --- 343,353 ---- # Bio::BLAST::*::Report::Hit compatible methods ! def query_len; mrna.len; end ! def query_id; mrna.entry_id; end ! def query_def; mrna.definition; end ! def target_len; genomic.len; end ! def target_id; genomic.entry_id; end ! def target_def; genomic.definition; end alias hit_id target_id From ngoto at pub.open-bio.org Tue Oct 11 12:19:43 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Thu Oct 27 10:09:36 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/spidey report.rb,1.4,1.5 Message-ID: <200510111530.j9BFU3dZ010022@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/spidey In directory pub.open-bio.org:/tmp/cvs-serv10007 Modified Files: report.rb Log Message: comment out forgotten debug code Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/spidey/report.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** report.rb 11 Oct 2005 15:18:21 -0000 1.4 --- report.rb 11 Oct 2005 15:30:01 -0000 1.5 *************** *** 241,245 **** end @d0.each do |x| ! p x if x =~ /^Exon\s*\d+(\(.*\))?\:/ then if a = aln.shift then --- 241,245 ---- end @d0.each do |x| ! #p x if x =~ /^Exon\s*\d+(\(.*\))?\:/ then if a = aln.shift then From nakao at pub.open-bio.org Sun Oct 23 03:16:31 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:09:55 2005 Subject: [BioRuby-cvs] bioruby/lib/bio db.rb,0.28,0.29 Message-ID: <200510230716.j9N7GVdZ008665@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv8653/lib/bio Modified Files: db.rb Log Message: * added 'autolaod :Common, ' for Bio::NCBIDB. * added 'autolaod :Common, ' for Bio::EMBLDB. 
Index: db.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db.rb,v retrieving revision 0.28 retrieving revision 0.29 diff -C2 -d -r0.28 -r0.29 *** db.rb 8 Sep 2005 01:22:08 -0000 0.28 --- db.rb 23 Oct 2005 07:16:29 -0000 0.29 *************** *** 1,7 **** # ! # bio/db.rb - DataBase parser general API # # Copyright (C) 2001, 2002 KATAYAMA Toshiaki ! # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,7 ---- # ! #= bio/db.rb - DataBase parser general API # # Copyright (C) 2001, 2002 KATAYAMA Toshiaki ! #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,21 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! # # $Id$ # --- 17,21 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! #-- # $Id$ # *************** *** 26,30 **** module Bio ! class DB --- 26,31 ---- module Bio ! ! # Bio::DB API class DB *************** *** 86,91 **** end ! class NCBIDB < DB def initialize(entry, tagsize) --- 87,93 ---- end ! # Bio::NCBIDB class NCBIDB < DB + autoload :Common, 'bio/db/genbank/common' def initialize(entry, tagsize) *************** *** 122,132 **** end ! class KEGGDB < NCBIDB end ! class EMBLDB < DB ! def initialize(entry, tagsize) @tagsize = tagsize --- 124,135 ---- end ! # Bio::KEGG class KEGGDB < NCBIDB end ! # Bio::EMBLDB class EMBLDB < DB ! autoload :Common, 'bio/db/embl/common' ! def initialize(entry, tagsize) @tagsize = tagsize *************** *** 134,138 **** @data = {} # Hash of the parsed entry end - private --- 137,140 ---- From nakao at pub.open-bio.org Sun Oct 23 05:00:47 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 10:09:56 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db embl.rb,1.22,NONE Message-ID: <200510230900.j9N90ldZ018627@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory pub.open-bio.org:/tmp/cvs-serv18612/lib/bio/db Removed Files: embl.rb Log Message: * This file is obsoleted. Use "require 'bio'" instead of "require 'bio/db/embl'". --- embl.rb DELETED --- From nakao at pub.open-bio.org Tue Oct 11 22:59:55 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:09:56 2005 Subject: [BioRuby-cvs] bioruby/sample psortplot_html.rb,NONE,1.1 Message-ID: <200510120210.j9C2AEdZ011957@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/sample In directory pub.open-bio.org:/tmp/cvs-serv11939/sample Added Files: psortplot_html.rb Log Message: * initially imported sample/psortplot_html.rb, a sample script for Bio::PSORT classes and KEGG API class. --- NEW FILE: psortplot_html.rb --- #!/usr/bin/env ruby # # psortplot_html.rb - A KEGG API demo script. Generates a HTML file of # genes marked by PSORT II predictions onto a # KEGG/PATHWAY map. # # Usage: # # % ruby psortplot_html.rb # % cat sce00010_psort2.html # % ruby psortplot_html.rb path:eco00010 # % cat eco00010_psort2.html # # Copyright (C) 2005 Mitsuteru C. Nakao # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details.
# # $Id: psortplot_html.rb,v 1.1 2005/10/12 02:10:11 nakao Exp $ # require 'bio' class KEGG DBGET_BASEURI = 'http://kegg.com/dbget-bin' WWW_BGET_BASEURI = DBGET_BASEURI + '/www_bget' WWW_PATHWAY_BASEURI = DBGET_BASEURI + '/get_pathway' # path := path:sce00010 def self.link_pathway(path0) path, path = path0.split(':') org_name = path.scan(/(^\w{3})/).to_s mapno = path.sub(org_name, '') str = "#{path0}" end # ec_num := ec:1.2.3.4 def self.link_ec(ec_num) ec = ec_num.sub(/^ec:/, '') str = "#{ec_num}" return str end # gene := eco:b0002 def self.link_genes(gene) org_name, gene_name = gene.split(':') str = "#{gene}" return str end end class PSORT COLOR_Palette = { 'csk' => "#FF0000", # 'cytoskeletal' 'cyt' => "#FF8000", # 'cytoplasmic' 'nuc' => "#FFFF00", # 'nuclear' 'mit' => "#80FF00", # 'mitochondrial' 'ves' => "#00FF00", # 'vesicles of secretory system' 'end' => "#00FF80", # 'endoplasmic reticulum' 'gol' => "#00FFFF", # 'Golgi' 'vac' => "#0080FF", # 'vacuolar' 'pla' => "#0000FF", # 'plasma membrane' 'pox' => "#8000FF", # 'peroxisomal' 'exc' => "#FF00FF", # 'extracellular, including cell wall' '---' => "#FF0080" # 'other' } end keggapi = Bio::KEGG::API.new psort2serv = Bio::PSORT::PSORT2.imsut # Obtains a list of genes on specified pathway pathway = ARGV.shift || "path:sce00010" genes = keggapi.get_genes_by_pathway(pathway) scl = Hash.new # protein subcelluler localizations ec = Hash.new # EC numbers serial = 0 sync_default = $stdout.sync $stdout.sync = true genes.each do |gene| print "#{(serial += 1).to_s.rjust(genes.size.to_s.size)}\t#{gene}\t" # Obtains amino acid sequence from KEGG GENES entry aaseq = keggapi.get_aaseqs([gene]) # Predicts protein subcellualr localization result = psort2serv.exec(aaseq) scl[gene] = result.pred print "#{scl[gene]}\t" # Obtains the EC number from KEGG GENES entry ec[gene] = keggapi.get_enzymes_by_gene(gene) puts "#{ec[gene].inspect}" end $stdout.sync = sync_default fg_list = Array.new bg_list = Array.new genes.each do |gene| fg_list 
<< "#FF0000" bg_list << PSORT::COLOR_Palette[scl[gene]] end # coloring KEGG pathway according to gene's localization url = keggapi.color_pathway_by_objects(pathway, genes, fg_list, bg_list) puts "#{url} downloaded." # remove "path:" prefix from pathway_id path_code = pathway.sub(/^path:/, '') # save the result image image_file = "#{path_code}_psort2.gif" begin keggapi.save_image(url, image_file) end # create html with a color palette html = < PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{pathway})

  • PSORT II prediction protein subcellular localization map of KEGG/PATHWAY (#{KEGG.link_pathway(pathway)})
  • END # generate gene table with localization names = Bio::PSORT::PSORT2::SclNames multi_genes = Hash.new(0) ec.values.flatten.sort.uniq.each do |ec_num| ec.find_all {|x| x[1].include?(ec_num) }.each do |gene| gene = gene[0] loc = scl[gene] color = PSORT::COLOR_Palette[loc] name = names[loc] multi_genes[gene] += 1 html += < END end end html += <
    EC Gene Localization
    #{multi_genes[gene]} #{KEGG.link_ec(ec_num)} #{KEGG.link_genes(gene)} #{name}
    END # generate color code table also PSORT::COLOR_Palette.sort.each do |code, color| html += < END end html += <
    Code Color
    #{code} #{names[code]}

    END # save generated HTML file html_file = "#{path_code}_psort2.html" File.open(html_file, "w+") do |file| file.puts html end puts "Open #{html_file}" From nakao at pub.open-bio.org Sun Oct 23 04:59:45 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:09:57 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.25,1.26 Message-ID: <200510230859.j9N8xjdZ018605@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv18595/lib/bio/db/embl Modified Files: sptr.rb Log Message: * Changed to rdoc format. * Bio::EMBL::Common changed to Bio::EMBLDB::Common. Index: sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v retrieving revision 1.25 retrieving revision 1.26 diff -C2 -d -r1.25 -r1.26 *** sptr.rb 26 Sep 2005 13:00:06 -0000 1.25 --- sptr.rb 23 Oct 2005 08:59:43 -0000 1.26 *************** *** 1,6 **** # ! # bio/db/embl/sptr.rb - SwissProt and TrEMBL database class # ! # Copyright (C) 2001-2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,18 ---- # ! # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project ! # License:: LGPL ! # ! # $Id$ ! # ! # == UniProtKB/SwissProt and TrEMBL ! # ! # See the SWISS-PROT dicument file SPECLIST.TXT. ! # ! # == Example ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,52 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # require 'bio/db' module Bio class SPTR < EMBLDB ! ! require 'bio/db/embl/common' ! include Bio::EMBL::Common ! # ID Line # # "ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}." # ! # ENTRY_NAME = "#{X}_#{Y}" ! # X =~ /[A-Z0-9]{1,4}/ # The protein name. 
# Y =~ /[A-Z0-9]{1,5}/ # The biological source of the protein. ! # MOLECULE_TYPE = 'PRT' =~ /\w{3}/ # SEQUENCE_LENGTH =~ /\d+ AA/ - # - # See also the SWISS-PROT dicument file SPECLIST.TXT. - # - - @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/ - @@data_class = ["STANDARD", "PRELIMINARY"] - - # Bio::SPTR#id_line -> hsh - # Bio::SPTR#id_line(key) -> str or int def id_line(key = nil) unless @data['ID'] --- 30,61 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # require 'bio/db' + require 'bio/db/embl/common' module Bio + # Parser class for UniProtKB/SwissProt and TrEMBL database entry class SPTR < EMBLDB ! include Bio::EMBLDB::Common + @@entry_regrexp = /[A-Z0-9]{1,4}_[A-Z0-9]{1,5}/ + @@data_class = ["STANDARD", "PRELIMINARY"] ! ! # returns a Hash of the ID line. ! # returns a content (Int or String) of the ID line by a given key. ! # Hash keys: ['ENTRY_NAME', 'DATA_CLASS', 'MODECULE_TYPE', 'SEQUENCE_LENGTH'] # + # ID Line # "ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}." # ! # ENTRY_NAME := "#{X}_#{Y}" ! # X =~ /[A-Z0-9]{1,5}/ # The protein name. # Y =~ /[A-Z0-9]{1,5}/ # The biological source of the protein. ! # MOLECULE_TYPE := 'PRT' =~ /\w{3}/ # SEQUENCE_LENGTH =~ /\d+ AA/ def id_line(key = nil) unless @data['ID'] *************** *** 67,78 **** end # ! def entry id_line('ENTRY_NAME') end ! alias entry_name entry ! alias entry_id entry # def molecule id_line('MOLECULE_TYPE') --- 76,94 ---- end + + + # returns a ENTRY_NAME in the ID line. # ! # A short-cut for Bio::SPTR#id_line('ENTRY_NAME'). ! def entry_id id_line('ENTRY_NAME') end ! alias entry_name entry_id ! alias entry entry_id + + # returns a MOLECULE_TYPE in the ID line. # + # A short-cut for Bio::SPTR#id_line('MOLECULE_TYPE'). def molecule id_line('MOLECULE_TYPE') *************** *** 80,84 **** alias molecule_type molecule ! # def sequence_length id_line('SEQUENCE_LENGTH') --- 96,103 ---- alias molecule_type molecule ! ! 
# returns a SEQUENCE_LENGTH in the ID line. ! # ! # A short-cut for Bio::SPTR#id_line('SEQUENCE_LENGHT'). def sequence_length id_line('SEQUENCE_LENGTH') *************** *** 87,115 **** ! # AC Line ! # ! # "AC A12345; B23456;" ! # ! # AC [AC1;]+ ! # ! # Accession numbers format: ! # 1 2 3 4 5 6 ! # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9] ! # ! # Bio::SPTR#ac -> ary ! # #accessions -> ary ! # #accession -> accessions.first ! @@ac_regrexp = /[OPQ][0-9][A-Z0-9]{3}[0-9]/ ! # DT Line; date (3/entry) ! # DT DD-MMM-YYY (rel. NN, Created) ! # DT DD-MMM-YYY (rel. NN, Last sequence update) ! # DT DD-MMM-YYY (rel. NN, Last annotation update) # ! # Bio::SPTR#dt -> Hash ! # Bio::SPTR#dt(key) -> String ! # key = (created|sequence|annotation) def dt(key = nil) unless @data['DT'] --- 106,128 ---- ! # Bio::EMBL::Common#ac -> ary ! # #accessions -> ary ! # #accession -> String (accessions.first) @@ac_regrexp = /[OPQ][0-9][A-Z0-9]{3}[0-9]/ ! ! # returns a Hash of information in the DT lines. ! # hash keys: ! # ['created', 'sequence', 'annotation'] ! # also Symbols acceptable (ASAP): ! # [:created, :sequence, :annotation] # ! # returns a String of information in the DT lines by a given key.. ! # ! # DT Line; date (3/entry) ! # DT DD-MMM-YYY (rel. NN, Created) ! # DT DD-MMM-YYY (rel. NN, Last sequence update) ! # DT DD-MMM-YYY (rel. NN, Last annotation update) def dt(key = nil) unless @data['DT'] *************** *** 130,141 **** # DE Line; description (>=1) ! # "DE #{OFFICIAL_NAME} (#{SYNONYM})" ! # "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTEINS: #1; #2]." ! # OFFICIAL_NAME 1/entry ! # SYNONYM >=0 ! # CONTEINS >=0 ! # ! # Returns the proposed official name of the protein def protein_name name = "" --- 143,154 ---- + # returns the proposed official name of the protein. + # # DE Line; description (>=1) ! # "DE #{OFFICIAL_NAME} (#{SYNONYM})" ! # "DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTEINS: #1; #2]." ! # OFFICIAL_NAME 1/entry ! # SYNONYM >=0 ! 
# CONTEINS >=0 def protein_name name = "" *************** *** 147,152 **** return name end ! # synonyms are each placed in () following the official name on the DE line ! # Returns an array of synonyms (unofficial names) def synonyms ary = Array.new --- 160,168 ---- return name end ! ! ! # returns an array of synonyms (unofficial names). ! # ! # synonyms are each placed in () following the official name on the DE line. def synonyms ary = Array.new *************** *** 163,166 **** --- 179,197 ---- + # returns gene names in the GN line. + # + # New UniProt/SwissProt format: + # * Bio::SPTR#gn -> [ * ] + # where is: + # { :name => '...', + # :synonyms => [ 's1', 's2', ... ], + # :loci => [ 'l1', 'l2', ... ], + # :orfs => [ 'o1', 'o2', ... ] + # } + # + # Old format: + # * Bio::SPTR#gn -> Array # AND + # * Bio::SPTR#gn[0] -> Array # OR + # # GN Line: Gene name(s) (>=0, optional) def gn *************** *** 176,189 **** # GN Line: Gene name(s) (>=0, optional) ! # GN HNS OR DRDX OR OSMZ OR BGLY. ! # GN CECA1 AND CECA2. ! # GN CECA1 AND (HOGE OR FUGA). # ! # GN NAME1 [(AND|OR) NAME]+. # # Bio::SPTR#gn -> Array # AND # #gn[0] -> Array # OR # #gene_names -> Array - # def gn_old_parser names = Array.new --- 207,219 ---- # GN Line: Gene name(s) (>=0, optional) ! # GN HNS OR DRDX OR OSMZ OR BGLY. ! # GN CECA1 AND CECA2. ! # GN CECA1 AND (HOGE OR FUGA). # ! # GN NAME1 [(AND|OR) NAME]+. # # Bio::SPTR#gn -> Array # AND # #gn[0] -> Array # OR # #gene_names -> Array def gn_old_parser names = Array.new *************** *** 202,212 **** # The new format of the GN line is: ! # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...]; ! # GN ORFNames=[, ...]; ! # ! # GN and # ! # Bio::SPTR#gn -> [ * ] ! # where is: # { :name => '...', # :synonyms => [ 's1', 's2', ... ], --- 232,240 ---- # The new format of the GN line is: ! # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...]; ! # GN ORFNames=[, ...]; # ! # * Bio::SPTR#gn -> [ * ] ! 
# where is: # { :name => '...', # :synonyms => [ 's1', 's2', ... ], *************** *** 239,243 **** ! # Bio::SPTR#gene_names -> [String] def gene_names gn # set @data['GN'] if it hasn't been already done --- 267,271 ---- ! # returns a Array of gene names in the GN line. def gene_names gn # set @data['GN'] if it hasn't been already done *************** *** 250,255 **** ! # Bio::SPTR#gene_name -> String ! # def gene_name gene_names.first --- 278,282 ---- ! # returns a String of the first gene name in the GN line. def gene_name gene_names.first *************** *** 257,276 **** # OS Line; organism species (>=1) ! # "OS Genus species (name)." ! # "OS Genus species (name0) (name1)." ! # "OS Genus species (name0) (name1)." ! # "OS Genus species (name0), G s0 (name0), and G s (name0) (name1)." ! # "OS Homo sapiens (Human), and Rarrus norveticus (Rat)" ! # ! # Bio::EMBLDB#os -> Array of Hash ! # [{'name'=>'(Human)', 'os'=>'Homo sapiens'}, ! # {'name'=>'(Rat)', 'os'=>'Rattus norveticus'}] ! # Bio::SPTR#os[0]['name'] => "(Human)" ! # Bio::EPTR#os[0] => {'name'=>"(Human)", 'os'=>'Homo sapiens'} ! # Bio::EPTR#os(0) => "Homo sapiens (Human)" ! # ! # Bio::SPTR#os -> Array of Hash ! # Bio::SPTR#os(num) -> String def os(num = nil) unless @data['OS'] --- 284,302 ---- + # returns a Array of Hashs or a String of the OS line when a key given. + # * Bio::EMBLDB#os -> Array + # [{'name' => '(Human)', 'os' => 'Homo sapiens'}, + # {'name' => '(Rat)', 'os' => 'Rattus norveticus'}] + # * Bio::EPTR#os[0] -> Hash + # {'name' => "(Human)", 'os' => 'Homo sapiens'} + # * Bio::SPTR#os[0]['name'] -> "(Human)" + # * Bio::EPTR#os(0) -> "Homo sapiens (Human)" + # # OS Line; organism species (>=1) ! # OS Genus species (name). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0), G s0 (name0), and G s (name0) (name1). ! 
# OS Homo sapiens (Human), and Rarrus norveticus (Rat) def os(num = nil) unless @data['OS'] *************** *** 297,318 **** # OG Line; organella (0 or 1/entry) # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"] # or a plasmid name (e.g. "Plasmid pBR322"). - # - # Bio::SPTR#og -> Array # OC Line; organism classification (>=1) ! # OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; ! # OC Theileria. ! # ! # Bio::EMBLDB#oc -> Array ! # OX Line; organism taxonomy cross-reference (>=1 per entry) ! # OX NCBI_TaxID=1234; ! # OX NCBI_TaxID=1234, 2345, 3456, 4567; # ! # Bio::SPTR#ox -> {'NCBI_TaxID' => ['1234','2345','3456','4567']} def ox unless @data['OX'] --- 323,346 ---- + # Bio::EMBL::Common#og -> Array # OG Line; organella (0 or 1/entry) # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"] # or a plasmid name (e.g. "Plasmid pBR322"). + # Bio::EMBL::Common#oc -> Array # OC Line; organism classification (>=1) ! # "OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;" ! # "OC Theileria." ! ! ! # returns a Hash of oraganism taxonomy cross-references. ! # * Bio::SPTR#ox -> Hash ! # {'NCBI_TaxID' => ['1234','2345','3456','4567'], ...} # ! # OX Line; organism taxonomy cross-reference (>=1 per entry) ! # OX NCBI_TaxID=1234; ! # OX NCBI_TaxID=1234, 2345, 3456, 4567; def ox unless @data['OX'] *************** *** 328,345 **** end ! # R Lines # RN RC RP RX RA RT RL - # Bio::EMBLDB#ref -> Array - # CC lines (>=0, optional) - # CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT - # CC IN LIVER, KIDNEY, LUNG AND BRAIN. - # - # CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; - # CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK. - # - # CC -!- CAUTION: HOGE HOGE IS FUGA FUGA! - # @@cc_topics = ['ALTERNATIVE PRODUCTS','CATALYTIC ACTIVITY','CAUTION', --- 356,364 ---- end ! ! 
# Bio::EMBL::Common#ref -> Array # R Lines # RN RC RP RX RA RT RL @@cc_topics = ['ALTERNATIVE PRODUCTS','CATALYTIC ACTIVITY','CAUTION', *************** *** 348,373 **** 'MISCELLANEOUS','PATHWAY','PHARMACEUTICAL','POLYMORPHISM','PTM', 'SIMILARITY','SUBCELLULAR LOCATION','SUBUNIT','TISSUE SPECIFICITY'] ! # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"]. ! # MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX]. # ! # Bio::SPTR#cc -> Hash w/in Array ! # Bio::SPTR#cc(Int) -> String ! # Bio::SPTR#cc(TOPIC) -> Array w/in Hash ! # Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> {'Event'=>str, ! # 'Named isoforms'=>int, ! # 'Comment'=>str, ! # 'Variants'=>[{'Name'=>str, ! # 'Synonyms'=>str, ! # 'IsoId'=>str, ! # 'Sequence'=>[]}]} ! # Bio::SPTR#cc('DATABASE') -> [{'NAME'=>str,'NOTE'=>str, ! # 'WWW'=>URI,'FTP'=>URI}] ! # Bio::SPTR#cc('MASS SPECTROMETRY') -> [{'MW"=>float,'MW_ERR'=>float, ! # 'METHOD'=>str,'RANGE'=>str}] # def cc(tag = nil) - - # @data['CC'] = {'DATABASE'=>['hoge','fuga'], ... } unless @data['CC'] cc = Hash.new --- 367,410 ---- 'MISCELLANEOUS','PATHWAY','PHARMACEUTICAL','POLYMORPHISM','PTM', 'SIMILARITY','SUBCELLULAR LOCATION','SUBUNIT','TISSUE SPECIFICITY'] + # returns contents in the CC lines. + # * Bio::SPTR#cc -> Hash ! # * Bio::SPTR#cc(Int) -> String ! # returns an Array of contents in the TOPIC string. ! # * Bio::SPTR#cc(TOPIC) -> Array w/in Hash, Hash # ! # returns contents of the "ALTERNATIVE PRODUCTS". ! # * Bio::SPTR#cc('ALTERNATIVE PRODUCTS') -> Hash ! # {'Event' => str, ! # 'Named isoforms' => int, ! # 'Comment' => str, ! # 'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]} ! # ! # CC -!- ALTERNATIVE PRODUCTS: ! # CC Event=Alternative splicing; Named isoforms=15; ! # ... ! # CC placentae isoforms. All tissues differentially splice exon 13; ! # CC Name=A; Synonyms=no del; ! # CC IsoId=P15529-1; Sequence=Displayed; # + # returns contents of the "DATABASE". 
+ # * Bio::SPTR#cc('DATABASE') -> Array + # [{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...] + # + # CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"]. + # + # returns contents of the "MASS SPECTROMETRY". + # * Bio::SPTR#cc('MASS SPECTROMETRY') -> Array + # [{'MW"=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...] + # + # MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX]. + # + # CC lines (>=0, optional) + # CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT + # CC IN LIVER, KIDNEY, LUNG AND BRAIN. + # + # CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; + # CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK. def cc(tag = nil) unless @data['CC'] cc = Hash.new *************** *** 405,409 **** end - case tag when 'ALTERNATIVE PRODUCTS' --- 442,445 ---- *************** *** 411,421 **** return ap unless ap - # CC -!- ALTERNATIVE PRODUCTS: - # CC Event=Alternative splicing; Named isoforms=15; - # ... - # CC placentae isoforms. All tissues differentially splice exon 13; - # CC Name=A; Synonyms=no del; - # CC IsoId=P15529-1; Sequence=Displayed; - # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+ tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 'Variants' => []} --- 447,450 ---- *************** *** 508,516 **** # DR Line; defabases cross-reference (>=0) # a cross_ref pre one line ! # "DR database_identifier; primary_identifier; secondary_identifier." ! # Bio::EMBLDB#dr -> Hash w/in Array ! @@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE', 'ECOGENE', --- 537,546 ---- + # returns databases cross-references in the DR lines. + # * Bio::EMBLDB#dr -> Hash w/in Array + # # DR Line; defabases cross-reference (>=0) # a cross_ref pre one line ! # DR database_identifier; primary_identifier; secondary_identifier. 
@@dr_database_identifier = ['EMBL','CARBBANK','DICTYDB','ECO2DBASE', 'ECOGENE', *************** *** 520,544 **** 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN'] ! # KW Line; keyword (>=1) # KW [Keyword;]+ - # Bio::EMBLDB#kw -> Array - # #keywords -> Array ! # FT Line; feature table data (>=0, optional) # ! # Col Data item ! # ----- ----------------- ! # 1- 2 FT ! # 6-13 Feature name ! # 15-20 `FROM' endpoint ! # 22-27 `TO' endpoint ! # 35-75 Description (>=0 per key) ! # ----- ----------------- # ! # Bio::SPTR#ft -> {'feature_name'=>[{'From'=>str,'To'=>str, ! # 'Description'=>str, 'FTId'=>str}],} ! # Bio::SPTR#ft(feature_name) -> [{'From'=>str,'To'=>str, ! # 'Description'=>str, 'FTId'=>str},...] def ft(feature_name = nil) unless @data['FT'] --- 550,579 ---- 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN'] ! # Bio::EMBL::Common#kw - Array ! # #keywords -> Array ! # # KW Line; keyword (>=1) # KW [Keyword;]+ ! ! # returns conteins in the feature table. ! # * Bio::SPTR#ft -> Hash ! # {'feature_name' => [{'From' => str, 'To' => str, ! # 'Description' => str, 'FTId' => str}],...} # ! # returns an Array of the information about the feature_name in the feature table. ! # * Bio::SPTR#ft(feature_name) -> Array of Hash ! # [{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...] # ! # FT Line; feature table data (>=0, optional) ! # ! # Col Data item ! # ----- ----------------- ! # 1- 2 FT ! # 6-13 Feature name ! # 15-20 `FROM' endpoint ! # 22-27 `TO' endpoint ! # 35-75 Description (>=0 per key) ! # ----- ----------------- def ft(feature_name = nil) unless @data['FT'] *************** *** 635,648 **** ! # SQ Line; sequence header (1/entry) ! # SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; ! # SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64; # ! # MW, Dalton unit ! # CRC64 (64-bit Cyclic Redundancy Check, ISO 3309) # ! # Bio::SPTRL#sq -> hsh ! 
# Bio::SPTRL#sq(key) -> int or str # def sq(key = nil) unless @data['SQ'] --- 670,686 ---- ! # returns a Hash of conteins in the SQ lines. ! # * Bio::SPTRL#sq -> hsh # ! # returns a value of a key given in the SQ lines. ! # * Bio::SPTRL#sq(key) -> int or str ! # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 'CRC64'] # ! # SQ Line; sequence header (1/entry) ! # SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; ! # SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64; # + # MW, Dalton unit. + # CRC64 (64-bit Cyclic Redundancy Check, ISO 3309). def sq(key = nil) unless @data['SQ'] *************** *** 668,675 **** end ! # @orig[''] as sequence ! # blank Line; sequence data (>=1) ! # Bio::SPTR#seq -> Bio::Sequence::AA # def seq unless @data[''] --- 706,714 ---- end ! ! # returns a Bio::Sequence::AA of the amino acid sequence. ! # * Bio::SPTR#seq -> Bio::Sequence::AA # + # blank Line; sequence data (>=1) def seq unless @data[''] *************** *** 680,686 **** alias aaseq seq ! end ! end --- 719,725 ---- alias aaseq seq ! end # class SPTR ! end # module Bio From nakao at pub.open-bio.org Sun Oct 23 05:25:19 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:09:57 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl common.rb,1.4,1.5 Message-ID: <200510230925.j9N9PJdZ018801@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv18789/lib/bio/db/embl Modified Files: common.rb Log Message: * Chaged to rdoc format. * Bio::EMBL::Common changed to Bio::EMBLDB::Common. Index: common.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** common.rb 26 Sep 2005 13:00:06 -0000 1.4 --- common.rb 23 Oct 2005 09:25:16 -0000 1.5 *************** *** 1,6 **** # ! 
# bio/db/embl.rb - Common methods for EMBL style database classes # ! # Copyright (C) 2001-2004 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,54 ---- # ! # = bio/db/embl.rb - Common methods for EMBL style database classes ! # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project ! # License:: LGPL ! # ! # $Id$ ! # ! # == EMBL style databases class ! # ! # This module defines a common framework among EMBL, SWISS-PROT, TrEMBL. ! # For more details, see the documentations in each embl/*.rb libraries. ! # ! # EMBL style format: ! # ID - identification (begins each entry; 1 per entry) ! # AC - accession number (>=1 per entry) ! # SV - sequence version (1 per entry) ! # DT - date (2 per entry) ! # DE - description (>=1 per entry) ! # KW - keyword (>=1 per entry) ! # OS - organism species (>=1 per entry) ! # OC - organism classification (>=1 per entry) ! # OG - organelle (0 or 1 per entry) ! # RN - reference number (>=1 per entry) ! # RC - reference comment (>=0 per entry) ! # RP - reference positions (>=1 per entry) ! # RX - reference cross-reference (>=0 per entry) ! # RA - reference author(s) (>=1 per entry) ! # RG - reference group (>=0 per entry) ! # RT - reference title (>=1 per entry) ! # RL - reference location (>=1 per entry) ! # DR - database cross-reference (>=0 per entry) ! # FH - feature table header (0 or 2 per entry) ! # FT - feature table data (>=0 per entry) ! # CC - comments or notes (>=0 per entry) ! # XX - spacer line (many per entry) ! # SQ - sequence header (1 per entry) ! # bb - (blanks) sequence data (>=1 per entry) ! # // - termination line (ends each entry; 1 per entry) # ! # ! # == Example ! # ! # require 'bio/db/embl/common' ! # module Bio ! # class NEWDB < EMBLDB ! # include Bio::EMBLDB::Common ! # end ! # end ! # ! 
#-- # # This library is free software; you can redistribute it and/or *************** *** 18,28 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # require 'bio/db' module Bio ! class EMBL < EMBLDB module Common --- 66,77 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # require 'bio/db' + require 'bio/reference' module Bio ! class EMBLDB module Common *************** *** 34,48 **** end ! # AC Line ! # "AC A12345; B23456;" ! # AC [AC1;]+ # # Accession numbers format: ! # 1 2 3 4 5 6 ! # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9] ! # ! # Bio::EMBL::Common#ac -> Array ! # #accessions -> Array def ac unless @data['AC'] --- 83,95 ---- end ! # returns a Array of accession numbers in the AC lines. ! # # AC Line ! # "AC A12345; B23456;" ! # AC [AC1;]+ # # Accession numbers format: ! # 1 2 3 4 5 6 ! # [O,P,Q] [0-9] [A-Z, 0-9] [A-Z, 0-9] [A-Z, 0-9] [0-9] def ac unless @data['AC'] *************** *** 57,61 **** alias accessions ac ! # Bio::EMBL::Common#accession -> String def accession ac[0] --- 104,109 ---- alias accessions ac ! ! # returns the first accession number in the AC lines def accession ac[0] *************** *** 63,67 **** ! # DE Line: def de unless @data['DE'] --- 111,117 ---- ! # returns a String int the DE line. ! # ! # DE Line def de unless @data['DE'] *************** *** 71,96 **** end alias description de ! # API ! alias definition de ! # OS Line; organism species (>=1) ! # "OS Trifolium repens (white clover)" ! # ! # OS Genus species (name). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0), G s0 (name0), and G s (name1). # ! # Bio::EMBL#os -> Array w/in Hash ! # [{'name'=>'Human', 'os'=>'Homo sapiens'}, ! # {'name'=>'Rat', 'os'=>'Rattus norveticus'}] ! # Bio::STPR#os[0]['name'] => "Human" ! # Bio::STPR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'} ! # Bio::STPR#os(0) => "Homo sapiens (Human)" # ! 
# Bio::SPTR#os -> Array w/in Hash ! # Bio::SPTR#os(num) -> String def os(num = nil) unless @data['OS'] --- 121,144 ---- end alias description de ! alias definition de # API ! # returns contents in the OS line. ! # * Bio::EMBLDB#os -> Array of ! # where is: ! # [{'name'=>'Human', 'os'=>'Homo sapiens'}, ! # {'name'=>'Rat', 'os'=>'Rattus norveticus'}] ! # * Bio::SPTR#os[0]['name'] => "Human" ! # * Bio::SPTR#os[0] => {'name'=>"Human", 'os'=>'Homo sapiens'} ! # * Bio::STPR#os(0) => "Homo sapiens (Human)" # ! # OS Line; organism species (>=1) ! # "OS Trifolium repens (white clover)" # ! # OS Genus species (name). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0) (name1). ! # OS Genus species (name0), G s0 (name0), and G s (name1). def os(num = nil) unless @data['OS'] *************** *** 115,121 **** ! # OG Line; organella (0 or 1/entry) # ! # Bio::EMBL::Common#og -> Array def og unless @data['OG'] --- 163,170 ---- ! # returns contents in the OG line. ! # * Bio::EMBLDB::Common#og -> [ * ] # ! # OG Line; organella (0 or 1/entry) def og unless @data['OG'] *************** *** 130,140 **** @data['OG'] end ! # OC Line; organism classification (>=1) ! # OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; ! # OC Theileria. ! # ! # Bio::EMBL::Common#oc -> Array def oc unless @data['OC'] --- 179,189 ---- @data['OG'] end + ! # returns contents in the OC line. ! # * Bio::EMBLDB::Common#oc -> [ * ] # OC Line; organism classification (>=1) ! # OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae; ! # OC Theileria. def oc unless @data['OC'] *************** *** 150,158 **** end ! # KW Line; keyword (>=1) ! # KW [Keyword;]+ ! # Bio::EMBL::Common#kw -> Array ! # #keywords -> Array def kw unless @data['KW'] --- 199,206 ---- end ! # returns keywords in the KW line. ! # * Bio::EMBLDB::Common#kw -> [ * ] # KW Line; keyword (>=1) ! # KW [Keyword;]+ def kw unless @data['KW'] *************** *** 169,175 **** # R Lines ! # RN RC RP RX RA RT RL RG ! 
# Bio::EMBL::Common#ref -> Array def ref unless @data['R'] --- 217,228 ---- + # returns contents in the R lines. + # * Bio::EMBLDB::Common#ref -> [ * ] + # where is: + # {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '', + # 'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''} + # # R Lines ! # * RN RC RP RX RA RT RL RG def ref unless @data['R'] *************** *** 199,203 **** end ! # Bio::EMBL::Common#references -> Bio::References def references unless @data['references'] --- 252,257 ---- end ! # returns Bio::Reference object from Bio::EMBLDB::Common#ref. ! # * Bio::EMBLDB::Common#ref -> Bio::References def references unless @data['references'] *************** *** 235,244 **** ! # DR Line; defabases cross-reference (>=0) # a cross_ref pre one line ! # "DR database_identifier; primary_identifier; secondary_identifier." ! # Bio::EMBL::Common#dr -> Hash w/in Array ! # Bio::EMBL::Common#dr {|k,v| } def dr unless @data['DR'] --- 289,300 ---- ! # returns contents in the DR line. ! # * Bio::EMBLDB::Common#dr -> [ * ] ! # where is: ! # * Bio::EMBLDB::Common#dr {|k,v| } ! # # DR Line; defabases cross-reference (>=0) # a cross_ref pre one line ! # "DR database_identifier; primary_identifier; secondary_identifier." def dr unless @data['DR'] *************** *** 262,342 **** end # module Common ! 
end # class EMBL end # module Bio - - - # ID - identification (begins each entry; 1 per entry) - # AC - accession number (>=1 per entry) - # SV - sequence version (1 per entry) - # DT - date (2 per entry) - # DE - description (>=1 per entry) - # KW - keyword (>=1 per entry) - # OS - organism species (>=1 per entry) - # OC - organism classification (>=1 per entry) - # OG - organelle (0 or 1 per entry) - # RN - reference number (>=1 per entry) - # RC - reference comment (>=0 per entry) - # RP - reference positions (>=1 per entry) - # RX - reference cross-reference (>=0 per entry) - # RA - reference author(s) (>=1 per entry) - # RG - reference group (>=0 per entry) - # RT - reference title (>=1 per entry) - # RL - reference location (>=1 per entry) - # DR - database cross-reference (>=0 per entry) - # FH - feature table header (0 or 2 per entry) - # FT - feature table data (>=0 per entry) - # CC - comments or notes (>=0 per entry) - # XX - spacer line (many per entry) - # SQ - sequence header (1 per entry) - # bb - (blanks) sequence data (>=1 per entry) - # // - termination line (ends each entry; 1 per entry) - - - =begin - - = Bio::EMBL::Common - - This module defines a common framework among EMBL, SWISS-PROT, TrEMBL. - For more details, see the documentations in each embl/*.rb libraries. - - - --- Bio::EMBL::Common::DELIMITER - --- Bio::EMBL::Common::RS - --- Bio::EMBL::Common::TAGSIZE - - - --- Bio::EMBL::Common#ac - #accessions - - --- Bio::EMBL::Common#accession - - - --- Bio::EMBL::Common#de - #description - #definition - - - --- Bio::EMBL::Common#os - - - --- Bio::EMBL::Common#og - - - --- Bio::EMBL::Common#oc - - - --- Bio::EMBL::Common#kw - #keywords - - - --- Bio::EMBL::Common#ref - Reterns R* lines in hsh w/in ary. - - --- Bio::EMBL::Common#references - Retruns Bio::References. - - - --- Bio::EMBL::Common#dr - - =end --- 318,322 ---- end # module Common ! 
end # class EMBLDB end # module Bio From nakao at pub.open-bio.org Sun Oct 23 05:59:50 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:09:59 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.21,1.22 Message-ID: <200510230959.j9N9xodZ018990@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv18980/bio/db/embl Modified Files: embl.rb Log Message: * Changed to RDocc format. * Bio::EMBL::Common changed to Bio::EMBLDB::Common. Index: embl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** embl.rb 24 Sep 2005 01:15:56 -0000 1.21 --- embl.rb 23 Oct 2005 09:59:48 -0000 1.22 *************** *** 1,6 **** # ! # bio/db/embl/embl.rb - EMBL database class # ! # Copyright (C) 2001, 2002 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,26 ---- # ! # = bio/db/embl/embl.rb - EMBL database class # ! # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project ! # License:: LGPL ! # ! # $Id$ ! # ! # == EMBL database entry ! # ! # ! # ! # == Example ! # ! # emb = Bio::EMBL.new($<.read) ! # emb.entry_id ! # emb.each_cds do |cds| ! # cds ! # end ! # emb.seq ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,22 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 38,42 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # *************** *** 25,34 **** module Bio ! class EMBL ! include Bio::EMBL::Common ! ## # ID Line ! # "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP." # # DATA_CLASS = ['standard'] --- 45,59 ---- module Bio ! class EMBL < EMBLDB ! include Bio::EMBLDB::Common ! 
# returns contents in the ID line. ! # * Bio::EMBL#id_line -> ! # where is: ! # {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String, ! # 'SEQUENCE_LENGTH' => Int} ! # # ID Line ! # "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP." # # DATA_CLASS = ['standard'] *************** *** 59,75 **** unless @data['ID'] tmp = Hash.new ! idline = @orig['ID'].split(/ +/) ! tmp['ENTRY_NAME'] = idline[1] ! tmp['DATA_CLASS'] = idline[2].sub(/;/,'') ! tmp['MOLECULE_TYPE'] = idline[3].sub(/;/,'') # "cyclic DNA" ! tmp['DVISION'] = idline[4].sub(/;/,'') ! tmp['SEQUENCE_LENGTH'] = idline[5].to_i @data['ID'] = tmp end ! if block_given? ! @data['ID'].each do |k,v| ! yield(k,v) ! end ! elsif key @data['ID'][key] else --- 84,97 ---- unless @data['ID'] tmp = Hash.new ! idline = fetch('ID').split(/; +/) ! tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/) ! tmp['MOLECULE_TYPE'] = idline[1] ! tmp['DIVISION'] = idline[2] ! tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i ! @data['ID'] = tmp end ! ! if key @data['ID'][key] else *************** *** 78,84 **** end ! ## ! # Bio::EMBL#entry -> String ! # #entry_name -> String def entry id_line('ENTRY_NAME') --- 100,105 ---- end ! # returns ENTRY_NAME in the ID line. ! # * Bio::EMBL#entry -> String def entry id_line('ENTRY_NAME') *************** *** 87,93 **** alias entry_id entry ! ## ! # Bio::EMBL#molecule -> String ! # def molecule id_line('MOLECULE_TYPE') --- 108,113 ---- alias entry_id entry ! # returns MOLECULE_TYPE in the ID line. ! # * Bio::EMBL#molecule -> String def molecule id_line('MOLECULE_TYPE') *************** *** 95,108 **** alias molecule_type molecule ! ## ! # Bio::EMBL#division -> String ! # def division id_line('DIVISION') end ! ## ! # Bio::EMBL#sequencelength -> String ! # def sequence_length id_line('SEQUENCE_LENGTH') --- 115,126 ---- alias molecule_type molecule ! # returns DIVISION in the ID line. ! 
# * Bio::EMBL#division -> String def division id_line('DIVISION') end ! # returns SEQUENCE_LENGTH in the ID line. ! # * Bio::EMBL#sequencelength -> String def sequence_length id_line('SEQUENCE_LENGTH') *************** *** 110,128 **** alias seqlen sequence_length ! ## # AC Line # "AC A12345; B23456;" - # - # Bio::EMBLDB#ac -> Array - # #accessions -> Array ! ## ! # SV Line; sequence version (1/entry) ! # "SV Accession.Version" ! # ! # Bio::EMBL#sv -> String ! # Bio::EMBL#version -> Int # def sv field_fetch('SV').sub(/;/,'') --- 128,142 ---- alias seqlen sequence_length ! # AC Line # "AC A12345; B23456;" ! # returns the version information in the sequence version (SV) line. ! # * Bio::EMBL#sv -> Accession.Version in String ! # * Bio::EMBL#version -> accession in Int # + # SV Line; sequence version (1/entry) + # SV Accession.Version def sv field_fetch('SV').sub(/;/,'') *************** *** 131,141 **** sv.split(".")[1].to_i end ! ## ! # DT Line; date (2/entry) ! # Bio::EMBL#dt -> Hash ! # Bio::EMBL#dt(key) -> String ! # key = (created|updated) # def dt(key=nil) unless @data['DT'] --- 145,158 ---- sv.split(".")[1].to_i end + ! # returns contents in the date (DT) line. ! # * Bio::EMBL#dt ->
    ! # where the returned Hash
    is: ! # {} ! # * Bio::EMBL#dt(key) -> String ! # keys: 'created' and 'updated' # + # DT Line; date (2/entry) def dt(key=nil) unless @data['DT'] *************** *** 146,154 **** @data['DT'] = tmp end ! if block_given? ! @data['DT'].each do |k,v| ! yield(k,v) ! end ! elsif key @data['DT'][key] else --- 163,167 ---- @data['DT'] = tmp end ! if key @data['DT'][key] else *************** *** 208,223 **** ! ## ! # FH Line; feature table header (0 or 2) ! # FT Line; feature table data (>=0) ! # ! # Bio::EMBL#ft -> Array ! # Bio::EMBL#ft {} -> {|Hash| } ! # Bio::EMBL#ft(Int) -> Hash # def fh ! get('FH') end # same as features method in bio/db/genbank.rb def ft(num = nil) unless @data['FT'] --- 221,239 ---- ! # returns feature table header (String) in the feature header (FH) line. # + # FH Line; feature table header (0 or 2) def fh ! fetch('FH') end + + # returns contents in the feature table (FT) lines. + # * Bio::EMBL#ft -> [ * ] + # * Bio::EMBL#ft {} -> {|| } + # * Bio::EMBL#ft(Int) -> Hash + # # same as features method in bio/db/genbank.rb + # + # FT Line; feature table data (>=0) def ft(num = nil) unless @data['FT'] *************** *** 255,260 **** end if block_given? ! @data['FT'].each do |f| ! yield f end else --- 271,276 ---- end if block_given? ! @data['FT'].each do |feature_table| ! yield feature_table end else *************** *** 264,279 **** alias features ft def each_cds ! ft.each do |feature| ! if feature.feature == 'CDS' ! yield feature end end end def each_gene ! ft.each do |feature| ! if feature.feature == 'gene' ! yield feature end end --- 280,297 ---- alias features ft + # iterates on CDS features in the FT lines. def each_cds ! ft.each do |cds_feature| ! if cds_feature.feature == 'CDS' ! yield cds_feature end end end + # iterates on gene features in the FT lines. def each_gene ! ft.each do |gene_feature| ! if gene_feature.feature == 'gene' ! yield gene_feature end end *************** *** 281,287 **** ! ## ! 
# CC Line; comments of notes (>=0) # def cc get('CC') --- 299,305 ---- ! # returns comment text in the comments (CC) line. # + # CC Line; comments of notes (>=0) def cc get('CC') *************** *** 295,319 **** ! ## ! # SQ Line; sequence header (1/entry) ! # "SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;" ! # Bio::EMBL#sq -> Hash ! # Bio::EMBL#sq(base) -> Int ! # #sq[base] -> Int # def sq(base = nil) unless @data['SQ'] fetch('SQ') =~ \ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/ ! @data['SQ']={'ntlen'=>$1.to_i, 'other'=>$6.to_i, ! 'a'=>$2.to_i,'c'=>$3.to_i,'g'=>$4.to_i,'t'=>$5.to_i} else @data['SQ'] end ! if block_given? ! @data['SQ'].each do |k,v| ! yield(k,v) ! end ! elsif base @data['SQ'][base.downcase] else --- 313,337 ---- ! # returns sequence header information in the sequence header (SQ) line. ! # * Bio::EMBL#sq -> ! # where is: ! # {'ntlen' => Int, 'other' => Int, ! # 'a' => Int, 'c' => Int, 'g' => Int, 't' => Int} ! # * Bio::EMBL#sq(base) -> ! # * Bio::EMBL#sq[base] -> # + # SQ Line; sequence header (1/entry) + # SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other; def sq(base = nil) unless @data['SQ'] fetch('SQ') =~ \ /(\d+) BP\; (\d+) A; (\d+) C; (\d+) G; (\d+) T; (\d+) other;/ ! @data['SQ'] = {'ntlen' => $1.to_i, 'other' => $6.to_i, ! 'a' => $2.to_i, 'c' => $3.to_i , 'g' => $4.to_i, 't' => $5.to_i} else @data['SQ'] end ! ! if base @data['SQ'][base.downcase] else *************** *** 321,335 **** end end - # Bio::EMBL#gc -> Float - def gc - ( sq('g') + sq('c') ) / sq('ntlen').to_f * 100 - end ! ## # @orig[''] as sequence # bb Line; (blanks) sequence data (>=1) - # Bio::EMBL#seq -> Bio::Sequence::NA - # def seq Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') ) --- 339,349 ---- end end ! # returns the nucleotie sequence in this entry. ! # * Bio::EMBL#seq -> Bio::Sequence::NA ! 
# # @orig[''] as sequence # bb Line; (blanks) sequence data (>=1) def seq Sequence::NA.new( fetch('').gsub(/ /,'').gsub(/\d+/,'') ) *************** *** 341,345 **** - ### private methods --- 355,358 ---- *************** *** 434,517 **** - =begin - - = Bio::EMBL - - === Initialize - - --- Bio::EMBL#new(an_embl_entry) - - === ID line (Identification) - - --- Bio::EMBL#id_line -> Hash - --- Bio::EMBL#id_line(key) -> String - - key = (entryname|molecule|division|sequencelength) - - --- Bio::EMBL#entry -> String - --- Bio::EMBL#entryname -> String - --- Bio::EMBL#molecule -> String - --- Bio::EMBL#division -> String - --- Bio::EMBL#sequencelength -> Int - - === AC lines (Accession number) - - --- Bio::EMBL#ac -> Array - - === SV line (Sequence version) - - --- Bio::EMBL#sv -> String - - === DT lines (Date) - - --- Bio::EMBL#dt -> Hash - --- Bio::EMBL#dt(key) -> String - - key = (created|updated) - - === DE lines (Description) - - --- Bio::EMBL#de -> String - - === KW lines (Keyword) - - --- Bio::EMBL#kw -> Array - - === OS lines (Organism species) - - --- Bio::EMBL#os -> Hash - - === OC lines (organism classification) - - --- Bio::EMBL#oc -> Array - - === OG line (Organella) - - --- Bio::EMBL#og -> String - - === RN RC RP RX RA RT RL lines (Reference) - - --- Bio::EMBL#ref -> String - - === DR lines (Database cross-reference) - - --- Bio::EMBL#dr -> Array - - === FH FT lines (Feature table header and data) - - --- Bio::EMBL#ft -> Bio::Features - --- Bio::EMBL#each_cds -> Array - --- Bio::EMBL#each_gene -> Array - - - === SQ Lines (Sequence header and data) - - --- Bio::EMBL#sq -> Hash - --- Bio::EMBL#sq(base) -> Int - - base = (a|c|g|t|u|other) - - --- Bio::EMBL#gc -> Float - --- Bio::EMBL#seq -> Bio::Sequece::NA - - =end --- 447,448 ---- From nakao at pub.open-bio.org Sun Oct 23 06:40:42 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 10:10:00 2005 Subject: [BioRuby-cvs] bioruby/test runner.rb,1.1,1.2 Message-ID: <200510231040.j9NAegdZ019177@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test In directory pub.open-bio.org:/tmp/cvs-serv19147/test Modified Files: runner.rb Log Message: * Fixed bioruby_libpath. Index: runner.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/runner.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** runner.rb 23 Sep 2005 11:02:50 -0000 1.1 --- runner.rb 23 Oct 2005 10:40:40 -0000 1.2 *************** *** 2,5 **** --- 2,9 ---- require 'test/unit' + require 'pathname' + + bioruby_libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'], 'lib')).cleanpath.to_s + $:.unshift(bioruby_libpath) unless $:.include?(bioruby_libpath) exit Test::Unit::AutoRunner.run(false, File.dirname($0)) From nakao at pub.open-bio.org Sun Oct 23 11:34:32 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:10:04 2005 Subject: [BioRuby-cvs] bioruby/lib bio.rb,1.55,1.56 Message-ID: <200510231534.j9NFYWVL019902@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib In directory pub.open-bio.org:/tmp/cvs-serv19892/lib Modified Files: bio.rb Log Message: * added autoload for Bio::ColorScheme. 
Index: bio.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio.rb,v retrieving revision 1.55 retrieving revision 1.56 diff -C2 -d -r1.55 -r1.56 *** bio.rb 9 Sep 2005 16:02:04 -0000 1.55 --- bio.rb 23 Oct 2005 15:34:30 -0000 1.56 *************** *** 254,257 **** --- 254,258 ---- autoload :SiRNA, 'bio/util/sirna' + autoload :ColorScheme, 'bio/util/color_scheme' end From ngoto at pub.open-bio.org Wed Oct 26 05:12:09 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Thu Oct 27 10:10:11 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blast format0.rb,1.12,1.13 Message-ID: <200510260912.j9Q9C9VL032173@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv31454 Modified Files: format0.rb Log Message: * RDoc comments are added. * In Report class, * delegate_to_f0dbstat (private class method) is added, and 11 methods are changed to use it. * f0header, f0reference, f0query, f0database, f0dbstat methods are removed. (They were methods for debugging.) * in format0_split_stat_params, changed to use instance_eval (due to the removal of Iteration#f0stat and Iteration#f0dbstat methods). * In F0dbstat class, * method_after_parse_params and method_after_parse_dbstat (private class methods) are deleted and methods are changed not to use them. * AlwaysNil class (include Singleton) is added for internal use. Users must not use the class directly. * In Iteration class, * Default value of @f0dbstat is changed to AlwaysNil.instance. * f0message, f0hitlist, f0stat, f0dbstat methods are removed. (They were for internal use only.) * method_delegate_f0dbstat (private class method) is changed to delegate_to_f0dbstat. * In Hit class, * f0hitname method is removed. (It was only for debugging.) * In HSP class, * f0score and f0alignment are removed. (They were methods for debugging.)
Index: format0.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blast/format0.rb,v retrieving revision 1.12 retrieving revision 1.13 diff -C2 -d -r1.12 -r1.13 *** format0.rb 26 Sep 2005 13:00:04 -0000 1.12 --- format0.rb 26 Oct 2005 09:12:06 -0000 1.13 *************** *** 1,6 **** # ! # bio/appl/blast/format0.rb - BLAST default output (-m 0) parser # ! # Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or --- 1,8 ---- # ! # = bio/appl/blast/format0.rb - BLAST default output (-m 0) parser # [...1016 lines suppressed...] ! ! # start position of the hit (the first position is 1) ! attr_reader :hit_from if false #dummy ! method_after_parse_alignment :hit_from ! ! # end position of the hit (including its position) ! attr_reader :hit_to if false #dummy ! method_after_parse_alignment :hit_to ! end #class HSP end #class Report + # NCBI BLAST default (-m 0 option) output parser for TBLAST. + # All methods are equal to Bio::Blast::Default::Report. + # Only DELIMITER (and RS) is different. class Report_TBlast < Report + # Delimiter of each entry for TBLAST. Bio::FlatFile uses it. DELIMITER = RS = "\nTBLAST" end #class Report_TBlast From nakao at pub.open-bio.org Thu Oct 27 05:27:54 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:10:12 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_gff.rb,NONE,1.1 Message-ID: <200510270927.j9R9RsVL004009@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory pub.open-bio.org:/tmp/cvs-serv3999/test/unit/bio/db Added Files: test_gff.rb Log Message: * Initially imported unit test for Bio::GFF.
--- NEW FILE: test_gff.rb --- # # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_gff.rb,v 1.1 2005/10/27 09:27:52 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/db/gff' module Bio class TestGFF < Test::Unit::TestCase def setup data = <"Chromosome I Centromere", "Gene"=>"CEN1"} assert_equal(@obj.attributes, at) end def test_comments assert_equal(@obj.comments, '') end end # class TestGFFRecord class TestGFFRecordConstruct < Test::Unit::TestCase def setup @obj = Bio::GFF.new end def test_add_seqname name = "test" record = Bio::GFF::Record.new record.seqname = name @obj.records << record assert_equal(@obj.records[0].seqname, name) end end # class TestGFFRecordConstruct end From nakao at pub.open-bio.org Thu Oct 27 05:28:46 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 10:10:12 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db/embl test_sptr.rb, NONE, 1.1 test_uniprot.rb, NONE, 1.1 Message-ID: <200510270928.j9R9SkVL004026@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4016/test/unit/bio/db/embl Added Files: test_sptr.rb test_uniprot.rb Log Message: * Initially imported unit test for Bio::SPTR. * Initially imported unit test for Bio::UniProt. --- NEW FILE: test_uniprot.rb --- # # test/unit/bio/db/embl/test_uniprot.rb - Unit test for Bio::UniProt # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_uniprot.rb,v 1.1 2005/10/27 09:28:43 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/db/embl/uniprot' module Bio class TestUniProt < Test::Unit::TestCase def setup bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s data = File.open(File.join(bioruby_root, 'test', 'data', 'uniprot', 'p53_human.uniprot')).read @obj = Bio::UniProt.new(data) end def test_id_line assert(@obj.id_line) end def test_id_line_entry_name assert_equal(@obj.id_line('ENTRY_NAME'), 'P53_HUMAN') end def test_id_line_data_class assert_equal(@obj.id_line('DATA_CLASS'), 'STANDARD') end def test_id_line_molecule_type assert_equal(@obj.id_line('MOLECULE_TYPE'), 'PRT') end def test_id_line_sequence_length assert_equal(@obj.id_line('SEQUENCE_LENGTH'), 393) end def test_ac assert_equal(@obj.ac, []) assert_equal(@obj.acccessions, []) end def test_accession assert_equal(@obj.accession, '') end def test_de assert(@obj.de) end def test_protein_name assert_equal(@obj.protein_name, "Cellular tumor antigen p53") end def test_synonyms assert_equal(@obj.synonyms, ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]) end def test_gn assert_equal(@obj.gn, [{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}]) end def test_gn_uniprot_parser gn_uniprot_data = '' assert_equal(@obj.instance_eval(gn_uniprot_parser(gn_uniprot_data)), '') end # def test_gn_old_parser # gn_old_data = '' # assert_equal(@obj.instance_eval(gn_old_parser(gn_old_data)), '') # end def test_gene_names assert_equal(@obj.gene_names, ["TP53"]) end def test_gene_name assert_equal(@obj.gene_name, 'TP53') end def 
test_os assert(@obj.os) end def test_os_access assert_equal(@obj.os(1), {'name' => '', 'os' => ''}) end def test_os_access2 assert_equal(@obj.os[1], {}) end def test_cc data = '' assert_equal(@obj.instance_eval(cc_scan_alternative_products(data)), '') data = '' assert_equal(@obj.instance_eval(cc_scan_database(data)), '') data = '' assert_equal(@obj.instance_eval(cc_scan_mass_spectorometry(data)), '') assert_equal(@obj.cc, []) end def test_cc_database assert_equal(@obj.cc('DATABASE'), []) end def test_cc_alternative_products assert_equal(@obj.cc('ALTERNATIVE PRODUCTS'), {}) end def test_cc_mass_spectrometry assert_equal(@obj.cc('MASS SPECTROMETRY'), []) end def test_cc_interaction data =< # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_sptr.rb,v 1.1 2005/10/27 09:28:43 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/db/embl/sptr' module Bio class TestSPTR < Test::Unit::TestCase def setup bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s data = File.open(File.join(bioruby_root, 'test', 'data', 'uniprot', 'p53_human.uniprot')).read @obj = Bio::SPTR.new(data) end def test_id_line assert(@obj.id_line) end def test_id_line_entry_name assert_equal(@obj.id_line('ENTRY_NAME'), 'P53_HUMAN') end def test_id_line_data_class assert_equal(@obj.id_line('DATA_CLASS'), 'STANDARD') end def test_id_line_molecule_type assert_equal(@obj.id_line('MOLECULE_TYPE'), 'PRT') end def test_id_line_sequence_length assert_equal(@obj.id_line('SEQUENCE_LENGTH'), 393) end def test_entry entry = 'P53_HUMAN' assert_equal(@obj.entry, entry) assert_equal(@obj.entry_name, entry) assert_equal(@obj.entry_id, entry) end def test_molecule assert_equal(@obj.molecule, 'PRT') assert_equal(@obj.molecule_type, 'PRT') end def test_sequence_length seqlen = 393 assert_equal(@obj.sequence_length, seqlen) assert_equal(@obj.aalen, seqlen) end def test_ac acs = ["P04637", "Q15086", "Q15087", "Q15088", "Q16535", "Q16807", "Q16808", "Q16809", "Q16810", "Q16811", "Q16848", "Q86UG1", "Q8J016", "Q99659", "Q9BTM4", "Q9HAQ8", "Q9NP68", "Q9NPJ2", "Q9NZD0", "Q9UBI2", "Q9UQ61"] assert_equal(@obj.ac, acs) assert_equal(@obj.accessions, acs) end def test_accession assert_equal(@obj.accession, 'P04637') end def test_dt assert(@obj.dt) end def test_dt_created assert_equal(@obj.dt('created'), '13-AUG-1987 (Rel. 
05, Created)') end def test_dt_sequence assert_equal(@obj.dt('sequence'), '01-MAR-1989 (Rel. 10, Last sequence update)') end def test_dt_annotation assert_equal(@obj.dt('annotation'), '13-SEP-2005 (Rel. 48, Last annotation update)') end def test_de assert(@obj.de) end def test_protein_name assert_equal(@obj.protein_name, "Cellular tumor antigen p53") end def test_synonyms assert_equal(@obj.synonyms, ["Tumor suppressor p53", "Phosphoprotein p53", "Antigen NY-CO-13"]) end def test_gn assert_equal(@obj.gn, [{:orfs=>[], :synonyms=>["P53"], :name=>"TP53", :loci=>[]}]) end def test_gn_uniprot_parser gn_uniprot_data = '' assert_equal(@obj.instance_eval("gn_uniprot_parser"), [{:orfs=>[], :loci=>[], :name=>"TP53", :synonyms=>["P53"]}]) end def test_gn_old_parser gn_old_data = '' assert_equal(@obj.instance_eval("gn_old_parser"), [["Name=TP53; Synonyms=P53;"]]) end def test_gene_names assert_equal(@obj.gene_names, ["TP53"]) end def test_gene_name assert_equal(@obj.gene_name, 'TP53') end def test_os assert(@obj.os) end def test_os_access assert_equal(@obj.os(0), "Homo sapiens (Human)") end def test_os_access2 assert_equal(@obj.os[0], {"name"=>"(Human)", "os"=>"Homo sapiens"}) end def test_og_1 og = "OG Plastid; Chloroplast." ary = ['Plastid', 'Chloroplast'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_og_2 og = "OG Mitochondrion." ary = ['Mitochondrion'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_og_3 og = "OG Plasmid sym pNGR234a." ary = ["Plasmid sym pNGR234a"] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_og_4 og = "OG Plastid; Cyanelle." ary = ['Plastid', 'Cyanelle'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_og_5 og = "OG Plasmid pSymA (megaplasmid 1)." 
ary = ["Plasmid pSymA (megaplasmid 1)"] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_og_6 og = "OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1." ary = ['Plasmid pNRC100', 'Plasmid pNRC200', 'Plasmid pHH1'] @obj.instance_eval("@orig['OG'] = '#{og}'") assert_equal(@obj.og, ary) end def test_oc assert_equal(@obj.oc, ["Eukaryota", "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Mammalia", "Eutheria", "Euarchontoglires", "Primates", "Catarrhini", "Hominidae", "Homo"]) end def test_ox assert_equal(@obj.ox, {"NCBI_TaxID"=>["9606"]}) end def test_ref # Bio::EMBL::COMMON#ref @obj.ref end def test_cc assert_equal(@obj.cc.class, Hash) end def test_cc_database db = [{"NAME"=>"IARC TP53 mutation database", "WWW"=>"http://www.iarc.fr/p53/", "FTP"=>nil, "NOTE"=>"IARC db of somatic p53 mutations"}, {"NAME"=>"Tokyo p53", "WWW"=>"http://p53.genome.ad.jp/", "FTP"=>nil, "NOTE"=>"University of Tokyo db of p53 mutations"}, {"NAME"=>"p53 web site at the Institut Curie", "WWW"=>"http://p53.curie.fr/", "FTP"=>nil, "NOTE"=>nil}, {"NAME"=>"Atlas Genet. Cytogenet. Oncol. 
Haematol.", "WWW"=>"http://www.infobiogen.fr/services/chromcancer/Genes/P53ID88.html", "FTP"=>nil, "NOTE"=>nil}] assert_equal(@obj.cc('DATABASE'), db) end def test_cc_alternative_products ap = {"Comment"=>nil, "Named isoforms"=>"2", "Variants"=> [{"IsoId"=>"P04637-1", "Name"=>"1", "Sequence"=>["Displayed"]}, {"IsoId"=>"P04637-2", "Name"=>"2", "Synonyms"=>"I9RET", "Sequence"=>["VSP_006535", "VSP_006536"]}], "Event"=>"Alternative splicing"} assert_equal(@obj.cc('ALTERNATIVE PRODUCTS'), ap) end def test_cc_mass_spectrometry assert_equal(@obj.cc('MASS SPECTROMETRY'), nil) end def test_kw keywords = ["3D-structure", "Acetylation", "Activator", "Alternative splicing", "Anti-oncogene", "Apoptosis", "Cell cycle", "Disease mutation", "DNA-binding", "Glycoprotein", "Li-Fraumeni syndrome", "Metal-binding", "Nuclear protein", "Phosphorylation", "Polymorphism", "Transcription", "Transcription regulation", "Zinc"] assert_equal(@obj.kw, keywords) end def test_ft assert(@obj.ft) name = 'DNA_BIND' assert_equal(@obj.ft(name), [{"FTId"=>nil, "From"=>102, "diff"=>[], "To"=>292, "Description"=>nil}]) end def test_sq assert_equal(@obj.sq, {"CRC64"=>"AD5C149FD8106131", "aalen"=>393, "MW"=>43653}) end def test_sq_crc64 assert_equal(@obj.sq('CRC64'), "AD5C149FD8106131") end def test_sq_mw mw = 43653 assert_equal(@obj.sq('mw'), mw) assert_equal(@obj.sq('molecular'), mw) assert_equal(@obj.sq('weight'), mw) end def test_sq_len length = 393 assert_equal(@obj.sq('len'), length) assert_equal(@obj.sq('length'), length) assert_equal(@obj.sq('AA'), length) end def test_seq seq = 'MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD' assert_equal(@obj.seq, seq) 
assert_equal(@obj.aaseq, seq) end end end From nakao at pub.open-bio.org Thu Oct 27 05:30:44 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:10:12 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.26,1.27 Message-ID: <200510270930.j9R9UiVL004071@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4056/lib/bio/db/embl Modified Files: sptr.rb Log Message: * Updated RDoc documents. * Changed Bio::EMBL::Common to Bio::EMBLDB::Common. Index: sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v retrieving revision 1.26 retrieving revision 1.27 diff -C2 -d -r1.26 -r1.27 *** sptr.rb 23 Oct 2005 08:59:43 -0000 1.26 --- sptr.rb 27 Oct 2005 09:30:42 -0000 1.27 *************** *** 106,110 **** ! # Bio::EMBL::Common#ac -> ary # #accessions -> ary # #accession -> String (accessions.first) --- 106,110 ---- ! # Bio::EMBLDB::Common#ac -> ary # #accessions -> ary # #accession -> String (accessions.first) *************** *** 206,209 **** --- 206,210 ---- end + # returns contents in the old style GN line. # GN Line: Gene name(s) (>=0, optional) # GN HNS OR DRDX OR OSMZ OR BGLY. *************** *** 230,234 **** private :gn_old_parser ! # The new format of the GN line is: # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...]; --- 231,235 ---- private :gn_old_parser ! # returns contents in the structured GN line. # The new format of the GN line is: # GN Name=; Synonyms=[, ...]; OrderedLocusNames=[, ...]; *************** *** 323,327 **** ! # Bio::EMBL::Common#og -> Array # OG Line; organella (0 or 1/entry) # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"] --- 324,328 ---- ! # Bio::EMBLDB::Common#og -> Array # OG Line; organella (0 or 1/entry) # ["MITOCHONDRION", "CHLOROPLAST", "Cyanelle", "Plasmid"] *************** *** 329,333 **** ! 
# Bio::EMBL::Common#oc -> Array # OC Line; organism classification (>=1) # "OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;" --- 330,334 ---- ! # Bio::EMBLDB::Common#oc -> Array # OC Line; organism classification (>=1) # "OC Eukaryota; Alveolata; Apicomplexa; Piroplasmida; Theileriidae;" *************** *** 357,361 **** ! # Bio::EMBL::Common#ref -> Array # R Lines # RN RC RP RX RA RT RL --- 358,362 ---- ! # Bio::EMBLDB::Common#ref -> Array # R Lines # RN RC RP RX RA RT RL *************** *** 513,516 **** --- 514,520 ---- return tmp + when 'INTERACTION' + return cc_interaction_parse(@data['CC']['INTERACTION'].to_s) + when nil return @data['CC'] *************** *** 522,525 **** --- 526,530 ---- + def cc_ap_variants_parse(ent) hsh = {} *************** *** 536,539 **** --- 541,556 ---- + # returns conteins in a line of the CC INTERACTION section. + # + # CC P46527:CDKN1B; NbExp=1; IntAct=EBI-359815, EBI-519280; + def cc_interaction_parse(str) + it = str.scan(/(.+?); NbExp=(.+?); IntAct=(.+?);/) + it.map {|ent| + {:partner_id => ent[0].strip, + :nbexp => ent[1].strip, + :intact_acc => ent[2].split(', ') } + } + end + private :cc_interaction_parse # returns databases cross-references in the DR lines. *************** *** 550,555 **** 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN'] ! # Bio::EMBL::Common#kw - Array ! # #keywords -> Array # # KW Line; keyword (>=1) --- 567,572 ---- 'SWISS-2DPAGE','TIGR','TRANSFAC','TUBERCULIST','WORMPEP','YEPD','ZFIN'] ! # Bio::EMBLDB::Common#kw - Array ! # #keywords -> Array # # KW Line; keyword (>=1) From nakao at pub.open-bio.org Thu Oct 27 05:34:51 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 10:10:13 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl embl.rb,1.22,1.23 Message-ID: <200510270934.j9R9YpVL004110@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4100/lib/bio/db/embl Modified Files: embl.rb Log Message: * Removed an argument in the ft(num) method. Bio::EMBL#ft no longer accepts any argument. Index: embl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v retrieving revision 1.22 retrieving revision 1.23 diff -C2 -d -r1.22 -r1.23 *** embl.rb 23 Oct 2005 09:59:48 -0000 1.22 --- embl.rb 27 Oct 2005 09:34:49 -0000 1.23 *************** *** 229,240 **** # returns contents in the feature table (FT) lines. ! # * Bio::EMBL#ft -> [ * ] ! # * Bio::EMBL#ft {} -> {|| } ! # * Bio::EMBL#ft(Int) -> Hash # # same as features method in bio/db/genbank.rb # # FT Line; feature table data (>=0) ! def ft(num = nil) unless @data['FT'] @data['FT'] = Array.new --- 229,239 ---- # returns contents in the feature table (FT) lines. ! # * Bio::EMBL#ft -> Bio::Features ! # * Bio::EMBL#ft {} -> {|Bio::Feature| } # # same as features method in bio/db/genbank.rb # # FT Line; feature table data (>=0) ! def ft unless @data['FT'] @data['FT'] = Array.new *************** *** 271,276 **** end if block_given? ! @data['FT'].each do |feature_table| ! yield feature_table end else --- 270,275 ---- end if block_given? ! @data['FT'].each do |feature| ! yield feature end else From nakao at pub.open-bio.org Thu Oct 27 05:36:09 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 10:10:14 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl common.rb,1.5,1.6 Message-ID: <200510270936.j9R9a9VL004140@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4130/lib/bio/db/embl Modified Files: common.rb Log Message: * Cleaned up Bio::EMBLDB::Common#og code. Index: common.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** common.rb 23 Oct 2005 09:25:16 -0000 1.5 --- common.rb 27 Oct 2005 09:36:07 -0000 1.6 *************** *** 167,175 **** # # OG Line; organella (0 or 1/entry) def og unless @data['OG'] og = Array.new if get('OG').size > 0 ! fetch('OG').sub(/\.$/,'').sub(/ and/,'').split(/,/).each do |tmp| og.push(tmp.strip) end --- 167,185 ---- # # OG Line; organella (0 or 1/entry) + # OG Plastid; Chloroplast. + # OG Mitochondrion. + # OG Plasmid sym pNGR234a. + # OG Plastid; Cyanelle. + # OG Plasmid pSymA (megaplasmid 1). + # OG Plasmid pNRC100, Plasmid pNRC200, and Plasmid pHH1. def og unless @data['OG'] og = Array.new if get('OG').size > 0 ! ogstr = fetch('OG') ! ogstr.sub!(/\.$/,'') ! ogstr.sub!(/ and/,'') ! ogstr.sub!(/;/, ',') ! ogstr.split(',').each do |tmp| og.push(tmp.strip) end From nakao at pub.open-bio.org Thu Oct 27 05:37:24 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:10:15 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db/embl test_common.rb, NONE, 1.1 Message-ID: <200510270937.j9R9bOVL004188@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4178/test/unit/bio/db/embl Added Files: test_common.rb Log Message: * Initially imported unit test for Bio::EMBLDB::Common. 
--- NEW FILE: test_common.rb --- # # test/unit/bio/db/embl/common.rb - Unit test for Bio::EMBL::COMMON module # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_common.rb,v 1.1 2005/10/27 09:37:22 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/db/embl/common' module Bio class EMBL_API include Bio::EMBLDB::Common end class TestEMBLCommon < Test::Unit::TestCase def setup @obj = Bio::EMBLDB::Common end def test_ac assert(@obj.instance_methods.find {|x| x == 'ac' }) end def test_accessions assert(@obj.instance_methods.find {|x| x == 'accessions' }) end def test_accession assert(@obj.instance_methods.find {|x| x == 'accession' }) end def test_de assert(@obj.instance_methods.find {|x| x == 'de' }) end def test_description assert(@obj.instance_methods.find {|x| x == 'description' }) end def test_definition assert(@obj.instance_methods.find {|x| x == 'definition' }) end def test_os assert(@obj.instance_methods.find {|x| x == 'os' }) end def test_og assert(@obj.instance_methods.find {|x| x == 'og' }) end def test_oc assert(@obj.instance_methods.find {|x| x == 'oc' }) end def test_kw 
assert(@obj.instance_methods.find {|x| x == 'kw' }) end def test_keywords assert(@obj.instance_methods.find {|x| x == 'keywords' }) end def test_ref assert(@obj.instance_methods.find {|x| x == 'ref' }) end def test_references assert(@obj.instance_methods.find {|x| x == 'references' }) end def test_dr assert(@obj.instance_methods.find {|x| x == 'dr' }) end end class TestEMBLAPI < Test::Unit::TestCase def setup data =< Update of /home/repository/bioruby/bioruby/test/unit/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv4206/test/unit/bio/db/embl Modified Files: test_embl.rb Log Message: * Changed Bio::EMBL::Common to Bio::EMBLDB::Common. Index: test_embl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/db/embl/test_embl.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_embl.rb 25 Sep 2005 05:07:12 -0000 1.1 --- test_embl.rb 27 Oct 2005 09:38:12 -0000 1.2 *************** *** 41,59 **** --- 41,65 ---- assert(@obj.id_line) end + def test_id_line_iterator assert(@obj.id_line {|key, value| }) end + def test_id_line_entry_name assert_equal(@obj.id_line('ENTRY_NAME'), 'AB090716') end + def test_id_line_data_class assert_equal(@obj.id_line('DATA_CLASS'), 'standard') end + def test_id_line_molecule_type assert_equal(@obj.id_line('MOLECULE_TYPE'), 'genomic DNA') end + def test_id_line_division assert_equal(@obj.id_line('DIVISION'), 'VRT') end + def test_id_line_sequence_length assert_equal(@obj.id_line('SEQUENCE_LENGTH'), 166) *************** *** 83,91 **** end ! def test_ac # Bio::EMBLDB#ac ac = ['AB090716'] assert_equal(@obj.ac, ac) assert_equal(@obj.accessions, ac) end def test_accession assert_equal(@obj.accession, 'AB090716') --- 89,100 ---- end ! # Bio::EMBLDB::COMMON#ac ! 
def test_ac ac = ['AB090716'] assert_equal(@obj.ac, ac) assert_equal(@obj.accessions, ac) end + + # Bio::EMBLDB::COMMON#accession def test_accession assert_equal(@obj.accession, 'AB090716') *************** *** 103,121 **** assert(@obj.dt) end def test_dt_iterator assert(@obj.dt {|key, value| }) end def test_dt_created assert_equal(@obj.dt('created'), '25-OCT-2002 (Rel. 73, Created)') end def test_dt_updated assert_equal(@obj.dt('updated'), '29-NOV-2002 (Rel. 73, Last updated, Version 2)') end ! def test_de # Bio::EMBL::COMMON#de assert_equal(@obj.de, "Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.") end ! def test_kw # Bio::EMBL::COMMON#kw k = [] assert_equal(@obj.kw, []) --- 112,135 ---- assert(@obj.dt) end + def test_dt_iterator assert(@obj.dt {|key, value| }) end + def test_dt_created assert_equal(@obj.dt('created'), '25-OCT-2002 (Rel. 73, Created)') end + def test_dt_updated assert_equal(@obj.dt('updated'), '29-NOV-2002 (Rel. 73, Last updated, Version 2)') end ! # Bio::EMBLDB::COMMON#de ! def test_de assert_equal(@obj.de, "Haplochromis sp. 'muzu, rukwa' LWS gene for long wavelength-sensitive opsin, partial cds, specimen_voucher:specimen No. HT-9361.") end ! # Bio::EMBLDB::COMMON#kw ! def test_kw k = [] assert_equal(@obj.kw, []) *************** *** 133,153 **** end ! def test_oc # Bio::EMBL::COMMON#oc assert_equal(@obj.oc.first, 'Eukaryota') end ! def test_og # Bio::EMBL::COMMON#og assert_equal(@obj.og, []) end ! def test_ref # Bio::EMBL::COMMON#ref assert_equal(@obj.ref.size, 2) end ! def test_references # Bio::EMBL::COMMON#references assert_equal(@obj.references.class, Bio::References) end ! def test_dr # Bio::EMBL::COMMON#dr assert_equal(@obj.dr, {}) end --- 147,172 ---- end ! # Bio::EMBLDB::COMMON#oc ! def test_oc assert_equal(@obj.oc.first, 'Eukaryota') end ! # Bio::EMBLDB::COMMON#og ! def test_og assert_equal(@obj.og, []) end ! # Bio::EMBLDB::COMMON#ref ! 
def test_ref assert_equal(@obj.ref.size, 2) end ! # Bio::EMBLDB::COMMON#references ! def test_references assert_equal(@obj.references.class, Bio::References) end ! # Bio::EMBLDB::COMMON#dr ! def test_dr assert_equal(@obj.dr, {}) end *************** *** 158,169 **** def test_ft ! assert(@obj.ft) ! assert(@obj.features) end def test_ft_iterator ! assert(@obj.ft {|x| }) end def test_ft_accessor ! assert_equal(@obj.ft(1).features.first.feature, 'source') end --- 177,191 ---- def test_ft ! assert_equal(@obj.ft.class, Bio::Features) end + def test_ft_iterator ! @obj.ft.each do |feature| ! assert_equal(feature.class, Bio::Feature) ! end end + def test_ft_accessor ! assert_equal(@obj.ft.features[1].feature, 'CDS') end *************** *** 188,193 **** def test_sq ! assert_equal(@obj.sq, {"a"=>29, "c"=>42, "ntlen"=>166, "g"=>41, "t"=>54, "other"=>0}) end def test_sq_get assert_equal(@obj.sq("a"), 29) --- 210,217 ---- def test_sq ! data = {"a"=>29, "c"=>42, "ntlen"=>166, "g"=>41, "t"=>54, "other"=>0} ! assert_equal(@obj.sq, data) end + def test_sq_get assert_equal(@obj.sq("a"), 29) From nakao at pub.open-bio.org Thu Oct 27 10:21:25 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 10:20:01 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_fasta.rb,NONE,1.1 Message-ID: <200510271421.j9RELPVL008516@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory pub.open-bio.org:/tmp/cvs-serv8486/test/unit/bio/db Added Files: test_fasta.rb Log Message: * Initially imported unit test for Bio::FastaFormat classes. --- NEW FILE: test_fasta.rb --- # # test/unit/bio/db/test_fasta.rb - Unit test for Bio::FastaFormat # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_fasta.rb,v 1.1 2005/10/27 14:21:23 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/db/fasta' module Bio class TestFastaFormatConst < Test::Unit::TestCase def test_delimiter assert_equal(Bio::FastaFormat::DELIMITER, "\n>") assert_equal(Bio::FastaFormat::RS, "\n>") end end # class TestFastaFormatConst class TestFastaFormat < Test::Unit::TestCase def setup text =<sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST] MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST] MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN 
FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER KTGDPLEWRRLFKKISTICRDIILIPN END @obj = Bio::FastaFormat.new(text) end def test_entry data = ">sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG\nVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME\nGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL\nKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC\nIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP\nQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES\n" assert_equal(@obj.entry, data) end def test_entry_id assert_equal(@obj.entry_id, 'sce:YBR160W') end def test_definition data = "sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]" assert_equal(@obj.definition, data) end def test_data data = "\nMSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG\nVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME\nGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL\nKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC\nIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP\nQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES\n" assert_equal(@obj.data, data) end def test_seq seq = 'MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES' assert_equal(@obj.seq, seq) end def test_length assert_equal(@obj.length, 298) end def test_naseq seq = 'msgelanykrlekvgegtygvvykaldlrpgqgqrvvalkkirlesedegvpstaireisllkelkddnivrlydivhsdahklylvfefldldlkrymegipkdqplgadivkkfmmqlckgiaychshrilhrdlkpqnllinkdgnlklgdfglarafgvplraytheivtlwyrapevllggkqystgvdtwsigcifaemcnrkpifsgdseidqifkifrvlgtpneaiwpdivylpdfkpsfpqwrrkdlsqvvpsldprgidlldkllaydpinrisarraaihpyfqes' assert_equal(@obj.naseq, seq) end def test_nalen 
assert_equal(@obj.nalen, 298) end def test_aaseq seq = "MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEGVPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYMEGIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNLKLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGCIFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFPQWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES" assert_equal(@obj.aaseq, seq) end def test_aalen assert_equal(@obj.aalen, 298) end def test_identifiers assert_equal(@obj.identifiers, '') end def test_gi assert_equal(@obj.gi, '') end def test_accession assert_equal(@obj.accession, '') end def test_accessions assert_equal(@obj.accessions, '') end def test_acc_version assert_equal(@obj.acc_version, '') end def test_locus assert_equal(@obj.locus, '') end end # class TestFastaFormat class TestFastaNumericFormat < Test::Unit::TestCase def setup text =<CRA3575282.F 24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 32 29 29 25 END @obj = Bio::FastaNumericFormat.new(text) end def test_entry assert_equal(@obj.entry, ">CRA3575282.F\n24 15 23 29 20 13 20 21 21 23 22 25 13 22 17 15 25 27 32 26 \n32 29 29 25\n") end def test_entry_id assert_equal(@obj.entry_id, 'CRA3575282.F') end def test_definition assert_equal(@obj.definition, 'CRA3575282.F') end def test_data data = [24, 15, 23, 29, 20, 13, 20, 21, 21, 23, 22, 25, 13, 22, 17, 15, 25, 27, 32, 26, 32, 29, 29, 25] assert_equal(@obj.data, data) end def test_length assert_equal(@obj.length, 24) end def test_each assert(@obj.each {|x| }) end def test_arg assert(@obj[0], '') assert(@obj[-1], '') end end # class TestFastaFormatNumeric class TestFastaDefinition < Test::Unit::TestCase def setup end def test_defline end end # class TestFastaDefinition end From nakao at pub.open-bio.org Thu Oct 27 11:07:27 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 11:05:54 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/shell - New directory Message-ID: <200510271507.j9RF7RVL008786@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/shell In directory pub.open-bio.org:/tmp/cvs-serv8776/test/unit/bio/shell Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/shell added to the repository From nakao at pub.open-bio.org Thu Oct 27 11:10:50 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 11:09:16 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_shell.rb,NONE,1.1 Message-ID: <200510271510.j9RFAoVL008894@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory pub.open-bio.org:/tmp/cvs-serv8884/test/unit/bio Added Files: test_shell.rb Log Message: * Initially imported unit test for Bio::Shell. --- NEW FILE: test_shell.rb --- # # test/unit/bio/test_shell.rb - Unit test for Bio::Shell # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_shell.rb,v 1.1 2005/10/27 15:10:48 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/shell' module Bio class TestShell < Test::Unit::TestCase def test_const_bioruby_config assert_equal($bioruby_config, {}) end def test_const_bioruby_cache assert_equal($bioruby_cache, {}) end end end From nakao at pub.open-bio.org Thu Oct 27 11:11:53 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 11:10:18 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_command.rb,NONE,1.1 Message-ID: <200510271511.j9RFBrVL008911@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory pub.open-bio.org:/tmp/cvs-serv8901/test/unit/bio Added Files: test_command.rb Log Message: * Initially imported unit test for Bio::Command::Tools. --- NEW FILE: test_command.rb --- # # test/unit/bio/test_command.rb - Unit test for external command execution methods # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_command.rb,v 1.1 2005/10/27 15:11:51 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/command' module Bio class TestCommandTools < Test::Unit::TestCase def test_command_tools_constants Bio::Command::Tools::UNSAFE_CHARS_UNIX Bio::Command::Tools::QUOTE_CHARS_WINDOWS Bio::Command::Tools::UNESCAPABLE_CHARS end def test_escape_shell_windows end def test_escape_shell_unix end def test_escape_shell end def test_make_command_line end def test_make_command_line_windows end def test_make_command_line_unix end def test_call_commandline_local end def test_call_commandline_local_popen end def test_call_commandline_local_open3 end def test_errorlog end end end From nakao at pub.open-bio.org Thu Oct 27 11:13:06 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 11:11:32 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio test_extend.rb,NONE,1.1 Message-ID: <200510271513.j9RFD6VL008928@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio In directory pub.open-bio.org:/tmp/cvs-serv8918/test/unit/bio Added Files: test_extend.rb Log Message: * Initially imported unit test for miscellaneous methods in bio/extend.rb. --- NEW FILE: test_extend.rb --- # # test/unit/bio/test_extend.rb - Unit test for add-on methods # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_extend.rb,v 1.1 2005/10/27 15:13:04 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 3, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/extend' module Bio class TestString < Test::Unit::TestCase def test_to_naseq assert_equal(String.new("ACGT").to_naseq, Bio::Sequence::NA.new("ACGT")) end def test_toaaseq assert_equal(String.new("ACGT").to_aaseq, Bio::Sequence::AA.new("ACGT")) end def test_fold end def test_fill end end class TestArray < Test::Unit::TestCase def test_inject end def test_sum end def test_product end end end From nakao at pub.open-bio.org Thu Oct 27 12:56:50 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 12:55:17 2005 Subject: [BioRuby-cvs] bioruby/test/data/uniprot p53_human.uniprot,NONE,1.1 Message-ID: <200510271656.j9RGuoVL009568@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/uniprot In directory pub.open-bio.org:/tmp/cvs-serv9558/test/data/uniprot Added Files: p53_human.uniprot Log Message: * Initially imported data file for Bio::SPTR and Bio::UniProt unit test. --- NEW FILE: p53_human.uniprot --- ID P53_HUMAN STANDARD; PRT; 393 AA. AC P04637; Q15086; Q15087; Q15088; Q16535; Q16807; Q16808; Q16809; AC Q16810; Q16811; Q16848; Q86UG1; Q8J016; Q99659; Q9BTM4; Q9HAQ8; AC Q9NP68; Q9NPJ2; Q9NZD0; Q9UBI2; Q9UQ61; DT 13-AUG-1987 (Rel. 05, Created) DT 01-MAR-1989 (Rel. 10, Last sequence update) DT 13-SEP-2005 (Rel. 
48, Last annotation update) DE Cellular tumor antigen p53 (Tumor suppressor p53) (Phosphoprotein p53) DE (Antigen NY-CO-13). GN Name=TP53; Synonyms=P53; OS Homo sapiens (Human). OC Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi; OC Mammalia; Eutheria; Euarchontoglires; Primates; Catarrhini; Hominidae; OC Homo. OX NCBI_TaxID=9606; RN [1] RP NUCLEOTIDE SEQUENCE. RX MEDLINE=85230577; PubMed=4006916; RA Zakut-Houri R., Bienz-Tadmor B., Givol D., Oren M.; [...1417 lines suppressed...] FT STRAND 230 236 FT TURN 240 241 FT TURN 243 248 FT STRAND 251 258 FT TURN 260 261 FT STRAND 264 274 FT HELIX 278 286 FT HELIX 335 354 FT HELIX 376 378 FT HELIX 379 384 FT TURN 385 386 SQ SEQUENCE 393 AA; 43653 MW; AD5C149FD8106131 CRC64; MEEPQSDPSV EPPLSQETFS DLWKLLPENN VLSPLPSQAM DDLMLSPDDI EQWFTEDPGP DEAPRMPEAA PPVAPAPAAP TPAAPAPAPS WPLSSSVPSQ KTYQGSYGFR LGFLHSGTAK SVTCTYSPAL NKMFCQLAKT CPVQLWVDST PPPGTRVRAM AIYKQSQHMT EVVRRCPHHE RCSDSDGLAP PQHLIRVEGN LRVEYLDDRN TFRHSVVVPY EPPEVGSDCT TIHYNYMCNS SCMGGMNRRP ILTIITLEDS SGNLLGRNSF EVRVCACPGR DRRTEEENLR KKGEPHHELP PGSTKRALPN NTSSSPQPKK KPLDGEYFTL QIRGRERFEM FRELNEALEL KDAQAGKEPG GSRAHSSHLK SKKGQSTSRH KKLMFKTEGP DSD // From nakao at pub.open-bio.org Thu Oct 27 13:17:52 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 13:16:17 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/db test_prosite.rb,NONE,1.1 Message-ID: <200510271717.j9RHHqVL009643@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/db In directory pub.open-bio.org:/tmp/cvs-serv9633/test/unit/bio/db Added Files: test_prosite.rb Log Message: * Initially imported unit test for Bio::PROSITE.
--- NEW FILE: test_prosite.rb --- # # test/unit/bio/db/test_prosite.rb - Unit test for Bio::PROSITE # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # [...1418 lines suppressed...] def test_pa2re assert_equal(@obj.pa2re, '') end def test_ma2re assert_raise(@obj.ma2re, NotImplementedError) end def test_self_pa2re assert(Bio::PROSITE.pattern2re) end def test_self_ma2re assert(Bio::PROSITE.profile2re) end end # class TestPROSITE end From nakao at pub.open-bio.org Thu Oct 27 22:02:12 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:00:41 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/shell/plugin - New directory Message-ID: <200510280202.j9S22CVL010828@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv10818/test/unit/bio/shell/plugin Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/shell/plugin added to the repository From nakao at pub.open-bio.org Thu Oct 27 22:03:36 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 22:02:17 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/shell/plugin test_seq.rb, NONE, 1.1 Message-ID: <200510280203.j9S23aVL010855@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv10845/test/unit/bio/shell/plugin Added Files: test_seq.rb Log Message: * Initially imported unit test for bio/shell/plugin/seq.rb. --- NEW FILE: test_seq.rb --- # # test/unit/bio/shell/plugin/test_seq.rb - Unit test for Bio::Shell plugin for biological sequence manipulations # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_seq.rb,v 1.1 2005/10/28 02:03:34 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/shell' module Bio class TestShellPluginSeq < Test::Unit::TestCase include Bio::Shell def test_naseq str = 'ACGT' assert_equal(naseq(str).class, Bio::Sequence::NA) assert_equal(naseq(str), Bio::Sequence::NA.new(str)) assert_equal(naseq(str), 'acgt') end def test_aaseq str = 'WD' assert_equal(aaseq(str).class, Bio::Sequence::AA) assert_equal(aaseq(str), Bio::Sequence::AA.new('WD')) assert_equal(aaseq(str), 'WD') end def test_revseq str = 'acgta' assert_equal(revseq(str), 'tacgt') end def test_translate str = 'ATGATG' assert_equal(translate(str), Bio::Sequence::AA.new('MM')) end def test_seq_report_na str = 'ACGT' output = '' assert_equal(seq_report(str), output) end def test_seq_report_aa str = 'WD' output = '' assert_equal(seq_report(str), output) end def test_na_report naseq = 'ACGT' output =<1, \"c\"=>1, \"g\"=>1, \"t\"=>1} molecular weight : 1245.88148 complemnet weight : 1245.88148 protein weight : 119.12 // END assert_equal(na_report(naseq), output) end def test_aa_report aaseq = 'WD' output =<1, \"D\"=>1} protein weight : 319.315 amino acid codes : [\"Trp\", \"Asp\"] amino acid names : [\"tryptophan\", \"aspartic acid\"] // END assert_equal(aa_report(aaseq), output) end def test_double_helix seq = 'ACGTACGTACGTACGT' output = < Update of /home/repository/bioruby/bioruby/lib/bio/shell/plugin In directory pub.open-bio.org:/tmp/cvs-serv10873/lib/bio/shell/plugin Modified Files: seq.rb Log Message: * Changed way to print output (puts -> display). * Now returns the pretty DNA sequence in String. 
Index: seq.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/plugin/seq.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** seq.rb 23 Sep 2005 15:51:59 -0000 1.3 --- seq.rb 28 Oct 2005 02:08:10 -0000 1.4 *************** *** 100,104 **** --- 100,108 ---- end + # Reterns and displays a DNA sequence pretty printing + # in B-type double helix. + # Argument ``seq'' required at least 16 bases length. def double_helix(seq) + str = '' m = [[5, 0], [4, 2], [3, 3], [2, 4], [1, 4], [0, 3], [0, 2], [1, 0]] *************** *** 106,116 **** m.each_with_index do |mij, x| base = subseq[x, 1] ! puts ' ' * mij[0] + base + '-' * mij[1] + base.complement end m.reverse.each_with_index do |mij, x| base = subseq[x + 8, 1] ! puts ' ' * mij[0] + base.complement + '-' * mij[1] + base end end end --- 110,122 ---- m.each_with_index do |mij, x| base = subseq[x, 1] ! str << ' ' * mij[0] + base + '-' * mij[1] + base.complement + "\n" end m.reverse.each_with_index do |mij, x| base = subseq[x + 8, 1] ! str << ' ' * mij[0] + base.complement + '-' * mij[1] + base + "\n" end end + display(str) + return str end From nakao at pub.open-bio.org Thu Oct 27 22:16:43 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:15:08 2005 Subject: [BioRuby-cvs] bioruby/test/data/prosite - New directory Message-ID: <200510280216.j9S2GhVL010941@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/prosite In directory pub.open-bio.org:/tmp/cvs-serv10931/test/data/prosite Log Message: Directory /home/repository/bioruby/bioruby/test/data/prosite added to the repository From nakao at pub.open-bio.org Thu Oct 27 22:17:29 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 22:15:58 2005 Subject: [BioRuby-cvs] bioruby/test/data/genbank - New directory Message-ID: <200510280217.j9S2HTVL010968@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/genbank In directory pub.open-bio.org:/tmp/cvs-serv10958/test/data/genbank Log Message: Directory /home/repository/bioruby/bioruby/test/data/genbank added to the repository From nakao at pub.open-bio.org Thu Oct 27 22:19:05 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:17:31 2005 Subject: [BioRuby-cvs] bioruby/test/data/refseq - New directory Message-ID: <200510280219.j9S2J5VL010986@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/refseq In directory pub.open-bio.org:/tmp/cvs-serv10976/test/data/refseq Log Message: Directory /home/repository/bioruby/bioruby/test/data/refseq added to the repository From nakao at pub.open-bio.org Thu Oct 27 22:30:59 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:29:47 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/blast test_report.rb, NONE, 1.1 Message-ID: <200510280230.j9S2UxVL011077@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv11067/test/unit/bio/appl/blast Added Files: test_report.rb Log Message: * Initial import unit test for Bio::Blast::Report. --- NEW FILE: test_report.rb --- # # test/unit/bio/appl/blast/test_report.rb - Unit test for Bio::Blast::Report # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_report.rb,v 1.1 2005/10/28 02:30:57 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/blast/report' module Bio class TestBlastReportData bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s TestDataBlast = Pathname.new(File.join(bioruby_root, 'test', 'data', 'blast')).cleanpath.to_s def self.input File.open(File.join(TestDataBlast, 'eco:b0002.faa')).read end def self.output(format = 7) case format when 0 File.open(File.join(TestDataBlast, 'eco:b0002.faa.m0')).read when 7 File.open(File.join(TestDataBlast, 'eco:b0002.faa.m7')).read when 8 File.open(File.join(TestDataBlast, 'eco:b0002.faa.m8')).read end end end class TestBlastReport < Test::Unit::TestCase require 'bio/appl/blast/report' def setup @report = Bio::Blast::Report.new(Bio::TestBlastReportData.output) end def test_iterations assert(@report.iterations) end def test_parameters assert_equal(@report.parameters['matrix'], 'BLOSUM62') assert_equal(@report.parameters['expect'], 10) assert_equal(@report.parameters['gap-open'], 11) assert_equal(@report.parameters['gap-extend'], 1) assert_equal(@report.parameters['filter'], 'S') end def test_program assert_equal(@report.program, 'blastp') end def test_version assert_equal(@report.version, 'blastp 2.2.10 [Oct-19-2004]') end def test_reference xml_quoted_str = "~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. 
Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402." text_str = '~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.' assert_equal(@report.reference, xml_quoted_str) assert_equal(@report.reference, text_str) end def test_db assert_equal(@report.db, 'eco:b0002.faa') end def test_query_id assert_equal(@report.query_id, 'lcl|QUERY') end def test_query_def assert_equal(@report.query_def, 'eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)') end def test_query_len assert_equal(@report.query_len, 820) end def test_matrix assert_equal(@report.matrix, 'BLOSUM62') end def test_expect assert_equal(@report.expect, 10) end def test_inclusion assert(@report.inclusion) end def test_sc_match assert(@report.sc_match) end def test_sc_mismatch assert(@report.sc_mismatch) end def test_gap_open assert_equal(@report.gap_open, 11) end def test_gap_extend assert_equal(@report.gap_extend, 1) end def test_filter assert_equal(@report.filter, 'S') end def test_pattern assert(@report.pattern) end def test_extrez_query assert(@report.entrez_query) end def test_each_iteration @report.each_iteration { |itr| } end def test_each_hit @report.each_hit { |hit| } end def test_hits assert(@report.hits) end def test_statistics assert_equal(@report.statistics, {"kappa"=>0.041, "db-num"=>1, "eff-space"=>605284.0, "hsp-len"=>42, "db-len"=>820, "lambda"=>0.267, "entropy"=>0.14}) end def test_db_num assert_equal(@report.db_num, 1) end def test_db_len assert_equal(@report.db_len, 820) end def test_hsp_len assert_equal(@report.hsp_len, 42) end def test_eff_space 
assert_equal(@report.eff_space, 605284) end def test_kappa assert_equal(@report.kappa, 0.041) end def test_lambda assert_equal(@report.lambda, 0.267) end def test_entropy assert_equal(@report.entropy, 0.14) end def test_message assert(@report.message) end end class TestBlastReportIteration < Test::Unit::TestCase def setup data = Bio::TestBlastData.data report = Bio::Blast::Report.new(data) @itr = report.iterations.first end def test_hits assert(@itr.hits) end def test_statistics assert(@itr.statistics) end def test_num assert_equal(@itr.num, 1) end def test_message assert(@itr.message) end end class TestBlastReportHit < Test::Unit::TestCase def setup data = Bio::TestBlastData.data report = Bio::Blast::Report.new(data) @hit = report.hits.first end def test_Hit_hsps assert(@hit.hsps) end def test_Hit_query_id assert_equal(@hit.query_id, 'lcl|QUERY') end def test_Hit_query_def assert_equal(@hit.query_def, 'eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)') end def test_Hit_query_len assert_equal(@hit.query_len, 820) end def test_Hit_num assert(@hit.num) end def test_Hit_hit_id assert_equal(@hit.hit_id, 'gnl|BL_ORD_ID|0') end def test_Hit_len assert_equal(@hit.len, 820) end def test_Hit_target_len assert_equal(@hit.target_len, 820) end def test_Hit_definition assert(@hit.definition) end def test_Hit_taeget_def assert(@hit.target_def) end def test_Hit_accession assert(@hit.accession) end def test_Hit_target_id assert(@hit.target_id) end def test_Hit_evalue assert_equal(@hit.evalue, 0) end def test_Hit_bit_score assert_equal(@hit.bit_score, 1567.75) end def test_Hit_identity assert_equal(@hit.identity, 820) end def test_Hit_overlap assert_equal(@hit.overlap, 820) end def test_Hit_query_seq seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hit.query_seq, seq) end def test_Hit_target_seq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hit.target_seq, seq) end def test_Hit_midline seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hit.midline, seq) end def test_Hit_query_start assert_equal(@hit.query_start, 1) # assert_equal(@hit.query_from, 1) end def test_Hit_query_end assert_equal(@hit.query_end, 820) # assert_equal(@hit.query_to, 820) end def test_Hit_target_start assert_equal(@hit.target_start, 1) # assert_equal(@hit.hit_from, 1) end def test_Hit_target_end assert_equal(@hit.target_end, 820) # assert_equal(@hit.hit_to, 820) end def test_Hit_lap_at assert_equal(@hit.lap_at, [1, 820, 1, 820]) end end class TestBlastReportHsp < Test::Unit::TestCase def setup data = Bio::TestBlastData.data report = Bio::Blast::Report.new(data) @hsp = report.hits.first.hsps.first end def test_Hsp_num assert_equal(@hsp.num, 1) end def test_Hsp_hit_score assert_equal(@hsp.bit_score, 1567.75) end def test_Hsp_score assert_equal(@hsp.score, 4058) end def test_Hsp_evalue assert_equal(@hsp.evalue, 0) end def test_Hsp_identity assert_equal(@hsp.identity, 820) end def test_Hsp_gaps assert(@hsp.gaps) end def test_Hsp_positive assert_equal(@hsp.positive, 820) end def test_Hsp_align_len assert_equal(@hsp.align_len, 820) end def test_Hsp_density assert(@hsp.density) end def test_Hsp_query_frame 
assert_equal(@hsp.query_frame, 1) end def test_Hsp_query_from assert_equal(@hsp.query_from, 1) end def test_Hsp_query_to assert_equal(@hsp.query_to, 820) end def test_Hsp_hit_frame assert_equal(@hsp.hit_frame, 1) end def test_Hsp_hit_from assert_equal(@hsp.hit_from, 1) end def test_Hsp_hit_to assert_equal(@hsp.hit_to, 820) end def test_Hsp_pattern_from @hsp.pattern_from end def test_Hsp_pattern_to @hsp.pattern_to end def test_Hsp_qseq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hsp.qseq, seq) end def test_Hsp_midline seq = 
'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hsp.midline, seq) end def test_Hsp_hseq seq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERIFAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEARGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLITQSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAALARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSWLKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAVADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELMKFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIEIEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFKVKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV' assert_equal(@hsp.hseq, seq) end def test_Hsp_percent_identity @hsp.percent_identity end def test_Hsp_mismatch_count @hsp.mismatch_count end end end # module Bio From nakao at pub.open-bio.org Thu Oct 27 22:32:40 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Thu Oct 27 22:31:06 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/blast test_xmlparser.rb, NONE, 1.1 Message-ID: <200510280232.j9S2WeVL011094@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv11084/test/unit/bio/appl/blast Added Files: test_xmlparser.rb Log Message: * Initial import unit test for XMLParser based Bio::Blast::Report. --- NEW FILE: test_xmlparser.rb --- # # test/unit/bio/appl/blast/test_xmlparser.rb - Unit test for Bio::Blast::Report # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_xmlparser.rb,v 1.1 2005/10/28 02:32:38 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/blast' module Bio class TestBlastFormat7XMLParserData bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s TestDataBlast = Pathname.new(File.join(bioruby_root, 'test', 'data', 'blast')).cleanpath.to_s def self.input File.open(File.join(TestDataBlast, 'eco:b0002.faa')).read end def self.output File.open(File.join(TestDataBlast, 'eco:b0002.faa.m7')).read end end class TestBlastReport < Test::Unit::TestCase require 'bio/appl/blast/report' def setup @report = Bio::Blast::Report.new(Bio::TestBlastFormat7XMLParserData.output) end def test_iterations @report.iterations end def test_parameters @report.parameters end def test_program @report.program end def test_version @report.version end def test_reference @report.reference end def test_db assert_equal(@report.db, "eco:b0002.faa") end def test_query_id @report.query_id end def test_query_def @report.query_def end def test_query_len @report.query_len end def test_matrix @report.matrix end def test_expect @report.expect end def test_inclusion @report.inclusion end def test_sc_match @report.sc_match end def test_sc_mismatch @report.sc_mismatch end def test_gap_open @report.gap_open end def test_gap_extend @report.gap_extend end def test_filter @report.filter end def test_pattern @report.pattern end def test_extrez_query @report.entrez_query end def test_each_iteration end def test_each_hit end def test_hits end def test_statistics end def test_db_num @report.db_num end def test_db_len @report.db_len end def 
test_hsp_len @report.hsp_len end def test_eff_space @report.eff_space end def test_kappa @report.kappa end def test_lambda @report.lambda end def test_entropy @report.entropy end def test_message @report.message end end class TestBlastReportIteration < Test::Unit::TestCase def setup data = Bio::TestBlastData.data report = Bio::Blast::Report.new(data) @itr = report.iterations.first end def test_hits @itr.hits end def test_statistics @itr.statistics end def test_num @itr.num end def test_message @itr.message end end class TestBlastReportHit < Test::Unit::TestCase def setup data = Bio::TestBlastFormat7XMLParserData.output report = Bio::Blast::Report.new(data) @hit = report.hits.first end def test_hsps @hit.hsps end def test_query_id @hit.query_id end def test_query_def @hit.query_def end def test_query_len @hit.query_len end def test_num @hit.num end def test_hit_id @hit.hit_id end def test_len @hit.len end def test_target_len @hit.target_len end def test_definition @hit.definition end def test_taeget_def @hit.target_def end def test_accession @hit.accession end def test_target_id @hit.target_id end def test_evalue @hit.evalue end def test_bit_score @hit.bit_score end def test_identity @hit.identity end def test_overlap @hit.overlap end def test_query_seq @hit.query_seq end def test_target_seq @hit.target_seq end def test_midline @hit.midline end def test_query_start @hit.query_start end def test_query_end @hit.query_end end def test_target_start @hit.target_start end def test_target_end @hit.target_end end def test_lap_at @hit.lap_at end end class TestBlastReportHsp < Test::Unit::TestCase def setup data = Bio::TestBlastFormat7XMLParserData.output report = Bio::Blast::Report.new(data) @hsp = report.hits.first.hsps.first end def test_num assert_equal(@hsp.num, 1) end def test_hit_score @hsp.bit_score end def test_score @hsp.score end def test_evalue @hsp.evalue end def test_identity @hsp.identity end def test_gaps @hsp.gaps end def test_positive @hsp.positive end def 
test_align_len @hsp.align_len end def test_density @hsp.density end def test_query_frame @hsp.query_frame end def test_query_from @hsp.query_from end def test_query_to @hsp.query_to end def test_hit_frame @hsp.hit_frame end def test_hit_from @hsp.hit_from end def test_hit_to @hsp.hit_to end def test_pattern_from @hsp.pattern_from end def test_pattern_to @hsp.pattern_to end def test_qseq @hsp.qseq end def test_midline @hsp.midline end def test_hseq @hsp.hseq end def test_percent_identity @hsp.percent_identity end def test_mismatch_count @hsp.mismatch_count end end end From nakao at pub.open-bio.org Thu Oct 27 22:44:29 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:42:52 2005 Subject: [BioRuby-cvs] bioruby/test/data/prosite prosite.dat,NONE,1.1 Message-ID: <200510280244.j9S2iTVL011149@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/prosite In directory pub.open-bio.org:/tmp/cvs-serv11139/test/data/prosite Added Files: prosite.dat Log Message: * Initial import test data for Bio::PROSITE. --- NEW FILE: prosite.dat --- ID G_PROTEIN_RECEP_F1_1; PATTERN. AC PS00237; DT APR-1990 (CREATED); NOV-1997 (DATA UPDATE); JUL-1998 (INFO UPDATE). DE G-protein coupled receptors family 1 signature. PA [GSTALIVMFYWC]-[GSTANCPDE]-{EDPKRH}-x(2)-[LIVMNQGA]-x(2)-[LIVMFT]- PA [GSTANC]-[LIVMFYWSTAC]-[DENH]-R-[FYWCSH]-x(2)-[LIVM]. 
NR /RELEASE=40.7,103373; NR /TOTAL=1121(1121); /POSITIVE=1057(1057); /UNKNOWN=0(0); /FALSE_POS=64(64); NR /FALSE_NEG=112; /PARTIAL=48; CC /TAXO-RANGE=??E?V; /MAX-REPEAT=1; DR O42385, 5H1A_FUGRU, T; P08908, 5H1A_HUMAN, T; Q64264, 5H1A_MOUSE, T; DR P19327, 5H1A_RAT , T; O08892, 5H1B_CAVPO, T; P46636, 5H1B_CRIGR, T; DR P35404, 5H1B_DIDMA, T; O42384, 5H1B_FUGRU, T; P28222, 5H1B_HUMAN, T; DR P28334, 5H1B_MOUSE, T; P49144, 5H1B_RABIT, T; P28564, 5H1B_RAT , T; DR P56496, 5H1B_SPAEH, T; P11614, 5H1D_CANFA, T; Q60484, 5H1D_CAVPO, T; DR P79748, 5H1D_FUGRU, T; P28221, 5H1D_HUMAN, T; Q61224, 5H1D_MOUSE, T; DR P79400, 5H1D_PIG , T; P49145, 5H1D_RABIT, T; P28565, 5H1D_RAT , T; DR P28566, 5H1E_HUMAN, T; Q29003, 5H1E_PIG , T; O08890, 5H1F_CAVPO, T; DR P30939, 5H1F_HUMAN, T; Q02284, 5H1F_MOUSE, T; P30940, 5H1F_RAT , T; [...2194 lines suppressed...] DR Q9R024, OPSG_CAVPO, T; P28683, OPSG_CHICK, T; P35358, OPSG_GECGE, T; DR P04001, OPSG_HUMAN, T; O35599, OPSG_MOUSE, T; P87366, OPSG_ORYLA, T; DR O18910, OPSG_RABIT, T; O35476, OPSG_RAT , T; O35478, OPSG_SCICA, T; DR P22331, OPSH_ASTFA, T; P32312, OPSH_CARAU, T; P51474, OPSI_ASTFA, T; DR P34989, OPSL_CALJA, T; O13018, OPSO_SALSA, T; P51475, OPSP_CHICK, T; DR P51476, OPSP_COLLI, T; O42266, OPSP_ICTPU, T; O42490, OPSP_PETMA, T; DR P41592, OPSR_ANOCA, T; P22332, OPSR_ASTFA, T; Q95170, OPSR_CAPHI, T; DR P32313, OPSR_CARAU, T; P22329, OPSR_CHICK, T; O18913, OPSR_FELCA, T; DR P04000, OPSR_HUMAN, T; P87367, OPSR_ORYLA, T; O12948, OPSR_XENLA, T; DR P35359, OPSU_BRARE, T; Q90309, OPSU_CARAU, T; O61303, OPSV_APIME, T; DR P28684, OPSV_CHICK, T; P87368, OPSV_ORYLA, T; P51473, OPSV_XENLA, T; DR O14718, OPSX_HUMAN, T; O35214, OPSX_MOUSE, T; P23820, REIS_TODPA, T; DR P47803, RGR_BOVIN , T; P47804, RGR_HUMAN , T; DR P17645, OPS3_DROVI, P; O18911, OPSG_ODOVI, P; O18914, OPSR_CANFA, P; DR O18912, OPSR_HORSE, P; DR Q9Z2B3, RGR_MOUSE , N; DR Q9CL24, OADB_PASMU, F; Q99NF8, RP17_MOUSE, F; 3D 1BOJ; 1BOK; 1F88; DO PDOC00211; // From nakao at pub.open-bio.org 
Thu Oct 27 22:46:56 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Thu Oct 27 22:45:19 2005 Subject: [BioRuby-cvs] bioruby/test/data/refseq nm_126355.entret,NONE,1.1 Message-ID: <200510280246.j9S2kuVL011167@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/refseq In directory pub.open-bio.org:/tmp/cvs-serv11157/test/data/refseq Added Files: nm_126355.entret Log Message: * Initial import test data for Bio::Refseq. --- NEW FILE: nm_126355.entret --- LOCUS NM_126355 615 bp mRNA linear PLN 25-JAN-2005 DEFINITION Arabidopsis thaliana Toll-Interleukin-Resistance (TIR) domain-containing protein (At2g03030) mRNA, complete cds. ACCESSION NM_126355 VERSION NM_126355.1 GI:18395472 KEYWORDS . SOURCE Arabidopsis thaliana (thale cress) ORGANISM Arabidopsis thaliana Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids; eurosids II; Brassicales; Brassicaceae; Arabidopsis. COMMENT PROVISIONAL REFSEQ: This record has not yet been subject to final NCBI review. This record is derived from an annotated genomic sequence (NC_003071). The reference sequence was derived from mrna.At2g03030.1. 
FEATURES Location/Qualifiers source 1..615 /organism="Arabidopsis thaliana" /mol_type="mRNA" /db_xref="taxon:3702" /chromosome="2" /map="unknown" /clone="CHR2v01212004" /ecotype="Columbia" gene 1..615 /locus_tag="At2g03030" /note="synonym: T17M13.20; Toll-Interleukin-Resistance (TIR) domain-containing protein" /db_xref="GeneID:814832" CDS 1..615 /locus_tag="At2g03030" /note="domain signature TIR exists, suggestive of a disease resistance protein; go_function: defense/immunity protein activity [goid 0003793]; go_process: defense response signaling pathway, resistance-gene dependent [goid 0009870]" /codon_start=1 /product="Toll-Interleukin-Resistance (TIR) domain-containing protein" /protein_id="NP_178403.1" /db_xref="GI:15227520" /db_xref="GeneID:814832" /translation="MTFFSPTQVFLNYRGEQLRRSFVSHLIDAFERNEINFFVDKYEQ RGKDLKNLFLRIQESKIALAIFSTRYTESSWCLDELVKIKKLADKKKLHVIPIFYKVK VEDVRKQTGEFGDNFWTLAKVSSGDQIKKWKEALECIPNKMGLSLGDKSSEADFIKEV VKAVQCVVATIGLEEEEENHFGKKKRKDCKCELPDLKKSRTKKL" misc_feature 22..423 /locus_tag="At2g03030" /note="TIR; Region: Toll - interleukin 1 - resistance" /db_xref="CDD:22729" ORIGIN 1 atgacattct tctctcccac tcaggtgttt ttgaactaca ggggagaaca actgcgtcgc 61 agcttcgtga gccacctcat tgatgccttt gaaaggaatg agatcaactt cttcgtagac 121 aaatacgaac agagaggcaa agacctcaaa aatctctttc ttaggatcca agagtcgaag 181 atcgcgcttg ccatcttctc aaccagatac acggagtcaa gctggtgttt ggatgagttg 241 gtgaagataa agaaacttgc tgataaaaaa aaactccatg tcattccaat tttctacaag 301 gtgaaggtag aagacgttcg aaaacagaca ggtgagtttg gtgacaactt ctggacgctg 361 gcaaaggttt caagtggtga tcagatcaag aaatggaaag aagccttgga atgtatcccc 421 aacaagatgg gtttgtcgtt gggagacaag agttctgaag cagatttcat caaggaagtt 481 gttaaggcgg ttcagtgtgt tgtagcaacg attggacttg aggaagaaga agagaatcat 541 tttgggaaaa agaagagaaa ggattgcaaa tgtgagcttc ctgatttgaa gaaaagcaga 601 accaaaaagt tgtga // From nakao at pub.open-bio.org Mon Oct 31 23:31:50 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:52:24 2005 Subject: [BioRuby-cvs] bioruby/doc Tutorial.rd,1.8,1.9 Message-ID: <200511010431.jA14VoVL006389@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/doc In directory pub.open-bio.org:/tmp/cvs-serv6369/doc Modified Files: Tutorial.rd Log Message: * Fixed rd format. Index: Tutorial.rd =================================================================== RCS file: /home/repository/bioruby/bioruby/doc/Tutorial.rd,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** Tutorial.rd 27 Oct 2005 11:55:58 -0000 1.8 --- Tutorial.rd 1 Nov 2005 04:31:48 -0000 1.9 *************** *** 232,236 **** require 'bio' ! # Read all lines from STDIN split by the GenBank delimiter while entry = gets(Bio::GenBank::DELIMITER) gb = Bio::GenBank.new(entry) # creates GenBank object --- 232,236 ---- require 'bio' ! # Read all lines from STDIN split by the GenBank delimiter while entry = gets(Bio::GenBank::DELIMITER) gb = Bio::GenBank.new(entry) # creates GenBank object *************** *** 246,252 **** #!/usr/bin/env ruby ! require 'bio' ! ff = Bio::FlatFile.new(Bio::GenBank, ARGF) ff.each_entry do |gb| --- 246,252 ---- #!/usr/bin/env ruby ! require 'bio' ! ff = Bio::FlatFile.new(Bio::GenBank, ARGF) ff.each_entry do |gb| *************** *** 301,305 **** puts "# #{gb.accession} - #{gb.organism}" ! # iterates over each element in 'features' gb.features.each do |feature| position = feature.position --- 301,305 ---- puts "# #{gb.accession} - #{gb.organism}" ! # iterates over each element in 'features' gb.features.each do |feature| position = feature.position *************** *** 477,481 **** # Creates FASTA factory object ("ssearch" instead of "fasta34" can also work) factory = Bio::Fasta.local('fasta34', ARGV.pop) ! 
(EDITOR's NOTE: not consistent pop command) # Reads FASTA-formatted files (TRANSLATOR'S NOTE: something wrong in Japanese text) --- 477,481 ---- # Creates FASTA factory object ("ssearch" instead of "fasta34" can also work) factory = Bio::Fasta.local('fasta34', ARGV.pop) ! (EDITOR's NOTE: not consistent pop command) # Reads FASTA-formatted files (TRANSLATOR'S NOTE: something wrong in Japanese text) *************** *** 806,809 **** --- 806,810 ---- E-Utils|URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>)) for more details. + From ngoto at pub.open-bio.org Mon Oct 31 21:56:13 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:31 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.37,1.38 Message-ID: <200511010256.jA12uDVL006121@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory pub.open-bio.org:/tmp/cvs-serv6111 Modified Files: flatfile.rb Log Message: Added autodetection of Bio::HMMER::Report. Changed document format from RD to RDoc. Index: flatfile.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v retrieving revision 1.37 retrieving revision 1.38 diff -C2 -d -r1.37 -r1.38 *** flatfile.rb 26 Sep 2005 13:00:08 -0000 1.37 --- flatfile.rb 1 Nov 2005 02:56:11 -0000 1.38 *************** *** 1,7 **** # ! # bio/io/flatfile.rb - flatfile access wrapper class # ! # Copyright (C) 2001, 2002 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/io/flatfile.rb - flatfile access wrapper class # ! # Copyright:: Copyright (C) 2001, 2002 GOTO Naohisa ! 
# License:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,30 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # ! module Bio class FlatFile include Enumerable def self.open(dbclass, file, *arg) # 3rd and 4th arg: mode, perm (passed to File.open) --- 19,74 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # Bio::FlatFile is a helper and wrapper class to read a biological data file. + # It acts like a IO object. + # It can automatically detect data format, and users do not need to tell + # the class what the data is. + # ! module Bio #:nodoc: + # Bio::FlatFile is a helper and wrapper class to read a biological data file. + # It acts like a IO object. + # It can automatically detect data format, and users do not need to tell + # the class what the data is. class FlatFile include Enumerable + # Creates a new Bio::FlatFile object to read a file or a stream + # which contains +dbclass+ data. + # + # +dbclass+ shoud be a class (or module) or nil. + # e.g. Bio::GenBank, Bio::FastaFormat. + # + # If +file+ is a filename (which doesn't have gets method), + # the method opens a local file named +file+ + # with 'File.open(filename, mode, perm)'. + # + # When nil is given to dbclass, trying to determine database class + # (file format) automatically. If fails to determine, dbclass is + # set to nil and FlatFile#next_entry works same as IO#gets when + # raw = true. It is recommended to set dbclass using + # FlatFile#dbclass= method if fails to determine automatically. 
+ # + # * Example 1 + # Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq") + # * Example 2 + # Bio::FlatFile.open(nil, "embl/est_hum17.dat") + # * Example 3 + # Bio::FlatFile.open(Bio::GenBank, $stdin) + # + # If it is called with block, the block will be executed with + # a newly opened Bio::FlatFile instance object. If filename + # is given, the file is automatically closed when leaving the block. + # + # * Example 4 + # Bio::FlatFile.open(nil, 'test4.fst') do |ff| + # ff.each { |e| print e.definition, "\n" } + # end + # def self.open(dbclass, file, *arg) # 3rd and 4th arg: mode, perm (passed to File.open) *************** *** 53,60 **** --- 97,115 ---- end + # Same as Bio::FlatFile.open(nil, filename_or_stream, mode, perm, options). + # + # * Example 1 + # Bio::FlatFile.auto(ARGF) + # * Example 2 + # Bio::FlatFile.auto("embl/est_hum17.dat") + # * Example 3 + # Bio::FlatFile.auto(IO.popen("gzip -dc nc1101.flat.gz")) + # def self.auto(*arg, &block) self.open(nil, *arg, &block) end + # Same as FlatFile.auto(filename_or_stream, *arg).to_a + # (It might be OBSOLETED in the future.) def self.to_a(*arg) self.auto(*arg) do |ff| *************** *** 64,67 **** --- 119,140 ---- end + # Same as FlatFile.open, except that 'stream' should be a opened + # stream object (IO, File, ..., who have the 'gets' method). + # + # * Example 1 + # Bio::FlatFile.new(Bio::GenBank, ARGF) + # * Example 2 + # Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz")) + # + # +options+ should be a hash (or nil). It will be OBSOLETED!! + # Available options are below: + # [:raw] if true, "raw mode" (same as #raw=true). + # default: false (not "raw mode"). + # + # * Example 3 + # Bio::FlatFile.new(nil, $stdin, :raw=>true) + # * Example 3 in old style (deprecated) + # Bio::FlatFile.new(nil, $stdin, true) + # def initialize(dbclass, stream, options = nil) # 2nd arg: IO object *************** *** 83,88 **** --- 156,164 ---- end end + + # IO object in the flatfile object. 
attr_reader :io + # Get next entry. def next_entry @entry_raw = gets(@rs) *************** *** 104,109 **** --- 180,195 ---- end end + + # Returns the last raw entry as a string. attr_reader :entry_raw + # Iterates over each entry in the flatfile. + # + # * Example + # include Bio + # ff = FlatFile.open(GenBank, "genbank/gbhtg14.seq") + # ff.each_entry do |x| + # puts x.definition + # end def each_entry while e = self.next_entry *************** *** 113,116 **** --- 199,204 ---- alias each each_entry + # Resets file pointer to the start of the flatfile. + # (similar to IO#rewind) def rewind r = @io.rewind *************** *** 119,130 **** --- 207,232 ---- end + # Closes input stream. + # (similar to IO#close) def close @io.close end + # Returns current position of input stream. + # If the input stream is not a normal file, + # the result is not guaranteed. + # It is similar to IO#pos. + # Note that it will not be equal to io.pos, + # because FlatFile#autodetect may pre-read some lines. def pos @io.pos - @prefetch.size end + # Sets position of input stream. + # If the input stream is not a normal file, + # the result is not guaranteed. + # It is similar to IO#pos=. + # Note that it will not be equal to io.pos=, + # because FlatFile#autodetect may pre-read some lines. def pos=(p) r = (@io.pos = p) *************** *** 133,136 **** --- 235,242 ---- end + # Returns true if input stream is end-of-file. + # Otherwise, returns false. + # (Similar to IO#eof?, but may not be equal to io.eof?, + # because FlatFile#autodetect may pre-read some lines.) def eof? if @prefetch.size > 0 *************** *** 141,144 **** --- 247,252 ---- end + # Similar to IO#gets. + # Internal use only. Users shold not call it directly. def gets(io_rs = $/) if @prefetch.size > 0 *************** *** 176,179 **** --- 284,289 ---- end + # Unread read data. + # Internal use only. Users must not call it. 
def ungets(str) @prefetch = str + @prefetch *************** *** 181,184 **** --- 291,296 ---- end + # Similar to IO#getc. + # Internal use only. Users should not call it directly. def getc if @prefetch.size > 0 then *************** *** 191,194 **** --- 303,308 ---- end + # Similar to IO#ungetc. + # Internal use only. Users should not call it directly. def ungetc(c) @prefetch = sprintf("%c", c) + @prefetch *************** *** 196,204 **** --- 310,323 ---- end + # If true is given, the next_entry method returns + # a entry as a text, whereas if false, returns as a parsed object. def raw=(bool) @raw = (bool ? true : false) end + + # If true, raw mode. attr_reader :raw + # Sets database class. Plese use only if autodetect fails. def dbclass=(k) if k then *************** *** 210,216 **** end end attr_reader :dbclass ! # format autodetection def autodetect(lines = 31) r = nil --- 329,343 ---- end end + + # Returns database class which is automatically detected or + # given in FlatFile#initialize. attr_reader :dbclass ! # Performs determination of database class (file format). ! # Pre-reads +lines+ lines for format determination (default 31 lines). ! # If fails, returns nil or false. Otherwise, returns database class. ! # ! # The method can be called anytime if you want (but not so recommended). ! # It may be useful if input file is a mixture of muitiple format data. def autodetect(lines = 31) r = nil *************** *** 231,234 **** --- 358,363 ---- end + # Detects database class (== file format) of given file. + # If fails to determine, returns nil. def self.autodetect_file(filename) ff = self.open(nil, filename) *************** *** 238,241 **** --- 367,374 ---- end + # Detects database class (== file format) of given input stream. + # If fails to determine, returns nil. + # Caution: the method reads some data from the input stream, + # and the data will be lost. 
def self.autodetect_stream(io) ff = self.new(nil, io) *************** *** 244,247 **** --- 377,382 ---- end + # Detects database class (== file format) of given string. + # If fails to determine, returns false or nil. def self.autodetect(text) require 'bio' *************** *** 325,328 **** --- 460,466 ---- Bio::Spidey::Report + when /^HMMER +\d+\./ + Bio::HMMER::Report + when /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ Bio::Sim4::Report *************** *** 355,526 **** end end - - - =begin - - = Bio::FlatFile - - --- Bio::FlatFile.auto(filename_or_stream[, mode, perm, options]) - - Same as Bio::FlatFile.open(nil, filename_or_stream, mode, perm, options). - - * Example 1 - Bio::FlatFile.auto(ARGF) - * Example 2 - Bio::FlatFile.auto("embl/est_hum17.dat") - * Example 3 - Bio::FlatFile.auto(IO.popen("gzip -dc nc1101.flat.gz")) - - --- Bio::FlatFile.open(dbclass, filename_or_stream[, mode, perm, options]) - - Prepare to read a file or a stream 'filename_or_stream' - which contains 'dbclass'-style formatted data. - - 'dbclass' shoud be a class (or module) or nil. - e.g. Bio::GenBank, Bio::FastaFormat. - - If 'filename_or_stream' is a filename (which doesn't have gets method), - the method opens a local file named 'filename_or_stream' - with 'File.open(filename, mode, perm)'. - - When nil is given to dbclass, trying to determine database class - (file format) automatically. If fails to determine, dbclass is - set to nil and FlatFile#next_entry works same as IO#gets when - raw = true. It is recommended to set dbclass using - FlatFile#dbclass= method if fails to determine automatically. - - * Example 1 - Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq") - * Example 2 - Bio::FlatFile.open(nil, "embl/est_hum17.dat") - * Example 3 - Bio::FlatFile.open(Bio::GenBank, $stdin) - - If it is called with block, the block will be executed with - a newly opened Bio::FlatFile instance object. 
If filename - is given, the file is automatically closed when leaving the block. - - * Example 4 - Bio::FlatFile.open(nil, 'test4.fst') do |ff| - ff.each { |e| print e.definition, "\n" } - end - - --- Bio::FlatFile.new(dbclass, stream, options = nil) - - Same as FlatFile.open, except that 'stream' should be a opened - stream object (IO, File, ..., who have the 'gets' method). - - * Example 1 - Bio::FlatFile.new(Bio::GenBank, ARGF) - * Example 2 - Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz")) - - 'options' needs to be a hash (or nil). - Current options are below: - :raw --> if true, "raw mode" (same as #raw=true). - default: false (not "raw mode"). - - * Example 3 - Bio::FlatFile.new(nil, $stdin, :raw=>true) - * Example 3 in old style (deprecated) - Bio::FlatFile.new(nil, $stdin, true) - - --- Bio::FlatFile.to_a(filename_or_stream, *arg) - - Same as FlatFile.auto(filename_or_stream, *arg).to_a - - --- Bio::FlatFile#next_entry - - Get next entry. - - --- Bio::FlatFile#each_entry { |entry| ... } - --- Bio::FlatFile#each { |entry| ... } - - Iterates over each entry in the flatfile. - - * Example - include Bio - ff = FlatFile.open(GenBank, "genbank/gbhtg14.seq") - ff.each_entry do |x| - puts x.definition - end - - --- Bio::FlatFile#to_a - - Creates an array that contains all entries in the flatfile. - - --- Bio::FlatFile#rewind - - Resets file pointer to the start of the flatfile. - (Same as IO#rewind) - - --- Bio::FlatFile#close - - Closes input stream. - (Same as IO#close) - - --- Bio::FlatFile#raw= - - Assign true or false. If true, the next_entry method returns - a entry as a text, whereas if false, as a parsed object. - - --- Bio::FlatFile#raw - - Returns current state of the raw mode. - - --- Bio::FlatFile#entry_raw - - Returns the current entry as a text. - - --- Bio::FlatFile#io - - Returns input stream (IO object). - - --- Bio::FlatFile#pos - - Returns current position of input stream. 
- (Same as IO#pos, but may not be equal to io.pos, - because FlatFile#autodetect may pre-read some lines.) - - --- Bio::FlatFile#eof? - - Returns true if input stream is end-of-file. - Otherwise, returns false. - (Same as IO#eof?, but may not be equal to io.eof?, - because FlatFile#autodetect may pre-read some lines.) - - --- Bio::FlatFile#dbclass - - Returns database class given in FlatFile#initialize - (FlatFile.new or FlatFile.open). - - --- Bio::FlatFile#dbclass=(klass) - - Sets database class. (Plese use only if autodetect fails.) - - --- Bio::FlatFile#autodetect([lines]) - - Performs determination of database class (file format). - Pre-reads 'lines' lines for format determination (default 31 lines). - If fails, returns nil or false. Otherwise, returns database class. - It may be useful if input file is a mixture of muitiple format data. - - --- Bio::FlatFile.autodetect(str) - - Determines database class (== file format) of given string. - If fails to determine, returns false or nil. - - --- Bio::FlatFile.autodetect_file(filename) - - Determines database class (== file format) of given file. - If fails to determine, returns nil. - - --- Bio::FlatFile.autodetect_stream(io) - - Determines database class (== file format) of given input stream. - If fails to determine, returns nil. - Caution: the method reads some data from the input stream, - and the data will be lost. - - =end --- 493,495 ---- From ngoto at pub.open-bio.org Mon Oct 31 22:15:36 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:32 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.38,1.39 Message-ID: <200511010315.jA13FaVL006175@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory pub.open-bio.org:/tmp/cvs-serv6163 Modified Files: flatfile.rb Log Message: Fixed typo and possibly unsuitable descriptions. 
Index: flatfile.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v retrieving revision 1.38 retrieving revision 1.39 diff -C2 -d -r1.38 -r1.39 *** flatfile.rb 1 Nov 2005 02:56:11 -0000 1.38 --- flatfile.rb 1 Nov 2005 03:15:33 -0000 1.39 *************** *** 42,46 **** # which contains +dbclass+ data. # ! # +dbclass+ shoud be a class (or module) or nil. # e.g. Bio::GenBank, Bio::FastaFormat. # --- 42,46 ---- # which contains +dbclass+ data. # ! # +dbclass+ should be a class (or module) or nil. # e.g. Bio::GenBank, Bio::FastaFormat. # *************** *** 223,226 **** --- 223,227 ---- end + # (Not recommended to use it.) # Sets position of input stream. # If the input stream is not a normal file, *************** *** 248,252 **** # Similar to IO#gets. ! # Internal use only. Users shold not call it directly. def gets(io_rs = $/) if @prefetch.size > 0 --- 249,253 ---- # Similar to IO#gets. ! # Internal use only. Users should not call it directly. def gets(io_rs = $/) if @prefetch.size > 0 *************** *** 304,308 **** # Similar to IO#ungetc. ! # Internal use only. Users should not call it directly. def ungetc(c) @prefetch = sprintf("%c", c) + @prefetch --- 305,309 ---- # Similar to IO#ungetc. ! # Internal use only. Users should not call it. def ungetc(c) @prefetch = sprintf("%c", c) + @prefetch *************** *** 338,343 **** # If fails, returns nil or false. Otherwise, returns database class. # ! # The method can be called anytime if you want (but not so recommended). ! # It may be useful if input file is a mixture of muitiple format data. def autodetect(lines = 31) r = nil --- 339,344 ---- # If fails, returns nil or false. Otherwise, returns database class. # ! # The method can be called anytime if you want (but not recommended). ! # This might be useful if input file is a mixture of muitiple format data. 
def autodetect(lines = 31) r = nil From nakao at pub.open-bio.org Mon Oct 31 21:16:08 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:52:37 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db/embl common.rb, 1.6, 1.7 embl.rb, 1.23, 1.24 sptr.rb, 1.27, 1.28 Message-ID: <200511010216.jA12G8VL005966@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/embl In directory pub.open-bio.org:/tmp/cvs-serv5954/lib/bio/db/embl Modified Files: common.rb embl.rb sptr.rb Log Message: * Removed Author:: description. Index: sptr.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v retrieving revision 1.27 retrieving revision 1.28 diff -C2 -d -r1.27 -r1.28 *** sptr.rb 27 Oct 2005 09:30:42 -0000 1.27 --- sptr.rb 1 Nov 2005 02:16:06 -0000 1.28 *************** *** 2,7 **** # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project # License:: LGPL # --- 2,6 ---- # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class # ! # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao # License:: LGPL # Index: embl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/embl.rb,v retrieving revision 1.23 retrieving revision 1.24 diff -C2 -d -r1.23 -r1.24 *** embl.rb 27 Oct 2005 09:34:49 -0000 1.23 --- embl.rb 1 Nov 2005 02:16:06 -0000 1.24 *************** *** 3,8 **** # # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project # License:: LGPL # --- 3,7 ---- # # ! # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. 
Nakao # License:: LGPL # Index: common.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/common.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** common.rb 27 Oct 2005 09:36:07 -0000 1.6 --- common.rb 1 Nov 2005 02:16:06 -0000 1.7 *************** *** 2,7 **** # = bio/db/embl.rb - Common methods for EMBL style database classes # ! # Author:: Mitsuteru C. Nakao ! # Copyright:: Copyright (C) 2001-2005 BioRuby Project # License:: LGPL # --- 2,6 ---- # = bio/db/embl.rb - Common methods for EMBL style database classes # ! # Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao # License:: LGPL # From ngoto at pub.open-bio.org Mon Oct 31 18:58:22 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:38 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db nbrf.rb,1.4,1.5 Message-ID: <200510312358.j9VNwMVL005664@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory pub.open-bio.org:/tmp/cvs-serv5654 Modified Files: nbrf.rb Log Message: changed document from RD to RDoc. Index: nbrf.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/nbrf.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** nbrf.rb 26 Sep 2005 13:00:06 -0000 1.4 --- nbrf.rb 31 Oct 2005 23:58:19 -0000 1.5 *************** *** 1,8 **** # ! # bio/db/nbrf.rb - NBRF/PIR format sequence data class # ! # Copyright (C) 2001-2003 GOTO Naohisa ! # Copyright (C) 2001-2002 KATAYAMA Toshiaki # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,10 ---- # ! # = bio/db/nbrf.rb - NBRF/PIR format sequence data class # ! # Copyright:: Copyright (C) 2001-2003 GOTO Naohisa ! # Copyright (C) 2001-2002 KATAYAMA Toshiaki ! 
# Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 18,34 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # require 'bio/db' require 'bio/sequence' ! module Bio class NBRF < DB # based on Bio::FastaFormat class DELIMITER = RS = "*\n" def initialize(str) str = str.sub(/\A[\r\n]+/, '') # remove first void lines --- 20,53 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # Sequence data class for NBRF/PIR flatfile format. + # + # = References + # + # * http://pir.georgetown.edu/pirwww/otherinfo/doc/techbulletin.html + # * http://www.sander.embl-ebi.ac.uk/Services/webin/help/webin-align/align_format_help.html#pir + # * http://www.cmbi.kun.nl/bioinf/tools/crab_pir.html + # require 'bio/db' require 'bio/sequence' ! # Sequence data class for NBRF/PIR flatfile format. ! module Bio #:nodoc: class NBRF < DB + #-- # based on Bio::FastaFormat class + #++ + # Delimiter of each entry. Bio::FlatFile uses it. DELIMITER = RS = "*\n" + # Creates a new NBRF object. It stores the comment and sequence + # information from one entry of the NBRF/PIR format string. + # If the argument contains more than one + # entry, only the first entry is used. def initialize(str) str = str.sub(/\A[\r\n]+/, '') # remove first void lines *************** *** 47,55 **** end end - attr_accessor :seq_type, :entry_id, :definition, :data - attr_reader :entry_overrun alias accession entry_id def entry @entry = ">#{@seq_type or 'XX'};#{@entry_id}\n#{definition}\n#{@data}*\n" --- 66,92 ---- end end + # Returns sequence type described in the entry. 
+ # P1 (protein), F1 (protein fragment) + # DL (DNA linear), DC (DNA circular) + # RL (DNA linear), RC (DNA circular) + # N3 (tRNA), N1 (other functional RNA) + attr_accessor :seq_type + + # Returns ID described in the entry. + attr_accessor :entry_id alias accession entry_id + # Returns the description line of the NBRF/PIR formatted data. + attr_accessor :definition + + # sequence data of the entry (???) + attr_accessor :data + + # piece of next entry. Bio::FlatFile uses it. + attr_reader :entry_overrun + + + # Returns the stored one entry as a NBRF/PIR format. (same as to_s) def entry @entry = ">#{@seq_type or 'XX'};#{@entry_id}\n#{definition}\n#{@data}*\n" *************** *** 57,60 **** --- 94,99 ---- alias to_s entry + # Returns Bio::Sequence::AA, Bio::Sequence::NA, or Bio::Sequence, + # depending on sequence type. def seq_class case @seq_type *************** *** 70,73 **** --- 109,115 ---- end + # Returns sequence data. + # Returns Bio::Sequence::NA, Bio::Sequence::AA or Bio::Sequence, + # according to the sequence type. def seq unless defined?(@seq) *************** *** 77,84 **** --- 119,130 ---- end + # Returns sequence length. def length seq.length end + # Returens the nucleic acid sequence. + # If you call naseq for protein sequence, RuntimeError will be occurred. + # Use the method if you know whether the sequence is NA or AA. def naseq if seq.is_a?(Bio::Sequence::AA) then *************** *** 91,98 **** --- 137,151 ---- end + # Returens the length of sequence. + # If you call nalen for protein sequence, RuntimeError will be occurred. + # Use the method if you know whether the sequence is NA or AA. def nalen naseq.length end + # Returens the protein (amino acids) sequence. + # If you call aaseq for nucleic acids sequence, + # RuntimeError will be occurred. + # Use the method if you know whether the sequence is NA or AA. 
def aaseq if seq.is_a?(Bio::Sequence::NA) then *************** *** 105,113 **** --- 158,175 ---- end + # Returens the length of protein (amino acids) sequence. + # If you call aaseq for nucleic acids sequence, + # RuntimeError will be occurred. + # Use the method if you know whether the sequence is NA or AA. def aalen aaseq.length end + #-- #class method + #++ + + # Creates a NBRF/PIR formatted text. + # Parameters can be omitted. def self.to_nbrf(hash) seq_type = hash[:seq_type] *************** *** 134,218 **** end #class NBRF end #module Bio - - =begin - - = Bio::NBRF - - This is a sequence data class for NBRF/PIR flatfile format. - - http://pir.georgetown.edu/pirwww/otherinfo/doc/techbulletin.html - http://www.sander.embl-ebi.ac.uk/Services/webin/help/webin-align/align_format_help.html#pir - http://www.cmbi.kun.nl/bioinf/tools/crab_pir.html - - The precedent '>' can be omitted and the trailing '>' will be removed - automatically. - - --- Bio::NBRF.new(entry) - - Stores the comment and sequence information from one entry of the - NBRF/PIR format string. If the argument contains more than one - entry, only the first entry is used. - - --- Bio::NBRF#entry - - Returns the stored one entry as a NBRF/PIR format. (same as to_s) - - - --- Bio::NBRF#seq_type - - Returns sequence type described in the entry. - - * P1 (protein), F1 (protein fragment) - * DL (DNA linear), DC (DNA circular) - * RL (DNA linear), RC (DNA circular) - * N3 (tRNA), N1 (other functional RNA) - - --- Bio::NBRF#seq_class - - Returns Bio::Sequence::AA, Bio::Sequence::NA, or Bio::Sequence, - depending on sequence type. - - --- Bio::NBRF#entry_id - - Returns ID described in the entry. - - --- Bio::NBRF#accession - - Same as Bio::NBRF#entry_id. - - --- Bio::NBRF#definition - - Returns the description line of the NBRF/PIR formatted data. - - --- Bio::NBRF#seq - - Returns a joined sequence line as a String. - Returns Bio::Sequence::NA, Bio::Sequence::AA or Bio::Sequence, - according to the sequence type. 
- - --- Bio::NBRF#length - - Returns sequence length. - - --- Bio::NBRF#naseq - --- Bio::NBRF#nalen - --- Bio::NBRF#aaseq - --- Bio::NBRF#aalen - - If you know whether the sequence is NA or AA, use these methods. - 'naseq' and 'aaseq' methods returen the Bio::Sequence::NA or - Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods - return the length of them. - - If you call naseq for protein sequence, or aaseq for nucleic sequence, - a RuntimeError will be occurred. - - --- Bio::NBRF.to_nbrf(:seq_type=>'P1', :entry_id=>'XXX00000', - :definition=>'xxx protein', - :seq=>seq, :width=>70) - - Creates a NBRF/PIR formatted text. - Parameters can be omitted. - - =end --- 196,198 ---- From ngoto at pub.open-bio.org Mon Oct 31 15:03:43 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:43 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl sim4.rb,1.2,1.3 Message-ID: <200510312003.j9VK3hVL004579@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory pub.open-bio.org:/tmp/cvs-serv4569 Modified Files: sim4.rb Log Message: Changed document format from RD to RDoc. Added references in document. Index: sim4.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** sim4.rb 9 Sep 2005 15:58:42 -0000 1.2 --- sim4.rb 31 Oct 2005 20:03:41 -0000 1.3 *************** *** 1,7 **** # ! # bio/appl/sim4.rb - sim4 wrapper class # ! # Copyright (C) 2004 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/sim4.rb - sim4 wrapper class # ! # Copyright:: Copyright (C) 2004 GOTO Naohisa ! 
# Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,32 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # require 'open3' require 'tempfile' ! module Bio class Sim4 autoload :Report, 'bio/appl/sim4/report' def initialize(program = 'sim4', database = nil, option = []) @program = program --- 19,49 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # The sim4 execution wrapper class. + # + # == References + # + # * Florea, L., et al., A Computer program for aligning a cDNA sequence + # with a genomic DNA sequence, Genome Research, 8, 967--974, 1998. + # http://www.genome.org/cgi/content/abstract/8/9/967 + # require 'open3' require 'tempfile' ! module Bio #:nodoc: ! ! # The sim4 execution wrapper class. class Sim4 autoload :Report, 'bio/appl/sim4/report' + # Creates a new sim4 execution wrapper object. + # [+program+] Program name. Usually 'sim4' in UNIX. + # [+database+] Default file name of database('seq2'). + # [+option+] Options (array of strings). 
def initialize(program = 'sim4', database = nil, option = []) @program = program *************** *** 38,46 **** @log = nil end attr_accessor :database - attr_reader :program, :option - attr_reader :command, :log - attr_reader :output, :report def query(seq1) tf = Tempfile.open('sim4') --- 55,83 ---- @log = nil end + + # default file name of database('seq2') attr_accessor :database + # name of the program (usually 'sim4' in UNIX) + attr_reader :program + + # options + attr_reader :option + + # last command-line strings executed by the object + attr_reader :command + + # last messages of program reported to the STDERR + attr_reader :log + + # last result text (String) + attr_reader :output + + # last result. Returns a Bio::Sim4::Report object. + attr_reader :report + + # Executes the sim4 program. + # seq1 shall be a Bio::Sequence object. + # Returns a Bio::Sim4::Report object. def query(seq1) tf = Tempfile.open('sim4') *************** *** 52,55 **** --- 89,96 ---- end + # Executes the sim4 program. + # Perform mRNA-genome alignment between given sequences. + # seq1 and seq2 should be Bio::Sequence objects. + # Returns a Bio::Sim4::Report object. def query_pairwise(seq1, seq2) tf = Tempfile.open('sim4') *************** *** 65,68 **** --- 106,113 ---- end + # Executes the sim4 program. + # Perform mRNA-genome alignment between sequences in given files. + # filename1 and filename2 should be file name strings. + # If filename2 is not specified, using self.database. def exec_local(filename1, filename2 = nil) @command = [ @program, filename1, (filename2 or @database), *@option ] *************** *** 87,147 **** end #class Sim4 end #module Bio - - =begin - - = Bio::Sim4 - - Sim4 wrapper. - - --- Bio::Sim4.new(program = 'sim4', database = nil, option = []) - - Creates new wrapper. 
- program: program name (String) - database: default file name of database('seq2') (String) - option: options (Array of String) - - --- Bio::Sim4#database - --- Bio::Sim4#program - --- Bio::Sim4#option - - Access to the variables specified in initialize. - - --- Bio::Sim4#query(seq) - - Executes the program(sim4). - seq: Bio::Sequence object - Returns a Bio::Sim4::Report object. - - --- Bio::Sim4#query_pairwise(seq1, seq2) - - Executes the program(sim4). - Perform mRNA-genome alignment between given sequences. - seq1: Bio::Sequence object - seq2: Bio::Sequence object - Returns a Bio::Sim4::Report object. - - --- Bio::Sim4#exec(filename1, filename2 = nil) - - Executes the program(sim4). - filename1: file name (String) - filename2: file name (String) - If not specified, using self.database. - - --- Bio::Sim4#command - - Shows latest command-line executed by this class. - - --- Bio::Sim4#log - - Shows latest messages of program reported to stderr. - - --- Bio::Sim4#report - - Shows latest result (Bio::Sim4::Report object) - - --- Bio::Sim4#output - - Shows latest raw result. - - =end --- 132,134 ---- From ngoto at pub.open-bio.org Mon Oct 31 14:46:21 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:43 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl clustalw.rb,1.7,1.8 Message-ID: <200510311946.j9VJkLVL004536@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory pub.open-bio.org:/tmp/cvs-serv4526 Modified Files: clustalw.rb Log Message: Changed document format from RD to RDoc. Added references in document. Index: clustalw.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw.rb,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** clustalw.rb 9 Sep 2005 15:52:57 -0000 1.7 --- clustalw.rb 31 Oct 2005 19:46:19 -0000 1.8 *************** *** 1,7 **** # ! # bio/appl/clustalw.rb - CLUSTAL W wrapper class # ! 
# Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/clustalw.rb - CLUSTAL W wrapper class # ! # Copyright:: Copyright (C) 2003 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,23 **** --- 19,41 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # Bio::ClustalW is a CLUSTAL W execution wrapper class. + # Its object is also called an alignment factory. + # CLUSTAL W is a very popular software for multiple sequence alignment. + # + # == References + # + # * Thompson,J.D., Higgins,D.G. and Gibson,T.J.. + # CLUSTAL W: improving the sensitivity of progressive multiple sequence + # alignment through sequence weighting, position-specific gap penalties + # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994. + # http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673 + # * http://www.ebi.ac.uk/clustalw/ + # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/ + # + require 'tempfile' *************** *** 27,35 **** require 'bio/alignment' ! module Bio class ClustalW autoload :Report, 'bio/appl/clustalw/report' def initialize(program = 'clustalw', option = []) @program = program --- 45,58 ---- require 'bio/alignment' ! module Bio #:nodoc: ! ! # Bio::ClustalW is a CLUSTAL W execution wrapper class. ! # Its object is also called an alignment factory. ! # CLUSTAL W is a very popular software for multiple sequence alignment. class ClustalW autoload :Report, 'bio/appl/clustalw/report' + # Creates a new CLUSTAL W execution wrapper object (alignment factory). 
def initialize(program = 'clustalw', option = []) @program = program *************** *** 40,47 **** @log = nil end - attr_accessor :program, :option - attr_reader :command, :log - attr_reader :output, :report def query(seqs) if seqs then --- 63,92 ---- @log = nil end + # name of the program (usually 'clustalw' in UNIX) + attr_accessor :program + + # options + attr_accessor :option + + # Returns last command-line strings executed by this factory. + # Note that filenames described in the command-line may already + # be removed because they are temporary files. + # Returns an array. + attr_reader :command + + # Returns last messages of CLUSTAL W execution. + attr_reader :log + + # Returns last raw alignment result (String). + attr_reader :output + + # Returns last alignment result. + # Returns a Bio::ClustalW::Report object. + attr_reader :report + + # Executes the program(clustalw). + # If +seqs+ is not nil, perform alignment for seqs. + # If +seqs+ is nil, simply executes CLUSTAL W. def query(seqs) if seqs then *************** *** 52,57 **** end def query_align(seqs) - # seqs should be Bio::Alignment or Array of sequences or nil seqtype = nil unless seqs.is_a?(Bio::Alignment) --- 97,103 ---- end + # Performs alignment for +seqs+. + # +seqs+ should be Bio::Alignment or Array of sequences or nil. def query_align(seqs) seqtype = nil unless seqs.is_a?(Bio::Alignment) *************** *** 69,72 **** --- 115,120 ---- end + # Performs alignment for +str+. + # +str+ should be a string that can be recognized by CLUSTAL W. def query_string(str, *arg) begin *************** *** 81,84 **** --- 129,133 ---- end + # Performs alignment of sequences in the file named +path+. def query_by_filename(path, seqtype = nil) require 'bio/appl/clustalw/report' *************** *** 107,114 **** --- 156,168 ---- @report end + + # Returns last alignment guild-tree (file.dnd). attr_reader :output_dnd + # Returns last error messages (to stderr) of CLUSTAL W execution. 
attr_reader :errorlog + private + # Executes the program in the local machine. def exec_local(opt) @command = [ @program, *opt ] *************** *** 136,198 **** end #module Bio - =begin - - = Bio::ClustalW - - --- Bio::ClustalW.new(path_to_clustalw = 'clustalw', option = []) - - Creates new alignment factory. - - --- Bio::ClustalW#program - --- Bio::ClustalW#option - - Access to the variables specified in Bio::ClustalW.new. - - --- Bio::ClustalW#query(seqs) - - Executes the program(clustalw). - If 'seqs' is not nil, perform alignment for seqs. - If 'seqs' is nil, simply executes CLUSTAL W. - - --- Bio::ClustalW#query_align(seqs) - - Performs alignment for seqs. - - --- Bio::ClustalW#query_string(str) - - Performs alignment for str. - Str should be a string that can be recognized by CLUSTAL W. - - --- Bio::ClustalW#query_by_filename(filename) - - Performs alignment of sequences in the file named filename. - - --- Bio::ClustalW#command - - Shows latest command-line executed by this factory. - Note that filenames described in the command-line may already - be removed because they are temporary files. - Returns an array. - - --- Bio::ClustalW#log - - Shows latest messages of CLUSTAL W execution. - - --- Bio::ClustalW#report - - Shows latest alignment result (instance of Bio::ClustalW::Report) - performed by this factory. - - --- Bio::ClustalW#output - - Shows latest raw alignment result (String). - - --- Bio::ClustalW#output_dnd - - Shows latest alignment guild-tree (filename.dnd). - - --- Bio::ClustalW#errorlog - - Shows latest error messages (thourgh stderr) of CLUSTAL W execution. 
- - =end --- 190,191 ---- From ngoto at pub.open-bio.org Mon Oct 31 14:24:16 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:46 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blast wublast.rb,1.2,1.3 Message-ID: <200510311924.j9VJOGVL004482@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv4472 Modified Files: wublast.rb Log Message: Added Rdoc documentation. Fixed bug in F0dbstat#parse_dbstat. Catch up changes in format0.rb. Index: wublast.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blast/wublast.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** wublast.rb 8 Sep 2005 01:22:08 -0000 1.2 --- wublast.rb 31 Oct 2005 19:24:14 -0000 1.3 *************** *** 1,7 **** # ! # bio/appl/blast/wublast.rb - WU-BLAST default output parser # ! # Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/blast/wublast.rb - WU-BLAST default output parser # ! # Copyright:: Copyright (C) 2003 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,32 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # require 'bio/appl/blast/format0' ! module Bio ! class Blast ! module WU class Report < Default::Report def parameters parse_parameters --- 19,52 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # WU-BLAST default output parser. 
+ # + # The parser is still incomplete and may contain many bugs, + # because I didn't have WU-BLAST license. + # It was tested under web-based WU-BLAST results and + # obsolete version downloaded from http://blast.wustl.edu/ . + # + # = References + # * http://blast.wustl.edu/ + # * http://www.ebi.ac.uk/blast2/ + # require 'bio/appl/blast/format0' ! module Bio #:nodoc: ! class Blast #:nodoc: ! module WU #:nodoc: + # Bio::Blast::WU::Report parses WU-BLAST default output + # and stores information in the data. + # It may contain a Bio::Blast::WU::Report::Iteration object. + # Because it inherits Bio::Blast::Default::Report, + # please also refer Bio::Blast::Default::Report. class Report < Default::Report + # Returns parameters (???) def parameters parse_parameters *************** *** 34,37 **** --- 54,58 ---- end + # Returns parameter matrix (???) def parameter_matrix parse_parameters *************** *** 39,44 **** --- 60,67 ---- end + # Returns e-value threshold specified when BLAST was executed. def expect; parse_parameters; @parameters['E']; end + # Returns warning messages. def warnings unless defined?(@warnings) *************** *** 49,52 **** --- 72,76 ---- end + # Returns notice messages. def notice unless defined?(@notice) *************** *** 57,60 **** --- 81,85 ---- private + # Splits headers. def format0_split_headers(data) @f0header = data.shift *************** *** 80,87 **** --- 105,114 ---- end + # Splits search data. def format0_split_search(data) [ Iteration.new(data) ] end + # Splits statistics parameters. def format0_split_stat_params(data) @f0warnings = [] *************** *** 100,106 **** @f0dbstat = F0dbstat.new(@f0wu_stats) itr = @iterations[0] ! itr.f0dbstat = @f0dbstat if itr end def parse_parameters unless defined?(@parse_parameters) --- 127,135 ---- @f0dbstat = F0dbstat.new(@f0wu_stats) itr = @iterations[0] ! x = @f0dbstat ! itr.instance_eval { @f0dbstat = x } if itr end + # Splits parameters. 
def parse_parameters unless defined?(@parse_parameters) *************** *** 126,130 **** end ! class F0dbstat < Default::Report::F0dbstat def initialize(ary) @f0stat = ary --- 155,161 ---- end ! # Stores database statistics. ! # Internal use only. Users must not use the class. ! class F0dbstat < Default::Report::F0dbstat #:nodoc: def initialize(ary) @f0stat = ary *************** *** 132,145 **** end #undef :f0params #undef :matrix, :gap_open, :gap_extend, # :eff_space, :expect, :sc_match, :sc_mismatch, # :num_hits def parse_dbstat unless defined?(@parse_dbstat) ! @f0stat.each do |x| ! parse_colon_separated(@hash, x) ! end @database = @hash['Database'] @posted_date = @hash['Posted'] --- 163,177 ---- end + #-- #undef :f0params #undef :matrix, :gap_open, :gap_extend, # :eff_space, :expect, :sc_match, :sc_mismatch, # :num_hits + #++ + # Parses database statistics. def parse_dbstat unless defined?(@parse_dbstat) ! parse_colon_separated_params(@hash, @f0stat) @database = @hash['Database'] @posted_date = @hash['Posted'] *************** *** 157,167 **** end #class F0dbstat ! class Frame ! end #class FrameParams class Iteration < Default::Report::Iteration def initialize(data) @f0stat = [] ! @f0dbstat = nil @f0hitlist = [] @hits = [] --- 189,212 ---- end #class F0dbstat ! #-- ! #class Frame ! #end #class FrameParams ! #++ + # Iteration class for WU-BLAST report. + # Though WU-BLAST does not iterate like PSI-BLAST, + # Bio::Blast::WU::Report::Iteration aims to keep compatibility + # with Bio::Blast::Default::Report::* classes. + # It may contain some Bio::Blast::WU::Report::Hit objects. + # Because it inherits Bio::Blast::Default::Report::Iteration, + # please also refer Bio::Blast::Default::Report::Iteration. class Iteration < Default::Report::Iteration + # Creates a new Iteration object. + # It is designed to be called only internally from + # the Bio::Blast::WU::Report class. + # Users shall not use the method directly. def initialize(data) @f0stat = [] ! 
@f0dbstat = Default::Report::AlwaysNil.instance @f0hitlist = [] @hits = [] *************** *** 183,186 **** --- 228,232 ---- end + # Returns warning messages. def warnings @f0warnings *************** *** 188,191 **** --- 234,238 ---- private + # Parses hit list. def parse_hitlist unless defined?(@parse_hitlist) *************** *** 232,236 **** --- 279,292 ---- end #class Iteration + # Bio::Blast::WU::Report::Hit contains information about a hit. + # It may contain some Bio::Blast::WU::Report::HSP objects. + # + # Because it inherits Bio::Blast::Default::Report::Hit, + # please also refer Bio::Blast::Default::Report::Hit. class Hit < Default::Report::Hit + # Creates a new Hit object. + # It is designed to be called only internally from the + # Bio::Blast::WU::Report::Iteration class. + # Users should not call the method directly. def initialize(data) @f0hitname = data.shift *************** *** 250,266 **** end def score @score end ! attr_reader :pvalue, :n_number end #class Hit class HSP < Default::Report::HSP ! method_after_parse_score :pvalue, :p_sum_n end #class HSP end #class Report class Report_TBlast < Report DELIMITER = RS = "\nTBLAST" end #class Report_TBlast --- 306,340 ---- end + # Returns score. def score @score end ! # p-value ! attr_reader :pvalue ! # n-number (???) ! attr_reader :n_number end #class Hit + # Bio::Blast::WU::Report::HSP holds information about the hsp + # (high-scoring segment pair). + # + # Because it inherits Bio::Blast::Default::Report::HSP, + # please also refer Bio::Blast::Default::Report::HSP. class HSP < Default::Report::HSP ! # p-value ! attr_reader :pvalue if false #dummy ! method_after_parse_score :pvalue ! # p_sum_n (???) ! attr_reader :p_sum_n if false #dummy ! method_after_parse_score :p_sum_n end #class HSP end #class Report + # WU-BLAST default output parser for TBLAST. + # All methods are equal to Bio::Blast::WU::Report. + # Only DELIMITER (and RS) is different. 
class Report_TBlast < Report + # Delimter of each entry for TBLAST. Bio::FlatFile uses it. DELIMITER = RS = "\nTBLAST" end #class Report_TBlast *************** *** 478,501 **** ###################################################################### - =begin - - = Bio::Blast::WU::Report - - WU-BLAST default output parser. - It is still incomplete and may contain many bugs, - because I don't have WU-BLAST license. - It was tested under web-based WU-BLAST results and - obsolete version downloaded from (()). - - = Bio::Blast::WU::Report_TBlast - - WU-BLAST default output parser for TBLAST. - All methods are equal to Bio::Blast::WU::Report. - Only DELIMITER (and RS) is different. - - = References - - * (()) - * (()) - - =end --- 552,553 ---- From ngoto at pub.open-bio.org Mon Oct 31 14:22:29 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:52:49 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blast format0.rb,1.14,1.15 Message-ID: <200510311922.j9VJMTVL004465@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv4445 Modified Files: format0.rb Log Message: In F0dbstat#parse_colon_separated_params, fixed parse error when WU-BLAST. Index: format0.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blast/format0.rb,v retrieving revision 1.14 retrieving revision 1.15 diff -C2 -d -r1.14 -r1.15 *** format0.rb 31 Oct 2005 11:07:05 -0000 1.14 --- format0.rb 31 Oct 2005 19:22:27 -0000 1.15 *************** *** 343,347 **** @num_hits = sc[2].tr(',', '').to_i end ! if sc.skip(/([\-\,\.\'\(\)\w ]+)\: *(.+)/) then hash[sc[1]] = sc[2] else --- 343,347 ---- @num_hits = sc[2].tr(',', '').to_i end ! if sc.skip(/([\-\,\.\'\(\)\#\w ]+)\: *(.*)/) then hash[sc[1]] = sc[2] else From nakao at pub.open-bio.org Mon Oct 31 13:32:38 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:52:54 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/db go.rb,1.8,1.9 Message-ID: <200510311832.j9VIWcVL004289@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db In directory pub.open-bio.org:/tmp/cvs-serv4279/lib/bio/db Modified Files: go.rb Log Message: * Updated RDoc. Index: go.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/go.rb,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** go.rb 26 Sep 2005 13:00:06 -0000 1.8 --- go.rb 31 Oct 2005 18:32:36 -0000 1.9 *************** *** 1,6 **** # ! # bio/db/go.rb - Classes for Gene Ontology # ! # Copyright (C) 2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,16 ---- # ! # = bio/db/go.rb - Classes for Gene Ontology # ! # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao ! # License:: ! # ! # $Id$ ! # ! # == Gene Ontology ! # ! # == Example ! # ! # == References ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,22 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 28,32 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # *************** *** 25,37 **** module Bio ! # Bio::GO class GO ! # Bio::GO::Ontology - Class for a DAG Edit format of Gene Ontology. class Ontology < Bio::Pathway # Bio::GO::Ontology.parse_ogids(line) # Parsing GOID line in the DAGEdit format ! # GO:ID[ ; GO:ID...] def self.parse_goids(line) goids = [] --- 35,57 ---- module Bio ! # = Bio::GO ! # Classes for Gene Ontology http://www.geneontology.org class GO ! # = Bio::GO::Ontology ! # ! # Container class for ontologies in the DAG Edit format. ! # ! # == Example ! # ! # c_data = File.open('component.oontology').read ! # go_c = Bio::GO::Ontology.new(c_data) ! 
# p go_c.bfs_shortest_path('0003673','0005632') class Ontology < Bio::Pathway # Bio::GO::Ontology.parse_ogids(line) + # # Parsing GOID line in the DAGEdit format ! # GO:ID[ ; GO:ID...] def self.parse_goids(line) goids = [] *************** *** 51,61 **** end ! attr_reader :header_lines attr_reader :id2term attr_reader :id2id # Bio::GO::Ontology.new(str) def initialize(str) @id2term = {} --- 71,86 ---- end ! # Returns a Hash instance of the header lines in ontology flatfile. attr_reader :header_lines + + # attr_reader :id2term + + # attr_reader :id2id # Bio::GO::Ontology.new(str) + # The DAG Edit format ontology data parser. def initialize(str) @id2term = {} *************** *** 67,71 **** ! # Bio::GO::Ontology.goid2term(goid) def goid2term(goid) term = id2term[goid] --- 92,96 ---- ! # Returns a GO_Term correspondig with the given GO_ID. def goid2term(goid) term = id2term[goid] *************** *** 138,142 **** ! # Bio::GO::Ontology#parse_goids(line) def parse_goids(line) Ontology.parse_goids(line) --- 163,168 ---- ! # Returns an ary of GO IDs by parsing an entry line in the DAG Edit ! # format. def parse_goids(line) Ontology.parse_goids(line) *************** *** 162,174 **** ! # Bio::GO::GeneAssociation # $CVSROOT/go/gene-associations/gene_association.* # class GeneAssociation # < Bio::DB ! DELIMITER = RS = "\n" ! # Bio::GO::GeneAssociation.parser(str) ! # gene_association.* file parser def self.parser(str) if block_given? --- 188,216 ---- ! # = Bio::GO::GeneAssociation # $CVSROOT/go/gene-associations/gene_association.* # + # Data parser for the gene_association go annotation. + # See also the file format http://www.geneontology.org/doc/GO.annotation.html#file + # + # == Example + # + # mgi_data = File.open('gene_association.mgi').read + # mgi = Bio::GO::GeneAssociation.parser(mgi_data) + # + # Bio::GO::GeneAssociation.parser(mgi_data) do |entry| + # p [entry.entry_id, entry.evidence, entry.goid] + # end + # class GeneAssociation # < Bio::DB ! # Delimiter ! 
DELIMITER = "\n" ! # Delimiter ! RS = DELIMITER ! ! # Returns an Array of parsed gene_association flatfile. ! # Block is acceptable. def self.parser(str) if block_given? *************** *** 187,208 **** end attr_reader :db # -> aStr attr_reader :db_object_id # -> aStr attr_reader :db_object_symbol attr_reader :qualifier attr_reader :db_reference # -> [] attr_reader :evidence attr_reader :with # -> [] attr_reader :aspect attr_reader :db_object_name attr_reader :db_object_synonym # -> [] attr_reader :db_object_type attr_reader :taxon attr_reader :date attr_reader :assigned_by alias entry_id db_object_id ! # Bio::GO::GeneAssociation.new(entry) def initialize(entry) tmp = entry.chomp.split(/\t/) --- 229,278 ---- end + # Returns DB variable. attr_reader :db # -> aStr + + # Returns Db_Object_Id variable. Alias to entry_id. attr_reader :db_object_id # -> aStr + + # Returns Db_Object_Symbol variable. attr_reader :db_object_symbol + + # Returns Qualifier variable. attr_reader :qualifier + + # Returns Db_Reference variable. attr_reader :db_reference # -> [] + + # Returns Evidence code variable. attr_reader :evidence + + # Returns the value this entry is associated with. attr_reader :with # -> [] + + # Returns Aspect variable. attr_reader :aspect + + # Returns Db_Object_Name variable. attr_reader :db_object_name + + # attr_reader :db_object_synonym # -> [] + + # Returns Db_Object_Type variable. attr_reader :db_object_type + + # Returns Taxon variable. attr_reader :taxon + + # Returns Date variable. attr_reader :date + + # attr_reader :assigned_by + alias entry_id db_object_id ! # Parsing an entry (in a line) in the gene_association flatfile. def initialize(entry) tmp = entry.chomp.split(/\t/) *************** *** 225,232 **** ! # Bio::GO::GeneAssociation#goid(org = nil) -> GO ID # ! # Bio::GO::GeneAssociation#goid -> "001234" ! # Bio::GO::GeneAssociation#goid(true) -> "GO:001234" def goid(org = nil) if org --- 295,303 ---- ! # Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns ! 
# /GO:\d{7}/ style. # ! # * Bio::GO::GeneAssociation#goid -> "001234" ! # * Bio::GO::GeneAssociation#goid(true) -> "GO:001234" def goid(org = nil) if org *************** *** 249,253 **** ! # Container class for files in geneontology.org/go/external2go/*2go. # # The line syntax is: --- 320,324 ---- ! # = Container class for files in geneontology.org/go/external2go/*2go. # # The line syntax is: *************** *** 255,271 **** # database: > GO: ; GO: # # == SAMPLE ! # !date: 2005/02/08 18:02:54 ! # !Mapping of SWISS-PROT KEYWORDS to GO terms. ! # !Evelyn Camon, SWISS-PROT. ! # ! ! # SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754 ! # ... # class External2go < Array attr_reader :header ! # Bio::GO::External2go.parser(str) ! # Constructor def self.parser(str) e2g = self.new --- 326,352 ---- # database: > GO: ; GO: # + # == Example + # + # spkw2go = Bio::GO::External2go.new(File.read("spkw2go")) + # spkw2go.size + # spkw2go.each do |relation| + # relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""} + # end + # spkw2go.dbs + # # == SAMPLE ! # !date: 2005/02/08 18:02:54 ! # !Mapping of SWISS-PROT KEYWORDS to GO terms. ! # !Evelyn Camon, SWISS-PROT. ! # ! ! # SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754 ! # ... # class External2go < Array + + # Returns aHash of the external2go header information attr_reader :header ! # Constructor from parsing external2go file. def self.parser(str) e2g = self.new *************** *** 285,289 **** end ! # Bio::GO::External2go.new def initialize @header = {:date => '', :desc => []} --- 366,372 ---- end ! ! # Constructor. ! 
# relation := {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr} def initialize @header = {:date => '', :desc => []} *************** *** 291,294 **** --- 374,378 ---- end + # Bio::GO::External2go#set_date(value) def set_date(value) *************** *** 296,299 **** --- 380,384 ---- end + # Bio::GO::External2go#set_desc(ary) def set_desc(ary) *************** *** 301,305 **** --- 386,392 ---- end + # Bio::GO::External2go#to_str + # Returns the contents in the external2go format. def to_str ["!date: #{@header[:date]}", *************** *** 310,329 **** ! # Bio::GO::External2go#db def dbs self.map {|rel| rel[:db] }.uniq end ! # Bio::GO::External2go#db_ids def db_ids self.map {|rel| rel[:db_id] }.uniq end ! # Bio::GO::External2go#go_terms def go_terms self.map {|rel| rel[:go_term] }.uniq end ! # Bio::GO::External2go#go_ids def go_ids self.map {|rel| rel[:go_id] }.uniq --- 397,417 ---- ! # Returns ary of databases. def dbs self.map {|rel| rel[:db] }.uniq end ! ! # Returns ary of database IDs. def db_ids self.map {|rel| rel[:db_id] }.uniq end ! # Returns ary of GO Terms. def go_terms self.map {|rel| rel[:go_term] }.uniq end ! # Returns ary of GO IDs. def go_ids self.map {|rel| rel[:go_id] }.uniq *************** *** 408,579 **** end - - - - - =begin - - = Bio::GO - - * Classes for (()). - - - = Bio::GO::Ontology < Bio::Pathway - - * Container class for ontologies in the DAG Edit format. - - c_data = File.open('component.oontology').read - go_c = Bio::GO::Ontology.new(c_data) - p go_c.bfs_shortest_path('0003673','0005632') - - - --- Bio::GO::Ontology.new(data) - - The DAG Edit format ontology data is allowed. - - --- Bio::GO::Ontology#hader_lines - - Returns a Hash instance of the header lines in ontology flatfile. - - - --- Bio::GO::Ontology#goid2term(GO_ID) - - Returns a GO_Term correspondig with the given GO_ID. - - --- Bio::GO::Ontology.parse_goids(line) - - Returns an ary of GO IDs by parsing an entry line in the DAG Edit - format. 
- - - = Bio::GO::GeneAssociation - - * Data parser for the gene_association go annotation. - See also (()). - - - mgi_data = File.open('gene_association.mgi').read - mgi = Bio::GO::GeneAssociation.parser(mgi_data) - - or - - Bio::GO::GeneAssociation.parser(mgi_data) {|entry| - p [entry.entry_id, entry.evidence, entry.goid] - } - - - --- Bio::GO::GeneAssociation.parser(data) - - Retruns an Array of parsed gene_association flatfile. - Block is acceptable. - - --- Bio::GO::GeneAssociation.new(line) - - Parsing an entry (in a line) in the gene_association flatfile. - - --- Bio::GO::GeneAssociation.DELIMITER - - The entry delimiter is "\n". - alias as RS. - - --- Bio::GO::GeneAssociation#goid(arg = nil) - - Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns - /GO:\d{7}/ style. - - --- Bio::GO::GeneAssociation#db - - DB variable. - - --- Bio::GO::GeneAssociation#db_object_id - - Db_Object_Id variable. Alias to entry_id. - - --- Bio::GO::GeneAssociation#db_object_symbol - - Db_Object_Symbol variable. - - --- Bio::GO::GeneAssociation#db_object_name - - Db_Object_Name variable. - - --- Bio::GO::GeneAssociation#db_object_type - - Db_Object_Type variable. - - --- Bio::GO::GeneAssociation#db_reference - - Db_Reference variable. - - --- Bio::GO::GeneAssociation#evidence - - Evidence code variable. - - --- Bio::GO::GeneAssociation#with - - The entry is associated with this value. - - --- Bio::GO::GeneAssociation#aspect - - Aspect valiable. - - --- Bio::GO::GeneAssociation#taxon - - Taxon variable. - - --- Bio::GO::GeneAssociation#date - - Date variable. - - - - = Bio::GO::External2go < Array - - Class for files in geneontology.org/go/external2go/ - - - spkw2go = Bio::GO::External2go.new(File.read("spkw2go")) - spkw2go.size - spkw2go.each do |relation| - relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""} - end - spkw2go.dbs - - --- Bio::GO::External2go.parser(str) - - Constructor from parsing external2go file. 
- - --- Bio::GO::External2go.new - - Constructor. - relation := {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr} - - --- Bio::GO::External2go#[index] -> relation - - Index accessing to a list of external2go relations. - - --- Bio::GO::External2go#header -> {:date => "", :desc => ""} - - Hash of the header information. - - --- Bio::GO::External2go#dbs -> ary - - List of databases. - - --- Bio::GO::External2go#db_ids -> ary - - List of database IDs. - - --- Bio::GO::External2go#go_terms -> ary - - List of GO Terms. - - --- Bio::GO::External2go#go_ids -> ary - - List of GO IDs. - - --- Bio::GO::External2go#to_str -> str - - Formats the content in the external2go format. - - =end --- 496,497 ---- From nakao at pub.open-bio.org Mon Oct 31 13:00:18 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:52:56 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/targetp report.rb,1.5,1.6 Message-ID: <200510311800.j9VI0IVL004171@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/targetp In directory pub.open-bio.org:/tmp/cvs-serv4161/lib/bio/appl/targetp Modified Files: report.rb Log Message: * Updated RDoc. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/targetp/report.rb,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** report.rb 26 Sep 2005 13:00:05 -0000 1.5 --- report.rb 31 Oct 2005 18:00:16 -0000 1.6 *************** *** 1,6 **** # ! # bio/appl/targetp/report.rb - TargetP report class # ! # Copyright (C) 2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,17 ---- # ! # = bio/appl/targetp/report.rb - TargetP report class # ! # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao ! # Licence:: LGPL ! # ! # $Id$ ! # ! # == Description ! # ! # TargetP class for http://www.cbs.dtu.dk/services/TargetP/ ! # ! # == Example ! # == References ! 
#-- # # This library is free software; you can redistribute it and/or *************** *** 18,32 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # module Bio class TargetP class Report DELIMITER = "\n \n" def initialize(str) @version = nil --- 29,80 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # module Bio + class TargetP + # = A parser and container class for TargetP report. class Report + # Delimiter DELIMITER = "\n \n" + # Delimiter + RS = DELIMITER + + # Returns the program version. + attr_reader :version + + # Returns the query sequences. + attr_reader :query_sequences + + # Returns 'included' or 'not included'. + # If the value is 'included', Bio::TargetP::Report#prediction['TPlen'] + # contains a valid value. + attr_reader :cleavage_site_prediction + + # Returns ``PLANT'' or ``NON-PLANT'' networks. + attr_reader :networks + + # Returns a Hash of the prediction results. + # + # {"Name"=>"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271, + # "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640} + # + # Keys: Name, Len, SP, mTP, other, Loc, RC + # Optional key for PLANT networks: cTP + # Optional key in Cleavage site: TPlen + # + # Use 'Length' and 'Loc.' instead of 'Len' and 'Loc' respectively + # for the version 1.0 report. + attr_reader :prediction + + # Returns a Hash of cutoff values. + attr_reader :cutoff + + # Sets output report. def initialize(str) @version = nil *************** *** 39,48 **** end - attr_reader :version, :query_sequences, - :cleavage_site_prediction, :networks, - :prediction, :cutoff - alias pred prediction def name @prediction['Name'] --- 87,93 ---- end alias pred prediction + # Returns the name of query sequence. def name @prediction['Name'] *************** *** 50,53 **** --- 95,99 ---- alias entry_id name + # Returns length of query sequence. 
def query_len if @prediction['Len'] *************** *** 59,62 **** --- 105,114 ---- alias length query_len + # Returns the predicted localization signal: + # 1. S (Signal peptide) + # 2. M (mTP) + # 3. C (cTP) + # 4. * + # 5. _ def loc if @prediction['Loc'] *************** *** 67,70 **** --- 119,123 ---- end + # Returns RC. def rc @prediction['RC'] *************** *** 73,76 **** --- 126,130 ---- private + # def parse_entry(str) labels = [] *************** *** 227,288 **** - =begin - - = Bio::TargetP - - TargetP class for (()) - - = Bio::TargetP::Report - - A parser and container class for TargetP report. - - --- Bio::TargetP::Report.new(str) - - --- Bio::TargetP::Report#version - - This class is tested by version 1.0 and 1.1 reports. - - --- Bio::TargetP::Report#query_sequences - --- Bio::TargetP::Report#cleavage_site_prediction - - Returns 'included' or 'not included'. - If the value is 'included', Bio::TargetP::Report#prediction['TPlen'] - contains a valid value. - - --- Bio::TargetP::Report#networks - - There are PLANT and NON-PLANT networks. - - --- Bio::TargetP::Report#entry_id - --- Bio::TargetP::Report#name - - Returns the qeury entry_id. - - --- Bio::TargetP::Report#query_len - - Returns query length. - - --- Bio::TargetP::Report#prediction - - Returns a Hash of the prediction results. - - Valid keys: Name, Len, SP, mTP, other, Loc, RC - Additional key in PLANT networks: cTP - Additional key in Cleavage site: TPlen - - Use 'Length' and 'Loc.' instead of 'Len' and 'Loc' respectively - for the version 1.0 report. - - --- Bio::TargetP::Report#cutoff - - Returns a Hash of cutoff values. - - --- Bio::TargetP::Report#loc - - Returns the predicted localization S, M, C, * or _. - - --- Bio::TargetP::Report#rc - - - =end --- 281,283 ---- From nakao at pub.open-bio.org Mon Oct 31 12:59:49 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:52:59 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/targetp test_report.rb, 1.1, 1.2 Message-ID: <200510311759.j9VHxnVL004149@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/targetp In directory pub.open-bio.org:/tmp/cvs-serv4139/test/unit/bio/appl/targetp Modified Files: test_report.rb Log Message: * Added test methods. Index: test_report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/appl/targetp/test_report.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_report.rb 31 Oct 2005 17:53:48 -0000 1.1 --- test_report.rb 31 Oct 2005 17:59:46 -0000 1.2 *************** *** 123,126 **** --- 123,127 ---- hash = {"Name"=>"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271, "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640} + assert_equal(@obj.pred, hash) assert_equal(@obj.prediction, hash) end *************** *** 142,145 **** --- 143,150 ---- def test_query_len assert_equal(@obj.query_len, 640) + end + + def test_length + assert_equal(@obj.length, 640) end From nakao at pub.open-bio.org Mon Oct 31 12:53:50 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:09 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/targetp test_report.rb, NONE, 1.1 Message-ID: <200510311753.j9VHroVL004097@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/targetp In directory pub.open-bio.org:/tmp/cvs-serv4087/test/unit/bio/appl/targetp Added Files: test_report.rb Log Message: * Initial import. 
--- NEW FILE: test_report.rb --- # # test/unit/bio/appl/targetp/test_report.rb - Unit test for Bio::TargetP::Report # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_report.rb,v 1.1 2005/10/31 17:53:48 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/targetp/report' module Bio TargetPReport_plant =<"MGI_2141503", "Loc."=>"_", "RC"=>3, "SP"=>0.271, "other"=>0.844, "mTP"=>0.161, "cTP"=>0.031, "Length"=>640} assert_equal(@obj.prediction, hash) end def test_cutoff hash = {"SP"=>0.0, "other"=>0.0, "mTP"=>0.0, "cTP"=>0.0} assert_equal(@obj.cutoff, hash) end def test_entry_id assert_equal(@obj.entry_id, 'MGI_2141503') end def test_name assert_equal(@obj.name, 'MGI_2141503') end def test_query_len assert_equal(@obj.query_len, 640) end def test_loc assert_equal(@obj.loc, '_') end def test_rc assert_equal(@obj.rc, 3) end end # class TestTargetPReport end From nakao at pub.open-bio.org Mon Oct 31 12:01:53 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:53:12 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/sosui report.rb,1.7,1.8 Message-ID: <200510311701.j9VH1rVL002734@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/sosui In directory pub.open-bio.org:/tmp/cvs-serv2690/lib/bio/appl/sosui Modified Files: report.rb Log Message: * Added Bio::SOSUI::Report::TMH class to contain the information about TM lines. * Update RDoc. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sosui/report.rb,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** report.rb 31 Oct 2005 16:31:29 -0000 1.7 --- report.rb 31 Oct 2005 17:01:50 -0000 1.8 *************** *** 1,6 **** # ! # bio/appl/sosui/report.rb - SOSUI report class # ! # Copyright (C) 2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,15 ---- # ! # = bio/appl/sosui/report.rb - SOSUI report class # ! # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao ! # Licence:: LGPL ! # ! # $Id$ ! # ! # == Example ! # ! # == References ! # * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,22 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 27,31 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # *************** *** 26,62 **** class SOSUI class Report ! RS = DELIMITER = "\n>" ! def initialize(entry) ! entry = entry.split(/\n/) @entry_id = entry[0].strip.sub(/^>/,'') @prediction = entry[1].strip ! @tmh = [] ! @tmhs = 0 parse_tmh(entry) if /MEMBRANE/ =~ @prediction end - attr_reader :entry_id, :prediction, :tmhs, :tmh - - private def parse_tmh(entry) entry.each do |line| if /NUMBER OF TM HELIX = (\d+)/ =~ line ! @tmhs = $1 elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line ! 
tmp = {'TMH' => $1.to_i, ! 'range' => Range.new($2.to_i, $3.to_i), ! 'grade' => $4, ! 'seq' => $5 } ! @tmh.push(tmp) end end end end # class Report --- 35,110 ---- class SOSUI + # = SOSUI output report parsing class + # + # == References + # * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html class Report ! # Delimiter ! DELIMITER = "\n>" ! RS = DELIMITER ! # Query entry_id ! attr_reader :entry_id ! ! # Returns the prediction result whether "MEMBRANE PROTEIN" or ! # "SOLUBLE PROTEIN". ! attr_reader :prediction ! ! # Transmembrane helixes ary ! attr_reader :tmhs ! ! # Parser for SOSUI output report. ! def initialize(output_report) ! entry = output_report.split(/\n/) @entry_id = entry[0].strip.sub(/^>/,'') @prediction = entry[1].strip ! @tms = 0 ! @tmhs = [] parse_tmh(entry) if /MEMBRANE/ =~ @prediction end private + # Parser for TMH lines. def parse_tmh(entry) entry.each do |line| if /NUMBER OF TM HELIX = (\d+)/ =~ line ! @tms = $1 elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line ! tmh = $1.to_i ! range = Range.new($2.to_i, $3.to_i) ! grade = $4 ! seq = $5 ! @tmhs.push(TMH.new(range, grade, seq)) end end end + + # = Bio::SOSUI::Report::TMH + # Container class for transmembrane helix information. + # + # TM 1 31- 53 SECONDARY HIRMTFLRKVYSILSLQVLLTTV + class TMH + + # Returns aRng of transmembrane helix + attr_reader :range + + # Returns ``PRIMARY'' or ``SECONDARY'' of helix. + attr_reader :grade + + # Returns the sequence of transmembrane helix. + attr_reader :sequence + + # Sets values. + def initialize(range, grade, sequence) + @range = range + @grade = grade + @sequence = sequence + end + end + end # class Report *************** *** 99,104 **** p [:entry_id, sosui.entry_id] p [:prediction, sosui.prediction] ! p [:tmhs, sosui.tmhs] !
pp [:tmhs, sosui.tmh] end *************** *** 117,145 **** - =begin - - = Bio::SOSUI - - SOSUI class for - (()) - - = Bio::SOSUI::Report - - A parser and contianer class - - --- Bio::SOSUI::Report.new(str) - --- Bio::SOSUI::Report#entry_id - --- Bio::SOSUI::Report#prediction - - Returns the prediction result whether "MEMBRANE PROTEIN" or - "SOLUBLE PROTEIN". - - --- Bio::SOSUI::Report#tmhs - - Returns the number of predicted TMHs. - - --- Bio::SOSUI::Report#tmh - - Returns an Array of TMHs in Hash. - - =end --- 165,166 ---- From nakao at pub.open-bio.org Mon Oct 31 12:52:33 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:14 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/targetp - New directory Message-ID: <200510311752.j9VHqXVL004064@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/targetp In directory pub.open-bio.org:/tmp/cvs-serv4054/test/unit/bio/appl/targetp Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/appl/targetp added to the repository From nakao at pub.open-bio.org Mon Oct 31 12:02:47 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:15 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/sosui test_report.rb, 1.1, 1.2 Message-ID: <200510311702.j9VH2lVL002773@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/sosui In directory pub.open-bio.org:/tmp/cvs-serv2749/test/unit/bio/appl/sosui Modified Files: test_report.rb Log Message: * Added unit test for Bio::SOSUI::Report::TMH. Index: test_report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/unit/bio/appl/sosui/test_report.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** test_report.rb 31 Oct 2005 16:26:55 -0000 1.1 --- test_report.rb 31 Oct 2005 17:02:45 -0000 1.2 *************** *** 39,47 **** def test_delimiter ! 
assert_equal(Bio::SOSUI::DELIMITER, "\n>") end def test_rs ! assert_equal(Bio::SOSUI::RS, "\n>") end --- 39,47 ---- def test_delimiter ! assert_equal(Bio::SOSUI::Report::DELIMITER, "\n>") end def test_rs ! assert_equal(Bio::SOSUI::Report::RS, "\n>") end *************** *** 65,73 **** def test_tmhs assert_equal(@obj.tmhs.class, Array) ! assert_equal(@obj.tmhs[0].class, Bio::SOSUI::TMH) end def test_tmh ! assert_equal(@obj.tmh.size, 7) end --- 65,73 ---- def test_tmhs assert_equal(@obj.tmhs.class, Array) ! assert_equal(@obj.tmhs[0].class, Bio::SOSUI::Report::TMH) end def test_tmh ! assert_equal(@obj.tmhs.size, 7) end *************** *** 76,80 **** class TestSOSUITMH < Test::Unit::TestCase def setup ! @obj = Bio::SOSUI::Report.new(SOSUIReport).tmh.first end --- 76,80 ---- class TestSOSUITMH < Test::Unit::TestCase def setup ! @obj = Bio::SOSUI::Report.new(SOSUIReport).tmhs.first end *************** *** 83,88 **** end ! def test_status ! assert_equal(@obj.status, 'SECONDARY') end --- 83,88 ---- end ! def test_grade ! assert_equal(@obj.grade, 'SECONDARY') end From nakao at pub.open-bio.org Mon Oct 31 11:24:34 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:33 2005 Subject: [BioRuby-cvs] bioruby/test/data/TMHMM - New directory Message-ID: <200510311624.j9VGOYVL001702@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/TMHMM In directory pub.open-bio.org:/tmp/cvs-serv1692/test/data/TMHMM Log Message: Directory /home/repository/bioruby/bioruby/test/data/TMHMM added to the repository From nakao at pub.open-bio.org Mon Oct 31 11:24:48 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:53:33 2005 Subject: [BioRuby-cvs] bioruby/test/data/SOSUI - New directory Message-ID: <200510311624.j9VGOmVL001718@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/SOSUI In directory pub.open-bio.org:/tmp/cvs-serv1708/test/data/SOSUI Log Message: Directory /home/repository/bioruby/bioruby/test/data/SOSUI added to the repository From nakao at pub.open-bio.org Mon Oct 31 11:31:32 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:34 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/sosui report.rb,1.6,1.7 Message-ID: <200510311631.j9VGVWVL001928@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/sosui In directory pub.open-bio.org:/tmp/cvs-serv1918/lib/bio/appl/sosui Modified Files: report.rb Log Message: * Added Bio::SOSUI::Report::RS constant. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sosui/report.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** report.rb 26 Sep 2005 13:00:05 -0000 1.6 --- report.rb 31 Oct 2005 16:31:29 -0000 1.7 *************** *** 28,32 **** class Report ! DELIMITER = "\n>" def initialize(entry) --- 28,32 ---- class Report ! RS = DELIMITER = "\n>" def initialize(entry) From nakao at pub.open-bio.org Mon Oct 31 11:29:57 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:36 2005 Subject: [BioRuby-cvs] bioruby/test/data/SOSUI sample.report,NONE,1.1 Message-ID: <200510311629.j9VGTvVL001880@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/SOSUI In directory pub.open-bio.org:/tmp/cvs-serv1870/test/data/SOSUI Added Files: sample.report Log Message: * Initial import. 
--- NEW FILE: sample.report --- >Q9HC19 MEMBRANE PROTEIN NUMBER OF TM HELIX = 7 TM 1 31- 53 SECONDARY HIRMTFLRKVYSILSLQVLLTTV TM 2 69- 90 PRIMARY HESPALILLFALGSLGLIFALT TM 3 99- 121 PRIMARY NLYLLFGFTLLEALTVAVVVTFY TM 4 124- 146 PRIMARY YIILQAFILTTTVFFGLTVYTLQ TM 5 153- 175 PRIMARY KFGAGLFALLWILCLSGILEVFF TM 6 181- 203 PRIMARY ELVLAAAGALLFCGFIIYDTHSL TM 7 212- 234 SECONDARY YVLAAISLYLDIINLFLHLLRFL From nakao at pub.open-bio.org Mon Oct 31 11:30:22 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:38 2005 Subject: [BioRuby-cvs] bioruby/test/data/TMHMM sample.report,NONE,1.1 Message-ID: <200510311630.j9VGUMVL001901@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/data/TMHMM In directory pub.open-bio.org:/tmp/cvs-serv1891/test/data/TMHMM Added Files: sample.report Log Message: * Initial import. --- NEW FILE: sample.report --- # O42385 Length: 423 # O42385 Number of predicted TMHs: 7 # O42385 Exp number of AAs in TMHs: 157.40784 # O42385 Exp number, first 60 AAs: 13.85627 # O42385 Total prob of N-in: 0.00993 # O42385 POSSIBLE N-term signal sequence O42385 TMHMM2.0 outside 1 46 O42385 TMHMM2.0 TMhelix 47 69 O42385 TMHMM2.0 inside 70 81 O42385 TMHMM2.0 TMhelix 82 104 O42385 TMHMM2.0 outside 105 118 O42385 TMHMM2.0 TMhelix 119 141 O42385 TMHMM2.0 inside 142 161 O42385 TMHMM2.0 TMhelix 162 184 O42385 TMHMM2.0 outside 185 205 O42385 TMHMM2.0 TMhelix 206 228 O42385 TMHMM2.0 inside 229 348 O42385 TMHMM2.0 TMhelix 349 371 O42385 TMHMM2.0 outside 372 380 O42385 TMHMM2.0 TMhelix 381 403 O42385 TMHMM2.0 inside 404 423 From nakao at pub.open-bio.org Mon Oct 31 11:26:07 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:53:39 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/tmhmm - New directory Message-ID: <200510311626.j9VGQ7VL001758@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/tmhmm In directory pub.open-bio.org:/tmp/cvs-serv1748/test/unit/bio/appl/tmhmm Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/appl/tmhmm added to the repository From nakao at pub.open-bio.org Mon Oct 31 11:26:20 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:42 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/sosui - New directory Message-ID: <200510311626.j9VGQKVL001774@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/sosui In directory pub.open-bio.org:/tmp/cvs-serv1764/test/unit/bio/appl/sosui Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/appl/sosui added to the repository From nakao at pub.open-bio.org Mon Oct 31 11:26:58 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:43 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/sosui test_report.rb, NONE, 1.1 Message-ID: <200510311626.j9VGQwVL001801@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/sosui In directory pub.open-bio.org:/tmp/cvs-serv1791/test/unit/bio/appl/sosui Added Files: test_report.rb Log Message: * inital import. --- NEW FILE: test_report.rb --- # # test/unit/bio/appl/sosui/test_report.rb - Unit test for Bio::SOSUI::Report # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_report.rb,v 1.1 2005/10/31 16:26:55 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/sosui/report' module Bio bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s test_data = Pathname.new(File.join(bioruby_root, 'test', 'data', 'SOSUI')).cleanpath.to_s SOSUIReport = File.open(File.join(test_data, 'sample.report')).read class TestSOSUIReportConst < Test::Unit::TestCase def test_delimiter assert_equal(Bio::SOSUI::DELIMITER, "\n>") end def test_rs assert_equal(Bio::SOSUI::RS, "\n>") end end class TestSOSUIReport < Test::Unit::TestCase def setup @obj = Bio::SOSUI::Report.new(SOSUIReport) end def test_entry_id assert_equal(@obj.entry_id, 'Q9HC19') end def test_prediction assert_equal(@obj.prediction, 'MEMBRANE PROTEIN') end def test_tmhs assert_equal(@obj.tmhs.class, Array) assert_equal(@obj.tmhs[0].class, Bio::SOSUI::TMH) end def test_tmh assert_equal(@obj.tmh.size, 7) end end # class TestSOSUIReport class TestSOSUITMH < Test::Unit::TestCase def setup @obj = Bio::SOSUI::Report.new(SOSUIReport).tmh.first end def test_range assert_equal(@obj.range, 31..53) end def test_status assert_equal(@obj.status, 'SECONDARY') end def test_sequence assert_equal(@obj.sequence, 'HIRMTFLRKVYSILSLQVLLTTV') end end # class TestSOSUITMH end From nakao at pub.open-bio.org Mon Oct 31 11:27:48 2005 From: nakao at pub.open-bio.org 
(Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:50 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/genscan test_report.rb, NONE, 1.1 Message-ID: <200510311627.j9VGRmVL001829@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/genscan In directory pub.open-bio.org:/tmp/cvs-serv1819/test/unit/bio/appl/genscan Added Files: test_report.rb Log Message: * Initial import. --- NEW FILE: test_report.rb --- # # test/unit/bio/appl/genscan/test_report.rb - Unit test for Bio::Genscan::Report # # Copyright (C) 2005 Mitsuteru Nakao # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id: test_report.rb,v 1.1 2005/10/31 16:27:46 nakao Exp $ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'bio/appl/genscan/report' module Bio class TestGenscanReport < Test::Unit::TestCase def setup bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s test_data = Pathname.new(File.join(bioruby_root, 'test', 'data', 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report) end def test_genscan_version assert_equal(@obj.genscan_version, '1.0') end def test_date_run assert_equal(@obj.date_run, '30-May-103') end def test_time assert_equal(@obj.time, "14:06:28") end def test_query_name assert_equal(@obj.query_name, 'HUMRASH') end def test_length assert_equal(@obj.length, 12942) end def test_gccontent assert_equal(@obj.gccontent, 68.17) end def test_isochore assert_equal(@obj.isochore, '4 (57 - 100 C+G%)') end def test_matrix assert_equal(@obj.matrix, 'HumanIso.smat') end def test_predictions_size assert_equal(@obj.predictions.size, 2) end end # TestGenscanReport class TestGenscanReportGene < Test::Unit::TestCase def setup bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s test_data = Pathname.new(File.join(bioruby_root, 'test', 'data', 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report).predictions end def test_number assert_equal(@obj.first.number, 1) end def test_aaseq assert_equal(@obj.first.aaseq.class, Bio::FastaFormat) seq = 
"MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPGCMSCKCVLS" assert_equal(@obj.first.aaseq.seq, seq) definition = "HUMRASH|GENSCAN_predicted_peptide_1|189_aa" assert_equal(@obj.first.aaseq.definition, definition) end def test_naseq assert_equal(@obj.first.naseq.class, Bio::FastaFormat) seq = "atgacggaatataagctggtggtggtgggcgccggcggtgtgggcaagagtgcgctgaccatccagctgatccagaaccattttgtggacgaatacgaccccactatagaggattcctaccggaagcaggtggtcattgatggggagacgtgcctgttggacatcctggataccgccggccaggaggagtacagcgccatgcgggaccagtacatgcgcaccggggagggcttcctgtgtgtgtttgccatcaacaacaccaagtcttttgaggacatccaccagtacagggagcagatcaaacgggtgaaggactcggatgacgtgcccatggtgctggtggggaacaagtgtgacctggctgcacgcactgtggaatctcggcaggctcaggacctcgcccgaagctacggcatcccctacatcgagacctcggccaagacccggcagggagtggaggatgccttctacacgttggtgcgtgagatccggcagcacaagctgcggaagctgaaccctcctgatgagagtggccccggctgcatgagctgcaagtgtgtgctctcctga" assert_equal(@obj.first.naseq.seq, seq) definition = "HUMRASH|GENSCAN_predicted_CDS_1|570_bp" assert_equal(@obj.first.naseq.definition, definition) end def test_promoter assert_equal(@obj.last.promoter.class, Bio::Genscan::Report::Exon) assert_equal(@obj.last.promoter.exon_type, "Prom") end def test_polyA assert_equal(@obj.first.polyA.class, Bio::Genscan::Report::Exon) assert_equal(@obj.first.polyA.exon_type, 'PlyA') end end # TestGenscanReportGene class TestGenscanReportExon < Test::Unit::TestCase def setup bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s test_data = Pathname.new(File.join(bioruby_root, 'test', 'data', 'genscan')).cleanpath.to_s report = File.open(File.join(test_data, 'sample.report')).read @obj = Bio::Genscan::Report.new(report).predictions.first.exons.first end def test_number assert_equal(@obj.number, 1) end def test_exon_type assert_equal(@obj.exon_type, 'Init') end def test_exon_type_long assert_equal(@obj.exon_type_long, 
'Initial exon') end def test_strand assert_equal(@obj.strand, '+') end def test_first assert_equal(@obj.first, 1664) end def test_last assert_equal(@obj.last, 1774) end def test_range assert_equal(@obj.range, 1664..1774) end def test_phase assert_equal(@obj.phase, '0') end def test_acceptor_score assert_equal(@obj.acceptor_score, 94) end def test_donor_score assert_equal(@obj.donor_score, 83) end def test_initiation_score assert_equal(@obj.initiation_score, 94) end def test_termination_score assert_equal(@obj.termination_score, 83) end def test_score assert_equal(@obj.score, 212) end def test_p_value assert_equal(@obj.p_value, 0.997) end def test_t_score assert_equal(@obj.t_score, 21.33) end end # TestGenscanReportExon end From ngoto at pub.open-bio.org Mon Oct 31 09:37:32 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:53:51 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/bl2seq report.rb,1.3,1.4 Message-ID: <200510311437.j9VEbWVL000313@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/bl2seq In directory pub.open-bio.org:/tmp/cvs-serv32761 Modified Files: report.rb Log Message: Changes to follow modifications of Bio::Blast::Default::Report classes. Added RDoc documents. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/bl2seq/report.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** report.rb 8 Sep 2005 01:22:08 -0000 1.3 --- report.rb 31 Oct 2005 14:37:30 -0000 1.4 *************** *** 1,7 **** # ! # bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser # ! # Copyright (C) 2005 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/bl2seq/report.rb - bl2seq (BLAST 2 sequences) parser # ! # Copyright:: Copyright (C) 2005 GOTO Naohisa ! 
# Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,36 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # ! # Acknowledgements: ! # Thanks to Tomoaki NISHIYAMA ! # for providing bl2seq parser patches based on ! # lib/bio/appl/blast/format0.rb. # require 'bio/appl/blast/format0' ! module Bio ! class Blast ! class Bl2seq class Report < Bio::Blast::Default::Report DELIMITER = RS = nil --- 19,50 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # ! # Bio::Bl2seq::Report is a NCBI bl2seq (BLAST 2 sequences) output parser. ! # ! # = Acknowledgements ! # ! # Thanks to Tomoaki NISHIYAMA ! # for providing bl2seq parser patches based on ! # lib/bio/appl/blast/format0.rb. # require 'bio/appl/blast/format0' ! module Bio #:nodoc: ! class Blast #:nodoc: ! class Bl2seq #:nodoc: ! ! # Bio::Bl2seq::Report is a NCBI bl2seq (BLAST 2 sequences) output parser. ! # It inherits Bio::Blast::Default::Report. ! # Most of its methods are the same as Bio::Blast::Default::Report, ! # but it lacks many methods. class Report < Bio::Blast::Default::Report + + # Delimiter of each entry. Bio::FlatFile uses it. + # In Bio::Bl2seq::Report, it is nil (1 entry 1 file). DELIMITER = RS = nil *************** *** 39,46 **** --- 53,63 ---- message, converged?, reference, db + # Splits headers. def format0_split_headers(data) @f0query = data.shift end + private :format0_split_headers + # Splits the search results. def format0_split_search(data) iterations = [] *************** *** 53,58 **** iterations end ! class F0dbstat < Bio::Blast::Default::Report::F0dbstat def db_num unless defined?(@db_num) --- 70,79 ---- iterations end + private :format0_split_search ! 
# Stores format0 database statistics. ! # Internal use only. Users must not use the class. ! class F0dbstat < Bio::Blast::Default::Report::F0dbstat #:nodoc: ! # Returns number of sequences in database. def db_num unless defined?(@db_num) *************** *** 63,66 **** --- 84,88 ---- end + # Returns number of letters in database. def db_len unless defined?(@db_len) *************** *** 72,79 **** end #class F0dbstat class Iteration < Bio::Blast::Default::Report::Iteration def initialize(data) @f0stat = [] ! @f0dbstat = nil @hits = [] @num = 1 --- 94,111 ---- end #class F0dbstat + # Bio::Bl2seq::Report::Iteration stores information about + # an iteration. + # Normally, it may contain some Bio::Bl2seq::Report::Hit objects. + # + # Note that its main reason for existence is to keep compatibility + # between Bio::Blast::Default::Report::* classes. class Iteration < Bio::Blast::Default::Report::Iteration + # Creates a new Iteration object. + # It is designed to be called only internally from + # the Bio::Blast::Default::Report class. + # Users shall not use the method directly. def initialize(data) @f0stat = [] ! @f0dbstat = Bio::Blast::Default::Report::AlwaysNil.instance @hits = [] @num = 1 *************** *** 83,88 **** end def hits; @hits; end ! undef message, pattern_in_database, f0message, f0hitlist, pattern, pattern_positions, hits_found_again, hits_newly_found, hits_for_pattern, parse_hitlist, --- 115,123 ---- end + # Returns the hits of the iteration. + # It returns an array of Bio::Bl2seq::Report::Hit objects. def hits; @hits; end ! ! undef message, pattern_in_database, pattern, pattern_positions, hits_found_again, hits_newly_found, hits_for_pattern, parse_hitlist, *************** *** 90,97 **** end #class Iteration class Hit < Bio::Blast::Default::Report::Hit end #class Hit ! # NOTE: HSP class below is NOT used!! 
class HSP < Bio::Blast::Default::Report::HSP end #class HSP --- 125,141 ---- end #class Iteration + # Bio::Bl2seq::Report::Hit contains information about a hit. + # It may contain some Bio::Blast::Default::Report::HSP objects. + # All methods are the same as Bio::Blast::Default::Report::Hit class. + # Please refer to Bio::Blast::Default::Report::Hit. class Hit < Bio::Blast::Default::Report::Hit end #class Hit ! # Bio::Bl2seq::Report::HSP holds information about the hsp ! # (high-scoring segment pair). ! # NOTE that the HSP class below is NOT used because ! # Ruby's constants namespace are normally statically determined ! # and HSP object is created in Bio::Blast::Default::Report::Hit class. ! # Please refer to Bio::Blast::Default::Report::HSP. class HSP < Bio::Blast::Default::Report::HSP end #class HSP From ngoto at pub.open-bio.org Mon Oct 31 08:56:11 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:53:52 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/clustalw report.rb,1.6,1.7 Message-ID: <200510311356.j9VDuBVL032569@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/clustalw In directory pub.open-bio.org:/tmp/cvs-serv32559 Modified Files: report.rb Log Message: Changed document format from RD to RDoc. Added references to the document. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/clustalw/report.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** report.rb 26 Sep 2005 13:00:05 -0000 1.6 --- report.rb 31 Oct 2005 13:56:09 -0000 1.7 *************** *** 1,7 **** # ! # bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class # ! # Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class # ! 
# Copyright:: Copyright (C) 2003 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,23 **** --- 19,39 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # Bio::ClustalW::Report is a CLUSTAL W report (*.aln file) parser. + # CLUSTAL W is a very popular software for multiple sequence alignment. + # + # == References + # + # * Thompson,J.D., Higgins,D.G. and Gibson,T.J.. + # CLUSTAL W: improving the sensitivity of progressive multiple sequence + # alignment through sequence weighting, position-specific gap penalties + # and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994. + # http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673 + # * http://www.ebi.ac.uk/clustalw/ + # * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/ + # require 'bio/sequence' *************** *** 26,34 **** require 'bio/appl/clustalw' ! module Bio ! class ClustalW class Report < Bio::DB DELIMITER = nil def initialize(str, seqclass = nil) @raw = str --- 42,60 ---- require 'bio/appl/clustalw' ! module Bio #:nodoc: ! class ClustalW #:nodoc: ! ! # CLUSTAL W result data (*.aln file) parser class. class Report < Bio::DB + + # Delimiter of each entry. Bio::FlatFile uses it. + # In Bio::ClustalW::Report, it is nil (1 entry 1 file). DELIMITER = nil + # Creates new instance. + # +str+ should be a CLUSTAL format string. + # +seqclass+ should be one of the following: + # * Class: Bio::Sequence::AA, Bio::Sequence::NA, ... + # * String: 'PROTEIN', 'DNA', ... def initialize(str, seqclass = nil) @raw = str *************** *** 49,63 **** --- 75,101 ---- end end + # string of whole result attr_reader :raw + + # sequence class (one of Bio::Sequence, Bio::Sequence::NA, + # Bio::Sequence::AA, ...) 
attr_reader :seqclass + # Shows first line of the result data, for example, + # 'CLUSTAL W (1.82) multiple sequence alignment'. + # Returns a string. def header @header or (do_parse or @header) end + # Shows "match line" of CLUSTAL's alignment result, for example, + # ':* :* .* * .*::*. ** :* . * . '. + # Returns a string. def match_line @match_line or (do_parse or @match_line) end + # Gets an multiple alignment. + # Returns a Bio::Alignment object. def align do_parse() unless @align *************** *** 66,73 **** --- 104,115 ---- alias alignment align + # Gets an fasta-format string of the sequences. + # Returns a string. def to_fasta(*arg) align.to_fasta(*arg) end + # Gets an array of the sequences. + # Returns an array of Bio::FastaFormat objects. def to_a align.to_fastaformat_array *************** *** 75,78 **** --- 117,121 ---- private + # Parses Clustal W result text. def do_parse return nil if @align *************** *** 110,159 **** end #module Bio - =begin - - = Bio::ClustalW::Report - - CLUSTAL W result data (*.aln file) parser class. - - --- Bio::ClustalW::Report.new(raw, seqclass = nil) - - Creates new instance. - 'raw' should be a string of CLUSTAL format data. - 'seqclass' should on of following: - Class: Bio::Sequence::AA, Bio::Sequence::NA, ... - String: 'PROTEIN', 'DNA', ... - - --- Bio::ClustalW::Report#raw - --- Bio::ClustalW::Report#seqclass - - Acess methods of variables given in Bio::ClustalW::Report.new method. - - --- Bio::ClustalW::Report#alginment - --- Bio::ClustalW::Report#algin - - Gets an multiple alignment. - Returns an instance of Bio::Alignment class. - - --- Bio::ClustalW::Report#to_a - - Gets an array of the sequences. - Returns an array of Bio::FastaFormat instances. - - --- Bio::ClustalW::Report#to_fasta - - Gets an fasta-format string of the sequences. - Returns a string. - - --- Bio::ClustalW::Report#header - - Shows first line of the result data, for example, - 'CLUSTAL W (1.82) multiple sequence alignment'. 
- Returns a string. - - --- Bio::ClustalW::Report#match_line - - Shows "match line" of CLUSTAL's alignment result, for example, - ':* :* .* * .*::*. ** :* . * . '. - Returns a string. - - =end --- 153,154 ---- From nakao at pub.open-bio.org Mon Oct 31 09:27:37 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:53:54 2005 Subject: [BioRuby-cvs] bioruby/test/unit/bio/appl/genscan - New directory Message-ID: <200510311427.j9VERbVL032704@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/test/unit/bio/appl/genscan In directory pub.open-bio.org:/tmp/cvs-serv32694/test/unit/bio/appl/genscan Log Message: Directory /home/repository/bioruby/bioruby/test/unit/bio/appl/genscan added to the repository From ngoto at pub.open-bio.org Mon Oct 31 08:17:20 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:53:58 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl mafft.rb,1.6,1.7 Message-ID: <200510311317.j9VDHKVL032457@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory pub.open-bio.org:/tmp/cvs-serv32447 Modified Files: mafft.rb Log Message: changed document format form RD to RDoc. added references to the document. added MAFFT version 5 information. Index: mafft.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** mafft.rb 9 Sep 2005 15:56:20 -0000 1.6 --- mafft.rb 31 Oct 2005 13:17:18 -0000 1.7 *************** *** 1,7 **** # ! # bio/appl/mafft.rb - MAFFT wrapper class # ! # Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/mafft.rb - MAFFT wrapper class # ! # Copyright:: Copyright (C) 2003 GOTO Naohisa ! 
# Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,35 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # require 'bio/db/fasta' require 'bio/io/flatfile' # We use Open3.popen3, because MAFFT on win32 requires Cygwin. require 'open3' ! module Bio class MAFFT autoload :Report, 'bio/appl/mafft/report' def self.fftns(n = nil) opt = [] --- 19,65 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # Bio::MAFFT is a wrapper class to execute MAFFT. + # MAFFT is a very fast multiple sequence alignment software. + # + # = Important Notes + # + # Though Bio::MAFFT class currently supports only MAFFT version 3, + # you can use MAFFT version 5 because the class is a wrapper class. + # + # == References + # + # * K. Katoh, K. Misawa, K. Kuma and T. Miyata. + # MAFFT: a novel method for rapid multiple sequence alignment based + # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002. + # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059 + # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/ + # require 'bio/db/fasta' require 'bio/io/flatfile' + #-- # We use Open3.popen3, because MAFFT on win32 requires Cygwin. + #++ require 'open3' ! module Bio #:nodoc: ! ! # Bio::MAFFT is a wrapper class to execute MAFFT. ! # MAFFT is a very fast multiple sequence alignment software. ! # ! # Though Bio::MAFFT class currently supports only MAFFT version 3, ! # you can use MAFFT version 5 because the class is a wrapper class. class MAFFT autoload :Report, 'bio/appl/mafft/report' + # Creates a new alignment factory. + # When +n+ is a number (1,2,3, ...), performs 'fftns n'. + # When +n+ is :i or 'i', performs 'fftnsi'. 
def self.fftns(n = nil) opt = [] *************** *** 42,49 **** --- 72,85 ---- end + # Creates a new alignment factory. + # Performs 'fftnsi'. def self.fftnsi self.new2(nil, 'fftnsi') end + # Creates a new alignment factory. + # When +n+ is a number (1,2,3, ...), performs 'nwns n'. + # When +n+ is :i or 'i', performs 'nwnsi'. + # In both cases, if all_positive is true, add option '--all-positive'. def self.nwns(n = nil, ap = nil) opt = [] *************** *** 57,70 **** end ! def self.nwnsi(ap = nil) opt = [] ! opt << '--all-positive' if ap self.new2(nil, 'nwnsi', *opt) end def self.nwap(n = nil) self.nwns(n, true) end def self.new2(dir, prog, *opt) if dir then --- 93,116 ---- end ! # Creates a new alignment factory. ! # Performs 'nwnsi'. ! # If +all_positive+ is true, add option '--all-positive'. ! def self.nwnsi(all_positive = nil) opt = [] ! opt << '--all-positive' if all_positive self.new2(nil, 'nwnsi', *opt) end + # Creates a new alignment factory. + # Performs 'nwns --all-positive n' or 'nwnsi --all-positive'. + # Same as Bio::MAFFT.nwns(n, true). def self.nwap(n = nil) self.nwns(n, true) end + # Creates a new alignment factory. + # +dir+ is the path of the MAFFT program. + # +prog+ is the name of the program. + # +opt+ is options of the program. def self.new2(dir, prog, *opt) if dir then *************** *** 74,77 **** --- 120,126 ---- end + # Creates a new alignment factory. + # +program+ is the name of the program. + # +option+ is options of the program. def initialize(program, option) @program = program *************** *** 82,89 **** @log = nil end ! attr_accessor :program, :option ! attr_reader :command, :log ! attr_reader :output, :report ! def query(seqs) if seqs then --- 131,162 ---- @log = nil end ! ! # program name ! attr_accessor :program ! ! # options ! attr_accessor :option ! ! # Shows last command-line string. Returns nil or an array of String. ! # Note that filenames described in the command-line may already ! 
# be removed because they are temporary files. ! attr_reader :command ! ! # last message to STDERR when executing the program. ! attr_reader :log ! ! # Shows latest raw alignment result. ! # Since a result of MAFFT is simply a multiple-fasta format, ! # it returns an array of Bio::FastaFormat instances ! # instead of raw string. ! attr_reader :output ! ! # Shows last alignment result (instance of Bio::MAFFT::Report class) ! # performed by the factory. ! attr_reader :report ! ! # Executes the program. ! # If +seqs+ is not nil, perform alignment for seqs. ! # If +seqs+ is nil, simply executes the program. def query(seqs) if seqs then *************** *** 94,99 **** end def query_align(seqs, *arg) - # seqs should be Bio::Alignment or Array of sequences or nil unless seqs.is_a?(Bio::Alignment) seqs = Bio::Alignment.new(seqs, *arg) --- 167,173 ---- end + # Performs alignment for seqs. + # +seqs+ should be Bio::Alignment or Array of sequences or nil. def query_align(seqs, *arg) unless seqs.is_a?(Bio::Alignment) seqs = Bio::Alignment.new(seqs, *arg) *************** *** 102,105 **** --- 176,181 ---- end + # Performs alignment for +str+. + # Str should be a string that can be recognized by the program. def query_string(str, *arg) begin *************** *** 114,117 **** --- 190,194 ---- end + # Performs alignment of sequences in the file named +fn+. def query_by_filename(fn, seqtype = nil) opt = @option + [ fn ] *************** *** 122,125 **** --- 199,203 ---- private + # Executes a program in the local machine. def exec_local(opt) @command = [ @program, *opt ] *************** *** 143,234 **** end #module Bio - - =begin - - = Bio::MAFFT - - Bio::MAFFT is a wrapper class of MAFFT, multiple sequence alignment software. - (()) - - --- Bio::MAFFT.fftns(n = nil) - - Create new alignment factory. - When n is a number (1,2,3, ...), performs 'fftns n'. - When n is :i or 'i', performs 'fftnsi'. - - --- Bio::MAFFT.fftnsi - - Create new alignment factory. - Performs 'fftnsi'. 
- - --- Bio::MAFFT.nwns(n = nil, all_positive = nil) - - Create new alignment factory. - When n is a number (1,2,3, ...), performs 'nwns n'. - When n is :i or 'i', performs 'nwnsi'. - In both case, if all_positive is true, add option '--all-positive'. - - --- Bio::MAFFT.nwnsi(all_positive = true) - - Create new alignment factory. - Performs 'nwnsi'. - If all_positive is true, add option '--all-positive'. - - --- Bio::MAFFT.nwap(n = nil) - - Create new alignment factory. - Performs 'nwns --all-positive n' or 'nwnsi --all-positive'. - Same as Bio::MAFFT.nwap(n, true). - - --- Bio::MAFFT.new(program, option) - - Creates new alignment factory. - - --- Bio::MAFFT#program - --- Bio::MAFFT#option - - Access to the variables specified in initialize. - - --- Bio::MAFFT#query(seqs) - - Executes the program(clustalw). - If 'seqs' is not nil, perform alignment for seqs. - If 'seqs' is nil, simply executes the program. - - --- Bio::MAFFT#query_align(seqs) - - Performs alignment for seqs. - - --- Bio::MAFFT#query_string(str) - - Performs alignment for str. - Str should be a string that can be recognized by the program. - - --- Bio::MAFFT#query_by_filename(filename) - - Performs alignment of sequences in the file named filename. - - --- Bio::MAFFT#command - - Shows latest command-line executed by this factory. - Note that filenames described in the command-line may already - be removed because they are temporary files. - Returns an array of string. - - --- Bio::MAFFT#log - - Shows latest messages of execution. - - --- Bio::MAFFT#report - - Shows latest alignment result (instance of Bio::MAFFT::Report class) - performed by this factory. - - --- Bio::MAFFT#output - - Shows latest raw alignment result. - Since a result of MAFFT is simply a multiple-fasta format, - it returns an array of Bio::FastaFormat instances - instead of raw string. - - =end --- 221,222 ---- From nakao at pub.open-bio.org Mon Oct 31 08:13:37 2005 From: nakao at pub.open-bio.org (Mitsuteru C. 
Nakao) Date: Tue Nov 1 16:54:08 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/psort report.rb,1.9,1.10 Message-ID: <200510311313.j9VDDbVL032405@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/psort In directory pub.open-bio.org:/tmp/cvs-serv32395/lib/bio/appl/psort Modified Files: report.rb Log Message: * Updated RDoc. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/psort/report.rb,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -d -r1.9 -r1.10 *** report.rb 9 Sep 2005 15:48:17 -0000 1.9 --- report.rb 31 Oct 2005 13:13:35 -0000 1.10 *************** *** 1,6 **** # ! # bio/appl/psort/report.rb - PSORT systems report classes # ! # Copyright (C) 2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,13 ---- # ! # = bio/appl/psort/report.rb - PSORT systems report classes # ! # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao ! # License:: LGPL ! # ! # $Id$ ! # ! # == A Report classes for PSORT Systems ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,22 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 25,29 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # ++ # *************** *** 31,44 **** class PSORT1 class Report ! def self.parser(str) ! self.default_parser(str) end ! ! def self.default_parser(str) rpt = self.new ! rpt.raw = str query_info = str.scan(/^Query Information\n\n(.+?)\n\n/m)[0][0].split(/\n/) result_info = str.scan(/^Result Information\n\n(.+?)\n\n\*/m)[0][0] --- 38,56 ---- class PSORT1 + # = Bio::PSORT::PSORT1::Report + # Parser class for PSORT1 output report. + # + # == Example class Report ! # Returns aBio::PSORT::PSORT1::Report. ! def self.parser(output_report) ! self.default_parser(output_report) end ! # Returns aBio::PSORT::PSORT1::Report. ! 
def self.default_parser(output_report) rpt = self.new ! rpt.raw = output_report query_info = str.scan(/^Query Information\n\n(.+?)\n\n/m)[0][0].split(/\n/) result_info = str.scan(/^Result Information\n\n(.+?)\n\n\*/m)[0][0] *************** *** 62,66 **** end ! # new def initialize(entry_id = '', origin = '', title = '', sequence = '', result_info = '', reasoning = {}, final_result = []) --- 74,89 ---- end ! attr_accessor :entry_id ! attr_accessor :origin ! attr_accessor :title ! attr_accessor :sequence ! attr_accessor :result_info ! attr_accessor :reasoning ! attr_accessor :final_result ! attr_accessor :raw ! ! ! ! # Constructs a Bio::PSORT::PSORT1::Report object. def initialize(entry_id = '', origin = '', title = '', sequence = '', result_info = '', reasoning = {}, final_result = []) *************** *** 74,79 **** @raw = '' end - attr_accessor :entry_id, :origin, :title, :sequence, - :result_info, :reasoning,:final_result, :raw --- 97,100 ---- *************** *** 83,89 **** ! # Bio::PSORT::PSORT2 class PSORT2 SclNames = { 'csk' => 'cytoskeletal', --- 104,111 ---- ! class PSORT2 + # Subcellular localization name codes used by PSORT2 SclNames = { 'csk' => 'cytoskeletal', *************** *** 101,104 **** --- 123,127 ---- } + # Feature name codes Features = [ 'psg', # PSG: PSG score *************** *** 139,142 **** --- 162,166 ---- ] + # Feature name codes (long version). FeaturesLong = { 'psg' => 'PSG', *************** *** 176,185 **** } ! # Bio::PSORT::PSORT2::Report class Report BOUNDARY = '-' * 75 RS = DELIMITER = "\)\n\n#{BOUNDARY}" def initialize(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) --- 200,244 ---- } ! # = Bio::PSORT::PSORT2::Report ! # Report parser class for PSORT II(PSORT2). ! # == Example class Report + # Report boundary string. BOUNDARY = '-' * 75 + + + # Report delimiter. RS = DELIMITER = "\)\n\n#{BOUNDARY}" + # entry_id of query sequence. 
+ attr_accessor :entry_id + + # Given subcellular localization (three letters code). + attr_accessor :scl + + # Definition of query sequence. + attr_accessor :definition + + # Sequence of query sequence. + attr_accessor :seq + + # k parameter of k-nearest neighbors classifier. + attr_accessor :k + + # Feature vector used the kNN prediction. + attr_accessor :features + + # Probability vector of kNN prediction. + attr_accessor :prob + + # Predicted subcellular localization (three letters code). + attr_accessor :pred + + # Raw text of output report. + attr_accessor :raw + + + # Constructs aBio::PSORT::PSORT2::Report object. def initialize(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) *************** *** 194,202 **** @raw = raw end - attr_accessor :entry_id, :scl, :definition, :seq, - :k, :features, :prob, :pred, :raw ! # report format to be auto detection def self.parser(str, entry_id) case str --- 253,259 ---- @raw = raw end ! # Parses output report with output format detection automatically. def self.parser(str, entry_id) case str *************** *** 214,219 **** end ! ! # $id: too short length ($leng), skipped\n"; def self.too_short_parser(ent, entry_id = nil) report = self.new(ent) --- 271,277 ---- end ! # Parser for ``too short length'' report. ! # ! # $id: too short length ($leng), skipped\n"; def self.too_short_parser(ent, entry_id = nil) report = self.new(ent) *************** *** 227,232 **** ! # default report ! # ``psort test.faa'' output def self.default_parser(ent, entry_id = nil) report = self.new(ent, entry_id) --- 285,290 ---- ! # Parser for the default report format. ! # ``psort report'' output. def self.default_parser(ent, entry_id = nil) report = self.new(ent, entry_id) *************** *** 247,251 **** end ! def set_header_line(str) str.sub!(/^-+\n/,'') --- 305,309 ---- end ! # Returns header information. 
def set_header_line(str) str.sub!(/^-+\n/,'') *************** *** 264,268 **** end ! def self.set_kNN_prob(str) prob = Hash.new --- 322,326 ---- end ! # Returns @prob value. def self.set_kNN_prob(str) prob = Hash.new *************** *** 278,282 **** end ! def set_prediction(str) case str --- 336,340 ---- end ! # Returns @prob and @k values. def set_prediction(str) case str *************** *** 292,297 **** ! ! # ``psort -v report'' and WWW server output def self.v_parser(ent, entry_id = nil) report = Bio::PSORT::PSORT2::Report.new(ent, entry_id) --- 350,355 ---- ! # Parser for the verbose output report format. ! # ``psort -v report'' and WWW server output. def self.v_parser(ent, entry_id = nil) report = Bio::PSORT::PSORT2::Report.new(ent, entry_id) *************** *** 304,308 **** ent[i] = nil end ! if /^none/ =~ e # for psort output bug j = self.__send__(:search_j, i, ent) ent[i - j] += e --- 362,366 ---- ent[i] = nil end ! if /^none/ =~ e # psort output bug j = self.__send__(:search_j, i, ent) ent[i - j] += e *************** *** 311,315 **** } ent.compact! - # ent.each_with_index {|e,i| p [i.to_s.ljust(2), e] } if /^ PSORT II server/ =~ ent[0] # for WWW version --- 369,372 ---- *************** *** 334,337 **** --- 391,395 ---- + # def self.search_j(i, ent) j = 1 *************** *** 347,359 **** ! # divide entry body ! def self.divent(ent) ! boundary = ent.index(BOUNDARY) return ent[0..(boundary - 1)], ent[(boundary + 2)..ent.length] end ! ! def set_features(fary) ! fary.each {|fent| key = fent.split(/\:( |\n)/)[0].strip self.features[key] = fent # unless /^\>/ =~ key --- 405,417 ---- ! # Divides entry body ! def self.divent(entry) ! boundary = entry.index(BOUNDARY) return ent[0..(boundary - 1)], ent[(boundary + 2)..ent.length] end ! # Sets @features values. ! def set_features(features_ary) ! 
features_ary.each {|fent| key = fent.split(/\:( |\n)/)[0].strip self.features[key] = fent # unless /^\>/ =~ key *************** *** 414,479 **** end - - - - - - =begin - - = Bio::PSORT::PSORT1 - - = Bio::PSORT::PSORT1::Report - - --- Bio::PSORT::PSORT1::Report.parser - --- Bio::PSORT::PSORT1::Report#entry_id - --- Bio::PSORT::PSORT1::Report#origin - --- Bio::PSORT::PSORT1::Report#title - --- Bio::PSORT::PSORT1::Report#sequence - --- Bio::PSORT::PSORT1::Report#result_info - --- Bio::PSORT::PSORT1::Report#reasoning - --- Bio::PSORT::PSORT1::Report#final_result - --- Bio::PSORT::PSORT1::Report#raw - - - - - - = Bio::PSORT::PSORT2 - - --- Bio::PSORT::SclNames - --- Bio::PSORT::Features - --- Bio::PSORT::FeaturesLong - - = Bio::PSORT::PSORT2::Report - - Parsed results of the PSORT2 report for default, ``-v'' and WWW version - output format. - - --- Bio::PSORT::PSORT2::Report.new - --- Bio::PSORT::PSORT2::Report#entry_id - - - --- Bio::PSORT::PSORT2::Report#scl - --- Bio::PSORT::PSORT2::Report#definition - --- Bio::PSORT::PSORT2::Report#seq - --- Bio::PSORT::PSORT2::Report#features - --- Bio::PSORT::PSORT2::Report#prob - --- Bio::PSORT::PSORT2::Report#pred - --- Bio::PSORT::PSORT2::Report#k - --- Bio::PSORT::PSORT2::Report#raw - - - --- Bio::PSORT::PSORT2::Report.parser(report) - - Returns a PSORT report object (Bio::PSORT::PSORT2::Report). - Formats are auto detedted. - - --- Bio::PSORT::PSORT2::Report::BOUNDARY - - Fields boundary in a PSORT report. - - --- Bio::PSORT::PSORT2::Report::DELIMITER - - Entry boundary in PSORT reports. 
- - =end --- 472,473 ---- From ngoto at pub.open-bio.org Mon Oct 31 07:47:15 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:09 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/mafft report.rb,1.5,1.6 Message-ID: <200510311247.j9VClFVL032334@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/mafft In directory pub.open-bio.org:/tmp/cvs-serv32324/mafft Modified Files: report.rb Log Message: Changed document format from RD to RDoc. Added references to the document. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/mafft/report.rb,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** report.rb 26 Sep 2005 13:00:05 -0000 1.5 --- report.rb 31 Oct 2005 12:47:12 -0000 1.6 *************** *** 1,7 **** # ! # bio/appl/mafft/report.rb - MAFFT report class # ! # Copyright (C) 2003 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/mafft/report.rb - MAFFT report class # ! # Copyright:: Copyright (C) 2003 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,23 **** --- 19,41 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # MAFFT result parser class. + # MAFFT is a very fast multiple sequence alignment software. + # + # Since a result of MAFFT is simply a multiple-fasta format, + # the significance of this class is to keep standard form and + # interface between Bio::ClustalW::Report. + # + # == References + # + # * K. Katoh, K. Misawa, K. Kuma and T. Miyata. 
+ # MAFFT: a novel method for rapid multiple sequence alignment based + # on fast Fourier transform. Nucleic Acids Res. 30: 3059-3066, 2002. + # http://nar.oupjournals.org/cgi/content/abstract/30/14/3059 + # * http://www.biophys.kyoto-u.ac.jp/~katoh/programs/align/mafft/ + # require 'bio/db/fasta' *************** *** 25,32 **** require 'bio/appl/mafft' ! module Bio ! class MAFFT class Report def initialize(ary, seqclass = nil) @data = ary --- 43,62 ---- require 'bio/appl/mafft' ! module Bio #:nodoc: ! class MAFFT #:nodoc: ! ! # MAFFT result parser class. ! # MAFFT is a very fast multiple sequence alignment software. ! # ! # Since a result of MAFFT is simply a multiple-fasta format, ! # the significance of this class is to keep standard form and ! # interface between Bio::ClustalW::Report. class Report + # Creates a new Report object. + # +ary+ should be an Array of Bio::FastaFormat. + # +seqclass+ should on of following: + # Class: Bio::Sequence::AA, Bio::Sequence::NA, ... + # String: 'PROTEIN', 'DNA', ... def initialize(ary, seqclass = nil) @data = ary *************** *** 45,51 **** --- 75,87 ---- end end + + # sequence data. Returns an array of Bio::FastaFormat. attr_reader :data + + # Sequence class (Bio::Sequence::AA, Bio::Sequence::NA, ...) attr_reader :seqclass + # Gets an multiple alignment. + # Returns an instance of Bio::Alignment class. def align do_parse() unless @align *************** *** 54,61 **** --- 90,103 ---- alias alignment align + # Gets an fasta-format string of the sequences. + # Returns a string. + # Same as align.to_fasta. + # Please refer to Bio::Alignment#to_fasta for arguments. def to_fasta(*arg) align.to_fasta(*arg) end + # Gets an array of the sequences. + # Returns an array of Bio::FastaFormat instances. def to_a @data *************** *** 63,66 **** --- 105,109 ---- private + # Parsing a result. 
def do_parse return nil if @align *************** *** 75,115 **** end #module Bio - =begin - - = Bio::MAFFT::Report - - MAFFT result parser class. - Since a result of MAFFT is simply a multiple-fasta format, - the significance of this class is to keep standard form and - interface between Bio::ClustalW::Report. - - --- Bio::MAFFT::Report.new(data, seqclass = nil) - - Creates new instance. - 'data' should be an Array of Bio::FastaFormat. - 'seqclass' should on of following: - Class: Bio::Sequence::AA, Bio::Sequence::NA, ... - String: 'PROTEIN', 'DNA', ... - - --- Bio::MAFFT::Report#data - --- Bio::MAFFT::Report#seqclass - - Acess methods of variables given in Bio::MAFFT::Report.new method. - - --- Bio::MAFFT::Report#alginment - --- Bio::MAFFT::Report#algin - - Gets an multiple alignment. - Returns an instance of Bio::Alignment class. - - --- Bio::MAFFT::Report#to_a - - Gets an array of the sequences. - Returns an array of Bio::FastaFormat instances. - - --- Bio::MAFFT::Report#to_fasta - - Gets an fasta-format string of the sequences. - Returns a string. - - =end --- 118,119 ---- From ngoto at pub.open-bio.org Mon Oct 31 06:09:24 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:10 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blat report.rb,1.3,1.4 Message-ID: <200510311109.j9VB9OVL032176@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blat In directory pub.open-bio.org:/tmp/cvs-serv32164/blat Modified Files: report.rb Log Message: added #:nodoc: to Bio, Bio::Blat, Bio::Sim4. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blat/report.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** report.rb 31 Oct 2005 09:29:14 -0000 1.3 --- report.rb 31 Oct 2005 11:09:22 -0000 1.4 *************** *** 41,46 **** require 'bio' ! module Bio ! class Blat # Bio::Blat::Report is a BLAT report parser class. 
--- 41,46 ---- require 'bio' ! module Bio #:nodoc: ! class Blat #:nodoc: # Bio::Blat::Report is a BLAT report parser class. From ngoto at pub.open-bio.org Mon Oct 31 06:09:24 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:15 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/sim4 report.rb,1.4,1.5 Message-ID: <200510311109.j9VB9OVL032180@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/sim4 In directory pub.open-bio.org:/tmp/cvs-serv32164/sim4 Modified Files: report.rb Log Message: added #:nodoc: to Bio, Bio::Blat, Bio::Sim4. Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4/report.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** report.rb 31 Oct 2005 07:51:27 -0000 1.4 --- report.rb 31 Oct 2005 11:09:22 -0000 1.5 *************** *** 32,37 **** # ! module Bio ! class Sim4 # Bio::Sim4::Report is the sim4 report parser class. --- 32,37 ---- # ! module Bio #:nodoc: ! class Sim4 #:nodoc: # Bio::Sim4::Report is the sim4 report parser class. From ngoto at pub.open-bio.org Mon Oct 31 05:56:16 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:21 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/spidey report.rb,1.5,1.6 Message-ID: <200510311056.j9VAuGVL032088@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/spidey In directory pub.open-bio.org:/tmp/cvs-serv32078 Modified Files: report.rb Log Message: changed document from RD to RDoc Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/spidey/report.rb,v retrieving revision 1.5 retrieving revision 1.6 diff -C2 -d -r1.5 -r1.6 *** report.rb 11 Oct 2005 15:30:01 -0000 1.5 --- report.rb 31 Oct 2005 10:56:14 -0000 1.6 *************** *** 1,7 **** # ! # bio/appl/spidey/report.rb - SPIDEY result parser # ! 
# Copyright (C) 2004 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/spidey/report.rb - SPIDEY result parser # ! # Copyright:: Copyright (C) 2004 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,35 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # $Id$ # require 'bio' ! module Bio ! class Spidey class Report #< DB ! # # File format: -p 0 (default) or -p 1 options DELIMITER = RS = "\n--SPIDEY " def initialize(str) str = str.sub(/\A\s+/, '') --- 19,59 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # NCBI Spidey result parser. + # Currently, output of default (-p 0 option) or -p 1 option are supported. + # + # == Notes + # + # The mRNA sequence is regarded as a query, and + # the genomic sequence is regarded as a target (subject, hit). + # + # == References + # + # * Wheelan, S.J., et al., Spidey: a tool for mRNA-to-genomic alignments, + # Genome Research, 11, 1952--1957, 2001. + # http://www.genome.org/cgi/content/abstract/11/11/1952 + # * http://www.ncbi.nlm.nih.gov/spidey/ + # require 'bio' ! module Bio #:nodoc: ! class Spidey #:nodoc: + # Spidey report parser class. + # Its object may contain some Bio::Spidey::Report::Hit objects. class Report #< DB ! #-- # File format: -p 0 (default) or -p 1 options + #++ + # Delimiter of each entry. Bio::FlatFile uses it. DELIMITER = RS = "\n--SPIDEY " + # Creates a new Bio::Spidey::Report object from String. + # You can use Bio::FlatFile to read a file.
def initialize(str) str = str.sub(/\A\s+/, '') *************** *** 46,54 **** end end attr_reader :entry_overrun - attr_reader :hits, :all_hits class SeqDesc # description/definitions of a sequence def initialize(seqid, seqdef, len) @entry_id = seqid --- 70,96 ---- end end + # piece of next entry. Bio::FlatFile uses it. attr_reader :entry_overrun + # Returns an Array of Bio::Spidey::Report::Hit objects. + # Because current version of SPIDEY supports only 1 genomic sequences, + # the number of hits is 1 or 0. + attr_reader :hits + + # Returns an Array of Bio::Spidey::Report::Hit objects. + # Unlike Bio::Spidey::Report#hits, the method returns + # results of all trials of pairwise alignment. + # This would be a Bio::Spidey specific method. + attr_reader :all_hits + + # SeqDesc stores sequence information of query or subject. class SeqDesc + #-- # description/definitions of a sequence + #++ + + # Creates a new SeqDesc object. + # It is designed to be called from Bio::Spidey::Report::* classes. + # Users shall not call it directly. def initialize(seqid, seqdef, len) @entry_id = seqid *************** *** 56,61 **** @len = len end - attr_reader :entry_id, :definition, :len def self.parse(str) /^(Genomic|mRNA)\:\s*(([^\s]*) (.+))\, (\d+) bp\s*$/ =~ str.to_s --- 98,114 ---- @len = len end + # Identifier of the sequence. + attr_reader :entry_id + + # Definition of the sequence. + attr_reader :definition + + # Length of the sequence. + attr_reader :len + + # Parses piece of Spidey result text and creates a new SeqDesc object. + # It is designed to be called from Bio::Spidey::Report::* classes. + # Users shall not call it directly. def self.parse(str) /^(Genomic|mRNA)\:\s*(([^\s]*) (.+))\, (\d+) bp\s*$/ =~ str.to_s *************** *** 67,72 **** end #class SeqDesc class SegmentPair ! 
# segment pair (like Bio::BLAST::*::Report::HSP) def initialize(genomic, mrna, midline, aaseqline, percent_identity, mismatches, gaps, splice_site, --- 120,137 ---- end #class SeqDesc + # Sequence segment pair of Spidey result. + # Similar to Bio::Blast::Report::Hsp but lacks many methods. + # For mRNA-genome mapping programs, unlike other homology search + # programs, the class is used not only for exons but also for introns. + # (Note that intron data would not be available according to run-time + # options of the program.) class SegmentPair ! #-- ! # segment pair (like Bio::BLAST::*::Report::Hsp) ! #++ ! ! # Creates a new SegmentPair object. ! # It is designed to be called from Bio::Spidey::Report::* classes. ! # Users shall not call it directly. def initialize(genomic, mrna, midline, aaseqline, percent_identity, mismatches, gaps, splice_site, *************** *** 82,89 **** @align_len = align_len end - attr_reader :genomic, :mrna, :midline, :aaseqline, - :percent_identity, :mismatches, :gaps, - :splice_site, :align_len def self.new_intron(from, to, strand, aln) genomic = Segment.new(from, to, strand, aln[0]) --- 147,194 ---- @align_len = align_len end + # Returns segment information of the 'Genomic'. + # Returns a Bio::Spidey::Report::Segment object. + # This would be a Bio::Spidey specific method. + attr_reader :genomic + + # Returns segment information of the 'mRNA'. + # Returns a Bio::Spidey::Report::Segment object. + # This would be a Bio::Spidey specific method. + attr_reader :mrna + + # Returns the middle line of the alignment of the segment pair. + # Returns nil if no alignment data are available. + attr_reader :midline + + # Returns amino acid sequence in alignment. + # Returns String, because white space is also important. + # Returns nil if no alignment data are available. + attr_reader :aaseqline + + # Returns percent identity of the segment pair. + attr_reader :percent_identity + + # Returns mismatches.
+ attr_reader :mismatches + alias mismatch_count mismatches + + # Returns gaps. + attr_reader :gaps + + # Returns splice site information. + # Returns a hash which contains :d and :a for keys and + # 0, 1, or nil for values. + # This would be a Bio::Spidey specific methods. + attr_reader :splice_site + + # Returns alignment length of the segment pair. + # Returns nil if no alignment data are available. + attr_reader :align_len + + # Creates a new SegmentPair object when the segment pair is an intron. + # It is designed to be called internally from + # Bio::Spidey::Report::* classes. + # Users shall not call it directly. def self.new_intron(from, to, strand, aln) genomic = Segment.new(from, to, strand, aln[0]) *************** *** 94,98 **** nil, nil, nil, nil, nil) end ! def self.parse(str, strand, complement, aln) /\AExon\s*\d+(\(\-\))?\:\s*(\d+)\-(\d+)\s*\(gen\)\s+(\d+)\-(\d+)\s*\(mRNA\)\s+id\s*([\d\.]+)\s*\%\s+mismatches\s+(\d+)\s+gaps\s+(\d+)\s+splice site\s*\(d +a\)\s*\:\s*(\d+)\s+(\d+)/ =~ str --- 199,208 ---- nil, nil, nil, nil, nil) end ! ! # Parses a piece of Spidey result text and creates a new ! # SegmentPair object. ! # It is designed to be called internally from ! # Bio::Spidey::Report::* classes. ! # Users shall not call it directly. def self.parse(str, strand, complement, aln) /\AExon\s*\d+(\(\-\))?\:\s*(\d+)\-(\d+)\s*\(gen\)\s+(\d+)\-(\d+)\s*\(mRNA\)\s+id\s*([\d\.]+)\s*\%\s+mismatches\s+(\d+)\s+gaps\s+(\d+)\s+splice site\s*\(d +a\)\s*\:\s*(\d+)\s+(\d+)/ =~ str *************** *** 121,139 **** end # Bio::BLAST::*::Report::Hsp compatible methods # Methods already defined: midline, percent_identity, ! # gaps, align_len ! 
alias mismatch_count mismatches def query_from; @mrna.from; end def query_to; @mrna.to; end def qseq; @mrna.seq; end def query_strand; @mrna.strand; end def hit_from; @genomic.from; end def hit_to; @genomic.to; end def hseq; @genomic.seq; end def hit_strand; @genomic.strand; end end #class SegmentPair class Segment def initialize(pos_st, pos_ed, strand = nil, seq = nil) @from = pos_st ? pos_st.to_i : nil --- 231,275 ---- end + #-- # Bio::BLAST::*::Report::Hsp compatible methods # Methods already defined: midline, percent_identity, ! # gaps, align_len, mismatch_count ! #++ ! ! # Returns start position of the mRNA (query) (the first position is 1). def query_from; @mrna.from; end + + # Returns end position (including its position) of the mRNA (query). def query_to; @mrna.to; end + + # Returns the sequence (with gaps) of the mRNA (query). def qseq; @mrna.seq; end + + # Returns strand information of the mRNA (query). + # Returns 'plus', 'minus', or nil. def query_strand; @mrna.strand; end + + # Returns start position of the genomic (target, hit) + # (the first position is 1). def hit_from; @genomic.from; end + + # Returns end position (including its position) of the + # genomic (target, hit). def hit_to; @genomic.to; end + + # Returns the sequence (with gaps) of the genomic (target, hit). def hseq; @genomic.seq; end + + # Returns strand information of the genomic (target, hit). + # Returns 'plus', 'minus', or nil. def hit_strand; @genomic.strand; end end #class SegmentPair + # Segment informations of a segment pair. class Segment + # Creates a new Segment object. + # It is designed to be called internally from + # Bio::Spidey::Report::* classes. + # Users shall not call it directly. def initialize(pos_st, pos_ed, strand = nil, seq = nil) @from = pos_st ? pos_st.to_i : nil *************** *** 142,149 **** @seq = seq end ! attr_reader :from, :to, :strand, :seq end #class Segment class Hit def initialize(data, d0) @data = data --- 278,302 ---- @seq = seq end ! ! 
# start position ! attr_reader :from ! ! # end position ! attr_reader :to ! ! # strand information ! attr_reader :strand ! ! # sequence data ! attr_reader :seq end #class Segment + # Hit object of Spidey result. + # Similar to Bio::Blast::Report::Hit but lacks many methods. class Hit + # Creates a new Hit object. + # It is designed to be called internally from + # Bio::Spidey::Report::* classes. + # Users shall not call it directly. def initialize(data, d0) @data = data *************** *** 151,154 **** --- 304,308 ---- end + # Fetches fields. def field_fetch(t, ary) reg = Regexp.new(/^#{Regexp.escape(t)}\:\s*(.+)\s*$/) *************** *** 161,164 **** --- 315,319 ---- private :field_fetch + # Parses information about strand. def parse_strand x = field_fetch('Strand', @d0) *************** *** 173,176 **** --- 328,334 ---- private :parse_strand + # Returns strand information of the hit. + # Returns 'plus', 'minus', or nil. + # This would be a Bio::Spidey specific method. def strand unless defined?(@strand); parse_strand; end *************** *** 178,181 **** --- 336,342 ---- end + # Returns true if the result reports 'Reverse complement'. + # Otherwise, return false or nil. + # This would be a Bio::Spidey specific method. def complement? unless defined?(@complement); parse_strand; end *************** *** 183,186 **** --- 344,348 ---- end + # Returns number of exons in the hit. def number_of_exons unless defined?(@number_of_exons) *************** *** 190,193 **** --- 352,356 ---- end + # Returns number of splice sites of the hit. def number_of_splice_sites unless defined?(@number_of_splice_sites) *************** *** 198,201 **** --- 361,365 ---- end + # Returns overall percent identity of the hit. def percent_identity unless defined?(@percent_identity) *************** *** 207,210 **** --- 371,375 ---- end + # Returns missing mRNA ends of the hit. 
def missing_mrna_ends unless defined?(@missing_mrna_ends) *************** *** 214,217 **** --- 379,385 ---- end + # Returns sequence informations of the 'Genomic'. + # Returns a Bio::Spidey::Report::SeqDesc object. + # This would be a Bio::Spidey specific method. def genomic unless defined?(@genomic) *************** *** 221,224 **** --- 389,395 ---- end + # Returns sequence informations of the mRNA. + # Returns a Bio::Spidey::Report::SeqDesc object. + # This would be a Bio::Spidey specific method. def mrna unless defined?(@mrna) *************** *** 228,231 **** --- 399,403 ---- end + # Parses segment pairs. def parse_segmentpairs aln = self.align.dup *************** *** 271,275 **** end private :parse_segmentpairs ! def exons unless defined?(@exons); parse_segmentpairs; end --- 443,449 ---- end private :parse_segmentpairs ! ! # Returns exons of the hit. ! # Returns an array of Bio::Spidey::Report::SegmentPair object. def exons unless defined?(@exons); parse_segmentpairs; end *************** *** 277,280 **** --- 451,459 ---- end + # Returns introns of the hit. + # Some of them would contain untranscribed regions. + # Returns an array of Bio::Spidey::Report::SegmentPair objects. + # (Note that intron data is not always available + # according to run-time options of the program.) def introns unless defined?(@introns); parse_segmentpairs; end *************** *** 282,285 **** --- 461,469 ---- end + # Returns segment pairs (exons and introns) of the hit. + # Each segment pair is a Bio::Spidey::Report::SegmentPair object. + # Returns an array of Bio::Spidey::Report::SegmentPair objects. + # (Note that intron data is not always available + # according to run-time options of the program.) def segmentpairs unless defined?(@segmentparis); parse_segmentpairs; end *************** *** 287,290 **** --- 471,477 ---- end + # Returns alignments. + # Returns an Array of arrays. + # This would be a Bio::Spidey specific method. 
+ def align unless defined?(@align); parse_align; end *************** *** 292,295 **** --- 479,483 ---- end + # Parses alignment lines. def parse_align_lines(data) misc = [ [], [], [], [] ] *************** *** 328,331 **** --- 516,520 ---- private :parse_align_lines + # Parses alignments. def parse_align r = [] *************** *** 342,352 **** --- 531,557 ---- private :parse_align + #-- # Bio::BLAST::*::Report::Hit compatible methods + #++ + + # Length of the mRNA (query) sequence. + # Same as Bio::Spidey::Report#query_len. def query_len; mrna.len; end + + # Identifier of the mRNA (query). + # Same as Bio::Spidey::Report#query_id. def query_id; mrna.entry_id; end + + # Definition of the mRNA (query). + # Same as Bio::Spidey::Report#query_def. def query_def; mrna.definition; end + # The genomic (target) sequence length. def target_len; genomic.len; end + + # Identifier of the genomic (target) sequence. def target_id; genomic.entry_id; end + + # Definition of the genomic (target) sequence. def target_def; genomic.definition; end *************** *** 356,370 **** alias hsps exons ! def each(&x); exons.each(&x); end end #class Hit def mrna; @hit.mrna; end #Bio::BLAST::*::Report compatible methods def num_hits; @hits.size; end ! def each_hit(&x); @hits.each(&x); end alias each each_hit def query_def; @hit.mrna.definition; end def query_id; @hit.mrna.entry_id; end def query_len; @hit.mrna.len; end end #class Report --- 561,600 ---- alias hsps exons ! ! # Iterates over each exon of the hit. ! # Yields Bio::Spidey::Report::SegmentPair object. ! def each(&x) #:yields: segmentpair ! exons.each(&x) ! end end #class Hit + # Returns sequence information of the mRNA. + # Returns a Bio::Spidey::Report::SeqDesc object. + # This would be a Bio::Spidey specific method. def mrna; @hit.mrna; end + #-- #Bio::BLAST::*::Report compatible methods + #++ + + # Returns number of hits. + # Same as hits.size. def num_hits; @hits.size; end ! ! # Iterates over each hit. ! # Same as hits.each. !
# Yields a Bio::Spidey::Report::Hit object. ! def each_hit(&x) #:yields: hit ! @hits.each(&x) ! end alias each each_hit + + # Returns definition of the mRNA (query) sequence. def query_def; @hit.mrna.definition; end + + # Returns identifier of the mRNA (query) sequence. def query_id; @hit.mrna.entry_id; end + + # Returns the length of the mRNA (query) sequence. def query_len; @hit.mrna.len; end end #class Report *************** *** 373,605 **** end #module Bio - =begin - - = Bio::Spidey::Report - - --- Bio::Spidey::Report.new(text) - - Creates new Bio::Spidey::Report object from String. - You can use Bio::FlatFile to read a file. - - Currently, result created with options -p 0 (default) or -p 1 - are supported. - - Note that "mRNA" is always regarded as "query" and - "Genomic" is always regarded as "subject"(target, hit). - - --- Bio::Spidey::Report#hits - - Returns an Array of Bio::Spidey::Report::Hit objects. - Because current version of SPIDEY supports only 1 genomic sequences, - the number of hits is 1 or 0. - - --- Bio::Spidey::Report#all_hits - - Returns an Array of Bio::Spidey::Report::Hit objects. - Unlike Bio::Spidey::Report#hits, the method returns - results of all trials of pairwise alignment. - This would be a Bio::Spidey specific method. - - --- Bio::Spidey::Report#each_hit - --- Bio::Spidey::Report#each - - Iterates over each Bio::Spidey::Report::Hit object. - Same as hits.each. - - --- Bio::Spidey::Report#num_hits - - Returns number of hits. - Same as hits.size. - - --- Bio::Spidey::Report#query_id - - Returns the identifier of query sequence. - - --- Bio::Spidey::Report#query_def - - Returns the definition of query sequence. - - --- Bio::Spidey::Report#query_len - - Returns the length of query sequence. - - --- Bio::Spidey::Report#mrna - - Returns sequence informations of "mRNA". - Returns a Bio::Spidey::Report::SeqDesc object. - This would be a Bio::Spidey specific method. - - == Bio::Spidey::Report::Hit - - Hit object of SPIDEY result. 
- Similar to Bio::Blast::Report::Hit but lacks many methods. - - --- Bio::Spidey::Report::Hit#hit_id - --- Bio::Spidey::Report::Hit#target_id - - Returns the identifier of subject sequence. - - --- Bio::Spidey::Report::Hit#definition - --- Bio::Spidey::Report::Hit#target_def - - Returns the identifier of subject sequence. - - --- Bio::Spidey::Report::Hit#len - --- Bio::Spidey::Report::Hit#target_len - - Returns the length of subject sequence. - - --- Bio::Spidey::Report::Hit#query_id - --- Bio::Spidey::Report::Hit#query_def - --- Bio::Spidey::Report::Hit#query_len - - Same as Bio::Spidey::Report#(query_id|query_def|query_len). - - --- Bio::Spidey::Report::Hit#exons - - Returns exons of the hit. - Each exon is a Bio::Spidey::Report::SegmentPair object. - - --- Bio::Spidey::Report::Hit#hsps - - Same as Bio::Spidey::Report#exons - The method aims to provide compatibility between - other homology search program's result objects. - - --- Bio::Spidey::Report::Hit#each - - Iterates over each exon (Bio::Spidey::Report::SegmentPair object) - of the hit. - - --- Bio::Spidey::Report::Hit#segmentpairs - - Returns segment pairs (exons and introns) of the hit. - Each segment pair is a Bio::Spidey::Report::SegmentPair object. - Returns an array of Bio::Spidey::Report::SegmentPair objects. - (Note that intron data is not always available - according to run-time options of the program.) - - --- Bio::Spidey::Report::Hit#introns - - Returns introns of the hit. - Some of them would contain untranscribed regions. - Returns an array of Bio::Spidey::Report::SegmentPair objects. - (Note that intron data is not always available - according to run-time options of the program.) - - --- Bio::Spidey::Report::Hit#mrna - --- Bio::Spidey::Report::Hit#genomic - - Returns sequence informations of "mRNA" or "Genomic", respectively. - Returns a Bio::Spidey::Report::SeqDesc object. - These would be Bio::Spidey specific methods. 
- - --- Bio::Spidey::Report::Hit#strand - - Returns strand information of the hit. - Returns 'plus', 'minus', or nil. - This would be a Bio::Spidey specific method. - - --- Bio::Spidey::Report::Hit#complement? - - Returns true if the result reports 'Reverse complement'. - Otherwise, return false or nil. - This would be a Bio::Spidey specific method. - - --- Bio::Spidey::Report::Hit#align - - Returns alignments. - Returns an Array of arrays. - This would be a Bio::Spidey specific method. - - == Bio::Spidey::Report::SegmentPair - - Sequence segment pair of SPIDEY result. - Similar to Bio::Blast::Report::HSP but lacks many methods. - For mRNA-genome mapping programs, unlike other homology search programs, - the class is used not only for exons but also for introns. - (Note that intron data would not be available according to run-time - options of the program.) - - --- Bio::Spidey::Report::SegmentPair#query_from - --- Bio::Spidey::Report::SegmentPair#query_to - --- Bio::Spidey::Report::SegmentPair#qseq - - --- Bio::Spidey::Report::SegmentPair#hit_from - --- Bio::Spidey::Report::SegmentPair#hit_to - --- Bio::Spidey::Report::SegmentPair#hseq - - --- Bio::Spidey::Report::SegmentPair#query_strand - --- Bio::Spidey::Report::SegmentPair#hit_strand - - Returns strand information of query or hit, respectively. - Returns 'plus', 'minus', or nil. - - --- Bio::Spidey::Report::SegmentPair#gaps - - Returns gaps. - - --- Bio::Spidey::Report::SegmentPair#mismatches - --- Bio::Spidey::Report::SegmentPair#mismatch_count - - Returns mismatches. - - --- Bio::Spidey::Report::SegmentPair#midline - - Returns the "midline" of the segment pair. - Returns nil if no alignment data are available. - - --- Bio::Spidey::Report::SegmentPair#percent_identity - - Returns percent identity of the segment pair. - - --- Bio::Spidey::Report::SegmentPair#align_len - - Returns alignment length of the segment pair. - Returns nil if no alignment data are available. 
- - --- Bio::Spidey::Report::SegmentPair#aaseqline - - Returns amino acide sequence in alignment. - Returns String, because white spaces in the result is also important. - Returns nil if no alignment data are available. - This would be a Bio::Spidey specific methods. - - --- Bio::Spidey::Report::SegmentPair#splice_site - - Returns splice site information. - Returns a hash which contains :d and :a for keys and - 0, 1, or nil for values. - This would be a Bio::Spidey specific methods. - - --- Bio::Spidey::Report::SegmentPair#mrna - --- Bio::Spidey::Report::SegmentPair#genomic - - Returns segment informations of 'mRNA' or 'Genomic', respectively. - Returns a Bio::Spidey::Report::Segment object. - These would be Bio::Spidey specific methods. - - == Bio::Spidey::Report::Segment - - Segment informations of a segment pair. - - --- Bio::Spidey::Report::Segment#from - --- Bio::Spidey::Report::Segment#to - --- Bio::Spidey::Report::Segment#seq - --- Bio::Spidey::Report::Segment#strand - - == Bio::Spidey::Report::SeqDesc - - Sequence information of query or subject. - - --- Bio::Spidey::Report::SeqDesc#entry_id - --- Bio::Spidey::Report::SeqDesc#definition - --- Bio::Spidey::Report::SeqDesc#len - - = References - - * (()) - Wheelan, S.J., et al., Spidey: a tool for mRNA-to-genomic alignments, - Genome Research, 11, 1952--1957, 2001. - - =end --- 603,604 ---- From ngoto at pub.open-bio.org Mon Oct 31 06:07:07 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:22 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blast format0.rb,1.13,1.14 Message-ID: <200510311107.j9VB77VL032114@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blast In directory pub.open-bio.org:/tmp/cvs-serv32104/blast Modified Files: format0.rb Log Message: added references to document. added #:nodoc: to Bio, Bio::Blast, Bio::Blast::Default. 
Index: format0.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blast/format0.rb,v retrieving revision 1.13 retrieving revision 1.14 diff -C2 -d -r1.13 -r1.14 *** format0.rb 26 Oct 2005 09:12:06 -0000 1.13 --- format0.rb 31 Oct 2005 11:07:05 -0000 1.14 *************** *** 6,9 **** --- 6,10 ---- # License:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 19,25 **** --- 20,37 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # NCBI BLAST default (-m 0 option) output parser. + # + # == References + # + # * Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, + # Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), + # "Gapped BLAST and PSI-BLAST: a new generation of protein database search + # programs", Nucleic Acids Res. 25:3389-3402. + # * http://www.ncbi.nlm.nih.gov/blast/ + # begin *************** *** 29,38 **** require 'singleton' #require 'bio/db' require 'bio/io/flatfile' ! module Bio ! class Blast ! module Default # Bio::Blast::Default::Report parses NCBI BLAST default output --- 41,52 ---- require 'singleton' + #-- #require 'bio/db' + #++ require 'bio/io/flatfile' ! module Bio #:nodoc: ! class Blast #:nodoc: ! module Default #:nodoc: # Bio::Blast::Default::Report parses NCBI BLAST default output *************** *** 1387,1402 **** ###################################################################### - - =begin - - = Bio::Blast::Default::Report - - NCBI BLAST default (-m 0 option) output parser - - = Bio::Blast::Default::Report_TBlast - - NCBI BLAST default (-m 0 option) output parser for TBLAST. - All methods are equal to Bio::Blast::Default::Report. - Only DELIMITER (and RS) is different. 
- - =end --- 1401,1402 ---- From nakao at pub.open-bio.org Mon Oct 31 05:27:00 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:54:27 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl psort.rb,1.6,1.7 Message-ID: <200510311027.j9VAR0VL031995@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl In directory pub.open-bio.org:/tmp/cvs-serv31985/lib/bio/appl Modified Files: psort.rb Log Message: * Updated rdoc. Index: psort.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/psort.rb,v retrieving revision 1.6 retrieving revision 1.7 diff -C2 -d -r1.6 -r1.7 *** psort.rb 9 Sep 2005 15:48:17 -0000 1.6 --- psort.rb 31 Oct 2005 10:26:58 -0000 1.7 *************** *** 1,6 **** # ! # bio/appl/psort.rb - PSORT, protein sorting site prediction systems # ! # Copyright (C) 2003 Mitsuteru C. Nakao # # This library is free software; you can redistribute it and/or --- 1,29 ---- # ! # = bio/appl/psort.rb - PSORT, protein sorting site prediction systems # ! # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao ! # License:: LGPL ! # ! # ! # $Id$ ! # ! # == A client for PSORT WWW Server ! # ! # A client for PSORT WWW Server for predicting protein subcellular ! # localization. ! # ! # PSORT family members, ! # 1. PSORT ! # 2. PSORT II ! # 3. iPSORT ! # 4. PSORT-B http://psort.org ! # 5. WoLF-PSORT ! # ! # See http://psort.ims.u-tokyo.ac.jp. ! # ! # === Example ! # ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 18,26 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # - - require 'bio/sequence' require 'bio/db/fasta' --- 41,47 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! #++ # require 'bio/sequence' require 'bio/db/fasta' *************** *** 31,36 **** module Bio ! 
class PSORT WWWServer = { 'IMSUT' => {'host' => 'psort.hgc.jp', #'psort.ims.u-tokyo.ac.jp', --- 52,65 ---- module Bio ! + + class PSORT + # a Hash for PSORT official hosts: + # Key value (host) + # ------- ----------------------- + # IMSUT psort.ims.u-tokyo.ac.jp + # Okazaki psort.nibb.ac.jp + # Peking srs.pku.edu.cn:8088 WWWServer = { 'IMSUT' => {'host' => 'psort.hgc.jp', #'psort.ims.u-tokyo.ac.jp', *************** *** 46,54 **** ! # Command-line client super class ! ! # HTTP client super class ! # inherited claaes should have make_form_args and parse_html method. class CGIDriver def initialize(host = '', path = '') @host = host --- 75,108 ---- ! # = Generic CGI client class ! # A generic CGI client class for Bio::PSORT::* classes. ! # The class provides an interface for CGI argument processing and output ! # report parsing. ! # ! # == Example ! # ! # class NewClient < CGIDriver ! # def initialize(host, path) ! # super(host, path) ! # end ! # end ! # private ! # def make_args(query) ! # # ... ! # end ! # def parse_report(output) ! # # ... ! # end ! # class CGIDriver + + # CGI query argument in Hash ({key => value, ...}). + attr_accessor :args + + # CGI output raw text + attr_reader :report + + + # Sets remote ``host'' and cgi ``path''. def initialize(host = '', path = '') @host = host *************** *** 57,65 **** @report end - attr_accessor :args - attr_reader :report ! # CGIDriver#exec(query) -> aReport def exec(query) data = make_args(query) --- 111,117 ---- @report end ! # Executes a CGI ``query'' and returns aReport def exec(query) data = make_args(query) *************** *** 74,94 **** end - private def make_args(args_hash) # The routin should be provided in the inherited class end def parse_report(result_body) # The routin should be provided in the inherited class end ! # tools ! def erase_html_tags(str) return str.gsub(/<\S.*?>/,'') end def args_join(hash, delim = '&') tmp = [] --- 126,147 ---- end private + # Bio::CGIDriver#make_args. An API skelton. 
def make_args(args_hash) # The routin should be provided in the inherited class end + # Bio::CGIDriver#parse_report. An API skelton. def parse_report(result_body) # The routin should be provided in the inherited class end ! # Erases HTML tags def erase_html_tags(str) return str.gsub(/<\S.*?>/,'') end + # Returns CGI argument text in String (key=value&) from a Hash ({key=>value}). def args_join(hash, delim = '&') tmp = [] *************** *** 103,111 **** ! class PSORT1 autoload :Report, 'bio/appl/psort/report' def self.imsut self.new(Remote.new(WWWServer['IMSUT']['host'], --- 156,181 ---- ! # = Bio::PSORT::PSORT1 ! # Bio::PSORT::PSORT1 is a wapper class for the original PSORT program. ! # ! # == Example ! # ! # serv = Bio::PSORT::PSORT1.imsut ! # serv.title = 'Query_title_splited_by_white space' ! # serv.exec(seq, false) # seq.class => String ! # serv.exec(seq) ! # report = serv.exec(Bio::FastaFormat.new(seq)) ! # report_raw = serv.exec(Bio::FastaFormat.new(seq), false) ! # ! # == References ! # 1. Nakai, K. and Kanehisa, M., A knowledge base for predicting protein ! # localization sites in eukaryotic cells, Genomics 14, 897-911 (1992). ! # [PMID:1478671] class PSORT1 autoload :Report, 'bio/appl/psort/report' + # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) + # connecting to the IMSUT server. def self.imsut self.new(Remote.new(WWWServer['IMSUT']['host'], *************** *** 113,116 **** --- 183,189 ---- end + + # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) + # connecting to the NIBB server. def self.okazaki self.new(Remote.new(WWWServer['Okazaki']['host'], *************** *** 118,121 **** --- 191,197 ---- end + + # Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) + # connecting to the Peking server. def self.peking self.new(Remote.new(WWWServer['Peking']['host'], *************** *** 123,137 **** end ! # ! def initialize(serv) ! @serv = serv ! 
@origin = 'yeast' # Gram-positive bacterium, Gram-negative bacterium, # yeast, aminal, plant @title = 'MYSEQ' @sequence = '' end - attr_accessor :origin, :sequence, :title ! # def exec(faa, parsing = true) if faa.class == Bio::FastaFormat --- 199,238 ---- end ! ! # Sets a server CGI Driver (Bio::PSORT::PSORT1::Remote). ! def initialize(driver, origin = 'yeast') ! @serv = driver ! @origin = origin # Gram-positive bacterium, Gram-negative bacterium, # yeast, aminal, plant @title = 'MYSEQ' @sequence = '' end ! ! # An accessor of the origin argument. Default setting is "yeast". ! # Usable values: ! # 1. Gram-positive bacterium ! # 2. Gram-negative bacterium ! # 3. yeast ! # 4. animal ! # 5. plant ! attr_accessor :origin ! ! # An accessor of the query sequence argument. ! attr_accessor :sequence ! ! # An accessor of the title argument. Default setting is 'MYSEQ'. ! # The value is automatically setted if you use a query in ! # Bio::FastaFormat. ! attr_accessor :title ! ! ! # Executes the query (faa) and returns an Bio::PSORT::PSORT1::Report. ! # ! # The ``faa'' argument is acceptable a sequence both in String and in ! # Bio::FastaFormat. ! # ! # If you set the second argument is ``parsing = false'', ! # returns ourput text without any parsing. def exec(faa, parsing = true) if faa.class == Bio::FastaFormat *************** *** 147,166 **** ! # PSORT1 specific CGIDriver class Remote < CGIDriver def initialize(host, path) ! @origin = 'yeast' # Gram-positive bacterium, ! # Gram-negative bacterium, ! # yeast, aminal, plant @title = 'MYSEQ' @parsing = true super(host, path) end - attr_accessor :origin, :title, :parsing - private def make_args(query) @args.update({'sequence' => query}) --- 248,282 ---- ! # = Bio::PSORT::PSORT1::Remote ! # PSORT1 specific CGIDriver. class Remote < CGIDriver + # Accessor for Bio::PSORT::PSORT1::Remote#origin to contein target domain. + # Taget domains: + # 1. Gram-positive bacterium + # 2. Gram-negative bacterium + # 3. yeast + # 4. animal + # 5. 
plant + attr_accessor :origin + + # Accessor for Bio::POSRT::PSORT1#sequence to contein the query sequence. + attr_accessor :title + + # Accessor for Bio::PSORT::PSORT1#title to contain the query title. + attr_accessor :parsing + + # Sets remote ``host'' and cgi ``path''. def initialize(host, path) ! @origin = 'yeast' @title = 'MYSEQ' @parsing = true super(host, path) end private + # Returns parsed CGI argument. + # An API implementation. def make_args(query) @args.update({'sequence' => query}) *************** *** 168,171 **** --- 284,290 ---- end + + # Returns parsed output report. + # An API implementation. def parse_report(str) str = erase_html_tags(str) *************** *** 179,198 **** ! ! # Nakai and Horton 1999 TiBS class PSORT2 autoload :Report, 'bio/appl/psort/report' ! # remote def self.remote(host, path) self.new(Remote.new(host, path)) end def self.imsut self.remote(WWWServer['IMSUT']['host'], WWWServer['IMSUT']['PSORT2']) end ! def self.okazaki self.remote(WWWServer['Okazaki']['host'], --- 298,343 ---- ! # = Bio::PSORT::PSORT2 ! # Bio::PSORT::PSORT2 is a wapper class for the original PSORT program. ! # ! # == Example ! # ! # serv = Bio::PSORT::PSORT2.imsut ! # serv.title = 'Query_title_splited_by_white space' ! # serv.exec(seq, false) # seq.class => String ! # serv.exec(seq) ! # report = serv.exec(Bio::FastaFormat.new(seq)) ! # report_raw = serv.exec(Bio::FastaFormat.new(seq), false) ! # ! # == References ! # 1. Nakai, K. and Horton, P., PSORT: a program for detecting the sorting ! # signals of proteins and predicting their subcellular localization, ! # Trends Biochem. Sci, 24(1) 34-35 (1999). ! # [PMID:10087920] class PSORT2 autoload :Report, 'bio/appl/psort/report' ! # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote). ! # ! # PSORT official hosts: ! # key host path ! # ------- ----------------------- -------------------- --------- ! # IMSUT psort.ims.u-tokyo.ac.jp /cgi-bin/runpsort.pl (default) ! 
# Okazaki psort.nibb.ac.jp /cgi-bin/runpsort.pl ! # Peking srs.pku.edu.cn:8088 /cgi-bin/runpsort.pl def self.remote(host, path) self.new(Remote.new(host, path)) end + # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote) + # connecting to the IMSUT server. def self.imsut self.remote(WWWServer['IMSUT']['host'], WWWServer['IMSUT']['PSORT2']) end ! ! # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote) ! # connecting to the NIBB server. def self.okazaki self.remote(WWWServer['Okazaki']['host'], *************** *** 200,203 **** --- 345,350 ---- end + # Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote) + # connecting to the Peking server. def self.peking self.remote(WWWServer['Peking']['host'], *************** *** 205,216 **** end ! # wrapper for ``psort'' command ! def initialize(serv, origin = 'yeast') ! @serv = serv @origin = origin ! @title = nil end - attr_accessor :origin, :title def exec(faa, parsing = true) if faa.class == Bio::FastaFormat --- 352,375 ---- end ! # An accessor of the origin argument. ! # Default setting is ``yeast''. ! attr_accessor :origin ! ! # An accessor of the title argument. Default setting is ``QUERY''. ! # The value is automatically setted if you use a query in ! # Bio::FastaFormat. ! attr_accessor :title ! ! # Sets a server CGI Driver (Bio::PSORT::PSORT2::Remote). ! def initialize(driver, origin = 'yeast') ! @serv = driver @origin = origin ! @title = '' end + + # Executes PSORT II prediction and returns Report object + # (Bio::PSORT::PSORT2::Report) if parsing = true. + # Returns PSORT II report in text if parsing = false. def exec(faa, parsing = true) if faa.class == Bio::FastaFormat *************** *** 226,231 **** --- 385,393 ---- + # = Bio::PSORT::PSORT2::Remote # PSORT2 specific CGIDriver class Remote < CGIDriver + + # Sets remote ``host'' and cgi ``path''. def initialize(host, path) @origin = 'yeast' *************** *** 233,241 **** @parsing = true end ! 
attr_accessor :origin, :parsing private ! def make_args(query) @args.update({'sequence' => query}) --- 395,412 ---- @parsing = true end ! ! # An accessor of the origin argument. ! # Default setting is ``yeast''. ! attr_accessor :origin ! ! # An accessor of the output parsing. ! # Default setting is ``true''. ! attr_accessor :parsing private ! ! # Returns parsed CGI argument. ! # An API implementation. def make_args(query) @args.update({'sequence' => query}) *************** *** 243,246 **** --- 414,420 ---- end + + # Returns parsed output report. + # An API implementation. def parse_report(str) str = str.gsub(/\n
    /i, Report::BOUNDARY) *************** *** 262,265 **** --- 436,442 ---- end # class PSORTB + class WoLF_PSORT + end # class PSORTB + end # class PSORT *************** *** 296,300 **** " - puts "\n Bio::PSORT::PSORT" --- 473,476 ---- *************** *** 377,505 **** p eval("report.#{method}") end - end - - - - - - =begin - - = Bio::PSORT - - Wrapper classes of PSORT family for predicting protein subcellular - localization. - (()) - - PSORT family contains, - (1) PSORT - (2) PSORT II - (3) iPSORT - (4) PSORT-B (()) - - - - --- Bio::PSORT::WWWServer - - Constants for PSORT official hosts: - - Key value (host) - ------- ----------------------- - IMSUT psort.ims.u-tokyo.ac.jp - Okazaki psort.nibb.ac.jp - Peking srs.pku.edu.cn:8088 - - - - = Bio::PSORT::PSORT1 - - Bio::PSORT::PSORT1 is a wapper class for original PSORT program. - - - --- Bio::PSORT::PSORT1.remote(host, path) - - Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote). - - --- Bio::PSORT::PSORT1.imsut - - Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) - to the IMSUT server. - - --- Bio::PSORT::PSORT1.okazaki - - Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) - to the NIBB server. - - --- Bio::PSORT::PSORT1.peking - - Returns a PSORT1 CGI Driver object (Bio::PSORT::PSORT1::Remote) - to the Peking server. - - - --- Bio::PSORT::PSORT1#origin - - An accessor of the origin argument. Default setting is "yeast". - Usable values: - ----------------------- - Gram-positive bacterium - Gram-nebative bacterium - yeast - animal - plant - - - --- Bio::PSORT::PSORT1#title - - An accessor of the title argument. Default setting is 'MYSEQ'. - The value is automatically setted if you use a query in - Bio::FastaFormat. - - --- Bio::PSORT::PSORT1#exec(faa, parsing = true) -> aRpt - - Execute a query. - Returns a PSORT1::Report instance if parsing = true. - Returns a PSORT1 report in text format if parsing = false. 
- - - - - - = Bio::PSORT::PSORT2 - - --- Bio::PSORT::PSORT2.remote(host, path) - - Returns a PSORT2 CGI Driver object (Bio::PSORT::PSORT2::Remote). - - PSORT official hosts: - key host path - ------- ----------------------- -------------------- --------- - IMSUT psort.ims.u-tokyo.ac.jp /cgi-bin/runpsort.pl (default) - Okazaki psort.nibb.ac.jp /cgi-bin/runpsort.pl - Peking srs.pku.edu.cn:8088 /cgi-bin/runpsort.pl - - --- Bio::PSORT::PSORT2.imsut - --- Bio::PSORT::PSORT2.okazaki - --- Bio::PSORT::PSORT2.peking - - - --- Bio::PSORT::PSORT2#origin - - Accessor of the origin argument. - Default setting is 'yeast'. - - --- Bio::PSORT::PSORT2#title - - Accessor of the title argument. Default setting is 'QUERY'. - The value is automatically setted if you use a query in - Bio::FastaFormat. - - --- Bio::PSORT::PSORT2#exec(faa, parsing = true) - - Executes PSORT II prediction and returns Report object - (Bio::PSORT::PSORT2::Report) if parsing = true. - Returns PSORT II report in text if parsing = false. - - - =end - - --- 553,555 ---- From ngoto at pub.open-bio.org Mon Oct 31 04:29:16 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:34 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/blat report.rb,1.2,1.3 Message-ID: <200510310929.j9V9TGVL031826@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/blat In directory pub.open-bio.org:/tmp/cvs-serv31816 Modified Files: report.rb Log Message: changed RD to RDoc Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/blat/report.rb,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** report.rb 8 Sep 2005 01:22:09 -0000 1.2 --- report.rb 31 Oct 2005 09:29:14 -0000 1.3 *************** *** 1,7 **** # ! # bio/appl/blat/report.rb - BLAT result parser # ! 
# Copyright (C) 2004 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! # = bio/appl/blat/report.rb - BLAT result parser # ! # Copyright:: Copyright (C) 2004 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,23 **** --- 19,41 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # BLAT result parser (psl / pslx format). + # + # == Important Notes + # + # In BLAT results, the start position of a sequnece is numbered as 0. + # On the other hand, in many other homology search programs, + # the start position of a sequence is numbered as 1. + # To keep compatibility, the BLAT parser adds 1 to every position number. + # + # == References + # + # * Kent, W.J., BLAT--the BLAST-like alignment tool, + # Genome Research, 12, 656--664, 2002. + # http://www.genome.org/cgi/content/abstract/12/4/656 + # require 'bio' *************** *** 25,31 **** --- 43,69 ---- module Bio class Blat + + # Bio::Blat::Report is a BLAT report parser class. + # Its object may contain some Bio::Blat::Report::Hits objects. + # + # In BLAT results, the start position of a sequnece is numbered as 0. + # On the other hand, in many other homology search programs, + # the start position of a sequence is numbered as 1. + # To keep compatibility, the BLAT parser adds 1 to every position number. + # + # Note that Bio::Blat::Report#query_def, #query_id, #query_len methods + # simply return first hit's query_*. + # If multiple query sequences are given, these values + # will be incorrect. + # class Report #< DB + # Delimiter of each entry. Bio::FlatFile uses it. + # In Bio::Blat::Report, it it nil (1 entry 1 file). 
DELIMITER = RS = nil # 1 file 1 entry + # Creates a new Bio::Blat::Report object from BLAT result text (String). + # You can use Bio::FlatFile to read a file. + # Currently, results created with options -out=psl (default) or + # -out=pslx are supported. def initialize(text) flag = false *************** *** 46,51 **** @columns = parse_header(head) end - attr_reader :hits, :columns def parse_header(ary) ary.shift # first line is removed --- 84,98 ---- @columns = parse_header(head) end + # hits of the result. + # Returns an Array of Bio::Blat::Report::Hit objects. + attr_reader :hits + + # Returns descriptions of columns. + # Returns an Array. + # This would be a Bio::Blat specific method. + attr_reader :columns + + # Parses headers. def parse_header(ary) ary.shift # first line is removed *************** *** 63,67 **** --- 110,120 ---- private :parse_header + # Bio::Blat::Report::SeqDesc stores sequence information of + # query or subject of the BLAT report. + # It also includes some hit information. class SeqDesc + # Creates a new SeqDesc object. + # It is designed to be called internally from Bio::Blat::Report class. + # Users shall not use it directly. def initialize(gap_count, gap_bases, name, size, st, ed, starts, seqs) *************** *** 75,83 **** @seqs = seqs end ! attr_reader :gap_count, :gap_bases, ! :name, :size, :start, :end, :starts, :seqs end #class SeqDesc class SegmentPair def initialize(query_len, strand, blksize, qstart, tstart, qseq, tseq) --- 128,158 ---- @seqs = seqs end ! # gap count ! attr_reader :gap_count ! # gap bases ! attr_reader :gap_bases ! # name of the sequence ! attr_reader :name ! # length of the sequence ! attr_reader :size ! # start position of the first segment ! attr_reader :start ! # end position of the final segment ! attr_reader :end ! # start positions of segments. ! # Returns an array of numbers. ! attr_reader :starts ! # sequences of segments. ! # Returns an array of String. ! # Returns nil if there are no sequence data. ! 
attr_reader :seqs end #class SeqDesc + # Sequence segment pair of BLAT result. + # Similar to Bio::Blast::Report::Hsp but lacks many methods. class SegmentPair + # Creates a new SegmentPair object. + # It is designed to be called internally from Bio::Blat::Report class. + # Users shall not use it directly. def initialize(query_len, strand, blksize, qstart, tstart, qseq, tseq) *************** *** 107,114 **** end end ! attr_reader :query_from, :query_to, :qseq, :query_strand ! attr_reader :hit_from, :hit_to, :hseq, :hit_strand ! attr_reader :blocksize def align_len @qseq ? @qseq.size : nil --- 182,227 ---- end end ! # Returns query start position. ! # CAUTION: In Blat's raw result(psl format), first position is 0. ! # To keep compatibility, the parser add 1 to the position. ! attr_reader :query_from ! ! # Returns query end position. ! # CAUTION: In Blat's raw result(psl format), first position is 0. ! # To keep compatibility, the parser add 1 to the position. ! attr_reader :query_to ! ! # Returns query sequence. ! # If sequence data is not available, returns nil. ! attr_reader :qseq ! ! # Returns strand information of the query. ! # Returns 'plus' or 'minus'. ! attr_reader :query_strand ! ! # Returns target (subject, hit) start position. ! # CAUTION: In Blat's raw result(psl format), first position is 0. ! # To keep compatibility, the parser add 1 to the position. ! attr_reader :hit_from ! ! # Returns target (subject, hit) end position. ! # CAUTION: In Blat's raw result(psl format), first position is 0. ! # To keep compatibility, the parser add 1 to the position. ! attr_reader :hit_to + # Returns the target (subject, hit) sequence. + # If sequence data is not available, returns nil. + attr_reader :hseq + + # Returns strand information of the target (subject, hit). + # Returns 'plus' or 'minus'. + attr_reader :hit_strand + + # Returns block size (length) of the segment pair. + # This would be a Bio::Blat specific method. 
+ attr_reader :blocksize + + # Returns alignment length of the segment pair. + # Returns nil if no alignment data are available. def align_len @qseq ? @qseq.size : nil *************** *** 116,125 **** --- 229,248 ---- end #class SegmentPair + # Hit class for the BLAT result parser. + # Similar to Bio::Blast::Report::Hit but lacks many methods. + # Its object may contain some Bio::Blat::Report::SegmentPair objects. class Hit + # Creates a new Hit object from a piece of BLAT result text. + # It is designed to be called internally from Bio::Blat::Report object. + # Users shall not use it directly. def initialize(str) @data = str.chomp.split(/\t/) end + + # Raw data of the hit. + # (Note that it doesn't add 1 to position numbers.) attr_reader :data + # split comma-separeted text def split_comma(str) str.to_s.sub(/\s*\,+\s*\z/, '').split(/\s*\,\s*/) *************** *** 127,130 **** --- 250,256 ---- private :split_comma + # Returns sequence informations of the query. + # Returns a Bio::Blat::Report::SeqDesc object. + # This would be Bio::Blat specific method. def query unless defined?(@query) *************** *** 136,139 **** --- 262,268 ---- end + # Returns sequence informations of the target(hit). + # Returns a Bio::Blat::Report::SeqDesc object. + # This would be Bio::Blat specific method. def target unless defined?(@target) *************** *** 145,156 **** --- 274,296 ---- end + # Match nucleotides. def match; @data[0].to_i; end + # Mismatch nucleotides. def mismatch; @data[1].to_i; end + # rep. match (???) def rep_match; @data[2].to_i; end + # N's (???) def n_s; @data[3].to_i; end + # Returns strand information of the hit. + # Returns '+' or '-'. + # This would be a Bio::Blat specific method. def strand; @data[8]; end + + # Number of blocks(exons, segment pairs). def block_count; @data[17].to_i; end + # Sizes of all blocks(exons, segment pairs). + # Returns an array of numbers. 
def block_sizes unless defined?(@block_sizes) then *************** *** 160,163 **** --- 300,305 ---- end + # Returns blocks(exons, segment pairs) of the hit. + # Returns an array of Bio::Blat::Report::SegmentPair objects. def blocks unless defined?(@blocks) *************** *** 176,200 **** alias exons blocks # Bio::BLAST::*::Report::Hit compatible methods def query_len; query.size; end def query_def; query.name; end alias query_id query_def def target_len; target.size; end def target_def; target.name; end alias target_id target_def - - alias len target_len alias definition target_def ! alias hsps blocks ! def each(&x); exons.each(&x); end end #class Hit #Bio::BLAST::*::Report compatible methods def num_hits; @hits.size; end ! def each_hit(&x); @hits.each(&x); end alias each each_hit def query_def; (x = @hits.first) ? x.query_def : nil; end def query_len; (x = @hits.first) ? x.query_len : nil; end alias query_id query_def --- 318,374 ---- alias exons blocks + #-- # Bio::BLAST::*::Report::Hit compatible methods + #++ + alias hsps blocks + + # Returns the length of query sequence. def query_len; query.size; end + + # Returns the name of query sequence. def query_def; query.name; end alias query_id query_def + # Returns the length of the target(subject) sequence. def target_len; target.size; end + alias len target_len + + # Returns the name of the target(subject) sequence. def target_def; target.name; end alias target_id target_def alias definition target_def ! #Iterates over each block(exon, segment pair) of the hit. ! # Yields a Bio::Blat::Report::SegmentPair object. ! def each(&x) #:yields: segmentpair ! exons.each(&x) ! end end #class Hit + #-- #Bio::BLAST::*::Report compatible methods + #++ + + # Returns number of hits. + # Same as hits.size. def num_hits; @hits.size; end ! ! # Iterates over each Bio::Blat::Report::Hit object. ! # Same as hits.each. ! def each_hit(&x) #:yields: hit ! @hits.each(&x) ! 
end alias each each_hit + + # Returns the name of query sequence. + # CAUTION: query_* methods simply return first hit's query_*. + # If multiple query sequences are given, these values + # will be incorrect. def query_def; (x = @hits.first) ? x.query_def : nil; end + + # Returns the length of query sequence. + # CAUTION: query_* methods simply return first hit's query_*. + # If multiple query sequences are given, these values + # will be incorrect. def query_len; (x = @hits.first) ? x.query_len : nil; end alias query_id query_def *************** *** 209,387 **** BLAT result parser. (psl / pslx format) - - --- Bio::Blat::Report.new(text) - - Creates new Bio::Blat::Report object from String. - You can use Bio::FlatFile to read a file. - - Currently, results created with options -out=psl (default) or - -out=pslx are supported. - - --- Bio::Blat::Report#hits - - Returns an Array of Bio::Blat::Report::Hit objects. - - --- Bio::Blat::Report#each_hit - --- Bio::Blat::Report#each - - Iterates over each Bio::Blat::Report::Hit object. - Same as hits.each. - - --- Bio::Blat::Report#num_hits - - Returns number of hits. - Same as hits.size. - - --- Bio::Blat::Report#query_id - - Returns the identifier of query sequence. - This method is alias of query_def method. - CAUTION: query_* methods simply return first hit's query_*. - If multiple query sequences are given, these values - will be incorrect. - - --- Bio::Blat::Report#query_def - - Returns the name of query sequence. - CAUTION: query_* methods simply return first hit's query_*. - If multiple query sequences are given, these values - will be incorrect. - - --- Bio::Blat::Report#query_len - - Returns the length of query sequence. - CAUTION: query_* methods simply return first hit's query_*. - If multiple query sequences are given, these values - will be incorrect. - - --- Bio::Blat::Report#columns - - Returns descriptions of columns. - Returns an Array. - This would be a Bio::Blat specific method. 
- - == Bio::Blat::Report::Hit - - Hit object. - Similar to Bio::Blast::Report::Hit but lacks many methods. - - --- Bio::Blat::Report::Hit#hit_id - --- Bio::Blat::Report::Hit#target_id - - Returns the identifier of subject sequence. - This method is alias of target_def method. - - --- Bio::Blat::Report::Hit#definition - --- Bio::Blat::Report::Hit#target_def - - Returns the name of subject sequence. - - --- Bio::Blat::Report::Hit#len - --- Bio::Blat::Report::Hit#target_len - - Returns the length of subject sequence. - - --- Bio::Blat::Report::Hit#query_id - - Returns the identifier of query sequence. - This method is alias of query_def method. - - --- Bio::Blat::Report::Hit#query_def - - Returns the name of query sequence. - - --- Bio::Blat::Report::Hit#query_len - - Returns the length of query sequence. - - --- Bio::Blat::Report::Hit#blocks - --- Bio::Blat::Report::Hit#exons - - Returns blocks(exons) of the hit. - Each exon is a Bio::Blat::Report::SegmentPair object. - - --- Bio::Blat::Report::Hit#hsps - - Same as Bio::Blat::Report#exons - The method aims to provide compatibility between - other homology search program's result objects. - - --- Bio::Blat::Report::Hit#each - - Iterates over each exon (Bio::Blat::Report::SegmentPair object) - of the hit. - - --- Bio::Blat::Report::Hit#query - --- Bio::Blat::Report::Hit#target - - Returns sequence informations of "query" or "target", respectively. - Returns a Bio::Blat::Report::SeqDesc object. - These would be Bio::Blat specific methods. - - --- Bio::Blat::Report::Hit#data - - Returns raw data. - Returns an Array. - These would be Bio::Blat specific methods. - - --- Bio::Blat::Report::Hit#strand - - Returns strand information of the hit. - Returns '+' or '-'. - This would be a Bio::Blat specific method. - - == Bio::Blat::Report::SegmentPair - - Sequence segment pair of BLAT result. - Similar to Bio::Blast::Report::HSP but lacks many methods. 
- - --- Bio::Blat::Report::SegmentPair#query_from - - Returns query start position. - Note that first position is 1. - CAUTION: In Blat's raw result(psl format), first position is 0. - However, we add 1 to the position to keep compatibility. - - --- Bio::Blat::Report::SegmentPair#query_to - - Returns query end position. - - --- Bio::Blat::Report::SegmentPair#qseq - - Returns query sequence. - - --- Bio::Blat::Report::SegmentPair#hit_from - --- Bio::Blat::Report::SegmentPair#hit_to - --- Bio::Blat::Report::SegmentPair#hseq - - --- Bio::Blat::Report::SegmentPair#query_strand - --- Bio::Blat::Report::SegmentPair#hit_strand - - Returns strand information of query or hit, respectively. - Returns 'plus' or 'minus'. - - --- Bio::Blat::Report::SegmentPair#align_len - - Returns alignment length of the segment pair. - Returns nil if no alignment data are available. - - --- Bio::Blat::Report::SegmentPair#blocksize - - Returns block size (length) of the segment pair. - This would be a Bio::Blat specific method. - - == Bio::Blat::Report::SeqDesc - - Sequence information of query or target. - It also includes some hit information. 
- - --- Bio::Blat::Report::SeqDesc#gap_count - --- Bio::Blat::Report::SeqDesc#gap_bases - --- Bio::Blat::Report::SeqDesc#name - --- Bio::Blat::Report::SeqDesc#size - --- Bio::Blat::Report::SeqDesc#start - --- Bio::Blat::Report::SeqDesc#end - --- Bio::Blat::Report::SeqDesc#starts - --- Bio::Blat::Report::SeqDesc#seqs = References --- 383,386 ---- From k at pub.open-bio.org Mon Oct 31 04:12:05 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Tue Nov 1 16:54:35 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/hmmer report.rb,1.8,1.9 Message-ID: <200510310912.j9V9C5VL031768@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/hmmer In directory pub.open-bio.org:/tmp/cvs-serv31374/lib/bio/appl/hmmer Modified Files: report.rb Log Message: * Rewrited and contributed by Masashi Fujita * DELIMITER, RS is defined for multiple reports * Fixed for multiple reports - not expect first few banner lines * Fixed for a bug that hangs on no hit * Added support for rare tags such as CS and RF * Bio::HMMER::Report#Hsp#csline, rfline methods are added * Bio::HMMER::Report#hsps method is added for the result which contains HSP but no Hit (caused by some combinations of threshold parameters of HSP and Hit). * Returned strings of description etc. is now stripped * Changed unnecessary accessers to readers Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/hmmer/report.rb,v retrieving revision 1.8 retrieving revision 1.9 diff -C2 -d -r1.8 -r1.9 *** report.rb 26 Sep 2005 13:00:05 -0000 1.8 --- report.rb 31 Oct 2005 09:12:03 -0000 1.9 *************** *** 1,6 **** # ! # bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parser # # Copyright (C) 2002 Hiroshi Suga # # This library is free software; you can redistribute it and/or --- 1,7 ---- # ! 
# bio/appl/hmmer/report.rb - hmmsearch, hmmpfam parserer # # Copyright (C) 2002 Hiroshi Suga + # Copyright (C) 2005 Masashi Fujita # # This library is free software; you can redistribute it and/or *************** *** 25,123 **** module Bio class HMMER class Report def initialize(data) - # HMM and sequence profiles - data.sub!(/(.+ -$\n)(.+ -$\n)\n(.+?\n\n)Scores/m, '') ! @program = parse_program($1) ! @parameter = parse_parameter($2) ! @query_info =parse_query_info($3) ! case @program['name'] ! when /hmmsearch/ ! is_hmmsearch = true else ! is_hmmsearch = false # hmmpfam end ! # Scores for complete sequences. (parsed to Hit objects) ! data.sub!(/.+-$\n(.+?)\n\nParsed/m, '') ! @hits = [] ! $1.each do |l| ! @hits.push(Hit.new(l)) ! end ! # Scores for domains. (parsed to Hsp objects) ! data.sub!(/.+-$\n(.+?)\n\nAlignments of top-scoring domains:\n/m, '') ! hsps=[] ! $1.each do |l| ! hsps.push(Hsp.new(l,is_hmmsearch)) ! end ! # Alignments ! if is_hmmsearch ! data.sub!(/(.+?)\n\n\nHistogram of all scores:\n/m, '') ! else ! data.sub!(/(.+?)\n\n\/\//m, '') end ! $1.split(/^\S+.*?\n/).slice(1..-1).each_with_index do |al,k| ! al2 = al.gsub(/\n\n/,"\n").to_s.collect { |l| ! l.sub(/^.{19}/,'').sub(/\s(\d+|-)\s*$/,'') ! } ! align = ['', '', ''] ! al2.each_with_index { |s,i| align[i%3] += s.chomp } ! align.each { |a| a.sub!(/^.{3}(.*).{3}$/, '\1') } ! hsps[k].hmmseq << align[0] ! hsps[k].midline << align[1] ! hsps[k].flatseq << align[2] end ! hsps.each do |s| ! @hits.each do |h| ! if h.accession == s.accession ! h.hsps.push(s) ! next ! end end end if is_hmmsearch ! data.sub!(/(.+?)\n\n\n%/m, '') ! @histogram = $1 ! @statistical_detail = {} ! data.sub!(/(.+?)\n\n/m, '') ! $1.each do |l| ! @statistical_detail[$1] = $2.to_f if /^\s*(.+)\s*=\s*(\S+)/ =~ l ! end ! @total_seq_searched = nil ! data.sub!(/(.+?)\n\n/m, '') ! $1.each do |l| ! @total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l end ! @whole_seq_top_hits = {} ! data.sub!(/(.+?)\n\n/m, '') ! $1.each do |l| ! 
@whole_seq_top_hits[$1] = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l end ! @domain_top_hits = {} ! data.each do |l| ! @domain_top_hits[$1] = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l end end - end - attr_reader :program, :parameter, :query_info, :hits, - :histogram, :statistical_detail, :total_seq_searched, - :whole_seq_top_hits, :domain_top_hits ! def each ! @hits.each do |x| ! yield x end end class Hsp def initialize(data, is_hmmsearch) --- 26,147 ---- module Bio class HMMER + + def self.reports(input) + ary = [] + input.each("\n//\n") do |data| + if block_given? + yield Report.new(data) + else + ary << Report.new(data) + end + end + return ary + end + + + # Bio::HMMER::Report class Report + # for Bio::FlatFile support + DELIMITER = RS = "\n//\n" + def initialize(data) ! # The input data is divided into six data fields, i.e. header, ! # query infomation, hits, HSPs, alignments and search statistics. ! # However, header and statistics data don't necessarily exist. ! subdata, is_hmmsearch = get_subdata(data) ! # if header exists, parse it ! if subdata["header"] ! @program, @parameter = parse_header_data(subdata["header"]) else ! @program, @parameter = [{}, {}] end ! @query_info = parse_query_info(subdata["query"]) ! @hits = parse_hit_data(subdata["hit"]) ! @hsps = parse_hsp_data(subdata["hsp"], is_hmmsearch) ! if @hsps != [] ! # split alignment subdata into an array of alignments ! aln_ary = subdata["alignment"].split(/^\S+.*?\n/).slice(1..-1) ! # append alignment information to corresponding Hsp ! aln_ary.each_with_index do |aln, i| ! @hsps[i].set_alignment(aln) ! end end ! ! # assign each Hsp object to its parent Hit ! hits_hash = {} ! @hits.each do |hit| ! hits_hash[hit.accession] = hit end ! @hsps.each do |hsp| ! if hits_hash.has_key?(hsp.accession) ! hits_hash[hsp.accession].append_hsp(hsp) end end + + # parse statistics (for hmmsearch) if is_hmmsearch ! @histogram, @statistical_detail, @total_seq_searched, \ ! @whole_seq_top_hits, @domain_top_hits = \ ! 
parse_stat_data(subdata["statistics"]) ! end ! end ! attr_reader :program, :parameter, :query_info, :hits, :hsps, ! :histogram, :statistical_detail, :total_seq_searched, ! :whole_seq_top_hits, :domain_top_hits ! ! def each ! @hits.each do |x| ! yield x ! end ! end ! ! ! # Bio::HMMER::Report::Hit ! class Hit ! def initialize(data) ! @hsps = Array.new ! if /^(\S+)\s+(.*?)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data ! @accession, @description, @score, @evalue, @num = ! [$1, $2, $3.to_f, $4.to_f, $5.to_i] end + end + attr_reader :hsps, :accession, :description, :score, :evalue, :num ! def each ! @hsps.each do |x| ! yield x end + end ! alias target_id accession ! alias hit_id accession ! alias entry_id accession ! alias definition description ! alias bit_score score ! ! def target_def ! if @hsps.size == 1 ! "<#{@hsps[0].domain}> #{@description}" ! else ! "<#{@num.to_s}> #{@description}" end end ! def append_hsp(hsp) ! @hsps << hsp end + end + # Bio::HMMER::Report::Hsp class Hsp def initialize(data, is_hmmsearch) *************** *** 137,229 **** @query_frame = 1 @target_frame = 1 end ! attr_accessor :accession, :domain, :seq_f, :seq_t, :seq_ft, :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq, ! :flatseq, :query_frame, :target_frame ! ! def query_seq ! if @is_hmmsearch; @hmmseq else; @flatseq end ! end ! ! def target_seq ! if @is_hmmsearch; @flatseq else; @hmmseq end ! end ! ! def target_from ! if @is_hmmsearch; @seq_f else; @hmm_f end ! end ! def target_to ! if @is_hmmsearch; @seq_t else; @hmm_t end end ! def query_from ! if @is_hmmsearch; @hmm_f else; @seq_f end ! end ! def query_to ! if @is_hmmsearch; @hmm_t else; @seq_t end ! end - def bit_score; @score; end - def target_id; @accession; end end ! class Hit ! def initialize(data) ! @hsps = Array.new ! if /^(\S+)\s+(.*)\s+(\S+)\s+(\S+)\s+(\S+)$/ =~ data ! @accession, @description, @score, @evalue, @num = ! [$1, $2, $3.to_f, $4.to_f, $5.to_i] ! end ! end ! 
attr_accessor :hsps, :accession, :description, :score, :evalue, :num ! def each ! @hsps.each do |x| ! yield x ! end end ! def target_id; @accession; end ! def hit_id; @accession; end ! def entry_id; @accession; end ! def definition; @description; end ! def bit_score; @score; end ! def target_def ! if @hsps.size == 1 ! "<#{@hsps[0].domain}> #{@description}" ! else ! "<#{@num.to_s}> #{@description}" ! end end - end ! private ! def parse_program(data) ! hash = {} ! hash['name'], hash['version'], hash['copyright'], hash['license'] = ! data.split(/\n/) ! hash end ! def parse_parameter(data) ! hash = {} ! data.each do |x| ! if /(.+):\s+(.*)/ =~ x ! hash[$1] = $2 end end ! hash end def parse_query_info(data) hash = {} data.each do |x| ! if /(.+):\s+(.*)/ =~ x hash[$1] = $2 elsif /\s+\[(.+)\]/ =~ x --- 161,278 ---- @query_frame = 1 @target_frame = 1 + # CS and RF lines are rarely used. + @csline = nil + @rfline = nil end ! attr_reader :accession, :domain, :seq_f, :seq_t, :seq_ft, :hmm_f, :hmm_t, :hmm_ft, :score, :evalue, :midline, :hmmseq, ! :flatseq, :query_frame, :target_frame, :csline, :rfline ! def set_alignment(aln) ! # First, split the input alignment into an array of ! # "alignment blocks." One block usually has three lines, ! # i.e. hmmseq, midline and flatseq. ! # However, although infrequent, it can contain CS or RF lines. ! aln.split(/ (?:\d+|-)\s*\n\n/).each do |blk| ! lines = blk.split(/\n/) ! cstmp = (lines[0] =~ /^ {16}CS/) ? lines.shift : nil ! rftmp = (lines[0] =~ /^ {16}RF/) ? lines.shift : nil ! aln_width = lines[0][/\S+/].length ! @csline = @csline.to_s + cstmp[19, aln_width] if cstmp ! @rfline = @rfline.to_s + rftmp[19, aln_width] if rftmp ! @hmmseq += lines[0][19, aln_width] ! @midline += lines[1][19, aln_width] ! @flatseq += lines[2][19, aln_width] ! end ! @csline = @csline[3...-3] if @csline ! @rfline = @rfline[3...-3] if @rfline ! @hmmseq = @hmmseq[3...-3] ! @midline = @midline[3...-3] ! @flatseq = @flatseq[3...-3] end ! 
def query_seq; @is_hmmsearch ? @hmmseq : @flatseq; end ! def target_seq; @is_hmmsearch ? @flatseq : @hmmseq; end ! def target_from; @is_hmmsearch ? @seq_f : @hmm_f; end ! def target_to; @is_hmmsearch ? @seq_t : @hmm_t; end ! def query_from; @is_hmmsearch ? @hmm_f : @seq_f; end ! def query_to; @is_hmmsearch ? @hmm_t : @seq_t; end ! alias bit_score score ! alias target_id accession end ! # Bio::HMMER::Report#get_subdata ! def get_subdata(data) ! subdata = {} ! header_prefix = '\Ahmm(search|pfam) - search' ! query_prefix = '^Query (HMM|sequence): .*\nAccession: ' ! hit_prefix = '^Scores for (complete sequences|sequence family)' ! hsp_prefix = '^Parsed for domains:' ! aln_prefix = '^Alignments of top-scoring domains:\n' ! stat_prefix = '^\nHistogram of all scores:' ! # if header exists, get it ! if data =~ /#{header_prefix}/ ! is_hmmsearch = ($1 == "search") # hmmsearch or hmmpfam ! subdata["header"] = data[/(\A.+?)(?=#{query_prefix})/m] ! else ! is_hmmsearch = false # if no header, assumed to be hmmpfam end ! # get query, Hit and Hsp data ! subdata["query"] = data[/(#{query_prefix}.+?)(?=#{hit_prefix})/m] ! subdata["hit"] = data[/(#{hit_prefix}.+?)(?=#{hsp_prefix})/m] ! subdata["hsp"] = data[/(#{hsp_prefix}.+?)(?=#{aln_prefix})/m] ! # get alignment data ! if is_hmmsearch ! data =~ /#{aln_prefix}(.+?)#{stat_prefix}/m ! subdata["alignment"] = $1 ! else ! data =~ /#{aln_prefix}(.+?)\/\/\n/m ! subdata["alignment"] = $1 ! raise "multiple reports found" if $'.length > 0 end + # handle -A option of HMMER + cutoff_line = '\t\[output cut off at A = \d+ top alignments\]\n\z' + subdata["alignment"].sub!(/#{cutoff_line}/, '') ! # get statistics data ! subdata["statistics"] = data[/(#{stat_prefix}.+)\z/m] ! [subdata, is_hmmsearch] end + private :get_subdata + + # Bio::HMMER::Report#parse_header_data + def parse_header_data(data) + data =~ /\A(.+? - - -$\n)(.+? - - -$\n)\n\z/m + program_data = $1 + parameter_data = $2 ! program = {} ! 
program['name'], program['version'], program['copyright'], \ ! program['license'] = program_data.split(/\n/) ! ! parameter = {} ! parameter_data.each do |x| ! if /^(.+?):\s+(.*?)\s*$/ =~ x ! parameter[$1] = $2 end end ! ! [program, parameter] end + private :parse_header_data + # Bio::HMMER::Report#parse_query_info def parse_query_info(data) hash = {} data.each do |x| ! if /^(.+?):\s+(.*?)\s*$/ =~ x hash[$1] = $2 elsif /\s+\[(.+)\]/ =~ x *************** *** 233,236 **** --- 282,351 ---- hash end + private :parse_query_info + + # Bio::HMMER::Report#parse_hit_data + def parse_hit_data(data) + data.sub!(/.+?---\n/m, '').chop! + hits = [] + return hits if data == "\t[no hits above thresholds]\n" + data.each do |l| + hits.push(Hit.new(l)) + end + hits + end + private :parse_hit_data + + # Bio::HMMER::Report#parse_hsp_data + def parse_hsp_data(data, is_hmmsearch) + data.sub!(/.+?---\n/m, '').chop! + hsps=[] + return hsps if data == "\t[no hits above thresholds]\n" + data.each do |l| + hsps.push(Hsp.new(l, is_hmmsearch)) + end + return hsps + end + private :parse_hsp_data + + # Bio::HMMER::Report#parse_stat_data + def parse_stat_data(data) + data.sub!(/\nHistogram of all scores:\n(.+?)\n\n\n%/m, '') + histogram = $1 + + statistical_detail = {} + data.sub!(/(.+?)\n\n/m, '') + $1.each do |l| + statistical_detail[$1] = $2.to_f if /^\s*(.+?)\s*=\s*(\S+)/ =~ l + end + + total_seq_searched = nil + data.sub!(/(.+?)\n\n/m, '') + $1.each do |l| + total_seq_searched = $2.to_i if /^\s*(.+)\s*:\s*(\S+)/ =~ l + end + + whole_seq_top_hits = {} + data.sub!(/(.+?)\n\n/m, '') + $1.each do |l| + if /^\s*(.+?):\s*(\d+)\s*$/ =~ l + whole_seq_top_hits[$1] = $2.to_i + elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l + whole_seq_top_hits[$1] = $2 + end + end + + domain_top_hits = {} + data.each do |l| + if /^\s*(.+?):\s*(\d+)\s*$/ =~ l + domain_top_hits[$1] = $2.to_i + elsif /^\s*(.+?):\s*(\S+)\s*$/ =~ l + domain_top_hits[$1] = $2 + end + end + + [histogram, statistical_detail, total_seq_searched, \ + 
whole_seq_top_hits, domain_top_hits] + end + private :parse_stat_data end *************** *** 242,245 **** --- 357,374 ---- if __FILE__ == $0 + =begin + + # + # for multiple reports in a single output file (hmmpfam) + # + Bio::HMMER.reports(ARGF.read) do |report| + report.hits.each do |hit| + hit.hsps.each do |hsp| + end + end + end + + =end + begin require 'pp' *************** *** 387,390 **** --- 516,526 ---- == Bio::HMMER::Report::Hsp + --- Bio::HMMER::Report#hsps + + Returns an Array of Bio::HMMER::Report::Hsp objects. + Under special circumstances, some HSPs do not have + parent Hit objects. If you want to access such HSPs, + use this method. + --- Bio::HMMER::Report::Hsp#target_id --- Bio::HMMER::Report::Hsp#accession *************** *** 413,416 **** --- 549,555 ---- --- Bio::HMMER::Report::Hsp#target_from --- Bio::HMMER::Report::Hsp#target_to + + --- Bio::HMMER::Report::Hsp#csline + --- Bio::HMMER::Report::Hsp#rfline =end From ngoto at pub.open-bio.org Mon Oct 31 02:51:29 2005 From: ngoto at pub.open-bio.org (Naohisa Goto) Date: Tue Nov 1 16:54:41 2005 Subject: [BioRuby-cvs] bioruby/lib/bio/appl/sim4 report.rb,1.3,1.4 Message-ID: <200510310751.j9V7pTVL031244@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/appl/sim4 In directory pub.open-bio.org:/tmp/cvs-serv31234 Modified Files: report.rb Log Message: documents format is changed from RD to RDoc Index: report.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/appl/sim4/report.rb,v retrieving revision 1.3 retrieving revision 1.4 diff -C2 -d -r1.3 -r1.4 *** report.rb 8 Sep 2005 01:22:10 -0000 1.3 --- report.rb 31 Oct 2005 07:51:27 -0000 1.4 *************** *** 1,7 **** # ! # bio/appl/sim4/report.rb - sim4 result parser # ! # Copyright (C) 2004 GOTO Naohisa # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,9 ---- # ! 
# = bio/appl/sim4/report.rb - sim4 result parser # ! # Copyright:: Copyright (C) 2004 GOTO Naohisa ! # Licence:: LGPL # + #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,31 **** --- 19,59 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + #++ # # $Id$ # + # The sim4 report parser classes. + # + # == References + # + # * Florea, L., et al., A Computer program for aligning a cDNA sequence + # with a genomic DNA sequence, Genome Research, 8, 967--974, 1998. + # http://www.genome.org/cgi/content/abstract/8/9/967 + # module Bio class Sim4 + # Bio::Sim4::Report is the sim4 report parser class. + # Its object may contain some Bio::Sim4::Report::Hit objects. class Report #< DB + #-- # format: A=0, A=3, or A=4 + #++ + + # Delimiter of each entry. Bio::FlatFile uses it. + # In Bio::Sim4::Report, it it nil (1 entry 1 file). DELIMITER = RS = nil # 1 entry 1 file + # Creates new Bio::Sim4::Report object from String. + # You can use Bio::FlatFile to read a file. + # Currently, format A=0, A=3, and A=4 are supported. + # (A=1, A=2, A=5 are NOT supported yet.) + # + # Note that 'seq1' in sim4 result is always regarded as 'query', + # and 'seq2' is always regarded as 'subject'(target, hit). + # + # Note that first 'seq1' informations are used for + # Bio::Sim4::Report#query_id, #query_def, #query_len, and #seq1 methods. def initialize(text) @hits = [] *************** *** 45,52 **** @seq1 = @all_hits[0].seq1 end - attr_reader :hits, :all_hits, :seq1 class SeqDesc # description/definitions of a sequence def initialize(seqid, seqdef, len, filename) @entry_id = seqid --- 73,105 ---- @seq1 = @all_hits[0].seq1 end + # Returns hits of the entry. + # Unlike Bio::Sim4::Report#all_hits, it returns + # hits which have alignments. + # Returns an Array of Bio::Sim4::Report::Hit objects. 
+ attr_reader :hits + + # Returns all hits of the entry. + # Unlike Bio::Sim4::Report#hits, it returns + # results of all trials of pairwise alignment. + # This would be a Bio::Sim4 specific method. + # Returns an Array of Bio::Sim4::Report::Hit objects. + attr_reader :all_hits + + # Returns sequence informations of 'seq1'. + # Returns a Bio::Sim4::Report::SeqDesc object. + # This would be a Bio::Sim4 specific method. + attr_reader :seq1 + + # Bio::Sim4::Report::SeqDesc stores sequence information of + # query or subject of sim4 report. class SeqDesc + #-- # description/definitions of a sequence + #++ + + # Creates a new object. + # It is designed to be called internally from Bio::Sim4::Report object. + # Users shall not use it directly. def initialize(seqid, seqdef, len, filename) @entry_id = seqid *************** *** 55,60 **** @filename = filename end ! attr_reader :entry_id, :definition, :len, :filename def self.parse(str, str2 = nil) /^seq[12] \= (.*)(?: \((.*)\))?\,\s*(\d+)\s*bp\s*$/ =~ str --- 108,123 ---- @filename = filename end ! # identifier of the sequence ! attr_reader :entry_id ! # definition of the sequence ! attr_reader :definition ! # sequence length of the sequence ! attr_reader :len ! # filename of the sequence ! attr_reader :filename + # Parses part of sim4 result text and creates new SeqDesc object. + # It is designed to be called internally from Bio::Sim4::Report object. + # Users shall not use it directly. def self.parse(str, str2 = nil) /^seq[12] \= (.*)(?: \((.*)\))?\,\s*(\d+)\s*bp\s*$/ =~ str *************** *** 73,78 **** --- 136,156 ---- end #class SeqDesc + + # Sequence segment pair of the sim4 result. + # Similar to Bio::Blast::Report::HSP but lacks many methods. + # For mRNA-genome mapping programs, + # unlike other homology search programs, + # the class is used not only for exons but also for introns. + # (Note that intron data would not be available according to run-time + # options of the program.) 
class SegmentPair + #-- # segment pair (like Bio::BLAST::*::Report::HSP) + #++ + + # Creates a new SegmentPair object. + # It is designed to be called internally from + # Bio::Sim4::Report::Hit object. + # Users shall not use it directly. def initialize(seq1, seq2, midline = nil, percent_identity = nil, direction = nil) *************** *** 83,89 **** @direction = direction end ! attr_reader :seq1, :seq2, :midline, ! :percent_identity, :direction def self.parse(str, aln) /^(\d+)\-(\d+)\s*\((\d+)\-(\d+)\)\s*([\d\.]+)\%\s*([\-\<\>]*)/ =~ str --- 161,189 ---- @direction = direction end ! # Returns segment informations of 'seq1'. ! # Returns a Bio::Sim4::Report::Segment object. ! # These would be Bio::Sim4 specific methods. ! attr_reader :seq1 ! # Returns segment informations of 'seq2'. ! # Returns a Bio::Sim4::Report::Segment object. ! # These would be Bio::Sim4 specific methods. ! attr_reader :seq2 ! ! # Returns the "midline" of the segment pair. ! # Returns nil if no alignment data are available. ! attr_reader :midline ! ! # Returns percent identity of the segment pair. ! attr_reader :percent_identity ! ! # Returns directions of mapping. ! # Maybe one of "->", "<-" or "" or nil. ! # This would be a Bio::Sim4 specific method. ! attr_reader :direction + # Parses part of sim4 result text and creates a new SegmentPair object. + # It is designed to be called internally from + # Bio::Sim4::Report::Hit class. + # Users shall not use it directly. def self.parse(str, aln) /^(\d+)\-(\d+)\s*\((\d+)\-(\d+)\)\s*([\d\.]+)\%\s*([\-\<\>]*)/ =~ str *************** *** 93,96 **** --- 193,201 ---- end + # Parses part of sim4 result text and creates a new SegmentPair + # object when the seq1 is a intron. + # It is designed to be called internally from + # Bio::Sim4::Report::Hit class. + # Users shall not use it directly. 
def self.seq1_intron(prev_e, e, aln) self.new(Segment.new(prev_e.seq1.to+1, e.seq1.from-1, aln[0]), *************** *** 99,102 **** --- 204,212 ---- end + # Parses part of sim4 result text and creates a new SegmentPair + # object when seq2 is a intron. + # It is designed to be called internally from + # Bio::Sim4::Report::Hit class. + # Users shall not use it directly. def self.seq2_intron(prev_e, e, aln) self.new(Segment.new(nil, nil, aln[0]), *************** *** 105,117 **** --- 215,244 ---- end + #-- # Bio::BLAST::*::Report::Hsp compatible methods # Methods already defined: midline, percent_identity + #++ + + # start position of the query (the first position is 1) def query_from; @seq1.from; end + + # end position of the query (including its position) def query_to; @seq1.to; end + + # query sequence (with gaps) of the alignment of the segment pair. def qseq; @seq1.seq; end + + # start position of the hit(target) (the first position is 1) def hit_from; @seq2.from; end + + # end position of the hit(target) (including its position) def hit_to; @seq2.to; end + + # hit(target) sequence (with gaps) of the alignment + # of the segment pair. def hseq; @seq2.seq; end + # Returns alignment length of the segment pair. + # Returns nil if no alignment data are available. def align_len (@midline and @seq1.seq and @seq2.seq) ? @midline.length : nil *************** *** 119,124 **** --- 246,259 ---- end #class SegmentPair + # Segment informations of a segment pair. class Segment + #-- # the segment of a sequence + #++ + + # Creates a new Segment object. + # It is designed to be called internally from + # Bio::Sim4::Report::SegmentPair class. + # Users shall not use it directly. def initialize(pos_st, pos_ed, seq = nil) @from = pos_st.to_i *************** *** 126,133 **** @seq = seq end ! attr_reader :from, :to, :seq end #class Segment class Hit def initialize(str) @data = str.split(/\n(?:\r?\n)+/) --- 261,279 ---- @seq = seq end ! 
# start position of the segment (the first position is 1) ! attr_reader :from ! # end position of the segment (including its position) ! attr_reader :to ! # sequence (with gaps) of the segment ! attr_reader :seq end #class Segment + # Hit object of the sim4 result. + # Similar to Bio::Blast::Report::Hit but lacks many methods. class Hit + + # Parses part of sim4 result text and creates a new Hit object. + # It is designed to be called internally from Bio::Sim4::Report class. + # Users shall not use it directly. def initialize(str) @data = str.split(/\n(?:\r?\n)+/) *************** *** 135,140 **** end ! # seq1: query, seq2: target(hit) def parse_seqdesc a0 = @data.shift.split(/\r?\n/) if @data[0].to_s =~ /^\>/ then --- 281,287 ---- end ! # Parses sequence descriptions. def parse_seqdesc + # seq1: query, seq2: target(hit) a0 = @data.shift.split(/\r?\n/) if @data[0].to_s =~ /^\>/ then *************** *** 154,163 **** end private :parse_seqdesc - attr_reader :seq1, :seq2 def complement? @complement end def parse_segmentpairs aln = (self.align ? self.align.dup : []) --- 301,324 ---- end private :parse_seqdesc + # Returns sequence informations of 'seq1'. + # Returns a Bio::Sim4::Report::SeqDesc object. + # This would be Bio::Sim4 specific method. + attr_reader :seq1 + + # Returns sequence informations of 'seq2'. + # Returns a Bio::Sim4::Report::SeqDesc object. + # This would be Bio::Sim4 specific method. + attr_reader :seq2 + + # Returns true if the hit reports '-'(complemental) strand + # search result. + # Otherwise, return false or nil. + # This would be a Bio::Sim4 specific method. def complement? @complement end + # Parses segment pair. def parse_segmentpairs aln = (self.align ? self.align.dup : []) *************** *** 191,194 **** --- 352,356 ---- private :parse_segmentpairs + # Parses alignment. def parse_align s1 = []; ml = []; s2 = [] *************** *** 218,222 **** end private :parse_align ! 
def exons unless defined?(@exons); parse_segmentpairs; end --- 380,386 ---- end private :parse_align ! ! # Returns exons of the hit. ! # Each exon is a Bio::Sim4::Report::SegmentPair object. def exons unless defined?(@exons); parse_segmentpairs; end *************** *** 224,227 **** --- 388,396 ---- end + # Returns segment pairs (exons and introns) of the hit. + # Each segment pair is a Bio::Sim4::Report::SegmentPair object. + # Returns an array of Bio::Sim4::Report::SegmentPair objects. + # (Note that intron data is not always available + # according to run-time options of the program.) def segmentpairs unless defined?(@segmentpairs); parse_segmentpairs; end *************** *** 229,232 **** --- 398,406 ---- end + # Returns introns of the hit. + # Some of them would contain untranscribed regions. + # Returns an array of Bio::Sim4::Report::SegmentPair objects. + # (Note that intron data is not always available + # according to run-time options of the program.) def introns unless defined?(@introns); parse_segmentpairs; end *************** *** 234,237 **** --- 408,416 ---- end + # Returns alignments. + # Returns an Array of arrays. + # Each array contains sequence of seq1, midline, sequence of seq2, + # respectively. + # This would be a Bio::Sim4 specific method. def align unless defined?(@align); parse_align; end *************** *** 239,249 **** --- 418,444 ---- end + #-- # Bio::BLAST::*::Report::Hit compatible methods + #++ + + # Length of the query sequence. + # Same as Bio::Sim4::Report#query_len. def query_len; seq1.len; end + + # Identifier of the query sequence. + # Same as Bio::Sim4::Report#query_id. def query_id; seq1.entry_id; end + + # Definition of the query sequence + # Same as Bio::Sim4::Report#query_def. 
def query_def; seq1.definition; end + # length of the hit(target) sequence def target_len; seq2.len; end + + # Identifier of the hit(target) sequence def target_id; seq2.entry_id; end + + # Definition of the hit(target) sequence def target_def; seq2.definition; end *************** *** 253,265 **** alias hsps exons ! def each(&x); exons.each(&x); end end #class Hit #Bio::BLAST::*::Report compatible methods def num_hits; @hits.size; end ! def each_hit(&x); @hits.each(&x); end alias each each_hit def query_def; @seq1.definition; end def query_id; @seq1.entry_id; end def query_len; @seq1.len; end end #class Report --- 448,486 ---- alias hsps exons ! ! # Iterates over each exon of the hit. ! # Yields a Bio::Sim4::Report::SegmentPair object. ! def each(&x) #:yields: segmentpair ! exons.each(&x) ! end end #class Hit + #-- #Bio::BLAST::*::Report compatible methods + #++ + + # Returns number of hits. + # Same as hits.size. def num_hits; @hits.size; end ! ! # Iterates over each hits of the sim4 result. ! # Same as hits.each. ! # Yields a Bio::Sim4::Report::Hit object. ! def each_hit(&x) #:yields: hit ! @hits.each(&x) ! end alias each each_hit + + # Returns the definition of query sequence. + # The value will be filename or (first word of) sequence definition + # according to sim4 run-time options. def query_def; @seq1.definition; end + + # Returns the identifier of query sequence. + # The value will be filename or (first word of) sequence definition + # according to sim4 run-time options. def query_id; @seq1.entry_id; end + + # Returns the length of query sequence. def query_len; @seq1.len; end end #class Report *************** *** 271,476 **** = Bio::Sim4::Report - - --- Bio::Sim4::Report.new(text) - - Creates new Bio::Sim4::Report object from String. - You can use Bio::FlatFile to read a file. - - Currently, format A=0, A=3, and A=4 are supported. - (A=1, A=2, A=5 are NOT supported yet.) 
- - Note that 'seq1' in sim4 result is always regarded as 'query', - and 'seq2' is always regarded as 'subject'(target, hit). - - Note that first 'seq1' informations are used for - Bio::Sim4::Report#query_id, #query_def, #query_len, and #seq1 methods. - - --- Bio::Sim4::Report#hits - - Returns an Array of Bio::Sim4::Report::Hit objects. - - --- Bio::Sim4::Report#all_hits - - Returns an Array of Bio::Sim4::Report::Hit objects. - Unlike Bio::Sim4::Report#hits, the method returns - results of all trials of pairwise alignment. - This would be a Bio::Sim4 specific method. - - --- Bio::Sim4::Report#each_hit - --- Bio::Sim4::Report#each - - Iterates over each Bio::Sim4::Report::Hit object. - Same as hits.each. - - --- Bio::Sim4::Report#num_hits - - Returns number of hits. - Same as hits.size. - - --- Bio::Sim4::Report#query_id - - Returns the identifier of query sequence. - The value will be filename or (first word of) sequence definition - according to sim4 run-time options. - - --- Bio::Sim4::Report#query_def - - Returns the definition of query sequence. - The value will be filename or (first word of) sequence definition - according to sim4 run-time options. - - --- Bio::Sim4::Report#query_len - - Returns the length of query sequence. - - --- Bio::Sim4::Report#seq1 - - Returns sequence informations of 'seq1'. - Returns a Bio::Sim4::Report::SeqDesc object. - This would be a Bio::Sim4 specific method. - - == Bio::Sim4::Report::Hit - - Hit object of sim4 result. - Similar to Bio::Blast::Report::Hit but lacks many methods. - - --- Bio::Sim4::Report::Hit#hit_id - --- Bio::Sim4::Report::Hit#target_id - - Returns the identifier of subject sequence. - The value will be filename or (first word of) sequence definition - according to sim4 run-time options. - - --- Bio::Sim4::Report::Hit#definition - --- Bio::Sim4::Report::Hit#target_def - - Returns the identifier of subject sequence. 
- The value will be filename or (first word of) sequence definition - according to sim4 run-time options. - - --- Bio::Sim4::Report::Hit#len - --- Bio::Sim4::Report::Hit#target_len - - Returns the length of subject sequence. - - --- Bio::Sim4::Report::Hit#query_id - --- Bio::Sim4::Report::Hit#query_def - --- Bio::Sim4::Report::Hit#query_len - - Same as Bio::Sim4::Report#(query_id|query_def|query_len). - - --- Bio::Sim4::Report::Hit#exons - - Returns exons of the hit. - Each exon is a Bio::Sim4::Report::SegmentPair object. - - --- Bio::Sim4::Report::Hit#hsps - - Same as Bio::Sim4::Report#exons - The method aims to provide compatibility between - other homology search program's result objects. - - --- Bio::Sim4::Report::Hit#each - - Iterates over each exon (Bio::Sim4::Report::SegmentPair object) - of the hit. - - --- Bio::Sim4::Report::Hit#segmentpairs - - Returns segment pairs (exons and introns) of the hit. - Each segment pair is a Bio::Sim4::Report::SegmentPair object. - Returns an array of Bio::Sim4::Report::SegmentPair objects. - (Note that intron data is not always available - according to run-time options of the program.) - - --- Bio::Sim4::Report::Hit#introns - - Returns introns of the hit. - Some of them would contain untranscribed regions. - Returns an array of Bio::Sim4::Report::SegmentPair objects. - (Note that intron data is not always available - according to run-time options of the program.) - - --- Bio::Sim4::Report::Hit#seq1 - --- Bio::Sim4::Report::Hit#seq2 - - Returns sequence informations of 'seq1' or 'seq2', respectively. - Returns a Bio::Sim4::Report::SeqDesc object. - These would be Bio::Sim4 specific methods. - - --- Bio::Sim4::Report::Hit#complement? - - Returns true if the hit reports '-'(complemental) strand search result. - Otherwise, return false or nil. - This would be a Bio::Sim4 specific method. - - --- Bio::Sim4::Report::Hit#align - - Returns alignments. - Returns an Array of arrays. 
- Each array contains sequence of seq1, midline, sequence of seq2, - respectively. - This would be a Bio::Sim4 specific method. - - == Bio::Sim4::Report::SegmentPair - - Sequence segment pair of sim4 result. - Similar to Bio::Blast::Report::HSP but lacks many methods. - For mRNA-genome mapping programs, unlike other homology search programs, - the class is used not only for exons but also for introns. - (Note that intron data would not be available according to run-time - options of the program.) - - --- Bio::Sim4::Report::SegmentPair#query_from - --- Bio::Sim4::Report::SegmentPair#query_to - --- Bio::Sim4::Report::SegmentPair#qseq - - --- Bio::Sim4::Report::SegmentPair#hit_from - --- Bio::Sim4::Report::SegmentPair#hit_to - --- Bio::Sim4::Report::SegmentPair#hseq - - --- Bio::Sim4::Report::SegmentPair#midline - - Returns the "midline" of the segment pair. - Returns nil if no alignment data are available. - - --- Bio::Sim4::Report::SegmentPair#percent_identity - - Returns percent identity of the segment pair. - - --- Bio::Sim4::Report::SegmentPair#align_len - - Returns alignment length of the segment pair. - Returns nil if no alignment data are available. - - --- Bio::Sim4::Report::SegmentPair#direction - - Returns directions of mapping. - Maybe one of "->", "<-" or "" or nil. - This would be a Bio::Sim4 specific method. - - --- Bio::Sim4::Report::SegmentPair#seq1 - --- Bio::Sim4::Report::SegmentPair#seq2 - - Returns segment informations of 'seq1' or 'seq2', respectively. - Returns a Bio::Sim4::Report::Segment object. - These would be Bio::Sim4 specific methods. - - == Bio::Sim4::Report::Segment - - Segment informations of a segment pair. - - --- Bio::Sim4::Report::Segment#from - --- Bio::Sim4::Report::Segment#to - --- Bio::Sim4::Report::Segment#seq - - == Bio::Sim4::Report::SeqDesc - - Sequence information of query or subject. 
- - --- Bio::Sim4::Report::SeqDesc#filename - --- Bio::Sim4::Report::SeqDesc#entry_id - --- Bio::Sim4::Report::SeqDesc#definition - --- Bio::Sim4::Report::SeqDesc#len = References --- 492,495 ---- From nakao at pub.open-bio.org Mon Oct 31 02:39:15 2005 From: nakao at pub.open-bio.org (Mitsuteru C. Nakao) Date: Tue Nov 1 16:54:43 2005 Subject: [BioRuby-cvs] bioruby/sample color_scheme_na.rb,NONE,1.1 Message-ID: <200510310739.j9V7dFVL031160@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/sample In directory pub.open-bio.org:/tmp/cvs-serv31150/sample Added Files: color_scheme_na.rb Log Message: * Initial import. --- NEW FILE: color_scheme_na.rb --- #!/usr/bin/env ruby # # color_scheme_na.rb - A Bio::ColorScheme demo script for Nucleic Acids # sequences. # # Usage: # # % ruby color_scheme_na.rb > cs-seq-fna.html # # % cat seq.fna # >DNA_sequence # acgtgtgtcatgctagtcgatcgtactagtcgtagctagtca # % ruby color_scheme_na.rb seq.fna > colored-seq-fna.html # # # Copyright (C) 2005 Mitsuteru C. Nakao # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # $Id: color_scheme_na.rb,v 1.1 2005/10/31 07:39:13 nakao Exp $ # require 'bio' # returns folded sequence with
    . def br(i, width = 80) return "" if i % width == 0 "" end # returns sequence html doc def display(seq, cs) html = '

    ' postfix = '' i = 0 seq.each_byte do |c| color = cs[c.chr] prefix = %Q() html += prefix + c.chr + postfix html += br(i += 1) end html + '

    ' end # returns scheme wise html doc def display_scheme(scheme, naseq, aaseq) html = '' cs = eval("Bio::ColorScheme::#{scheme}") [naseq, aaseq].each do |seq| html += display(seq, cs) end return ['
    ', "

    #{cs}

    ", html, '
    '] end if fna = ARGV.shift naseq = Bio::FastaFormat.new(File.open(fna, 'r').read).naseq aaseq = naseq.translate else naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize aaseq = naseq.translate end title = 'Bio::ColorScheme for DNA sequences' doc = ['', '
    ', '', title, '', '
    ', '', '

    ', title, '

    '] doc << ['
    ', '

    ', 'Simple colors', '

    '] ['Nucleotide'].each do |scheme| doc << display_scheme(scheme, naseq, "") end doc << ['
    '] ['Zappo', 'Taylor' ].each do |scheme| doc << display_scheme(scheme, "", aaseq) end doc << [''] doc << ['
    ', '

    ', 'Score colors', '

    '] ['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score| doc << display_scheme(score, "", aaseq) end doc << ['
    '] puts doc + ['',''] From k at pub.open-bio.org Sun Oct 30 17:59:58 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Tue Nov 1 16:54:46 2005 Subject: [BioRuby-cvs] bioruby/lib/bio location.rb,0.20,0.21 Message-ID: <200510302259.j9UMxwVL029910@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv29906 Modified Files: location.rb Log Message: * converted to RDoc Index: location.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/location.rb,v retrieving revision 0.20 retrieving revision 0.21 diff -C2 -d -r0.20 -r0.21 *** location.rb 26 Sep 2005 13:00:04 -0000 0.20 --- location.rb 30 Oct 2005 22:59:56 -0000 0.21 *************** *** 1,6 **** # ! # bio/location.rb - Locations/Location class (GenBank location format) # ! # Copyright (C) 2001 KATAYAMA Toshiaki # # This library is free software; you can redistribute it and/or --- 1,237 ---- # ! # = bio/location.rb - Locations/Location class (GenBank location format) # [...1181 lines suppressed...] 
- * [CEU34893] replace(1..22,"ggttttaacccagttactcaag") - * [APLPCII] replace(1905^1906,"acaaagacaccgccctacgcc") - * [MBDR3S1] replace(1400..>9281,"") - * [HUMMHDPB1F] replace(complement(36..37),"ttc") - * [HUMMIC2A] replace((651.655)..(651.655),"") - * [LEIMDRPGP] replace(1..1554,"L01572") - * [TRBND3] replace(376..395,"atttgtgtgtggtaatta") - * [TRBND3] replace(376..395,"atttgtgtgggtaatttta") - * [TRBND3] replace(376..395,"attttgttgttgttttgttttgaatta") - * [TRBND3] replace(376..395,"atgtgtggtgaatta") - * [TRBND3] replace(376..395,"atgtgtgtggtaatta") - * [TRBND3] replace(376..395,"gatttgttgtggtaatttta") - * [MSU09460] replace(193, <= replace(193, "t") - * [HUMMAGE12X] replace(3002..3003, <= replace(3002..3003, "GC") - * [ADR40FIB] replace(510..520, <= replace(510..520, "taatcctaccg") - * [RATDYIIAAB] replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa") - - =end --- 648,650 ---- From k at pub.open-bio.org Sun Oct 30 12:15:07 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Tue Nov 1 16:54:47 2005 Subject: [BioRuby-cvs] bioruby/lib/bio db.rb,0.29,0.30 Message-ID: <200510301715.j9UHF7VL029241@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv29235/lib/bio Modified Files: db.rb Log Message: * converted to RDoc Index: db.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db.rb,v retrieving revision 0.29 retrieving revision 0.30 diff -C2 -d -r0.29 -r0.30 *** db.rb 23 Oct 2005 07:16:29 -0000 0.29 --- db.rb 30 Oct 2005 17:15:05 -0000 0.30 *************** *** 1,7 **** # ! #= bio/db.rb - DataBase parser general API # ! 
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki #-- # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public --- 1,150 ---- # ! # = bio/db.rb - common API for database parsers # ! # Copyright:: Copyright (C) 2001, 2002, 2005 ! # KATAYAMA Toshiaki ! # License:: LGPL ! # ! # $Id$ ! # ! # == On-demand parsing and cache ! # ! # The flatfile parsers (sub classes of the Bio::DB) split the original entry ! # into a Hash and store the hash in the @orig instance variable. To parse ! # in detail is delayed until the method is called which requires a further ! # parsing of a content of the @orig hash. Fully parsed data is cached in the ! # another hash, @data, separately. ! # ! # == Guide lines for the developers to create an new database class ! # ! # --- Bio::DB.new(entry) ! # ! # The 'new' method should accept the entire entry in one String and ! # return the parsed database object. ! # ! # --- Bio::DB#entry_id ! # ! # Database classes should implement the following methods if appropriate: ! # ! # * entry_id ! # * definition ! # ! # Every sub class should define the following constants if appropriate: ! # ! # * DELIMITER (RS) ! # * entry separator of the flatfile of the database. ! # * RS (= record separator) is an alias for the DELIMITER in short. ! # ! # * TAGSIZE ! # * length of the tag field in the FORTRAN-like format. ! # ! # |<- tag ->||<- data ---->| ! # ENTRY_ID A12345 ! # DEFINITION Hoge gene of the Pokemonia pikachuae ! # ! # === Template of the sub class ! # ! # module Bio ! # class Hoge < DB ! # ! # DELIMITER = RS = "\n//\n" ! # TAGSIZE = 12 # You can omit this line if not needed ! # ! # def initialize(entry) ! # end ! # ! # def entry_id ! # end ! # ! # end # class Hoge ! # end # module Bio ! # ! # === Recommended method names for sub classes ! # ! # In general, the method name should be in the singular form when returns ! 
# a Object (including the case when the Object is a String), and should be ! # the plural form when returns same Objects in Array. It depends on the ! # database classes that which form of the method name can be use. ! # ! # For example, GenBank has several REFERENCE fields in one entry, so define ! # Bio::GenBank#references and this method should return an Array of the ! # Reference objects. On the other hand, MEDLINE has one REFERENCE information ! # per one entry, so define Bio::MEDLINE#reference method and this should ! # return a Reference object. ! # ! # The method names used in the sub classes should be taken from the following ! # list if appropriate: ! # ! # --- entry_id #=> String ! # ! # The entry identifier. ! # ! # --- definition #=> String ! # ! # The description of the entry. ! # ! # --- reference #=> Bio::Reference ! # --- references #=> Array of Bio::Reference ! # ! # The reference field(s) of the entry. ! # ! # --- dblink #=> String ! # --- dblinks #=> Array of String ! # ! # The link(s) to the other database entry. ! # ! # --- naseq #=> Bio::Sequence::NA ! # ! # The DNA/RNA sequence of the entry. ! # ! # --- nalen #=> Integer ! # ! # The length of the DNA/RNA sequence of the entry. ! # ! # --- aaseq #=> Bio::Sequence::AA ! # ! # The amino acid sequence of the entry. ! # ! # --- aalen #=> Integer ! # ! # The length of the amino acid sequence of the entry. ! # ! # --- seq #=> Bio::Sequence::NA or Bio::Sequence::AA ! # ! # Returns an appropriate sequence object. ! # ! # --- position #=> String ! # ! # The position of the sequence in the entry or in the genome (depends on ! # the database). ! # ! # --- locations #=> Bio::Locations ! # ! # Returns Bio::Locations.new(position). ! # ! # --- division #=> String ! # ! # The sub division name of the database. ! # ! # * Example: ! # * EST, VRL etc. for GenBank ! # * PATTERN, RULE etc. for PROSITE ! # ! # --- date #=> String ! # ! # The date of the entry. ! 
# Should we use Date (by ParseDate) instead of String? ! # ! # --- gene #=> String ! # --- genes #=> Array of String ! # ! # The name(s) of the gene. ! # ! # --- organism #=> String ! # ! # The name of the organism. ! # #-- + # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public *************** *** 17,22 **** # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! #-- ! # $Id$ # --- 160,165 ---- # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ! # ! #++ # *************** *** 27,394 **** module Bio ! # Bio::DB API ! class DB ! ! def self.open(filename, *mode, &block) ! Bio::FlatFile.open(self, filename, *mode, &block) ! end ! ! def entry_id ! raise NotImplementedError ! end ! ! def tags ! @orig.keys ! end ! ! def exists?(tag) ! @orig.include?(tag) ! end ! def get(tag) ! @orig[tag] ! end ! def fetch(tag, skip = 0) ! field = @orig[tag].split(/\n/, skip + 1).last.to_s ! truncate(field.gsub(/^.{0,#{@tagsize}}/,'')) ! end ! private ! def truncate(str) ! return str.gsub(/\s+/, ' ').strip ! end ! def tag_get(str) ! return str[0,@tagsize].strip ! end - def tag_cut(str) - str[0,@tagsize] = '' - return str - end ! def field_fetch(tag, skip = 0) ! unless @data[tag] ! @data[tag] = fetch(tag, skip) ! end ! return @data[tag] ! end ! def lines_fetch(tag) ! unless @data[tag] ! @data[tag] = get(tag).split(/\n/).map{ |l| tag_cut(l) } ! end ! @data[tag] ! end end ! # Bio::NCBIDB ! class NCBIDB < DB ! autoload :Common, 'bio/db/genbank/common' ! def initialize(entry, tagsize) ! @tagsize = tagsize ! @orig = entry2hash(entry.strip) # Hash of the original entry ! @data = {} # Hash of the parsed entry end ! private - def toptag2array(str) - sep = "\001" - str.gsub(/\n([A-Za-z\/])/, "\n#{sep}\\1").split(sep) - end ! 
def subtag2array(str) ! sep = "\001" ! str.gsub(/\n(\s{1,#{@tagsize-1}}\S)/, "\n#{sep}\\1").split(sep) ! end ! def entry2hash(entry) ! hash = Hash.new('') ! fields = toptag2array(entry) ! fields.each do |field| ! tag = tag_get(field) ! hash[tag] += field ! end ! return hash ! end end ! # Bio::KEGG ! class KEGGDB < NCBIDB end ! # Bio::EMBLDB ! class EMBLDB < DB ! autoload :Common, 'bio/db/embl/common' ! ! def initialize(entry, tagsize) ! @tagsize = tagsize ! @orig = entry2hash(entry.strip) # Hash of the original entry ! @data = {} # Hash of the parsed entry ! end ! private ! def entry2hash(entry) ! hash = Hash.new('') ! entry.each_line do |line| ! tag = tag_get(line) ! next if tag == 'XX' ! tag = 'R' if tag =~ /^R./ # Reference lines ! hash[tag] += line ! end ! return hash end ! end ! end ! ! ! ! =begin ! ! = Bio::DB ! ! * On-demand parsing and cache ! ! The flatfile parsers (sub classes of the Bio::DB) split the original entry ! into a Hash and store the hash in the @orig instance variable. To parse ! in detail is delayed until the method is called which requires a further ! parsing of a content of the @orig hash. Fully parsed data is cached in the ! another hash, @data, separately. ! ! == Class methods ! ! --- Bio::DB.new(entry) ! ! This class method accepts the String of one entire entry and parse it to ! return the parsed database object. ! ! == Object methods ! ! --- Bio::DB#entry_id ! ! Returns an entry identifier as a String. This method must be ! implemented in every database classes, so sub classes must override ! this method. ! ! --- Bio::DB#tags ! ! Returns a list of the top level tags of the entry as an Array of a ! String. ! ! --- Bio::DB#exists?(tag) ! ! Returns true or false - wether the entry contains the field of the tag. ! ! --- Bio::DB#get(tag) ! ! Returns an intact field of the tag as a String. ! ! --- Bio::DB#fetch(tag) ! ! Similar to the get method, however, fetch returns the content of the ! 
field without its tag and any extra white spaces stripped. ! ! == Private/Protected methods ! ! --- Bio::DB#truncate(str) ! ! Returns a String as extra white spaces removed. ! ! --- Bio::DB#tag_get(str) ! ! Returns a tag name of the field as a String. ! ! --- Bio::DB#tag_cut(str) ! ! Returns a String without a tag field. ! ! --- Bio::DB#field_fetch(tag) ! ! Returns the content of the field as a String like the fetch method. ! Furthermore, field_fetch stores the result in the @data hash. ! ! --- Bio::DB#lines_fetch(tag) ! ! Returns an Array containing each line of the field without a tag. ! lines_fetch also stores the result in the @data hash. ! ! == For the sub class developpers ! ! Every sub class should define the following constants if appropriate: ! ! * DELIMITER (RS) ! * entry separator of the flatfile of the database. ! * RS (= record separator) is a alias for the DELIMITER in short. ! ! * TAGSIZE ! * length of the tag field in the FORTRAN-like format. - |<- tag field ->||<- data field ---->| - ENTRY_ID A12345 - DEFINITION Hoge gene of the Pokemonia pikachuae ! == Template of the sub class - module Bio ! class Hoge < DB ! DELIMITER = RS = "\n//\n" ! TAGSIZE = 12 # You can omit this line if not needed ! def initialize(entry) ! end ! def entry_id ! end end ! end ! == Recommended method names for sub classes ! ! In general, the method name should be in the singular form when returns ! a Object (including the case when the Object is a String), and should be ! the plural form when returns same Objects in Array. It depends on the ! database classes that which form of the method name can be use. ! ! For example, GenBank has several REFERENCE fields in one entry, so define ! Bio::GenBank#references and this method should return an Array of the ! Reference objects. On the other hand, MEDLINE has one REFERENCE information ! per one entry, so define Bio::MEDLINE#reference method and this should ! return a Reference object. ! ! 
The method names used in the sub classes should be taken from the following ! list if appropriate: ! ! --- entry_id -> String ! ! The entry identifier. ! ! --- definition -> String ! ! The description of the entry. ! ! --- reference -> Bio::Reference ! --- references -> Array of Bio::Reference ! ! The reference field(s) of the entry. ! ! --- dblink -> String ! --- dblinks -> Array of String ! ! The link(s) to the other database entry. ! ! --- naseq -> Bio::Sequence::NA ! ! The DNA/RNA sequence of the entry. ! ! --- nalen -> Integer ! ! The length of the DNA/RNA sequence of the entry. ! ! --- aaseq -> Bio::Sequence::AA ! ! The amino acid sequence of the entry. ! ! --- aalen -> Integer ! ! The length of the amino acid sequence of the entry. ! ! --- seq -> Bio::Sequence::NA or Bio::Sequence::AA ! ! Returns an appropriate sequence object. ! ! --- position -> String ! ! The position of the sequence in the entry or in the genome (depends on ! the database). ! ! --- locations -> Bio::Locations ! ! Returns Bio::Locations.new(position). ! ! --- division -> String ! ! The sub division name of the database. ! ! * Example: ! * EST, VRL etc. for GenBank ! * PATTERN, RULE etc. for PROSITE ! ! --- date -> String ! ! The date of the entry. ! Should we use Date (by ParseDate) instead of String? ! ! --- gene -> String ! --- genes -> Array of String ! ! The name(s) of the gene. ! ! --- organism -> String ! ! The name of the organism. ! ! = Bio::NCBIDB ! ! Stores a NCBI style (GenBank, KEGG etc.) entry. ! ! --- new(entry, tagsize) ! ! The entire entry is passed as a String. The length of the tag field is ! passed as an Integer. Parses the entry roughly by the entry2hash method ! and returns a database object. ! ! == Private methods ! ! --- toptag2array(str) ! ! Splits an entry into an Array of Strings at the level of top tags. ! ! --- subtag2array(str) ! ! Splits a field into an Array of Strings at the level of sub tags. ! ! --- entry2hash(str) ! ! 
Returns the contents of the entry as a Hash with the top level tags as ! its keys. ! ! = Bio::KEGGDB ! ! Inherits a NCBIDB class. ! ! = Bio::EMBLDB ! ! Stores an EMBL style (EMBL, TrEMBL, Swiss-Prot etc.) entry. ! ! --- new(entry, tagsize) ! ! The entire entry is passed as a String. The length of the tag field is ! passed as an Integer. Parses the entry roughly by the entry2hash method ! and returns a database object. ! ! == Private methods ! ! --- entry2hash(str) ! ! Returns the contents of the entry as a Hash. ! ! =end --- 170,329 ---- module Bio ! class DB ! def self.open(filename, *mode, &block) ! Bio::FlatFile.open(self, filename, *mode, &block) ! end ! # Returns an entry identifier as a String. This method must be ! # implemented in every database classes by overriding this method. ! def entry_id ! raise NotImplementedError ! end + # Returns a list of the top level tags of the entry as an Array of String. + def tags + @orig.keys + end ! # Returns true or false - wether the entry contains the field of the ! # given tag name. ! def exists?(tag) ! @orig.include?(tag) ! end ! # Returns an intact field of the tag as a String. ! def get(tag) ! @orig[tag] ! end ! # Similar to the get method, however, fetch returns the content of the ! # field without its tag and any extra white spaces stripped. ! def fetch(tag, skip = 0) ! field = @orig[tag].split(/\n/, skip + 1).last.to_s ! truncate(field.gsub(/^.{0,#{@tagsize}}/,'')) ! end ! private ! # Returns a String with successive white spaces are replaced by one ! # space and stripeed. ! def truncate(str) ! return str.gsub(/\s+/, ' ').strip ! end + # Returns a tag name of the field as a String. + def tag_get(str) + return str[0,@tagsize].strip end ! # Returns a String of the field without a tag name. ! def tag_cut(str) ! str[0,@tagsize] = '' ! return str ! end ! # Returns the content of the field as a String like the fetch method. ! # Furthermore, field_fetch stores the result in the @data hash. ! 
def field_fetch(tag, skip = 0) ! unless @data[tag] ! @data[tag] = fetch(tag, skip) end + return @data[tag] + end + # Returns an Array containing each line of the field without a tag. + # lines_fetch also stores the result in the @data hash. + def lines_fetch(tag) + unless @data[tag] + @data[tag] = get(tag).split(/\n/).map{ |l| tag_cut(l) } + end + @data[tag] + end ! end # class DB ! # Stores a NCBI style (GenBank, KEGG etc.) entry. ! class NCBIDB < DB ! autoload :Common, 'bio/db/genbank/common' ! # The entire entry is passed as a String. The length of the tag field is ! # passed as an Integer. Parses the entry roughly by the entry2hash method ! # and returns a database object. ! def initialize(entry, tagsize) ! @tagsize = tagsize ! @orig = entry2hash(entry.strip) # Hash of the original entry ! @data = {} # Hash of the parsed entry ! end ! private + # Splits an entry into an Array of Strings at the level of top tags. + def toptag2array(str) + sep = "\001" + str.gsub(/\n([A-Za-z\/])/, "\n#{sep}\\1").split(sep) end ! # Splits a field into an Array of Strings at the level of sub tags. ! def subtag2array(str) ! sep = "\001" ! str.gsub(/\n(\s{1,#{@tagsize-1}}\S)/, "\n#{sep}\\1").split(sep) end ! # Returns the contents of the entry as a Hash with the top level tags as ! # its keys. ! def entry2hash(entry) ! hash = Hash.new('') ! fields = toptag2array(entry) ! fields.each do |field| ! tag = tag_get(field) ! hash[tag] += field end ! return hash end ! end # class NCBIDB ! # Class for KEGG databases. Inherits a NCBIDB class. ! class KEGGDB < NCBIDB ! end ! # Stores an EMBL style (EMBL, TrEMBL, Swiss-Prot etc.) entry. ! class EMBLDB < DB ! autoload :Common, 'bio/db/embl/common' ! # The entire entry is passed as a String. The length of the tag field is ! # passed as an Integer. Parses the entry roughly by the entry2hash method ! # and returns a database object. ! def initialize(entry, tagsize) ! @tagsize = tagsize ! @orig = entry2hash(entry.strip) # Hash of the original entry ! 
@data = {} # Hash of the parsed entry ! end ! private + # Returns the contents of the entry as a Hash. + def entry2hash(entry) + hash = Hash.new('') + entry.each_line do |line| + tag = tag_get(line) + next if tag == 'XX' + tag = 'R' if tag =~ /^R./ # Reference lines + hash[tag] += line end ! return hash end ! end # class EMBLDB + end # module Bio From k at pub.open-bio.org Sun Oct 30 11:41:39 2005 From: k at pub.open-bio.org (Katayama Toshiaki) Date: Tue Nov 1 16:54:53 2005 Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.1,1.2 Message-ID: <200510301641.j9UGfdVL029125@pub.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio In directory pub.open-bio.org:/tmp/cvs-serv29121/lib/bio Modified Files: command.rb Log Message: * converted to RDoc Index: command.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -d -r1.1 -r1.2 *** command.rb 16 Aug 2005 09:38:34 -0000 1.1 --- command.rb 30 Oct 2005 16:41:37 -0000 1.2 *************** *** 1,7 **** # ! # bio/command.rb - useful methods for external command execution # ! # Copyright (C) 2003-2005 GOTO Naohisa ! # Copyright (C) 2004 KATAYAMA Toshiaki # # This library is free software; you can redistribute it and/or --- 1,13 ---- # ! # = bio/command.rb - general methods for external command execution # ! # Copyright:: Copyright (C) 2003-2005 ! # Naohisa Goto ! # Toshiaki Katayama ! # License:: LGPL ! # ! # $Id$ ! # ! #-- # # This library is free software; you can redistribute it and/or *************** *** 19,23 **** # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! # $Id$ # --- 25,29 ---- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # ! 
#++ # *************** *** 26,29 **** --- 32,42 ---- module Bio module Command + + # = Bio::Command::Tools + # + # Bio::Command::Tools is a collection of useful methods for execution + # of external commands or web applications. Any wrapper class for + # applications shall include this class. Note that all methods below + # are private except for some methods. module Tools *************** *** 35,38 **** --- 48,52 ---- private + # Escape special characters in command line string for cmd.exe on Windows. def escape_shell_windows(str) str = str.to_s *************** *** 45,48 **** --- 59,63 ---- end + # Escape special characters in command line string for UNIX shells. def escape_shell_unix(str) str = str.to_s *************** *** 51,54 **** --- 66,70 ---- end + # Escape special characters in command line string. def escape_shell(str) case RUBY_PLATFORM *************** *** 60,63 **** --- 76,80 ---- end + # Generate command line string with special characters escaped. def make_command_line(ary) case RUBY_PLATFORM *************** *** 69,80 **** --- 86,107 ---- end + # Generate command line string with special characters escaped + # for cmd.exe on Windows. def make_command_line_windows(ary) ary.collect { |str| escape_shell_windows(str) }.join(" ") end + # Generate command line string with special characters escaped + # for UNIX shells. def make_command_line_unix(ary) ary.collect { |str| escape_shell_unix(str) }.join(" ") end + # Executes the program. Automatically select popen for Windows + # environment and open3 for the others. + # + # If block is given, yield the block with input and output IO objects. + # Note that in some platform, inn and out are the same object. + # Please be careful to do inn.close and out.close. def call_command_local(cmd, query = nil, &block) case RUBY_PLATFORM *************** *** 86,89 **** --- 113,119 ---- end + # Executes the program via IO.popen for OS which doesn't support fork. + # If block is given, yield the block with IO objects. 
+ # The two objects are the same because of a limitation of IO.popen. def call_command_local_popen(cmd, query = nil) str = make_command_line(cmd) *************** *** 101,104 **** --- 131,139 ---- end + # Executes the program via Open3.popen3. + # If block is given, yield the block with input and output IO objects. + # + # From the view point of security, this method is recommended + # rather than call_command_local_popen. def call_command_local_open3(cmd, query = nil) cmd = cmd.collect { |x| x.to_s } *************** *** 121,200 **** end attr_reader :errorlog public :errorlog ! end #module Tools ! end #module Command end # module Bio - - =begin - - = Bio::Command - - = Bio::Command::Tools - - Bio::Command::Tools is a collection of useful methods for execution - of external commands or web applications. Any wrapper class for - applications shall include this class. Note that all methods below - are private except for some methods. - - --- Bio::Command::Tools#escape_shell(str) - - Escape special characters in command line string. - - --- Bio::Command::Tools#escape_shell_unix(str) - - Escape special characters in command line string for UNIX shells. - - --- Bio::Command::Tools#escape_shell_windows(str) - - Escape special characters in command line string for cmd.exe on Windows. - - --- Bio::Command::Tools#make_command_line(ary) - - Generate command line string with special characters escaped. - - --- Bio::Command::Tools#make_command_line_unix(ary) - - Generate command line string with special characters escaped - for UNIX shells. - - --- Bio::Command::Tools#make_command_line_windows(ary) - - Generate command line string with special characters escaped - for cmd.exe on Windows. - - --- Bio::Command::Tools#exec_command_local(cmd, query = nil) - --- Bio::Command::Tools#exec_command_local(cmd) {|inn, out| ... } - - Executes the program. Automatically select popen for Windows - environment and open3 for the others. 
- - If block is given, yield the block with input and output IO objects. - Note that on some platforms, inn and out are the same object. - Please be careful to do inn.close and out.close. - - --- Bio::Command::Tools#exec_command_local_popen(cmd, query = nil) - --- Bio::Command::Tools#exec_command_local_popen(cmd) {|io, io| ... } - - Executes the program via IO.popen for OS which doesn't support - fork. - If block is given, yield the block with IO objects. - The two objects are the same because of a limitation of IO.popen. - - --- Bio::Command::Tools#exec_command_local_open3(cmd, query = nil) - --- Bio::Command::Tools#exec_command_local_open3(cmd) {|inn, out| ... } - - Executes the program via Open3.popen3. - If block is given, yield the block with input and output IO objects. - - From the view point of security, this method is recommended - rather than exec_command_local_popen. - - --- Bio::Command::Tools#errorlog - - Shows the latest stderr of the program execution. - Note that this method may be thread unsafe. - - =end --- 156,166 ---- end + # Shows the latest stderr of the program execution. + # Note that this method may be thread unsafe. attr_reader :errorlog public :errorlog ! end # module Tools ! end # module Command end # module Bio