From ngoto at pub.open-bio.org Fri Mar 3 03:18:51 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 08:18:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.46,1.47
Message-ID: <200603030818.k238IpVL028555@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv28535/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
* Removed duplicated initializing of @path in BufferedInputStream#initialize.
* Fixed a bug in which the buffered input stream was nested.
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -d -r1.46 -r1.47
*** flatfile.rb 22 Feb 2006 10:01:27 -0000 1.46
--- flatfile.rb 3 Mar 2006 08:18:49 -0000 1.47
***************
*** 35,39 ****
# initialize prefetch buffer
@buffer = ''
- @path = path
end
--- 35,38 ----
***************
*** 519,529 ****
def initialize(dbclass, stream)
# 2nd arg: IO object
! if @stream.kind_of?(BufferedInputStream)
@stream = stream
else
@stream = BufferedInputStream.for_io(stream)
end
- # default is raw mode
- self.raw = false
# 1st arg: database class (or file format autodetection)
if dbclass then
--- 518,526 ----
def initialize(dbclass, stream)
# 2nd arg: IO object
! if stream.kind_of?(BufferedInputStream)
@stream = stream
else
@stream = BufferedInputStream.for_io(stream)
end
# 1st arg: database class (or file format autodetection)
if dbclass then
***************
*** 535,538 ****
--- 532,537 ----
@skip_leader_mode = :firsttime
@firsttime_flag = true
+ # default raw mode is false
+ self.raw = false
end
***************
*** 743,747 ****
self.new(*arg)
end
!
# Creates a new element.
def initialize
--- 742,746 ----
self.new(*arg)
end
!
# Creates a new element.
def initialize
From ngoto at pub.open-bio.org Fri Mar 3 04:31:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 09:31:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48
Message-ID: <200603030931.k239VxVL029035@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
* added RulesArray class only for inspect
* changed constant (like Bio::GenBank) to String (like "Bio::GenBank")
to avoid doing require almost all files when using autodetect
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb 3 Mar 2006 08:18:49 -0000 1.47
--- flatfile.rb 3 Mar 2006 09:31:57 -0000 1.48
***************
*** 736,739 ****
--- 736,748 ----
include TSort
+ # Array to store autodetection rules.
+ # This is defined only for inspect.
+ class RulesArray < Array
+ # visualize contents
+ def inspect
+ "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+ end
+ end #class RulesArray
+
# Template of a single rule of autodetection
class RuleTemplate
***************
*** 745,754 ****
# Creates a new element.
def initialize
! a = Array.new
! def a.inspect
! "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
! end
! @higher_priority_elements = a.clone
! @lower_priority_elements = a.clone
@name = nil
end
--- 754,759 ----
# Creates a new element.
def initialize
! @higher_priority_elements = RulesArray.new
! @lower_priority_elements = RulesArray.new
@name = nil
end
***************
*** 784,787 ****
--- 789,810 ----
nil
end
+
+ private
+ # Gets constant from constant name given as a string.
+ def str2const(str)
+ const = Object
+ str.split(/\:\:/).each do |x|
+ const = const.const_get(x)
+ end
+ const
+ end
+
+ # Gets database class from given object.
+ # Current implementation is:
+ # if _obj_ is kind of String, regarded as a constant.
+ # Otherwise, returns _obj_ as is.
+ def get_dbclass(obj)
+ obj.kind_of?(String) ? str2const(obj) : obj
+ end
end #class Rule_Template
***************
*** 835,841 ****
super()
@re = re
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
@name = dbclass.to_s
end
--- 858,878 ----
super()
@re = re
@name = dbclass.to_s
+ @dbclass = nil
+ @dbclass_lazy = dbclass
+ end
+
+ # database class (lazy evaluation)
+ def dbclass
+ unless @dbclass
+ @dbclass = get_dbclass(@dbclass_lazy)
+ end
+ @dbclass
+ end
+ private :dbclass
+
+ # returns database classes
+ def dbclasses
+ [ dbclass ]
end
***************
*** 844,872 ****
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? @dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! class RuleRegexp2 < RuleTemplate
# Creates a new instance.
def initialize(dbclass, *regexps)
! super()
@regexps = regexps
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
- if name
- @name = name
- else
- @name = @dbclass.to_s
- end
end
! # If given text matches the regexp, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return @dbclass if re =~ text
end
nil
--- 881,904 ----
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! # If given string matches one of the regular expressions,
! # returns the database class.
! class RuleRegexp2 < RuleRegexp
# Creates a new instance.
def initialize(dbclass, *regexps)
! super(dbclass, nil)
@regexps = regexps
end
! # If given text matches one of the regexps, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return dbclass if re =~ text
end
nil
***************
*** 880,887 ****
super()
@proc = proc
! @dbclasses = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
# If given text (and/or meta information) is known, returns
# the database class.
--- 912,928 ----
super()
@proc = proc
! @dbclasses = nil
! @dbclasses_lazy = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
+ # database classes (lazy evaluation)
+ def dbclasses
+ unless @dbclasses
+ @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+ end
+ @dbclasses
+ end
+
# If given text (and/or meta information) is known, returns
# the database class.
***************
*** 1039,1058 ****
def self.make_default
a = self[
! genbank = RuleRegexp[ Bio::GenBank,
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ Bio::GenPept,
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ Bio::MEDLINE,
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ Bio::EMBL,
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ Bio::SPTR,
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ Bio::PROSITE,
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ Bio::TRANSFAC,
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
--- 1080,1099 ----
def self.make_default
a = self[
! genbank = RuleRegexp[ 'Bio::GenBank',
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ 'Bio::GenPept',
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ 'Bio::MEDLINE',
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ 'Bio::SPTR',
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ 'Bio::PROSITE',
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ 'Bio::TRANSFAC',
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
end,
! litdb = RuleRegexp[ Bio::LITDB,
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ Bio::KEGG::BRITE,
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ Bio::KEGG::KO,
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ Bio::KEGG::REACTION,
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ Bio::KEGG::GENES,
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ Bio::KEGG::GENOME,
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
! Bio::FANTOM::MaXML::Sequence) do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
--- 1109,1139 ----
end,
! litdb = RuleRegexp[ 'Bio::LITDB',
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ 'Bio::KEGG::BRITE',
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ 'Bio::KEGG::KO',
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ 'Bio::KEGG::GENES',
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ 'Bio::KEGG::GENOME',
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
! 'Bio::FANTOM::MaXML::Sequence') do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
***************
*** 1109,1143 ****
end,
! pdb = RuleRegexp[ Bio::PDB,
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ Bio::PDB::ChemicalComponent,
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ Bio::ClustalW::Report,
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ Bio::Blast::Report,
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ Bio::Blast::WU::Report,
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ Bio::Blast::Default::Report,
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ Bio::Blat::Report,
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ Bio::Spidey::Report,
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ Bio::HMMER::Report,
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ Bio::Sim4::Report,
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new(Bio::FastaFormat,
! Bio::NBRF,
! Bio::FastaNumericFormat) do |text|
if /^>.+$/ =~ text
case text
--- 1150,1184 ----
end,
! pdb = RuleRegexp[ 'Bio::PDB',
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ 'Bio::ClustalW::Report',
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ 'Bio::Blast::Report',
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ 'Bio::Blast::Default::Report',
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ 'Bio::Blat::Report',
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ 'Bio::Spidey::Report',
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ 'Bio::HMMER::Report',
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ 'Bio::Sim4::Report',
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new('Bio::FastaFormat',
! 'Bio::NBRF',
! 'Bio::FastaNumericFormat') do |text|
if /^>.+$/ =~ text
case text
From pjotr at pub.open-bio.org Fri Mar 3 09:52:00 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 14:52:00 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta - New directory
Message-ID: <200603031452.k23Eq0VL029679@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29669/fasta
Log Message:
Directory /home/repository/bioruby/bioruby/test/data/fasta added to the repository
From pjotr at pub.open-bio.org Fri Mar 3 10:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta example1.txt, NONE,
1.1 example2.txt, NONE, 1.1
Message-ID: <200603031531.k23FV8VL029797@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29781/test/data/fasta
Added Files:
example1.txt example2.txt
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing
--- NEW FILE: example2.txt ---
>At1g11545.1 68414.m01326 xyloglucan:xyloglucosyl transferase, putative / xyloglucan endotransglycosylase, putative / endo-xyloglucan transferase, putative similar to endo-xyloglucan transferase GI:2244732 from [Gossypium hirsutum]
actcacggaacaagtgtagattgcattacctctctctctctctctcttcgaaatattcga
agtagagacaaccaATGGAGACGGAAAGGAGGATCATAACGAGCTGTTCTGCCATGACGG
CTCTGTTCTTGTTCATGACGGCTCTAATGGCGTCGTCCTCTATCGCAGCAACACCGACAC
AATCGTTTGAAGATAATTTCAACATTATGTGGTCTGAAAATCACTTCACGACTTCCGATG
ATGGAGAGATCTGGAATCTTTCCTTAGATAACGACACCGGATGTGGATTTCAGACAAAGC
ACATGTATAGATTCGGATGGTTTAGTATGAAGCTAAAGCTCGTCGGAGGCGACTCCGCCG
GCGTCGTCACCGCTTACTACATGTGTTCGGAGAATGGGGCAGGACCGGAGAGAGACGAGA
TAGATTTCGAATTTCTAGGGAACCGAACCGGACAGCCTTACATTATTCAGACCAATGTGT
ATAAGAACGGAACCGGGAATCGGGAGATGCGACATTCCCTCTGGTTCGACCCGACCAAGG
ATTATCACACCTACTCAATTCTTTGGAATAACCACCAGCTTGTGTTCTTCGTGGATAGGG
TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC
AGAAGCCGATGTACTTGTTCTCCAGCATTTGGAACGCTGACGATTGGGCTACACGTGGTG
GTCTGGAGAAGACTGACTGGAAAAAAGCTCCATTCGTCTCTTCTTACAAGGACTTCGCCG
TCGAAGGCTGCCGTTGGAAGGATCCATTCCCTGCATGCGTCTCTACCACAACAGAGAATT
GGTGGGATCAGTACGACGCGTGGCATTTGTCCAAGACACAGAAGATGGATTATGCGTGGG
TGCAGCGTAATCTCGTCGTATACGATTATTGCAAAGACAGTGAGAGGTTCCCTACTCTTC
CTTGGGAGTGTTCCATTAGCCCTTGGGCTTAAaatcaattttgttttgagtgtattaaag
tggaaatggtttatgtaataattttactctcttttttttggcatttcttattttgttatg
gactatatcctctgtttatttatttaattaattatttatttagtcggctat
--- NEW FILE: example1.txt ---
>At1g02580 mRNA (2291 bp) UTR's and CDS
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
tttatgttgtt
>At1g65300: mRNA 837bp
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g
>At1g65300: mRNA 837bp (shortened at end)
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcag
>At1g65300: mRNA 837bp (shortened from start)
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g
>At1g02580 - shortened for test - inserted cutpoint
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
From pjotr at pub.open-bio.org Fri Mar 3 10:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/sample enzymes.rb,NONE,1.1
Message-ID: <200603031531.k23FV8VL029793@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/sample
In directory pub.open-bio.org:/tmp/cvs-serv29781/sample
Added Files:
enzymes.rb
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing
--- NEW FILE: enzymes.rb ---
#!/usr/bin/env ruby
#
# enzymes.rb - cut input file using enzyme on command line
#
# Copyright (C) 2006 Pjotr Prins
# and Trevor Wennblom
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# $Id: enzymes.rb,v 1.1 2006/03/03 15:31:06 pjotr Exp $
#
require 'bio/io/flatfile'
require 'bio/util/restriction_enzyme'
include Bio
usage = < '+entry.definition+"\n"
print frag.primary,"\n"
end
end
end
From aerts at pub.open-bio.org Thu Mar 16 12:29:07 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Thu, 16 Mar 2006 17:29:07 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb, 1.12, 1.13 fetch.rb, 1.4,
1.5
Message-ID: <200603161729.k2GHT7VL007097@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv7087
Modified Files:
pubmed.rb fetch.rb
Log Message:
* Added documentation to pubmed.rb and fetch.rb
* For fetch.rb: replaced 'net/http' with 'open-uri' to allow people behind a proxy to use this class.
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** pubmed.rb 8 Sep 2005 01:22:12 -0000 1.12
--- pubmed.rb 16 Mar 2006 17:29:05 -0000 1.13
***************
*** 3,6 ****
--- 3,7 ----
#
# Copyright (C) 2001 KATAYAMA Toshiaki
+ # 2006 Jan Aerts
#
# This library is free software; you can redistribute it and/or
***************
*** 26,61 ****
module Bio
class PubMed
! def self.query(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
!
! http = Net::HTTP.new(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
! end
!
! def self.pmfetch(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
!
! http = Net::HTTP.new(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
! end
!
def self.search(str)
host = "www.ncbi.nlm.nih.gov"
--- 27,85 ----
module Bio
+ # = DESCRIPTION
+ # The Bio::PubMed class provides several ways to retrieve bibliographic
+ # information from the PubMed database at
+ # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
+ # types of queries are possible:
+ # * searching for PubMed IDs given a query string:
+ # * Bio::PubMed#search
+ # * Bio::PubMed#esearch
+ # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) given a PubMed ID
+ # * Bio::PubMed#query
+ # * Bio::PubMed#pmfetch
+ # * Bio::PubMed#efetch
+ #
+ # The different methods within the same group are interchangeable and should
+ # return the same result.
+ #
+ # Additional information about the MEDLINE format and PubMed programmable
+ # APIs can be found on the following websites:
+ # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
+ # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
+ # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
+ # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
+ # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
+ # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
+ # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+ #
+ # = USAGE
+ # require 'bio'
+ #
+ # # If you don't know the pubmed ID:
+ # Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
+ # p x
+ # end
+ # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+ # p x
+ # end
+ #
+ # # To retrieve the MEDLINE entry for a given PubMed ID:
+ # puts Bio::PubMed.query("10592173")
+ # puts Bio::PubMed.pmfetch("10592173")
+ # puts Bio::PubMed.efetch("10592173", "14693808")
+ # # This can be converted into a Bio::MEDLINE object:
+ # manuscript = Bio::PubMed.query("10592173")
+ # medline = Bio::MEDLINE.new(manuscript)
+ #
+ # = REMARK
+ # This class cannot be used at the moment if you're behind a proxy server. This will be solved in the near future.
class PubMed
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs.
! # ---
! # *Arguments*:
! # * _str_: query string (required)
! # *Returns*:: array of PubMed IDs
def self.search(str)
host = "www.ncbi.nlm.nih.gov"
***************
*** 70,73 ****
--- 94,115 ----
end
+ # Search the PubMed database by given keywords using E-Utils and returns
+ # an array of PubMed IDs.
+ #
+ # For information on the possible arguments, see
+ # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
+ # ---
+ # *Arguments*:
+ # * _str_: query string (required)
+ # * _field_
+ # * _reldate_
+ # * _mindate_
+ # * _maxdate_
+ # * _datetype_
+ # * _retstart_
+ # * _retmax_ (default 100)
+ # * _retmode_
+ # * _rettype_
+ # *Returns*:: array of PubMed IDs
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
***************
*** 88,91 ****
--- 130,184 ----
end
+ # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+ # entrez query.
+ # ---
+ # *Arguments*:
+ # * _id_: PubMed ID (required)
+ # *Returns*:: MEDLINE formatted String
+ def self.query(id)
+ host = "www.ncbi.nlm.nih.gov"
+ path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+
+ http = Net::HTTP.new(host)
+ response, = http.get(path + id.to_s)
+ result = response.body
+ if result =~ /#{id}\s+Error/
+ raise( result )
+ else
+ result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+ return result
+ end
+ end
+
+ # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+ # entrez pmfetch.
+ # ---
+ # *Arguments*:
+ # * _id_: PubMed ID (required)
+ # *Returns*:: MEDLINE formatted String
+ def self.pmfetch(id)
+ host = "www.ncbi.nlm.nih.gov"
+ path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+
+ http = Net::HTTP.new(host)
+ response, = http.get(path + id.to_s)
+ result = response.body
+ if result =~ /#{id}\s+Error/
+ raise( result )
+ else
+ result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+ return result
+ end
+ end
+
+ # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+ # entrez efetch. Multiple PubMed IDs can be provided:
+ # Bio::PubMed.efetch(123)
+ # Bio::PubMed.efetch(123,456,789)
+ # Bio::PubMed.efetch([123,456,789])
+ # ---
+ # *Arguments*:
+ # * _ids_: list of PubMed IDs (required)
+ # *Returns*:: array of MEDLINE formatted Strings
def self.efetch(*ids)
return [] if ids.empty?
***************
*** 125,189 ****
end
-
- =begin
-
- = Bio::PubMed
-
- These class methods access NCBI/PubMed database via HTTP.
-
- --- Bio::PubMed.esearch(str, options)
-
- Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
- Options can be a hash containing keys include 'field', 'reldate',
- 'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
- and 'rettype' as specified in the following URL:
-
- (())
-
- Default 'retmax' is 100.
-
- --- Bio::PubMed.efetch(pmids)
-
- Returns an array of MEDLINE records. A list of PubMed IDs can be
- supplied as following:
-
- Bio::PubMed.efetch(123)
- Bio::PubMed.efetch(123,456,789)
- Bio::PubMed.efetch([123,456,789])
-
- --- Bio::PubMed.query(pmid)
-
- Retrieve PubMed entry by PMID and returns MEDLINE format string (can
- be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
- object).
-
- --- Bio::PubMed.pmfetch(pmid)
-
- Just another query method (by pmfetch).
-
- --- Bio::PubMed.search(str)
-
- Search the PubMed database by given keywords and returns the list of
- matched records in MEDLINE format.
-
-
- = For more informations
-
- * Overview
- * (())
- * How to link
- * (())
- * MEDLINE format
- * (())
- * Search field descriptions and tags
- * (())
- * Entrez utilities index
- * (())
- * PmFetch CGI help
- * (())
- * E-Utilities CGI help
- * (())
-
- =end
-
-
--- 218,219 ----
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** fetch.rb 18 Dec 2005 15:58:42 -0000 1.4
--- fetch.rb 16 Mar 2006 17:29:05 -0000 1.5
***************
*** 1,12 ****
#
! # = bio/io/biofetch.rb - BioFetch access module
! #
! # Copyright:: Copyright (C) 2002, 2005
! # Toshiaki Katayama
! # License:: LGPL
#
! # $Id$
#
- #--
#
# This library is free software; you can redistribute it and/or
--- 1,10 ----
#
! # bio/io/biofetch.rb - BioFetch access module
#
! # Copyright (C) 2002, 2005 Toshiaki Katayama
! # 2006 Jan Aerts
!
! # License: LGPL
#
#
# This library is free software; you can redistribute it and/or
***************
*** 24,95 ****
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! #++
#
require 'uri'
! require 'net/http'
module Bio
! class Fetch
!
! # Create a new Bio::Fetch server object.
! # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
! # to EBI BioFetch server.
! def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
! schema, user, @host, @port, reg, @path, = URI.split(url)
! end
!
! # Set default database to dbname (prepare for get_by_id).
! attr_accessor :database
!
! # Get raw database entry by id (mainly used by Bio::Registry).
! def get_by_id(id)
! fetch(@database, id)
! end
!
! # Fetch a database entry as specified by database (db), entry id (id),
! # 'raw' text or 'html' (style), and format. When using BioRuby's
! # BioFetch server, value for the format should not be set.
! def fetch(db, id, style = 'raw', format = nil)
! data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
! data.push("format=#{format}") if format
! data = data.join('&')
!
! responce, result = Net::HTTP.new(@host, @port).post(@path, data)
! return result
! end
!
! # Short cut for using BioRuby's BioFetch server. You can fetch an entry
! # without creating instance of BioFetch server.
! def self.query(*args)
! self.new.fetch(*args)
! end
! # What databases are available?
! def databases
! query = "info=dbs"
! responce, result = Net::HTTP.new(@host, @port).post(@path, query)
! return result
! end
! # What formats does the database X have?
! def formats(database = @database)
! if database
! query = "info=formats;db=#{database}"
! responce, result = Net::HTTP.new(@host, @port).post(@path, query)
return result
end
end
- # How many entries can be retrieved simultaneously?
- def maxids
- query = "info=maxids"
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
- return result
- end
-
- end
-
end # module Bio
--- 22,183 ----
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
require 'uri'
! require 'open-uri'
module Bio
+ # = DESCRIPTION
+ # The Bio::Fetch class provides an interface to dbfetch servers. Given
+ # a database name and an accession number, these servers return the nucleic
+ # or amino acid sequence for that accession number in that database.
+ #
+ # Possible dbfetch servers include:
+ # * http://bioruby.org/cgi-bin/biofetch.rb (default)
+ # * http://www.ebi.ac.uk/cgi-bin/dbfetch
+ #
+ # If you're behind a proxy server, be sure to set your HTTP_PROXY
+ # environment variable accordingly.
+ #
+ # = USAGE
+ # require 'bio'
+ #
+ # # Retrieve the sequence of accession number M33388 from the EMBL
+ # # database.
+ # server = Bio::Fetch.new() #uses default server
+ # puts server.fetch('embl','M33388')
+ #
+ # # Do the same thing without creating a Bio::Fetch object. This method always
+ # # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
+ # puts Bio::Fetch.query('embl','M33388')
+ #
+ # # To know what databases are available on the bioruby dbfetch server:
+ # server = Bio::Fetch.new()
+ # puts server.databases
+ #
+ # # Some databases provide their data in different formats (e.g. 'fasta',
+ # # 'genbank' or 'embl'). To check which formats are supported by a given
+ # # database:
+ # puts server.formats('embl')
+ #
+ class Fetch
+
+ # Create a new Bio::Fetch server object that can subsequently be queried
+ # using the Bio::Fetch#fetch method
+ # ---
+ # *Arguments*:
+ # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ # *Returns*:: Bio::Fetch object
+ def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ @url = url
+ schema, user, @host, @port, reg, @path, = URI.split(@url)
+ end
+
+ # The default database to query
+ #--
+ # This will be used by the get_by_id method
+ #++
+ attr_accessor :database
+
+ # Get raw database entry by id. This method lets the Bio::Registry class
+ # use Bio::Fetch objects and should probably not be used directly.
+ def get_by_id(id)
+ fetch(@database, id)
+ end
+
+ # Fetch a database entry as specified by database (db), entry id (id),
+ # 'raw' text or 'html' (style), and format. When using BioRuby's
+ # BioFetch server, value for the format should not be set.
+ # Examples:
+ # server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+ # puts server.fetch('embl','M33388','raw','fasta')
+ # puts server.fetch('refseq','NM_12345','html','embl')
+ # ---
+ # *Arguments*:
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+ # * _id_: single ID or ID list separated by commas or white space
+ # * _style_: [raw|html] (default = 'raw')
+ # * _format_: name of output format (see Bio::Fetch#formats)
+ def fetch(db, id, style = 'raw', format = nil)
+ query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
+ query.push("format=#{format}") if format
+ query = query.join('&')
+
+ result = open(@url + '?' + query).readlines.join('')
+ return result
+ end
+
+ # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
+ # without creating an instance of BioFetch server. This method uses the
+ # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
+ #
+ # Example:
+ # puts Bio::Fetch.query('refseq','NM_12345')
+ #
+ # ---
+ # *Arguments*:
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+ # * _id_: single ID or ID list separated by commas or white space
+ # * _style_: [raw|html] (default = 'raw')
+ # * _format_: name of output format (see Bio::Fetch#formats)
+ def self.query(*args)
+ self.new.fetch(*args)
+ end
+
+ # Using this method, the user can ask a dbfetch server what databases
+ # it supports. This would normally be the first step you'd take when
+ # you use a dbfetch server for the first time.
+ # Example:
+ # server = Bio::Fetch.new()
+ # puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
+ #
+ # This method only works for the bioruby dbfetch server. For a list
+ # of databases available from the EBI, see the EBI website at
+ # http://www.ebi.ac.uk/cgi-bin/dbfetch/
+ # ---
+ # *Returns*:: array of database names
+ def databases
+ query = "info=dbs"
! result = open(@url + '?' + query).readlines.join('')
! return result
! end
!
! # Lists the formats that are available for a given database. Like the
! # Bio::Fetch#databases method, this method is only available on
! # the bioruby dbfetch server.
! # Example:
! # server = Bio::Fetch.new()
! # puts server.formats('embl') # returns "default fasta"
! # ---
! # *Arguments*:
! # * _database_:: name of database you want the supported formats for
! # *Returns*:: array of formats
! def formats(database = @database)
! if database
! query = "info=formats;db=#{database}"
! result = open(@url + '?' + query).readlines.join('')
! return result
! end
! end
!
! # A dbfetch server will only return entries up to a given maximum number.
! # This method retrieves that number from the server. As for the databases
! # and formats methods, the maxids method only works for the bioruby
! # dbfetch server.
! # ---
! # *Arguments*: none
! # *Returns*:: number
! def maxids
! query = "info=maxids"
! result = open(@url + '?' + query).readlines.join('')
return result
end
+
end
end # module Bio
***************
*** 98,113 ****
if __FILE__ == $0
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
- bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
puts "# test 1"
! puts bfserv.fetch('embl', 'J00231', 'raw')
puts "# test 2"
! puts bfserv.fetch('embl', 'J00231', 'html')
!
puts "# test 3"
! puts Bio::Fetch.query('genbank', 'J00231')
puts "# test 4"
puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!
end
--- 186,204 ----
if __FILE__ == $0
puts "# test 1"
! br_server = Bio::Fetch.new()
! puts br_server.databases
! puts br_server.formats('embl')
! puts br_server.maxids
! ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
puts "# test 2"
! puts ebi_server.fetch('embl', 'J00231', 'raw')
puts "# test 3"
! puts ebi_server.fetch('embl', 'J00231', 'html')
puts "# test 4"
+ puts Bio::Fetch.query('genbank', 'J00231')
+ puts "# test 5"
puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!
end
From ngoto at pub.open-bio.org Mon Mar 20 05:34:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 10:34:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.3,1.4
Message-ID: <200603201035.k2KAYxVL030067@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory pub.open-bio.org:/tmp/cvs-serv30042/lib/bio
Modified Files:
command.rb
Log Message:
* New module Bio::Command::NetTools for miscellaneous network methods.
Currently, this module is intended to be used only inside the
BioRuby library. Please do not use it in user programs for now.
* New methods: Bio::Command::NetTools.open_uri(uri, *arg) and
Bio::Command::NetTools.read_uri(uri).
* Changed license to Ruby's.
Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** command.rb 4 Nov 2005 17:36:00 -0000 1.3
--- command.rb 20 Mar 2006 10:34:57 -0000 1.4
***************
*** 2,32 ****
# = bio/command.rb - general methods for external command execution
#
! # Copyright:: Copyright (C) 2003-2005
# Naohisa Goto ,
# Toshiaki Katayama
! # License:: LGPL
#
# $Id$
#
- #--
- #
- # This library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2 of the License, or (at your option) any later version.
- #
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with this library; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- #
- #++
- #
require 'open3'
module Bio
--- 2,15 ----
# = bio/command.rb - general methods for external command execution
#
! # Copyright:: Copyright (C) 2003-2006
# Naohisa Goto ,
# Toshiaki Katayama
! # License:: Ruby's
#
# $Id$
#
require 'open3'
+ require 'uri'
module Bio
***************
*** 162,165 ****
--- 145,291 ----
end # module Tools
+
+
+ # = Bio::Command::NetTools
+ #
+ # Bio::Command::NetTools is a collection of miscellaneous methods
+ # for data transport through network.
+ #
+ # Library internal use only. Users should not directly use it.
+ #
+ # Note that it is under construction.
+ module NetTools
+
+ # Same as OpenURI.open_uri(*arg).
+ # If open-uri.rb is already loaded, ::OpenURI is used.
+ # Otherwise, internal OpenURI in sandbox is used because
+ # open-uri.rb redefines Kernel.open.
+ def self.open_uri(uri, *arg)
+ if defined? ::OpenURI
+ ::OpenURI.open_uri(uri, *arg)
+ else
+ SandBox.load_openuri_in_sandbox
+ uri = uri.to_s if ::URI::Generic === uri
+ SandBox::OpenURI.open_uri(uri, *arg)
+ end
+ end
+
+ # Same as OpenURI.open_uri(uri).read.
+ # If open-uri.rb is already loaded, ::OpenURI is used.
+ # Otherwise, internal OpenURI in sandbox is used becase
+ # open-uri.rb redefines Kernel.open.
+ def self.read_uri(uri)
+ self.open_uri(uri).read
+ end
+
+ # Sandbox to load open-uri.rb.
+ # Internal use only.
+ module SandBox #:nodoc:
+
+ # Dummy module definition.
+ module Kernel #:nodoc:
+ # dummy method
+ def open(*arg); end #:nodoc:
+ end #module Kernel
+
+ # a method to find proxy. dummy definition
+ module FindProxy; end #:nodoc:
+
+ # dummy module definition
+ module OpenURI #:nodoc:
+ module OpenRead; end #:nodoc:
+ end #module OpenURI
+
+ # Dummy module definition.
+ module URI #:nodoc:
+ class Generic < ::URI::Generic #:nodoc:
+ include SandBox::FindProxy
+ end
+
+ class HTTPS < ::URI::HTTPS #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ class HTTP < ::URI::HTTP #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ class FTP < ::URI::FTP #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ # parse and new. internal use only.
+ def self.__parse_and_new__(klass, uri) #:nodoc:
+ scheme, userinfo, host, port,
+ registry, path, opaque, query, fragment = ::URI.split(uri)
+ klass.new(scheme, userinfo, host, port,
+ registry, path, opaque, query,
+ fragment)
+ end
+ private_class_method :__parse_and_new__
+
+ # same as ::URI.parse. internal use only.
+ def self.parse(uri) #:nodoc:
+ r = ::URI.parse(uri)
+ case r
+ when ::URI::HTTPS
+ __parse_and_new__(HTTPS, uri)
+ when ::URI::HTTP
+ __parse_and_new__(HTTP, uri)
+ when ::URI::FTP
+ __parse_and_new__(FTP, uri)
+ else
+ r
+ end
+ end
+ end #module URI
+
+ @load_openuri = nil
+ # load open-uri.rb in SandBox module.
+ def self.load_openuri_in_sandbox #:nodoc:
+ return if @load_openuri
+ fn = nil
+ unless $:.find do |x|
+ fn = File.join(x, 'open-uri.rb')
+ FileTest.exist?(fn)
+ end then
+ warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
+ else
+ # reading open-uri.rb
+ str = File.read(fn)
+ # eval open-uri.rb contents in SandBox module
+ module_eval(str)
+
+ # finds 'find_proxy' method
+ find_proxy_lines = nil
+ flag = nil
+ endstr = nil
+ str.each do |line|
+ if flag then
+ find_proxy_lines << line
+ if endstr == line[0, endstr.length] and
+ /^\s+end(\s+.*)?$/ =~ line then
+ break
+ end
+ elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
+ flag = true
+ endstr = "#{$1}end"
+ find_proxy_lines = line
+ end
+ end
+ if find_proxy_lines
+ module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
+ else
+ warn('Warning: cannot find find_proxy method in open-uri.rb.')
+ end
+ @load_openuri = true
+ end
+ end
+ end #module SandBox
+ end #module NetTools
+
end # module Command
end # module Bio
From ngoto at pub.open-bio.org Mon Mar 20 07:40:16 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 12:40:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.5,1.6
Message-ID: <200603201240.k2KCeGVL030358@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv30167/lib/bio/io
Modified Files:
fetch.rb
Log Message:
* "require 'open-uri'" is removed because open-uri.rb changes Kernel#open.
Instead, Bio::Command::NetTools.read_uri is used.
* The query is now escaped by using URI.escape.
* Bio::Fetch#databases and #formats are changed to return an array of strings,
as described in the documentation.
* Bio::Fetch#maxids is changed to return an Integer,
as described in the documentation.
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** fetch.rb 16 Mar 2006 17:29:05 -0000 1.5
--- fetch.rb 20 Mar 2006 12:40:13 -0000 1.6
***************
*** 26,30 ****
require 'uri'
! require 'open-uri'
module Bio
--- 26,30 ----
require 'uri'
! require 'bio/command'
module Bio
***************
*** 105,110 ****
query = query.join('&')
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 105,109 ----
query = query.join('&')
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query))
end
***************
*** 141,146 ****
query = "info=dbs"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 140,144 ----
query = "info=dbs"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
end
***************
*** 159,164 ****
query = "info=formats;db=#{database}"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
end
--- 157,161 ----
query = "info=formats;db=#{database}"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
end
end
***************
*** 174,179 ****
query = "info=maxids"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 171,175 ----
query = "info=maxids"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).to_i
end
From aerts at pub.open-bio.org Tue Mar 21 07:18:16 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Tue, 21 Mar 2006 12:18:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11
Message-ID: <200603211218.k2LCIGVL001647@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637
Modified Files:
fastacmd.rb
Log Message:
Added/reformatted documentation.
Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb 28 Jan 2006 08:12:21 -0000 1.10
--- fastacmd.rb 21 Mar 2006 12:18:14 -0000 1.11
***************
*** 5,45 ****
# Shuji SHIGENOBU ,
# Toshiaki Katayama ,
! # Mitsuteru C. Nakao
# Lisence:: LGPL
#
# $Id$
#
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using
- # NCBI fastacmd command.
- #
- # This class requires 'fastacmd' command and a blast database
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- # database = ARGV.shift || "/db/myblastdb"
- # entry_id = ARGV.shift || "sp:128U_DROME"
- # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- # fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- # entry = fastacmd.get_by_id(entry_id)
- # fastacmd.fetch(entry_id)
- # fastacmd.fetch(ent_list)
- #
- # fastacmd.fetch(ent_list).each do |fasta|
- # puts fasta
- # end
- #
- # == References
- #
- # * NCBI tool
- # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
#--
#
--- 5,14 ----
# Shuji SHIGENOBU ,
# Toshiaki Katayama ,
! # Mitsuteru C. Nakao ,
! # Jan Aerts
# Lisence:: LGPL
#
# $Id$
#
#--
#
***************
*** 68,72 ****
class Blast
! # NCBI fastacmd wrapper class
#
class Fastacmd
--- 37,68 ----
class Blast
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using
! # NCBI fastacmd command.
! #
! # This class requires 'fastacmd' command and a blast database
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! # require 'bio'
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # fastacmd.fetch("sp:128U_DROME")
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! # puts fasta
! # end
! #
! # = REFERENCES
! #
! # * NCBI tool
! # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
#
class Fastacmd
***************
*** 78,90 ****
attr_accessor :database
! # fastcmd command file path.
attr_accessor :fastacmd
- #
attr_accessor :errorlog
! # Initalize a fastacmd object.
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
def initialize(blast_database_file_path)
@database = blast_database_file_path
--- 74,103 ----
attr_accessor :database
! # fastacmd command file path.
attr_accessor :fastacmd
attr_accessor :errorlog
! # This method provides a handle to a BLASTable database, which you can then
! # use to retrieve sequences.
! #
! # Prerequisites:
! # * You have created a BLASTable database with the '-o T' option.
! # * You have the NCBI fastacmd tool installed.
! #
! # For example, suppose the original input file looks like:
! # >my_seq_1
! # ACCGACCTCCGGAACGGATAGCCCGACCTACG
! # >my_seq_2
! # TCCGACCTTTCCTACCGCACACCTACGCCATCAC
! # ...
! # and you've created a BLASTable database from that with the command
! # cd /my_dir/
! # formatdb -i my_input_file -t Test -n Test -o T
! # then you can get a handle to this database with the command
! # fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
! # ---
! # *Arguments*:
! # * _database_:: path and name of BLASTable database
def initialize(blast_database_file_path)
@database = blast_database_file_path
***************
*** 93,117 ****
! # get an entry_id and returns a Bio::FastaFormat object.
! #
! # entry_id = "sp:128U_DROME"
! # entry = fastacmd.get_by_id(entry_id)
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
! #
! # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
! # object even when the result is a single entry.
! #
! # p fastacmd.fetch(entry_id)
#
! # Fastacmd#fetch method also accepts a list of entry_id and returns
! # an Array of Bio::FastaFormat objects.
! #
! # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
! # p fastacmd.fetch(ent_list)
#
def fetch(list)
if list.respond_to?(:join)
--- 106,131 ----
! # Get the sequence of a specific entry in the BLASTable database.
! # For example:
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # ---
! # *Arguments*:
! # * _id_: id of an entry in the BLAST database
! # *Returns*:: a Bio::FastaFormat object
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # Get the sequence for a _list_ of IDs in the database.
#
! # For example:
! # p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
#
+ # This method always returns an array of Bio::FastaFormat objects, even when
+ # the result is a single entry.
+ # ---
+ # *Arguments*:
+ # * _ids_: list of IDs to retrieve from the database
+ # *Returns*:: array of Bio::FastaFormat objects
def fetch(list)
if list.respond_to?(:join)
***************
*** 128,138 ****
end
! # Iterates each entry.
! #
! # You can also iterate on all sequences in the database!
! # fastacmd.each do |fasta|
! # p [ fasta.definition[0..30], fasta.seq.size ]
! # end
#
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
end
! # Iterates over _all_ sequences in the database.
#
+ # fastacmd.each_entry do |fasta|
+ # p [ fasta.definition[0..30], fasta.seq.size ]
+ # end
+ # ---
+ # *Returns*:: a Bio::FastaFormat object for each iteration
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
end # module Bio
+ if $0 == __FILE__
+ fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+ seq = fastacmd.get_by_id('id_of_entry1')
+ puts seq.class
+ puts seq
+
+ seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+ seqs.each do |seq|
+ puts seq
+ end
+ fastacmd.each_entry do |fasta|
+ puts fasta.seq.size.to_s + "\t" + fasta.definition
+ end
+ end
From ngoto at pub.open-bio.org Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.48,1.49
Message-ID: <200603221019.k2MAJOVL005746@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -d -r1.48 -r1.49
*** flatfile.rb 3 Mar 2006 09:31:57 -0000 1.48
--- flatfile.rb 22 Mar 2006 10:19:22 -0000 1.49
***************
*** 262,265 ****
--- 262,268 ----
attr_reader :entry
+ # a flag to write down entry start and end positions
+ attr_accessor :entry_pos_flag
+
# start position of the entry
attr_reader :entry_start_pos
***************
*** 290,293 ****
--- 293,297 ----
end
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
+ @entry_pos_flag = nil
end
***************
*** 330,334 ****
# gets a entry
def get_entry
! p0 = @stream.pos
e = @stream.gets(@delimiter)
if e and @delimiter_overrun then
--- 334,338 ----
# gets a entry
def get_entry
! p0 = @entry_pos_flag ? @stream.pos : nil
e = @stream.gets(@delimiter)
if e and @delimiter_overrun then
***************
*** 339,343 ****
end
end
! p1 = @stream.pos
@entry_start_pos = p0
@entry = e
--- 343,347 ----
end
end
! p1 = @entry_pos_flag ? @stream.pos : nil
@entry_start_pos = p0
@entry = e
***************
*** 585,588 ****
--- 589,602 ----
def entry_raw
@splitter.entry
+ end
+
+ # a flag to write down entry start and end positions
+ def entry_pos_flag
+ @splitter.entry_pos_flag
+ end
+
+ # Sets flag to write down entry start and end positions
+ def entry_pos_flag=(x)
+ @splitter.entry_pos_flag = x
end
From ngoto at pub.open-bio.org Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io/flatfile indexer.rb,1.23,1.24
Message-ID: <200603221019.k2MAJOVL005748@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io/flatfile
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io/flatfile
Modified Files:
indexer.rb
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: indexer.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile/indexer.rb,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** indexer.rb 22 Feb 2006 08:41:03 -0000 1.23
--- indexer.rb 22 Mar 2006 10:19:22 -0000 1.24
***************
*** 115,118 ****
--- 115,119 ----
@flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
@flatfile.raw = nil
+ @flatfile.entry_pos_flag = true
@entry = nil
end
From ngoto at pub.open-bio.org Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/doc Changes-0.7.rd,1.16,1.17
Message-ID: <200603221019.k2MAJOVL005750@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/doc
In directory pub.open-bio.org:/tmp/cvs-serv5657/doc
Modified Files:
Changes-0.7.rd
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: Changes-0.7.rd
===================================================================
RCS file: /home/repository/bioruby/bioruby/doc/Changes-0.7.rd,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** Changes-0.7.rd 27 Feb 2006 11:38:14 -0000 1.16
--- Changes-0.7.rd 22 Mar 2006 10:19:22 -0000 1.17
***************
*** 262,265 ****
--- 262,270 ----
structure (which is not recommended) would not work.
+ In 1.0.1:
+
+ * Bio::FlatFile#entry_start_pos and #entry_ended_pos are enabled
+ only when Bio::FlatFile#entry_pos_flag is true.
+
=== Deleted files
From k at portal.open-bio.org Sat Mar 25 19:38:12 2006
From: k at portal.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 00:38:12 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell demo.rb,1.1,1.2
Message-ID: <200603260038.k2Q0cCgZ028442@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv28438/lib/bio/shell
Modified Files:
demo.rb
Log Message:
* 1st commit test on dev.open-bio.org after the server migration
* fixed some typos in BioRuby shell demo
Index: demo.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/demo.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** demo.rb 27 Feb 2006 09:33:22 -0000 1.1
--- demo.rb 26 Mar 2006 00:38:10 -0000 1.2
***************
*** 90,94 ****
run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
! run(%q[less "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
--- 90,94 ----
run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
! run(%q[disp "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
***************
*** 98,112 ****
def pdb_hetdic
! run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! "Retrieving the het_dic database ...", false) &&
! run(%q[savefile("data/het_dictionary.txt", het_dic)],
! "Saving the file ... ", false) &&
run(%q[het_dic.size], "Bytes of the file ...", true) &&
! run(%q[less "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
"Creating index to make the seaarchable database ...", false) &&
run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
! run(%q[sake.conect], "Showing connect table (conect) of the molecule ...", true) &&
true
end
--- 98,112 ----
def pdb_hetdic
! # run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! # "Retrieving the het_dic database ...", false) &&
! # run(%q[savefile("data/het_dictionary.txt", het_dic)],
! # "Saving the file ... ", false) &&
run(%q[het_dic.size], "Bytes of the file ...", true) &&
! run(%q[disp "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
"Creating index to make the seaarchable database ...", false) &&
run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
! run(%q[osake.conect], "Showing connect table (conect) of the molecule ...", true) &&
true
end
From ngoto at dev.open-bio.org Tue Mar 28 09:00:50 2006
From: ngoto at dev.open-bio.org (Naohisa Goto)
Date: Tue, 28 Mar 2006 14:00:50 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.4,1.5
Message-ID: <200603281400.k2SE0oK6024842@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv24822
Modified Files:
command.rb
Log Message:
* added "require 'open-uri'"
* removed complicated hacks for open-uri
Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** command.rb 20 Mar 2006 10:34:57 -0000 1.4
--- command.rb 28 Mar 2006 14:00:48 -0000 1.5
***************
*** 12,15 ****
--- 12,16 ----
require 'open3'
require 'uri'
+ require 'open-uri'
module Bio
***************
*** 157,289 ****
module NetTools
- # Same as OpenURI.open_uri(*arg).
- # If open-uri.rb is already loaded, ::OpenURI is used.
- # Otherwise, internal OpenURI in sandbox is used because
- # open-uri.rb redefines Kernel.open.
- def self.open_uri(uri, *arg)
- if defined? ::OpenURI
- ::OpenURI.open_uri(uri, *arg)
- else
- SandBox.load_openuri_in_sandbox
- uri = uri.to_s if ::URI::Generic === uri
- SandBox::OpenURI.open_uri(uri, *arg)
- end
- end
-
# Same as OpenURI.open_uri(uri).read.
- # If open-uri.rb is already loaded, ::OpenURI is used.
- # Otherwise, internal OpenURI in sandbox is used becase
- # open-uri.rb redefines Kernel.open.
def self.read_uri(uri)
! self.open_uri(uri).read
end
-
- # Sandbox to load open-uri.rb.
- # Internal use only.
- module SandBox #:nodoc:
-
- # Dummy module definition.
- module Kernel #:nodoc:
- # dummy method
- def open(*arg); end #:nodoc:
- end #module Kernel
-
- # a method to find proxy. dummy definition
- module FindProxy; end #:nodoc:
-
- # dummy module definition
- module OpenURI #:nodoc:
- module OpenRead; end #:nodoc:
- end #module OpenURI
-
- # Dummy module definition.
- module URI #:nodoc:
- class Generic < ::URI::Generic #:nodoc:
- include SandBox::FindProxy
- end
-
- class HTTPS < ::URI::HTTPS #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- class HTTP < ::URI::HTTP #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- class FTP < ::URI::FTP #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- # parse and new. internal use only.
- def self.__parse_and_new__(klass, uri) #:nodoc:
- scheme, userinfo, host, port,
- registry, path, opaque, query, fragment = ::URI.split(uri)
- klass.new(scheme, userinfo, host, port,
- registry, path, opaque, query,
- fragment)
- end
- private_class_method :__parse_and_new__
-
- # same as ::URI.parse. internal use only.
- def self.parse(uri) #:nodoc:
- r = ::URI.parse(uri)
- case r
- when ::URI::HTTPS
- __parse_and_new__(HTTPS, uri)
- when ::URI::HTTP
- __parse_and_new__(HTTP, uri)
- when ::URI::FTP
- __parse_and_new__(FTP, uri)
- else
- r
- end
- end
- end #module URI
-
- @load_openuri = nil
- # load open-uri.rb in SandBox module.
- def self.load_openuri_in_sandbox #:nodoc:
- return if @load_openuri
- fn = nil
- unless $:.find do |x|
- fn = File.join(x, 'open-uri.rb')
- FileTest.exist?(fn)
- end then
- warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
- else
- # reading open-uri.rb
- str = File.read(fn)
- # eval open-uri.rb contents in SandBox module
- module_eval(str)
-
- # finds 'find_proxy' method
- find_proxy_lines = nil
- flag = nil
- endstr = nil
- str.each do |line|
- if flag then
- find_proxy_lines << line
- if endstr == line[0, endstr.length] and
- /^\s+end(\s+.*)?$/ =~ line then
- break
- end
- elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
- flag = true
- endstr = "#{$1}end"
- find_proxy_lines = line
- end
- end
- if find_proxy_lines
- module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
- else
- warn('Warning: cannot find find_proxy method in open-uri.rb.')
- end
- @load_openuri = true
- end
- end
- end #module SandBox
end #module NetTools
--- 158,165 ----
module NetTools
# Same as OpenURI.open_uri(uri).read.
def self.read_uri(uri)
! OpenURI.open_uri(uri).read
end
end #module NetTools
From k at dev.open-bio.org Sat Mar 25 21:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio sequence.rb,0.56,0.57
Message-ID: <200603260228.k2Q2S1uq028859@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28853
Modified Files:
sequence.rb
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
* Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess. (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
* Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block). With this argument set, blocks will be properly passed through to the encapsulated object.
Index: sequence.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence.rb,v
retrieving revision 0.56
retrieving revision 0.57
diff -C2 -d -r0.56 -r0.57
*** sequence.rb 17 Feb 2006 17:15:08 -0000 0.56
--- sequence.rb 26 Mar 2006 02:27:59 -0000 0.57
***************
*** 5,9 ****
# Toshiaki Katayama ,
# Yoshinori K. Okuji ,
! # Naohisa Goto
# License:: Ruby's
#
--- 5,11 ----
# Toshiaki Katayama ,
# Yoshinori K. Okuji ,
! # Naohisa Goto ,
! # Ryan Raaum ,
! # Jan Aerts
# License:: Ruby's
#
***************
*** 15,18 ****
--- 17,67 ----
module Bio
+ # = DESCRIPTION
+ # Bio::Sequence objects represent annotated sequences in bioruby.
+ # A Bio::Sequence object is a wrapper around the actual sequence,
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
+ # For most users, this encapsulation will be completely transparent.
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
+ # objects using the same arguments and returning the same values (even though
+ # these methods are not documented specifically for Bio::Sequence).
+ #
+ # = USAGE
+ # # Create a nucleic or amino acid sequence
+ # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
+ # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
+ # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
+ #
+ # # Print it out
+ # puts dna.to_s
+ # puts aa.to_s
+ #
+ # # Get a subsequence, bioinformatics style (first nucleotide is '1')
+ # puts dna.subseq(2,6)
+ #
+ # # Get a subsequence, informatics style (first nucleotide is '0')
+ # puts dna[2,6]
+ #
+ # # Print in FASTA format
+ # puts dna.output(:fasta)
+ #
+ # # Print all codons
+ # dna.window_search(3,3) do |codon|
+ # puts codon
+ # end
+ #
+ # # Splice or otherwise mangle your sequence
+ # puts dna.splicing("complement(join(1..5,16..20))")
+ # puts rna.splicing("complement(join(1..5,16..20))")
+ #
+ # # Convert a sequence containing ambiguity codes into a
+ # # regular expression you can use for subsequent searching
+ # puts aa.to_re
+ #
+ # # These should speak for themselves
+ # puts dna.complement
+ # puts dna.composition
+ # puts dna.molecular_weight
+ # puts dna.translate
+ # puts dna.gc_percent
class Sequence
***************
*** 23,37 ****
autoload :Format, 'bio/sequence/format'
def initialize(str)
@seq = str
end
! def method_missing(*arg)
! @seq.send(*arg)
end
!
! attr_accessor :entry_id, :definition, :features, :references, :comments,
! :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
!
def output(style)
extend Bio::Sequence::Format
--- 72,151 ----
autoload :Format, 'bio/sequence/format'
+ # Create a new Bio::Sequence object
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s #=> 'atgc'
+ #
+ # Note that this method does not initialize the contained sequence
+ # as any kind of bioruby object, only as a simple string
+ #
+ # puts s.seq.class #=> String
+ #
+ # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto
+ # for methods to transform the basic String of a just created
+ # Bio::Sequence object to a proper bioruby object
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String or Bio::Sequence::NA/AA object
+ # *Returns*:: Bio::Sequence object
def initialize(str)
@seq = str
end
! # Pass any unknown method calls to the wrapped sequence object. see
! # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
! def method_missing(sym, *args, &block) #:nodoc:
! @seq.send(sym, *args, &block)
end
!
! # The sequence identifier. For example, for a sequence
! # of Genbank origin, this is the accession number.
! attr_accessor :entry_id
!
! # A String with a description of the sequence
! attr_accessor :definition
!
! # An Array of Bio::Feature objects
! attr_accessor :features
!
! # An Array of Bio::Reference objects
! attr_accessor :references
!
! # A comment String
! attr_accessor :comments
!
! # Date from sequence source. Often date of deposition.
! attr_accessor :date
!
! # An Array of Strings
! attr_accessor :keywords
!
! # An Array of Strings; links to other database entries.
! attr_accessor :dblinks
!
! # A taxonomy String
! attr_accessor :taxonomy
!
! # Bio::Sequence::NA/AA
! attr_accessor :moltype
!
! # The sequence object, usually Bio::Sequence::NA/AA,
! # but could be a simple String
! attr_accessor :seq
!
! # Using Bio::Sequence::Format, return a String with the Bio::Sequence
! # object formatted in the given style.
! #
! # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
! #
! # s = Bio::Sequence.new('atgc')
! # puts s.output(:fasta) #=> "> \natgc\n"
! #
! # The style argument is given as a Ruby
! # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
! # ---
! # *Arguments*:
! # * (required) _style_: :fasta, :genbank, *or* :embl
! # *Returns*:: String object
def output(style)
extend Bio::Sequence::Format
***************
*** 48,51 ****
--- 162,175 ----
end
+ # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
+ # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
+ # of this guess. This method will change the current Bio::Sequence object.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.seq.class #=> String
+ # s.auto
+ # puts s.seq.class #=> Bio::Sequence::NA
+ # ---
+ # *Returns*:: Bio::Sequence::NA/AA object
def auto
@moltype = guess
***************
*** 57,60 ****
--- 181,194 ----
end
+ # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
+ # return a new Bio::Sequence object wrapping a sequence of the guessed type
+ # (either Bio::Sequence::AA or Bio::Sequence::NA)
+ #
+ # s = Bio::Sequence.auto('atgc')
+ # puts s.seq.class #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+ # *Returns*:: Bio::Sequence object
def self.auto(str)
seq = self.new(str)
***************
*** 63,74 ****
end
def guess(threshold = 0.9, length = 10000, index = 0)
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
cmp = str.composition
! bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
! cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
! total = @seq.length - cmp['N'] - cmp['n']
if bases.to_f / total > threshold
--- 197,247 ----
end
+ # Guess the class of the current sequence. Returns the class
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
+ # developers only, but if you know what you are doing, feel free.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.guess #=> Bio::Sequence::NA
+ #
+ # There are three parameters: `threshold`, `length`, and `index`.
+ #
+ # The `threshold` value (defaults to 0.9) is the frequency of
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+ # to produce a Bio::Sequence::NA "guess". In the default case, if less
+ # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+ # then the guess is Bio::Sequence::AA.
+ #
+ # s = Bio::Sequence.new('atgcatgcqq')
+ # puts s.guess #=> Bio::Sequence::AA
+ # puts s.guess(0.8) #=> Bio::Sequence::AA
+ # puts s.guess(0.7) #=> Bio::Sequence::NA
+ #
+ # The `length` value is how much of the total sequence to use in the
+ # guess (default 10000). If your sequence is very long, you may
+ # want to use a smaller amount to reduce the computational burden.
+ #
+ # s = Bio::Sequence.new(A VERY LONG SEQUENCE)
+ # puts s.guess(0.9, 1000) # limit the guess to the first 1000 positions
+ #
+ # The `index` value is where to start the guess. Perhaps you know there
+ # are a lot of gaps at the start...
+ #
+ # s = Bio::Sequence.new('-----atgcc')
+ # puts s.guess #=> Bio::Sequence::AA
+ # puts s.guess(0.9,10000,5) #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+ # * (optional) _length_: Fixnum (default 10000)
+ # * (optional) _index_: Fixnum (default 0)
+ # *Returns*:: Bio::Sequence::NA/AA
def guess(threshold = 0.9, length = 10000, index = 0)
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
cmp = str.composition
! bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
! cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u']
! total = str.length - cmp['N'] - cmp['n']
if bases.to_f / total > threshold
***************
*** 79,86 ****
--- 252,312 ----
end
+ # Guess the class of a given sequence. Returns the class
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
+ # developers only, but if you know what you are doing, feel free.
+ #
+ # puts Bio::Sequence.guess('atgc') #=> Bio::Sequence::NA
+ #
+ # There are three optional parameters: `threshold`, `length`, and `index`.
+ #
+ # The `threshold` value (defaults to 0.9) is the frequency of
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+ # to produce a Bio::Sequence::NA "guess". In the default case, if less
+ # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+ # then the guess is Bio::Sequence::AA.
+ #
+ # puts Bio::Sequence.guess('atgcatgcqq') #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
+ #
+ # The `length` value is how much of the total sequence to use in the
+ # guess (default 10000). If your sequence is very long, you may
+ # want to use a smaller amount to reduce the computational burden.
+ #
+ # # limit the guess to the first 1000 positions
+ # puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)
+ #
+ # The `index` value is where to start the guess. Perhaps you know there
+ # are a lot of gaps at the start...
+ #
+ # puts Bio::Sequence.guess('-----atgcc') #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+ # * (optional) _length_: Fixnum (default 10000)
+ # * (optional) _index_: Fixnum (default 0)
+ # *Returns*:: Bio::Sequence::NA/AA
def self.guess(str, *args)
self.new(str).guess(*args)
end
+ # Transform the sequence wrapped in the current Bio::Sequence object
+ # into a Bio::Sequence::NA object. This method will change the current
+ # object. This method does not validate your choice, so be careful!
+ #
+ # s = Bio::Sequence.new('RRLE')
+ # puts s.seq.class #=> String
+ # s.na
+ # puts s.seq.class #=> Bio::Sequence::NA !!!
+ #
+ # However, if you know your sequence type, this method may be
+ # constructively used after initialization,
+ #
+ # s = Bio::Sequence.new('atgc')
+ # s.na
+ # ---
+ # *Returns*:: Bio::Sequence::NA
def na
@seq = NA.new(@seq)
***************
*** 88,96 ****
end
def aa
@seq = AA.new(@seq)
@moltype = AA
end
!
end # Sequence
--- 314,338 ----
end
+ # Transform the sequence wrapped in the current Bio::Sequence object
+ # into a Bio::Sequence::AA object. This method will change the current
+ # object. This method does not validate your choice, so be careful!
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.seq.class #=> String
+ # s.aa
+ # puts s.seq.class #=> Bio::Sequence::AA !!!
+ #
+ # However, if you know your sequence type, this method may be
+ # constructively used after initialization,
+ #
+ # s = Bio::Sequence.new('RRLE')
+ # s.aa
+ # ---
+ # *Returns*:: Bio::Sequence::AA
def aa
@seq = AA.new(@seq)
@moltype = AA
end
!
end # Sequence
From k at dev.open-bio.org Sat Mar 25 21:32:58 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:32:58 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio reference.rb,1.21,1.22
Message-ID: <200603260232.k2Q2Ww61028892@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28888
Modified Files:
reference.rb
Log Message:
* comprehensive documentation contributed by Ryan Raaum is added
Index: reference.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/reference.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** reference.rb 8 Feb 2006 15:06:26 -0000 1.21
--- reference.rb 26 Mar 2006 02:32:56 -0000 1.22
***************
*** 2,45 ****
# = bio/reference.rb - Journal reference classes
#
! # Copyright:: Copyright (C) 2001
! # KATAYAMA Toshiaki
! # Lisence:: LGPL
#
# $Id$
#
- # == Description
- #
- # Journal reference classes.
- #
- # == Examples
- #
- # == References
- #
- #
- #
- #--
- #
- # This library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2 of the License, or (at your option) any later version.
- #
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with this library; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- #
- #++
- #
module Bio
# A class for journal reference information.
#
! # === Examples
#
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
--- 2,20 ----
# = bio/reference.rb - Journal reference classes
#
! # Copyright:: Copyright (C) 2001, 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
! # License:: Ruby's
#
# $Id$
#
module Bio
+ # = DESCRIPTION
+ #
# A class for journal reference information.
#
! # = USAGE
#
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
***************
*** 69,100 ****
attr_reader :authors
! # "Title of the study."
attr_reader :title
! # "Theor. J. Hoge"
attr_reader :journal
! # 12
attr_reader :volume
! # 3
attr_reader :issue
! # "123-145"
attr_reader :pages
! # 2001
attr_reader :year
! # 12345678
attr_reader :pubmed
! # 98765432
attr_reader :medline
! # Abstract test in String.
attr_reader :abstract
! # A URL String.
attr_reader :url
--- 44,75 ----
attr_reader :authors
! # String with title of the study
attr_reader :title
! # String with journal name
attr_reader :journal
! # volume number (typically Fixnum)
attr_reader :volume
! # issue number (typically Fixnum)
attr_reader :issue
! # page range (typically String, e.g. "123-145")
attr_reader :pages
! # year of publication (typically Fixnum)
attr_reader :year
! # pubmed identifier (typically Fixnum)
attr_reader :pubmed
! # medline identifier (typically Fixnum)
attr_reader :medline
! # Abstract text in String.
attr_reader :abstract
! # A URL String.
attr_reader :url
***************
*** 105,109 ****
attr_reader :affiliations
! #
def initialize(hash)
hash.default = ''
--- 80,119 ----
attr_reader :affiliations
! # Create a new Bio::Reference object from a Hash of values.
! # Data is extracted from the values for keys:
! #
! # * authors - expected value: Array of Strings
! # * title - expected value: String
! # * journal - expected value: String
! # * volume - expected value: Fixnum or String
! # * issue - expected value: Fixnum or String
! # * pages - expected value: String
! # * year - expected value: Fixnum or String
! # * pubmed - expected value: Fixnum or String
! # * medline - expected value: Fixnum or String
! # * abstract - expected value: String
! # * url - expected value: String
! # * mesh - expected value: Array of Strings
! # * affiliations - expected value: Array of Strings
! #
! #
! # hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
! # 'title' => "Title of the study.",
! # 'journal' => "Theor. J. Hoge",
! # 'volume' => 12,
! # 'issue' => 3,
! # 'pages' => "123-145",
! # 'year' => 2001,
! # 'pubmed' => 12345678,
! # 'medline' => 98765432,
! # 'abstract' => "Hoge fuga. ...",
! # 'url' => "http://example.com",
! # 'mesh' => [],
! # 'affiliations' => []}
! # ref = Bio::Reference.new(hash)
! # ---
! # *Arguments*:
! # * (required) _hash_: Hash
! # *Returns*:: Bio::Reference object
def initialize(hash)
hash.default = ''
***************
*** 131,138 ****
# 0. nil - general
# 1. endnote - Endnote
! # 2. bibitem - Bibitem (option acceptable)
! # 3. bibtex - BiBTeX (option acceptable)
! # 4. rd - rd (option acceptable)
! # 5. nature - Nature (option acceptable)
# 6. science - Science
# 7. genome_biol - Genome Biology
--- 141,148 ----
# 0. nil - general
# 1. endnote - Endnote
! # 2. bibitem - Bibitem (option available)
! # 3. bibtex - BiBTeX (option available)
! # 4. rd - rd (option available)
! # 5. nature - Nature (option available)
# 6. science - Science
# 7. genome_biol - Genome Biology
***************
*** 142,145 ****
--- 152,172 ----
# 11. trends - Trends in *
# 12. cell - Cell Press
+ #
+ # See individual methods for details. Basic usage is:
+ #
+ # # ref is Bio::Reference object
+ # # using simplest possible call (for general style)
+ # puts ref.format
+ #
+ # # output in Nature style
+ # puts ref.format("nature") # alternatively, puts ref.nature
+ #
+ # # output in Nature short style (see Bio::Reference#nature)
+ # puts ref.format("nature",true) # alternatively, puts ref.nature(true)
+ # ---
+ # *Arguments*:
+ # * (optional) _style_: String with style identifier
+ # * (optional) _option_: Option for styles accepting one
+ # *Returns*:: String
def format(style = nil, option = nil)
case style
***************
*** 173,177 ****
end
! # Formats in the Endonote style.
def endnote
lines = []
--- 200,222 ----
end
! # Returns reference formatted in the Endnote style.
! #
! # # ref is a Bio::Reference object
! # puts ref.endnote
! #
! # %0 Journal Article
! # %A Hoge, J.P.
! # %A Fuga, F.B.
! # %D 2001
! # %T Title of the study.
! # %J Theor. J. Hoge
! # %V 12
! # %N 3
! # %P 123-145
! # %M 12345678
! # %U http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=12345678
! # %X Hoge fuga. ...
! # ---
! # *Returns*:: String
def endnote
lines = []
***************
*** 201,205 ****
end
! # Formats in the bibitem.
def bibitem(item = nil)
item = "PMID:#{@pubmed}" unless item
--- 246,260 ----
end
! # Returns reference formatted in the bibitem style
! #
! # # ref is a Bio::Reference object
! # puts ref.bibitem
! #
! # \bibitem{PMID:12345678}
! # Hoge, J.P., Fuga, F.B.
! # Title of the study.,
! # {\em Theor. J. Hoge}, 12(3):123--145, 2001.
! # ---
! # *Returns*:: String
def bibitem(item = nil)
item = "PMID:#{@pubmed}" unless item
***************
*** 213,217 ****
end
! # Formats in the BiBTeX style.
def bibtex(section = nil)
section = "article" unless section
--- 268,303 ----
end
! # Returns reference formatted in the BiBTeX style.
! #
! # # ref is a Bio::Reference object
! # puts ref.bibtex
! #
! # @article{PMID:12345678,
! # author = {Hoge, J.P. and Fuga, F.B.},
! # title = {Title of the study.},
! # journal = {Theor. J. Hoge},
! # year = {2001},
! # volume = {12},
! # number = {3},
! # pages = {123--145},
! # }
! #
! # # using a different section (e.g. "book")
! # # (but not really configured for anything other than articles)
! # puts ref.bibtex("book")
! #
! # @book{PMID:12345678,
! # author = {Hoge, J.P. and Fuga, F.B.},
! # title = {Title of the study.},
! # journal = {Theor. J. Hoge},
! # year = {2001},
! # volume = {12},
! # number = {3},
! # pages = {123--145},
! # }
! # ---
! # *Arguments*:
! # * (optional) _section_: BiBTeX section as String
! # *Returns*:: String
def bibtex(section = nil)
section = "article" unless section
***************
*** 231,235 ****
end
! # Formats in a general style.
def general
authors = @authors.join(', ')
--- 317,328 ----
end
! # Returns reference formatted in a general/generic style.
! #
! # # ref is a Bio::Reference object
! # puts ref.general
! #
! # Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.
! # ---
! # *Returns*:: String
def general
authors = @authors.join(', ')
***************
*** 237,241 ****
end
! # Formats in the RD style.
def rd(str = nil)
@abstract ||= str
--- 330,351 ----
end
! # Return reference formatted in the RD style.
! #
! # # ref is a Bio::Reference object
! # puts ref.rd
! #
! # == Title of the study.
! #
! # * Hoge, J.P. and Fuga, F.B.
! #
! # * Theor. J. Hoge 2001 12:123-145 [PMID:12345678]
! #
! # Hoge fuga. ...
! #
! # An optional string argument sets the abstract if none is already set.
! # ---
! # *Arguments*:
! # * (optional) str: String (default nil)
! # *Returns*:: String
def rd(str = nil)
@abstract ||= str
***************
*** 248,253 ****
end
! # Formats in the Nature Publish Group style.
! # * http://www.nature.com
def nature(short = false)
if short
--- 358,377 ----
end
! # Formats in the Nature Publishing Group
! # (http://www.nature.com) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.nature
! #
! # Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).
! #
! # # optionally, output short version
! # puts ref.nature(true) # or puts ref.nature(short=true)
! #
! # Hoge, J.P. & Fuga, F.B. Theor. J. Hoge 12, 123-145 (2001).
! # ---
! # *Arguments*:
! # * (optional) _short_: Boolean (default false)
! # *Returns*:: String
def nature(short = false)
if short
***************
*** 266,271 ****
end
! # Formats in the Science style.
! # * http://www.siencemag.com/
def science
if @authors.size > 4
--- 390,402 ----
end
! # Returns reference formatted in the
! # Science[http://www.sciencemag.org] style.
! #
! # # ref is a Bio::Reference object
! # puts ref.science
! #
! # J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).
! # ---
! # *Returns*:: String
def science
if @authors.size > 4
***************
*** 278,283 ****
end
! # Formats in the Genome Biology style.
! # * http://genomebiology.com/
def genome_biol
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
--- 409,421 ----
end
! # Returns reference formatted in the Genome Biology
! # (http://genomebiology.com) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.genome_biol
! #
! # Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
! # ---
! # *Returns*:: String
def genome_biol
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
***************
*** 285,294 ****
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
end
! # Formats in the Current Biology style.
! # * http://www.current-biology.com/
! alias current genome_biol
! # Formats in the Genome Research style.
! # * http://genome.org/
def genome_res
authors = authors_join(' and ')
--- 423,450 ----
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
end
!
! # Returns reference formatted in the Current Biology
! # (http://current-biology.com) style. (Same as the Genome Biology style)
! #
! # # ref is a Bio::Reference object
! # puts ref.current
! #
! # Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
! # ---
! # *Returns*:: String
! def current
! self.genome_biol
! end
! # Returns reference formatted in the Genome Research
! # (http://genome.org) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.genome_res
! #
! # Hoge, J.P. and Fuga, F.B. 2001.
! # Title of the study. Theor. J. Hoge 12: 123-145.
! # ---
! # *Returns*:: String
def genome_res
authors = authors_join(' and ')
***************
*** 296,301 ****
end
! # Formats in the Nucleic Acids Reseach style.
! # * http://nar.oxfordjournals.org/
def nar
authors = authors_join(' and ')
--- 452,464 ----
end
! # Returns reference formatted in the Nucleic Acids Research
! # (http://nar.oxfordjournals.org) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.nar
! #
! # Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.
! # ---
! # *Returns*:: String
def nar
authors = authors_join(' and ')
***************
*** 303,308 ****
end
! # Formats in the CELL Press style.
! # http://www.cell.com/
def cell
authors = authors_join(' and ')
--- 466,478 ----
end
! # Returns reference formatted in the
! # CELL[http://www.cell.com] Press style.
! #
! # # ref is a Bio::Reference object
! # puts ref.cell
! #
! # Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.
! # ---
! # *Returns*:: String
def cell
authors = authors_join(' and ')
***************
*** 310,315 ****
end
! # Formats in the TRENDS Journals.
! # * http://www.trends.com/
def trends
if @authors.size > 2
--- 480,492 ----
end
! # Returns reference formatted in the
! # TRENDS[http://www.trends.com] style.
! #
! # # ref is a Bio::Reference object
! # puts ref.trends
! #
! # Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145
! # ---
! # *Returns*:: String
def trends
if @authors.size > 2
***************
*** 352,358 ****
end
! # Set of Bio::Reference.
#
! # === Examples
#
# refs = Bio::References.new
--- 529,537 ----
end
! # = DESCRIPTION
#
! # A container class for Bio::Reference objects.
! #
! # = USAGE
#
# refs = Bio::References.new
***************
*** 364,371 ****
class References
! # Array of Bio::Reference.
attr_accessor :references
#
def initialize(ary = [])
@references = ary
--- 543,556 ----
class References
! # Array of Bio::Reference objects
attr_accessor :references
+ # Create a new Bio::References object
#
+ # refs = Bio::References.new
+ # ---
+ # *Arguments*:
+ # * (optional) _ary_: Array of Bio::Reference objects
+ # *Returns*:: Bio::References object
def initialize(ary = [])
@references = ary
***************
*** 373,377 ****
! # Append a Bio::Reference object.
def append(reference)
@references.push(reference) if reference.is_a? Reference
--- 558,568 ----
! # Add a Bio::Reference object to the container.
! #
! # refs.append(reference)
! # ---
! # *Arguments*:
! # * (required) _reference_: Bio::Reference object
! # *Returns*:: current Bio::References object
def append(reference)
@references.push(reference) if reference.is_a? Reference
***************
*** 379,383 ****
end
! # Iterates each Bio::Reference object.
def each
@references.each do |reference|
--- 570,580 ----
end
! # Iterate through Bio::Reference objects.
! #
! # refs.each do |reference|
! # ...
! # end
! # ---
! # *Block*:: yields each Bio::Reference object
def each
@references.each do |reference|
From k at dev.open-bio.org Sat Mar 25 21:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/sequence aa.rb, 1.2, 1.3 common.rb,
1.2, 1.3 compat.rb, 1.2, 1.3 format.rb, 1.2, 1.3 generic.rb,
1.3, 1.4 na.rb, 1.2, 1.3
Message-ID: <200603260228.k2Q2S12v028863@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv28853/sequence
Modified Files:
aa.rb common.rb compat.rb format.rb generic.rb na.rb
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
* Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess. (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
* Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block). With this argument set, blocks will be properly passed through to the encapsulated object.
Index: compat.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/compat.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** compat.rb 6 Feb 2006 14:18:03 -0000 1.2
--- compat.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 18,21 ****
--- 19,33 ----
autoload :AA, 'bio/sequence/aa'
+ # Return sequence as
+ # String[http://corelib.rubyonrails.org/classes/String.html].
+ # The original sequence is unchanged.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.to_s #=> 'atgc'
+ # puts s.to_s.class #=> String
+ # puts s #=> 'atgc'
+ # puts s.class #=> Bio::Sequence
+ # ---
+ # *Returns*:: String object
def to_s
String.new(@seq)
***************
*** 26,32 ****
--- 38,51 ----
module Common
+ # *DEPRECATED* Do not use! Use Bio::Sequence#output instead.
+ #
# Output the FASTA format string of the sequence. The 1st argument is
# used as the comment string. If the 2nd option is given, the output
# sequence will be folded.
+ # ---
+ # *Arguments*:
+ # * (optional) _header_: String object
+ # * (optional) _width_: Fixnum object (default nil)
+ # *Returns*:: String
def to_fasta(header = '', width = nil)
warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
***************
*** 44,52 ****
class NA
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! def pikachu
self.dna.tr("atgc", "pika") # joke, of course :-)
end
--- 63,89 ----
class NA
+ # Generate a new random sequence with the given frequency of bases.
+ # The sequence length is determined by their cumulative sum.
+ # (See also Bio::Sequence::Common#randomize which creates a new
+ # randomized sequence object using the base composition of an existing
+ # sequence instance).
+ #
+ # counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
+ # puts Bio::Sequence::NA.randomize(counts) #=> "ggcttgttac" (for example)
+ #
+ # You may also feed the output of randomize into a block
+ #
+ # actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
+ # Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
+ # actual_counts #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
+ # ---
+ # *Arguments*:
+ # * (optional) _hash_: Hash object
+ # *Returns*:: Bio::Sequence::NA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! def pikachu #:nodoc:
self.dna.tr("atgc", "pika") # joke, of course :-)
end
***************
*** 57,60 ****
--- 94,115 ----
class AA
+ # Generate a new random sequence with the given frequency of bases.
+ # The sequence length is determined by their cumulative sum.
+ # (See also Bio::Sequence::Common#randomize which creates a new
+ # randomized sequence object using the base composition of an existing
+ # sequence instance).
+ #
+ # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
+ # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
+ #
+ # You may also feed the output of randomize into a block
+ #
+ # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
+ # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
+ # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
+ # ---
+ # *Arguments*:
+ # * (optional) _hash_: Hash object
+ # *Returns*:: Bio::Sequence::AA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** common.rb 6 Feb 2006 14:16:17 -0000 1.2
--- common.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 15,22 ****
class Sequence
! # This module provides common methods for biological sequence classes
! # which must inherit String.
module Common
def to_s
String.new(self)
--- 16,53 ----
class Sequence
! # = DESCRIPTION
! # Bio::Sequence::Common is a
! # Mixin[http://www.rubycentral.com/book/tut_modules.html]
! # implementing methods common to
! # Bio::Sequence::AA and Bio::Sequence::NA. All of these methods
! # are available to either Amino Acid or Nucleic Acid sequences, and
! # by encapsulation are also available to Bio::Sequence objects.
! #
! # = USAGE
! #
! # # Create a sequence
! # dna = Bio::Sequence.auto('atgcatgcatgc')
! #
! # # Splice out a subsequence using a Genbank-style location string
! # puts dna.splice('complement(1..4)')
! #
! # # What is the base composition?
! # puts dna.composition
! #
! # # Create a random sequence with the composition of a current sequence
! # puts dna.randomize
module Common
+ # Return sequence as
+ # String[http://corelib.rubyonrails.org/classes/String.html].
+ # The original sequence is unchanged.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.to_s #=> 'atgc'
+ # puts s.to_s.class #=> String
+ # puts s #=> 'atgc'
+ # puts s.class #=> Bio::Sequence::NA
+ # ---
+ # *Returns*:: String object
def to_s
String.new(self)
***************
*** 24,34 ****
alias to_str to_s
! # Force self to re-initialize for clean up (remove white spaces,
! # case unification).
def seq
self.class.new(self)
end
! # Similar to the 'seq' method, but changes the self object destructively.
def normalize!
initialize(self)
--- 55,79 ----
alias to_str to_s
! # Create a new sequence based on the current sequence.
! # The original sequence is unchanged.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # s2 = s.seq
! # puts s2 #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA/AA object
def seq
self.class.new(self)
end
! # Normalize the current sequence, removing all whitespace and
! # transforming all positions to uppercase if the sequence is AA or
! # transforming all positions to lowercase if the sequence is NA.
! # The original sequence is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # s.normalize!
! # ---
! # *Returns*:: current Bio::Sequence::NA/AA object (modified)
def normalize!
initialize(self)
***************
*** 37,40 ****
--- 82,95 ----
alias seq! normalize!
+ # Add new data to the end of the current sequence.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # s << 'atgc'
+ # puts s #=> "atgcatgc"
+ # s << s
+ # puts s #=> "atgcatgcatgcatgc"
+ # ---
+ # *Returns*:: current Bio::Sequence::NA/AA object (modified)
def <<(*arg)
super(self.class.new(*arg))
***************
*** 42,50 ****
alias concat <<
def +(*arg)
self.class.new(super(*arg))
end
! # Returns the subsequence of the self string.
def subseq(s = 1, e = self.length)
raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
--- 97,141 ----
alias concat <<
+ # Create a new sequence by adding to an existing sequence.
+ # The existing sequence is not modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # s2 = s + 'atgc'
+ # puts s2 #=> "atgcatgc"
+ # puts s #=> "atgc"
+ #
+ # The new sequence is of the same class as the existing sequence if
+ # the new data was added to an existing sequence,
+ #
+ # puts s2.class == s.class #=> true
+ #
+ # but if an existing sequence is added to a String, the result is a String
+ #
+ # s3 = 'atgc' + s
+ # puts s3.class #=> String
+ # ---
+ # *Returns*:: new Bio::Sequence::NA/AA *or* String object
def +(*arg)
self.class.new(super(*arg))
end
! # Returns a new sequence containing the subsequence identified by the
! # start and end numbers given as parameters. *Important:* Biological
! # sequence numbering conventions (one-based) rather than ruby's
! # (zero-based) numbering conventions are used.
! #
! # s = Bio::Sequence::NA.new('atggaatga')
! # puts s.subseq(1,3) #=> "atg"
! #
! # Start defaults to 1 and end defaults to the entire existing string, so
! # subseq called without any parameters simply returns a new sequence
! # identical to the existing sequence.
! #
! # puts s.subseq #=> "atggaatga"
! # ---
! # *Arguments*:
! # * (optional) _s_(start): Integer (default 1)
! # * (optional) _e_(end): Integer (default current sequence length)
! # *Returns*:: new Bio::Sequence::NA/AA object
def subseq(s = 1, e = self.length)
raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
***************
*** 54,80 ****
end
! # This method iterates on sub string with specified length 'window_size'.
! # By specifing 'step_size', codon sized shifting or spliting genome
! # sequence with ovelapping each end can easily be yielded.
#
! # The remainder sequence at the terminal end will be returned.
#
! # Example:
! # # prints average GC% on each 100bp
! # seq.window_search(100) do |subseq|
# puts subseq.gc
# end
! # # prints every translated peptide (length 5aa) in the same frame
! # seq.window_search(15, 3) do |subseq|
# puts subseq.translate
# end
! # # split genome sequence by 10000bp with 1000bp overlap in fasta format
# i = 1
! # remainder = seq.window_search(10000, 9000) do |subseq|
# puts subseq.to_fasta("segment #{i}", 60)
# i += 1
# end
# puts remainder.to_fasta("segment #{i}", 60)
! #
def window_search(window_size, step_size = 1)
i = 0
--- 145,177 ----
end
! # This method steps through a sequence in steps of 'step_size' by
! # subsequences of 'window_size'. Typically used with a block.
! # Any remaining sequence at the terminal end will be returned.
#
! # Prints average GC% on each 100bp
#
! # s.window_search(100) do |subseq|
# puts subseq.gc
# end
! #
! # Prints every translated peptide (length 5aa) in the same frame
! #
! # s.window_search(15, 3) do |subseq|
# puts subseq.translate
# end
! #
! # Split genome sequence by 10000bp with 1000bp overlap in fasta format
! #
# i = 1
! # remainder = s.window_search(10000, 9000) do |subseq|
# puts subseq.to_fasta("segment #{i}", 60)
# i += 1
# end
# puts remainder.to_fasta("segment #{i}", 60)
! # ---
! # *Arguments*:
! # * (required) _window_size_: Fixnum
! # * (optional) _step_size_: Fixnum (default 1)
! # *Returns*:: new Bio::Sequence::NA/AA object
def window_search(window_size, step_size = 1)
i = 0
***************
*** 85,91 ****
end
! # This method receive a hash of residues/bases to the particular values,
! # and sum up the value along with the self sequence. Especially useful
! # to use with the window_search method and amino acid indices etc.
def total(hash)
hash.default = 0.0 unless hash.default
--- 182,195 ----
end
! # Returns a float total value for the sequence given a hash of
! # base or residue values,
! #
! # values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.total(values) #=> 1.0
! # ---
! # *Arguments*:
! # * (required) _hash_: Hash object
! # *Returns*:: Float object
def total(hash)
hash.default = 0.0 unless hash.default
***************
*** 100,103 ****
--- 204,212 ----
# Returns a hash of the occurrence counts for each residue or base.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.composition #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
+ # ---
+ # *Returns*:: Hash object
def composition
count = Hash.new(0)
***************
*** 108,118 ****
end
! # Returns a randomized sequence keeping its composition by default.
! # The argument is required when generating a random sequence from the empty
! # sequence (used by the class methods NA.randomize, AA.randomize).
! # If the block is given, yields for each random residue/base.
def randomize(hash = nil)
length = self.length
if hash
count = hash.clone
count.each_value {|x| length += x}
--- 217,244 ----
end
! # Returns a randomized sequence. The default is to retain the same
! # base/residue composition as the original. If a hash of base/residue
! # counts is given, the new sequence will be based on that hash
! # composition. If a block is given, each new randomly selected
! # position will be passed into the block. In all cases, the
! # original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.randomize #=> "tcag" (for example)
! #
! # new_composition = {'a' => 2, 't' => 2}
! # puts s.randomize(new_composition) #=> "ttaa" (for example)
! #
! # count = 0
! # s.randomize { |x| count += 1 }
! # puts count #=> 4
! # ---
! # *Arguments*:
! # * (optional) _hash_: Hash object
! # *Returns*:: new Bio::Sequence::NA/AA object
def randomize(hash = nil)
length = self.length
if hash
+ length = 0
count = hash.clone
count.each_value {|x| length += x}
***************
*** 139,151 ****
end
! # Generate a new random sequence with the given frequency of bases
! # or residues. The sequence length is determined by the sum of each
! # base/residue occurences.
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! # Receive a GenBank style position string and convert it to the Locations
! # objects to splice the sequence itself. See also: bio/location.rb
def splice(position)
unless position.is_a?(Locations) then
--- 265,305 ----
end
! # Generate a new random sequence with the given frequency of bases.
! # The sequence length is determined by their cumulative sum.
! # (See also Bio::Sequence::Common#randomize which creates a new
! # randomized sequence object using the base composition of an existing
! # sequence instance).
! #
! # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
! # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
! #
! # You may also feed the output of randomize into a block
! #
! # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
! # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
! # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
! # ---
! # *Arguments*:
! # * (optional) _hash_: Hash object
! # *Returns*:: Bio::Sequence::NA/AA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! # Return a new sequence extracted from the original using a GenBank style
! # position string. See also documentation for the Bio::Location class.
! #
! # s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
! # puts s.splice('1..3') #=> "atg"
! # puts s.splice('join(1..3,8..10)') #=> "atgcat"
! # puts s.splice('complement(1..3)') #=> "cat"
! # puts s.splice('complement(join(1..3,8..10))') #=> "atgcat"
! #
! # Note that 'complement'ed Genbank position strings will have no
! # effect on Bio::Sequence::AA objects.
! # ---
! # *Arguments*:
! # * (required) _position_: String *or* Bio::Locations object
! # *Returns*:: Bio::Sequence::NA/AA object
def splice(position)
unless position.is_a?(Locations) then
Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** format.rb 6 Feb 2006 14:20:35 -0000 1.2
--- format.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 4,8 ****
# Copyright:: Copyright (C) 2006
# Toshiaki Katayama ,
! # Naohisa Goto
# License:: Ruby's
#
--- 4,9 ----
# Copyright:: Copyright (C) 2006
# Toshiaki Katayama ,
! # Naohisa Goto ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 21,29 ****
class Sequence
module Format
! # Output the FASTA format string of the sequence. The 1st argument is
! # used in the comment line. If the 2nd argument (integer) is given,
! # the output sequence will be folded.
def format_fasta(header = nil, width = nil)
header ||= "#{@entry_id} #{@definition}"
--- 22,56 ----
class Sequence
+ # = DESCRIPTION
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
+ # of methods used by Bio::Sequence#output to output sequences in
+ # common bioinformatic formats. These are not called in isolation.
+ #
+ # = USAGE
+ # # Given a Bio::Sequence object,
+ # puts s.output(:fasta)
+ # puts s.output(:genbank)
+ # puts s.output(:embl)
module Format
! # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
! # case, it would be difficult to successfully call this method outside
! # its expected context).
! #
! # Output the FASTA format string of the sequence.
! #
! # UNFORTUNATELY, the current implementation of Bio::Sequence is incapable of
! # using either the header or width arguments. So something needs to be
! # changed...
! #
! # Currently, this method is used in Bio::Sequence#output like so,
! #
! # s = Bio::Sequence.new('atgc')
! # puts s.output(:fasta) #=> "> \natgc\n"
! # ---
! # *Arguments*:
! # * (optional) _header_: String (default nil)
! # * (optional) _width_: Fixnum (default nil)
! # *Returns*:: String object
def format_fasta(header = nil, width = nil)
header ||= "#{@entry_id} #{@definition}"
***************
*** 37,44 ****
end
! def format_gff
raise NotImplementedError
end
def format_genbank
prefix = ' ' * 5
--- 64,83 ----
end
! # Not yet implemented :)
! # Remove the nodoc command after implementation!
! # ---
! # *Returns*:: String object
! def format_gff #:nodoc:
raise NotImplementedError
end
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+ # case, it would be difficult to successfully call this method outside
+ # its expected context).
+ #
+ # Output the Genbank format string of the sequence.
+ # Used in Bio::Sequence#output.
+ # ---
+ # *Returns*:: String object
def format_genbank
prefix = ' ' * 5
***************
*** 49,52 ****
--- 88,99 ----
end
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+ # case, it would be difficult to successfully call this method outside
+ # its expected context).
+ #
+ # Output the EMBL format string of the sequence.
+ # Used in Bio::Sequence#output.
+ # ---
+ # *Returns*:: String object
def format_embl
prefix = 'FT '
Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/aa.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** aa.rb 6 Feb 2006 14:11:31 -0000 1.2
--- aa.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 17,27 ****
class Sequence
!
! # Amino Acid sequence
class AA < String
include Bio::Sequence::Common
! # Generate a amino acid sequence object from a string.
def initialize(str)
super
--- 18,61 ----
class Sequence
! # = DESCRIPTION
! # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
! #
! # = USAGE
! # # Create an Amino Acid sequence.
! # aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
! #
! # # What are the three-letter codes for all the residues?
! # puts aa.codes
! #
! # # What are the names of all the residues?
! # puts aa.names
! #
! # # What is the molecular weight of this peptide?
! # puts aa.molecular_weight
class AA < String
include Bio::Sequence::Common
! # Generate an amino acid sequence object from a string.
! #
! # s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
! #
! # or maybe (if you have an amino acid sequence in a file)
! #
! # s = Bio::Sequence::AA.new(File.open('aa.txt').read)
! #
! # Amino Acid sequences are *always* all uppercase in bioruby
! #
! # s = Bio::Sequence::AA.new("rrLeHtfV")
! # puts s #=> "RRLEHTFV"
! #
! # Whitespace is stripped from the sequence
! #
! # s = Bio::Sequence::AA.new("RRL\nELA\tRG\r RL")
! # puts s #=> "RRLELARGRL"
! # ---
! # *Arguments*:
! # * (required) _str_: String
! # *Returns*:: Bio::Sequence::AA object
def initialize(str)
super
***************
*** 31,45 ****
! # Estimate the weight of this protein.
def molecular_weight
Bio::AminoAcid.weight(self)
end
def to_re
Bio::AminoAcid.to_re(self)
end
! # Generate the list of the names of the each residue along with the
! # sequence (3 letters code).
def codes
array = []
--- 65,98 ----
! # Estimate molecular weight based on
! # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.molecular_weight #=> 572.655
! # ---
! # *Returns*:: Float object
def molecular_weight
Bio::AminoAcid.weight(self)
end
+ # Create a ruby regular expression instance
+ # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html])
+ #
+ # s = Bio::Sequence::AA.new("RRLE")
+ # puts s.to_re #=> /RRLE/
+ # ---
+ # *Returns*:: Regexp object
def to_re
Bio::AminoAcid.to_re(self)
end
! # Generate the list of the names of each residue along with the
! # sequence (3 letters code). Codes used in bioruby are found in the
! # Bio::AminoAcid::NAMES hash.
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.codes #=> ["Arg", "Arg", "Leu", "Glu"]
! # ---
! # *Returns*:: Array object
def codes
array = []
***************
*** 50,54 ****
end
! # Similar to codes but returns long names.
def names
self.codes.map do |x|
--- 103,115 ----
end
! # Generate the list of the names of each residue along with the
! # sequence (full name). Names used in bioruby are found in the
! # Bio::AminoAcid::NAMES hash.
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.names
! # #=> ["arginine", "arginine", "leucine", "glutamic acid"]
! # ---
! # *Returns*:: Array object
def names
self.codes.map do |x|
Index: generic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/generic.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** generic.rb 6 Feb 2006 14:26:04 -0000 1.3
--- generic.rb 26 Mar 2006 02:27:59 -0000 1.4
***************
*** 14,18 ****
class Sequence
! class Generic < String
include Bio::Sequence::Common
--- 14,18 ----
class Sequence
! class Generic < String #:nodoc:
include Bio::Sequence::Common
Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/na.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** na.rb 6 Feb 2006 14:13:52 -0000 1.2
--- na.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 19,28 ****
! # Nucleic Acid sequence
class NA < String
include Bio::Sequence::Common
! # Generate a nucleic acid sequence object from a string.
def initialize(str)
super
--- 20,78 ----
! # = DESCRIPTION
! # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
! #
! # = USAGE
! # # Create a Nucleic Acid sequence.
! # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
! # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
! #
! # # What are the names of all the bases?
! # puts dna.names
! # puts rna.names
! #
! # # What is the GC percentage?
! # puts dna.gc_percent
! # puts rna.gc_percent
! #
! # # What is the molecular weight?
! # puts dna.molecular_weight
! # puts rna.molecular_weight
! #
! # # What is the reverse complement?
! # puts dna.reverse_complement
! # puts dna.complement
! #
! # # Is this sequence DNA or RNA?
! # puts dna.rna?
! #
! # # Translate my sequence (see method docs for many options)
! # puts dna.translate
! # puts rna.translate
class NA < String
include Bio::Sequence::Common
! # Generate a nucleic acid sequence object from a string.
! #
! # s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
! #
! # or maybe (if you have a nucleic acid sequence in a file)
! #
! # s = Bio::Sequence::NA.new(File.open('dna.txt').read)
! #
! # Nucleic Acid sequences are *always* all lowercase in bioruby
! #
! # s = Bio::Sequence::NA.new("AAGcTtGG")
! # puts s #=> "aagcttgg"
! #
! # Whitespace is stripped from the sequence
! #
! # s = Bio::Sequence::NA.new("atg\nggg\ttt\r gc")
! # puts s #=> "atggggttgc"
! # ---
! # *Arguments*:
! # * (required) _str_: String
! # *Returns*:: Bio::Sequence::NA object
def initialize(str)
super
***************
*** 31,36 ****
end
! # This method depends on Locations class, see bio/location.rb
! def splicing(position)
mRNA = super
if mRNA.rna?
--- 81,86 ----
end
! # Alias of Bio::Sequence::Common splice method, documented there.
! def splicing(position) #:nodoc:
mRNA = super
if mRNA.rna?
***************
*** 42,46 ****
end
! # Returns complement sequence without reversing ("atgc" -> "tacg")
def forward_complement
s = self.class.new(self)
--- 92,103 ----
end
! # Returns a new complementary sequence object (without reversing).
! # The original sequence object is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.forward_complement #=> 'tacg'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def forward_complement
s = self.class.new(self)
***************
*** 49,53 ****
end
! # Convert to complement sequence without reversing ("atgc" -> "tacg")
def forward_complement!
if self.rna?
--- 106,117 ----
end
! # Converts the current sequence into its complement (without reversing).
! # The original sequence object is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.forward_complement! #=> 'tacg'
! # puts s #=> 'tacg'
! # ---
! # *Returns*:: current Bio::Sequence::NA object (modified)
def forward_complement!
if self.rna?
***************
*** 59,63 ****
end
! # Returns reverse complement sequence ("atgc" -> "gcat")
def reverse_complement
s = self.class.new(self)
--- 123,134 ----
end
! # Returns a new sequence object with the reverse complement
! # sequence to the original. The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.reverse_complement #=> 'gcat'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def reverse_complement
s = self.class.new(self)
***************
*** 66,70 ****
end
! # Convert to reverse complement sequence ("atgc" -> "gcat")
def reverse_complement!
self.reverse!
--- 137,148 ----
end
! # Converts the original sequence into its reverse complement.
! # The original sequence is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.reverse_complement! #=> 'gcat'
! # puts s #=> 'gcat'
! # ---
! # *Returns*:: current Bio::Sequence::NA object (modified)
def reverse_complement!
self.reverse!
***************
*** 72,87 ****
end
! # Aliases for short
alias complement reverse_complement
alias complement! reverse_complement!
! # Translate into the amino acid sequence from the given frame and the
! # selected codon table. The table also can be a Bio::CodonTable object.
! # The 'unknown' character is used for invalid/unknown codon (can be
! # used for 'nnn' and/or gap translation in practice).
#
! # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
! # (4, 5 or 6 is also accepted) for the reverse strand.
def translate(frame = 1, table = 1, unknown = 'X')
if table.is_a?(Bio::CodonTable)
--- 150,235 ----
end
! # Alias for Bio::Sequence::NA#reverse_complement
alias complement reverse_complement
+
+ # Alias for Bio::Sequence::NA#reverse_complement!
alias complement! reverse_complement!
! # Translate into an amino acid sequence.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.translate #=> "MA*"
#
! # By default, translate starts in reading frame position 1, but you
! # can start in either 2 or 3 as well,
! #
! # puts s.translate(2) #=> "WR"
! # puts s.translate(3) #=> "GV"
! #
! # You may also translate the reverse complement in one step by using frame
! # values of -1, -2, and -3 (or 4, 5, and 6)
! #
! # puts s.translate(-1) #=> "SRH"
! # puts s.translate(4) #=> "SRH"
! # puts s.reverse_complement.translate(1) #=> "SRH"
! #
! # The default codon table in the translate function is the Standard
! # Eukaryotic codon table. The translate function takes either a
! # number or a Bio::CodonTable object for its table argument.
! # The available tables are
! # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
! #
! # 1. "Standard (Eukaryote)"
! # 2. "Vertebrate Mitochondrial"
! # 3. "Yeast Mitochondorial"
! # 4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
! # 5. "Invertebrate Mitochondrial"
! # 6. "Ciliate Macronuclear and Dasycladacean"
! # 9. "Echinoderm Mitochondrial"
! # 10. "Euplotid Nuclear"
! # 11. "Bacteria"
! # 12. "Alternative Yeast Nuclear"
! # 13. "Ascidian Mitochondrial"
! # 14. "Flatworm Mitochondrial"
! # 15. "Blepharisma Macronuclear"
! # 16. "Chlorophycean Mitochondrial"
! # 21. "Trematode Mitochondrial"
! # 22. "Scenedesmus obliquus mitochondrial"
! # 23. "Thraustochytrium Mitochondrial"
! #
! # If you are using anything other than the default table, you must specify
! # frame in the translate method call,
! #
! # puts s.translate #=> "MA*" (using defaults)
! # puts s.translate(1,1) #=> "MA*" (same as above, but explicit)
! # puts s.translate(1,2) #=> "MAW" (different codon table)
! #
! # and using a Bio::CodonTable instance in the translate method call,
! #
! # mt_table = Bio::CodonTable[2]
! # puts s.translate(1, mt_table) #=> "MAW"
! #
! # By default, any invalid or unknown codons (as could happen if the
! # sequence contains ambiguities) will be represented by 'X' in the
! # translated sequence.
! # You may change this to any character of your choice.
! #
! # s = Bio::Sequence::NA.new('atgcNNtga')
! # puts s.translate #=> "MX*"
! # puts s.translate(1,1,'9') #=> "M9*"
! #
! # The translate method considers gaps to be unknown characters and treats
! # them as such (i.e. does not collapse sequences prior to translation), so
! #
! # s = Bio::Sequence::NA.new('atgc--tga')
! # puts s.translate #=> "MX*"
! # ---
! # *Arguments*:
! # * (optional) _frame_: one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
! # * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
! # (default 1)
! # * (optional) _unknown_: Character (default 'X')
! # *Returns*:: Bio::Sequence::AA object
def translate(frame = 1, table = 1, unknown = 'X')
if table.is_a?(Bio::CodonTable)
***************
*** 109,113 ****
end
! # Returns counts of the each codon in the sequence by Hash.
def codon_usage
hash = Hash.new(0)
--- 257,276 ----
end
! # Returns counts of each codon in the sequence in a hash.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.codon_usage #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
! #
! # This method does not validate codons! Any three letter group is a 'codon'. So,
! #
! # s = Bio::Sequence::NA.new('atggNNtga')
! # puts s.codon_usage #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
! #
! # s = Bio::Sequence::NA.new('atgg--tga')
! # puts s.codon_usage #=> {"tga"=>1, "g--"=>1, "atg"=>1}
! #
! # Also, there is no option to work in any frame other than the first.
! # ---
! # *Returns*:: Hash object
def codon_usage
hash = Hash.new(0)
***************
*** 118,122 ****
end
! # Calculate the ratio of GC / ATGC bases in percent.
def gc_percent
count = self.composition
--- 281,291 ----
end
! # Calculate the ratio of GC / ATGC bases as a percentage rounded to
! # the nearest whole number.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.gc_percent #=> 55
! # ---
! # *Returns*:: Fixnum
def gc_percent
count = self.composition
***************
*** 127,136 ****
end
! # Show abnormal bases other than 'atgcu'.
def illegal_bases
self.scan(/[^atgcu]/).sort.uniq
end
! # Estimate the weight of this biological string molecule.
def molecular_weight
if self.rna?
--- 296,322 ----
end
! # Returns an alphabetically sorted array of any non-standard bases
! # (other than 'atgcu').
! #
! # s = Bio::Sequence::NA.new('atgStgQccR')
! # puts s.illegal_bases #=> ["q", "r", "s"]
! # ---
! # *Returns*:: Array object
def illegal_bases
self.scan(/[^atgcu]/).sort.uniq
end
! # Estimate molecular weight (using the values from BioPerl's
! # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.molecular_weight #=> 2841.00708
! #
! # RNA and DNA do not have the same molecular weights,
! #
! # s = Bio::Sequence::NA.new('auggcguga')
! # puts s.molecular_weight #=> 2956.94708
! # ---
! # *Returns*:: Float object
def molecular_weight
if self.rna?
***************
*** 141,145 ****
end
! # Convert the universal code string into the regular expression.
def to_re
if self.rna?
--- 327,337 ----
end
! # Create a ruby regular expression instance
! # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html])
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.to_re #=> /atggcgtga/
! # ---
! # *Returns*:: Regexp object
def to_re
if self.rna?
***************
*** 150,154 ****
end
! # Convert the self string into the list of the names of the each base.
def names
array = []
--- 342,353 ----
end
! # Generate the list of the names of each nucleotide along with the
! # sequence (full name). Names used in bioruby are found in the
! # Bio::NucleicAcid::NAMES hash.
! #
! # s = Bio::Sequence::NA.new('atg')
! # puts s.names #=> ["Adenine", "Thymine", "Guanine"]
! # ---
! # *Returns*:: Array object
def names
array = []
***************
*** 159,176 ****
end
! # Output a DNA string by substituting 'u' to 't'.
def dna
self.tr('u', 't')
end
def dna!
self.tr!('u', 't')
end
! # Output a RNA string by substituting 't' to 'u'.
def rna
self.tr('t', 'u')
end
def rna!
self.tr!('t', 'u')
--- 358,405 ----
end
! # Returns a new sequence object with any 'u' bases changed to 't'.
! # The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('augc')
! # puts s.dna #=> 'atgc'
! # puts s #=> 'augc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def dna
self.tr('u', 't')
end
+ # Changes any 'u' bases in the original sequence to 't'.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('augc')
+ # puts s.dna! #=> 'atgc'
+ # puts s #=> 'atgc'
+ # ---
+ # *Returns*:: current Bio::Sequence::NA object (modified)
def dna!
self.tr!('u', 't')
end
! # Returns a new sequence object with any 't' bases changed to 'u'.
! # The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.dna #=> 'augc'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def rna
self.tr('t', 'u')
end
+ # Changes any 't' bases in the original sequence to 'u'.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.rna! #=> 'augc'
+ # puts s #=> 'augc'
+ # ---
+ # *Returns*:: current Bio::Sequence::NA object (modified)
def rna!
self.tr!('t', 'u')
From aerts at dev.open-bio.org Mon Mar 27 13:34:37 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Mon, 27 Mar 2006 18:34:37 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.6,1.7
Message-ID: <200603271834.k2RIYb5l020081@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv20061/io
Modified Files:
fetch.rb
Log Message:
Updated docs.
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** fetch.rb 20 Mar 2006 12:40:13 -0000 1.6
--- fetch.rb 27 Mar 2006 18:34:35 -0000 1.7
***************
*** 31,36 ****
# = DESCRIPTION
# The Bio::Fetch class provides an interface to dbfetch servers. Given
! # a database name and an accession number, these servers return the nucleic
! # or amino acid sequence for that accession number in that database.
#
# Possible dbfetch servers include:
--- 31,37 ----
# = DESCRIPTION
# The Bio::Fetch class provides an interface to dbfetch servers. Given
! # a database name and an accession number, these servers return the associated
! # record. For example, for the embl database on the EBI, that would be a
! # nucleic or amino acid sequence.
#
# Possible dbfetch servers include:
***************
*** 82,86 ****
# Get raw database entry by id. This method lets the Bio::Registry class
! # use Bio::Fetch objects and should probably not be used directly.
def get_by_id(id)
fetch(@database, id)
--- 83,87 ----
# Get raw database entry by id. This method lets the Bio::Registry class
! # use Bio::Fetch objects.
def get_by_id(id)
fetch(@database, id)
From aerts at dev.open-bio.org Tue Mar 28 08:42:34 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Tue, 28 Mar 2006 13:42:34 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db gff.rb,1.5,1.6
Message-ID: <200603281342.k2SDgYvl024727@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv24707
Modified Files:
gff.rb
Log Message:
Added documentation.
Index: gff.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/gff.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** gff.rb 18 Dec 2005 15:58:41 -0000 1.5
--- gff.rb 28 Mar 2006 13:42:32 -0000 1.6
***************
*** 4,21 ****
# Copyright:: Copyright (C) 2003, 2005
# Toshiaki Katayama
# License:: LGPL
#
# $Id$
#
- # == Description
- #
- #
- # == Example
- #
- #
- # == References
- #
- # * http://www.sanger.ac.uk/Software/formats/GFF/
- #
#--
#
--- 4,12 ----
# Copyright:: Copyright (C) 2003, 2005
# Toshiaki Katayama
+ # 2006 Jan Aerts
# License:: LGPL
#
# $Id$
#
#--
#
***************
*** 38,46 ****
module Bio
!
class GFF
!
! attr_accessor :records
!
def initialize(str = '')
@records = Array.new
--- 29,78 ----
module Bio
! # == DESCRIPTION
! # The Bio::GFF and Bio::GFF::Record classes describe data contained in a
! # GFF-formatted file. For information on the GFF format, see
! # http://www.sanger.ac.uk/Software/formats/GFF/. Data are represented in tab-
! # delimited format, including
! # * seqname
! # * source
! # * feature
! # * start
! # * end
! # * score
! # * strand
! # * frame
! # * attributes (optional)
! #
! # For example:
! # SEQ1 EMBL atg 103 105 . + 0
! # SEQ1 EMBL exon 103 172 . + 0
! # SEQ1 EMBL splice5 172 173 . + .
! # SEQ1 netgene splice5 172 173 0.94 + .
! # SEQ1 genie sp5-20 163 182 2.3 + .
! # SEQ1 genie sp5-10 168 177 2.1 + .
! # SEQ1 grail ATG 17 19 2.1 - 0
! #
! # The Bio::GFF object is a container for Bio::GFF::Record objects, each
! # representing a single line in the GFF file.
class GFF
! # Creates a Bio::GFF object by building a collection of Bio::GFF::Record
! # objects.
! #
! # Create a Bio::GFF object the hard way
! # this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
! # this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
! # this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
! # this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
! # this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
! # this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
! # this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
! # p Bio::GFF.new(this_gff)
! #
! # or create one based on a GFF-formatted file:
! # p Bio::GFF.new(File.open('my_data.gff'))
! # ---
! # *Arguments*:
! # * _str_: string in GFF format
! # *Returns*:: Bio::GFF object
def initialize(str = '')
@records = Array.new
***************
*** 50,66 ****
--- 82,127 ----
end
+ # An array of Bio::GFF::Record objects.
+ attr_accessor :records
+
+ # Represents a single line of a GFF-formatted file. See Bio::GFF for more
+ # information.
class Record
+ # Name of the reference sequence
attr_accessor :seqname
+
+ # Name of the source of the feature (e.g. program that did prediction)
attr_accessor :source
+
+ # Name of the feature
attr_accessor :feature
+
+ # Start position of feature on reference sequence
attr_accessor :start
+
+ # End position of feature on reference sequence
attr_accessor :end
+
+ # Score of annotation (e.g. e-value for BLAST search)
attr_accessor :score
+
+ # Strand that feature is located on
attr_accessor :strand
+
+ # For features of type 'exon': indicates where feature begins in the reading frame
attr_accessor :frame
+
+ # List of tag=value pairs (e.g. to store name of the feature: ID=my_id)
attr_accessor :attributes
+
+ # Comments for the GFF record
attr_accessor :comments
+ # Creates a Bio::GFF::Record object. Is typically not called directly, but
+ # is called automatically when creating a Bio::GFF object.
+ # ---
+ # *Arguments*:
+ # * _str_: a tab-delimited line in GFF format
def initialize(str)
@comments = str.chomp[/#.*/]
***************
*** 83,90 ****
--- 144,158 ----
end
+ # = DESCRIPTION
+ # Represents version 2 of GFF specification. Is completely implemented by the
+ # Bio::GFF class.
class GFF2 < GFF
VERSION = 2
end
+ # = DESCRIPTION
+ # Represents version 3 of GFF specification. Is completely implemented by the
+ # Bio::GFF class. For more information on version GFF3, see
+ # http://flybase.bio.indiana.edu/annot/gff3.html
class GFF3 < GFF
VERSION = 3
***************
*** 103,106 ****
end
! p Bio::GFF.new(ARGF.read)
end
--- 171,181 ----
end
! this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
! this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
! this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
! this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
! this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
! this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
! this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
! p Bio::GFF.new(this_gff)
end
From trevor at pub.open-bio.org Wed Mar 1 01:40:03 2006
From: trevor at pub.open-bio.org (Trevor Wennblom)
Date: Wed, 01 Mar 2006 01:40:03 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme analysis.rb,
1.4, 1.5
Message-ID: <200603010140.k211e3VL013061@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory pub.open-bio.org:/tmp/cvs-serv13046
Modified Files:
analysis.rb
Log Message:
Huge optimization by getting rid of unnecessary permutations.
Index: analysis.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** analysis.rb 28 Feb 2006 22:21:48 -0000 1.4
--- analysis.rb 1 Mar 2006 01:40:00 -0000 1.5
***************
*** 71,77 ****
def cut_without_permutations( sequence, *args )
! return nil if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
! enzyme_actions = create_enzyme_actions( sequence, *args )
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
enzyme_actions.each do |id, enzyme_action|
--- 71,81 ----
def cut_without_permutations( sequence, *args )
! return {} if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
!
! #enzyme_actions = create_enzyme_actions( sequence, *args )
! tmp = create_enzyme_actions( sequence, *args )
! enzyme_actions = tmp[0].merge(tmp[1])
!
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
enzyme_actions.each do |id, enzyme_action|
***************
*** 90,105 ****
def cut_and_return_by_permutations( sequence, *args )
! return nil if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
! enzyme_actions = create_enzyme_actions( sequence, *args )
! return nil if enzyme_actions.empty?
! permutations = permute(enzyme_actions.size)
# Indexed by permutation.
hash_of_sequence_ranges_with_cuts = {}
permutations.each do |permutation|
previous_cut_ranges = []
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
permutation.each do |id|
--- 94,121 ----
def cut_and_return_by_permutations( sequence, *args )
! return {} if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
! enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
! return {} if enzyme_actions.empty? and initial_cuts.empty?
!
! if enzyme_actions.size > 1
! permutations = permute(enzyme_actions.size)
! else
! permutations = []
! end
# Indexed by permutation.
hash_of_sequence_ranges_with_cuts = {}
+ if permutations.empty?
+ sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
+ initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
+ hash_of_sequence_ranges_with_cuts[0] = sr_with_cuts
+ end
+
permutations.each do |permutation|
previous_cut_ranges = []
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
+ initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
permutation.each do |id|
***************
*** 251,265 ****
def create_enzyme_actions( sequence, *args )
id = 0
! enzyme_actions = {}
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
! enzyme_actions[id] = enzyme_to_enzyme_action( enzyme, offset )
id += 1
end
end
! enzyme_actions
end
--- 267,338 ----
def create_enzyme_actions( sequence, *args )
id = 0
! enzyme_actions_that_sometimes_cut = {}
! enzyme_actions_that_always_cut = {}
! indicies_of_sometimes_cut = []
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
! enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
id += 1
end
end
! # enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
!
! max = enzyme_actions_that_always_cut.size - 1
! 0.upto(max) do |i|
! enzyme_action = enzyme_actions_that_always_cut[i]
! conflict = false
! other_cut_ranges = {}
! #enzyme_actions.each { |key,enzyme_action| next if i == key; puts "i: #{i}, key: #{key}"; previous_cut_ranges += enzyme_action.cut_ranges }
! # enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; puts "i: #{i}, key: #{key}"; other_cut_ranges[key] = i_ea.cut_ranges }
! enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
! # puts "Enzyme action #{i}:"
! # pp enzyme_actions[i]
! # pp enzyme_action
! # puts "Previous cut ranges:"
! # pp previous_cut_ranges
!
! other_cut_ranges.each do |key, cut_ranges|
! cut_ranges.each do |cut_range|
! next unless cut_range.class == VerticalCutRange # we aren't concerned with horizontal cuts
! previous_cut_left = cut_range.range.first
! previous_cut_right = cut_range.range.last
!
! if (enzyme_action.right <= previous_cut_left) or
! (enzyme_action.left > previous_cut_right) or
! (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
! # no conflict
! # puts "no conflict"
!
! else
! conflict = true
! # puts "conflict"
! #puts "cut range:"
! #pp cut_range
! #puts "enzyme action:"
! #pp enzyme_action
! end
!
! indicies_of_sometimes_cut += [i, key] if conflict == true
! end
! end
!
! # We don't need to make permutations with this enzyme action if it always cuts
! # indicies << i if conflict == false
! end
! # pp indicies_of_sometimes_cut
!
! indicies_of_sometimes_cut.uniq.each do |i|
! enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
! enzyme_actions_that_always_cut.delete(i)
! end
! #puts 'Always cut:'
! #pp enzyme_actions_that_always_cut
! #puts 'Permute:'
! #pp enzyme_actions_that_sometimes_cut
!
! [enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
end
From ngoto at pub.open-bio.org Fri Mar 3 08:18:51 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 08:18:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.46,1.47
Message-ID: <200603030818.k238IpVL028555@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv28535/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
* Removed duplicated initializing of @path in BufferedInputStream#initialize.
* Fixed a bug that buffered input stream was nested.
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -d -r1.46 -r1.47
*** flatfile.rb 22 Feb 2006 10:01:27 -0000 1.46
--- flatfile.rb 3 Mar 2006 08:18:49 -0000 1.47
***************
*** 35,39 ****
# initialize prefetch buffer
@buffer = ''
- @path = path
end
--- 35,38 ----
***************
*** 519,529 ****
def initialize(dbclass, stream)
# 2nd arg: IO object
! if @stream.kind_of?(BufferedInputStream)
@stream = stream
else
@stream = BufferedInputStream.for_io(stream)
end
- # default is raw mode
- self.raw = false
# 1st arg: database class (or file format autodetection)
if dbclass then
--- 518,526 ----
def initialize(dbclass, stream)
# 2nd arg: IO object
! if stream.kind_of?(BufferedInputStream)
@stream = stream
else
@stream = BufferedInputStream.for_io(stream)
end
# 1st arg: database class (or file format autodetection)
if dbclass then
***************
*** 535,538 ****
--- 532,537 ----
@skip_leader_mode = :firsttime
@firsttime_flag = true
+ # default raw mode is false
+ self.raw = false
end
***************
*** 743,747 ****
self.new(*arg)
end
!
# Creates a new element.
def initialize
--- 742,746 ----
self.new(*arg)
end
!
# Creates a new element.
def initialize
From ngoto at pub.open-bio.org Fri Mar 3 09:31:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 09:31:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48
Message-ID: <200603030931.k239VxVL029035@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
* added RulesArray class only for inspect
* changed constant (like Bio::GenBank) to String (like "Bio::GenBank")
to avoid doing require almost all files when using autodetect
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb 3 Mar 2006 08:18:49 -0000 1.47
--- flatfile.rb 3 Mar 2006 09:31:57 -0000 1.48
***************
*** 736,739 ****
--- 736,748 ----
include TSort
+ # Array to store autodetection rules.
+ # This is defined only for inspect.
+ class RulesArray < Array
+ # visualize contents
+ def inspect
+ "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+ end
+ end #class RulesArray
+
# Template of a single rule of autodetection
class RuleTemplate
***************
*** 745,754 ****
# Creates a new element.
def initialize
! a = Array.new
! def a.inspect
! "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
! end
! @higher_priority_elements = a.clone
! @lower_priority_elements = a.clone
@name = nil
end
--- 754,759 ----
# Creates a new element.
def initialize
! @higher_priority_elements = RulesArray.new
! @lower_priority_elements = RulesArray.new
@name = nil
end
***************
*** 784,787 ****
--- 789,810 ----
nil
end
+
+ private
+ # Gets constant from constant name given as a string.
+ def str2const(str)
+ const = Object
+ str.split(/\:\:/).each do |x|
+ const = const.const_get(x)
+ end
+ const
+ end
+
+ # Gets database class from given object.
+ # Current implementation is:
+ # if _obj_ is kind of String, regarded as a constant.
+ # Otherwise, returns _obj_ as is.
+ def get_dbclass(obj)
+ obj.kind_of?(String) ? str2const(obj) : obj
+ end
end #class Rule_Template
***************
*** 835,841 ****
super()
@re = re
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
@name = dbclass.to_s
end
--- 858,878 ----
super()
@re = re
@name = dbclass.to_s
+ @dbclass = nil
+ @dbclass_lazy = dbclass
+ end
+
+ # database class (lazy evaluation)
+ def dbclass
+ unless @dbclass
+ @dbclass = get_dbclass(@dbclass_lazy)
+ end
+ @dbclass
+ end
+ private :dbclass
+
+ # returns database classes
+ def dbclasses
+ [ dbclass ]
end
***************
*** 844,872 ****
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? @dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! class RuleRegexp2 < RuleTemplate
# Creates a new instance.
def initialize(dbclass, *regexps)
! super()
@regexps = regexps
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
- if name
- @name = name
- else
- @name = @dbclass.to_s
- end
end
! # If given text matches the regexp, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return @dbclass if re =~ text
end
nil
--- 881,904 ----
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! # If given string matches one of the regular expressions,
! # returns the database class.
! class RuleRegexp2 < RuleRegexp
# Creates a new instance.
def initialize(dbclass, *regexps)
! super(dbclass, nil)
@regexps = regexps
end
! # If given text matches one of the regexp, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return dbclass if re =~ text
end
nil
***************
*** 880,887 ****
super()
@proc = proc
! @dbclasses = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
# If given text (and/or meta information) is known, returns
# the database class.
--- 912,928 ----
super()
@proc = proc
! @dbclasses = nil
! @dbclasses_lazy = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
+ # database classes (lazy evaluation)
+ def dbclasses
+ unless @dbclasses
+ @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+ end
+ @dbclasses
+ end
+
# If given text (and/or meta information) is known, returns
# the database class.
***************
*** 1039,1058 ****
def self.make_default
a = self[
! genbank = RuleRegexp[ Bio::GenBank,
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ Bio::GenPept,
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ Bio::MEDLINE,
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ Bio::EMBL,
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ Bio::SPTR,
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ Bio::PROSITE,
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ Bio::TRANSFAC,
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
--- 1080,1099 ----
def self.make_default
a = self[
! genbank = RuleRegexp[ 'Bio::GenBank',
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ 'Bio::GenPept',
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ 'Bio::MEDLINE',
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ 'Bio::SPTR',
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ 'Bio::PROSITE',
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ 'Bio::TRANSFAC',
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
end,
! litdb = RuleRegexp[ Bio::LITDB,
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ Bio::KEGG::BRITE,
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ Bio::KEGG::KO,
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ Bio::KEGG::REACTION,
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ Bio::KEGG::GENES,
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ Bio::KEGG::GENOME,
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
! Bio::FANTOM::MaXML::Sequence) do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
--- 1109,1139 ----
end,
! litdb = RuleRegexp[ 'Bio::LITDB',
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ 'Bio::KEGG::BRITE',
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ 'Bio::KEGG::KO',
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ 'Bio::KEGG::GENES',
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ 'Bio::KEGG::GENOME',
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
! 'Bio::FANTOM::MaXML::Sequence') do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
***************
*** 1109,1143 ****
end,
! pdb = RuleRegexp[ Bio::PDB,
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ Bio::PDB::ChemicalComponent,
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ Bio::ClustalW::Report,
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ Bio::Blast::Report,
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ Bio::Blast::WU::Report,
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ Bio::Blast::Default::Report,
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ Bio::Blat::Report,
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ Bio::Spidey::Report,
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ Bio::HMMER::Report,
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ Bio::Sim4::Report,
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new(Bio::FastaFormat,
! Bio::NBRF,
! Bio::FastaNumericFormat) do |text|
if /^>.+$/ =~ text
case text
--- 1150,1184 ----
end,
! pdb = RuleRegexp[ 'Bio::PDB',
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ 'Bio::ClustalW::Report',
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ 'Bio::Blast::Report',
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ 'Bio::Blast::Default::Report',
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ 'Bio::Blat::Report',
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ 'Bio::Spidey::Report',
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ 'Bio::HMMER::Report',
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ 'Bio::Sim4::Report',
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new('Bio::FastaFormat',
! 'Bio::NBRF',
! 'Bio::FastaNumericFormat') do |text|
if /^>.+$/ =~ text
case text
From pjotr at pub.open-bio.org Fri Mar 3 14:52:00 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 14:52:00 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta - New directory
Message-ID: <200603031452.k23Eq0VL029679@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29669/fasta
Log Message:
Directory /home/repository/bioruby/bioruby/test/data/fasta added to the repository
From pjotr at pub.open-bio.org Fri Mar 3 15:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta example1.txt, NONE,
1.1 example2.txt, NONE, 1.1
Message-ID: <200603031531.k23FV8VL029797@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29781/test/data/fasta
Added Files:
example1.txt example2.txt
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing
--- NEW FILE: example2.txt ---
>At1g11545.1 68414.m01326 xyloglucan:xyloglucosyl transferase, putative / xyloglucan endotransglycosylase, putative / endo-xyloglucan transferase, putative similar to endo-xyloglucan transferase GI:2244732 from [Gossypium hirsutum]
actcacggaacaagtgtagattgcattacctctctctctctctctcttcgaaatattcga
agtagagacaaccaATGGAGACGGAAAGGAGGATCATAACGAGCTGTTCTGCCATGACGG
CTCTGTTCTTGTTCATGACGGCTCTAATGGCGTCGTCCTCTATCGCAGCAACACCGACAC
AATCGTTTGAAGATAATTTCAACATTATGTGGTCTGAAAATCACTTCACGACTTCCGATG
ATGGAGAGATCTGGAATCTTTCCTTAGATAACGACACCGGATGTGGATTTCAGACAAAGC
ACATGTATAGATTCGGATGGTTTAGTATGAAGCTAAAGCTCGTCGGAGGCGACTCCGCCG
GCGTCGTCACCGCTTACTACATGTGTTCGGAGAATGGGGCAGGACCGGAGAGAGACGAGA
TAGATTTCGAATTTCTAGGGAACCGAACCGGACAGCCTTACATTATTCAGACCAATGTGT
ATAAGAACGGAACCGGGAATCGGGAGATGCGACATTCCCTCTGGTTCGACCCGACCAAGG
ATTATCACACCTACTCAATTCTTTGGAATAACCACCAGCTTGTGTTCTTCGTGGATAGGG
TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC
AGAAGCCGATGTACTTGTTCTCCAGCATTTGGAACGCTGACGATTGGGCTACACGTGGTG
GTCTGGAGAAGACTGACTGGAAAAAAGCTCCATTCGTCTCTTCTTACAAGGACTTCGCCG
TCGAAGGCTGCCGTTGGAAGGATCCATTCCCTGCATGCGTCTCTACCACAACAGAGAATT
GGTGGGATCAGTACGACGCGTGGCATTTGTCCAAGACACAGAAGATGGATTATGCGTGGG
TGCAGCGTAATCTCGTCGTATACGATTATTGCAAAGACAGTGAGAGGTTCCCTACTCTTC
CTTGGGAGTGTTCCATTAGCCCTTGGGCTTAAaatcaattttgttttgagtgtattaaag
tggaaatggtttatgtaataattttactctcttttttttggcatttcttattttgttatg
gactatatcctctgtttatttatttaattaattatttatttagtcggctat
--- NEW FILE: example1.txt ---
>At1g02580 mRNA (2291 bp) UTR's and CDS
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
catggatggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
tttatgttgtt
>At1g65300: mRNA 837bp
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
ga gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttt taatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g
>At1g65300: mRNA 837bp (shortened at end)
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcag
>At1g65300: mRNA 837bp (shortened from start)
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g
>At1g02580 - shortened for test - inserted cutpoint
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
tttaattggggtgcatttacatgggactctct taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
From pjotr at pub.open-bio.org Fri Mar 3 15:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/sample enzymes.rb,NONE,1.1
Message-ID: <200603031531.k23FV8VL029793@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/sample
In directory pub.open-bio.org:/tmp/cvs-serv29781/sample
Added Files:
enzymes.rb
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing
--- NEW FILE: enzymes.rb ---
#!/usr/bin/env ruby
#
# enzymes.rb - cut input file using enzyme on command line
#
# Copyright (C) 2006 Pjotr Prins and Trevor Wennblom
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# $Id: enzymes.rb,v 1.1 2006/03/03 15:31:06 pjotr Exp $
#
require 'bio/io/flatfile'
require 'bio/util/restriction_enzyme'
include Bio
usage = < '+entry.definition+"\n"
print frag.primary,"\n"
end
end
end
From aerts at pub.open-bio.org Thu Mar 16 17:29:07 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Thu, 16 Mar 2006 17:29:07 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb, 1.12, 1.13 fetch.rb, 1.4,
1.5
Message-ID: <200603161729.k2GHT7VL007097@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv7087
Modified Files:
pubmed.rb fetch.rb
Log Message:
* Added documentation to pubmed.rb and fetch.rb
* For fetch.rb: replaced 'net/http' with 'open-uri' to allow people behind a proxy to use this class.
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** pubmed.rb 8 Sep 2005 01:22:12 -0000 1.12
--- pubmed.rb 16 Mar 2006 17:29:05 -0000 1.13
***************
*** 3,6 ****
--- 3,7 ----
#
# Copyright (C) 2001 KATAYAMA Toshiaki
+ # 2006 Jan Aerts
#
# This library is free software; you can redistribute it and/or
***************
*** 26,61 ****
module Bio
class PubMed
! def self.query(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
!
! http = Net::HTTP.new(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
! end
!
! def self.pmfetch(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
!
! http = Net::HTTP.new(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
! end
!
def self.search(str)
host = "www.ncbi.nlm.nih.gov"
--- 27,85 ----
module Bio
+ # = DESCRIPTION
+ # The Bio::PubMed class provides several ways to retrieve bibliographic
+ # information from the PubMed database at
+ # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
+ # types of queries are possible:
+ # * searching for PubMed IDs given a query string:
+ # * Bio::PubMed.search
+ # * Bio::PubMed.esearch
+ # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) given a PubMed ID:
+ # * Bio::PubMed.query
+ # * Bio::PubMed.pmfetch
+ # * Bio::PubMed.efetch
+ #
+ # The different methods within the same group are interchangeable and should
+ # return the same result.
+ #
+ # Additional information about the MEDLINE format and PubMed programmable
+ # APIs can be found on the following websites:
+ # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
+ # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
+ # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
+ # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
+ # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
+ # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
+ # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+ #
+ # = USAGE
+ # require 'bio'
+ #
+ # # If you don't know the pubmed ID:
+ # Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
+ # p x
+ # end
+ # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+ # p x
+ # end
+ #
+ # # To retrieve the MEDLINE entry for a given PubMed ID:
+ # puts Bio::PubMed.query("10592173")
+ # puts Bio::PubMed.pmfetch("10592173")
+ # puts Bio::PubMed.efetch("10592173", "14693808")
+ # # This can be converted into a Bio::MEDLINE object:
+ # manuscript = Bio::PubMed.query("10592173")
+ # medline = Bio::MEDLINE.new(manuscript)
+ #
+ # = REMARK
+ # This class can not be used at the moment if you're behind a proxy server. This will be solved in the near future.
class PubMed
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs.
! # ---
! # *Arguments*:
! # * _str_: query string (required)
! # *Returns*:: array of PubMed IDs
def self.search(str)
host = "www.ncbi.nlm.nih.gov"
***************
*** 70,73 ****
--- 94,115 ----
end
+ # Search the PubMed database by given keywords using E-Utils and returns
+ # an array of PubMed IDs.
+ #
+ # For information on the possible arguments, see
+ # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
+ # ---
+ # *Arguments*:
+ # * _str_: query string (required)
+ # * _hash_: optional hash of E-Utils parameters, keyed by the following strings:
+ # * 'field'
+ # * 'reldate'
+ # * 'mindate'
+ # * 'maxdate'
+ # * 'datetype'
+ # * 'retstart'
+ # * 'retmax' (default 100)
+ # * 'retmode'
+ # * 'rettype'
+ # *Returns*:: array of PubMed IDs
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
***************
*** 88,91 ****
--- 130,184 ----
end
+ # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+ # entrez query.
+ # ---
+ # *Arguments*:
+ # * _id_: PubMed ID (required)
+ # *Returns*:: MEDLINE formatted String
+ def self.query(id)
+ host = "www.ncbi.nlm.nih.gov"
+ path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+
+ http = Net::HTTP.new(host)
+ response, = http.get(path + id.to_s)
+ result = response.body
+ if result =~ /#{id}\s+Error/
+ raise( result )
+ else
+ result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+ return result
+ end
+ end
+
+ # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+ # entrez pmfetch.
+ # ---
+ # *Arguments*:
+ # * _id_: PubMed ID (required)
+ # *Returns*:: MEDLINE formatted String
+ def self.pmfetch(id)
+ host = "www.ncbi.nlm.nih.gov"
+ path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+
+ http = Net::HTTP.new(host)
+ response, = http.get(path + id.to_s)
+ result = response.body
+ if result =~ /#{id}\s+Error/
+ raise( result )
+ else
+ result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+ return result
+ end
+ end
+
+ # Retrieves PubMed entries by PMID using entrez efetch and returns them as
+ # an array of MEDLINE formatted strings. Multiple PubMed IDs can be provided:
+ # Bio::PubMed.efetch(123)
+ # Bio::PubMed.efetch(123,456,789)
+ # Bio::PubMed.efetch([123,456,789])
+ # ---
+ # *Arguments*:
+ # * _ids_: list of PubMed IDs (required)
+ # *Returns*:: array of MEDLINE formatted Strings
def self.efetch(*ids)
return [] if ids.empty?
***************
*** 125,189 ****
end
-
- =begin
-
- = Bio::PubMed
-
- These class methods access NCBI/PubMed database via HTTP.
-
- --- Bio::PubMed.esearch(str, options)
-
- Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
- Options can be a hash containing keys include 'field', 'reldate',
- 'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
- and 'rettype' as specified in the following URL:
-
- ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed>))
-
- Default 'retmax' is 100.
-
- --- Bio::PubMed.efetch(pmids)
-
- Returns an array of MEDLINE records. A list of PubMed IDs can be
- supplied as following:
-
- Bio::PubMed.efetch(123)
- Bio::PubMed.efetch(123,456,789)
- Bio::PubMed.efetch([123,456,789])
-
- --- Bio::PubMed.query(pmid)
-
- Retrieve PubMed entry by PMID and returns MEDLINE format string (can
- be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
- object).
-
- --- Bio::PubMed.pmfetch(pmid)
-
- Just another query method (by pmfetch).
-
- --- Bio::PubMed.search(str)
-
- Search the PubMed database by given keywords and returns the list of
- matched records in MEDLINE format.
-
-
- = For more informations
-
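- * Overview
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html>))
- * How to link
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html>))
- * MEDLINE format
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat>))
- * Search field descriptions and tags
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags>))
- * Entrez utilities index
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html>))
- * PmFetch CGI help
- * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html>))
- * E-Utilities CGI help
- * ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>))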
-
- =end
-
-
--- 218,219 ----
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** fetch.rb 18 Dec 2005 15:58:42 -0000 1.4
--- fetch.rb 16 Mar 2006 17:29:05 -0000 1.5
***************
*** 1,12 ****
#
! # = bio/io/biofetch.rb - BioFetch access module
! #
! # Copyright:: Copyright (C) 2002, 2005
! # Toshiaki Katayama
! # License:: LGPL
#
! # $Id$
#
- #--
#
# This library is free software; you can redistribute it and/or
--- 1,10 ----
#
! # bio/io/biofetch.rb - BioFetch access module
#
! # Copyright (C) 2002, 2005 Toshiaki Katayama
! # 2006 Jan Aerts
!
! # License: LGPL
#
#
# This library is free software; you can redistribute it and/or
***************
*** 24,95 ****
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! #++
#
require 'uri'
! require 'net/http'
module Bio
! class Fetch
!
! # Create a new Bio::Fetch server object.
! # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
! # to EBI BioFetch server.
! def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
! schema, user, @host, @port, reg, @path, = URI.split(url)
! end
!
! # Set default database to dbname (prepare for get_by_id).
! attr_accessor :database
!
! # Get raw database entry by id (mainly used by Bio::Registry).
! def get_by_id(id)
! fetch(@database, id)
! end
!
! # Fetch a database entry as specified by database (db), entry id (id),
! # 'raw' text or 'html' (style), and format. When using BioRuby's
! # BioFetch server, value for the format should not be set.
! def fetch(db, id, style = 'raw', format = nil)
! data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
! data.push("format=#{format}") if format
! data = data.join('&')
!
! responce, result = Net::HTTP.new(@host, @port).post(@path, data)
! return result
! end
!
! # Short cut for using BioRuby's BioFetch server. You can fetch an entry
! # without creating instance of BioFetch server.
! def self.query(*args)
! self.new.fetch(*args)
! end
! # What databases are available?
! def databases
! query = "info=dbs"
! responce, result = Net::HTTP.new(@host, @port).post(@path, query)
! return result
! end
! # What formats does the database X have?
! def formats(database = @database)
! if database
! query = "info=formats;db=#{database}"
! responce, result = Net::HTTP.new(@host, @port).post(@path, query)
return result
end
end
- # How many entries can be retrieved simultaneously?
- def maxids
- query = "info=maxids"
- responce, result = Net::HTTP.new(@host, @port).post(@path, query)
- return result
- end
-
- end
-
end # module Bio
--- 22,183 ----
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
require 'uri'
! require 'open-uri'
module Bio
+ # = DESCRIPTION
+ # The Bio::Fetch class provides an interface to dbfetch servers. Given
+ # a database name and an accession number, these servers return the nucleic
+ # or amino acid sequence for that accession number in that database.
+ #
+ # Possible dbfetch servers include:
+ # * http://bioruby.org/cgi-bin/biofetch.rb (default)
+ # * http://www.ebi.ac.uk/cgi-bin/dbfetch
+ #
+ # If you're behind a proxy server, be sure to set your HTTP_PROXY
+ # environment variable accordingly.
+ #
+ # = USAGE
+ # require 'bio'
+ #
+ # # Retrieve the sequence of accession number M33388 from the EMBL
+ # # database.
+ # server = Bio::Fetch.new() #uses default server
+ # puts server.fetch('embl','M33388')
+ #
+ # # Do the same thing without creating a Bio::Fetch object. This method always
+ # # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
+ # puts Bio::Fetch.query('embl','M33388')
+ #
+ # # To know what databases are available on the bioruby dbfetch server:
+ # server = Bio::Fetch.new()
+ # puts server.databases
+ #
+ # # Some databases provide their data in different formats (e.g. 'fasta',
+ # # 'genbank' or 'embl'). To check which formats are supported by a given
+ # # database:
+ # puts server.formats('embl')
+ #
+ class Fetch
+
+ # Create a new Bio::Fetch server object that can subsequently be queried
+ # using the Bio::Fetch#fetch method
+ # ---
+ # *Arguments*:
+ # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ # *Returns*:: Bio::Fetch object
+ def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ @url = url
+ schema, user, @host, @port, reg, @path, = URI.split(@url)
+ end
+
+ # The default database to query
+ #--
+ # This will be used by the get_by_id method
+ #++
+ attr_accessor :database
+
+ # Get raw database entry by id. This method lets the Bio::Registry class
+ # use Bio::Fetch objects and should probably not be used directly.
+ def get_by_id(id)
+ fetch(@database, id)
+ end
+
+ # Fetch a database entry as specified by database (db), entry id (id),
+ # 'raw' text or 'html' (style), and format. When using BioRuby's
+ # BioFetch server, value for the format should not be set.
+ # Examples:
+ # server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+ # puts server.fetch('embl','M33388','raw','fasta')
+ # puts server.fetch('refseq','NM_12345','html','embl')
+ # ---
+ # *Arguments*:
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+ # * _id_: single ID or ID list separated by commas or white space
+ # * _style_: [raw|html] (default = 'raw')
+ # * _format_: name of output format (see Bio::Fetch#formats)
+ def fetch(db, id, style = 'raw', format = nil)
+ query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
+ query.push("format=#{format}") if format
+ query = query.join('&')
+
+ result = open(@url + '?' + query).readlines.join('')
+ return result
+ end
+
+ # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
+ # without creating an instance of BioFetch server. This method uses the
+ # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
+ #
+ # Example:
+ # puts Bio::Fetch.query('refseq','NM_12345')
+ #
+ # ---
+ # *Arguments*:
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+ # * _id_: single ID or ID list separated by commas or white space
+ # * _style_: [raw|html] (default = 'raw')
+ # * _format_: name of output format (see Bio::Fetch#formats)
+ def self.query(*args)
+ self.new.fetch(*args)
+ end
+
+ # Using this method, the user can ask a dbfetch server what databases
+ # it supports. This would normally be the first step you'd take when
+ # you use a dbfetch server for the first time.
+ # Example:
+ # server = Bio::Fetch.new()
+ # puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
+ #
+ # This method only works for the bioruby dbfetch server. For a list
+ # of databases available from the EBI, see the EBI website at
+ # http://www.ebi.ac.uk/cgi-bin/dbfetch/
+ # ---
+ # *Returns*:: array of database names
+ def databases
+ query = "info=dbs"
! result = open(@url + '?' + query).readlines.join('')
! return result
! end
!
! # Lists the formats that are available for a given database. Like the
! # Bio::Fetch#databases method, this method is only available on
! # the bioruby dbfetch server.
! # Example:
! # server = Bio::Fetch.new()
! # puts server.formats('embl') # returns "default fasta"
! # ---
! # *Arguments*:
! # * _database_:: name of database you want the supported formats for
! # *Returns*:: array of formats
! def formats(database = @database)
! if database
! query = "info=formats;db=#{database}"
! result = open(@url + '?' + query).readlines.join('')
! return result
! end
! end
!
! # A dbfetch server will only return entries up to a given maximum number.
! # This method retrieves that number from the server. As for the databases
! # and formats methods, the maxids method only works for the bioruby
! # dbfetch server.
! # ---
! # *Arguments*: none
! # *Returns*:: number
! def maxids
! query = "info=maxids"
! result = open(@url + '?' + query).readlines.join('')
return result
end
+
end
end # module Bio
***************
*** 98,113 ****
if __FILE__ == $0
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
- bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
puts "# test 1"
! puts bfserv.fetch('embl', 'J00231', 'raw')
puts "# test 2"
! puts bfserv.fetch('embl', 'J00231', 'html')
!
puts "# test 3"
! puts Bio::Fetch.query('genbank', 'J00231')
puts "# test 4"
puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!
end
--- 186,204 ----
if __FILE__ == $0
puts "# test 1"
! br_server = Bio::Fetch.new()
! puts br_server.databases
! puts br_server.formats('embl')
! puts br_server.maxids
! ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
puts "# test 2"
! puts ebi_server.fetch('embl', 'J00231', 'raw')
puts "# test 3"
! puts ebi_server.fetch('embl', 'J00231', 'html')
puts "# test 4"
+ puts Bio::Fetch.query('genbank', 'J00231')
+ puts "# test 5"
puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!
end
From ngoto at pub.open-bio.org Mon Mar 20 10:34:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 10:34:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.3,1.4
Message-ID: <200603201035.k2KAYxVL030067@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory pub.open-bio.org:/tmp/cvs-serv30042/lib/bio
Modified Files:
command.rb
Log Message:
* New module Bio::Command::NetTools for miscellaneous network methods.
Currently, this module is intended to be used only inside
BioRuby library. Please do not use it in user's programs now.
* New methods: Bio::Command::NetTools.open_uri(uri, *arg) and
Bio::Command::NetTools.read_uri(uri).
* Changed license to Ruby's.
Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** command.rb 4 Nov 2005 17:36:00 -0000 1.3
--- command.rb 20 Mar 2006 10:34:57 -0000 1.4
***************
*** 2,32 ****
# = bio/command.rb - general methods for external command execution
#
! # Copyright:: Copyright (C) 2003-2005
# Naohisa Goto ,
# Toshiaki Katayama
! # License:: LGPL
#
# $Id$
#
- #--
- #
- # This library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2 of the License, or (at your option) any later version.
- #
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with this library; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- #
- #++
- #
require 'open3'
module Bio
--- 2,15 ----
# = bio/command.rb - general methods for external command execution
#
! # Copyright:: Copyright (C) 2003-2006
# Naohisa Goto ,
# Toshiaki Katayama
! # License:: Ruby's
#
# $Id$
#
require 'open3'
+ require 'uri'
module Bio
***************
*** 162,165 ****
--- 145,291 ----
end # module Tools
+
+
+ # = Bio::Command::NetTools
+ #
+ # Bio::Command::NetTools is a collection of miscellaneous methods
+ # for data transport through network.
+ #
+ # Library internal use only. Users should not directly use it.
+ #
+ # Note that it is under construction.
+ module NetTools
+
+ # Same as OpenURI.open_uri(*arg).
+ # If open-uri.rb is already loaded, ::OpenURI is used.
+ # Otherwise, internal OpenURI in sandbox is used because
+ # open-uri.rb redefines Kernel.open.
+ def self.open_uri(uri, *arg)
+ if defined? ::OpenURI
+ ::OpenURI.open_uri(uri, *arg)
+ else
+ SandBox.load_openuri_in_sandbox
+ uri = uri.to_s if ::URI::Generic === uri
+ SandBox::OpenURI.open_uri(uri, *arg)
+ end
+ end
+
+ # Same as OpenURI.open_uri(uri).read.
+ # If open-uri.rb is already loaded, ::OpenURI is used.
+ # Otherwise, internal OpenURI in sandbox is used because
+ # open-uri.rb redefines Kernel.open.
+ def self.read_uri(uri)
+ self.open_uri(uri).read
+ end
+
+ # Sandbox to load open-uri.rb.
+ # Internal use only.
+ module SandBox #:nodoc:
+
+ # Dummy module definition.
+ module Kernel #:nodoc:
+ # dummy method
+ def open(*arg); end #:nodoc:
+ end #module Kernel
+
+ # a method to find proxy. dummy definition
+ module FindProxy; end #:nodoc:
+
+ # dummy module definition
+ module OpenURI #:nodoc:
+ module OpenRead; end #:nodoc:
+ end #module OpenURI
+
+ # Dummy module definition.
+ module URI #:nodoc:
+ class Generic < ::URI::Generic #:nodoc:
+ include SandBox::FindProxy
+ end
+
+ class HTTPS < ::URI::HTTPS #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ class HTTP < ::URI::HTTP #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ class FTP < ::URI::FTP #:nodoc:
+ include SandBox::FindProxy
+ include SandBox::OpenURI::OpenRead
+ end
+
+ # parse and new. internal use only.
+ def self.__parse_and_new__(klass, uri) #:nodoc:
+ scheme, userinfo, host, port,
+ registry, path, opaque, query, fragment = ::URI.split(uri)
+ klass.new(scheme, userinfo, host, port,
+ registry, path, opaque, query,
+ fragment)
+ end
+ private_class_method :__parse_and_new__
+
+ # same as ::URI.parse. internal use only.
+ def self.parse(uri) #:nodoc:
+ r = ::URI.parse(uri)
+ case r
+ when ::URI::HTTPS
+ __parse_and_new__(HTTPS, uri)
+ when ::URI::HTTP
+ __parse_and_new__(HTTP, uri)
+ when ::URI::FTP
+ __parse_and_new__(FTP, uri)
+ else
+ r
+ end
+ end
+ end #module URI
+
+ @load_openuri = nil
+ # load open-uri.rb in SandBox module.
+ def self.load_openuri_in_sandbox #:nodoc:
+ return if @load_openuri
+ fn = nil
+ unless $:.find do |x|
+ fn = File.join(x, 'open-uri.rb')
+ FileTest.exist?(fn)
+ end then
+ warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
+ else
+ # reading open-uri.rb
+ str = File.read(fn)
+ # eval open-uri.rb contents in SandBox module
+ module_eval(str)
+
+ # finds 'find_proxy' method
+ find_proxy_lines = nil
+ flag = nil
+ endstr = nil
+ str.each do |line|
+ if flag then
+ find_proxy_lines << line
+ if endstr == line[0, endstr.length] and
+ /^\s+end(\s+.*)?$/ =~ line then
+ break
+ end
+ elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
+ flag = true
+ endstr = "#{$1}end"
+ find_proxy_lines = line
+ end
+ end
+ if find_proxy_lines
+ module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
+ else
+ warn('Warning: cannot find find_proxy method in open-uri.rb.')
+ end
+ @load_openuri = true
+ end
+ end
+ end #module SandBox
+ end #module NetTools
+
end # module Command
end # module Bio
From ngoto at pub.open-bio.org Mon Mar 20 12:40:16 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 12:40:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.5,1.6
Message-ID: <200603201240.k2KCeGVL030358@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv30167/lib/bio/io
Modified Files:
fetch.rb
Log Message:
* "require 'open-uri'" is removed because open-uri.rb changes Kernel#open.
Instead, Bio::Command::NetTools.read_uri is used.
* query should be escaped by using URI.escape.
* Bio::Fetch#databases and #formats are changed to return an array of strings,
as described in the documentation.
* Bio::Fetch#maxids is changed to return an Integer,
as described in the documentation.
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** fetch.rb 16 Mar 2006 17:29:05 -0000 1.5
--- fetch.rb 20 Mar 2006 12:40:13 -0000 1.6
***************
*** 26,30 ****
require 'uri'
! require 'open-uri'
module Bio
--- 26,30 ----
require 'uri'
! require 'bio/command'
module Bio
***************
*** 105,110 ****
query = query.join('&')
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 105,109 ----
query = query.join('&')
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query))
end
***************
*** 141,146 ****
query = "info=dbs"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 140,144 ----
query = "info=dbs"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
end
***************
*** 159,164 ****
query = "info=formats;db=#{database}"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
end
--- 157,161 ----
query = "info=formats;db=#{database}"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
end
end
***************
*** 174,179 ****
query = "info=maxids"
! result = open(@url + '?' + query).readlines.join('')
! return result
end
--- 171,175 ----
query = "info=maxids"
! Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).to_i
end
From aerts at pub.open-bio.org Tue Mar 21 12:18:16 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Tue, 21 Mar 2006 12:18:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11
Message-ID: <200603211218.k2LCIGVL001647@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637
Modified Files:
fastacmd.rb
Log Message:
Added/reformatted documentation.
Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb 28 Jan 2006 08:12:21 -0000 1.10
--- fastacmd.rb 21 Mar 2006 12:18:14 -0000 1.11
***************
*** 5,45 ****
# Shuji SHIGENOBU ,
# Toshiaki Katayama ,
! # Mitsuteru C. Nakao
# License:: LGPL
#
# $Id$
#
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using
- # NCBI fastacmd command.
- #
- # This class requires 'fastacmd' command and a blast database
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- # database = ARGV.shift || "/db/myblastdb"
- # entry_id = ARGV.shift || "sp:128U_DROME"
- # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- # fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- # entry = fastacmd.get_by_id(entry_id)
- # fastacmd.fetch(entry_id)
- # fastacmd.fetch(ent_list)
- #
- # fastacmd.fetch(ent_list).each do |fasta|
- # puts fasta
- # end
- #
- # == References
- #
- # * NCBI tool
- # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
#--
#
--- 5,14 ----
# Shuji SHIGENOBU ,
# Toshiaki Katayama ,
! # Mitsuteru C. Nakao ,
! # Jan Aerts
# License:: LGPL
#
# $Id$
#
#--
#
***************
*** 68,72 ****
class Blast
! # NCBI fastacmd wrapper class
#
class Fastacmd
--- 37,68 ----
class Blast
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using
! # NCBI fastacmd command.
! #
! # This class requires 'fastacmd' command and a blast database
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! # require 'bio'
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # fastacmd.fetch("sp:128U_DROME")
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! # fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! # puts fasta
! # end
! #
! # = REFERENCES
! #
! # * NCBI tool
! # ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! # http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
#
class Fastacmd
***************
*** 78,90 ****
attr_accessor :database
! # fastcmd command file path.
attr_accessor :fastacmd
- #
attr_accessor :errorlog
! # Initalize a fastacmd object.
! #
! # fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
def initialize(blast_database_file_path)
@database = blast_database_file_path
--- 74,103 ----
attr_accessor :database
! # fastacmd command file path.
attr_accessor :fastacmd
attr_accessor :errorlog
! # This method provides a handle to a BLASTable database, which you can then
! # use to retrieve sequences.
! #
! # Prerequisites:
! # * You have created a BLASTable database with the '-o T' option.
! # * You have the NCBI fastacmd tool installed.
! #
! # For example, suppose the original input file looks like:
! # >my_seq_1
! # ACCGACCTCCGGAACGGATAGCCCGACCTACG
! # >my_seq_2
! # TCCGACCTTTCCTACCGCACACCTACGCCATCAC
! # ...
! # and you've created a BLASTable database from that with the command
! # cd /my_dir/
! # formatdb -i my_input_file -t Test -n Test -o T
! # then you can get a handle to this database with the command
! # fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
! # ---
! # *Arguments*:
! # * _database_:: path and name of BLASTable database
def initialize(blast_database_file_path)
@database = blast_database_file_path
***************
*** 93,117 ****
! # get an entry_id and returns a Bio::FastaFormat object.
! #
! # entry_id = "sp:128U_DROME"
! # entry = fastacmd.get_by_id(entry_id)
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
! #
! # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
! # object even when the result is a single entry.
! #
! # p fastacmd.fetch(entry_id)
#
! # Fastacmd#fetch method also accepts a list of entry_id and returns
! # an Array of Bio::FastaFormat objects.
! #
! # ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
! # p fastacmd.fetch(ent_list)
#
def fetch(list)
if list.respond_to?(:join)
--- 106,131 ----
! # Get the sequence of a specific entry in the BLASTable database.
! # For example:
! # entry = fastacmd.get_by_id("sp:128U_DROME")
! # ---
! # *Arguments*:
! # * _id_: id of an entry in the BLAST database
! # *Returns*:: a Bio::FastaFormat object
def get_by_id(entry_id)
fetch(entry_id).shift
end
! # Get the sequence for a _list_ of IDs in the database.
#
! # For example:
! # p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
#
+ # This method always returns an array of Bio::FastaFormat objects, even when
+ # the result is a single entry.
+ # ---
+ # *Arguments*:
+ # * _ids_: list of IDs to retrieve from the database
+ # *Returns*:: array of Bio::FastaFormat objects
def fetch(list)
if list.respond_to?(:join)
***************
*** 128,138 ****
end
! # Iterates each entry.
! #
! # You can also iterate on all sequences in the database!
! # fastacmd.each do |fasta|
! # p [ fasta.definition[0..30], fasta.seq.size ]
! # end
#
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
end
! # Iterates over _all_ sequences in the database.
#
+ # fastacmd.each_entry do |fasta|
+ # p [ fasta.definition[0..30], fasta.seq.size ]
+ # end
+ # ---
+ # *Returns*:: a Bio::FastaFormat object for each iteration
def each_entry
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
end # module Bio
+ if $0 == __FILE__
+ fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+ seq = fastacmd.get_by_id('id_of_entry1')
+ puts seq.class
+ puts seq
+
+ seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+ seqs.each do |seq|
+ puts seq
+ end
+ fastacmd.each_entry do |fasta|
+ puts fasta.seq.size.to_s + "\t" + fasta.definition
+ end
+ end
From ngoto at pub.open-bio.org Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.48,1.49
Message-ID: <200603221019.k2MAJOVL005746@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -d -r1.48 -r1.49
*** flatfile.rb 3 Mar 2006 09:31:57 -0000 1.48
--- flatfile.rb 22 Mar 2006 10:19:22 -0000 1.49
***************
*** 262,265 ****
--- 262,268 ----
attr_reader :entry
+ # a flag to write down entry start and end positions
+ attr_accessor :entry_pos_flag
+
# start position of the entry
attr_reader :entry_start_pos
***************
*** 290,293 ****
--- 293,297 ----
end
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
+ @entry_pos_flag = nil
end
***************
*** 330,334 ****
# gets a entry
def get_entry
! p0 = @stream.pos
e = @stream.gets(@delimiter)
if e and @delimiter_overrun then
--- 334,338 ----
# gets a entry
def get_entry
! p0 = @entry_pos_flag ? @stream.pos : nil
e = @stream.gets(@delimiter)
if e and @delimiter_overrun then
***************
*** 339,343 ****
end
end
! p1 = @stream.pos
@entry_start_pos = p0
@entry = e
--- 343,347 ----
end
end
! p1 = @entry_pos_flag ? @stream.pos : nil
@entry_start_pos = p0
@entry = e
***************
*** 585,588 ****
--- 589,602 ----
def entry_raw
@splitter.entry
+ end
+
+ # a flag to write down entry start and end positions
+ def entry_pos_flag
+ @splitter.entry_pos_flag
+ end
+
+ # Sets flag to write down entry start and end positions
+ def entry_pos_flag=(x)
+ @splitter.entry_pos_flag = x
end
From ngoto at pub.open-bio.org Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io/flatfile indexer.rb,1.23,1.24
Message-ID: <200603221019.k2MAJOVL005748@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io/flatfile
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io/flatfile
Modified Files:
indexer.rb
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: indexer.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile/indexer.rb,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** indexer.rb 22 Feb 2006 08:41:03 -0000 1.23
--- indexer.rb 22 Mar 2006 10:19:22 -0000 1.24
***************
*** 115,118 ****
--- 115,119 ----
@flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
@flatfile.raw = nil
+ @flatfile.entry_pos_flag = true
@entry = nil
end
From ngoto at pub.open-bio.org Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/doc Changes-0.7.rd,1.16,1.17
Message-ID: <200603221019.k2MAJOVL005750@pub.open-bio.org>
Update of /home/repository/bioruby/bioruby/doc
In directory pub.open-bio.org:/tmp/cvs-serv5657/doc
Modified Files:
Changes-0.7.rd
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.
Index: Changes-0.7.rd
===================================================================
RCS file: /home/repository/bioruby/bioruby/doc/Changes-0.7.rd,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** Changes-0.7.rd 27 Feb 2006 11:38:14 -0000 1.16
--- Changes-0.7.rd 22 Mar 2006 10:19:22 -0000 1.17
***************
*** 262,265 ****
--- 262,270 ----
structure (which is not recommended) would not work.
+ In 1.0.1:
+
+ * Bio::FlatFile#entry_start_pos and #entry_ended_pos are enabled
+ only when Bio::FlatFile#entry_pos_flag is true.
+
=== Deleted files
From k at portal.open-bio.org Sun Mar 26 00:38:12 2006
From: k at portal.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 00:38:12 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell demo.rb,1.1,1.2
Message-ID: <200603260038.k2Q0cCgZ028442@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv28438/lib/bio/shell
Modified Files:
demo.rb
Log Message:
* 1st commit test on dev.open-bio.org after the server migration
* fixed some typos in BioRuby shell demo
Index: demo.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/demo.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** demo.rb 27 Feb 2006 09:33:22 -0000 1.1
--- demo.rb 26 Mar 2006 00:38:10 -0000 1.2
***************
*** 90,94 ****
run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
! run(%q[less "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
--- 90,94 ----
run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
! run(%q[disp "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
***************
*** 98,112 ****
def pdb_hetdic
! run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! "Retrieving the het_dic database ...", false) &&
! run(%q[savefile("data/het_dictionary.txt", het_dic)],
! "Saving the file ... ", false) &&
run(%q[het_dic.size], "Bytes of the file ...", true) &&
! run(%q[less "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
"Creating index to make the seaarchable database ...", false) &&
run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
! run(%q[sake.conect], "Showing connect table (conect) of the molecule ...", true) &&
true
end
--- 98,112 ----
def pdb_hetdic
! # run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! # "Retrieving the het_dic database ...", false) &&
! # run(%q[savefile("data/het_dictionary.txt", het_dic)],
! # "Saving the file ... ", false) &&
run(%q[het_dic.size], "Bytes of the file ...", true) &&
! run(%q[disp "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
"Creating index to make the seaarchable database ...", false) &&
run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
! run(%q[osake.conect], "Showing connect table (conect) of the molecule ...", true) &&
true
end
From ngoto at dev.open-bio.org Tue Mar 28 14:00:50 2006
From: ngoto at dev.open-bio.org (Naohisa Goto)
Date: Tue, 28 Mar 2006 14:00:50 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.4,1.5
Message-ID: <200603281400.k2SE0oK6024842@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv24822
Modified Files:
command.rb
Log Message:
* added "require 'open-uri'"
* removed complicated hacks for open-uri
Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** command.rb 20 Mar 2006 10:34:57 -0000 1.4
--- command.rb 28 Mar 2006 14:00:48 -0000 1.5
***************
*** 12,15 ****
--- 12,16 ----
require 'open3'
require 'uri'
+ require 'open-uri'
module Bio
***************
*** 157,289 ****
module NetTools
- # Same as OpenURI.open_uri(*arg).
- # If open-uri.rb is already loaded, ::OpenURI is used.
- # Otherwise, internal OpenURI in sandbox is used because
- # open-uri.rb redefines Kernel.open.
- def self.open_uri(uri, *arg)
- if defined? ::OpenURI
- ::OpenURI.open_uri(uri, *arg)
- else
- SandBox.load_openuri_in_sandbox
- uri = uri.to_s if ::URI::Generic === uri
- SandBox::OpenURI.open_uri(uri, *arg)
- end
- end
-
# Same as OpenURI.open_uri(uri).read.
- # If open-uri.rb is already loaded, ::OpenURI is used.
- # Otherwise, internal OpenURI in sandbox is used becase
- # open-uri.rb redefines Kernel.open.
def self.read_uri(uri)
! self.open_uri(uri).read
end
-
- # Sandbox to load open-uri.rb.
- # Internal use only.
- module SandBox #:nodoc:
-
- # Dummy module definition.
- module Kernel #:nodoc:
- # dummy method
- def open(*arg); end #:nodoc:
- end #module Kernel
-
- # a method to find proxy. dummy definition
- module FindProxy; end #:nodoc:
-
- # dummy module definition
- module OpenURI #:nodoc:
- module OpenRead; end #:nodoc:
- end #module OpenURI
-
- # Dummy module definition.
- module URI #:nodoc:
- class Generic < ::URI::Generic #:nodoc:
- include SandBox::FindProxy
- end
-
- class HTTPS < ::URI::HTTPS #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- class HTTP < ::URI::HTTP #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- class FTP < ::URI::FTP #:nodoc:
- include SandBox::FindProxy
- include SandBox::OpenURI::OpenRead
- end
-
- # parse and new. internal use only.
- def self.__parse_and_new__(klass, uri) #:nodoc:
- scheme, userinfo, host, port,
- registry, path, opaque, query, fragment = ::URI.split(uri)
- klass.new(scheme, userinfo, host, port,
- registry, path, opaque, query,
- fragment)
- end
- private_class_method :__parse_and_new__
-
- # same as ::URI.parse. internal use only.
- def self.parse(uri) #:nodoc:
- r = ::URI.parse(uri)
- case r
- when ::URI::HTTPS
- __parse_and_new__(HTTPS, uri)
- when ::URI::HTTP
- __parse_and_new__(HTTP, uri)
- when ::URI::FTP
- __parse_and_new__(FTP, uri)
- else
- r
- end
- end
- end #module URI
-
- @load_openuri = nil
- # load open-uri.rb in SandBox module.
- def self.load_openuri_in_sandbox #:nodoc:
- return if @load_openuri
- fn = nil
- unless $:.find do |x|
- fn = File.join(x, 'open-uri.rb')
- FileTest.exist?(fn)
- end then
- warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
- else
- # reading open-uri.rb
- str = File.read(fn)
- # eval open-uri.rb contents in SandBox module
- module_eval(str)
-
- # finds 'find_proxy' method
- find_proxy_lines = nil
- flag = nil
- endstr = nil
- str.each do |line|
- if flag then
- find_proxy_lines << line
- if endstr == line[0, endstr.length] and
- /^\s+end(\s+.*)?$/ =~ line then
- break
- end
- elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
- flag = true
- endstr = "#{$1}end"
- find_proxy_lines = line
- end
- end
- if find_proxy_lines
- module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
- else
- warn('Warning: cannot find find_proxy method in open-uri.rb.')
- end
- @load_openuri = true
- end
- end
- end #module SandBox
end #module NetTools
--- 158,165 ----
module NetTools
# Same as OpenURI.open_uri(uri).read.
def self.read_uri(uri)
! OpenURI.open_uri(uri).read
end
end #module NetTools
From k at dev.open-bio.org Sun Mar 26 02:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio sequence.rb,0.56,0.57
Message-ID: <200603260228.k2Q2S1uq028859@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28853
Modified Files:
sequence.rb
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
* Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess. (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
* Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block). With this argument set, blocks will be properly passed through to the encapsulated object.
Index: sequence.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence.rb,v
retrieving revision 0.56
retrieving revision 0.57
diff -C2 -d -r0.56 -r0.57
*** sequence.rb 17 Feb 2006 17:15:08 -0000 0.56
--- sequence.rb 26 Mar 2006 02:27:59 -0000 0.57
***************
*** 5,9 ****
# Toshiaki Katayama ,
# Yoshinori K. Okuji ,
! # Naohisa Goto
# License:: Ruby's
#
--- 5,11 ----
# Toshiaki Katayama ,
# Yoshinori K. Okuji ,
! # Naohisa Goto ,
! # Ryan Raaum ,
! # Jan Aerts
# License:: Ruby's
#
***************
*** 15,18 ****
--- 17,67 ----
module Bio
+ # = DESCRIPTION
+ # Bio::Sequence objects represent annotated sequences in bioruby.
+ # A Bio::Sequence object is a wrapper around the actual sequence,
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
+ # For most users, this encapsulation will be completely transparent.
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
+ # objects using the same arguments and returning the same values (even though
+ # these methods are not documented specifically for Bio::Sequence).
+ #
+ # = USAGE
+ # # Create a nucleic or amino acid sequence
+ # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
+ # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
+ # aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
+ #
+ # # Print it out
+ # puts dna.to_s
+ # puts aa.to_s
+ #
+ # # Get a subsequence, bioinformatics style (first nucleotide is '1')
+ # puts dna.subseq(2,6)
+ #
+ # # Get a subsequence, informatics style (first nucleotide is '0')
+ # puts dna[2,6]
+ #
+ # # Print in FASTA format
+ # puts dna.output(:fasta)
+ #
+ # # Print all codons
+ # dna.window_search(3,3) do |codon|
+ # puts codon
+ # end
+ #
+ # # Splice or otherwise mangle your sequence
+ # puts dna.splicing("complement(join(1..5,16..20))")
+ # puts rna.splicing("complement(join(1..5,16..20))")
+ #
+ # # Convert a sequence containing ambiguity codes into a
+ # # regular expression you can use for subsequent searching
+ # puts aa.to_re
+ #
+ # # These should speak for themselves
+ # puts dna.complement
+ # puts dna.composition
+ # puts dna.molecular_weight
+ # puts dna.translate
+ # puts dna.gc_percent
class Sequence
***************
*** 23,37 ****
autoload :Format, 'bio/sequence/format'
def initialize(str)
@seq = str
end
! def method_missing(*arg)
! @seq.send(*arg)
end
!
! attr_accessor :entry_id, :definition, :features, :references, :comments,
! :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
!
def output(style)
extend Bio::Sequence::Format
--- 72,151 ----
autoload :Format, 'bio/sequence/format'
+ # Create a new Bio::Sequence object
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s #=> 'atgc'
+ #
+ # Note that this method does not initialize the contained sequence
+ # as any kind of bioruby object, only as a simple string
+ #
+ # puts s.seq.class #=> String
+ #
+ # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto
+ # for methods to transform the basic String of a just created
+ # Bio::Sequence object to a proper bioruby object
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String or Bio::Sequence::NA/AA object
+ # *Returns*:: Bio::Sequence object
def initialize(str)
@seq = str
end
! # Pass any unknown method calls to the wrapped sequence object. see
! # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
! def method_missing(sym, *args, &block) #:nodoc:
! @seq.send(sym, *args, &block)
end
!
! # The sequence identifier. For example, for a sequence
! # of Genbank origin, this is the accession number.
! attr_accessor :entry_id
!
! # A String with a description of the sequence
! attr_accessor :definition
!
! # An Array of Bio::Feature objects
! attr_accessor :features
!
! # An Array of Bio::Reference objects
! attr_accessor :references
!
! # A comment String
! attr_accessor :comments
!
! # Date from sequence source. Often date of deposition.
! attr_accessor :date
!
! # An Array of Strings
! attr_accessor :keywords
!
! # An Array of Strings; links to other database entries.
! attr_accessor :dblinks
!
! # A taxonomy String
! attr_accessor :taxonomy
!
! # Bio::Sequence::NA/AA
! attr_accessor :moltype
!
! # The sequence object, usually Bio::Sequence::NA/AA,
! # but could be a simple String
! attr_accessor :seq
!
! # Using Bio::Sequence::Format, return a String with the Bio::Sequence
! # object formatted in the given style.
! #
! # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
! #
! # s = Bio::Sequence.new('atgc')
! # puts s.output(:fasta) #=> "> \natgc\n"
! #
! # The style argument is given as a Ruby
! # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
! # ---
! # *Arguments*:
! # * (required) _style_: :fasta, :genbank, *or* :embl
! # *Returns*:: String object
def output(style)
extend Bio::Sequence::Format
***************
*** 48,51 ****
--- 162,175 ----
end
+ # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a
+ # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
+ # of this guess. This method will change the current Bio::Sequence object.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.seq.class #=> String
+ # s.auto
+ # puts s.seq.class #=> Bio::Sequence::NA
+ # ---
+ # *Returns*:: Bio::Sequence::NA/AA object
def auto
@moltype = guess
***************
*** 57,60 ****
--- 181,194 ----
end
+ # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
+ # return a new Bio::Sequence object wrapping a sequence of the guessed type
+ # (either Bio::Sequence::AA or Bio::Sequence::NA)
+ #
+ # s = Bio::Sequence.auto('atgc')
+ # puts s.seq.class #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+ # *Returns*:: Bio::Sequence object
def self.auto(str)
seq = self.new(str)
***************
*** 63,74 ****
end
def guess(threshold = 0.9, length = 10000, index = 0)
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
cmp = str.composition
! bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] +
! cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
! total = @seq.length - cmp['N'] - cmp['n']
if bases.to_f / total > threshold
--- 197,247 ----
end
+ # Guess the class of the current sequence. Returns the class
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
+ # developers only, but if you know what you are doing, feel free.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.guess #=> Bio::Sequence::NA
+ #
+ # There are three parameters: `threshold`, `length`, and `index`.
+ #
+ # The `threshold` value (defaults to 0.9) is the frequency of
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+ # to produce a Bio::Sequence::NA "guess". In the default case, if 90% or
+ # less of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+ # then the guess is Bio::Sequence::AA (the frequency must exceed the threshold).
+ #
+ # s = Bio::Sequence.new('atgcatgcqq')
+ # puts s.guess #=> Bio::Sequence::AA
+ # puts s.guess(0.8) #=> Bio::Sequence::AA
+ # puts s.guess(0.7) #=> Bio::Sequence::NA
+ #
+ # The `length` value is how much of the total sequence to use in the
+ # guess (default 10000). If your sequence is very long, you may
+ # want to use a smaller amount to reduce the computational burden.
+ #
+ # s = Bio::Sequence.new(A VERY LONG SEQUENCE)
+ # puts s.guess(0.9, 1000) # limit the guess to the first 1000 positions
+ #
+ # The `index` value is where to start the guess. Perhaps you know there
+ # are a lot of gaps at the start...
+ #
+ # s = Bio::Sequence.new('-----atgcc')
+ # puts s.guess #=> Bio::Sequence::AA
+ # puts s.guess(0.9,10000,5) #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+ # * (optional) _length_: Fixnum (default 10000)
+ # * (optional) _index_: Fixnum (default 0)
+ # *Returns*:: Bio::Sequence::NA/AA
def guess(threshold = 0.9, length = 10000, index = 0)
str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
cmp = str.composition
! bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
! cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u']
! total = str.length - cmp['N'] - cmp['n']
if bases.to_f / total > threshold
***************
*** 79,86 ****
--- 252,312 ----
end
+ # Guess the class of a given sequence. Returns the class
+ # (Bio::Sequence::AA or Bio::Sequence::NA) guessed. In general, used by
+ # developers only, but if you know what you are doing, feel free.
+ #
+ # puts Bio::Sequence.guess('atgc') #=> Bio::Sequence::NA
+ #
+ # There are three optional parameters: `threshold`, `length`, and `index`.
+ #
+ # The `threshold` value (defaults to 0.9) is the frequency of
+ # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+ # to produce a Bio::Sequence::NA "guess". In the default case, if 90% or
+ # less of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+ # then the guess is Bio::Sequence::AA (the frequency must exceed the threshold).
+ #
+ # puts Bio::Sequence.guess('atgcatgcqq') #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
+ #
+ # The `length` value is how much of the total sequence to use in the
+ # guess (default 10000). If your sequence is very long, you may
+ # want to use a smaller amount to reduce the computational burden.
+ #
+ # # limit the guess to the first 1000 positions
+ # puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)
+ #
+ # The `index` value is where to start the guess. Perhaps you know there
+ # are a lot of gaps at the start...
+ #
+ # puts Bio::Sequence.guess('-----atgcc') #=> Bio::Sequence::AA
+ # puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
+ # ---
+ # *Arguments*:
+ # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+ # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+ # * (optional) _length_: Fixnum (default 10000)
+ # * (optional) _index_: Fixnum (default 0)
+ # *Returns*:: Bio::Sequence::NA/AA
def self.guess(str, *args)
self.new(str).guess(*args)
end
+ # Transform the sequence wrapped in the current Bio::Sequence object
+ # into a Bio::Sequence::NA object. This method will change the current
+ # object. This method does not validate your choice, so be careful!
+ #
+ # s = Bio::Sequence.new('RRLE')
+ # puts s.seq.class #=> String
+ # s.na
+ # puts s.seq.class #=> Bio::Sequence::NA !!!
+ #
+ # However, if you know your sequence type, this method may be
+ # constructively used after initialization,
+ #
+ # s = Bio::Sequence.new('atgc')
+ # s.na
+ # ---
+ # *Returns*:: Bio::Sequence::NA
def na
@seq = NA.new(@seq)
***************
*** 88,96 ****
end
def aa
@seq = AA.new(@seq)
@moltype = AA
end
!
end # Sequence
--- 314,338 ----
end
+ # Transform the sequence wrapped in the current Bio::Sequence object
+ # into a Bio::Sequence::AA object. This method will change the current
+ # object. This method does not validate your choice, so be careful!
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.seq.class #=> String
+ # s.aa
+ # puts s.seq.class #=> Bio::Sequence::AA !!!
+ #
+ # However, if you know your sequence type, this method may be
+ # constructively used after initialization,
+ #
+ # s = Bio::Sequence.new('RRLE')
+ # s.aa
+ # ---
+ # *Returns*:: Bio::Sequence::AA
def aa
@seq = AA.new(@seq)
@moltype = AA
end
!
end # Sequence
From k at dev.open-bio.org Sun Mar 26 02:32:58 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:32:58 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio reference.rb,1.21,1.22
Message-ID: <200603260232.k2Q2Ww61028892@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28888
Modified Files:
reference.rb
Log Message:
* comprehensive documentation contributed by Ryan Raaum is added
Index: reference.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/reference.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** reference.rb 8 Feb 2006 15:06:26 -0000 1.21
--- reference.rb 26 Mar 2006 02:32:56 -0000 1.22
***************
*** 2,45 ****
# = bio/reference.rb - Journal reference classes
#
! # Copyright:: Copyright (C) 2001
! # KATAYAMA Toshiaki
! # Lisence:: LGPL
#
# $Id$
#
- # == Description
- #
- # Journal reference classes.
- #
- # == Examples
- #
- # == References
- #
- #
- #
- #--
- #
- # This library is free software; you can redistribute it and/or
- # modify it under the terms of the GNU Lesser General Public
- # License as published by the Free Software Foundation; either
- # version 2 of the License, or (at your option) any later version.
- #
- # This library is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with this library; if not, write to the Free Software
- # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- #
- #++
- #
module Bio
# A class for journal reference information.
#
! # === Examples
#
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
--- 2,20 ----
# = bio/reference.rb - Journal reference classes
#
! # Copyright:: Copyright (C) 2001, 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
! # License:: Ruby's
#
# $Id$
#
module Bio
+ # = DESCRIPTION
+ #
# A class for journal reference information.
#
! # = USAGE
#
# hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
***************
*** 69,100 ****
attr_reader :authors
! # "Title of the study."
attr_reader :title
! # "Theor. J. Hoge"
attr_reader :journal
! # 12
attr_reader :volume
! # 3
attr_reader :issue
! # "123-145"
attr_reader :pages
! # 2001
attr_reader :year
! # 12345678
attr_reader :pubmed
! # 98765432
attr_reader :medline
! # Abstract test in String.
attr_reader :abstract
! # A URL String.
attr_reader :url
--- 44,75 ----
attr_reader :authors
! # String with title of the study
attr_reader :title
! # String with journal name
attr_reader :journal
! # volume number (typically Fixnum)
attr_reader :volume
! # issue number (typically Fixnum)
attr_reader :issue
! # page range (typically String, e.g. "123-145")
attr_reader :pages
! # year of publication (typically Fixnum)
attr_reader :year
! # pubmed identifier (typically Fixnum)
attr_reader :pubmed
! # medline identifier (typically Fixnum)
attr_reader :medline
! # Abstract text in String.
attr_reader :abstract
! # An URL String.
attr_reader :url
***************
*** 105,109 ****
attr_reader :affiliations
! #
def initialize(hash)
hash.default = ''
--- 80,119 ----
attr_reader :affiliations
! # Create a new Bio::Reference object from a Hash of values.
! # Data is extracted from the values for keys:
! #
! # * authors - expected value: Array of Strings
! # * title - expected value: String
! # * journal - expected value: String
! # * volume - expected value: Fixnum or String
! # * issue - expected value: Fixnum or String
! # * pages - expected value: String
! # * year - expected value: Fixnum or String
! # * pubmed - expected value: Fixnum or String
! # * medline - expected value: Fixnum or String
! # * abstract - expected value: String
! # * url - expected value: String
! # * mesh - expected value: Array of Strings
! # * affiliations - expected value: Array of Strings
! #
! #
! # hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ],
! # 'title' => "Title of the study.",
! # 'journal' => "Theor. J. Hoge",
! # 'volume' => 12,
! # 'issue' => 3,
! # 'pages' => "123-145",
! # 'year' => 2001,
! # 'pubmed' => 12345678,
! # 'medline' => 98765432,
! # 'abstract' => "Hoge fuga. ...",
! # 'url' => "http://example.com",
! # 'mesh' => [],
! # 'affiliations' => []}
! # ref = Bio::Reference.new(hash)
! # ---
! # *Arguments*:
! # * (required) _hash_: Hash
! # *Returns*:: Bio::Reference object
def initialize(hash)
hash.default = ''
***************
*** 131,138 ****
# 0. nil - general
# 1. endnote - Endnote
! # 2. bibitem - Bibitem (option acceptable)
! # 3. bibtex - BiBTeX (option acceptable)
! # 4. rd - rd (option acceptable)
! # 5. nature - Nature (option acceptable)
# 6. science - Science
# 7. genome_biol - Genome Biology
--- 141,148 ----
# 0. nil - general
# 1. endnote - Endnote
! # 2. bibitem - Bibitem (option available)
! # 3. bibtex - BiBTeX (option available)
! # 4. rd - rd (option available)
! # 5. nature - Nature (option available)
# 6. science - Science
# 7. genome_biol - Genome Biology
***************
*** 142,145 ****
--- 152,172 ----
# 11. trends - Trends in *
# 12. cell - Cell Press
+ #
+ # See individual methods for details. Basic usage is:
+ #
+ # # ref is Bio::Reference object
+ # # using simplest possible call (for general style)
+ # puts ref.format
+ #
+ # # output in Nature style
+ # puts ref.format("nature") # alternatively, puts ref.nature
+ #
+ # # output in Nature short style (see Bio::Reference#nature)
+ # puts ref.format("nature",true) # alternatively, puts ref.nature(true)
+ # ---
+ # *Arguments*:
+ # * (optional) _style_: String with style identifier
+ # * (optional) _option_: Option for styles accepting one
+ # *Returns*:: String
def format(style = nil, option = nil)
case style
***************
*** 173,177 ****
end
! # Formats in the Endonote style.
def endnote
lines = []
--- 200,222 ----
end
! # Returns reference formatted in the Endnote style.
! #
! # # ref is a Bio::Reference object
! # puts ref.endnote
! #
! # %0 Journal Article
! # %A Hoge, J.P.
! # %A Fuga, F.B.
! # %D 2001
! # %T Title of the study.
! # %J Theor. J. Hoge
! # %V 12
! # %N 3
! # %P 123-145
! # %M 12345678
! # %U http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=12345678
! # %X Hoge fuga. ...
! # ---
! # *Returns*:: String
def endnote
lines = []
***************
*** 201,205 ****
end
! # Formats in the bibitem.
def bibitem(item = nil)
item = "PMID:#{@pubmed}" unless item
--- 246,260 ----
end
! # Returns reference formatted in the bibitem style
! #
! # # ref is a Bio::Reference object
! # puts ref.bibitem
! #
! # \bibitem{PMID:12345678}
! # Hoge, J.P., Fuga, F.B.
! # Title of the study.,
! # {\em Theor. J. Hoge}, 12(3):123--145, 2001.
! # ---
! # *Returns*:: String
def bibitem(item = nil)
item = "PMID:#{@pubmed}" unless item
***************
*** 213,217 ****
end
! # Formats in the BiBTeX style.
def bibtex(section = nil)
section = "article" unless section
--- 268,303 ----
end
! # Returns reference formatted in the BiBTeX style.
! #
! # # ref is a Bio::Reference object
! # puts ref.bibtex
! #
! # @article{PMID:12345678,
! # author = {Hoge, J.P. and Fuga, F.B.},
! # title = {Title of the study.},
! # journal = {Theor. J. Hoge},
! # year = {2001},
! # volume = {12},
! # number = {3},
! # pages = {123--145},
! # }
! #
! # # using a different section (e.g. "book")
! # # (but not really configured for anything other than articles)
! # puts ref.bibtex("book")
! #
! # @book{PMID:12345678,
! # author = {Hoge, J.P. and Fuga, F.B.},
! # title = {Title of the study.},
! # journal = {Theor. J. Hoge},
! # year = {2001},
! # volume = {12},
! # number = {3},
! # pages = {123--145},
! # }
! # ---
! # *Arguments*:
! # * (optional) _section_: BiBTeX section as String
! # *Returns*:: String
def bibtex(section = nil)
section = "article" unless section
***************
*** 231,235 ****
end
! # Formats in a general style.
def general
authors = @authors.join(', ')
--- 317,328 ----
end
! # Returns reference formatted in a general/generic style.
! #
! # # ref is a Bio::Reference object
! # puts ref.general
! #
! # Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.
! # ---
! # *Returns*:: String
def general
authors = @authors.join(', ')
***************
*** 237,241 ****
end
! # Formats in the RD style.
def rd(str = nil)
@abstract ||= str
--- 330,351 ----
end
! # Return reference formatted in the RD style.
! #
! # # ref is a Bio::Reference object
! # puts ref.rd
! #
! # == Title of the study.
! #
! # * Hoge, J.P. and Fuga, F.B.
! #
! # * Theor. J. Hoge 2001 12:123-145 [PMID:12345678]
! #
! # Hoge fuga. ...
! #
! # An optional string argument can be supplied; it is used as the abstract only if @abstract is not already set.
! # ---
! # *Arguments*:
! # * (optional) str: String (default nil)
! # *Returns*:: String
def rd(str = nil)
@abstract ||= str
***************
*** 248,253 ****
end
! # Formats in the Nature Publish Group style.
! # * http://www.nature.com
def nature(short = false)
if short
--- 358,377 ----
end
! # Formats in the Nature Publishing Group
! # (http://www.nature.com) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.nature
! #
! # Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).
! #
! # # optionally, output short version
! # puts ref.nature(true) # or puts ref.nature(short=true)
! #
! # Hoge, J.P. & Fuga, F.B. Theor. J. Hoge 12, 123-145 (2001).
! # ---
! # *Arguments*:
! # * (optional) _short_: Boolean (default false)
! # *Returns*:: String
def nature(short = false)
if short
***************
*** 266,271 ****
end
! # Formats in the Science style.
! # * http://www.siencemag.com/
def science
if @authors.size > 4
--- 390,402 ----
end
! # Returns reference formatted in the
! # Science[http://www.sciencemag.org] style.
! #
! # # ref is a Bio::Reference object
! # puts ref.science
! #
! # J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).
! # ---
! # *Returns*:: String
def science
if @authors.size > 4
***************
*** 278,283 ****
end
! # Formats in the Genome Biology style.
! # * http://genomebiology.com/
def genome_biol
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
--- 409,421 ----
end
! # Returns reference formatted in the Genome Biology
! # (http://genomebiology.com) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.genome_biol
! #
! # Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
! # ---
! # *Returns*:: String
def genome_biol
authors = @authors.collect {|name| strip_dots(name)}.join(', ')
***************
*** 285,294 ****
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
end
! # Formats in the Current Biology style.
! # * http://www.current-biology.com/
! alias current genome_biol
! # Formats in the Genome Research style.
! # * http://genome.org/
def genome_res
authors = authors_join(' and ')
--- 423,450 ----
"#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
end
!
! # Returns reference formatted in the Current Biology
! # (http://current-biology.com) style. (Same as the Genome Biology style)
! #
! # # ref is a Bio::Reference object
! # puts ref.current
! #
! # Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
! # ---
! # *Returns*:: String
! def current
! self.genome_biol
! end
! # Returns reference formatted in the Genome Research
! # (http://genome.org) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.genome_res
! #
! # Hoge, J.P. and Fuga, F.B. 2001.
! # Title of the study. Theor. J. Hoge 12: 123-145.
! # ---
! # *Returns*:: String
def genome_res
authors = authors_join(' and ')
***************
*** 296,301 ****
end
! # Formats in the Nucleic Acids Reseach style.
! # * http://nar.oxfordjournals.org/
def nar
authors = authors_join(' and ')
--- 452,464 ----
end
! # Returns reference formatted in the Nucleic Acids Research
! # (http://nar.oxfordjournals.org) style.
! #
! # # ref is a Bio::Reference object
! # puts ref.nar
! #
! # Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.
! # ---
! # *Returns*:: String
def nar
authors = authors_join(' and ')
***************
*** 303,308 ****
end
! # Formats in the CELL Press style.
! # http://www.cell.com/
def cell
authors = authors_join(' and ')
--- 466,478 ----
end
! # Returns reference formatted in the
! # CELL[http://www.cell.com] Press style.
! #
! # # ref is a Bio::Reference object
! # puts ref.cell
! #
! # Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.
! # ---
! # *Returns*:: String
def cell
authors = authors_join(' and ')
***************
*** 310,315 ****
end
! # Formats in the TRENDS Journals.
! # * http://www.trends.com/
def trends
if @authors.size > 2
--- 480,492 ----
end
! # Returns reference formatted in the
! # TRENDS[http://www.trends.com] style.
! #
! # # ref is a Bio::Reference object
! # puts ref.trends
! #
! # Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145
! # ---
! # *Returns*:: String
def trends
if @authors.size > 2
***************
*** 352,358 ****
end
! # Set of Bio::Reference.
#
! # === Examples
#
# refs = Bio::References.new
--- 529,537 ----
end
! # = DESCRIPTION
#
! # A container class for Bio::Reference objects.
! #
! # = USAGE
#
# refs = Bio::References.new
***************
*** 364,371 ****
class References
! # Array of Bio::Reference.
attr_accessor :references
#
def initialize(ary = [])
@references = ary
--- 543,556 ----
class References
! # Array of Bio::Reference objects
attr_accessor :references
+ # Create a new Bio::References object
#
+ # refs = Bio::References.new
+ # ---
+ # *Arguments*:
+ # * (optional) _ary_: Array of Bio::Reference objects
+ # *Returns*:: Bio::References object
def initialize(ary = [])
@references = ary
***************
*** 373,377 ****
! # Append a Bio::Reference object.
def append(reference)
@references.push(reference) if reference.is_a? Reference
--- 558,568 ----
! # Add a Bio::Reference object to the container.
! #
! # refs.append(reference)
! # ---
! # *Arguments*:
! # * (required) _reference_: Bio::Reference object
! # *Returns*:: current Bio::References object
def append(reference)
@references.push(reference) if reference.is_a? Reference
***************
*** 379,383 ****
end
! # Iterates each Bio::Reference object.
def each
@references.each do |reference|
--- 570,580 ----
end
! # Iterate through Bio::Reference objects.
! #
! # refs.each do |reference|
! # ...
! # end
! # ---
! # *Block*:: yields each Bio::Reference object
def each
@references.each do |reference|
From k at dev.open-bio.org Sun Mar 26 02:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/sequence aa.rb, 1.2, 1.3 common.rb,
1.2, 1.3 compat.rb, 1.2, 1.3 format.rb, 1.2, 1.3 generic.rb,
1.3, 1.4 na.rb, 1.2, 1.3
Message-ID: <200603260228.k2Q2S12v028863@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv28853/sequence
Modified Files:
aa.rb common.rb compat.rb format.rb generic.rb na.rb
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
* Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess. (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
* Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block). With this argument set, blocks will be properly passed through to the encapsulated object.
Index: compat.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/compat.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** compat.rb 6 Feb 2006 14:18:03 -0000 1.2
--- compat.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 18,21 ****
--- 19,33 ----
autoload :AA, 'bio/sequence/aa'
+ # Return sequence as
+ # String[http://corelib.rubyonrails.org/classes/String.html].
+ # The original sequence is unchanged.
+ #
+ # s = Bio::Sequence.new('atgc')
+ # puts s.to_s #=> 'atgc'
+ # puts s.to_s.class #=> String
+ # puts s #=> 'atgc'
+ # puts s.class #=> Bio::Sequence
+ # ---
+ # *Returns*:: String object
def to_s
String.new(@seq)
***************
*** 26,32 ****
--- 38,51 ----
module Common
+ # *DEPRECATED* Do not use! Use Bio::Sequence#output instead.
+ #
# Output the FASTA format string of the sequence. The 1st argument is
# used as the comment string. If the 2nd option is given, the output
# sequence will be folded.
+ # ---
+ # *Arguments*:
+ # * (optional) _header_: String object
+ # * (optional) _width_: Fixnum object (default nil)
+ # *Returns*:: String
def to_fasta(header = '', width = nil)
warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
***************
*** 44,52 ****
class NA
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! def pikachu
self.dna.tr("atgc", "pika") # joke, of course :-)
end
--- 63,89 ----
class NA
+ # Generate a new random sequence with the given frequency of bases.
+ # The sequence length is determined by their cumulative sum.
+ # (See also Bio::Sequence::Common#randomize which creates a new
+ # randomized sequence object using the base composition of an existing
+ # sequence instance).
+ #
+ # counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
+ # puts Bio::Sequence::NA.randomize(counts) #=> "ggcttgttac" (for example)
+ #
+ # You may also feed the output of randomize into a block
+ #
+ # actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
+ # Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
+ # actual_counts #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
+ # ---
+ # *Arguments*:
+ # * (optional) _hash_: Hash object
+ # *Returns*:: Bio::Sequence::NA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! def pikachu #:nodoc:
self.dna.tr("atgc", "pika") # joke, of course :-)
end
***************
*** 57,60 ****
--- 94,115 ----
class AA
+ # Generate a new random sequence with the given frequency of residues.
+ # The sequence length is determined by their cumulative sum.
+ # (See also Bio::Sequence::Common#randomize which creates a new
+ # randomized sequence object using the residue composition of an existing
+ # sequence instance).
+ #
+ # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
+ # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
+ #
+ # You may also feed the output of randomize into a block
+ #
+ # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
+ # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
+ # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
+ # ---
+ # *Arguments*:
+ # * (optional) _hash_: Hash object
+ # *Returns*:: Bio::Sequence::AA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** common.rb 6 Feb 2006 14:16:17 -0000 1.2
--- common.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 15,22 ****
class Sequence
! # This module provides common methods for biological sequence classes
! # which must inherit String.
module Common
def to_s
String.new(self)
--- 16,53 ----
class Sequence
! # = DESCRIPTION
! # Bio::Sequence::Common is a
! # Mixin[http://www.rubycentral.com/book/tut_modules.html]
! # implementing methods common to
! # Bio::Sequence::AA and Bio::Sequence::NA. All of these methods
! # are available to either Amino Acid or Nucleic Acid sequences, and
! # by encapsulation are also available to Bio::Sequence objects.
! #
! # = USAGE
! #
! # # Create a sequence
! # dna = Bio::Sequence.auto('atgcatgcatgc')
! #
! # # Splice out a subsequence using a Genbank-style location string
! # puts dna.splice('complement(1..4)')
! #
! # # What is the base composition?
! # puts dna.composition
! #
! # # Create a random sequence with the composition of a current sequence
! # puts dna.randomize
module Common
+ # Return sequence as
+ # String[http://corelib.rubyonrails.org/classes/String.html].
+ # The original sequence is unchanged.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.to_s #=> 'atgc'
+ # puts s.to_s.class #=> String
+ # puts s #=> 'atgc'
+ # puts s.class #=> Bio::Sequence::NA
+ # ---
+ # *Returns*:: String object
def to_s
String.new(self)
***************
*** 24,34 ****
alias to_str to_s
! # Force self to re-initialize for clean up (remove white spaces,
! # case unification).
def seq
self.class.new(self)
end
! # Similar to the 'seq' method, but changes the self object destructively.
def normalize!
initialize(self)
--- 55,79 ----
alias to_str to_s
! # Create a new sequence based on the current sequence.
! # The original sequence is unchanged.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # s2 = s.seq
! # puts s2 #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA/AA object
def seq
self.class.new(self)
end
! # Normalize the current sequence, removing all whitespace and
! # transforming all positions to uppercase if the sequence is AA or
! # transforming all positions to lowercase if the sequence is NA.
! # The original sequence is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # s.normalize!
! # ---
! # *Returns*:: current Bio::Sequence::NA/AA object (modified)
def normalize!
initialize(self)
***************
*** 37,40 ****
--- 82,95 ----
alias seq! normalize!
+ # Add new data to the end of the current sequence.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # s << 'atgc'
+ # puts s #=> "atgcatgc"
+ # s << s
+ # puts s #=> "atgcatgcatgcatgc"
+ # ---
+ # *Returns*:: current Bio::Sequence::NA/AA object (modified)
def <<(*arg)
super(self.class.new(*arg))
***************
*** 42,50 ****
alias concat <<
def +(*arg)
self.class.new(super(*arg))
end
! # Returns the subsequence of the self string.
def subseq(s = 1, e = self.length)
raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
--- 97,141 ----
alias concat <<
+ # Create a new sequence by adding to an existing sequence.
+ # The existing sequence is not modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # s2 = s + 'atgc'
+ # puts s2 #=> "atgcatgc"
+ # puts s #=> "atgc"
+ #
+ # The new sequence is of the same class as the existing sequence if
+ # the new data was added to an existing sequence,
+ #
+ # puts s2.class == s.class #=> true
+ #
+ # but if an existing sequence is added to a String, the result is a String
+ #
+ # s3 = 'atgc' + s
+ # puts s3.class #=> String
+ # ---
+ # *Returns*:: new Bio::Sequence::NA/AA *or* String object
def +(*arg)
self.class.new(super(*arg))
end
! # Returns a new sequence containing the subsequence identified by the
! # start and end numbers given as parameters. *Important:* Biological
! # sequence numbering conventions (one-based) rather than ruby's
! # (zero-based) numbering conventions are used.
! #
! # s = Bio::Sequence::NA.new('atggaatga')
! # puts s.subseq(1,3) #=> "atg"
! #
! # Start defaults to 1 and end defaults to the entire existing string, so
! # subseq called without any parameters simply returns a new sequence
! # identical to the existing sequence.
! #
! # puts s.subseq #=> "atggaatga"
! # ---
! # *Arguments*:
! # * (optional) _s_(start): Integer (default 1)
! # * (optional) _e_(end): Integer (default current sequence length)
! # *Returns*:: new Bio::Sequence::NA/AA object
def subseq(s = 1, e = self.length)
raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
***************
*** 54,80 ****
end
! # This method iterates on sub string with specified length 'window_size'.
! # By specifing 'step_size', codon sized shifting or spliting genome
! # sequence with ovelapping each end can easily be yielded.
#
! # The remainder sequence at the terminal end will be returned.
#
! # Example:
! # # prints average GC% on each 100bp
! # seq.window_search(100) do |subseq|
# puts subseq.gc
# end
! # # prints every translated peptide (length 5aa) in the same frame
! # seq.window_search(15, 3) do |subseq|
# puts subseq.translate
# end
! # # split genome sequence by 10000bp with 1000bp overlap in fasta format
# i = 1
! # remainder = seq.window_search(10000, 9000) do |subseq|
# puts subseq.to_fasta("segment #{i}", 60)
# i += 1
# end
# puts remainder.to_fasta("segment #{i}", 60)
! #
def window_search(window_size, step_size = 1)
i = 0
--- 145,177 ----
end
! # This method steps through a sequence in steps of 'step_size' by
! # subsequences of 'window_size'. Typically used with a block.
! # Any remaining sequence at the terminal end will be returned.
#
! # Prints average GC% on each 100bp
#
! # s.window_search(100) do |subseq|
# puts subseq.gc
# end
! #
! # Prints every translated peptide (length 5aa) in the same frame
! #
! # s.window_search(15, 3) do |subseq|
# puts subseq.translate
# end
! #
! # Split genome sequence by 10000bp with 1000bp overlap in fasta format
! #
# i = 1
! # remainder = s.window_search(10000, 9000) do |subseq|
# puts subseq.to_fasta("segment #{i}", 60)
# i += 1
# end
# puts remainder.to_fasta("segment #{i}", 60)
! # ---
! # *Arguments*:
! # * (required) _window_size_: Fixnum
! # * (optional) _step_size_: Fixnum (default 1)
! # *Returns*:: new Bio::Sequence::NA/AA object
def window_search(window_size, step_size = 1)
i = 0
***************
*** 85,91 ****
end
! # This method receive a hash of residues/bases to the particular values,
! # and sum up the value along with the self sequence. Especially useful
! # to use with the window_search method and amino acid indices etc.
def total(hash)
hash.default = 0.0 unless hash.default
--- 182,195 ----
end
! # Returns a float total value for the sequence given a hash of
! # base or residue values,
! #
! # values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.total(values) #=> 1.0
! # ---
! # *Arguments*:
! # * (required) _hash_: Hash object
! # *Returns*:: Float object
def total(hash)
hash.default = 0.0 unless hash.default
***************
*** 100,103 ****
--- 204,212 ----
# Returns a hash of the occurrence counts for each residue or base.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.composition #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
+ # ---
+ # *Returns*:: Hash object
def composition
count = Hash.new(0)
***************
*** 108,118 ****
end
! # Returns a randomized sequence keeping its composition by default.
! # The argument is required when generating a random sequence from the empty
! # sequence (used by the class methods NA.randomize, AA.randomize).
! # If the block is given, yields for each random residue/base.
def randomize(hash = nil)
length = self.length
if hash
count = hash.clone
count.each_value {|x| length += x}
--- 217,244 ----
end
! # Returns a randomized sequence. The default is to retain the same
! # base/residue composition as the original. If a hash of base/residue
! # counts is given, the new sequence will be based on that hash
! # composition. If a block is given, each new randomly selected
! # position will be passed into the block. In all cases, the
! # original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.randomize #=> "tcag" (for example)
! #
! # new_composition = {'a' => 2, 't' => 2}
! # puts s.randomize(new_composition) #=> "ttaa" (for example)
! #
! # count = 0
! # s.randomize { |x| count += 1 }
! # puts count #=> 4
! # ---
! # *Arguments*:
! # * (optional) _hash_: Hash object
! # *Returns*:: new Bio::Sequence::NA/AA object
def randomize(hash = nil)
length = self.length
if hash
+ length = 0
count = hash.clone
count.each_value {|x| length += x}
***************
*** 139,151 ****
end
! # Generate a new random sequence with the given frequency of bases
! # or residues. The sequence length is determined by the sum of each
! # base/residue occurences.
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! # Receive a GenBank style position string and convert it to the Locations
! # objects to splice the sequence itself. See also: bio/location.rb
def splice(position)
unless position.is_a?(Locations) then
--- 265,305 ----
end
! # Generate a new random sequence with the given frequency of bases.
! # The sequence length is determined by their cumulative sum.
! # (See also Bio::Sequence::Common#randomize which creates a new
! # randomized sequence object using the base composition of an existing
! # sequence instance).
! #
! # counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
! # puts Bio::Sequence::AA.randomize(counts) #=> "AAEAELALRE" (for example)
! #
! # You may also feed the output of randomize into a block
! #
! # actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
! # Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
! # actual_counts #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
! # ---
! # *Arguments*:
! # * (optional) _hash_: Hash object
! # *Returns*:: Bio::Sequence::NA/AA object
def self.randomize(*arg, &block)
self.new('').randomize(*arg, &block)
end
! # Return a new sequence extracted from the original using a GenBank style
! # position string. See also documentation for the Bio::Location class.
! #
! # s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
! # puts s.splice('1..3') #=> "atg"
! # puts s.splice('join(1..3,8..10)') #=> "atgcat"
! # puts s.splice('complement(1..3)') #=> "cat"
! # puts s.splice('complement(join(1..3,8..10))') #=> "atgcat"
! #
! # Note that 'complement'ed Genbank position strings will have no
! # effect on Bio::Sequence::AA objects.
! # ---
! # *Arguments*:
! # * (required) _position_: String *or* Bio::Locations object
! # *Returns*:: Bio::Sequence::NA/AA object
def splice(position)
unless position.is_a?(Locations) then
Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** format.rb 6 Feb 2006 14:20:35 -0000 1.2
--- format.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 4,8 ****
# Copyright:: Copyright (C) 2006
# Toshiaki Katayama ,
! # Naohisa Goto
# License:: Ruby's
#
--- 4,9 ----
# Copyright:: Copyright (C) 2006
# Toshiaki Katayama ,
! # Naohisa Goto ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 21,29 ****
class Sequence
module Format
! # Output the FASTA format string of the sequence. The 1st argument is
! # used in the comment line. If the 2nd argument (integer) is given,
! # the output sequence will be folded.
def format_fasta(header = nil, width = nil)
header ||= "#{@entry_id} #{@definition}"
--- 22,56 ----
class Sequence
+ # = DESCRIPTION
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
+ # of methods used by Bio::Sequence#output to output sequences in
+ # common bioinformatic formats. These are not called in isolation.
+ #
+ # = USAGE
+ # # Given a Bio::Sequence object,
+ # puts s.output(:fasta)
+ # puts s.output(:genbank)
+ # puts s.output(:embl)
module Format
! # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
! # case, it would be difficult to successfully call this method outside
! # its expected context).
! #
! # Output the FASTA format string of the sequence.
! #
! # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of
! # using either the header or width arguments. So something needs to be
! # changed...
! #
! # Currently, this method is used in Bio::Sequence#output like so,
! #
! # s = Bio::Sequence.new('atgc')
! # puts s.output(:fasta) #=> "> \natgc\n"
! # ---
! # *Arguments*:
! # * (optional) _header_: String (default nil)
! # * (optional) _width_: Fixnum (default nil)
! # *Returns*:: String object
def format_fasta(header = nil, width = nil)
header ||= "#{@entry_id} #{@definition}"
***************
*** 37,44 ****
end
! def format_gff
raise NotImplementedError
end
def format_genbank
prefix = ' ' * 5
--- 64,83 ----
end
! # Not yet implemented :)
! # Remove the nodoc command after implementation!
! # ---
! # *Returns*:: String object
! def format_gff #:nodoc:
raise NotImplementedError
end
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+ # case, it would be difficult to successfully call this method outside
+ # its expected context).
+ #
+ # Output the Genbank format string of the sequence.
+ # Used in Bio::Sequence#output.
+ # ---
+ # *Returns*:: String object
def format_genbank
prefix = ' ' * 5
***************
*** 49,52 ****
--- 88,99 ----
end
+ # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+ # case, it would be difficult to successfully call this method outside
+ # its expected context).
+ #
+ # Output the EMBL format string of the sequence.
+ # Used in Bio::Sequence#output.
+ # ---
+ # *Returns*:: String object
def format_embl
prefix = 'FT '
Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/aa.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** aa.rb 6 Feb 2006 14:11:31 -0000 1.2
--- aa.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 17,27 ****
class Sequence
!
! # Amino Acid sequence
class AA < String
include Bio::Sequence::Common
! # Generate a amino acid sequence object from a string.
def initialize(str)
super
--- 18,61 ----
class Sequence
! # = DESCRIPTION
! # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
! #
! # = USAGE
! # # Create an Amino Acid sequence.
! # aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
! #
! # # What are the three-letter codes for all the residues?
! # puts aa.codes
! #
! # # What are the names of all the residues?
! # puts aa.names
! #
! # # What is the molecular weight of this peptide?
! # puts aa.molecular_weight
class AA < String
include Bio::Sequence::Common
! # Generate an amino acid sequence object from a string.
! #
! # s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
! #
! # or maybe (if you have an amino acid sequence in a file)
! #
! # s = Bio::Sequence::AA.new(File.open('aa.txt').read)
! #
! # Amino Acid sequences are *always* all uppercase in bioruby
! #
! # s = Bio::Sequence::AA.new("rrLeHtfV")
! # puts s #=> "RRLEHTFV"
! #
! # Whitespace is stripped from the sequence
! #
! # s = Bio::Sequence::AA.new("RRL\nELA\tRG\r RL")
! # puts s #=> "RRLELARGRL"
! # ---
! # *Arguments*:
! # * (required) _str_: String
! # *Returns*:: Bio::Sequence::AA object
def initialize(str)
super
***************
*** 31,45 ****
! # Estimate the weight of this protein.
def molecular_weight
Bio::AminoAcid.weight(self)
end
def to_re
Bio::AminoAcid.to_re(self)
end
! # Generate the list of the names of the each residue along with the
! # sequence (3 letters code).
def codes
array = []
--- 65,98 ----
! # Estimate molecular weight based on
! # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.molecular_weight #=> 572.655
! # ---
! # *Returns*:: Float object
def molecular_weight
Bio::AminoAcid.weight(self)
end
+ # Create a ruby regular expression instance
+ # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html])
+ #
+ # s = Bio::Sequence::AA.new("RRLE")
+ # puts s.to_re #=> /RRLE/
+ # ---
+ # *Returns*:: Regexp object
def to_re
Bio::AminoAcid.to_re(self)
end
! # Generate the list of the names of each residue along with the
! # sequence (3 letters code). Codes used in bioruby are found in the
! # Bio::AminoAcid::NAMES hash.
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.codes #=> ["Arg", "Arg", "Leu", "Glu"]
! # ---
! # *Returns*:: Array object
def codes
array = []
***************
*** 50,54 ****
end
! # Similar to codes but returns long names.
def names
self.codes.map do |x|
--- 103,115 ----
end
! # Generate the list of the names of each residue along with the
! # sequence (full name). Names used in bioruby are found in the
! # Bio::AminoAcid::NAMES hash.
! #
! # s = Bio::Sequence::AA.new("RRLE")
! # puts s.names
! # #=> ["arginine", "arginine", "leucine", "glutamic acid"]
! # ---
! # *Returns*:: Array object
def names
self.codes.map do |x|
Index: generic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/generic.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** generic.rb 6 Feb 2006 14:26:04 -0000 1.3
--- generic.rb 26 Mar 2006 02:27:59 -0000 1.4
***************
*** 14,18 ****
class Sequence
! class Generic < String
include Bio::Sequence::Common
--- 14,18 ----
class Sequence
! class Generic < String #:nodoc:
include Bio::Sequence::Common
Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/na.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** na.rb 6 Feb 2006 14:13:52 -0000 1.2
--- na.rb 26 Mar 2006 02:27:59 -0000 1.3
***************
*** 3,7 ****
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama
# License:: Ruby's
#
--- 3,8 ----
#
# Copyright:: Copyright (C) 2006
! # Toshiaki Katayama ,
! # Ryan Raaum
# License:: Ruby's
#
***************
*** 19,28 ****
! # Nucleic Acid sequence
class NA < String
include Bio::Sequence::Common
! # Generate a nucleic acid sequence object from a string.
def initialize(str)
super
--- 20,78 ----
! # = DESCRIPTION
! # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
! #
! # = USAGE
! # # Create a Nucleic Acid sequence.
! # dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
! # rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
! #
! # # What are the names of all the bases?
! # puts dna.names
! # puts rna.names
! #
! # # What is the GC percentage?
! # puts dna.gc_percent
! # puts rna.gc_percent
! #
! # # What is the molecular weight?
! # puts dna.molecular_weight
! # puts rna.molecular_weight
! #
! # # What is the reverse complement?
! # puts dna.reverse_complement
! # puts dna.complement
! #
! # # Is this sequence DNA or RNA?
! # puts dna.rna?
! #
! # # Translate my sequence (see method docs for many options)
! # puts dna.translate
! # puts rna.translate
class NA < String
include Bio::Sequence::Common
! # Generate a nucleic acid sequence object from a string.
! #
! # s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
! #
! # or maybe (if you have a nucleic acid sequence in a file)
! #
! # s = Bio::Sequence::NA.new(File.open('dna.txt').read)
! #
! # Nucleic Acid sequences are *always* all lowercase in bioruby
! #
! # s = Bio::Sequence::NA.new("AAGcTtGG")
! # puts s #=> "aagcttgg"
! #
! # Whitespace is stripped from the sequence
! #
! # s = Bio::Sequence::NA.new("atg\nggg\ttt\r gc")
! # puts s #=> "atggggttgc"
! # ---
! # *Arguments*:
! # * (required) _str_: String
! # *Returns*:: Bio::Sequence::NA object
def initialize(str)
super
***************
*** 31,36 ****
end
! # This method depends on Locations class, see bio/location.rb
! def splicing(position)
mRNA = super
if mRNA.rna?
--- 81,86 ----
end
! # Alias of Bio::Sequence::Common splice method, documented there.
! def splicing(position) #:nodoc:
mRNA = super
if mRNA.rna?
***************
*** 42,46 ****
end
! # Returns complement sequence without reversing ("atgc" -> "tacg")
def forward_complement
s = self.class.new(self)
--- 92,103 ----
end
! # Returns a new complementary sequence object (without reversing).
! # The original sequence object is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.forward_complement #=> 'tacg'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def forward_complement
s = self.class.new(self)
***************
*** 49,53 ****
end
! # Convert to complement sequence without reversing ("atgc" -> "tacg")
def forward_complement!
if self.rna?
--- 106,117 ----
end
! # Converts the current sequence into its complement (without reversing).
! # The original sequence object is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.forward_complement! #=> 'tacg'
! # puts s #=> 'tacg'
! # ---
! # *Returns*:: current Bio::Sequence::NA object (modified)
def forward_complement!
if self.rna?
***************
*** 59,63 ****
end
! # Returns reverse complement sequence ("atgc" -> "gcat")
def reverse_complement
s = self.class.new(self)
--- 123,134 ----
end
! # Returns a new sequence object with the reverse complement
! # sequence to the original. The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.reverse_complement #=> 'gcat'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def reverse_complement
s = self.class.new(self)
***************
*** 66,70 ****
end
! # Convert to reverse complement sequence ("atgc" -> "gcat")
def reverse_complement!
self.reverse!
--- 137,148 ----
end
! # Converts the original sequence into its reverse complement.
! # The original sequence is modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.reverse_complement! #=> 'gcat'
! # puts s #=> 'gcat'
! # ---
! # *Returns*:: current Bio::Sequence::NA object (modified)
def reverse_complement!
self.reverse!
***************
*** 72,87 ****
end
! # Aliases for short
alias complement reverse_complement
alias complement! reverse_complement!
! # Translate into the amino acid sequence from the given frame and the
! # selected codon table. The table also can be a Bio::CodonTable object.
! # The 'unknown' character is used for invalid/unknown codon (can be
! # used for 'nnn' and/or gap translation in practice).
#
! # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
! # (4, 5 or 6 is also accepted) for the reverse strand.
def translate(frame = 1, table = 1, unknown = 'X')
if table.is_a?(Bio::CodonTable)
--- 150,235 ----
end
! # Alias for Bio::Sequence::NA#reverse_complement
alias complement reverse_complement
+
+ # Alias for Bio::Sequence::NA#reverse_complement!
alias complement! reverse_complement!
! # Translate into an amino acid sequence.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.translate #=> "MA*"
#
! # By default, translate starts in reading frame position 1, but you
! # can start in either 2 or 3 as well,
! #
! # puts s.translate(2) #=> "WR"
! # puts s.translate(3) #=> "GV"
! #
! # You may also translate the reverse complement in one step by using frame
! # values of -1, -2, and -3 (or 4, 5, and 6)
! #
! # puts s.translate(-1) #=> "SRH"
! # puts s.translate(4) #=> "SRH"
! # puts s.reverse_complement.translate(1) #=> "SRH"
! #
! # The default codon table in the translate function is the Standard
! # Eukaryotic codon table. The translate function takes either a
! # number or a Bio::CodonTable object for its table argument.
! # The available tables are
! # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
! #
! # 1. "Standard (Eukaryote)"
! # 2. "Vertebrate Mitochondrial"
! # 3. "Yeast Mitochondorial"
! # 4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
! # 5. "Invertebrate Mitochondrial"
! # 6. "Ciliate Macronuclear and Dasycladacean"
! # 9. "Echinoderm Mitochondrial"
! # 10. "Euplotid Nuclear"
! # 11. "Bacteria"
! # 12. "Alternative Yeast Nuclear"
! # 13. "Ascidian Mitochondrial"
! # 14. "Flatworm Mitochondrial"
! # 15. "Blepharisma Macronuclear"
! # 16. "Chlorophycean Mitochondrial"
! # 21. "Trematode Mitochondrial"
! # 22. "Scenedesmus obliquus mitochondrial"
! # 23. "Thraustochytrium Mitochondrial"
! #
! # If you are using anything other than the default table, you must specify
! # frame in the translate method call,
! #
! # puts s.translate #=> "MA*" (using defaults)
! # puts s.translate(1,1) #=> "MA*" (same as above, but explicit)
! # puts s.translate(1,2) #=> "MAW" (different codon table)
! #
! # and using a Bio::CodonTable instance in the translate method call,
! #
! # mt_table = Bio::CodonTable[2]
! # puts s.translate(1, mt_table) #=> "MAW"
! #
! # By default, any invalid or unknown codons (as could happen if the
! # sequence contains ambiguities) will be represented by 'X' in the
! # translated sequence.
! # You may change this to any character of your choice.
! #
! # s = Bio::Sequence::NA.new('atgcNNtga')
! # puts s.translate #=> "MX*"
! # puts s.translate(1,1,'9') #=> "M9*"
! #
! # The translate method considers gaps to be unknown characters and treats
! # them as such (i.e. does not collapse sequences prior to translation), so
! #
! # s = Bio::Sequence::NA.new('atgc--tga')
! # puts s.translate #=> "MX*"
! # ---
! # *Arguments*:
! # * (optional) _frame_: one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
! # * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
! # (default 1)
! # * (optional) _unknown_: Character (default 'X')
! # *Returns*:: Bio::Sequence::AA object
def translate(frame = 1, table = 1, unknown = 'X')
if table.is_a?(Bio::CodonTable)
***************
*** 109,113 ****
end
! # Returns counts of the each codon in the sequence by Hash.
def codon_usage
hash = Hash.new(0)
--- 257,276 ----
end
! # Returns counts of each codon in the sequence in a hash.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.codon_usage #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
! #
! # This method does not validate codons! Any three letter group is a 'codon'. So,
! #
! # s = Bio::Sequence::NA.new('atggNNtga')
! # puts s.codon_usage #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
! #
! # s = Bio::Sequence::NA.new('atgg--tga')
! # puts s.codon_usage #=> {"tga"=>1, "g--"=>1, "atg"=>1}
! #
! # Also, there is no option to work in any frame other than the first.
! # ---
! # *Returns*:: Hash object
def codon_usage
hash = Hash.new(0)
***************
*** 118,122 ****
end
! # Calculate the ratio of GC / ATGC bases in percent.
def gc_percent
count = self.composition
--- 281,291 ----
end
! # Calculate the ratio of GC / ATGC bases as a percentage rounded to
! # the nearest whole number.
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.gc_percent #=> 55
! # ---
! # *Returns*:: Fixnum
def gc_percent
count = self.composition
***************
*** 127,136 ****
end
! # Show abnormal bases other than 'atgcu'.
def illegal_bases
self.scan(/[^atgcu]/).sort.uniq
end
! # Estimate the weight of this biological string molecule.
def molecular_weight
if self.rna?
--- 296,322 ----
end
! # Returns an alphabetically sorted array of any non-standard bases
! # (other than 'atgcu').
! #
! # s = Bio::Sequence::NA.new('atgStgQccR')
! # puts s.illegal_bases #=> ["q", "r", "s"]
! # ---
! # *Returns*:: Array object
def illegal_bases
self.scan(/[^atgcu]/).sort.uniq
end
! # Estimate molecular weight (using the values from BioPerl's
! # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.molecular_weight #=> 2841.00708
! #
! # RNA and DNA do not have the same molecular weights,
! #
! # s = Bio::Sequence::NA.new('auggcguga')
! # puts s.molecular_weight #=> 2956.94708
! # ---
! # *Returns*:: Float object
def molecular_weight
if self.rna?
***************
*** 141,145 ****
end
! # Convert the universal code string into the regular expression.
def to_re
if self.rna?
--- 327,337 ----
end
! # Create a ruby regular expression instance
! # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html]).
! #
! # s = Bio::Sequence::NA.new('atggcgtga')
! # puts s.to_re #=> /atggcgtga/
! # ---
! # *Returns*:: Regexp object
def to_re
if self.rna?
***************
*** 150,154 ****
end
! # Convert the self string into the list of the names of the each base.
def names
array = []
--- 342,353 ----
end
! # Generate the list of the names of each nucleotide along with the
! # sequence (full name). Names used in bioruby are found in the
! # Bio::AminoAcid::NAMES hash.
! #
! # s = Bio::Sequence::NA.new('atg')
! # puts s.names #=> ["Adenine", "Thymine", "Guanine"]
! # ---
! # *Returns*:: Array object
def names
array = []
***************
*** 159,176 ****
end
! # Output a DNA string by substituting 'u' to 't'.
def dna
self.tr('u', 't')
end
def dna!
self.tr!('u', 't')
end
! # Output a RNA string by substituting 't' to 'u'.
def rna
self.tr('t', 'u')
end
def rna!
self.tr!('t', 'u')
--- 358,405 ----
end
! # Returns a new sequence object with any 'u' bases changed to 't'.
! # The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('augc')
! # puts s.dna #=> 'atgc'
! # puts s #=> 'augc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def dna
self.tr('u', 't')
end
+ # Changes any 'u' bases in the original sequence to 't'.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('augc')
+ # puts s.dna! #=> 'atgc'
+ # puts s #=> 'atgc'
+ # ---
+ # *Returns*:: current Bio::Sequence::NA object (modified)
def dna!
self.tr!('u', 't')
end
! # Returns a new sequence object with any 't' bases changed to 'u'.
! # The original sequence is not modified.
! #
! # s = Bio::Sequence::NA.new('atgc')
! # puts s.rna #=> 'augc'
! # puts s #=> 'atgc'
! # ---
! # *Returns*:: new Bio::Sequence::NA object
def rna
self.tr('t', 'u')
end
+ # Changes any 't' bases in the original sequence to 'u'.
+ # The original sequence is modified.
+ #
+ # s = Bio::Sequence::NA.new('atgc')
+ # puts s.rna! #=> 'augc'
+ # puts s #=> 'augc'
+ # ---
+ # *Returns*:: current Bio::Sequence::NA object (modified)
def rna!
self.tr!('t', 'u')
From aerts at dev.open-bio.org Mon Mar 27 18:34:37 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Mon, 27 Mar 2006 18:34:37 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.6,1.7
Message-ID: <200603271834.k2RIYb5l020081@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv20061/io
Modified Files:
fetch.rb
Log Message:
Updated docs.
Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** fetch.rb 20 Mar 2006 12:40:13 -0000 1.6
--- fetch.rb 27 Mar 2006 18:34:35 -0000 1.7
***************
*** 31,36 ****
# = DESCRIPTION
# The Bio::Fetch class provides an interface to dbfetch servers. Given
! # a database name and an accession number, these servers return the nucleic
! # or amino acid sequence for that accession number in that database.
#
# Possible dbfetch servers include:
--- 31,37 ----
# = DESCRIPTION
# The Bio::Fetch class provides an interface to dbfetch servers. Given
! # a database name and an accession number, these servers return the associated
! # record. For example, for the embl database on the EBI, that would be a
! # nucleic or amino acid sequence.
#
# Possible dbfetch servers include:
***************
*** 82,86 ****
# Get raw database entry by id. This method lets the Bio::Registry class
! # use Bio::Fetch objects and should probably not be used directly.
def get_by_id(id)
fetch(@database, id)
--- 83,87 ----
# Get raw database entry by id. This method lets the Bio::Registry class
! # use Bio::Fetch objects.
def get_by_id(id)
fetch(@database, id)
From aerts at dev.open-bio.org Tue Mar 28 13:42:34 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Tue, 28 Mar 2006 13:42:34 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db gff.rb,1.5,1.6
Message-ID: <200603281342.k2SDgYvl024727@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv24707
Modified Files:
gff.rb
Log Message:
Added documentation.
Index: gff.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/gff.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** gff.rb 18 Dec 2005 15:58:41 -0000 1.5
--- gff.rb 28 Mar 2006 13:42:32 -0000 1.6
***************
*** 4,21 ****
# Copyright:: Copyright (C) 2003, 2005
# Toshiaki Katayama
# License:: LGPL
#
# $Id$
#
- # == Description
- #
- #
- # == Example
- #
- #
- # == References
- #
- # * http://www.sanger.ac.uk/Software/formats/GFF/
- #
#--
#
--- 4,12 ----
# Copyright:: Copyright (C) 2003, 2005
# Toshiaki Katayama
+ # 2006 Jan Aerts
# License:: LGPL
#
# $Id$
#
#--
#
***************
*** 38,46 ****
module Bio
!
class GFF
!
! attr_accessor :records
!
def initialize(str = '')
@records = Array.new
--- 29,78 ----
module Bio
! # == DESCRIPTION
! # The Bio::GFF and Bio::GFF::Record classes describe data contained in a
! # GFF-formatted file. For information on the GFF format, see
! # http://www.sanger.ac.uk/Software/formats/GFF/. Data are represented in tab-
! # delimited format, including
! # * seqname
! # * source
! # * feature
! # * start
! # * end
! # * score
! # * strand
! # * frame
! # * attributes (optional)
! #
! # For example:
! # SEQ1 EMBL atg 103 105 . + 0
! # SEQ1 EMBL exon 103 172 . + 0
! # SEQ1 EMBL splice5 172 173 . + .
! # SEQ1 netgene splice5 172 173 0.94 + .
! # SEQ1 genie sp5-20 163 182 2.3 + .
! # SEQ1 genie sp5-10 168 177 2.1 + .
! # SEQ1 grail ATG 17 19 2.1 - 0
! #
! # The Bio::GFF object is a container for Bio::GFF::Record objects, each
! # representing a single line in the GFF file.
class GFF
! # Creates a Bio::GFF object by building a collection of Bio::GFF::Record
! # objects.
! #
! # Create a Bio::GFF object the hard way
! # this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
! # this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
! # this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
! # this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
! # this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
! # this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
! # this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
! # p Bio::GFF.new(this_gff)
! #
! # or create one based on a GFF-formatted file:
! # p Bio::GFF.new(File.open('my_data.gff'))
! # ---
! # *Arguments*:
! # * _str_: string in GFF format
! # *Returns*:: Bio::GFF object
def initialize(str = '')
@records = Array.new
***************
*** 50,66 ****
--- 82,127 ----
end
+ # An array of Bio::GFF::Record objects.
+ attr_accessor :records
+
+ # Represents a single line of a GFF-formatted file. See Bio::GFF for more
+ # information.
class Record
+ # Name of the reference sequence
attr_accessor :seqname
+
+ # Name of the source of the feature (e.g. program that did prediction)
attr_accessor :source
+
+ # Name of the feature
attr_accessor :feature
+
+ # Start position of feature on reference sequence
attr_accessor :start
+
+ # End position of feature on reference sequence
attr_accessor :end
+
+ # Score of annotation (e.g. e-value for BLAST search)
attr_accessor :score
+
+ # Strand that feature is located on
attr_accessor :strand
+
+ # For features of type 'exon': indicates where feature begins in the reading frame
attr_accessor :frame
+
+ # List of tag=value pairs (e.g. to store name of the feature: ID=my_id)
attr_accessor :attributes
+
+ # Comments for the GFF record
attr_accessor :comments
+ # Creates a Bio::GFF::Record object. Is typically not called directly, but
+ # is called automatically when creating a Bio::GFF object.
+ # ---
+ # *Arguments*:
+ # * _str_: a tab-delimited line in GFF format
def initialize(str)
@comments = str.chomp[/#.*/]
***************
*** 83,90 ****
--- 144,158 ----
end
+ # = DESCRIPTION
+ # Represents version 2 of GFF specification. Is completely implemented by the
+ # Bio::GFF class.
class GFF2 < GFF
VERSION = 2
end
+ # = DESCRIPTION
+ # Represents version 3 of GFF specification. Is completely implemented by the
+ # Bio::GFF class. For more information on version GFF3, see
+ # http://flybase.bio.indiana.edu/annot/gff3.html
class GFF3 < GFF
VERSION = 3
***************
*** 103,106 ****
end
! p Bio::GFF.new(ARGF.read)
end
--- 171,181 ----
end
! this_gff = "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
! this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
! this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
! this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
! this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
! this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
! this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
! p Bio::GFF.new(this_gff)
end