[BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21
Katayama Toshiaki
k at dev.open-bio.org
Tue Nov 20 15:22:05 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040
Modified Files:
pubmed.rb
Log Message:
* ncbi_access_wait is introduced to wait 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20
--- pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21
***************
*** 2,6 ****
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001 Toshiaki Katayama <k at bioruby.org>
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
# License:: The Ruby License
--- 2,6 ----
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k at bioruby.org>
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
# License:: The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
class PubMed
+ # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+ # weekdays for any series of more than 100 requests.
+ # -> Not implemented yet in BioRuby
+
+ # Make no more than one request every 3 seconds.
+ NCBI_INTERVAL = 3
+ @@last_access = nil
+
+ def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+ if @@last_access
+ duration = Time.now - @@last_access
+ if wait > duration
+ sleep wait - duration
+ end
+ end
+ @@last_access = Time.now
+ end
+
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 90,115 ****
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! hash['retmax'] = 100 unless hash['retmax']
!
! opts = []
! hash.each do |k, v|
! opts << "#{k}=#{v}"
! end
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
- def self.esearch2(str, hash = {})
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
--- 108,113 ----
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! return nil if str.empty?
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
***************
*** 121,124 ****
--- 119,124 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 134,160 ****
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! def self.efetch2(ids, hash = {})
! return "" if ids.empty?
ids = ids.join(",") if ids === Array
--- 134,144 ----
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: Array of MEDLINE formatted String
! def self.efetch(ids, hash = {})
! return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
***************
*** 169,172 ****
--- 153,158 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 174,178 ****
result = result.split(/\n\n+/)
end
-
return result
end
--- 160,163 ----
***************
*** 254,266 ****
puts "--- Search PubMed by E-Utils ---"
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
!
! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Bio::PubMed.efetch("10592173", "14693808")
! puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
puts "--- Search PubMed by E-Utils ---"
! opts = {"rettype" => "count"}
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
! puts x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Time.now
! puts Bio::PubMed.efetch(16381885)
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! opts = {"retmode" => "xml"}
! puts Bio::PubMed.efetch([10592173, 14693808], opts)
! puts Time.now
! puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("10592173")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("10592173")
end
--- 270,278 ----
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("16381885")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("16381885")
end
More information about the bioruby-cvs
mailing list