[BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21

Tue Nov 20 15:22:05 UTC 2007

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040

Modified Files:
	pubmed.rb 
Log Message:
* ncbi_access_wait is introduced to wait 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb	15 Nov 2007 07:40:27 -0000	1.20
--- pubmed.rb	20 Nov 2007 15:22:03 -0000	1.21
***************
*** 2,6 ****
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001 Toshiaki Katayama <k at bioruby.org>
  # Copyright::  Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::    The Ruby License
--- 2,6 ----
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001, 2007 Toshiaki Katayama <k at bioruby.org>
  # Copyright::  Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::    The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
  class PubMed
  
+   # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+   # weekdays for any series of more than 100 requests.
+   # -> Not implemented yet in BioRuby
+ 
+   # Make no more than one request every 3 seconds.
+   NCBI_INTERVAL = 3
+   @@last_access = nil
+ 
+   def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+     if @@last_access
+       duration = Time.now - @@last_access
+       if wait > duration
+         sleep wait - duration
+       end
+     end
+     @@last_access = Time.now
+   end
+ 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 90,115 ****
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     hash['retmax'] = 100 unless hash['retmax']
! 
!     opts = []
!     hash.each do |k, v|
!       opts << "#{k}=#{v}"
!     end
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + CGI.escape(str))
!     result = response.body
!     if hash['rettype'] == 'count'
!       result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
!     else
!       result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
!     end
!     return result
!   end
  
-   def self.esearch2(str, hash = {})
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
--- 108,113 ----
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     return nil if str.empty?
  
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
***************
*** 121,124 ****
--- 119,124 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 134,160 ****
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
-   #   Bio::PubMed.efetch(123,456,789)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: MEDLINE formatted String
!   def self.efetch(*ids)
!     return [] if ids.empty?
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! 
!     list = ids.join(",")
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
!     result = response.body
!     result = result.split(/\n\n+/)
!     return result
!   end
! 
!   def self.efetch2(ids, hash = {})
!     return "" if ids.empty?
      ids = ids.join(",") if ids === Array
  
--- 134,144 ----
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: Array of MEDLINE formatted String
!   def self.efetch(ids, hash = {})
!     return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
  
***************
*** 169,172 ****
--- 153,158 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 174,178 ****
        result = result.split(/\n\n+/)
      end
- 
      return result
    end
--- 160,163 ----
***************
*** 254,266 ****
  
    puts "--- Search PubMed by E-Utils ---"
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
! 
!   Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
!     p x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Bio::PubMed.efetch("10592173", "14693808")
!   puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
  
    puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
  
    puts "--- Search PubMed by E-Utils ---"
!   opts = {"rettype" => "count"}
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
!     puts x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Time.now
!   puts Bio::PubMed.efetch(16381885)
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   opts = {"retmode" => "xml"}
!   puts Bio::PubMed.efetch([10592173, 14693808], opts)
!   puts Time.now
!   puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
  
    puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("10592173")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("10592173")
  
  end
--- 270,278 ----
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("16381885")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("16381885")
  
  end