[BioRuby-cvs] bioruby/lib/bio/io ncbirest.rb, NONE, 1.1 pubmed.rb, 1.23, 1.24
Katayama Toshiaki
k at dev.open-bio.org
Tue Feb 19 03:36:54 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv12321
Modified Files:
pubmed.rb
Added Files:
ncbirest.rb
Log Message:
* NCBI E-Utilities (REST) functionality has been separated out into
ncbirest.rb, and pubmed.rb has been changed to use the Bio::NCBI::REST
class for esearch and efetch. You can now search and retrieve any database
in any format that NCBI supports through E-Utilities via the
Bio::NCBI::REST interface (currently, only the esearch and efetch methods
are implemented).
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** pubmed.rb 12 Dec 2007 13:53:26 -0000 1.23
--- pubmed.rb 19 Feb 2008 03:36:52 -0000 1.24
***************
*** 2,6 ****
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama <k at bioruby.org>
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
# License:: The Ruby License
--- 2,6 ----
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001, 2007, 2008 Toshiaki Katayama <k at bioruby.org>
# Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
# License:: The Ruby License
***************
*** 9,12 ****
--- 9,13 ----
#
+ require 'bio/io/ncbirest'
require 'bio/command'
require 'cgi' unless defined?(CGI)
***************
*** 69,95 ****
# medline = Bio::MEDLINE.new(manuscript)
#
! class PubMed
!
! # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
! # weekdays for any series of more than 100 requests.
! # -> Not implemented yet in BioRuby
!
! # Make no more than one request every 3 seconds.
! NCBI_INTERVAL = 3
! @@last_access = nil
!
! private
!
! def ncbi_access_wait(wait = NCBI_INTERVAL)
! if @@last_access
! duration = Time.now - @@last_access
! if wait > duration
! sleep wait - duration
! end
! end
! @@last_access = Time.now
! end
!
! public
# Search the PubMed database by given keywords using E-Utils and returns
--- 70,74 ----
# medline = Bio::MEDLINE.new(manuscript)
#
! class PubMed < Bio::NCBI::REST
# Search the PubMed database by given keywords using E-Utils and returns
***************
*** 100,136 ****
# ---
# *Arguments*:
! # * _id_: query string (required)
! # * _field_
! # * _reldate_
! # * _mindate_
! # * _maxdate_
! # * _datetype_
! # * _retstart_
! # * _retmax_ (default 100)
! # * _retmode_
! # * _rettype_
# *Returns*:: array of PubMed IDs or a number of results
def esearch(str, hash = {})
! return nil if str.empty?
!
! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
! opts = {
! "retmax" => 100,
! "tool" => "bioruby",
! "db" => "pubmed",
! "term" => str
! }
opts.update(hash)
!
! ncbi_access_wait
!
! response, = Bio::Command.post_form(serv, opts)
! result = response.body
! if opts['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
end
--- 79,98 ----
# ---
# *Arguments*:
! # * _str_: query string (required)
! # * _hash_: hash of E-Utils options
! # * _retmode_: "xml", "html", ...
! # * _rettype_: "medline", ...
! # * _retmax_: integer (default 100)
! # * _retstart_: integer
! # * _field_
! # * _reldate_
! # * _mindate_
! # * _maxdate_
! # * _datetype_
# *Returns*:: array of PubMed IDs or a number of results
def esearch(str, hash = {})
! opts = { "db" => "pubmed" }
opts.update(hash)
! super(str, opts)
end
***************
*** 142,168 ****
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
# *Returns*:: Array of MEDLINE formatted String
def efetch(ids, hash = {})
! return nil if ids.to_s.empty?
! ids = ids.join(",") if ids === Array
!
! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
! opts = {
! "tool" => "bioruby",
! "db" => "pubmed",
! "retmode" => "text",
! "rettype" => "medline",
! "id" => ids,
! }
opts.update(hash)
!
! ncbi_access_wait
!
! response, = Bio::Command.post_form(serv, opts)
! result = response.body
! if opts["retmode"] == "text"
! result = result.split(/\n\n+/)
! end
! return result
end
--- 104,122 ----
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
+ # * _hash_: hash of E-Utils options
+ # * _retmode_: "xml", "html", ...
+ # * _rettype_: "medline", ...
+ # * _retmax_: integer (default 100)
+ # * _retstart_: integer
+ # * _field_
+ # * _reldate_
+ # * _mindate_
+ # * _maxdate_
+ # * _datetype_
# *Returns*:: Array of MEDLINE formatted String
def efetch(ids, hash = {})
! opts = { "db" => "pubmed", "rettype" => "medline" }
opts.update(hash)
! super(ids, opts)
end
--- NEW FILE: ncbirest.rb ---
#
# = bio/io/ncbirest.rb - NCBI Entrez client module
#
# Copyright:: Copyright (C) 2008 Toshiaki Katayama <k at bioruby.org>
# License:: The Ruby License
#
# $Id: ncbirest.rb,v 1.1 2008/02/19 03:36:52 k Exp $
#
require 'bio/command'
module Bio

  # == Description
  #
  # The Bio::NCBI::REST class provides a REST client for the NCBI E-Utilities.
  #
  # * Entrez utilities index:
  #   http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
  # * How to link:
  #   http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
  #
  # == Usage
  #
  #   Bio::NCBI::REST.esearch("tardigrada", {"db"=>"nuccore", "rettype"=>"gb"})
  #   Bio::NCBI::REST.esearch("yeast kinase", {"db"=>"nuccore", "rettype"=>"gb", "retmode"=>"xml", "retmax"=>5})
  #   Bio::NCBI::REST.efetch("185041", {"db"=>"nuccore", "rettype"=>"gb"})
  #   Bio::NCBI::REST.efetch("J00231", {"db"=>"nuccore", "rettype"=>"gb", "retmode"=>"xml"})
  #
  class NCBI
    class REST

      # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
      # weekdays for any series of more than 100 requests.
      # -> Not implemented yet in BioRuby

      # Make no more than one request every 3 seconds.
      NCBI_INTERVAL = 3

      # Time of the most recent request, or nil before the first one.
      # Kept in a class variable so that every instance (and subclass)
      # shares a single throttle.
      @@last_access = nil

      private

      # Sleeps as long as needed so that at least +wait+ seconds separate
      # two consecutive requests, then records the current time as the
      # time of the last access.
      # ---
      # *Arguments*:
      # * _wait_: minimum interval in seconds (default NCBI_INTERVAL)
      # *Returns*:: the recorded Time
      def ncbi_access_wait(wait = NCBI_INTERVAL)
        if @@last_access
          duration = Time.now - @@last_access
          if wait > duration
            sleep wait - duration
          end
        end
        @@last_access = Time.now
      end

      public

      # Search the NCBI database by given keywords using E-Utils and returns
      # an array of entry IDs.
      #
      # For information on the possible arguments, see
      #
      # * http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html
      #
      # ---
      # *Arguments*:
      # * _str_: query string (required)
      # * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"};
      #   the entries are merged over the defaults and sent to NCBI as given
      #   * _db_: "nuccore", "pubmed", ...
      #   * _retmode_: "xml", "html", ...
      #   * _rettype_: "gb", "medline", "count", ...
      #   * _retmax_: integer (default 100)
      #   * _retstart_: integer
      #   * _field_
      #   * _reldate_
      #   * _mindate_
      #   * _maxdate_
      #   * _datetype_
      # *Returns*:: array of entry IDs (Strings), or an Integer number of
      #             results when "rettype" is "count"; nil when the query
      #             is nil or empty (no request is made in that case)
      def esearch(str, hash = {})
        # to_s makes a nil query return nil instead of raising
        # NoMethodError, matching the guard used in efetch.
        return nil if str.to_s.empty?

        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        opts = {
          "retmax" => 100,
          "tool"   => "bioruby",
          "term"   => str
        }
        opts.update(hash)

        ncbi_access_wait

        response, = Bio::Command.post_form(serv, opts)
        result = response.body
        if opts['rettype'] == 'count'
          result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
        else
          result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
        end
        return result
      end

      # Retrieve database entries by given IDs using E-Utils (efetch).
      # Multiple IDs can be supplied as an Array; they are sent to NCBI
      # as a single comma separated list.
      # ---
      # *Arguments*:
      # * _ids_: an NCBI entry ID or an Array of IDs (required)
      # * _hash_: hash of E-Utils option {"db" => "nuccore", "rettype" => "gb"};
      #   the entries are merged over the defaults and sent to NCBI as given
      #   * _db_: "nuccore", "pubmed", ...
      #   * _retmode_: "xml", "html", ... (default "text")
      #   * _rettype_: "gb", "medline", "count",...
      #   * _retmax_: integer
      #   * _retstart_: integer
      #   * _field_
      #   * _reldate_
      #   * _mindate_
      #   * _maxdate_
      #   * _datetype_
      # *Returns*:: Array of entry String (the response body split on blank
      #             lines) when "retmode" is "text"; otherwise the response
      #             body as a single String; nil when no ID is given
      #             (no request is made in that case)
      def efetch(ids, hash = {})
        # The original test was "ids === Array", which is never true for a
        # list of IDs, so arrays were sent without being comma-joined.
        ids = ids.join(",") if ids.kind_of?(Array)
        # Checked after the join so that an empty Array is also rejected.
        return nil if ids.to_s.empty?

        serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        opts = {
          "tool"    => "bioruby",
          "retmode" => "text",
          "id"      => ids,
        }
        opts.update(hash)

        ncbi_access_wait

        response, = Bio::Command.post_form(serv, opts)
        result = response.body
        if opts["retmode"] == "text"
          result = result.split(/\n\n+/)
        end
        return result
      end

      # Class-level shortcut: runs #esearch on a fresh instance with the
      # same arguments.
      def self.esearch(*args)
        self.new.esearch(*args)
      end

      # Class-level shortcut: runs #efetch on a fresh instance with the
      # same arguments.
      def self.efetch(*args)
        self.new.efetch(*args)
      end

    end # REST
  end # NCBI
end # Bio
if __FILE__ == $0

  # Option fragments that are merged together to build each request.
  nuc      = {"db"=>"nuccore", "rettype"=>"gb"}
  pub      = {"db"=>"pubmed", "rettype"=>"medline"}
  counting = {"rettype" => "count"}
  as_xml   = {"retmode"=>"xml"}
  top5     = {"retmax"=>5}

  # Query and ID list reused by several of the demo requests.
  term  = "(genome AND analysis) OR bioinformatics"
  pmids = [10592173, 14693808]

  puts "=== class methods ==="

  puts "--- Search NCBI by E-Utils ---"

  # Each row: caption, query string, E-Utils options.
  [
    ["# count of 'tardigrada' in nuccore", "tardigrada", nuc.merge(counting)],
    ["# max 5 'tardigrada' entries in nuccore", "tardigrada", nuc.merge(top5)],
    ["# count of 'yeast kinase' in nuccore", "yeast kinase", nuc.merge(counting)],
    ["# max 5 'yeast kinase' entries in nuccore (XML)", "yeast kinase", nuc.merge(as_xml).merge(top5)],
    ["# count of 'genome&analysis|bioinformatics' in pubmed", term, pub.merge(counting)],
    ["# max 5 'genome&analysis|bioinformatics' entries in pubmed (XML)", term, pub.merge(as_xml).merge(top5)],
  ].each do |caption, query, opts|
    puts Time.now
    puts caption
    puts Bio::NCBI::REST.esearch(query, opts)
  end

  puts Time.now
  Bio::NCBI::REST.esearch(term, pub.merge(top5)).each do |entry_id|
    puts "# each of 5 'genome&analysis|bioinformatics' entries in pubmed"
    puts entry_id
  end

  puts "--- Retrieve NCBI entry by E-Utils ---"

  # Each row: caption, ID (or list of IDs), E-Utils options.
  [
    ["# '185041' entry in nuccore", "185041", nuc],
    ["# 'J00231' entry in nuccore (XML)", "J00231", nuc.merge(as_xml)],
    ["# 16381885 entry in pubmed", 16381885, pub],
    ["# '16381885' entry in pubmed", "16381885", pub],
    ["# [10592173,14693808] entries in pubmed", pmids, pub],
    ["# [10592173,14693808] entries in pubmed (XML)", pmids, pub.merge(as_xml)],
  ].each do |caption, ids, opts|
    puts Time.now
    puts caption
    puts Bio::NCBI::REST.efetch(ids, opts)
  end

  puts "=== instance methods ==="

  ncbi = Bio::NCBI::REST.new

  puts "--- Search NCBI by E-Utils ---"

  [
    ["# count of 'genome&analysis|bioinformatics' in pubmed", pub.merge(counting)],
    ["# max 5 'genome&analysis|bioinformatics' entries in pubmed", pub.merge(top5)],
  ].each do |caption, opts|
    puts Time.now
    puts caption
    puts ncbi.esearch(term, opts)
  end

  puts Time.now
  ncbi.esearch(term, pub).each do |entry_id|
    puts "# each 'genome&analysis|bioinformatics' entries in pubmed"
    puts entry_id
  end

  puts "--- Retrieve NCBI entry by E-Utils ---"

  [
    ["# 16381885 entry in pubmed", 16381885],
    ["# [10592173,14693808] entries in pubmed", pmids],
  ].each do |caption, ids|
    puts Time.now
    puts caption
    puts ncbi.efetch(ids, pub)
  end

end
More information about the bioruby-cvs
mailing list