[BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18
Katayama Toshiaki
k at dev.open-bio.org
Sat Nov 10 08:21:56 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv21448
Modified Files:
pubmed.rb
Log Message:
* search and query are fixed to use the new NCBI URI (the previous fix was
wrong and insufficient).
* esearch is enhanced to accept hash['rettype'] == "count" as suggested
by Kaustubh Patil
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17
--- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18
***************
*** 19,34 ****
# The Bio::PubMed class provides several ways to retrieve bibliographic
# information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
! # types of queries are possible:
#
# * searching for PubMed IDs given a query string:
! # * Bio::PubMed#search
! # * Bio::PubMed#esearch
#
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
# given a PubMed ID
! # * Bio::PubMed#query
! # * Bio::PubMed#pmfetch
! # * Bio::PubMed#efetch
#
# The different methods within the same group are interchangeable and should
--- 19,35 ----
# The Bio::PubMed class provides several ways to retrieve bibliographic
# information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
! #
! # Basically, two types of queries are possible:
#
# * searching for PubMed IDs given a query string:
! # * Bio::PubMed#esearch (recommended)
! # * Bio::PubMed#search (only retrieves top 20 hits)
#
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
# given a PubMed ID
! # * Bio::PubMed#efetch (recommended)
! # * Bio::PubMed#query (unstable, as it depends on the HTML design)
! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI)
#
# The different methods within the same group are interchangeable and should
***************
*** 38,48 ****
# APIs can be found on the following websites:
#
! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
! # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
#
# == Usage
--- 39,50 ----
# APIs can be found on the following websites:
#
! # * PubMed Overview:
! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * PubMed help:
! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
! # * Entrez utilities index:
! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * How to link:
! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
#
# == Usage
***************
*** 51,89 ****
#
# # If you don't know the pubmed ID:
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
#
# # To retrieve the MEDLINE entry for a given PubMed ID:
# puts Bio::PubMed.query("10592173")
# puts Bio::PubMed.pmfetch("10592173")
! # puts Bio::PubMed.efetch("10592173", "14693808")
# # This can be converted into a Bio::MEDLINE object:
# manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE(manuscript)
#
class PubMed
- # Search the PubMed database by given keywords using entrez query and returns
- # an array of PubMed IDs.
- # ---
- # *Arguments*:
- # * _id_: query string (required)
- # *Returns*:: array of PubMed IDs
- def self.search(str)
- host = 'www.ncbi.nlm.nih.gov'
- path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
-
- http = Bio::Command.new_http(host)
- response, = http.get(path + CGI.escape(str))
- result = response.body
- result = result.gsub("\r", "\n").squeeze("\n")
- result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
- return result
- end
-
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
--- 53,75 ----
#
# # If you don't know the pubmed ID:
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
! #
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
#
# # To retrieve the MEDLINE entry for a given PubMed ID:
+ # puts Bio::PubMed.efetch("10592173", "14693808")
# puts Bio::PubMed.query("10592173")
# puts Bio::PubMed.pmfetch("10592173")
! #
# # This can be converted into a Bio::MEDLINE object:
# manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE.new(manuscript)
#
class PubMed
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 103,107 ****
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
--- 89,93 ----
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
***************
*** 118,122 ****
response, = http.get(path + CGI.escape(str))
result = response.body
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
return result
end
--- 104,154 ----
response, = http.get(path + CGI.escape(str))
result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
!
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez efetch. Multiple PubMed IDs can be provided:
! # Bio::PubMed.efetch(123)
! # Bio::PubMed.efetch(123,456,789)
! # Bio::PubMed.efetch([123,456,789])
! # ---
! # *Arguments*:
! # * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
! # Instead, use of the 'esearch' method is strongly recommended.
! # ---
! # *Arguments*:
! # * _str_: query string (required)
! # *Returns*:: array of PubMed IDs
! def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
return result
end
***************
*** 128,143 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(id)
host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
result = response.body
! if result =~ /#{id}\s+Error/
raise( result )
else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
end
end
--- 160,183 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(*ids)
host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
!
! list = ids.join(",")
http = Bio::Command.new_http(host)
! response, = http.get(path + list)
result = response.body
! result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
!
! if result =~ /id:.*Error occurred/
! # id: xxxxx Error occurred: Article does not exist
raise( result )
else
! if ids.size > 1
! return result
! else
! return result.first
! end
end
end
***************
*** 164,191 ****
end
- # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
- # entrez efetch. Multiple PubMed IDs can be provided:
- # Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
- # Bio::PubMed.efetch([123,456,789])
- # ---
- # *Arguments*:
- # * _ids_: list of PubMed IDs (required)
- # *Returns*:: MEDLINE formatted String
- def self.efetch(*ids)
- return [] if ids.empty?
-
- host = "eutils.ncbi.nlm.nih.gov"
- path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
-
- ids = ids.join(",")
-
- http = Bio::Command.new_http(host)
- response, = http.get(path + ids)
- result = response.body
- result = result.split(/\n\n+/)
- return result
- end
-
end # PubMed
--- 204,207 ----
***************
*** 195,211 ****
if __FILE__ == $0
! puts Bio::PubMed.query("10592173")
! puts "--- ---"
! puts Bio::PubMed.pmfetch("10592173")
! puts "--- ---"
! Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
! end
! puts "--- ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
! puts "--- ---"
puts Bio::PubMed.efetch("10592173", "14693808")
end
--- 211,233 ----
if __FILE__ == $0
! puts "--- Search PubMed by E-Utils ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
!
! puts "--- Retrieve PubMed entry by E-Utils ---"
puts Bio::PubMed.efetch("10592173", "14693808")
+ puts "--- Search PubMed by Entrez CGI ---"
+ Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+ p x
+ end
+
+ puts "--- Retrieve PubMed entry by Entrez CGI ---"
+ puts Bio::PubMed.query("10592173")
+
+
+ puts "--- Retrieve PubMed entry by PMfetch ---"
+ puts Bio::PubMed.pmfetch("10592173")
+
end
More information about the bioruby-cvs
mailing list