[BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.14,1.15
Katayama Toshiaki
k at dev.open-bio.org
Tue Sep 19 05:47:54 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv31487/lib/bio/io
Modified Files:
pubmed.rb
Log Message:
* changed from LGPL to Ruby's
* minor change of doc format
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** pubmed.rb 14 Jul 2006 14:48:56 -0000 1.14
--- pubmed.rb 19 Sep 2006 05:47:52 -0000 1.15
***************
*** 1,23 ****
#
! # bio/io/pubmed.rb - NCBI Entrez/PubMed client module
! #
! # Copyright (C) 2001 KATAYAMA Toshiaki <k at bioruby.org>
! # 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
! #
! # This library is free software; you can redistribute it and/or
! # modify it under the terms of the GNU Lesser General Public
! # License as published by the Free Software Foundation; either
! # version 2 of the License, or (at your option) any later version.
! #
! # This library is distributed in the hope that it will be useful,
! # but WITHOUT ANY WARRANTY; without even the implied warranty of
! # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
! # Lesser General Public License for more details.
#
! # You should have received a copy of the GNU Lesser General Public
! # License along with this library; if not, write to the Free Software
! # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
! # $Id$
#
--- 1,10 ----
#
! # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001 Toshiaki Katayama <k at bioruby.org>
! # Copyright:: Copyright (C) 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
! # License:: Ruby's
#
! # $Id$
#
***************
*** 28,203 ****
module Bio
! # = DESCRIPTION
! # The Bio::PubMed class provides several ways to retrieve bibliographic
! # information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
! # types of queries are possible:
! # * searching for PubMed IDs given a query string:
! # * Bio::PubMed#search
! # * Bio::PubMed#esearch
! # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) given a PubMed ID
! # * Bio::PubMed#query
! # * Bio::PubMed#pmfetch
! # * Bio::PubMed#efetch
! #
! # The different methods within the same group are interchangeable and should
! # return the same result.
! #
! # Additional information about the MEDLINE format and PubMed programmable
! # APIs can be found on the following websites:
! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
! # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
! #
! # = USAGE
! # require 'bio'
! #
! # # If you don't know the pubmed ID:
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
! # p x
! # end
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
! # p x
! # end
! #
! # # To retrieve the MEDLINE entry for a given PubMed ID:
! # puts Bio::PubMed.query("10592173")
! # puts Bio::PubMed.pmfetch("10592173")
! # puts Bio::PubMed.efetch("10592173", "14693808")
! # # This can be converted into a Bio::MEDLINE object:
! # manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE(manuscript)
! #
! # = REMARK
! # This class can not be used at the moment if you're behind a proxy server. This will be solved in the near future.
! class PubMed
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs.
! # ---
! # *Arguments*:
! # * _id_: query string (required)
! # *Returns*:: array of PubMed IDs
! def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.gsub("\r", "\n").squeeze("\n")
! result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
! return result
! end
! # Search the PubMed database by given keywords using E-Utils and returns
! # an array of PubMed IDs.
! #
! # For information on the possible arguments, see
! # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
! # ---
! # *Arguments*:
! # * _id_: query string (required)
! # * _field_
! # * _reldate_
! # * _mindate_
! # * _maxdate_
! # * _datetype_
! # * _retstart_
! # * _retmax_ (default 100)
! # * _retmode_
! # * _rettype_
! # *Returns*:: array of PubMed IDs
! def self.esearch(str, hash = {})
! hash['retmax'] = 100 unless hash['retmax']
! opts = []
! hash.each do |k, v|
! opts << "#{k}=#{v}"
! end
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! return result
! end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez query.
! # ---
! # *Arguments*:
! # * _id_: PubMed ID (required)
! # *Returns*:: MEDLINE formatted String
! def self.query(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez pmfetch.
! # ---
! # *Arguments*:
! # * _id_: PubMed ID (required)
! # *Returns*:: MEDLINE formatted String
! def self.pmfetch(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
! http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
! end
end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez efetch. Multiple PubMed IDs can be provided:
! # Bio::PubMed.efetch(123)
! # Bio::PubMed.efetch(123,456,789)
! # Bio::PubMed.efetch([123,456,789])
! # ---
! # *Arguments*:
! # * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! ids = ids.join(",")
! http = Bio::Command.new_http(host)
! response, = http.get(path + ids)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
end
! end
--- 15,194 ----
module Bio
! # == Description
! #
! # The Bio::PubMed class provides several ways to retrieve bibliographic
! # information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
! # types of queries are possible:
! #
! # * searching for PubMed IDs given a query string:
! # * Bio::PubMed#search
! # * Bio::PubMed#esearch
! #
! # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
! # given a PubMed ID
! # * Bio::PubMed#query
! # * Bio::PubMed#pmfetch
! # * Bio::PubMed#efetch
! #
! # The different methods within the same group are interchangeable and should
! # return the same result.
! #
! # Additional information about the MEDLINE format and PubMed programmable
! # APIs can be found on the following websites:
! #
! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
! # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
! #
! # == Usage
! #
! # require 'bio'
! #
! # # If you don't know the pubmed ID:
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
! # p x
! # end
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
! # p x
! # end
! #
! # # To retrieve the MEDLINE entry for a given PubMed ID:
! # puts Bio::PubMed.query("10592173")
! # puts Bio::PubMed.pmfetch("10592173")
! # puts Bio::PubMed.efetch("10592173", "14693808")
! # # This can be converted into a Bio::MEDLINE object:
! # manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE.new(manuscript)
! #
! class PubMed
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs.
! # ---
! # *Arguments*:
! # * _str_: query string (required)
! # *Returns*:: array of PubMed IDs
! def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.gsub("\r", "\n").squeeze("\n")
! result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
! return result
! end
! # Search the PubMed database by given keywords using E-Utils and returns
! # an array of PubMed IDs.
! #
! # For information on the possible arguments, see
! # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
! # ---
! # *Arguments*:
! # * _str_: query string (required)
! # * _field_
! # * _reldate_
! # * _mindate_
! # * _maxdate_
! # * _datetype_
! # * _retstart_
! # * _retmax_ (default 100)
! # * _retmode_
! # * _rettype_
! # *Returns*:: array of PubMed IDs
! def self.esearch(str, hash = {})
! hash['retmax'] = 100 unless hash['retmax']
! opts = []
! hash.each do |k, v|
! opts << "#{k}=#{v}"
! end
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! return result
! end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez query.
! # ---
! # *Arguments*:
! # * _id_: PubMed ID (required)
! # *Returns*:: MEDLINE formatted String
! def self.query(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
end
+ end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez pmfetch.
! # ---
! # *Arguments*:
! # * _id_: PubMed ID (required)
! # *Returns*:: MEDLINE formatted String
! def self.pmfetch(id)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
! http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
! result = response.body
! if result =~ /#{id}\s+Error/
! raise( result )
! else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
end
+ end
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez efetch. Multiple PubMed IDs can be provided:
! # Bio::PubMed.efetch(123)
! # Bio::PubMed.efetch(123,456,789)
! # Bio::PubMed.efetch([123,456,789])
! # ---
! # *Arguments*:
! # * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! ids = ids.join(",")
+ http = Bio::Command.new_http(host)
+ response, = http.get(path + ids)
+ result = response.body
+ result = result.split(/\n\n+/)
+ return result
end
! end # PubMed
!
! end # Bio
More information about the bioruby-cvs
mailing list