From aerts at dev.open-bio.org Sun Nov 4 06:51:01 2007 From: aerts at dev.open-bio.org (Jan Aerts) Date: Sun, 04 Nov 2007 11:51:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17 Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv7743 Modified Files: pubmed.rb Log Message: Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** pubmed.rb 5 Apr 2007 23:35:41 -0000 1.16 --- pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 *************** *** 75,80 **** # *Returns*:: array of PubMed IDs def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term=" http = Bio::Command.new_http(host) --- 75,80 ---- # *Returns*:: array of PubMed IDs def self.search(str) ! host = 'www.ncbi.nlm.nih.gov' ! path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) From k at dev.open-bio.org Sat Nov 10 03:21:56 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:21:56 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18 Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv21448 Modified Files: pubmed.rb Log Message: * search, query is fixed to use new NCBI URI (previous fix was wrong and insufficient). 
* esearch is enhanced to accept hash['rettype'] == "count" as suggested by Kaustubh Patil Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 --- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 *************** *** 19,34 **** # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two ! # types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#search ! # * Bio::PubMed#esearch # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#query ! # * Bio::PubMed#pmfetch ! # * Bio::PubMed#efetch # # The different methods within the same group are interchangeable and should --- 19,35 ---- # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed ! # ! # Basically, two types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#esearch (recommended) ! # * Bio::PubMed#search (only retrieves top 20 hits) # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#efetch (recommended) ! # * Bio::PubMed#query (unstable for the change of the HTML design) ! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI) # # The different methods within the same group are interchangeable and should *************** *** 38,48 **** # APIs can be found on the following websites: # ! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html ! 
# * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat ! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags ! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html ! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html # # == Usage --- 39,50 ---- # APIs can be found on the following websites: # ! # * PubMed Overview: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * PubMed help: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html ! # * Entrez utilities index: ! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * How to link: ! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp # # == Usage *************** *** 51,89 **** # # # If you don't know the pubmed ID: ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # puts Bio::PubMed.efetch("10592173", "14693808") # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE(manuscript) # class PubMed - # Search the PubMed database by given keywords using entrez query and returns - # an array of PubMed IDs. 
- # --- - # *Arguments*: - # * _id_: query string (required) - # *Returns*:: array of PubMed IDs - def self.search(str) - host = 'www.ncbi.nlm.nih.gov' - path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - - http = Bio::Command.new_http(host) - response, = http.get(path + CGI.escape(str)) - result = response.body - result = result.gsub("\r", "\n").squeeze("\n") - result = result.scan(/
(.*?)<\/pre>/m).flatten
- return result
- end
-
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
--- 53,75 ----
#
# # If you don't know the pubmed ID:
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
! #
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
#
# # To retrieve the MEDLINE entry for a given PubMed ID:
+ # puts Bio::PubMed.efetch("10592173", "14693808")
# puts Bio::PubMed.query("10592173")
# puts Bio::PubMed.pmfetch("10592173")
! #
# # This can be converted into a Bio::MEDLINE object:
# manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE.new(manuscript)
#
class PubMed
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 103,107 ****
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
--- 89,93 ----
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
***************
*** 118,122 ****
response, = http.get(path + CGI.escape(str))
result = response.body
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
return result
end
--- 104,154 ----
response, = http.get(path + CGI.escape(str))
result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
!
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez efetch. Multiple PubMed IDs can be provided:
! # Bio::PubMed.efetch(123)
! # Bio::PubMed.efetch(123,456,789)
! # Bio::PubMed.efetch([123,456,789])
! # ---
! # *Arguments*:
! # * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
! # Instead, use of the 'esearch' method is strongly recomended.
! # ---
! # *Arguments*:
! # * _id_: query string (required)
! # *Returns*:: array of PubMed IDs
! def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
return result
end
***************
*** 128,143 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(id)
host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
result = response.body
! if result =~ /#{id}\s+Error/
raise( result )
else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
end
end
--- 160,183 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(*ids)
host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
!
! list = ids.join(",")
http = Bio::Command.new_http(host)
! response, = http.get(path + list)
result = response.body
! result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
!
! if result =~ /id:.*Error occurred/
! # id: xxxxx Error occurred: Article does not exist
raise( result )
else
! if ids.size > 1
! return result
! else
! return result.first
! end
end
end
***************
*** 164,191 ****
end
- # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
- # entrez efetch. Multiple PubMed IDs can be provided:
- # Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
- # Bio::PubMed.efetch([123,456,789])
- # ---
- # *Arguments*:
- # * _ids_: list of PubMed IDs (required)
- # *Returns*:: MEDLINE formatted String
- def self.efetch(*ids)
- return [] if ids.empty?
-
- host = "eutils.ncbi.nlm.nih.gov"
- path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
-
- ids = ids.join(",")
-
- http = Bio::Command.new_http(host)
- response, = http.get(path + ids)
- result = response.body
- result = result.split(/\n\n+/)
- return result
- end
-
end # PubMed
--- 204,207 ----
***************
*** 195,211 ****
if __FILE__ == $0
! puts Bio::PubMed.query("10592173")
! puts "--- ---"
! puts Bio::PubMed.pmfetch("10592173")
! puts "--- ---"
! Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
! end
! puts "--- ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
! puts "--- ---"
puts Bio::PubMed.efetch("10592173", "14693808")
end
--- 211,233 ----
if __FILE__ == $0
! puts "--- Search PubMed by E-Utils ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
!
! puts "--- Retrieve PubMed entry by E-Utils ---"
puts Bio::PubMed.efetch("10592173", "14693808")
+ puts "--- Search PubMed by Entrez CGI ---"
+ Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+ p x
+ end
+
+ puts "--- Retrieve PubMed entry by Entrez CGI ---"
+ puts Bio::PubMed.query("10592173")
+
+
+ puts "--- Retrieve PubMed entry by PMfetch ---"
+ puts Bio::PubMed.pmfetch("10592173")
+
end
From k at dev.open-bio.org Sat Nov 10 03:28:52 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:28:52 +0000
Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69
Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby
In directory dev.open-bio.org:/tmp/cvs-serv21471
Modified Files:
ChangeLog
Log Message:
* updated
Index: ChangeLog
===================================================================
RCS file: /home/repository/bioruby/bioruby/ChangeLog,v
retrieving revision 1.68
retrieving revision 1.69
diff -C2 -d -r1.68 -r1.69
*** ChangeLog 19 Jul 2007 04:08:47 -0000 1.68
--- ChangeLog 10 Nov 2007 08:28:50 -0000 1.69
***************
*** 1,2 ****
--- 1,9 ----
+ 2007-11-10 Toshiaki Katayama
+
+ * lib/bio/io/pubmed.rb:
+
+ Fixed search, query methods (but use of esearch and efetch is
+ strongly recommended).
+
2007-07-19 Toshiaki Katayama
***************
*** 415,419 ****
visual effects.
! * lib/bio/.rb
Extended to have Bio.command where command can be any BioRuby
--- 422,426 ----
visual effects.
! * lib/bio.rb
Extended to have Bio.command where command can be any BioRuby
From nakao at dev.open-bio.org Sat Nov 10 11:57:45 2007
From: nakao at dev.open-bio.org (Mitsuteru C. Nakao)
Date: Sat, 10 Nov 2007 16:57:45 +0000
Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4,
1.5
Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/functional/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io
Modified Files:
test_ensembl.rb
Log Message:
* Updated some expected values of test_gff_exportview*.
Index: test_ensembl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** test_ensembl.rb 5 Apr 2007 23:35:42 -0000 1.4
--- test_ensembl.rb 10 Nov 2007 16:57:43 -0000 1.5
***************
*** 74,78 ****
def test_gff_exportview
! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
assert_equal(line, gff)
--- 74,95 ----
def test_gff_exportview
! line = ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n"
! line = ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
assert_equal(line, gff)
***************
*** 80,84 ****
def test_gff_exportview_with_named_args
! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
--- 97,118 ----
def test_gff_exportview_with_named_args
! line = ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
! line = ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
***************
*** 89,93 ****
def test_tab_exportview_with_named_args
! line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
--- 123,176 ----
def test_tab_exportview_with_named_args
! line = [["seqname",
! "source",
! "feature",
! "start",
! "end",
! "score",
! "strand",
! "frame",
! "gene_id",
! "transcript_id",
! "exon_id",
! "gene_type"].join("\t"),
! ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "ENSG00000206158",
! "ENST00000382964",
! "ENSE00001494097",
! "KNOWN_protein_coding"].join("\t") + "\n"
! ].join("\n")
! line = [["seqname",
! "source",
! "feature",
! "start",
! "end",
! "score",
! "strand",
! "frame",
! "gene_id",
! "transcript_id",
! "exon_id",
! "gene_type"].join("\t"),
! ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "ENSG00000206158",
! "ENST00000382964",
! "ENSE00001494097",
! "KNOWN_protein_coding"].join("\t") + "\n"
! ].join("\n")
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
From k at dev.open-bio.org Thu Nov 15 02:07:18 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:07:18 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61
Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6671/io
Modified Files:
flatfile.rb
Log Message:
* the first line of the MEDLINE entry is changed from UI to PMID
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** flatfile.rb 9 Jul 2007 14:08:34 -0000 1.60
--- flatfile.rb 15 Nov 2007 07:07:16 -0000 1.61
***************
*** 1131,1135 ****
/^LOCUS .+ aa .+/ ],
medline = RuleRegexp[ 'Bio::MEDLINE',
! /^UI \- [0-9]+$/ ],
embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
--- 1131,1135 ----
/^LOCUS .+ aa .+/ ],
medline = RuleRegexp[ 'Bio::MEDLINE',
! /^PMID\- [0-9]+$/ ],
embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
From k at dev.open-bio.org Thu Nov 15 02:08:51 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:08:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19
Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv6723/shell
Modified Files:
interface.rb
Log Message:
* fixed a bug where savefile("hoge", obj) created the file "datahoge" instead of "data/hoge"
Index: interface.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** interface.rb 26 Jun 2007 08:38:38 -0000 1.18
--- interface.rb 15 Nov 2007 07:08:49 -0000 1.19
***************
*** 153,157 ****
message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
! file = datadir + file
end
if File.exists?(file)
--- 153,157 ----
message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
! file = File.join(datadir, file)
end
if File.exists?(file)
From k at dev.open-bio.org Thu Nov 15 02:23:41 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:23:41 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19
Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6745/io
Modified Files:
pubmed.rb
Log Message:
* esearch2, efetch2: candidate replacements for the esearch and efetch methods, enhanced to accept options as a hash and to use Bio::Command.post_form for sending the options
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18
--- pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19
***************
*** 9,15 ****
#
- require 'net/http'
- require 'cgi' unless defined?(CGI)
require 'bio/command'
module Bio
--- 9,14 ----
#
require 'bio/command'
+ require 'cgi' unless defined?(CGI)
module Bio
***************
*** 112,115 ****
--- 111,134 ----
end
+ def self.esearch2(str, hash = {})
+ serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+ opts = {
+ "retmax" => 100,
+ "tool" => "bioruby",
+ "db" => "pubmed",
+ "term" => str
+ }
+ opts.update(hash)
+
+ response, = Bio::Command.post_form(serv, opts)
+ result = response.body
+ if opts['rettype'] == 'count'
+ result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
+ else
+ result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
+ end
+ return result
+ end
+
# Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
# entrez efetch. Multiple PubMed IDs can be provided:
***************
*** 132,136 ****
response, = http.get(path + list)
result = response.body
! result = result.split(/\n\n+/)
return result
end
--- 151,173 ----
response, = http.get(path + list)
result = response.body
! return result
! end
!
! def self.efetch2(ids, hash = {})
! return "" if ids.empty?
! ids = ids.join(",") if ids === Array
!
! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
! opts = {
! "tool" => "bioruby",
! "db" => "pubmed",
! "retmode" => "text",
! "rettype" => "medline",
! "id" => ids,
! }
! opts.update(hash)
!
! response, = Bio::Command.post_form(serv, opts)
! result = response.body
return result
end
***************
*** 212,216 ****
puts "--- Search PubMed by E-Utils ---"
! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
--- 249,255 ----
puts "--- Search PubMed by E-Utils ---"
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
!
! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
***************
*** 218,221 ****
--- 257,261 ----
puts "--- Retrieve PubMed entry by E-Utils ---"
puts Bio::PubMed.efetch("10592173", "14693808")
+ puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
puts "--- Search PubMed by Entrez CGI ---"
From k at dev.open-bio.org Thu Nov 15 02:40:29 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:40:29 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20
Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6790
Modified Files:
pubmed.rb
Log Message:
* reverted to splitting multiple MEDLINE entries into an array when not in XML mode
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19
--- pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20
***************
*** 151,154 ****
--- 151,155 ----
response, = http.get(path + list)
result = response.body
+ result = result.split(/\n\n+/)
return result
end
***************
*** 170,173 ****
--- 171,178 ----
response, = Bio::Command.post_form(serv, opts)
result = response.body
+ if opts["retmode"] == "text"
+ result = result.split(/\n\n+/)
+ end
+
return result
end
From k at dev.open-bio.org Tue Nov 20 10:22:05 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 20 Nov 2007 15:22:05 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21
Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040
Modified Files:
pubmed.rb
Log Message:
* ncbi_access_wait is introduced to wait for 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20
--- pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21
***************
*** 2,6 ****
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001 Toshiaki Katayama
# Copyright:: Copyright (C) 2006 Jan Aerts
# License:: The Ruby License
--- 2,6 ----
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama
# Copyright:: Copyright (C) 2006 Jan Aerts
# License:: The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
class PubMed
+ # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+ # weekdays for any series of more than 100 requests.
+ # -> Not implemented yet in BioRuby
+
+ # Make no more than one request every 3 seconds.
+ NCBI_INTERVAL = 3
+ @@last_access = nil
+
+ def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+ if @@last_access
+ duration = Time.now - @@last_access
+ if wait > duration
+ sleep wait - duration
+ end
+ end
+ @@last_access = Time.now
+ end
+
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 90,115 ****
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! hash['retmax'] = 100 unless hash['retmax']
!
! opts = []
! hash.each do |k, v|
! opts << "#{k}=#{v}"
! end
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed{opts.join('&')}&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
- def self.esearch2(str, hash = {})
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
--- 108,113 ----
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! return nil if str.empty?
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
***************
*** 121,124 ****
--- 119,124 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 134,160 ****
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! def self.efetch2(ids, hash = {})
! return "" if ids.empty?
ids = ids.join(",") if ids === Array
--- 134,144 ----
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: Array of MEDLINE formatted String
! def self.efetch(ids, hash = {})
! return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
***************
*** 169,172 ****
--- 153,158 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 174,178 ****
result = result.split(/\n\n+/)
end
-
return result
end
--- 160,163 ----
***************
*** 254,266 ****
puts "--- Search PubMed by E-Utils ---"
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
!
! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Bio::PubMed.efetch("10592173", "14693808")
! puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
puts "--- Search PubMed by E-Utils ---"
! opts = {"rettype" => "count"}
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
! puts x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Time.now
! puts Bio::PubMed.efetch(16381885)
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! opts = {"retmode" => "xml"}
! puts Bio::PubMed.efetch([10592173, 14693808], opts)
! puts Time.now
! puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("10592173")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("10592173")
end
--- 270,278 ----
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("16381885")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("16381885")
end
From k at dev.open-bio.org Tue Nov 27 02:09:45 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 27 Nov 2007 07:09:45 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17
Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv20621
Modified Files:
compound.rb
Log Message:
* remark method is added
Index: compound.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v
retrieving revision 0.16
retrieving revision 0.17
diff -C2 -d -r0.16 -r0.17
*** compound.rb 28 Jun 2007 11:27:24 -0000 0.16
--- compound.rb 27 Nov 2007 07:09:43 -0000 0.17
***************
*** 46,49 ****
--- 46,54 ----
end
+ # REMARK
+ def remark
+ field_fetch('REMARK')
+ end
+
# GLYCAN
def glycans
From k at dev.open-bio.org Wed Nov 28 01:34:35 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Wed, 28 Nov 2007 06:34:35 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22
Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv23044
Modified Files:
pubmed.rb
Log Message:
* all class methods are changed to instance methods (class methods are
still retained for backward compatibility)
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21
--- pubmed.rb 28 Nov 2007 06:34:33 -0000 1.22
***************
*** 79,83 ****
@@last_access = nil
! def self.ncbi_access_wait(wait = NCBI_INTERVAL)
if @@last_access
duration = Time.now - @@last_access
--- 79,85 ----
@@last_access = nil
! private
!
! def ncbi_access_wait(wait = NCBI_INTERVAL)
if @@last_access
duration = Time.now - @@last_access
***************
*** 89,92 ****
--- 91,96 ----
end
+ public
+
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 107,111 ****
# * _rettype_
# *Returns*:: array of PubMed IDs or a number of results
! def self.esearch(str, hash = {})
return nil if str.empty?
--- 111,115 ----
# * _rettype_
# *Returns*:: array of PubMed IDs or a number of results
! def esearch(str, hash = {})
return nil if str.empty?
***************
*** 119,123 ****
opts.update(hash)
! self.ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
--- 123,127 ----
opts.update(hash)
! ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
***************
*** 139,143 ****
# * _ids_: list of PubMed IDs (required)
# *Returns*:: Array of MEDLINE formatted String
! def self.efetch(ids, hash = {})
return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
--- 143,147 ----
# * _ids_: list of PubMed IDs (required)
# *Returns*:: Array of MEDLINE formatted String
! def efetch(ids, hash = {})
return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
***************
*** 153,157 ****
opts.update(hash)
! self.ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
--- 157,161 ----
opts.update(hash)
! ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
***************
*** 170,177 ****
# * _id_: query string (required)
# *Returns*:: array of PubMed IDs
! def self.search(str)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
http = Bio::Command.new_http(host)
response, = http.get(path + CGI.escape(str))
--- 174,183 ----
# * _id_: query string (required)
# *Returns*:: array of PubMed IDs
! def search(str)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + CGI.escape(str))
***************
*** 187,196 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(*ids)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
-
list = ids.join(",")
http = Bio::Command.new_http(host)
response, = http.get(path + list)
--- 193,203 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def query(*ids)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
list = ids.join(",")
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + list)
***************
*** 216,223 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.pmfetch(id)
host = "www.ncbi.nlm.nih.gov"
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
http = Bio::Command.new_http(host)
response, = http.get(path + id.to_s)
--- 223,232 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def pmfetch(id)
host = "www.ncbi.nlm.nih.gov"
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + id.to_s)
***************
*** 231,234 ****
--- 240,263 ----
end
+ def self.esearch(*args)
+ self.new.esearch(*args)
+ end
+
+ def self.efetch(*args)
+ self.new.efetch(*args)
+ end
+
+ def self.search(*args)
+ self.new.search(*args)
+ end
+
+ def self.query(*args)
+ self.new.query(*args)
+ end
+
+ def self.pmfetch(*args)
+ self.new.pmfetch(*args)
+ end
+
end # PubMed
***************
*** 238,241 ****
--- 267,316 ----
if __FILE__ == $0
+ puts "=== instance methods ==="
+
+ pubmed = Bio::PubMed.new
+
+ puts "--- Search PubMed by E-Utils ---"
+ opts = {"rettype" => "count"}
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+ puts x
+ end
+
+ puts "--- Retrieve PubMed entry by E-Utils ---"
+ puts Time.now
+ puts pubmed.efetch(16381885)
+ puts Time.now
+ puts pubmed.efetch("16381885")
+ puts Time.now
+ puts pubmed.efetch("16381885")
+ puts Time.now
+ opts = {"retmode" => "xml"}
+ puts pubmed.efetch([10592173, 14693808], opts)
+ puts Time.now
+ puts pubmed.efetch(["10592173", "14693808"], opts)
+
+ puts "--- Search PubMed by Entrez CGI ---"
+ pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+ p x
+ end
+
+ puts "--- Retrieve PubMed entry by Entrez CGI ---"
+ puts pubmed.query("16381885")
+
+
+ puts "--- Retrieve PubMed entry by PMfetch ---"
+ puts pubmed.pmfetch("16381885")
+
+
+ puts "=== class methods ==="
+
+
puts "--- Search PubMed by E-Utils ---"
opts = {"rettype" => "count"}
From aerts at dev.open-bio.org Sun Nov 4 11:51:01 2007
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Sun, 04 Nov 2007 11:51:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17
Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv7743
Modified Files:
pubmed.rb
Log Message:
Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa)
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** pubmed.rb 5 Apr 2007 23:35:41 -0000 1.16
--- pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17
***************
*** 75,80 ****
# *Returns*:: array of PubMed IDs
def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
http = Bio::Command.new_http(host)
--- 75,80 ----
# *Returns*:: array of PubMed IDs
def self.search(str)
! host = 'www.ncbi.nlm.nih.gov'
! path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
http = Bio::Command.new_http(host)
From k at dev.open-bio.org Sat Nov 10 08:21:56 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:21:56 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18
Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv21448
Modified Files:
pubmed.rb
Log Message:
* search, query is fixed to use new NCBI URI (previous fix was wrong and
insufficient).
* esearch is enhanced to accept hash['rettype'] == "count" as suggested
by Kaustubh Patil
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17
--- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18
***************
*** 19,34 ****
# The Bio::PubMed class provides several ways to retrieve bibliographic
# information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
! # types of queries are possible:
#
# * searching for PubMed IDs given a query string:
! # * Bio::PubMed#search
! # * Bio::PubMed#esearch
#
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
# given a PubMed ID
! # * Bio::PubMed#query
! # * Bio::PubMed#pmfetch
! # * Bio::PubMed#efetch
#
# The different methods within the same group are interchangeable and should
--- 19,35 ----
# The Bio::PubMed class provides several ways to retrieve bibliographic
# information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
! #
! # Basically, two types of queries are possible:
#
# * searching for PubMed IDs given a query string:
! # * Bio::PubMed#esearch (recommended)
! # * Bio::PubMed#search (only retrieves top 20 hits)
#
# * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
# given a PubMed ID
! # * Bio::PubMed#efetch (recommended)
! # * Bio::PubMed#query (unstable for the change of the HTML design)
! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI)
#
# The different methods within the same group are interchangeable and should
***************
*** 38,48 ****
# APIs can be found on the following websites:
#
! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
! # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
#
# == Usage
--- 39,50 ----
# APIs can be found on the following websites:
#
! # * PubMed Overview:
! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * PubMed help:
! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
! # * Entrez utilities index:
! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * How to link:
! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
#
# == Usage
***************
*** 51,89 ****
#
# # If you don't know the pubmed ID:
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
#
# # To retrieve the MEDLINE entry for a given PubMed ID:
# puts Bio::PubMed.query("10592173")
# puts Bio::PubMed.pmfetch("10592173")
! # puts Bio::PubMed.efetch("10592173", "14693808")
# # This can be converted into a Bio::MEDLINE object:
# manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE(manuscript)
#
class PubMed
- # Search the PubMed database by given keywords using entrez query and returns
- # an array of PubMed IDs.
- # ---
- # *Arguments*:
- # * _id_: query string (required)
- # *Returns*:: array of PubMed IDs
- def self.search(str)
- host = 'www.ncbi.nlm.nih.gov'
- path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
-
- http = Bio::Command.new_http(host)
- response, = http.get(path + CGI.escape(str))
- result = response.body
- result = result.gsub("\r", "\n").squeeze("\n")
- result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
- return result
- end
-
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
--- 53,75 ----
#
# # If you don't know the pubmed ID:
! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
! #
! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
# p x
# end
#
# # To retrieve the MEDLINE entry for a given PubMed ID:
+ # puts Bio::PubMed.efetch("10592173", "14693808")
# puts Bio::PubMed.query("10592173")
# puts Bio::PubMed.pmfetch("10592173")
! #
# # This can be converted into a Bio::MEDLINE object:
# manuscript = Bio::PubMed.query("10592173")
! # medline = Bio::MEDLINE.new(manuscript)
#
class PubMed
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 103,107 ****
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
--- 89,93 ----
# * _retmode_
# * _rettype_
! # *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
hash['retmax'] = 100 unless hash['retmax']
***************
*** 118,122 ****
response, = http.get(path + CGI.escape(str))
result = response.body
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
return result
end
--- 104,154 ----
response, = http.get(path + CGI.escape(str))
result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
!
! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
! # entrez efetch. Multiple PubMed IDs can be provided:
! # Bio::PubMed.efetch(123)
! # Bio::PubMed.efetch(123,456,789)
! # Bio::PubMed.efetch([123,456,789])
! # ---
! # *Arguments*:
! # * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! # Search the PubMed database by given keywords using entrez query and returns
! # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
! # Instead, use of the 'esearch' method is strongly recommended.
! # ---
! # *Arguments*:
! # * _id_: query string (required)
! # *Returns*:: array of PubMed IDs
! def self.search(str)
! host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
return result
end
***************
*** 128,143 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(id)
host = "www.ncbi.nlm.nih.gov"
! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
http = Bio::Command.new_http(host)
! response, = http.get(path + id.to_s)
result = response.body
! if result =~ /#{id}\s+Error/
raise( result )
else
! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
! return result
end
end
--- 160,183 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(*ids)
host = "www.ncbi.nlm.nih.gov"
! path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
!
! list = ids.join(",")
http = Bio::Command.new_http(host)
! response, = http.get(path + list)
result = response.body
! result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
!
! if result =~ /id:.*Error occurred/
! # id: xxxxx Error occurred: Article does not exist
raise( result )
else
! if ids.size > 1
! return result
! else
! return result.first
! end
end
end
***************
*** 164,191 ****
end
- # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
- # entrez efetch. Multiple PubMed IDs can be provided:
- # Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
- # Bio::PubMed.efetch([123,456,789])
- # ---
- # *Arguments*:
- # * _ids_: list of PubMed IDs (required)
- # *Returns*:: MEDLINE formatted String
- def self.efetch(*ids)
- return [] if ids.empty?
-
- host = "eutils.ncbi.nlm.nih.gov"
- path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
-
- ids = ids.join(",")
-
- http = Bio::Command.new_http(host)
- response, = http.get(path + ids)
- result = response.body
- result = result.split(/\n\n+/)
- return result
- end
-
end # PubMed
--- 204,207 ----
***************
*** 195,211 ****
if __FILE__ == $0
! puts Bio::PubMed.query("10592173")
! puts "--- ---"
! puts Bio::PubMed.pmfetch("10592173")
! puts "--- ---"
! Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
! end
! puts "--- ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
! puts "--- ---"
puts Bio::PubMed.efetch("10592173", "14693808")
end
--- 211,233 ----
if __FILE__ == $0
! puts "--- Search PubMed by E-Utils ---"
Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
!
! puts "--- Retrieve PubMed entry by E-Utils ---"
puts Bio::PubMed.efetch("10592173", "14693808")
+ puts "--- Search PubMed by Entrez CGI ---"
+ Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+ p x
+ end
+
+ puts "--- Retrieve PubMed entry by Entrez CGI ---"
+ puts Bio::PubMed.query("10592173")
+
+
+ puts "--- Retrieve PubMed entry by PMfetch ---"
+ puts Bio::PubMed.pmfetch("10592173")
+
end
From k at dev.open-bio.org Sat Nov 10 08:28:52 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:28:52 +0000
Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69
Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby
In directory dev.open-bio.org:/tmp/cvs-serv21471
Modified Files:
ChangeLog
Log Message:
* updated
Index: ChangeLog
===================================================================
RCS file: /home/repository/bioruby/bioruby/ChangeLog,v
retrieving revision 1.68
retrieving revision 1.69
diff -C2 -d -r1.68 -r1.69
*** ChangeLog 19 Jul 2007 04:08:47 -0000 1.68
--- ChangeLog 10 Nov 2007 08:28:50 -0000 1.69
***************
*** 1,2 ****
--- 1,9 ----
+ 2007-11-10 Toshiaki Katayama
+
+ * lib/bio/io/pubmed.rb:
+
+ Fixed search, query methods (but use of esearch and efetch is
+ strongly recommended).
+
2007-07-19 Toshiaki Katayama
***************
*** 415,419 ****
visual effects.
! * lib/bio/.rb
Extended to have Bio.command where command can be any BioRuby
--- 422,426 ----
visual effects.
! * lib/bio.rb
Extended to have Bio.command where command can be any BioRuby
From nakao at dev.open-bio.org Sat Nov 10 16:57:45 2007
From: nakao at dev.open-bio.org (Mitsuteru C. Nakao)
Date: Sat, 10 Nov 2007 16:57:45 +0000
Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4,
1.5
Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/test/functional/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io
Modified Files:
test_ensembl.rb
Log Message:
* Updated some expected values of test_gff_exportview*.
Index: test_ensembl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** test_ensembl.rb 5 Apr 2007 23:35:42 -0000 1.4
--- test_ensembl.rb 10 Nov 2007 16:57:43 -0000 1.5
***************
*** 74,78 ****
def test_gff_exportview
! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
assert_equal(line, gff)
--- 74,95 ----
def test_gff_exportview
! line = ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n"
! line = ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
assert_equal(line, gff)
***************
*** 80,84 ****
def test_gff_exportview_with_named_args
! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
--- 97,118 ----
def test_gff_exportview_with_named_args
! line = ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
! line = ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
***************
*** 89,93 ****
def test_tab_exportview_with_named_args
! line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n"
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
--- 123,176 ----
def test_tab_exportview_with_named_args
! line = [["seqname",
! "source",
! "feature",
! "start",
! "end",
! "score",
! "strand",
! "frame",
! "gene_id",
! "transcript_id",
! "exon_id",
! "gene_type"].join("\t"),
! ["chromosome:NCBI36:4:1149206:1149209:1",
! "Ensembl",
! "Gene",
! "-839",
! "2747",
! ".",
! "+",
! ".",
! "ENSG00000206158",
! "ENST00000382964",
! "ENSE00001494097",
! "KNOWN_protein_coding"].join("\t") + "\n"
! ].join("\n")
! line = [["seqname",
! "source",
! "feature",
! "start",
! "end",
! "score",
! "strand",
! "frame",
! "gene_id",
! "transcript_id",
! "exon_id",
! "gene_type"].join("\t"),
! ["4",
! "Ensembl",
! "Gene",
! "1148366",
! "1151952",
! ".",
! "+",
! "1",
! "ENSG00000206158",
! "ENST00000382964",
! "ENSE00001494097",
! "KNOWN_protein_coding"].join("\t") + "\n"
! ].join("\n")
gff = @serv.exportview(:seq_region_name => 4,
:anchor1 => 1149206,
From k at dev.open-bio.org Thu Nov 15 07:07:18 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:07:18 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61
Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6671/io
Modified Files:
flatfile.rb
Log Message:
* the first line of the MEDLINE entry is changed from UI to PMID
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** flatfile.rb 9 Jul 2007 14:08:34 -0000 1.60
--- flatfile.rb 15 Nov 2007 07:07:16 -0000 1.61
***************
*** 1131,1135 ****
/^LOCUS .+ aa .+/ ],
medline = RuleRegexp[ 'Bio::MEDLINE',
! /^UI \- [0-9]+$/ ],
embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
--- 1131,1135 ----
/^LOCUS .+ aa .+/ ],
medline = RuleRegexp[ 'Bio::MEDLINE',
! /^PMID\- [0-9]+$/ ],
embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
From k at dev.open-bio.org Thu Nov 15 07:08:51 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:08:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19
Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv6723/shell
Modified Files:
interface.rb
Log Message:
* fixed that savefile("hoge", obj) created "datahoge" file instead of "data/hoge"
Index: interface.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** interface.rb 26 Jun 2007 08:38:38 -0000 1.18
--- interface.rb 15 Nov 2007 07:08:49 -0000 1.19
***************
*** 153,157 ****
message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
! file = datadir + file
end
if File.exists?(file)
--- 153,157 ----
message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
! file = File.join(datadir, file)
end
if File.exists?(file)
From k at dev.open-bio.org Thu Nov 15 07:23:41 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:23:41 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19
Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6745/io
Modified Files:
pubmed.rb
Log Message:
* esearch2, efetch2: candidates for the better replacement of esearch and efetch methods which are enhanced to accept options as a hash and utilize Bio::Command.post_form for the options
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18
--- pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19
***************
*** 9,15 ****
#
- require 'net/http'
- require 'cgi' unless defined?(CGI)
require 'bio/command'
module Bio
--- 9,14 ----
#
require 'bio/command'
+ require 'cgi' unless defined?(CGI)
module Bio
***************
*** 112,115 ****
--- 111,134 ----
end
+ def self.esearch2(str, hash = {})
+ serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+ opts = {
+ "retmax" => 100,
+ "tool" => "bioruby",
+ "db" => "pubmed",
+ "term" => str
+ }
+ opts.update(hash)
+
+ response, = Bio::Command.post_form(serv, opts)
+ result = response.body
+ if opts['rettype'] == 'count'
+ result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
+ else
+ result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
+ end
+ return result
+ end
+
# Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
# entrez efetch. Multiple PubMed IDs can be provided:
***************
*** 132,136 ****
response, = http.get(path + list)
result = response.body
! result = result.split(/\n\n+/)
return result
end
--- 151,173 ----
response, = http.get(path + list)
result = response.body
! return result
! end
!
! def self.efetch2(ids, hash = {})
! return "" if ids.empty?
! ids = ids.join(",") if ids === Array
!
! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
! opts = {
! "tool" => "bioruby",
! "db" => "pubmed",
! "retmode" => "text",
! "rettype" => "medline",
! "id" => ids,
! }
! opts.update(hash)
!
! response, = Bio::Command.post_form(serv, opts)
! result = response.body
return result
end
***************
*** 212,216 ****
puts "--- Search PubMed by E-Utils ---"
! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
--- 249,255 ----
puts "--- Search PubMed by E-Utils ---"
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
!
! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
p x
end
***************
*** 218,221 ****
--- 257,261 ----
puts "--- Retrieve PubMed entry by E-Utils ---"
puts Bio::PubMed.efetch("10592173", "14693808")
+ puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
puts "--- Search PubMed by Entrez CGI ---"
From k at dev.open-bio.org Thu Nov 15 07:40:29 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:40:29 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20
Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6790
Modified Files:
pubmed.rb
Log Message:
* get back to split multiple MEDLINE entries into array when not in XML mode
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19
--- pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20
***************
*** 151,154 ****
--- 151,155 ----
response, = http.get(path + list)
result = response.body
+ result = result.split(/\n\n+/)
return result
end
***************
*** 170,173 ****
--- 171,178 ----
response, = Bio::Command.post_form(serv, opts)
result = response.body
+ if opts["retmode"] == "text"
+ result = result.split(/\n\n+/)
+ end
+
return result
end
From k at dev.open-bio.org Tue Nov 20 15:22:05 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 20 Nov 2007 15:22:05 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21
Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040
Modified Files:
pubmed.rb
Log Message:
* ncbi_access_wait is introduced to wait for 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20
--- pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21
***************
*** 2,6 ****
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001 Toshiaki Katayama
# Copyright:: Copyright (C) 2006 Jan Aerts
# License:: The Ruby License
--- 2,6 ----
# = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
#
! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama
# Copyright:: Copyright (C) 2006 Jan Aerts
# License:: The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
class PubMed
+ # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+ # weekdays for any series of more than 100 requests.
+ # -> Not implemented yet in BioRuby
+
+ # Make no more than one request every 3 seconds.
+ NCBI_INTERVAL = 3
+ @@last_access = nil
+
+ def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+ if @@last_access
+ duration = Time.now - @@last_access
+ if wait > duration
+ sleep wait - duration
+ end
+ end
+ @@last_access = Time.now
+ end
+
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 90,115 ****
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! hash['retmax'] = 100 unless hash['retmax']
!
! opts = []
! hash.each do |k, v|
! opts << "#{k}=#{v}"
! end
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed{opts.join('&')}&term="
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + CGI.escape(str))
! result = response.body
! if hash['rettype'] == 'count'
! result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
! else
! result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
! end
! return result
! end
- def self.esearch2(str, hash = {})
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
--- 108,113 ----
# *Returns*:: array of PubMed IDs or a number of results
def self.esearch(str, hash = {})
! return nil if str.empty?
serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
opts = {
***************
*** 121,124 ****
--- 119,124 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 134,160 ****
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
- # Bio::PubMed.efetch(123,456,789)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: MEDLINE formatted String
! def self.efetch(*ids)
! return [] if ids.empty?
!
! host = "eutils.ncbi.nlm.nih.gov"
! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
!
! list = ids.join(",")
!
! http = Bio::Command.new_http(host)
! response, = http.get(path + list)
! result = response.body
! result = result.split(/\n\n+/)
! return result
! end
!
! def self.efetch2(ids, hash = {})
! return "" if ids.empty?
ids = ids.join(",") if ids === Array
--- 134,144 ----
# entrez efetch. Multiple PubMed IDs can be provided:
# Bio::PubMed.efetch(123)
# Bio::PubMed.efetch([123,456,789])
# ---
# *Arguments*:
# * _ids_: list of PubMed IDs (required)
! # *Returns*:: Array of MEDLINE formatted String
! def self.efetch(ids, hash = {})
! return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
***************
*** 169,172 ****
--- 153,158 ----
opts.update(hash)
+ self.ncbi_access_wait
+
response, = Bio::Command.post_form(serv, opts)
result = response.body
***************
*** 174,178 ****
result = result.split(/\n\n+/)
end
-
return result
end
--- 160,163 ----
***************
*** 254,266 ****
puts "--- Search PubMed by E-Utils ---"
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
!
! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
! p x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Bio::PubMed.efetch("10592173", "14693808")
! puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
puts "--- Search PubMed by E-Utils ---"
! opts = {"rettype" => "count"}
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
! puts Time.now
! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
! puts x
end
puts "--- Retrieve PubMed entry by E-Utils ---"
! puts Time.now
! puts Bio::PubMed.efetch(16381885)
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! puts Bio::PubMed.efetch("16381885")
! puts Time.now
! opts = {"retmode" => "xml"}
! puts Bio::PubMed.efetch([10592173, 14693808], opts)
! puts Time.now
! puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("10592173")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("10592173")
end
--- 270,278 ----
puts "--- Retrieve PubMed entry by Entrez CGI ---"
! puts Bio::PubMed.query("16381885")
puts "--- Retrieve PubMed entry by PMfetch ---"
! puts Bio::PubMed.pmfetch("16381885")
end
From k at dev.open-bio.org Tue Nov 27 07:09:45 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 27 Nov 2007 07:09:45 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17
Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv20621
Modified Files:
compound.rb
Log Message:
* remark method is added
Index: compound.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v
retrieving revision 0.16
retrieving revision 0.17
diff -C2 -d -r0.16 -r0.17
*** compound.rb 28 Jun 2007 11:27:24 -0000 0.16
--- compound.rb 27 Nov 2007 07:09:43 -0000 0.17
***************
*** 46,49 ****
--- 46,54 ----
end
+ # REMARK
+ def remark
+ field_fetch('REMARK')
+ end
+
# GLYCAN
def glycans
From k at dev.open-bio.org Wed Nov 28 06:34:35 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Wed, 28 Nov 2007 06:34:35 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22
Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org>
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv23044
Modified Files:
pubmed.rb
Log Message:
* all class methods are changed to instance methods (class methods
still remain for backward compatibility)
Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21
--- pubmed.rb 28 Nov 2007 06:34:33 -0000 1.22
***************
*** 79,83 ****
@@last_access = nil
! def self.ncbi_access_wait(wait = NCBI_INTERVAL)
if @@last_access
duration = Time.now - @@last_access
--- 79,85 ----
@@last_access = nil
! private
!
! def ncbi_access_wait(wait = NCBI_INTERVAL)
if @@last_access
duration = Time.now - @@last_access
***************
*** 89,92 ****
--- 91,96 ----
end
+ public
+
# Search the PubMed database by given keywords using E-Utils and returns
# an array of PubMed IDs.
***************
*** 107,111 ****
# * _rettype_
# *Returns*:: array of PubMed IDs or a number of results
! def self.esearch(str, hash = {})
return nil if str.empty?
--- 111,115 ----
# * _rettype_
# *Returns*:: array of PubMed IDs or a number of results
! def esearch(str, hash = {})
return nil if str.empty?
***************
*** 119,123 ****
opts.update(hash)
! self.ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
--- 123,127 ----
opts.update(hash)
! ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
***************
*** 139,143 ****
# * _ids_: list of PubMed IDs (required)
# *Returns*:: Array of MEDLINE formatted String
! def self.efetch(ids, hash = {})
return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
--- 143,147 ----
# * _ids_: list of PubMed IDs (required)
# *Returns*:: Array of MEDLINE formatted String
! def efetch(ids, hash = {})
return nil if ids.to_s.empty?
ids = ids.join(",") if ids === Array
***************
*** 153,157 ****
opts.update(hash)
! self.ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
--- 157,161 ----
opts.update(hash)
! ncbi_access_wait
response, = Bio::Command.post_form(serv, opts)
***************
*** 170,177 ****
# * _id_: query string (required)
# *Returns*:: array of PubMed IDs
! def self.search(str)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
http = Bio::Command.new_http(host)
response, = http.get(path + CGI.escape(str))
--- 174,183 ----
# * _id_: query string (required)
# *Returns*:: array of PubMed IDs
! def search(str)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + CGI.escape(str))
***************
*** 187,196 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.query(*ids)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
-
list = ids.join(",")
http = Bio::Command.new_http(host)
response, = http.get(path + list)
--- 193,203 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def query(*ids)
host = "www.ncbi.nlm.nih.gov"
path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
list = ids.join(",")
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + list)
***************
*** 216,223 ****
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def self.pmfetch(id)
host = "www.ncbi.nlm.nih.gov"
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
http = Bio::Command.new_http(host)
response, = http.get(path + id.to_s)
--- 223,232 ----
# * _id_: PubMed ID (required)
# *Returns*:: MEDLINE formatted String
! def pmfetch(id)
host = "www.ncbi.nlm.nih.gov"
path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+ ncbi_access_wait
+
http = Bio::Command.new_http(host)
response, = http.get(path + id.to_s)
***************
*** 231,234 ****
--- 240,263 ----
end
+ def self.esearch(*args)
+ self.new.esearch(*args)
+ end
+
+ def self.efetch(*args)
+ self.new.efetch(*args)
+ end
+
+ def self.search(*args)
+ self.new.search(*args)
+ end
+
+ def self.query(*args)
+ self.new.query(*args)
+ end
+
+ def self.pmfetch(*args)
+ self.new.pmfetch(*args)
+ end
+
end # PubMed
***************
*** 238,241 ****
--- 267,316 ----
if __FILE__ == $0
+ puts "=== instance methods ==="
+
+ pubmed = Bio::PubMed.new
+
+ puts "--- Search PubMed by E-Utils ---"
+ opts = {"rettype" => "count"}
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+ puts Time.now
+ pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+ puts x
+ end
+
+ puts "--- Retrieve PubMed entry by E-Utils ---"
+ puts Time.now
+ puts pubmed.efetch(16381885)
+ puts Time.now
+ puts pubmed.efetch("16381885")
+ puts Time.now
+ puts pubmed.efetch("16381885")
+ puts Time.now
+ opts = {"retmode" => "xml"}
+ puts pubmed.efetch([10592173, 14693808], opts)
+ puts Time.now
+ puts pubmed.efetch(["10592173", "14693808"], opts)
+
+ puts "--- Search PubMed by Entrez CGI ---"
+ pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+ p x
+ end
+
+ puts "--- Retrieve PubMed entry by Entrez CGI ---"
+ puts pubmed.query("16381885")
+
+
+ puts "--- Retrieve PubMed entry by PMfetch ---"
+ puts pubmed.pmfetch("16381885")
+
+
+ puts "=== class methods ==="
+
+
puts "--- Search PubMed by E-Utils ---"
opts = {"rettype" => "count"}