From aerts at dev.open-bio.org Sun Nov 4 06:51:01 2007 From: aerts at dev.open-bio.org (Jan Aerts) Date: Sun, 04 Nov 2007 11:51:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17 Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv7743 Modified Files: pubmed.rb Log Message: Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** pubmed.rb 5 Apr 2007 23:35:41 -0000 1.16 --- pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 *************** *** 75,80 **** # *Returns*:: array of PubMed IDs def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term=" http = Bio::Command.new_http(host) --- 75,80 ---- # *Returns*:: array of PubMed IDs def self.search(str) ! host = 'www.ncbi.nlm.nih.gov' ! path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) From k at dev.open-bio.org Sat Nov 10 03:21:56 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:21:56 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18 Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv21448 Modified Files: pubmed.rb Log Message: * search, query is fixed to use new NCBI URI (previous fix was wrong and insufficient). 
* esearch is enhanced to accept hash['rettype'] == "count" as suggested by Kaustubh Patil Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 --- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 *************** *** 19,34 **** # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two ! # types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#search ! # * Bio::PubMed#esearch # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#query ! # * Bio::PubMed#pmfetch ! # * Bio::PubMed#efetch # # The different methods within the same group are interchangeable and should --- 19,35 ---- # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed ! # ! # Basically, two types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#esearch (recommended) ! # * Bio::PubMed#search (only retrieves top 20 hits) # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#efetch (recommended) ! # * Bio::PubMed#query (unstable for the change of the HTML design) ! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI) # # The different methods within the same group are interchangeable and should *************** *** 38,48 **** # APIs can be found on the following websites: # ! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html ! 
# * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat ! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags ! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html ! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html # # == Usage --- 39,50 ---- # APIs can be found on the following websites: # ! # * PubMed Overview: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * PubMed help: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html ! # * Entrez utilities index: ! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * How to link: ! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp # # == Usage *************** *** 51,89 **** # # # If you don't know the pubmed ID: ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # puts Bio::PubMed.efetch("10592173", "14693808") # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE(manuscript) # class PubMed - # Search the PubMed database by given keywords using entrez query and returns - # an array of PubMed IDs. 
- # --- - # *Arguments*: - # * _id_: query string (required) - # *Returns*:: array of PubMed IDs - def self.search(str) - host = 'www.ncbi.nlm.nih.gov' - path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - - http = Bio::Command.new_http(host) - response, = http.get(path + CGI.escape(str)) - result = response.body - result = result.gsub("\r", "\n").squeeze("\n") - result = result.scan(/
(.*?)<\/pre>/m).flatten - return result - end - # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. --- 53,75 ---- # # # If you don't know the pubmed ID: ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: + # puts Bio::PubMed.efetch("10592173", "14693808") # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE.new(manuscript) # class PubMed # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 103,107 **** # * _retmode_ # * _rettype_ ! # *Returns*:: array of PubMed IDs def self.esearch(str, hash = {}) hash['retmax'] = 100 unless hash['retmax'] --- 89,93 ---- # * _retmode_ # * _rettype_ ! # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) hash['retmax'] = 100 unless hash['retmax'] *************** *** 118,122 **** response, = http.get(path + CGI.escape(str)) result = response.body ! result = result.scan(/(.*?)<\/Id>/m).flatten return result end --- 104,154 ---- response, = http.get(path + CGI.escape(str)) result = response.body ! if hash['rettype'] == 'count' ! result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i ! else ! result = result.scan(/ (.*?)<\/Id>/m).flatten ! end ! return result ! end ! ! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using ! # entrez efetch. Multiple PubMed IDs can be provided: ! # Bio::PubMed.efetch(123) ! # Bio::PubMed.efetch(123,456,789) ! # Bio::PubMed.efetch([123,456,789]) ! # --- ! # *Arguments*: ! # * _ids_: list of PubMed IDs (required) ! # *Returns*:: MEDLINE formatted String ! def self.efetch(*ids) ! 
return [] if ids.empty? ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" ! ! list = ids.join(",") ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + list) ! result = response.body ! result = result.split(/\n\n+/) ! return result ! end ! ! # Search the PubMed database by given keywords using entrez query and returns ! # an array of PubMed IDs. Caution: this method returns the first 20 hits only. ! # Instead, use of the 'esearch' method is strongly recomended. ! # --- ! # *Arguments*: ! # * _id_: query string (required) ! # *Returns*:: array of PubMed IDs ! def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + CGI.escape(str)) ! result = response.body ! result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten return result end *************** *** 128,143 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.query(id) host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) ! response, = http.get(path + id.to_s) result = response.body ! if result =~ /#{id}\s+Error/ raise( result ) else ! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '') ! return result end end --- 160,183 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.query(*ids) host = "www.ncbi.nlm.nih.gov" ! path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" ! ! list = ids.join(",") http = Bio::Command.new_http(host) ! response, = http.get(path + list) result = response.body ! result = result.scan(/ \s*(.*?)<\/pre>/m).flatten ! ! if result =~ /id:.*Error occurred/ ! # id: xxxxx Error occurred: Article does not exist raise( result ) else ! if ids.size > 1 ! 
return result ! else ! return result.first ! end end end *************** *** 164,191 **** end - # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using - # entrez efetch. Multiple PubMed IDs can be provided: - # Bio::PubMed.efetch(123) - # Bio::PubMed.efetch(123,456,789) - # Bio::PubMed.efetch([123,456,789]) - # --- - # *Arguments*: - # * _ids_: list of PubMed IDs (required) - # *Returns*:: MEDLINE formatted String - def self.efetch(*ids) - return [] if ids.empty? - - host = "eutils.ncbi.nlm.nih.gov" - path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" - - ids = ids.join(",") - - http = Bio::Command.new_http(host) - response, = http.get(path + ids) - result = response.body - result = result.split(/\n\n+/) - return result - end - end # PubMed --- 204,207 ---- *************** *** 195,211 **** if __FILE__ == $0 ! puts Bio::PubMed.query("10592173") ! puts "--- ---" ! puts Bio::PubMed.pmfetch("10592173") ! puts "--- ---" ! Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| ! p x ! end ! puts "--- ---" Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end ! puts "--- ---" puts Bio::PubMed.efetch("10592173", "14693808") end --- 211,233 ---- if __FILE__ == $0 ! puts "--- Search PubMed by E-Utils ---" Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end ! ! 
puts "--- Retrieve PubMed entry by E-Utils ---" puts Bio::PubMed.efetch("10592173", "14693808") + puts "--- Search PubMed by Entrez CGI ---" + Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| + p x + end + + puts "--- Retrieve PubMed entry by Entrez CGI ---" + puts Bio::PubMed.query("10592173") + + + puts "--- Retrieve PubMed entry by PMfetch ---" + puts Bio::PubMed.pmfetch("10592173") + end From k at dev.open-bio.org Sat Nov 10 03:28:52 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:28:52 +0000 Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69 Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org> Update of /home/repository/bioruby/bioruby In directory dev.open-bio.org:/tmp/cvs-serv21471 Modified Files: ChangeLog Log Message: * updated Index: ChangeLog =================================================================== RCS file: /home/repository/bioruby/bioruby/ChangeLog,v retrieving revision 1.68 retrieving revision 1.69 diff -C2 -d -r1.68 -r1.69 *** ChangeLog 19 Jul 2007 04:08:47 -0000 1.68 --- ChangeLog 10 Nov 2007 08:28:50 -0000 1.69 *************** *** 1,2 **** --- 1,9 ---- + 2007-11-10 Toshiaki Katayama+ + * lib/bio/io/pubmed.rb: + + Fixed search, query methods (but use of esearch and efetch is + strongly recommended). + 2007-07-19 Toshiaki Katayama *************** *** 415,419 **** visual effects. ! * lib/bio/.rb Extended to have Bio.command where command can be any BioRuby --- 422,426 ---- visual effects. ! * lib/bio.rb Extended to have Bio.command where command can be any BioRuby From nakao at dev.open-bio.org Sat Nov 10 11:57:45 2007 From: nakao at dev.open-bio.org (Mitsuteru C. 
Nakao) Date: Sat, 10 Nov 2007 16:57:45 +0000 Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4, 1.5 Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/functional/bio/io In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io Modified Files: test_ensembl.rb Log Message: * Updated some expected values of test_gff_exportview*. Index: test_ensembl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** test_ensembl.rb 5 Apr 2007 23:35:42 -0000 1.4 --- test_ensembl.rb 10 Nov 2007 16:57:43 -0000 1.5 *************** *** 74,78 **** def test_gff_exportview ! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n" gff = @serv.exportview(4, 1149206, 1149209, ['gene']) assert_equal(line, gff) --- 74,95 ---- def test_gff_exportview ! line = ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n" ! line = ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" gff = @serv.exportview(4, 1149206, 1149209, ['gene']) assert_equal(line, gff) *************** *** 80,84 **** def test_gff_exportview_with_named_args ! 
line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, --- 97,118 ---- def test_gff_exportview_with_named_args ! line = ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" ! line = ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, *************** *** 89,93 **** def test_tab_exportview_with_named_args ! line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, --- 123,176 ---- def test_tab_exportview_with_named_args ! line = [["seqname", ! "source", ! "feature", ! "start", ! "end", ! "score", ! "strand", ! "frame", ! "gene_id", ! "transcript_id", ! "exon_id", ! "gene_type"].join("\t"), ! ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "ENSG00000206158", ! "ENST00000382964", ! "ENSE00001494097", ! "KNOWN_protein_coding"].join("\t") + "\n" ! ].join("\n") ! line = [["seqname", ! "source", ! "feature", ! "start", ! "end", ! "score", ! "strand", ! "frame", ! "gene_id", ! "transcript_id", ! "exon_id", ! "gene_type"].join("\t"), ! ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "ENSG00000206158", ! 
"ENST00000382964", ! "ENSE00001494097", ! "KNOWN_protein_coding"].join("\t") + "\n" ! ].join("\n") gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, From k at dev.open-bio.org Thu Nov 15 02:07:18 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:07:18 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61 Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6671/io Modified Files: flatfile.rb Log Message: * the first line of the MEDLINE entry is changed from UI to PMID Index: flatfile.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v retrieving revision 1.60 retrieving revision 1.61 diff -C2 -d -r1.60 -r1.61 *** flatfile.rb 9 Jul 2007 14:08:34 -0000 1.60 --- flatfile.rb 15 Nov 2007 07:07:16 -0000 1.61 *************** *** 1131,1135 **** /^LOCUS .+ aa .+/ ], medline = RuleRegexp[ 'Bio::MEDLINE', ! /^UI \- [0-9]+$/ ], embl = RuleRegexp[ 'Bio::EMBL', /^ID .+\; .*(DNA|RNA|XXX)\;/ ], --- 1131,1135 ---- /^LOCUS .+ aa .+/ ], medline = RuleRegexp[ 'Bio::MEDLINE', ! 
/^PMID\- [0-9]+$/ ], embl = RuleRegexp[ 'Bio::EMBL', /^ID .+\; .*(DNA|RNA|XXX)\;/ ], From k at dev.open-bio.org Thu Nov 15 02:08:51 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:08:51 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19 Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/shell In directory dev.open-bio.org:/tmp/cvs-serv6723/shell Modified Files: interface.rb Log Message: * fixed that savefile("hoge", obj) created "datahoge" file instead of "data/hoge" Index: interface.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** interface.rb 26 Jun 2007 08:38:38 -0000 1.18 --- interface.rb 15 Nov 2007 07:08:49 -0000 1.19 *************** *** 153,157 **** message = "Save file '#{file}' in '#{datadir}' directory? [y/n] " if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message) ! file = datadir + file end if File.exists?(file) --- 153,157 ---- message = "Save file '#{file}' in '#{datadir}' directory? [y/n] " if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message) ! 
file = File.join(datadir, file) end if File.exists?(file) From k at dev.open-bio.org Thu Nov 15 02:23:41 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:23:41 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19 Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6745/io Modified Files: pubmed.rb Log Message: * esearch2, efetch2: candidates for the better replacement of esearch and efetch methods which are enchanced to accept options as a hash and utilize Bio::Command.post_form for the options Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 --- pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19 *************** *** 9,15 **** # - require 'net/http' - require 'cgi' unless defined?(CGI) require 'bio/command' module Bio --- 9,14 ---- # require 'bio/command' + require 'cgi' unless defined?(CGI) module Bio *************** *** 112,115 **** --- 111,134 ---- end + def self.esearch2(str, hash = {}) + serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" + opts = { + "retmax" => 100, + "tool" => "bioruby", + "db" => "pubmed", + "term" => str + } + opts.update(hash) + + response, = Bio::Command.post_form(serv, opts) + result = response.body + if opts['rettype'] == 'count' + result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i + else + result = result.scan(/ (.*?)<\/Id>/m).flatten + end + return result + end + # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using # entrez efetch. Multiple PubMed IDs can be provided: *************** *** 132,136 **** response, = http.get(path + list) result = response.body ! 
result = result.split(/\n\n+/) return result end --- 151,173 ---- response, = http.get(path + list) result = response.body ! return result ! end ! ! def self.efetch2(ids, hash = {}) ! return "" if ids.empty? ! ids = ids.join(",") if ids === Array ! ! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" ! opts = { ! "tool" => "bioruby", ! "db" => "pubmed", ! "retmode" => "text", ! "rettype" => "medline", ! "id" => ids, ! } ! opts.update(hash) ! ! response, = Bio::Command.post_form(serv, opts) ! result = response.body return result end *************** *** 212,216 **** puts "--- Search PubMed by E-Utils ---" ! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end --- 249,255 ---- puts "--- Search PubMed by E-Utils ---" ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"}) ! ! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x| p x end *************** *** 218,221 **** --- 257,261 ---- puts "--- Retrieve PubMed entry by E-Utils ---" puts Bio::PubMed.efetch("10592173", "14693808") + puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"}) puts "--- Search PubMed by Entrez CGI ---" From k at dev.open-bio.org Thu Nov 15 02:40:29 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:40:29 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20 Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6790 Modified Files: pubmed.rb Log Message: * get back to split multiple MEDLINE entries into array when not in XML mode Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19 --- pubmed.rb 15 Nov 2007 
07:40:27 -0000 1.20 *************** *** 151,154 **** --- 151,155 ---- response, = http.get(path + list) result = response.body + result = result.split(/\n\n+/) return result end *************** *** 170,173 **** --- 171,178 ---- response, = Bio::Command.post_form(serv, opts) result = response.body + if opts["retmode"] == "text" + result = result.split(/\n\n+/) + end + return result end From k at dev.open-bio.org Tue Nov 20 10:22:05 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 20 Nov 2007 15:22:05 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21 Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv26040 Modified Files: pubmed.rb Log Message: * ncbi_access_wait is introduced to wait for 3 seconds for consequent queries * esearch2 and efetch2 methods are renamed to esearch and efetch Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20 --- pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21 *************** *** 2,6 **** # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module # ! # Copyright:: Copyright (C) 2001 Toshiaki Katayama # Copyright:: Copyright (C) 2006 Jan Aerts # License:: The Ruby License --- 2,6 ---- # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module # ! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama # Copyright:: Copyright (C) 2006 Jan Aerts # License:: The Ruby License *************** *** 71,74 **** --- 71,92 ---- class PubMed + # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time + # weekdays for any series of more than 100 requests. + # -> Not implemented yet in BioRuby + + # Make no more than one request every 3 seconds. 
+ NCBI_INTERVAL = 3 + @@last_access = nil + + def self.ncbi_access_wait(wait = NCBI_INTERVAL) + if @@last_access + duration = Time.now - @@last_access + if wait > duration + sleep wait - duration + end + end + @@last_access = Time.now + end + # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 90,115 **** # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) ! hash['retmax'] = 100 unless hash['retmax'] ! ! opts = [] ! hash.each do |k, v| ! opts << "#{k}=#{v}" ! end ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed{opts.join('&')}&term=" ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + CGI.escape(str)) ! result = response.body ! if hash['rettype'] == 'count' ! result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i ! else ! result = result.scan(/ (.*?)<\/Id>/m).flatten ! end ! return result ! end - def self.esearch2(str, hash = {}) serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" opts = { --- 108,113 ---- # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) ! return nil if str.empty? serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" opts = { *************** *** 121,124 **** --- 119,124 ---- opts.update(hash) + self.ncbi_access_wait + response, = Bio::Command.post_form(serv, opts) result = response.body *************** *** 134,160 **** # entrez efetch. Multiple PubMed IDs can be provided: # Bio::PubMed.efetch(123) - # Bio::PubMed.efetch(123,456,789) # Bio::PubMed.efetch([123,456,789]) # --- # *Arguments*: # * _ids_: list of PubMed IDs (required) ! # *Returns*:: MEDLINE formatted String ! def self.efetch(*ids) ! return [] if ids.empty? ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" ! ! list = ids.join(",") ! ! 
http = Bio::Command.new_http(host) ! response, = http.get(path + list) ! result = response.body ! result = result.split(/\n\n+/) ! return result ! end ! ! def self.efetch2(ids, hash = {}) ! return "" if ids.empty? ids = ids.join(",") if ids === Array --- 134,144 ---- # entrez efetch. Multiple PubMed IDs can be provided: # Bio::PubMed.efetch(123) # Bio::PubMed.efetch([123,456,789]) # --- # *Arguments*: # * _ids_: list of PubMed IDs (required) ! # *Returns*:: Array of MEDLINE formatted String ! def self.efetch(ids, hash = {}) ! return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array *************** *** 169,172 **** --- 153,158 ---- opts.update(hash) + self.ncbi_access_wait + response, = Bio::Command.post_form(serv, opts) result = response.body *************** *** 174,178 **** result = result.split(/\n\n+/) end - return result end --- 160,163 ---- *************** *** 254,266 **** puts "--- Search PubMed by E-Utils ---" ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"}) ! ! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x| ! p x end puts "--- Retrieve PubMed entry by E-Utils ---" ! puts Bio::PubMed.efetch("10592173", "14693808") ! puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"}) puts "--- Search PubMed by Entrez CGI ---" --- 239,266 ---- puts "--- Search PubMed by E-Utils ---" ! opts = {"rettype" => "count"} ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| ! puts x end puts "--- Retrieve PubMed entry by E-Utils ---" ! puts Time.now ! puts Bio::PubMed.efetch(16381885) ! puts Time.now ! puts Bio::PubMed.efetch("16381885") ! puts Time.now ! 
puts Bio::PubMed.efetch("16381885") ! puts Time.now ! opts = {"retmode" => "xml"} ! puts Bio::PubMed.efetch([10592173, 14693808], opts) ! puts Time.now ! puts Bio::PubMed.efetch(["10592173", "14693808"], opts) puts "--- Search PubMed by Entrez CGI ---" *************** *** 270,278 **** puts "--- Retrieve PubMed entry by Entrez CGI ---" ! puts Bio::PubMed.query("10592173") puts "--- Retrieve PubMed entry by PMfetch ---" ! puts Bio::PubMed.pmfetch("10592173") end --- 270,278 ---- puts "--- Retrieve PubMed entry by Entrez CGI ---" ! puts Bio::PubMed.query("16381885") puts "--- Retrieve PubMed entry by PMfetch ---" ! puts Bio::PubMed.pmfetch("16381885") end From k at dev.open-bio.org Tue Nov 27 02:09:45 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 27 Nov 2007 07:09:45 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17 Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg In directory dev.open-bio.org:/tmp/cvs-serv20621 Modified Files: compound.rb Log Message: * remark method is added Index: compound.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v retrieving revision 0.16 retrieving revision 0.17 diff -C2 -d -r0.16 -r0.17 *** compound.rb 28 Jun 2007 11:27:24 -0000 0.16 --- compound.rb 27 Nov 2007 07:09:43 -0000 0.17 *************** *** 46,49 **** --- 46,54 ---- end + # REMARK + def remark + field_fetch('REMARK') + end + # GLYCAN def glycans From k at dev.open-bio.org Wed Nov 28 01:34:35 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Wed, 28 Nov 2007 06:34:35 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22 Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv23044 Modified Files: pubmed.rb Log Message: * all class methods are changed to 
instance methods (class methods are still remained for the backward compatibility) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21 --- pubmed.rb 28 Nov 2007 06:34:33 -0000 1.22 *************** *** 79,83 **** @@last_access = nil ! def self.ncbi_access_wait(wait = NCBI_INTERVAL) if @@last_access duration = Time.now - @@last_access --- 79,85 ---- @@last_access = nil ! private ! ! def ncbi_access_wait(wait = NCBI_INTERVAL) if @@last_access duration = Time.now - @@last_access *************** *** 89,92 **** --- 91,96 ---- end + public + # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 107,111 **** # * _rettype_ # *Returns*:: array of PubMed IDs or a number of results ! def self.esearch(str, hash = {}) return nil if str.empty? --- 111,115 ---- # * _rettype_ # *Returns*:: array of PubMed IDs or a number of results ! def esearch(str, hash = {}) return nil if str.empty? *************** *** 119,123 **** opts.update(hash) ! self.ncbi_access_wait response, = Bio::Command.post_form(serv, opts) --- 123,127 ---- opts.update(hash) ! ncbi_access_wait response, = Bio::Command.post_form(serv, opts) *************** *** 139,143 **** # * _ids_: list of PubMed IDs (required) # *Returns*:: Array of MEDLINE formatted String ! def self.efetch(ids, hash = {}) return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array --- 143,147 ---- # * _ids_: list of PubMed IDs (required) # *Returns*:: Array of MEDLINE formatted String ! def efetch(ids, hash = {}) return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array *************** *** 153,157 **** opts.update(hash) ! self.ncbi_access_wait response, = Bio::Command.post_form(serv, opts) --- 157,161 ---- opts.update(hash) ! 
ncbi_access_wait response, = Bio::Command.post_form(serv, opts) *************** *** 170,177 **** # * _id_: query string (required) # *Returns*:: array of PubMed IDs ! def self.search(str) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" http = Bio::Command.new_http(host) response, = http.get(path + CGI.escape(str)) --- 174,183 ---- # * _id_: query string (required) # *Returns*:: array of PubMed IDs ! def search(str) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + CGI.escape(str)) *************** *** 187,196 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.query(*ids) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - list = ids.join(",") http = Bio::Command.new_http(host) response, = http.get(path + list) --- 193,203 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def query(*ids) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" list = ids.join(",") + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + list) *************** *** 216,223 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.pmfetch(id) host = "www.ncbi.nlm.nih.gov" path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id=" http = Bio::Command.new_http(host) response, = http.get(path + id.to_s) --- 223,232 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! 
def pmfetch(id) host = "www.ncbi.nlm.nih.gov" path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id=" + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + id.to_s) *************** *** 231,234 **** --- 240,263 ---- end + def self.esearch(*args) + self.new.esearch(*args) + end + + def self.efetch(*args) + self.new.efetch(*args) + end + + def self.search(*args) + self.new.search(*args) + end + + def self.query(*args) + self.new.query(*args) + end + + def self.pmfetch(*args) + self.new.pmfetch(*args) + end + end # PubMed *************** *** 238,241 **** --- 267,316 ---- if __FILE__ == $0 + puts "=== instance methods ===" + + pubmed = Bio::PubMed.new + + puts "--- Search PubMed by E-Utils ---" + opts = {"rettype" => "count"} + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| + puts x + end + + puts "--- Retrieve PubMed entry by E-Utils ---" + puts Time.now + puts pubmed.efetch(16381885) + puts Time.now + puts pubmed.efetch("16381885") + puts Time.now + puts pubmed.efetch("16381885") + puts Time.now + opts = {"retmode" => "xml"} + puts pubmed.efetch([10592173, 14693808], opts) + puts Time.now + puts pubmed.efetch(["10592173", "14693808"], opts) + + puts "--- Search PubMed by Entrez CGI ---" + pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x| + p x + end + + puts "--- Retrieve PubMed entry by Entrez CGI ---" + puts pubmed.query("16381885") + + + puts "--- Retrieve PubMed entry by PMfetch ---" + puts pubmed.pmfetch("16381885") + + + puts "=== class methods ===" + + puts "--- Search PubMed by E-Utils ---" opts = {"rettype" => "count"} From aerts at dev.open-bio.org Sun Nov 4 11:51:01 2007 From: 
aerts at dev.open-bio.org (Jan Aerts) Date: Sun, 04 Nov 2007 11:51:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17 Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv7743 Modified Files: pubmed.rb Log Message: Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** pubmed.rb 5 Apr 2007 23:35:41 -0000 1.16 --- pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 *************** *** 75,80 **** # *Returns*:: array of PubMed IDs def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term=" http = Bio::Command.new_http(host) --- 75,80 ---- # *Returns*:: array of PubMed IDs def self.search(str) ! host = 'www.ncbi.nlm.nih.gov' ! path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) From k at dev.open-bio.org Sat Nov 10 08:21:56 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:21:56 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18 Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv21448 Modified Files: pubmed.rb Log Message: * search, query is fixed to use new NCBI URI (previous fix was wrong and insufficient). 
* esearch is enhanced to accept hash['rettype'] == "count" as suggested by Kaustubh Patil Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 --- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 *************** *** 19,34 **** # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two ! # types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#search ! # * Bio::PubMed#esearch # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#query ! # * Bio::PubMed#pmfetch ! # * Bio::PubMed#efetch # # The different methods within the same group are interchangeable and should --- 19,35 ---- # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed ! # ! # Basically, two types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#esearch (recommended) ! # * Bio::PubMed#search (only retrieves top 20 hits) # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#efetch (recommended) ! # * Bio::PubMed#query (unstable for the change of the HTML design) ! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI) # # The different methods within the same group are interchangeable and should *************** *** 38,48 **** # APIs can be found on the following websites: # ! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html ! 
# * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat ! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags ! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html ! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html # # == Usage --- 39,50 ---- # APIs can be found on the following websites: # ! # * PubMed Overview: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * PubMed help: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html ! # * Entrez utilities index: ! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * How to link: ! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp # # == Usage *************** *** 51,89 **** # # # If you don't know the pubmed ID: ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # puts Bio::PubMed.efetch("10592173", "14693808") # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE(manuscript) # class PubMed - # Search the PubMed database by given keywords using entrez query and returns - # an array of PubMed IDs. 
- # --- - # *Arguments*: - # * _id_: query string (required) - # *Returns*:: array of PubMed IDs - def self.search(str) - host = 'www.ncbi.nlm.nih.gov' - path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - - http = Bio::Command.new_http(host) - response, = http.get(path + CGI.escape(str)) - result = response.body - result = result.gsub("\r", "\n").squeeze("\n") - result = result.scan(/ (.*?)<\/pre>/m).flatten - return result - end - # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. --- 53,75 ---- # # # If you don't know the pubmed ID: ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: + # puts Bio::PubMed.efetch("10592173", "14693808") # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE.new(manuscript) # class PubMed # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 103,107 **** # * _retmode_ # * _rettype_ ! # *Returns*:: array of PubMed IDs def self.esearch(str, hash = {}) hash['retmax'] = 100 unless hash['retmax'] --- 89,93 ---- # * _retmode_ # * _rettype_ ! # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) hash['retmax'] = 100 unless hash['retmax'] *************** *** 118,122 **** response, = http.get(path + CGI.escape(str)) result = response.body ! result = result.scan(/(.*?)<\/Id>/m).flatten return result end --- 104,154 ---- response, = http.get(path + CGI.escape(str)) result = response.body ! if hash['rettype'] == 'count' ! result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i ! else ! result = result.scan(/ (.*?)<\/Id>/m).flatten ! 
end ! return result ! end ! ! # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using ! # entrez efetch. Multiple PubMed IDs can be provided: ! # Bio::PubMed.efetch(123) ! # Bio::PubMed.efetch(123,456,789) ! # Bio::PubMed.efetch([123,456,789]) ! # --- ! # *Arguments*: ! # * _ids_: list of PubMed IDs (required) ! # *Returns*:: MEDLINE formatted String ! def self.efetch(*ids) ! return [] if ids.empty? ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" ! ! list = ids.join(",") ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + list) ! result = response.body ! result = result.split(/\n\n+/) ! return result ! end ! ! # Search the PubMed database by given keywords using entrez query and returns ! # an array of PubMed IDs. Caution: this method returns the first 20 hits only. ! # Instead, use of the 'esearch' method is strongly recomended. ! # --- ! # *Arguments*: ! # * _id_: query string (required) ! # *Returns*:: array of PubMed IDs ! def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + CGI.escape(str)) ! result = response.body ! result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten return result end *************** *** 128,143 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.query(id) host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) ! response, = http.get(path + id.to_s) result = response.body ! if result =~ /#{id}\s+Error/ raise( result ) else ! result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '') ! return result end end --- 160,183 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! 
def self.query(*ids) host = "www.ncbi.nlm.nih.gov" ! path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" ! ! list = ids.join(",") http = Bio::Command.new_http(host) ! response, = http.get(path + list) result = response.body ! result = result.scan(/ \s*(.*?)<\/pre>/m).flatten ! ! if result =~ /id:.*Error occurred/ ! # id: xxxxx Error occurred: Article does not exist raise( result ) else ! if ids.size > 1 ! return result ! else ! return result.first ! end end end *************** *** 164,191 **** end - # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using - # entrez efetch. Multiple PubMed IDs can be provided: - # Bio::PubMed.efetch(123) - # Bio::PubMed.efetch(123,456,789) - # Bio::PubMed.efetch([123,456,789]) - # --- - # *Arguments*: - # * _ids_: list of PubMed IDs (required) - # *Returns*:: MEDLINE formatted String - def self.efetch(*ids) - return [] if ids.empty? - - host = "eutils.ncbi.nlm.nih.gov" - path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" - - ids = ids.join(",") - - http = Bio::Command.new_http(host) - response, = http.get(path + ids) - result = response.body - result = result.split(/\n\n+/) - return result - end - end # PubMed --- 204,207 ---- *************** *** 195,211 **** if __FILE__ == $0 ! puts Bio::PubMed.query("10592173") ! puts "--- ---" ! puts Bio::PubMed.pmfetch("10592173") ! puts "--- ---" ! Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| ! p x ! end ! puts "--- ---" Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end ! puts "--- ---" puts Bio::PubMed.efetch("10592173", "14693808") end --- 211,233 ---- if __FILE__ == $0 ! puts "--- Search PubMed by E-Utils ---" Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end ! ! 
puts "--- Retrieve PubMed entry by E-Utils ---" puts Bio::PubMed.efetch("10592173", "14693808") + puts "--- Search PubMed by Entrez CGI ---" + Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| + p x + end + + puts "--- Retrieve PubMed entry by Entrez CGI ---" + puts Bio::PubMed.query("10592173") + + + puts "--- Retrieve PubMed entry by PMfetch ---" + puts Bio::PubMed.pmfetch("10592173") + end From k at dev.open-bio.org Sat Nov 10 08:28:52 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:28:52 +0000 Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69 Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org> Update of /home/repository/bioruby/bioruby In directory dev.open-bio.org:/tmp/cvs-serv21471 Modified Files: ChangeLog Log Message: * updated Index: ChangeLog =================================================================== RCS file: /home/repository/bioruby/bioruby/ChangeLog,v retrieving revision 1.68 retrieving revision 1.69 diff -C2 -d -r1.68 -r1.69 *** ChangeLog 19 Jul 2007 04:08:47 -0000 1.68 --- ChangeLog 10 Nov 2007 08:28:50 -0000 1.69 *************** *** 1,2 **** --- 1,9 ---- + 2007-11-10 Toshiaki Katayama+ + * lib/bio/io/pubmed.rb: + + Fixed search, query methods (but use of esearch and efetch is + strongly recommended). + 2007-07-19 Toshiaki Katayama *************** *** 415,419 **** visual effects. ! * lib/bio/.rb Extended to have Bio.command where command can be any BioRuby --- 422,426 ---- visual effects. ! * lib/bio.rb Extended to have Bio.command where command can be any BioRuby From nakao at dev.open-bio.org Sat Nov 10 16:57:45 2007 From: nakao at dev.open-bio.org (Mitsuteru C. 
Nakao) Date: Sat, 10 Nov 2007 16:57:45 +0000 Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4, 1.5 Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/test/functional/bio/io In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io Modified Files: test_ensembl.rb Log Message: * Updated some expected values of test_gff_exportview*. Index: test_ensembl.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v retrieving revision 1.4 retrieving revision 1.5 diff -C2 -d -r1.4 -r1.5 *** test_ensembl.rb 5 Apr 2007 23:35:42 -0000 1.4 --- test_ensembl.rb 10 Nov 2007 16:57:43 -0000 1.5 *************** *** 74,78 **** def test_gff_exportview ! line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n" gff = @serv.exportview(4, 1149206, 1149209, ['gene']) assert_equal(line, gff) --- 74,95 ---- def test_gff_exportview ! line = ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n" ! line = ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" gff = @serv.exportview(4, 1149206, 1149209, ['gene']) assert_equal(line, gff) *************** *** 80,84 **** def test_gff_exportview_with_named_args ! 
line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, --- 97,118 ---- def test_gff_exportview_with_named_args ! line = ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" ! line = ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, *************** *** 89,93 **** def test_tab_exportview_with_named_args ! line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n" gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, --- 123,176 ---- def test_tab_exportview_with_named_args ! line = [["seqname", ! "source", ! "feature", ! "start", ! "end", ! "score", ! "strand", ! "frame", ! "gene_id", ! "transcript_id", ! "exon_id", ! "gene_type"].join("\t"), ! ["chromosome:NCBI36:4:1149206:1149209:1", ! "Ensembl", ! "Gene", ! "-839", ! "2747", ! ".", ! "+", ! ".", ! "ENSG00000206158", ! "ENST00000382964", ! "ENSE00001494097", ! "KNOWN_protein_coding"].join("\t") + "\n" ! ].join("\n") ! line = [["seqname", ! "source", ! "feature", ! "start", ! "end", ! "score", ! "strand", ! "frame", ! "gene_id", ! "transcript_id", ! "exon_id", ! "gene_type"].join("\t"), ! ["4", ! "Ensembl", ! "Gene", ! "1148366", ! "1151952", ! ".", ! "+", ! "1", ! "ENSG00000206158", ! 
"ENST00000382964", ! "ENSE00001494097", ! "KNOWN_protein_coding"].join("\t") + "\n" ! ].join("\n") gff = @serv.exportview(:seq_region_name => 4, :anchor1 => 1149206, From k at dev.open-bio.org Thu Nov 15 07:07:18 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:07:18 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61 Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6671/io Modified Files: flatfile.rb Log Message: * the first line of the MEDLINE entry is changed from UI to PMID Index: flatfile.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v retrieving revision 1.60 retrieving revision 1.61 diff -C2 -d -r1.60 -r1.61 *** flatfile.rb 9 Jul 2007 14:08:34 -0000 1.60 --- flatfile.rb 15 Nov 2007 07:07:16 -0000 1.61 *************** *** 1131,1135 **** /^LOCUS .+ aa .+/ ], medline = RuleRegexp[ 'Bio::MEDLINE', ! /^UI \- [0-9]+$/ ], embl = RuleRegexp[ 'Bio::EMBL', /^ID .+\; .*(DNA|RNA|XXX)\;/ ], --- 1131,1135 ---- /^LOCUS .+ aa .+/ ], medline = RuleRegexp[ 'Bio::MEDLINE', ! 
/^PMID\- [0-9]+$/ ], embl = RuleRegexp[ 'Bio::EMBL', /^ID .+\; .*(DNA|RNA|XXX)\;/ ], From k at dev.open-bio.org Thu Nov 15 07:08:51 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:08:51 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19 Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/shell In directory dev.open-bio.org:/tmp/cvs-serv6723/shell Modified Files: interface.rb Log Message: * fixed that savefile("hoge", obj) created "datahoge" file instead of "data/hoge" Index: interface.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** interface.rb 26 Jun 2007 08:38:38 -0000 1.18 --- interface.rb 15 Nov 2007 07:08:49 -0000 1.19 *************** *** 153,157 **** message = "Save file '#{file}' in '#{datadir}' directory? [y/n] " if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message) ! file = datadir + file end if File.exists?(file) --- 153,157 ---- message = "Save file '#{file}' in '#{datadir}' directory? [y/n] " if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message) ! 
file = File.join(datadir, file) end if File.exists?(file) From k at dev.open-bio.org Thu Nov 15 07:23:41 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:23:41 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19 Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6745/io Modified Files: pubmed.rb Log Message: * esearch2, efetch2: candidate replacements for the esearch and efetch methods, enhanced to accept options as a hash and to utilize Bio::Command.post_form for the options Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.18 retrieving revision 1.19 diff -C2 -d -r1.18 -r1.19 *** pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 --- pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19 *************** *** 9,15 **** # - require 'net/http' - require 'cgi' unless defined?(CGI) require 'bio/command' module Bio --- 9,14 ---- # require 'bio/command' + require 'cgi' unless defined?(CGI) module Bio *************** *** 112,115 **** --- 111,134 ---- end + def self.esearch2(str, hash = {}) + serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" + opts = { + "retmax" => 100, + "tool" => "bioruby", + "db" => "pubmed", + "term" => str + } + opts.update(hash) + + response, = Bio::Command.post_form(serv, opts) + result = response.body + if opts['rettype'] == 'count' + result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i + else + result = result.scan(/ (.*?)<\/Id>/m).flatten + end + return result + end + # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using # entrez efetch. Multiple PubMed IDs can be provided: *************** *** 132,136 **** response, = http.get(path + list) result = response.body ! 
result = result.split(/\n\n+/) return result end --- 151,173 ---- response, = http.get(path + list) result = response.body ! return result ! end ! ! def self.efetch2(ids, hash = {}) ! return "" if ids.empty? ! ids = ids.join(",") if ids === Array ! ! serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" ! opts = { ! "tool" => "bioruby", ! "db" => "pubmed", ! "retmode" => "text", ! "rettype" => "medline", ! "id" => ids, ! } ! opts.update(hash) ! ! response, = Bio::Command.post_form(serv, opts) ! result = response.body return result end *************** *** 212,216 **** puts "--- Search PubMed by E-Utils ---" ! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| p x end --- 249,255 ---- puts "--- Search PubMed by E-Utils ---" ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"}) ! ! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x| p x end *************** *** 218,221 **** --- 257,261 ---- puts "--- Retrieve PubMed entry by E-Utils ---" puts Bio::PubMed.efetch("10592173", "14693808") + puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"}) puts "--- Search PubMed by Entrez CGI ---" From k at dev.open-bio.org Thu Nov 15 07:40:29 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Thu, 15 Nov 2007 07:40:29 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20 Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv6790 Modified Files: pubmed.rb Log Message: * get back to split multiple MEDLINE entries into array when not in XML mode Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.19 retrieving revision 1.20 diff -C2 -d -r1.19 -r1.20 *** pubmed.rb 15 Nov 2007 07:23:39 -0000 1.19 --- pubmed.rb 15 Nov 2007 
07:40:27 -0000 1.20 *************** *** 151,154 **** --- 151,155 ---- response, = http.get(path + list) result = response.body + result = result.split(/\n\n+/) return result end *************** *** 170,173 **** --- 171,178 ---- response, = Bio::Command.post_form(serv, opts) result = response.body + if opts["retmode"] == "text" + result = result.split(/\n\n+/) + end + return result end From k at dev.open-bio.org Tue Nov 20 15:22:05 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 20 Nov 2007 15:22:05 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21 Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv26040 Modified Files: pubmed.rb Log Message: * ncbi_access_wait is introduced to wait 3 seconds between consecutive queries * esearch2 and efetch2 methods are renamed to esearch and efetch Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.20 retrieving revision 1.21 diff -C2 -d -r1.20 -r1.21 *** pubmed.rb 15 Nov 2007 07:40:27 -0000 1.20 --- pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21 *************** *** 2,6 **** # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module # ! # Copyright:: Copyright (C) 2001 Toshiaki Katayama # Copyright:: Copyright (C) 2006 Jan Aerts # License:: The Ruby License --- 2,6 ---- # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module # ! # Copyright:: Copyright (C) 2001, 2007 Toshiaki Katayama # Copyright:: Copyright (C) 2006 Jan Aerts # License:: The Ruby License *************** *** 71,74 **** --- 71,92 ---- class PubMed + # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time + # weekdays for any series of more than 100 requests. + # -> Not implemented yet in BioRuby + + # Make no more than one request every 3 seconds.
+ NCBI_INTERVAL = 3 + @@last_access = nil + + def self.ncbi_access_wait(wait = NCBI_INTERVAL) + if @@last_access + duration = Time.now - @@last_access + if wait > duration + sleep wait - duration + end + end + @@last_access = Time.now + end + # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 90,115 **** # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) ! hash['retmax'] = 100 unless hash['retmax'] ! ! opts = [] ! hash.each do |k, v| ! opts << "#{k}=#{v}" ! end ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed{opts.join('&')}&term=" ! ! http = Bio::Command.new_http(host) ! response, = http.get(path + CGI.escape(str)) ! result = response.body ! if hash['rettype'] == 'count' ! result = result.scan(/ (.*?)<\/Count>/m).flatten.first.to_i ! else ! result = result.scan(/ (.*?)<\/Id>/m).flatten ! end ! return result ! end - def self.esearch2(str, hash = {}) serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" opts = { --- 108,113 ---- # *Returns*:: array of PubMed IDs or a number of results def self.esearch(str, hash = {}) ! return nil if str.empty? serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi" opts = { *************** *** 121,124 **** --- 119,124 ---- opts.update(hash) + self.ncbi_access_wait + response, = Bio::Command.post_form(serv, opts) result = response.body *************** *** 134,160 **** # entrez efetch. Multiple PubMed IDs can be provided: # Bio::PubMed.efetch(123) - # Bio::PubMed.efetch(123,456,789) # Bio::PubMed.efetch([123,456,789]) # --- # *Arguments*: # * _ids_: list of PubMed IDs (required) ! # *Returns*:: MEDLINE formatted String ! def self.efetch(*ids) ! return [] if ids.empty? ! ! host = "eutils.ncbi.nlm.nih.gov" ! path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id=" ! ! list = ids.join(",") ! ! 
http = Bio::Command.new_http(host) ! response, = http.get(path + list) ! result = response.body ! result = result.split(/\n\n+/) ! return result ! end ! ! def self.efetch2(ids, hash = {}) ! return "" if ids.empty? ids = ids.join(",") if ids === Array --- 134,144 ---- # entrez efetch. Multiple PubMed IDs can be provided: # Bio::PubMed.efetch(123) # Bio::PubMed.efetch([123,456,789]) # --- # *Arguments*: # * _ids_: list of PubMed IDs (required) ! # *Returns*:: Array of MEDLINE formatted String ! def self.efetch(ids, hash = {}) ! return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array *************** *** 169,172 **** --- 153,158 ---- opts.update(hash) + self.ncbi_access_wait + response, = Bio::Command.post_form(serv, opts) result = response.body *************** *** 174,178 **** result = result.split(/\n\n+/) end - return result end --- 160,163 ---- *************** *** 254,266 **** puts "--- Search PubMed by E-Utils ---" ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"}) ! ! Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x| ! p x end puts "--- Retrieve PubMed entry by E-Utils ---" ! puts Bio::PubMed.efetch("10592173", "14693808") ! puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"}) puts "--- Search PubMed by Entrez CGI ---" --- 239,266 ---- puts "--- Search PubMed by E-Utils ---" ! opts = {"rettype" => "count"} ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts) ! puts Time.now ! Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| ! puts x end puts "--- Retrieve PubMed entry by E-Utils ---" ! puts Time.now ! puts Bio::PubMed.efetch(16381885) ! puts Time.now ! puts Bio::PubMed.efetch("16381885") ! puts Time.now ! 
puts Bio::PubMed.efetch("16381885") ! puts Time.now ! opts = {"retmode" => "xml"} ! puts Bio::PubMed.efetch([10592173, 14693808], opts) ! puts Time.now ! puts Bio::PubMed.efetch(["10592173", "14693808"], opts) puts "--- Search PubMed by Entrez CGI ---" *************** *** 270,278 **** puts "--- Retrieve PubMed entry by Entrez CGI ---" ! puts Bio::PubMed.query("10592173") puts "--- Retrieve PubMed entry by PMfetch ---" ! puts Bio::PubMed.pmfetch("10592173") end --- 270,278 ---- puts "--- Retrieve PubMed entry by Entrez CGI ---" ! puts Bio::PubMed.query("16381885") puts "--- Retrieve PubMed entry by PMfetch ---" ! puts Bio::PubMed.pmfetch("16381885") end From k at dev.open-bio.org Tue Nov 27 07:09:45 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Tue, 27 Nov 2007 07:09:45 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17 Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg In directory dev.open-bio.org:/tmp/cvs-serv20621 Modified Files: compound.rb Log Message: * remark method is added Index: compound.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v retrieving revision 0.16 retrieving revision 0.17 diff -C2 -d -r0.16 -r0.17 *** compound.rb 28 Jun 2007 11:27:24 -0000 0.16 --- compound.rb 27 Nov 2007 07:09:43 -0000 0.17 *************** *** 46,49 **** --- 46,54 ---- end + # REMARK + def remark + field_fetch('REMARK') + end + # GLYCAN def glycans From k at dev.open-bio.org Wed Nov 28 06:34:35 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Wed, 28 Nov 2007 06:34:35 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22 Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv23044 Modified Files: pubmed.rb Log Message: * all class methods are changed to 
instance methods (class methods still remain for backward compatibility) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.21 retrieving revision 1.22 diff -C2 -d -r1.21 -r1.22 *** pubmed.rb 20 Nov 2007 15:22:03 -0000 1.21 --- pubmed.rb 28 Nov 2007 06:34:33 -0000 1.22 *************** *** 79,83 **** @@last_access = nil ! def self.ncbi_access_wait(wait = NCBI_INTERVAL) if @@last_access duration = Time.now - @@last_access --- 79,85 ---- @@last_access = nil ! private ! ! def ncbi_access_wait(wait = NCBI_INTERVAL) if @@last_access duration = Time.now - @@last_access *************** *** 89,92 **** --- 91,96 ---- end + public + # Search the PubMed database by given keywords using E-Utils and returns # an array of PubMed IDs. *************** *** 107,111 **** # * _rettype_ # *Returns*:: array of PubMed IDs or a number of results ! def self.esearch(str, hash = {}) return nil if str.empty? --- 111,115 ---- # * _rettype_ # *Returns*:: array of PubMed IDs or a number of results ! def esearch(str, hash = {}) return nil if str.empty? *************** *** 119,123 **** opts.update(hash) ! self.ncbi_access_wait response, = Bio::Command.post_form(serv, opts) --- 123,127 ---- opts.update(hash) ! ncbi_access_wait response, = Bio::Command.post_form(serv, opts) *************** *** 139,143 **** # * _ids_: list of PubMed IDs (required) # *Returns*:: Array of MEDLINE formatted String ! def self.efetch(ids, hash = {}) return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array --- 143,147 ---- # * _ids_: list of PubMed IDs (required) # *Returns*:: Array of MEDLINE formatted String ! def efetch(ids, hash = {}) return nil if ids.to_s.empty? ids = ids.join(",") if ids === Array *************** *** 153,157 **** opts.update(hash) ! self.ncbi_access_wait response, = Bio::Command.post_form(serv, opts) --- 157,161 ---- opts.update(hash) !
ncbi_access_wait response, = Bio::Command.post_form(serv, opts) *************** *** 170,177 **** # * _id_: query string (required) # *Returns*:: array of PubMed IDs ! def self.search(str) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" http = Bio::Command.new_http(host) response, = http.get(path + CGI.escape(str)) --- 174,183 ---- # * _id_: query string (required) # *Returns*:: array of PubMed IDs ! def search(str) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term=" + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + CGI.escape(str)) *************** *** 187,196 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.query(*ids) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - list = ids.join(",") http = Bio::Command.new_http(host) response, = http.get(path + list) --- 193,203 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def query(*ids) host = "www.ncbi.nlm.nih.gov" path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" list = ids.join(",") + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + list) *************** *** 216,223 **** # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! def self.pmfetch(id) host = "www.ncbi.nlm.nih.gov" path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id=" http = Bio::Command.new_http(host) response, = http.get(path + id.to_s) --- 223,232 ---- # * _id_: PubMed ID (required) # *Returns*:: MEDLINE formatted String ! 
def pmfetch(id) host = "www.ncbi.nlm.nih.gov" path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id=" + ncbi_access_wait + http = Bio::Command.new_http(host) response, = http.get(path + id.to_s) *************** *** 231,234 **** --- 240,263 ---- end + def self.esearch(*args) + self.new.esearch(*args) + end + + def self.efetch(*args) + self.new.efetch(*args) + end + + def self.search(*args) + self.new.search(*args) + end + + def self.query(*args) + self.new.query(*args) + end + + def self.pmfetch(*args) + self.new.pmfetch(*args) + end + end # PubMed *************** *** 238,241 **** --- 267,316 ---- if __FILE__ == $0 + puts "=== instance methods ===" + + pubmed = Bio::PubMed.new + + puts "--- Search PubMed by E-Utils ---" + opts = {"rettype" => "count"} + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts) + puts Time.now + pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| + puts x + end + + puts "--- Retrieve PubMed entry by E-Utils ---" + puts Time.now + puts pubmed.efetch(16381885) + puts Time.now + puts pubmed.efetch("16381885") + puts Time.now + puts pubmed.efetch("16381885") + puts Time.now + opts = {"retmode" => "xml"} + puts pubmed.efetch([10592173, 14693808], opts) + puts Time.now + puts pubmed.efetch(["10592173", "14693808"], opts) + + puts "--- Search PubMed by Entrez CGI ---" + pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x| + p x + end + + puts "--- Retrieve PubMed entry by Entrez CGI ---" + puts pubmed.query("16381885") + + + puts "--- Retrieve PubMed entry by PMfetch ---" + puts pubmed.pmfetch("16381885") + + + puts "=== class methods ===" + + puts "--- Search PubMed by E-Utils ---" opts = {"rettype" => "count"}