From aerts at dev.open-bio.org Sun Nov 4 06:51:01 2007 From: aerts at dev.open-bio.org (Jan Aerts) Date: Sun, 04 Nov 2007 11:51:01 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17 Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv7743 Modified Files: pubmed.rb Log Message: Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa) Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.16 retrieving revision 1.17 diff -C2 -d -r1.16 -r1.17 *** pubmed.rb 5 Apr 2007 23:35:41 -0000 1.16 --- pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 *************** *** 75,80 **** # *Returns*:: array of PubMed IDs def self.search(str) ! host = "www.ncbi.nlm.nih.gov" ! path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term=" http = Bio::Command.new_http(host) --- 75,80 ---- # *Returns*:: array of PubMed IDs def self.search(str) ! host = 'www.ncbi.nlm.nih.gov' ! path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" http = Bio::Command.new_http(host) From k at dev.open-bio.org Sat Nov 10 03:21:56 2007 From: k at dev.open-bio.org (Katayama Toshiaki) Date: Sat, 10 Nov 2007 08:21:56 +0000 Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18 Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org> Update of /home/repository/bioruby/bioruby/lib/bio/io In directory dev.open-bio.org:/tmp/cvs-serv21448 Modified Files: pubmed.rb Log Message: * search, query is fixed to use new NCBI URI (previous fix was wrong and insufficient). 
* esearch is enhanced to accept hash['rettype'] == "count" as suggested by Kaustubh Patil Index: pubmed.rb =================================================================== RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v retrieving revision 1.17 retrieving revision 1.18 diff -C2 -d -r1.17 -r1.18 *** pubmed.rb 4 Nov 2007 11:50:59 -0000 1.17 --- pubmed.rb 10 Nov 2007 08:21:54 -0000 1.18 *************** *** 19,34 **** # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two ! # types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#search ! # * Bio::PubMed#esearch # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#query ! # * Bio::PubMed#pmfetch ! # * Bio::PubMed#efetch # # The different methods within the same group are interchangeable and should --- 19,35 ---- # The Bio::PubMed class provides several ways to retrieve bibliographic # information from the PubMed database at ! # http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed ! # ! # Basically, two types of queries are possible: # # * searching for PubMed IDs given a query string: ! # * Bio::PubMed#esearch (recommended) ! # * Bio::PubMed#search (only retrieves top 20 hits) # # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) # given a PubMed ID ! # * Bio::PubMed#efetch (recommended) ! # * Bio::PubMed#query (unstable for the change of the HTML design) ! # * Bio::PubMed#pmfetch (still working but could be obsoleted by NCBI) # # The different methods within the same group are interchangeable and should *************** *** 38,48 **** # APIs can be found on the following websites: # ! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html ! 
# * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat ! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags ! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html ! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html # # == Usage --- 39,50 ---- # APIs can be found on the following websites: # ! # * PubMed Overview: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html ! # * PubMed help: ! # http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html ! # * Entrez utilities index: ! # http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html ! # * How to link: ! # http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp # # == Usage *************** *** 51,89 **** # # # If you don't know the pubmed ID: ! # Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end ! # Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x| # p x # end # # # To retrieve the MEDLINE entry for a given PubMed ID: # puts Bio::PubMed.query("10592173") # puts Bio::PubMed.pmfetch("10592173") ! # puts Bio::PubMed.efetch("10592173", "14693808") # # This can be converted into a Bio::MEDLINE object: # manuscript = Bio::PubMed.query("10592173") ! # medline = Bio::MEDLINE(manuscript) # class PubMed - # Search the PubMed database by given keywords using entrez query and returns - # an array of PubMed IDs. 
- # --- - # *Arguments*: - # * _id_: query string (required) - # *Returns*:: array of PubMed IDs - def self.search(str) - host = 'www.ncbi.nlm.nih.gov' - path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid=" - - http = Bio::Command.new_http(host) - response, = http.get(path + CGI.escape(str)) - result = response.body - result = result.gsub("\r", "\n").squeeze("\n") - result = result.scan(/
(.*?)<\/pre>/m).flatten
-     return result
-   end
- 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
--- 53,75 ----
  #
  #   # If you don't know the pubmed ID:
! #   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
! #
! #   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
  #   
  #   # To retrieve the MEDLINE entry for a given PubMed ID:
+ #   puts Bio::PubMed.efetch("10592173", "14693808")
  #   puts Bio::PubMed.query("10592173")
  #   puts Bio::PubMed.pmfetch("10592173")
! #
  #   # This can be converted into a Bio::MEDLINE object:
  #   manuscript = Bio::PubMed.query("10592173")
! #   medline = Bio::MEDLINE.new(manuscript)
  #  
  class PubMed
  
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 103,107 ****
    # * _retmode_
    # * _rettype_
!   # *Returns*:: array of PubMed IDs
    def self.esearch(str, hash = {})
      hash['retmax'] = 100 unless hash['retmax']
--- 89,93 ----
    # * _retmode_
    # * _rettype_
!   # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
      hash['retmax'] = 100 unless hash['retmax']
***************
*** 118,122 ****
      response, = http.get(path + CGI.escape(str))
      result = response.body
!     result = result.scan(/(.*?)<\/Id>/m).flatten
      return result
    end
--- 104,154 ----
      response, = http.get(path + CGI.escape(str))
      result = response.body
!     if hash['rettype'] == 'count'
!       result = result.scan(/(.*?)<\/Count>/m).flatten.first.to_i
!     else
!       result = result.scan(/(.*?)<\/Id>/m).flatten
!     end
!     return result
!   end
! 
!   # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
!   # entrez efetch. Multiple PubMed IDs can be provided:
!   #   Bio::PubMed.efetch(123)
!   #   Bio::PubMed.efetch(123,456,789)
!   #   Bio::PubMed.efetch([123,456,789])
!   # ---
!   # *Arguments*:
!   # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: MEDLINE formatted String
!   def self.efetch(*ids)
!     return [] if ids.empty?
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! 
!     list = ids.join(",")
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
!     result = response.body
!     result = result.split(/\n\n+/)
!     return result
!   end
! 
!   # Search the PubMed database by given keywords using entrez query and returns
!   # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
!   # Instead, use of the 'esearch' method is strongly recomended.
!   # ---
!   # *Arguments*:
!   # * _id_: query string (required)
!   # *Returns*:: array of PubMed IDs
!   def self.search(str)
!     host = "www.ncbi.nlm.nih.gov"
!     path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + CGI.escape(str))
!     result = response.body
!     result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
      return result
    end
***************
*** 128,143 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(id)
      host = "www.ncbi.nlm.nih.gov"
!     path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
  
      http = Bio::Command.new_http(host)
!     response, = http.get(path + id.to_s)
      result = response.body
!     if result =~ /#{id}\s+Error/
        raise( result )
      else
!       result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!       return result
      end
    end
--- 160,183 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(*ids)
      host = "www.ncbi.nlm.nih.gov"
!     path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! 
!     list = ids.join(",")
  
      http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
      result = response.body
!     result = result.scan(/
\s*(.*?)<\/pre>/m).flatten
! 
!     if result =~ /id:.*Error occurred/
!       # id: xxxxx Error occurred: Article does not exist
        raise( result )
      else
!       if ids.size > 1
!         return result
!       else
!         return result.first
!       end
      end
    end
***************
*** 164,191 ****
    end
  
-   # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
-   # entrez efetch. Multiple PubMed IDs can be provided:
-   #   Bio::PubMed.efetch(123)
-   #   Bio::PubMed.efetch(123,456,789)
-   #   Bio::PubMed.efetch([123,456,789])
-   # ---
-   # *Arguments*:
-   # * _ids_: list of PubMed IDs (required)
-   # *Returns*:: MEDLINE formatted String
-   def self.efetch(*ids)
-     return [] if ids.empty?
- 
-     host = "eutils.ncbi.nlm.nih.gov"
-     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
- 
-     ids = ids.join(",")
- 
-     http = Bio::Command.new_http(host)
-     response, = http.get(path + ids)
-     result = response.body
-     result = result.split(/\n\n+/)
-     return result
-   end
- 
  end # PubMed
  
--- 204,207 ----
***************
*** 195,211 ****
  if __FILE__ == $0
  
!   puts Bio::PubMed.query("10592173")
!   puts "--- ---"
!   puts Bio::PubMed.pmfetch("10592173")
!   puts "--- ---"
!   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
!     p x
!   end
!   puts "--- ---"
    Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
!   puts "--- ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
  
  end
--- 211,233 ----
  if __FILE__ == $0
  
!   puts "--- Search PubMed by E-Utils ---"
    Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
! 
!   puts "--- Retrieve PubMed entry by E-Utils ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
  
+   puts "--- Search PubMed by Entrez CGI ---"
+   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+     p x
+   end
+ 
+   puts "--- Retrieve PubMed entry by Entrez CGI ---"
+   puts Bio::PubMed.query("10592173")
+ 
+ 
+   puts "--- Retrieve PubMed entry by PMfetch ---"
+   puts Bio::PubMed.pmfetch("10592173")
+ 
  end


From k at dev.open-bio.org  Sat Nov 10 03:28:52 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:28:52 +0000
Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69
Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby
In directory dev.open-bio.org:/tmp/cvs-serv21471

Modified Files:
	ChangeLog 
Log Message:
* updated


Index: ChangeLog
===================================================================
RCS file: /home/repository/bioruby/bioruby/ChangeLog,v
retrieving revision 1.68
retrieving revision 1.69
diff -C2 -d -r1.68 -r1.69
*** ChangeLog	19 Jul 2007 04:08:47 -0000	1.68
--- ChangeLog	10 Nov 2007 08:28:50 -0000	1.69
***************
*** 1,2 ****
--- 1,9 ----
+ 2007-11-10  Toshiaki Katayama 
+ 
+ 	* lib/bio/io/pubmed.rb:
+ 
+ 	  Fixed search, query methods (but use of esearch and efetch is
+ 	  strongly recommended).
+ 
  2007-07-19  Toshiaki Katayama 
  
***************
*** 415,419 ****
  	  visual effects.
  
! 	* lib/bio/.rb
  
  	  Extended to have Bio.command where command can be any BioRuby
--- 422,426 ----
  	  visual effects.
  
! 	* lib/bio.rb
  
  	  Extended to have Bio.command where command can be any BioRuby


From nakao at dev.open-bio.org  Sat Nov 10 11:57:45 2007
From: nakao at dev.open-bio.org (Mitsuteru C. Nakao)
Date: Sat, 10 Nov 2007 16:57:45 +0000
Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4,
	1.5
Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/functional/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io

Modified Files:
	test_ensembl.rb 
Log Message:
* Updated some expected values of test_gff_exportview*.


Index: test_ensembl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** test_ensembl.rb	5 Apr 2007 23:35:42 -0000	1.4
--- test_ensembl.rb	10 Nov 2007 16:57:43 -0000	1.5
***************
*** 74,78 ****
  
     def test_gff_exportview
!      line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
       gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
       assert_equal(line, gff)
--- 74,95 ----
  
     def test_gff_exportview
!      line = ["chromosome:NCBI36:4:1149206:1149209:1", 
!              "Ensembl", 
!              "Gene", 
!              "-839",
!              "2747", 
!              ".", 
!              "+",
!              ".",
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t")  + "\n"
!      line = ["4", 
!              "Ensembl", 
!              "Gene", 
!              "1148366", 
!              "1151952", 
!              ".", 
!              "+", 
!              "1", 
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
       gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
       assert_equal(line, gff)
***************
*** 80,84 ****
  
     def test_gff_exportview_with_named_args
!      line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
--- 97,118 ----
  
     def test_gff_exportview_with_named_args
!      line = ["chromosome:NCBI36:4:1149206:1149209:1",
!              "Ensembl",
!              "Gene",
!              "-839",
!              "2747",
!              ".",
!              "+",
!              ".",
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
!      line = ["4", 
!              "Ensembl", 
!              "Gene", 
!              "1148366", 
!              "1151952", 
!              ".", 
!              "+", 
!              "1", 
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
***************
*** 89,93 ****
  
     def test_tab_exportview_with_named_args
!      line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
--- 123,176 ----
  
     def test_tab_exportview_with_named_args
!      line = [["seqname",
!              "source",
!              "feature",
!              "start",
!              "end",
!              "score",
!              "strand",
!              "frame",
!              "gene_id",
!              "transcript_id",
!              "exon_id",
!              "gene_type"].join("\t"),
!              ["chromosome:NCBI36:4:1149206:1149209:1",
!               "Ensembl",
!               "Gene",
!               "-839",
!               "2747",
!               ".",
!               "+",
!               ".",
!               "ENSG00000206158",
!               "ENST00000382964",
!               "ENSE00001494097",
!               "KNOWN_protein_coding"].join("\t") + "\n"
!      ].join("\n")
!      line = [["seqname",
!              "source",
!              "feature",
!              "start",
!              "end",
!              "score",
!              "strand",
!              "frame",
!              "gene_id",
!              "transcript_id",
!              "exon_id",
!              "gene_type"].join("\t"),
!              ["4",
!               "Ensembl",
!               "Gene",
!               "1148366",
!               "1151952",
!               ".",
!               "+",
!               "1",
!               "ENSG00000206158",
!               "ENST00000382964",
!               "ENSE00001494097",
!               "KNOWN_protein_coding"].join("\t") + "\n"
!      ].join("\n")
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,


From k at dev.open-bio.org  Thu Nov 15 02:07:18 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:07:18 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61
Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6671/io

Modified Files:
	flatfile.rb 
Log Message:
* the first line of the MEDLINE entry is changed from UI to PMID


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** flatfile.rb	9 Jul 2007 14:08:34 -0000	1.60
--- flatfile.rb	15 Nov 2007 07:07:16 -0000	1.61
***************
*** 1131,1135 ****
              /^LOCUS       .+ aa .+/ ],
            medline  = RuleRegexp[ 'Bio::MEDLINE',
!             /^UI  \- [0-9]+$/ ],
            embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
--- 1131,1135 ----
              /^LOCUS       .+ aa .+/ ],
            medline  = RuleRegexp[ 'Bio::MEDLINE',
!             /^PMID\- [0-9]+$/ ],
            embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],


From k at dev.open-bio.org  Thu Nov 15 02:08:51 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:08:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19
Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv6723/shell

Modified Files:
	interface.rb 
Log Message:
* fixed that savefile("hoge", obj) created "datahoge" file instead of "data/hoge"


Index: interface.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** interface.rb	26 Jun 2007 08:38:38 -0000	1.18
--- interface.rb	15 Nov 2007 07:08:49 -0000	1.19
***************
*** 153,157 ****
      message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
      if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
!       file = datadir + file
      end
      if File.exists?(file)
--- 153,157 ----
      message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
      if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
!       file = File.join(datadir, file)
      end
      if File.exists?(file)


From k at dev.open-bio.org  Thu Nov 15 02:23:41 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:23:41 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19
Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6745/io

Modified Files:
	pubmed.rb 
Log Message:
* esearch2, efetch2: candidates for a better replacement of the esearch and efetch methods, which are enhanced to accept options as a hash and utilize Bio::Command.post_form for the options


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** pubmed.rb	10 Nov 2007 08:21:54 -0000	1.18
--- pubmed.rb	15 Nov 2007 07:23:39 -0000	1.19
***************
*** 9,15 ****
  #
  
- require 'net/http'
- require 'cgi' unless defined?(CGI)
  require 'bio/command'
  
  module Bio
--- 9,14 ----
  #
  
  require 'bio/command'
+ require 'cgi' unless defined?(CGI)
  
  module Bio
***************
*** 112,115 ****
--- 111,134 ----
    end
  
+   def self.esearch2(str, hash = {})
+     serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+     opts = {
+       "retmax" => 100,
+       "tool"   => "bioruby",
+       "db"     => "pubmed",
+       "term"   => str
+     }
+     opts.update(hash)
+ 
+     response, = Bio::Command.post_form(serv, opts)
+     result = response.body
+     if opts['rettype'] == 'count'
+       result = result.scan(/(.*?)<\/Count>/m).flatten.first.to_i
+     else
+       result = result.scan(/(.*?)<\/Id>/m).flatten
+     end
+     return result
+   end
+ 
    # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
    # entrez efetch. Multiple PubMed IDs can be provided:
***************
*** 132,136 ****
      response, = http.get(path + list)
      result = response.body
!     result = result.split(/\n\n+/)
      return result
    end
--- 151,173 ----
      response, = http.get(path + list)
      result = response.body
!     return result
!   end
! 
!   def self.efetch2(ids, hash = {})
!     return "" if ids.empty?
!     ids = ids.join(",") if ids === Array
! 
!     serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
!     opts = {
!       "tool"     => "bioruby",
!       "db"       => "pubmed",
!       "retmode"  => "text",
!       "rettype"  => "medline",
!       "id"       => ids,
!     }
!     opts.update(hash)
! 
!     response, = Bio::Command.post_form(serv, opts)
!     result = response.body
      return result
    end
***************
*** 212,216 ****
  
    puts "--- Search PubMed by E-Utils ---"
!   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
--- 249,255 ----
  
    puts "--- Search PubMed by E-Utils ---"
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
! 
!   Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
***************
*** 218,221 ****
--- 257,261 ----
    puts "--- Retrieve PubMed entry by E-Utils ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
+   puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
  
    puts "--- Search PubMed by Entrez CGI ---"


From k at dev.open-bio.org  Thu Nov 15 02:40:29 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:40:29 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20
Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6790

Modified Files:
	pubmed.rb 
Log Message:
* go back to splitting multiple MEDLINE entries into an array when not in XML mode


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** pubmed.rb	15 Nov 2007 07:23:39 -0000	1.19
--- pubmed.rb	15 Nov 2007 07:40:27 -0000	1.20
***************
*** 151,154 ****
--- 151,155 ----
      response, = http.get(path + list)
      result = response.body
+     result = result.split(/\n\n+/)
      return result
    end
***************
*** 170,173 ****
--- 171,178 ----
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
+     if opts["retmode"] == "text"
+       result = result.split(/\n\n+/)
+     end
+ 
      return result
    end


From k at dev.open-bio.org  Tue Nov 20 10:22:05 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 20 Nov 2007 15:22:05 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21
Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040

Modified Files:
	pubmed.rb 
Log Message:
* ncbi_access_wait is introduced to wait for 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb	15 Nov 2007 07:40:27 -0000	1.20
--- pubmed.rb	20 Nov 2007 15:22:03 -0000	1.21
***************
*** 2,6 ****
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001 Toshiaki Katayama 
  # Copyright::  Copyright (C) 2006 Jan Aerts 
  # License::    The Ruby License
--- 2,6 ----
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001, 2007 Toshiaki Katayama 
  # Copyright::  Copyright (C) 2006 Jan Aerts 
  # License::    The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
  class PubMed
  
+   # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+   # weekdays for any series of more than 100 requests.
+   # -> Not implemented yet in BioRuby
+ 
+   # Make no more than one request every 3 seconds.
+   NCBI_INTERVAL = 3
+   @@last_access = nil
+ 
+   def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+     if @@last_access
+       duration = Time.now - @@last_access
+       if wait > duration
+         sleep wait - duration
+       end
+     end
+     @@last_access = Time.now
+   end
+ 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 90,115 ****
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     hash['retmax'] = 100 unless hash['retmax']
! 
!     opts = []
!     hash.each do |k, v|
!       opts << "#{k}=#{v}"
!     end
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + CGI.escape(str))
!     result = response.body
!     if hash['rettype'] == 'count'
!       result = result.scan(/(.*?)<\/Count>/m).flatten.first.to_i
!     else
!       result = result.scan(/(.*?)<\/Id>/m).flatten
!     end
!     return result
!   end
  
-   def self.esearch2(str, hash = {})
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
--- 108,113 ----
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     return nil if str.empty?
  
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
***************
*** 121,124 ****
--- 119,124 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 134,160 ****
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
-   #   Bio::PubMed.efetch(123,456,789)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: MEDLINE formatted String
!   def self.efetch(*ids)
!     return [] if ids.empty?
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! 
!     list = ids.join(",")
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
!     result = response.body
!     result = result.split(/\n\n+/)
!     return result
!   end
! 
!   def self.efetch2(ids, hash = {})
!     return "" if ids.empty?
      ids = ids.join(",") if ids === Array
  
--- 134,144 ----
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: Array of MEDLINE formatted String
!   def self.efetch(ids, hash = {})
!     return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
  
***************
*** 169,172 ****
--- 153,158 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 174,178 ****
        result = result.split(/\n\n+/)
      end
- 
      return result
    end
--- 160,163 ----
***************
*** 254,266 ****
  
    puts "--- Search PubMed by E-Utils ---"
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
! 
!   Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
!     p x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Bio::PubMed.efetch("10592173", "14693808")
!   puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
  
    puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
  
    puts "--- Search PubMed by E-Utils ---"
!   opts = {"rettype" => "count"}
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
!     puts x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Time.now
!   puts Bio::PubMed.efetch(16381885)
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   opts = {"retmode" => "xml"}
!   puts Bio::PubMed.efetch([10592173, 14693808], opts)
!   puts Time.now
!   puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
  
    puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("10592173")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("10592173")
  
  end
--- 270,278 ----
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("16381885")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("16381885")
  
  end


From k at dev.open-bio.org  Tue Nov 27 02:09:45 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 27 Nov 2007 07:09:45 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17
Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv20621

Modified Files:
	compound.rb 
Log Message:
* remark method is added


Index: compound.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v
retrieving revision 0.16
retrieving revision 0.17
diff -C2 -d -r0.16 -r0.17
*** compound.rb	28 Jun 2007 11:27:24 -0000	0.16
--- compound.rb	27 Nov 2007 07:09:43 -0000	0.17
***************
*** 46,49 ****
--- 46,54 ----
    end
  
+   # REMARK
+   def remark
+     field_fetch('REMARK')
+   end
+ 
    # GLYCAN
    def glycans


From k at dev.open-bio.org  Wed Nov 28 01:34:35 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Wed, 28 Nov 2007 06:34:35 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22
Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv23044

Modified Files:
	pubmed.rb 
Log Message:
* all class methods are changed to instance methods (class methods
  still remain for backward compatibility)


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pubmed.rb	20 Nov 2007 15:22:03 -0000	1.21
--- pubmed.rb	28 Nov 2007 06:34:33 -0000	1.22
***************
*** 79,83 ****
    @@last_access = nil
  
!   def self.ncbi_access_wait(wait = NCBI_INTERVAL)
      if @@last_access
        duration = Time.now - @@last_access
--- 79,85 ----
    @@last_access = nil
  
!   private
! 
!   def ncbi_access_wait(wait = NCBI_INTERVAL)
      if @@last_access
        duration = Time.now - @@last_access
***************
*** 89,92 ****
--- 91,96 ----
    end
  
+   public
+ 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 107,111 ****
    # * _rettype_
    # *Returns*:: array of PubMed IDs or a number of results
!   def self.esearch(str, hash = {})
      return nil if str.empty?
  
--- 111,115 ----
    # * _rettype_
    # *Returns*:: array of PubMed IDs or a number of results
!   def esearch(str, hash = {})
      return nil if str.empty?
  
***************
*** 119,123 ****
      opts.update(hash)
  
!     self.ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
--- 123,127 ----
      opts.update(hash)
  
!     ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
***************
*** 139,143 ****
    # * _ids_: list of PubMed IDs (required)
    # *Returns*:: Array of MEDLINE formatted String
!   def self.efetch(ids, hash = {})
      return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
--- 143,147 ----
    # * _ids_: list of PubMed IDs (required)
    # *Returns*:: Array of MEDLINE formatted String
!   def efetch(ids, hash = {})
      return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
***************
*** 153,157 ****
      opts.update(hash)
  
!     self.ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
--- 157,161 ----
      opts.update(hash)
  
!     ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
***************
*** 170,177 ****
    # * _id_: query string (required)
    # *Returns*:: array of PubMed IDs
!   def self.search(str)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + CGI.escape(str))
--- 174,183 ----
    # * _id_: query string (required)
    # *Returns*:: array of PubMed IDs
!   def search(str)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + CGI.escape(str))
***************
*** 187,196 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(*ids)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
- 
      list = ids.join(",")
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + list)
--- 193,203 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def query(*ids)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
      list = ids.join(",")
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + list)
***************
*** 216,223 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.pmfetch(id)
      host = "www.ncbi.nlm.nih.gov"
      path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + id.to_s)
--- 223,232 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def pmfetch(id)
      host = "www.ncbi.nlm.nih.gov"
      path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + id.to_s)
***************
*** 231,234 ****
--- 240,263 ----
    end
  
+   def self.esearch(*args)
+     self.new.esearch(*args)
+   end
+ 
+   def self.efetch(*args)
+     self.new.efetch(*args)
+   end
+ 
+   def self.search(*args)
+     self.new.search(*args)
+   end
+ 
+   def self.query(*args)
+     self.new.query(*args)
+   end
+ 
+   def self.pmfetch(*args)
+     self.new.pmfetch(*args)
+   end
+ 
  end # PubMed
  
***************
*** 238,241 ****
--- 267,316 ----
  if __FILE__ == $0
  
+   puts "=== instance methods ==="
+ 
+   pubmed = Bio::PubMed.new
+ 
+   puts "--- Search PubMed by E-Utils ---"
+   opts = {"rettype" => "count"}
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+     puts x
+   end
+ 
+   puts "--- Retrieve PubMed entry by E-Utils ---"
+   puts Time.now
+   puts pubmed.efetch(16381885)
+   puts Time.now
+   puts pubmed.efetch("16381885")
+   puts Time.now
+   puts pubmed.efetch("16381885")
+   puts Time.now
+   opts = {"retmode" => "xml"}
+   puts pubmed.efetch([10592173, 14693808], opts)
+   puts Time.now
+   puts pubmed.efetch(["10592173", "14693808"], opts)
+ 
+   puts "--- Search PubMed by Entrez CGI ---"
+   pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+     p x
+   end
+ 
+   puts "--- Retrieve PubMed entry by Entrez CGI ---"
+   puts pubmed.query("16381885")
+ 
+ 
+   puts "--- Retrieve PubMed entry by PMfetch ---"
+   puts pubmed.pmfetch("16381885")
+ 
+ 
+   puts "=== class methods ==="
+ 
+ 
    puts "--- Search PubMed by E-Utils ---"
    opts = {"rettype" => "count"}


From aerts at dev.open-bio.org  Sun Nov  4 11:51:01 2007
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Sun, 04 Nov 2007 11:51:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.16,1.17
Message-ID: <200711041151.lA4Bp1lq007763@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv7743

Modified Files:
	pubmed.rb 
Log Message:
Fixed bug #11736: change to pubmed interface (reported by Masahide Kikkawa)


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** pubmed.rb	5 Apr 2007 23:35:41 -0000	1.16
--- pubmed.rb	4 Nov 2007 11:50:59 -0000	1.17
***************
*** 75,80 ****
    # *Returns*:: array of PubMed IDs
    def self.search(str)
!     host = "www.ncbi.nlm.nih.gov"
!     path = "/entrez/query.fcgi?tool=bioruby&cmd=Search&doptcmdl=MEDLINE&db=PubMed&term="
  
      http = Bio::Command.new_http(host)
--- 75,80 ----
    # *Returns*:: array of PubMed IDs
    def self.search(str)
!     host = 'www.ncbi.nlm.nih.gov'
!     path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
  
      http = Bio::Command.new_http(host)



From k at dev.open-bio.org  Sat Nov 10 08:21:56 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:21:56 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.17,1.18
Message-ID: <200711100821.lAA8LunA021453@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv21448

Modified Files:
	pubmed.rb 
Log Message:
* search, query is fixed to use new NCBI URI (previous fix was wrong and
  insufficient).
* esearch is enhanced to accept hash['rettype'] == "count" as suggested
  by Kaustubh Patil


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -d -r1.17 -r1.18
*** pubmed.rb	4 Nov 2007 11:50:59 -0000	1.17
--- pubmed.rb	10 Nov 2007 08:21:54 -0000	1.18
***************
*** 19,34 ****
  # The Bio::PubMed class provides several ways to retrieve bibliographic
  # information from the PubMed database at
! # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
! # types of queries are possible:
  #
  # * searching for PubMed IDs given a query string:
! #   * Bio::PubMed#search
! #   * Bio::PubMed#esearch
  #
  # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
  #   given a PubMed ID
! #   * Bio::PubMed#query
! #   * Bio::PubMed#pmfetch
! #   * Bio::PubMed#efetch
  #
  # The different methods within the same group are interchangeable and should
--- 19,35 ----
  # The Bio::PubMed class provides several ways to retrieve bibliographic
  # information from the PubMed database at
! #   http://www.ncbi.nlm.nih.gov/sites/entrez?db=PubMed
! #
! # Basically, two types of queries are possible:
  #
  # * searching for PubMed IDs given a query string:
! #   * Bio::PubMed#esearch  (recommended)
! #   * Bio::PubMed#search   (only retrieves top 20 hits)
  #
  # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...)
  #   given a PubMed ID
! #   * Bio::PubMed#efetch   (recommended)
! #   * Bio::PubMed#query    (unstable for the change of the HTML design)
! #   * Bio::PubMed#pmfetch  (still working but could be obsoleted by NCBI)
  #
  # The different methods within the same group are interchangeable and should
***************
*** 38,48 ****
  # APIs can be found on the following websites:
  #
! # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
! # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
! # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
! # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
! # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
  #
  # == Usage
--- 39,50 ----
  # APIs can be found on the following websites:
  #
! # * PubMed Overview:
! #     http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
! # * PubMed help:
! #     http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html
! # * Entrez utilities index:
! #      http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
! # * How to link:
! #     http://www.ncbi.nlm.nih.gov/books/bv.fcgi?rid=helplinks.chapter.linkshelp
  #
  # == Usage
***************
*** 51,89 ****
  #
  #   # If you don't know the pubmed ID:
! #   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
! #   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
  #   
  #   # To retrieve the MEDLINE entry for a given PubMed ID:
  #   puts Bio::PubMed.query("10592173")
  #   puts Bio::PubMed.pmfetch("10592173")
! #   puts Bio::PubMed.efetch("10592173", "14693808")
  #   # This can be converted into a Bio::MEDLINE object:
  #   manuscript = Bio::PubMed.query("10592173")
! #   medline = Bio::MEDLINE(manuscript)
  #  
  class PubMed
  
-   # Search the PubMed database by given keywords using entrez query and returns
-   # an array of PubMed IDs.
-   # ---
-   # *Arguments*:
-   # * _id_: query string (required)
-   # *Returns*:: array of PubMed IDs
-   def self.search(str)
-     host = 'www.ncbi.nlm.nih.gov'
-     path = "sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
- 
-     http = Bio::Command.new_http(host)
-     response, = http.get(path + CGI.escape(str))
-     result = response.body
-     result = result.gsub("\r", "\n").squeeze("\n")
-     result = result.scan(/<pre>(.*?)<\/pre>/m).flatten
-     return result
-   end
- 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
--- 53,75 ----
  #
  #   # If you don't know the pubmed ID:
! #   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
! #
! #   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
  #     p x
  #   end
  #   
  #   # To retrieve the MEDLINE entry for a given PubMed ID:
+ #   puts Bio::PubMed.efetch("10592173", "14693808")
  #   puts Bio::PubMed.query("10592173")
  #   puts Bio::PubMed.pmfetch("10592173")
! #
  #   # This can be converted into a Bio::MEDLINE object:
  #   manuscript = Bio::PubMed.query("10592173")
! #   medline = Bio::MEDLINE.new(manuscript)
  #  
  class PubMed
  
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 103,107 ****
    # * _retmode_
    # * _rettype_
!   # *Returns*:: array of PubMed IDs
    def self.esearch(str, hash = {})
      hash['retmax'] = 100 unless hash['retmax']
--- 89,93 ----
    # * _retmode_
    # * _rettype_
!   # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
      hash['retmax'] = 100 unless hash['retmax']
***************
*** 118,122 ****
      response, = http.get(path + CGI.escape(str))
      result = response.body
!     result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
      return result
    end
--- 104,154 ----
      response, = http.get(path + CGI.escape(str))
      result = response.body
!     if hash['rettype'] == 'count'
!       result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
!     else
!       result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
!     end
!     return result
!   end
! 
!   # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
!   # entrez efetch. Multiple PubMed IDs can be provided:
!   #   Bio::PubMed.efetch(123)
!   #   Bio::PubMed.efetch(123,456,789)
!   #   Bio::PubMed.efetch([123,456,789])
!   # ---
!   # *Arguments*:
!   # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: MEDLINE formatted String
!   def self.efetch(*ids)
!     return [] if ids.empty?
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! 
!     list = ids.join(",")
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
!     result = response.body
!     result = result.split(/\n\n+/)
!     return result
!   end
! 
!   # Search the PubMed database by given keywords using entrez query and returns
!   # an array of PubMed IDs. Caution: this method returns the first 20 hits only.
!   # Instead, use of the 'esearch' method is strongly recomended.
!   # ---
!   # *Arguments*:
!   # * _id_: query string (required)
!   # *Returns*:: array of PubMed IDs
!   def self.search(str)
!     host = "www.ncbi.nlm.nih.gov"
!     path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + CGI.escape(str))
!     result = response.body
!     result = result.scan(/value="(\d+)" id="UidCheckBox"/m).flatten
      return result
    end
***************
*** 128,143 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(id)
      host = "www.ncbi.nlm.nih.gov"
!     path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
  
      http = Bio::Command.new_http(host)
!     response, = http.get(path + id.to_s)
      result = response.body
!     if result =~ /#{id}\s+Error/
        raise( result )
      else
!       result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!       return result
      end
    end
--- 160,183 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(*ids)
      host = "www.ncbi.nlm.nih.gov"
!     path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! 
!     list = ids.join(",")
  
      http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
      result = response.body
!     result = result.scan(/<pre>\s*(.*?)<\/pre>/m).flatten
! 
!     if result =~ /id:.*Error occurred/
!       # id: xxxxx Error occurred: Article does not exist
        raise( result )
      else
!       if ids.size > 1
!         return result
!       else
!         return result.first
!       end
      end
    end
***************
*** 164,191 ****
    end
  
-   # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
-   # entrez efetch. Multiple PubMed IDs can be provided:
-   #   Bio::PubMed.efetch(123)
-   #   Bio::PubMed.efetch(123,456,789)
-   #   Bio::PubMed.efetch([123,456,789])
-   # ---
-   # *Arguments*:
-   # * _ids_: list of PubMed IDs (required)
-   # *Returns*:: MEDLINE formatted String
-   def self.efetch(*ids)
-     return [] if ids.empty?
- 
-     host = "eutils.ncbi.nlm.nih.gov"
-     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
- 
-     ids = ids.join(",")
- 
-     http = Bio::Command.new_http(host)
-     response, = http.get(path + ids)
-     result = response.body
-     result = result.split(/\n\n+/)
-     return result
-   end
- 
  end # PubMed
  
--- 204,207 ----
***************
*** 195,211 ****
  if __FILE__ == $0
  
!   puts Bio::PubMed.query("10592173")
!   puts "--- ---"
!   puts Bio::PubMed.pmfetch("10592173")
!   puts "--- ---"
!   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
!     p x
!   end
!   puts "--- ---"
    Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
!   puts "--- ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
  
  end
--- 211,233 ----
  if __FILE__ == $0
  
!   puts "--- Search PubMed by E-Utils ---"
    Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
! 
!   puts "--- Retrieve PubMed entry by E-Utils ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
  
+   puts "--- Search PubMed by Entrez CGI ---"
+   Bio::PubMed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+     p x
+   end
+ 
+   puts "--- Retrieve PubMed entry by Entrez CGI ---"
+   puts Bio::PubMed.query("10592173")
+ 
+ 
+   puts "--- Retrieve PubMed entry by PMfetch ---"
+   puts Bio::PubMed.pmfetch("10592173")
+ 
  end



From k at dev.open-bio.org  Sat Nov 10 08:28:52 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sat, 10 Nov 2007 08:28:52 +0000
Subject: [BioRuby-cvs] bioruby ChangeLog,1.68,1.69
Message-ID: <200711100828.lAA8Sq9g021475@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby
In directory dev.open-bio.org:/tmp/cvs-serv21471

Modified Files:
	ChangeLog 
Log Message:
* updated


Index: ChangeLog
===================================================================
RCS file: /home/repository/bioruby/bioruby/ChangeLog,v
retrieving revision 1.68
retrieving revision 1.69
diff -C2 -d -r1.68 -r1.69
*** ChangeLog	19 Jul 2007 04:08:47 -0000	1.68
--- ChangeLog	10 Nov 2007 08:28:50 -0000	1.69
***************
*** 1,2 ****
--- 1,9 ----
+ 2007-11-10  Toshiaki Katayama 
+ 
+ 	* lib/bio/io/pubmed.rb:
+ 
+ 	  Fixed search, query methods (but use of esearch and efetch is
+ 	  strongly recommended).
+ 
  2007-07-19  Toshiaki Katayama 
  
***************
*** 415,419 ****
  	  visual effects.
  
! 	* lib/bio/.rb
  
  	  Extended to have Bio.command where command can be any BioRuby
--- 422,426 ----
  	  visual effects.
  
! 	* lib/bio.rb
  
  	  Extended to have Bio.command where command can be any BioRuby



From nakao at dev.open-bio.org  Sat Nov 10 16:57:45 2007
From: nakao at dev.open-bio.org (Mitsuteru C. Nakao)
Date: Sat, 10 Nov 2007 16:57:45 +0000
Subject: [BioRuby-cvs] bioruby/test/functional/bio/io test_ensembl.rb, 1.4,
	1.5
Message-ID: <200711101657.lAAGvjCP022677@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/functional/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv22657/test/functional/bio/io

Modified Files:
	test_ensembl.rb 
Log Message:
* Updated some expected values of test_gff_exportview*.


Index: test_ensembl.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/test/functional/bio/io/test_ensembl.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** test_ensembl.rb	5 Apr 2007 23:35:42 -0000	1.4
--- test_ensembl.rb	10 Nov 2007 16:57:43 -0000	1.5
***************
*** 74,78 ****
  
     def test_gff_exportview
!      line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
       gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
       assert_equal(line, gff)
--- 74,95 ----
  
     def test_gff_exportview
!      line = ["chromosome:NCBI36:4:1149206:1149209:1", 
!              "Ensembl", 
!              "Gene", 
!              "-839",
!              "2747", 
!              ".", 
!              "+",
!              ".",
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t")  + "\n"
!      line = ["4", 
!              "Ensembl", 
!              "Gene", 
!              "1148366", 
!              "1151952", 
!              ".", 
!              "+", 
!              "1", 
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
       gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
       assert_equal(line, gff)
***************
*** 80,84 ****
  
     def test_gff_exportview_with_named_args
!      line = "chromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tgene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
--- 97,118 ----
  
     def test_gff_exportview_with_named_args
!      line = ["chromosome:NCBI36:4:1149206:1149209:1",
!              "Ensembl",
!              "Gene",
!              "-839",
!              "2747",
!              ".",
!              "+",
!              ".",
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
!      line = ["4", 
!              "Ensembl", 
!              "Gene", 
!              "1148366", 
!              "1151952", 
!              ".", 
!              "+", 
!              "1", 
!              "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
***************
*** 89,93 ****
  
     def test_tab_exportview_with_named_args
!      line = "seqname\tsource\tfeature\tstart\tend\tscore\tstrand\tframe\tgene_id\ttranscript_id\texon_id\tgene_type\nchromosome:NCBI36:4:1149206:1149209:1\tEnsembl\tGene\t-839\t2747\t.\t+\t.\tENSG00000206158\tENST00000382964\tENSE00001494097\tKNOWN_protein_coding\n"
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,
--- 123,176 ----
  
     def test_tab_exportview_with_named_args
!      line = [["seqname",
!              "source",
!              "feature",
!              "start",
!              "end",
!              "score",
!              "strand",
!              "frame",
!              "gene_id",
!              "transcript_id",
!              "exon_id",
!              "gene_type"].join("\t"),
!              ["chromosome:NCBI36:4:1149206:1149209:1",
!               "Ensembl",
!               "Gene",
!               "-839",
!               "2747",
!               ".",
!               "+",
!               ".",
!               "ENSG00000206158",
!               "ENST00000382964",
!               "ENSE00001494097",
!               "KNOWN_protein_coding"].join("\t") + "\n"
!      ].join("\n")
!      line = [["seqname",
!              "source",
!              "feature",
!              "start",
!              "end",
!              "score",
!              "strand",
!              "frame",
!              "gene_id",
!              "transcript_id",
!              "exon_id",
!              "gene_type"].join("\t"),
!              ["4",
!               "Ensembl",
!               "Gene",
!               "1148366",
!               "1151952",
!               ".",
!               "+",
!               "1",
!               "ENSG00000206158",
!               "ENST00000382964",
!               "ENSE00001494097",
!               "KNOWN_protein_coding"].join("\t") + "\n"
!      ].join("\n")
       gff = @serv.exportview(:seq_region_name => 4,
                              :anchor1 => 1149206,



From k at dev.open-bio.org  Thu Nov 15 07:07:18 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:07:18 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.60,1.61
Message-ID: <200711150707.lAF77IWZ006676@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6671/io

Modified Files:
	flatfile.rb 
Log Message:
* the first line of the MEDLINE entry is changed from UI to PMID


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.60
retrieving revision 1.61
diff -C2 -d -r1.60 -r1.61
*** flatfile.rb	9 Jul 2007 14:08:34 -0000	1.60
--- flatfile.rb	15 Nov 2007 07:07:16 -0000	1.61
***************
*** 1131,1135 ****
              /^LOCUS       .+ aa .+/ ],
            medline  = RuleRegexp[ 'Bio::MEDLINE',
!             /^UI  \- [0-9]+$/ ],
            embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
--- 1131,1135 ----
              /^LOCUS       .+ aa .+/ ],
            medline  = RuleRegexp[ 'Bio::MEDLINE',
!             /^PMID\- [0-9]+$/ ],
            embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],



From k at dev.open-bio.org  Thu Nov 15 07:08:51 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:08:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell interface.rb,1.18,1.19
Message-ID: <200711150708.lAF78prq006727@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv6723/shell

Modified Files:
	interface.rb 
Log Message:
* fixed that savefile("hoge", obj) created "datahoge" file instead of "data/hoge"


Index: interface.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/interface.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** interface.rb	26 Jun 2007 08:38:38 -0000	1.18
--- interface.rb	15 Nov 2007 07:08:49 -0000	1.19
***************
*** 153,157 ****
      message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
      if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
!       file = datadir + file
      end
      if File.exists?(file)
--- 153,157 ----
      message = "Save file '#{file}' in '#{datadir}' directory? [y/n] "
      if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
!       file = File.join(datadir, file)
      end
      if File.exists?(file)



From k at dev.open-bio.org  Thu Nov 15 07:23:41 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:23:41 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.18,1.19
Message-ID: <200711150723.lAF7Nfkd006749@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6745/io

Modified Files:
	pubmed.rb 
Log Message:
* esearch2, efetch2: candidates for the better replacement of esearch and efetch methods which are enhanced to accept options as a hash and utilize Bio::Command.post_form for the options


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** pubmed.rb	10 Nov 2007 08:21:54 -0000	1.18
--- pubmed.rb	15 Nov 2007 07:23:39 -0000	1.19
***************
*** 9,15 ****
  #
  
- require 'net/http'
- require 'cgi' unless defined?(CGI)
  require 'bio/command'
  
  module Bio
--- 9,14 ----
  #
  
  require 'bio/command'
+ require 'cgi' unless defined?(CGI)
  
  module Bio
***************
*** 112,115 ****
--- 111,134 ----
    end
  
+   def self.esearch2(str, hash = {})
+     serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+     opts = {
+       "retmax" => 100,
+       "tool"   => "bioruby",
+       "db"     => "pubmed",
+       "term"   => str
+     }
+     opts.update(hash)
+ 
+     response, = Bio::Command.post_form(serv, opts)
+     result = response.body
+     if opts['rettype'] == 'count'
+       result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
+     else
+       result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
+     end
+     return result
+   end
+ 
    # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
    # entrez efetch. Multiple PubMed IDs can be provided:
***************
*** 132,136 ****
      response, = http.get(path + list)
      result = response.body
!     result = result.split(/\n\n+/)
      return result
    end
--- 151,173 ----
      response, = http.get(path + list)
      result = response.body
!     return result
!   end
! 
!   def self.efetch2(ids, hash = {})
!     return "" if ids.empty?
!     ids = ids.join(",") if ids === Array
! 
!     serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
!     opts = {
!       "tool"     => "bioruby",
!       "db"       => "pubmed",
!       "retmode"  => "text",
!       "rettype"  => "medline",
!       "id"       => ids,
!     }
!     opts.update(hash)
! 
!     response, = Bio::Command.post_form(serv, opts)
!     result = response.body
      return result
    end
***************
*** 212,216 ****
  
    puts "--- Search PubMed by E-Utils ---"
!   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
--- 249,255 ----
  
    puts "--- Search PubMed by E-Utils ---"
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
! 
!   Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
      p x
    end
***************
*** 218,221 ****
--- 257,261 ----
    puts "--- Retrieve PubMed entry by E-Utils ---"
    puts Bio::PubMed.efetch("10592173", "14693808")
+   puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
  
    puts "--- Search PubMed by Entrez CGI ---"



From k at dev.open-bio.org  Thu Nov 15 07:40:29 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Thu, 15 Nov 2007 07:40:29 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.19,1.20
Message-ID: <200711150740.lAF7eTZQ006794@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv6790

Modified Files:
	pubmed.rb 
Log Message:
* go back to splitting multiple MEDLINE entries into an array when not in XML mode


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** pubmed.rb	15 Nov 2007 07:23:39 -0000	1.19
--- pubmed.rb	15 Nov 2007 07:40:27 -0000	1.20
***************
*** 151,154 ****
--- 151,155 ----
      response, = http.get(path + list)
      result = response.body
+     result = result.split(/\n\n+/)
      return result
    end
***************
*** 170,173 ****
--- 171,178 ----
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
+     if opts["retmode"] == "text"
+       result = result.split(/\n\n+/)
+     end
+ 
      return result
    end



From k at dev.open-bio.org  Tue Nov 20 15:22:05 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 20 Nov 2007 15:22:05 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.20,1.21
Message-ID: <200711201522.lAKFM5vl026044@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv26040

Modified Files:
	pubmed.rb 
Log Message:
* ncbi_access_wait is introduced to wait 3 seconds between consecutive queries
* esearch2 and efetch2 methods are renamed to esearch and efetch


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -d -r1.20 -r1.21
*** pubmed.rb	15 Nov 2007 07:40:27 -0000	1.20
--- pubmed.rb	20 Nov 2007 15:22:03 -0000	1.21
***************
*** 2,6 ****
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001 Toshiaki Katayama 
  # Copyright::  Copyright (C) 2006 Jan Aerts 
  # License::    The Ruby License
--- 2,6 ----
  # = bio/io/pubmed.rb - NCBI Entrez/PubMed client module
  #
! # Copyright::  Copyright (C) 2001, 2007 Toshiaki Katayama 
  # Copyright::  Copyright (C) 2006 Jan Aerts 
  # License::    The Ruby License
***************
*** 71,74 ****
--- 71,92 ----
  class PubMed
  
+   # Run retrieval scripts on weekends or between 9 pm and 5 am Eastern Time
+   # weekdays for any series of more than 100 requests.
+   # -> Not implemented yet in BioRuby
+ 
+   # Make no more than one request every 3 seconds.
+   NCBI_INTERVAL = 3
+   @@last_access = nil
+ 
+   def self.ncbi_access_wait(wait = NCBI_INTERVAL)
+     if @@last_access
+       duration = Time.now - @@last_access
+       if wait > duration
+         sleep wait - duration
+       end
+     end
+     @@last_access = Time.now
+   end
+ 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 90,115 ****
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     hash['retmax'] = 100 unless hash['retmax']
! 
!     opts = []
!     hash.each do |k, v|
!       opts << "#{k}=#{v}"
!     end
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/esearch.fcgi?tool=bioruby&db=pubmed&#{opts.join('&')}&term="
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + CGI.escape(str))
!     result = response.body
!     if hash['rettype'] == 'count'
!       result = result.scan(/<Count>(.*?)<\/Count>/m).flatten.first.to_i
!     else
!       result = result.scan(/<Id>(.*?)<\/Id>/m).flatten
!     end
!     return result
!   end
  
-   def self.esearch2(str, hash = {})
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
--- 108,113 ----
    # *Returns*:: array of PubMed IDs or a number of results
    def self.esearch(str, hash = {})
!     return nil if str.empty?
  
      serv = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
      opts = {
***************
*** 121,124 ****
--- 119,124 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 134,160 ****
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
-   #   Bio::PubMed.efetch(123,456,789)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: MEDLINE formatted String
!   def self.efetch(*ids)
!     return [] if ids.empty?
! 
!     host = "eutils.ncbi.nlm.nih.gov"
!     path = "/entrez/eutils/efetch.fcgi?tool=bioruby&db=pubmed&retmode=text&rettype=medline&id="
! 
!     list = ids.join(",")
! 
!     http = Bio::Command.new_http(host)
!     response, = http.get(path + list)
!     result = response.body
!     result = result.split(/\n\n+/)
!     return result
!   end
! 
!   def self.efetch2(ids, hash = {})
!     return "" if ids.empty?
      ids = ids.join(",") if ids === Array
  
--- 134,144 ----
    # entrez efetch. Multiple PubMed IDs can be provided:
    #   Bio::PubMed.efetch(123)
    #   Bio::PubMed.efetch([123,456,789])
    # ---
    # *Arguments*:
    # * _ids_: list of PubMed IDs (required)
!   # *Returns*:: Array of MEDLINE formatted String
!   def self.efetch(ids, hash = {})
!     return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
  
***************
*** 169,172 ****
--- 153,158 ----
      opts.update(hash)
  
+     self.ncbi_access_wait
+ 
      response, = Bio::Command.post_form(serv, opts)
      result = response.body
***************
*** 174,178 ****
        result = result.split(/\n\n+/)
      end
- 
      return result
    end
--- 160,163 ----
***************
*** 254,266 ****
  
    puts "--- Search PubMed by E-Utils ---"
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", {"rettype" => "count"})
! 
!   Bio::PubMed.esearch2("(genome AND analysis) OR bioinformatics)").each do |x|
!     p x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Bio::PubMed.efetch("10592173", "14693808")
!   puts Bio::PubMed.efetch2(["10592173", "14693808"], {"retmode" => "xml"})
  
    puts "--- Search PubMed by Entrez CGI ---"
--- 239,266 ----
  
    puts "--- Search PubMed by E-Utils ---"
!   opts = {"rettype" => "count"}
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   puts Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)", opts)
!   puts Time.now
!   Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
!     puts x
    end
  
    puts "--- Retrieve PubMed entry by E-Utils ---"
!   puts Time.now
!   puts Bio::PubMed.efetch(16381885)
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   puts Bio::PubMed.efetch("16381885")
!   puts Time.now
!   opts = {"retmode" => "xml"}
!   puts Bio::PubMed.efetch([10592173, 14693808], opts)
!   puts Time.now
!   puts Bio::PubMed.efetch(["10592173", "14693808"], opts)
  
    puts "--- Search PubMed by Entrez CGI ---"
***************
*** 270,278 ****
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("10592173")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("10592173")
  
  end
--- 270,278 ----
  
    puts "--- Retrieve PubMed entry by Entrez CGI ---"
!   puts Bio::PubMed.query("16381885")
  
  
    puts "--- Retrieve PubMed entry by PMfetch ---"
!   puts Bio::PubMed.pmfetch("16381885")
  
  end



From k at dev.open-bio.org  Tue Nov 27 07:09:45 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Tue, 27 Nov 2007 07:09:45 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db/kegg compound.rb,0.16,0.17
Message-ID: <200711270709.lAR79jPi020625@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/db/kegg
In directory dev.open-bio.org:/tmp/cvs-serv20621

Modified Files:
	compound.rb 
Log Message:
* remark method is added


Index: compound.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/kegg/compound.rb,v
retrieving revision 0.16
retrieving revision 0.17
diff -C2 -d -r0.16 -r0.17
*** compound.rb	28 Jun 2007 11:27:24 -0000	0.16
--- compound.rb	27 Nov 2007 07:09:43 -0000	0.17
***************
*** 46,49 ****
--- 46,54 ----
    end
  
+   # REMARK
+   def remark
+     field_fetch('REMARK')
+   end
+ 
    # GLYCAN
    def glycans



From k at dev.open-bio.org  Wed Nov 28 06:34:35 2007
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Wed, 28 Nov 2007 06:34:35 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb,1.21,1.22
Message-ID: <200711280634.lAS6YZ9i023050@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv23044

Modified Files:
	pubmed.rb 
Log Message:
* all class methods are changed to instance methods (the class methods
  still remain for backward compatibility)


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** pubmed.rb	20 Nov 2007 15:22:03 -0000	1.21
--- pubmed.rb	28 Nov 2007 06:34:33 -0000	1.22
***************
*** 79,83 ****
    @@last_access = nil
  
!   def self.ncbi_access_wait(wait = NCBI_INTERVAL)
      if @@last_access
        duration = Time.now - @@last_access
--- 79,85 ----
    @@last_access = nil
  
!   private
! 
!   def ncbi_access_wait(wait = NCBI_INTERVAL)
      if @@last_access
        duration = Time.now - @@last_access
***************
*** 89,92 ****
--- 91,96 ----
    end
  
+   public
+ 
    # Search the PubMed database by given keywords using E-Utils and returns 
    # an array of PubMed IDs.
***************
*** 107,111 ****
    # * _rettype_
    # *Returns*:: array of PubMed IDs or a number of results
!   def self.esearch(str, hash = {})
      return nil if str.empty?
  
--- 111,115 ----
    # * _rettype_
    # *Returns*:: array of PubMed IDs or a number of results
!   def esearch(str, hash = {})
      return nil if str.empty?
  
***************
*** 119,123 ****
      opts.update(hash)
  
!     self.ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
--- 123,127 ----
      opts.update(hash)
  
!     ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
***************
*** 139,143 ****
    # * _ids_: list of PubMed IDs (required)
    # *Returns*:: Array of MEDLINE formatted String
!   def self.efetch(ids, hash = {})
      return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
--- 143,147 ----
    # * _ids_: list of PubMed IDs (required)
    # *Returns*:: Array of MEDLINE formatted String
!   def efetch(ids, hash = {})
      return nil if ids.to_s.empty?
      ids = ids.join(",") if ids === Array
***************
*** 153,157 ****
      opts.update(hash)
  
!     self.ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
--- 157,161 ----
      opts.update(hash)
  
!     ncbi_access_wait
  
      response, = Bio::Command.post_form(serv, opts)
***************
*** 170,177 ****
    # * _id_: query string (required)
    # *Returns*:: array of PubMed IDs
!   def self.search(str)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + CGI.escape(str))
--- 174,183 ----
    # * _id_: query string (required)
    # *Returns*:: array of PubMed IDs
!   def search(str)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Search&doptcmdl=Brief&db=PubMed&term="
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + CGI.escape(str))
***************
*** 187,196 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.query(*ids)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
- 
      list = ids.join(",")
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + list)
--- 193,203 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def query(*ids)
      host = "www.ncbi.nlm.nih.gov"
      path = "/sites/entrez?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
      list = ids.join(",")
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + list)
***************
*** 216,223 ****
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def self.pmfetch(id)
      host = "www.ncbi.nlm.nih.gov"
      path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
  
      http = Bio::Command.new_http(host)
      response, = http.get(path + id.to_s)
--- 223,232 ----
    # * _id_: PubMed ID (required)
    # *Returns*:: MEDLINE formatted String
!   def pmfetch(id)
      host = "www.ncbi.nlm.nih.gov"
      path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
  
+     ncbi_access_wait
+ 
      http = Bio::Command.new_http(host)
      response, = http.get(path + id.to_s)
***************
*** 231,234 ****
--- 240,263 ----
    end
  
+   def self.esearch(*args)
+     self.new.esearch(*args)
+   end
+ 
+   def self.efetch(*args)
+     self.new.efetch(*args)
+   end
+ 
+   def self.search(*args)
+     self.new.search(*args)
+   end
+ 
+   def self.query(*args)
+     self.new.query(*args)
+   end
+ 
+   def self.pmfetch(*args)
+     self.new.pmfetch(*args)
+   end
+ 
  end # PubMed
  
***************
*** 238,241 ****
--- 267,316 ----
  if __FILE__ == $0
  
+   puts "=== instance methods ==="
+ 
+   pubmed = Bio::PubMed.new
+ 
+   puts "--- Search PubMed by E-Utils ---"
+   opts = {"rettype" => "count"}
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   puts pubmed.esearch("(genome AND analysis) OR bioinformatics)", opts)
+   puts Time.now
+   pubmed.esearch("(genome AND analysis) OR bioinformatics)").each do |x|
+     puts x
+   end
+ 
+   puts "--- Retrieve PubMed entry by E-Utils ---"
+   puts Time.now
+   puts pubmed.efetch(16381885)
+   puts Time.now
+   puts pubmed.efetch("16381885")
+   puts Time.now
+   puts pubmed.efetch("16381885")
+   puts Time.now
+   opts = {"retmode" => "xml"}
+   puts pubmed.efetch([10592173, 14693808], opts)
+   puts Time.now
+   puts pubmed.efetch(["10592173", "14693808"], opts)
+ 
+   puts "--- Search PubMed by Entrez CGI ---"
+   pubmed.search("(genome AND analysis) OR bioinformatics)").each do |x|
+     p x
+   end
+ 
+   puts "--- Retrieve PubMed entry by Entrez CGI ---"
+   puts pubmed.query("16381885")
+ 
+ 
+   puts "--- Retrieve PubMed entry by PMfetch ---"
+   puts pubmed.pmfetch("16381885")
+ 
+ 
+   puts "=== class methods ==="
+ 
+ 
    puts "--- Search PubMed by E-Utils ---"
    opts = {"rettype" => "count"}