From ngoto at pub.open-bio.org  Fri Mar  3 03:18:51 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 08:18:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.46,1.47
Message-ID: <200603030818.k238IpVL028555@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv28535/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
* Removed duplicated initializing of @path in BufferedInputStream#initialize.
* Fixed a bug that buffered input stream was nested.


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -d -r1.46 -r1.47
*** flatfile.rb	22 Feb 2006 10:01:27 -0000	1.46
--- flatfile.rb	3 Mar 2006 08:18:49 -0000	1.47
***************
*** 35,39 ****
          # initialize prefetch buffer
          @buffer = ''
-         @path = path
        end
  
--- 35,38 ----
***************
*** 519,529 ****
      def initialize(dbclass, stream)
        # 2nd arg: IO object
!       if @stream.kind_of?(BufferedInputStream)
          @stream = stream
        else
          @stream = BufferedInputStream.for_io(stream)
        end
-       # default is raw mode
-       self.raw = false
        # 1st arg: database class (or file format autodetection)
        if dbclass then
--- 518,526 ----
      def initialize(dbclass, stream)
        # 2nd arg: IO object
!       if stream.kind_of?(BufferedInputStream)
          @stream = stream
        else
          @stream = BufferedInputStream.for_io(stream)
        end
        # 1st arg: database class (or file format autodetection)
        if dbclass then
***************
*** 535,538 ****
--- 532,537 ----
        @skip_leader_mode = :firsttime
        @firsttime_flag = true
+       # default raw mode is false
+       self.raw = false
      end
  
***************
*** 743,747 ****
            self.new(*arg)
          end
!           
          # Creates a new element.
          def initialize
--- 742,746 ----
            self.new(*arg)
          end
!         
          # Creates a new element.
          def initialize


From ngoto at pub.open-bio.org  Fri Mar  3 04:31:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 09:31:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48
Message-ID: <200603030931.k239VxVL029035@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
* added RulesArray class only for inspect
* changed constant (like Bio::GenBank) to String (like "Bio::GenBank")
  to avoid doing require almost all files when using autodetect


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb	3 Mar 2006 08:18:49 -0000	1.47
--- flatfile.rb	3 Mar 2006 09:31:57 -0000	1.48
***************
*** 736,739 ****
--- 736,748 ----
        include TSort
  
+       # Array to store autodetection rules.
+       # This is defined only for inspect.
+       class RulesArray < Array
+         # visualize contents
+         def inspect
+           "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+         end
+       end #class RulesArray
+ 
        # Template of a single rule of autodetection
        class RuleTemplate
***************
*** 745,754 ****
          # Creates a new element.
          def initialize
!           a = Array.new
!           def a.inspect
!             "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
!           end
!           @higher_priority_elements = a.clone
!           @lower_priority_elements  = a.clone
            @name = nil
          end
--- 754,759 ----
          # Creates a new element.
          def initialize
!           @higher_priority_elements = RulesArray.new
!           @lower_priority_elements  = RulesArray.new
            @name = nil
          end
***************
*** 784,787 ****
--- 789,810 ----
            nil
          end
+ 
+         private
+         # Gets constant from constant name given as a string.
+         def str2const(str)
+           const = Object
+           str.split(/\:\:/).each do |x|
+             const = const.const_get(x)
+           end
+           const
+         end
+ 
+         # Gets database class from given object.
+         # Current implementation is: 
+         # if _obj_ is kind of String, regarded as a constant.
+         # Otherwise, returns _obj_ as is.
+         def get_dbclass(obj)
+           obj.kind_of?(String) ? str2const(obj) : obj
+         end
        end #class Rule_Template
  
***************
*** 835,841 ****
            super()
            @re = re
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
            @name = dbclass.to_s
          end
  
--- 858,878 ----
            super()
            @re = re
            @name = dbclass.to_s
+           @dbclass = nil
+           @dbclass_lazy = dbclass
+         end
+ 
+         # database class (lazy evaluation)
+         def dbclass
+           unless @dbclass
+             @dbclass = get_dbclass(@dbclass_lazy)
+           end
+           @dbclass
+         end
+         private :dbclass
+ 
+         # returns database classes
+         def dbclasses
+           [ dbclass ]
          end
  
***************
*** 844,872 ****
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? @dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       class RuleRegexp2 < RuleTemplate
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super()
            @regexps = regexps
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
-           if name
-             @name = name
-           else
-             @name = @dbclass.to_s
-           end
          end
  
!         # If given text matches the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return @dbclass if re =~ text
            end
            nil
--- 881,904 ----
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       # If given string matches one of the regular expressions,
!       # returns the database class.
!       class RuleRegexp2 < RuleRegexp
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super(dbclass, nil)
            @regexps = regexps
          end
  
!         # If given text matches one of the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return dbclass if re =~ text
            end
            nil
***************
*** 880,887 ****
            super()
            @proc = proc
!           @dbclasses = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
          # If given text (and/or meta information) is known, returns
          # the database class.
--- 912,928 ----
            super()
            @proc = proc
!           @dbclasses = nil
!           @dbclasses_lazy = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
+         # database classes (lazy evaluation)
+         def dbclasses
+           unless @dbclasses
+             @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+           end
+           @dbclasses
+         end
+ 
          # If given text (and/or meta information) is known, returns
          # the database class.
***************
*** 1039,1058 ****
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ Bio::GenBank,
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ Bio::GenPept,
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ Bio::MEDLINE,
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ Bio::EMBL,
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ Bio::SPTR,
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ Bio::PROSITE,
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ Bio::TRANSFAC,
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
--- 1080,1099 ----
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ 'Bio::GenBank',
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ 'Bio::GenPept',
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ 'Bio::MEDLINE',
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ 'Bio::SPTR',
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ 'Bio::PROSITE',
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ 'Bio::TRANSFAC',
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
            end,
  
!           litdb    = RuleRegexp[ Bio::LITDB,
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ Bio::KEGG::BRITE,
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ Bio::KEGG::KO,
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ Bio::KEGG::GLYCAN,
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ Bio::KEGG::ENZYME,
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ Bio::KEGG::REACTION,
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ Bio::KEGG::GENES,
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ Bio::KEGG::GENOME,
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
!                                 Bio::FANTOM::MaXML::Sequence) do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
--- 1109,1139 ----
            end,
  
!           litdb    = RuleRegexp[ 'Bio::LITDB',
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ 'Bio::KEGG::BRITE',
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ 'Bio::KEGG::KO',
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ 'Bio::KEGG::GLYCAN',
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ 'Bio::KEGG::ENZYME',
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ 'Bio::KEGG::GENES',
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ 'Bio::KEGG::GENOME',
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
!                                 'Bio::FANTOM::MaXML::Sequence') do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
***************
*** 1109,1143 ****
            end,
  
!           pdb = RuleRegexp[ Bio::PDB,
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ Bio::PDB::ChemicalComponent,
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ Bio::ClustalW::Report,
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ Bio::Blast::Report,
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ Bio::Blast::WU::Report,
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ Bio::Blast::Default::Report,
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ Bio::Blat::Report,
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ Bio::Spidey::Report,
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ Bio::HMMER::Report,
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ Bio::Sim4::Report,
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new(Bio::FastaFormat,
!                                      Bio::NBRF,
!                                      Bio::FastaNumericFormat) do |text|
              if /^>.+$/ =~ text
                case text
--- 1150,1184 ----
            end,
  
!           pdb = RuleRegexp[ 'Bio::PDB',
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ 'Bio::ClustalW::Report',
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ 'Bio::Blast::Report',
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ 'Bio::Blast::WU::Report',
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ 'Bio::Blast::Default::Report',
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ 'Bio::Blat::Report',
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ 'Bio::Spidey::Report',
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ 'Bio::HMMER::Report',
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ 'Bio::Sim4::Report',
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new('Bio::FastaFormat',
!                                      'Bio::NBRF',
!                                      'Bio::FastaNumericFormat') do |text|
              if /^>.+$/ =~ text
                case text


From pjotr at pub.open-bio.org  Fri Mar  3 09:52:00 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 14:52:00 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta - New directory
Message-ID: <200603031452.k23Eq0VL029679@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29669/fasta

Log Message:
Directory /home/repository/bioruby/bioruby/test/data/fasta added to the repository


From pjotr at pub.open-bio.org  Fri Mar  3 10:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta example1.txt, NONE,
	1.1 example2.txt, NONE, 1.1
Message-ID: <200603031531.k23FV8VL029797@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29781/test/data/fasta

Added Files:
	example1.txt example2.txt 
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing


--- NEW FILE: example2.txt ---
>At1g11545.1 68414.m01326 xyloglucan:xyloglucosyl transferase, putative / xyloglucan endotransglycosylase, putative / endo-xyloglucan transferase, putative similar to endo-xyloglucan transferase GI:2244732 from [Gossypium hirsutum]
actcacggaacaagtgtagattgcattacctctctctctctctctcttcgaaatattcga
agtagagacaaccaATGGAGACGGAAAGGAGGATCATAACGAGCTGTTCTGCCATGACGG
CTCTGTTCTTGTTCATGACGGCTCTAATGGCGTCGTCCTCTATCGCAGCAACACCGACAC
AATCGTTTGAAGATAATTTCAACATTATGTGGTCTGAAAATCACTTCACGACTTCCGATG
ATGGAGAGATCTGGAATCTTTCCTTAGATAACGACACCGGATGTGGATTTCAGACAAAGC
ACATGTATAGATTCGGATGGTTTAGTATGAAGCTAAAGCTCGTCGGAGGCGACTCCGCCG
GCGTCGTCACCGCTTACTACATGTGTTCGGAGAATGGGGCAGGACCGGAGAGAGACGAGA
TAGATTTCGAATTTCTAGGGAACCGAACCGGACAGCCTTACATTATTCAGACCAATGTGT
ATAAGAACGGAACCGGGAATCGGGAGATGCGACATTCCCTCTGGTTCGACCCGACCAAGG
ATTATCACACCTACTCAATTCTTTGGAATAACCACCAGCTTGTGTTCTTCGTGGATAGGG
TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC
AGAAGCCGATGTACTTGTTCTCCAGCATTTGGAACGCTGACGATTGGGCTACACGTGGTG
GTCTGGAGAAGACTGACTGGAAAAAAGCTCCATTCGTCTCTTCTTACAAGGACTTCGCCG
TCGAAGGCTGCCGTTGGAAGGATCCATTCCCTGCATGCGTCTCTACCACAACAGAGAATT
GGTGGGATCAGTACGACGCGTGGCATTTGTCCAAGACACAGAAGATGGATTATGCGTGGG
TGCAGCGTAATCTCGTCGTATACGATTATTGCAAAGACAGTGAGAGGTTCCCTACTCTTC
CTTGGGAGTGTTCCATTAGCCCTTGGGCTTAAaatcaattttgttttgagtgtattaaag
tggaaatggtttatgtaataattttactctcttttttttggcatttcttattttgttatg
gactatatcctctgtttatttatttaattaattatttatttagtcggctat


--- NEW FILE: example1.txt ---
>At1g02580 mRNA (2291 bp) UTR's and CDS
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca  gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
catggatggggtgcatttacatgggactctct  taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
tttatgttgtt

>At1g65300: mRNA 837bp
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
ga  gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttt  taatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g

>At1g65300: mRNA 837bp (shortened at end)
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcag


>At1g65300: mRNA 837bp (shortened from start)
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g


>At1g02580 - shortened for test - inserted cutpoint
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca  gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
tttaattggggtgcatttacatgggactctct  taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca


From pjotr at pub.open-bio.org  Fri Mar  3 10:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/sample enzymes.rb,NONE,1.1
Message-ID: <200603031531.k23FV8VL029793@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/sample
In directory pub.open-bio.org:/tmp/cvs-serv29781/sample

Added Files:
	enzymes.rb 
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing


--- NEW FILE: enzymes.rb ---
#!/usr/bin/env ruby
#
# enzymes.rb - cut input file using enzyme on command line
#
#   Copyright (C) 2006 Pjotr Prins <p at bioruby.org> and Trevor Wennblom <trevor at corevx.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  $Id: enzymes.rb,v 1.1 2006/03/03 15:31:06 pjotr Exp $
#

require 'bio/io/flatfile'
require 'bio/util/restriction_enzyme'

include Bio

usage = <<USAGE

Usage: enzymes.rb enzyme1 [enzyme2] infiles

  Examples:

    Output the primary sequences cut using both BstYI and MseI:
		
	    ./enzymes.rb BstYI MseI *.seq

    or using the actual formats

	    ./enzymes.rb "r^gatcy" "t^taa" *.seq
		
USAGE

# ---- Command line: enzyme1 [enzyme2] infiles
# At least one enzyme and one input file are required.
if ARGV.size < 2
  print usage
  exit 1
end

enzyme1 = ARGV.shift

# ---- Fetch enzyme2 if it is not a file
# The next argument is taken as a second enzyme unless it names an
# existing file; enzyme2 stays nil when only one enzyme was given.
enzyme2 = nil
candidate = ARGV.first
enzyme2 = ARGV.shift if candidate && !File.exist?(candidate)

# Consuming enzyme2 may leave no input file at all (for example when a
# file name was mistyped and therefore taken as the second enzyme).
# Report the usage instead of silently processing nothing.
if ARGV.empty?
  print usage
  exit 1
end

# ---- Show each enzyme together with its cut pattern
re1 = Bio::RestrictionEnzyme::DoubleStranded.new(enzyme1)
puts "Enzyme #{enzyme1}: #{re1.primary.with_cut_symbols}" # e.g. r^gatcy
if enzyme2
  re2 = Bio::RestrictionEnzyme::DoubleStranded.new(enzyme2)
  puts "Enzyme #{enzyme2}: #{re2.primary.with_cut_symbols}" # e.g. t^taa
end

# ---- Cut every entry of every input file and print the fragments
ARGV.each do |fn|
  ff = Bio::FlatFile.auto(fn)
  ff.each_entry do |entry|
    seq = Bio::Sequence::NA.new(entry.seq)
    seq.cut_with_enzyme(enzyme1).each do |frag1|
      frag = frag1
      if enzyme2
        # Re-cut the primary strand of the first-enzyme fragment with the
        # second enzyme. A separate variable is used so the entry's own
        # sequence (seq) is not overwritten while its fragments are being
        # iterated.
        subseq = Bio::Sequence::NA.new(frag1.primary)
        frags2 = subseq.cut_with_enzyme(enzyme2)
        next if frags2.size == 0
        frag = frags2.shift # pick up first fragment
      end
      # One FASTA-like record per reported fragment.
      print "> #{entry.definition}\n"
      print frag.primary, "\n"
    end
  end
end


From aerts at pub.open-bio.org  Thu Mar 16 12:29:07 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Thu, 16 Mar 2006 17:29:07 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb, 1.12, 1.13 fetch.rb, 1.4,
	1.5
Message-ID: <200603161729.k2GHT7VL007097@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv7087

Modified Files:
	pubmed.rb fetch.rb 
Log Message:
* Added documentation to pubmed.rb and fetch.rb
* For fetch.rb: replaced 'net/http' with 'open-uri' to allow people behind a proxy to use this class.


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** pubmed.rb	8 Sep 2005 01:22:12 -0000	1.12
--- pubmed.rb	16 Mar 2006 17:29:05 -0000	1.13
***************
*** 3,6 ****
--- 3,7 ----
  #
  #   Copyright (C) 2001 KATAYAMA Toshiaki <k at bioruby.org>
+ #                 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 26,61 ****
  module Bio
  
    class PubMed
  
!     def self.query(id)
!       host = "www.ncbi.nlm.nih.gov"
!       path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! 
!       http = Net::HTTP.new(host)
!       response, = http.get(path + id.to_s)
!       result = response.body
!       if result =~ /#{id}\s+Error/
!         raise( result )
!       else
!         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!         return result
!       end
!     end
! 
!     def self.pmfetch(id)
!       host = "www.ncbi.nlm.nih.gov"
!       path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
! 
!       http = Net::HTTP.new(host)
!       response, = http.get(path + id.to_s)
!       result = response.body
!       if result =~ /#{id}\s+Error/
!         raise( result )
!       else
!         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!         return result
!       end
!     end
! 
      def self.search(str)
        host = "www.ncbi.nlm.nih.gov"
--- 27,85 ----
  module Bio
  
+   # = DESCRIPTION
+   # The Bio::PubMed class provides several ways to retrieve bibliographic
+   # information from the PubMed database at
+   # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
+   # types of queries are possible:
+   # * searching for PubMed IDs given a query string:
+   #   * Bio::PubMed#search
+   #   * Bio::PubMed#esearch
+   # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) given a PubMed ID
+   #   * Bio::PubMed#query
+   #   * Bio::PubMed#pmfetch
+   #   * Bio::PubMed#efetch
+   #
+   # The different methods within the same group are interchangeable and should
+   # return the same result.
+   # 
+   # Additional information about the MEDLINE format and PubMed programmable
+   # APIs can be found on the following websites:
+   # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
+   # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
+   # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
+   # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
+   # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
+   # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
+   # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+   #
+   # = USAGE
+   #  require 'bio'
+   #
+   #  # If you don't know the pubmed ID:
+   #  Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
+   #    p x
+   #  end
+   #  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+   #    p x
+   #  end
+   #  
+   #  # To retrieve the MEDLINE entry for a given PubMed ID:
+   #  puts Bio::PubMed.query("10592173")
+   #  puts Bio::PubMed.pmfetch("10592173")
+   #  puts Bio::PubMed.efetch("10592173", "14693808")
+   #  # This can be converted into a Bio::MEDLINE object:
+   #  manuscript = Bio::PubMed.query("10592173")
+   #  medline = Bio::MEDLINE.new(manuscript)
+   #  
+   # = REMARK
+   # This class can not be used at the moment if you're behind a proxy server. This will be solved in the near future.
    class PubMed
  
!     # Search the PubMed database by given keywords using entrez query and returns
!     # an array of PubMed IDs.
!     # ---
!     # *Arguments*:
+     # * _str_: query string (required)
!     # *Returns*:: array of PubMed IDs
      def self.search(str)
        host = "www.ncbi.nlm.nih.gov"
***************
*** 70,73 ****
--- 94,115 ----
      end
  
+     # Search the PubMed database by given keywords using E-Utils and returns 
+     # an array of PubMed IDs.
+     # 
+     # For information on the possible arguments, see
+     # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
+     # ---
+     # *Arguments*:
+     # * _str_: query string (required)
+     # * _field_
+     # * _reldate_
+     # * _mindate_
+     # * _maxdate_
+     # * _datetype_
+     # * _retstart_
+     # * _retmax_ (default 100)
+     # * _retmode_
+     # * _rettype_
+     # *Returns*:: array of PubMed IDs
      def self.esearch(str, hash = {})
        hash['retmax'] = 100 unless hash['retmax']
***************
*** 88,91 ****
--- 130,184 ----
      end
  
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez query.
+     # ---
+     # *Arguments*:
+     # * _id_: PubMed ID (required)
+     # *Returns*:: MEDLINE formatted String
+     def self.query(id)
+       host = "www.ncbi.nlm.nih.gov"
+       path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+ 
+       http = Net::HTTP.new(host)
+       response, = http.get(path + id.to_s)
+       result = response.body
+       if result =~ /#{id}\s+Error/
+         raise( result )
+       else
+         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+         return result
+       end
+     end
+ 
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez pmfetch.
+     # ---
+     # *Arguments*:
+     # * _id_: PubMed ID (required)
+     # *Returns*:: MEDLINE formatted String
+     def self.pmfetch(id)
+       host = "www.ncbi.nlm.nih.gov"
+       path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+ 
+       http = Net::HTTP.new(host)
+       response, = http.get(path + id.to_s)
+       result = response.body
+       if result =~ /#{id}\s+Error/
+         raise( result )
+       else
+         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+         return result
+       end
+     end
+ 
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez efetch. Multiple PubMed IDs can be provided:
+     #   Bio::PubMed.efetch(123)
+     #   Bio::PubMed.efetch(123,456,789)
+     #   Bio::PubMed.efetch([123,456,789])
+     # ---
+     # *Arguments*:
+     # * _ids_: list of PubMed IDs (required)
+     # *Returns*:: MEDLINE formatted String
      def self.efetch(*ids)
        return [] if ids.empty?
***************
*** 125,189 ****
  
  end
- 
- =begin
- 
- = Bio::PubMed
- 
- These class methods access NCBI/PubMed database via HTTP.
- 
- --- Bio::PubMed.esearch(str, options)
- 
-       Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
-       Options can be a hash containing keys include 'field', 'reldate',
-       'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
-       and 'rettype' as specified in the following URL:
- 
-         ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed>))
- 
-      Default 'retmax' is 100.
- 
- --- Bio::PubMed.efetch(pmids)
- 
-       Returns an array of MEDLINE records.  A list of PubMed IDs can be
-       supplied as following:
- 
-         Bio::PubMed.efetch(123)
-         Bio::PubMed.efetch(123,456,789)
-         Bio::PubMed.efetch([123,456,789])
- 
- --- Bio::PubMed.query(pmid)
- 
-       Retrieve PubMed entry by PMID and returns MEDLINE format string (can
-       be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
-       object).
- 
- --- Bio::PubMed.pmfetch(pmid)
- 
-       Just another query method (by pmfetch).
- 
- --- Bio::PubMed.search(str)
- 
-       Search the PubMed database by given keywords and returns the list of
-       matched records in MEDLINE format.
- 
- 
- = For more informations
- 
- * Overview
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html>))
- * How to link
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html>))
- * MEDLINE format
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat>))
- * Search field descriptions and tags
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags>))
- * Entrez utilities index
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html>))
- * PmFetch CGI help
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html>))
- * E-Utilities CGI help
-   * ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>))
- 
- =end
- 
- 
--- 218,219 ----

Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** fetch.rb	18 Dec 2005 15:58:42 -0000	1.4
--- fetch.rb	16 Mar 2006 17:29:05 -0000	1.5
***************
*** 1,12 ****
  #
! # = bio/io/biofetch.rb - BioFetch access module
! #
! # Copyright::   Copyright (C) 2002, 2005
! #               Toshiaki Katayama <k at bioruby.org>
! # License::     LGPL
  #
! # $Id$
  #
- #--
  #
  #  This library is free software; you can redistribute it and/or
--- 1,10 ----
  #
! # bio/io/biofetch.rb - BioFetch access module
  #
! #  Copyright (C) 2002, 2005 Toshiaki Katayama <k at bioruby.org>
! #               2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
!            
! #  License: LGPL
  #
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 24,95 ****
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #++
  #
  
  require 'uri'
! require 'net/http'
  
  module Bio
  
! class Fetch
! 
!   # Create a new Bio::Fetch server object.
!   # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
!   # to EBI BioFetch server.
!   def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
!     schema, user, @host, @port, reg, @path, = URI.split(url)
!   end
! 
!   # Set default database to dbname (prepare for get_by_id).
!   attr_accessor :database
! 
!   # Get raw database entry by id (mainly used by Bio::Registry).
!   def get_by_id(id)
!     fetch(@database, id)
!   end
! 
!   # Fetch a database entry as specified by database (db), entry id (id),
!   # 'raw' text or 'html' (style), and format.  When using BioRuby's
!   # BioFetch server, value for the format should not be set.
!   def fetch(db, id, style = 'raw', format = nil)
!     data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
!     data.push("format=#{format}") if format
!     data = data.join('&')
! 
!     responce, result = Net::HTTP.new(@host, @port).post(@path, data)
!     return result
!   end
! 
!   # Short cut for using BioRuby's BioFetch server.  You can fetch an entry
!   # without creating instance of BioFetch server.
!   def self.query(*args)
!     self.new.fetch(*args)
!   end
  
!   # What databases are available?
!   def databases
!     query = "info=dbs"
!     responce, result = Net::HTTP.new(@host, @port).post(@path, query)
!     return result
!   end
  
!   # What formats does the database X have?
!   def formats(database = @database)
!     if database
!       query = "info=formats;db=#{database}"
!       responce, result = Net::HTTP.new(@host, @port).post(@path, query)
        return result
      end
    end
  
-   # How many entries can be retrieved simultaneously?
-   def maxids
-     query = "info=maxids"
-     responce, result = Net::HTTP.new(@host, @port).post(@path, query)
-     return result
-   end
- 
- end
- 
  end # module Bio
  
--- 22,183 ----
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #  $Id$
  #
  
  require 'uri'
! require 'open-uri'
  
  module Bio
+   # = DESCRIPTION
+   # The Bio::Fetch class provides an interface to dbfetch servers. Given
+   # a database name and an accession number, these servers return the nucleic
+   # or amino acid sequence for that accession number in that database.
+   #
+   # Possible dbfetch servers include:
+   # * http://bioruby.org/cgi-bin/biofetch.rb (default)
+   # * http://www.ebi.ac.uk/cgi-bin/dbfetch
+   #
+   # If you're behind a proxy server, be sure to set your HTTP_PROXY
+   # environment variable accordingly.
+   #
+   # = USAGE
+   #  require 'bio'
+   #
+   #  # Retrieve the sequence of accession number M33388 from the EMBL
+   #  # database.
+   #  server = Bio::Fetch.new()  #uses default server
+   #  puts server.fetch('embl','M33388')
+   #  
+   #  # Do the same thing without creating a Bio::Fetch object. This method always
+   #  # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
+   #  puts Bio::Fetch.query('embl','M33388')
+   #
+   #  # To know what databases are available on the bioruby dbfetch server:
+   #  server = Bio::Fetch.new()
+   #  puts server.databases
+   #
+   #  # Some databases provide their data in different formats (e.g. 'fasta',
+   #  # 'genbank' or 'embl'). To check which formats are supported by a given
+   #  # database:
+   #  puts server.formats('embl')
+   #
+   class Fetch
+   
+     # Create a new Bio::Fetch server object that can subsequently be queried
+     # using the Bio::Fetch#fetch method
+     # ---
+     # *Arguments*:
+     # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
+     # *Returns*:: Bio::Fetch object
+     def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
+       @url = url
+       schema, user, @host, @port, reg, @path, = URI.split(@url)
+     end
+   
+     # The default database to query
+     #--
+     # This will be used by the get_by_id method
+     #++
+     attr_accessor :database
+   
+     # Get raw database entry by id. This method lets the Bio::Registry class
+     # use Bio::Fetch objects and should probably not be used directly.
+     def get_by_id(id)
+       fetch(@database, id)
+     end
+   
+     # Fetch a database entry as specified by database (db), entry id (id),
+     # 'raw' text or 'html' (style), and format.  When using BioRuby's
+     # BioFetch server, value for the format should not be set.
+     # Examples:
+     #   server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+     #   puts server.fetch('embl','M33388','raw','fasta')
+     #   puts server.fetch('refseq','NM_12345','html','embl')
+     # ---
+     # *Arguments*:
+     # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+     # * _id_: single ID or ID list separated by commas or white space
+     # * _style_: [raw|html] (default = 'raw')
+     # * _format_: name of output format (see Bio::Fetch#formats)
+     def fetch(db, id, style = 'raw', format = nil)
+       query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
+       query.push("format=#{format}") if format
+       query = query.join('&')
+   
+       result = open(@url + '?' + query).readlines.join('')
+       return result
+     end
+   
+     # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
+     # without creating an instance of BioFetch server. This method uses the 
+     # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
+     # 
+     # Example:
+     #   puts Bio::Fetch.query('refseq','NM_12345')
+     #
+     # ---
+     # *Arguments*:
+     # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+     # * _id_: single ID or ID list separated by commas or white space
+     # * _style_: [raw|html] (default = 'raw')
+     # * _format_: name of output format (see Bio::Fetch#formats)
+     def self.query(*args)
+       self.new.fetch(*args)
+     end
+   
+     # Using this method, the user can ask a dbfetch server what databases
+     # it supports. This would normally be the first step you'd take when
+     # you use a dbfetch server for the first time.
+     # Example:
+     #  server = Bio::Fetch.new()
+     #  puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
+     #
+     # This method only works for the bioruby dbfetch server. For a list
+     # of databases available from the EBI, see the EBI website at 
+     # http://www.ebi.ac.uk/cgi-bin/dbfetch/
+     # ---
+     # *Returns*:: array of database names
+     def databases
+       query = "info=dbs"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
!     end
!   
!     # Lists the formats that are available for a given database. Like the
!     # Bio::Fetch#databases method, this method is only available on 
!     # the bioruby dbfetch server.
!     # Example:
!     #  server = Bio::Fetch.new()
!     #  puts server.formats('embl') # returns "default fasta"
!     # ---
!     # *Arguments*:
!     # * _database_:: name of database you want the supported formats for
!     # *Returns*:: array of formats
!     def formats(database = @database)
!       if database
!         query = "info=formats;db=#{database}"
  
!         result = open(@url + '?' + query).readlines.join('')
!         return result
!       end
!     end
!   
!     # A dbfetch server will only return entries up to a given maximum number.
!     # This method retrieves that number from the server. As for the databases
!     # and formats methods, the maxids method only works for the bioruby
!     # dbfetch server.
!     # ---
!     # *Arguments*: none
!     # *Returns*:: number
!     def maxids
!       query = "info=maxids"
  
!       result = open(@url + '?' + query).readlines.join('')
        return result
      end
+   
    end
  
  end # module Bio
  
***************
*** 98,113 ****
  if __FILE__ == $0
  
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
-   bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
    puts "# test 1"
!   puts bfserv.fetch('embl', 'J00231', 'raw')
    puts "# test 2"
!   puts bfserv.fetch('embl', 'J00231', 'html')
! 
    puts "# test 3"
!   puts Bio::Fetch.query('genbank', 'J00231')
    puts "# test 4"
    puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
! 
  end
  
--- 186,204 ----
  if __FILE__ == $0
  
    puts "# test 1"
!   br_server = Bio::Fetch.new()
!   puts br_server.databases
!   puts br_server.formats('embl')
!   puts br_server.maxids
!   ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
    puts "# test 2"
!   puts ebi_server.fetch('embl', 'J00231', 'raw')
    puts "# test 3"
!   puts ebi_server.fetch('embl', 'J00231', 'html')
    puts "# test 4"
+   puts Bio::Fetch.query('genbank', 'J00231')
+   puts "# test 5"
    puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!  
  end
  

From ngoto at pub.open-bio.org  Mon Mar 20 05:34:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 10:34:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.3,1.4
Message-ID: <200603201035.k2KAYxVL030067@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory pub.open-bio.org:/tmp/cvs-serv30042/lib/bio

Modified Files:
	command.rb 
Log Message:
* New module Bio::Command::NetTools for miscellaneous network methods.
  Currently, this module is intended to be used only inside
  BioRuby library. Please do not use it in user's programs now.
* New methods: Bio::Command::NetTools.open_uri(uri, *arg) and
  Bio::Command::NetTools.read_uri(uri).
* Changed license to Ruby's.


Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** command.rb	4 Nov 2005 17:36:00 -0000	1.3
--- command.rb	20 Mar 2006 10:34:57 -0000	1.4
***************
*** 2,32 ****
  # = bio/command.rb - general methods for external command execution
  #
! # Copyright::	Copyright (C) 2003-2005
  # 		Naohisa Goto <ng at bioruby.org>,
  #		Toshiaki Katayama <k at bioruby.org>
! # License::	LGPL
  #
  #  $Id$
  #
- #--
- #
- #  This library is free software; you can redistribute it and/or
- #  modify it under the terms of the GNU Lesser General Public
- #  License as published by the Free Software Foundation; either
- #  version 2 of the License, or (at your option) any later version.
- #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public
- #  License along with this library; if not, write to the Free Software
- #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
- #
- #++
- #
  
  require 'open3'
  
  module Bio
--- 2,15 ----
  # = bio/command.rb - general methods for external command execution
  #
! # Copyright::	Copyright (C) 2003-2006
  # 		Naohisa Goto <ng at bioruby.org>,
  #		Toshiaki Katayama <k at bioruby.org>
! # License::	Ruby's
  #
  #  $Id$
  #
  
  require 'open3'
+ require 'uri'
  
  module Bio
***************
*** 162,165 ****
--- 145,291 ----
  
  end # module Tools
+ 
+ 
+ # = Bio::Command::NetTools
+ #
+ # Bio::Command::NetTools is a collection of miscellaneous methods
+ # for data transport through network.
+ #
+ # Library internal use only. Users should not directly use it.
+ #
+ # Note that it is under construction.
+ module NetTools
+ 
+   # Same as OpenURI.open_uri(*arg).
+   # If open-uri.rb is already loaded, ::OpenURI is used.
+   # Otherwise, internal OpenURI in sandbox is used because
+   # open-uri.rb redefines Kernel.open.
+   def self.open_uri(uri, *arg)
+     if defined? ::OpenURI
+       ::OpenURI.open_uri(uri, *arg)
+     else
+       SandBox.load_openuri_in_sandbox
+       uri = uri.to_s if ::URI::Generic === uri
+       SandBox::OpenURI.open_uri(uri, *arg)
+     end
+   end
+ 
+   # Same as OpenURI.open_uri(uri).read.
+   # If open-uri.rb is already loaded, ::OpenURI is used.
+   # Otherwise, internal OpenURI in sandbox is used because
+   # open-uri.rb redefines Kernel.open.
+   def self.read_uri(uri)
+     self.open_uri(uri).read
+   end
+ 
+   # Sandbox to load open-uri.rb.
+   # Internal use only.
+   module SandBox #:nodoc:
+ 
+     # Dummy module definition.
+     module Kernel #:nodoc:
+       # dummy method
+       def open(*arg); end #:nodoc:
+     end #module Kernel
+     
+     # a method to find proxy. dummy definition
+     module FindProxy; end #:nodoc:
+     
+     # dummy module definition
+     module OpenURI #:nodoc:
+       module OpenRead; end #:nodoc:
+     end #module OpenURI
+     
+     # Dummy module definition.
+     module URI #:nodoc:
+       class Generic < ::URI::Generic #:nodoc:
+         include SandBox::FindProxy
+       end
+       
+       class HTTPS < ::URI::HTTPS #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       class HTTP  < ::URI::HTTP  #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       class FTP  < ::URI::FTP    #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       # parse and new. internal use only.
+       def self.__parse_and_new__(klass, uri) #:nodoc:
+         scheme, userinfo, host, port,
+         registry, path, opaque, query, fragment = ::URI.split(uri)
+         klass.new(scheme, userinfo, host, port,
+                   registry, path, opaque, query,
+                   fragment)
+       end
+       private_class_method :__parse_and_new__
+       
+       # same as ::URI.parse. internal use only.
+       def self.parse(uri) #:nodoc:
+         r = ::URI.parse(uri)
+         case r
+         when ::URI::HTTPS
+           __parse_and_new__(HTTPS, uri)
+         when ::URI::HTTP
+           __parse_and_new__(HTTP, uri)
+         when ::URI::FTP
+           __parse_and_new__(FTP, uri)
+         else
+           r
+         end
+       end
+     end #module URI
+     
+     @load_openuri = nil
+     # load open-uri.rb in SandBox module.
+     def self.load_openuri_in_sandbox #:nodoc:
+       return if @load_openuri
+       fn = nil
+       unless $:.find do |x|
+           fn = File.join(x, 'open-uri.rb')
+           FileTest.exist?(fn)
+         end then
+         warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
+       else
+         # reading open-uri.rb
+         str = File.read(fn)
+         # eval open-uri.rb contents in SandBox module
+         module_eval(str)
+         
+         # finds 'find_proxy' method
+         find_proxy_lines = nil
+         flag = nil
+         endstr = nil
+         str.each do |line|
+           if flag then
+             find_proxy_lines << line
+             if endstr == line[0, endstr.length] and
+                 /^\s+end(\s+.*)?$/ =~ line then
+               break
+             end
+           elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
+             flag = true
+             endstr = "#{$1}end"
+             find_proxy_lines = line 
+           end
+         end
+         if find_proxy_lines
+           module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
+         else
+           warn('Warning: cannot find find_proxy method in open-uri.rb.')
+         end
+         @load_openuri = true
+       end
+     end
+   end #module SandBox
+ end #module NetTools
+ 
  end # module Command
  end # module Bio


From ngoto at pub.open-bio.org  Mon Mar 20 07:40:16 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 12:40:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.5,1.6
Message-ID: <200603201240.k2KCeGVL030358@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv30167/lib/bio/io

Modified Files:
	fetch.rb 
Log Message:
* "require 'open-uri'" is removed because open-uri.rb changes Kernel#open.
  Instead, Bio::Command::NetTools.read_uri is used.
* query should be escaped by using URI.escape.
* Bio::Fetch#databases and #formats are changed to return an array of strings,
  as described in the documentation.
* Bio::Fetch#maxids is changed to return an Integer,
  as described in the documentation.


Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** fetch.rb	16 Mar 2006 17:29:05 -0000	1.5
--- fetch.rb	20 Mar 2006 12:40:13 -0000	1.6
***************
*** 26,30 ****
  
  require 'uri'
! require 'open-uri'
  
  module Bio
--- 26,30 ----
  
  require 'uri'
! require 'bio/command'
  
  module Bio
***************
*** 105,110 ****
        query = query.join('&')
    
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 105,109 ----
        query = query.join('&')
    
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query))
      end
    
***************
*** 141,146 ****
        query = "info=dbs"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 140,144 ----
        query = "info=dbs"
  
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
      end
    
***************
*** 159,164 ****
          query = "info=formats;db=#{database}"
  
!         result = open(@url + '?' + query).readlines.join('')
!         return result
        end
      end
--- 157,161 ----
          query = "info=formats;db=#{database}"
  
!         Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
        end
      end
***************
*** 174,179 ****
        query = "info=maxids"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 171,175 ----
        query = "info=maxids"
  
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).to_i
      end
    

From aerts at pub.open-bio.org  Tue Mar 21 07:18:16 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Tue, 21 Mar 2006 12:18:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11
Message-ID: <200603211218.k2LCIGVL001647@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637

Modified Files:
	fastacmd.rb 
Log Message:
Added/reformatted documentation.


Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb	28 Jan 2006 08:12:21 -0000	1.10
--- fastacmd.rb	21 Mar 2006 12:18:14 -0000	1.11
***************
*** 5,45 ****
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>
  # Lisence::    LGPL
  #
  # $Id$
  #
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using 
- # NCBI fastacmd command.
- # 
- # This class requires 'fastacmd' command and a blast database  
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- #    database = ARGV.shift || "/db/myblastdb"
- #    entry_id = ARGV.shift || "sp:128U_DROME"
- #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- #    fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- #    entry = fastacmd.get_by_id(entry_id)
- #    fastacmd.fetch(entry_id)
- #    fastacmd.fetch(ent_list)
- #
- #    fastacmd.fetch(ent_list).each do |fasta|
- #      puts fasta
- #    end
- #
- # == References
- #
- # * NCBI tool
- #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
  #--
  #
--- 5,14 ----
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>,
! #              Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # Lisence::    LGPL
  #
  # $Id$
  #
  #--
  #
***************
*** 68,72 ****
  class Blast
  
! # NCBI fastacmd wrapper class
  #
  class Fastacmd
--- 37,68 ----
  class Blast
  
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using 
! # NCBI fastacmd command.
! # 
! # This class requires 'fastacmd' command and a blast database  
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! #  require 'bio'
! #  
! #  fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! #  entry = fastacmd.get_by_id("sp:128U_DROME")
! #  fastacmd.fetch("sp:128U_DROME")
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! #    puts fasta
! #  end
! #
! # = REFERENCES
! #
! # * NCBI tool
! #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
  #
  class Fastacmd
***************
*** 78,90 ****
    attr_accessor :database
  
!   # fastcmd command file path.
    attr_accessor :fastacmd
  
-   # 
    attr_accessor :errorlog
  
!   # Initalize a fastacmd object.
!   #    
!   #    fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
--- 74,103 ----
    attr_accessor :database
  
!   # fastacmd command file path.
    attr_accessor :fastacmd
  
    attr_accessor :errorlog
  
!   # This method provides a handle to a BLASTable database, which you can then
!   # use to retrieve sequences.
!   # 
!   # Prerequisites:
!   # * You have created a BLASTable database with the '-o T' option.
!   # * You have the NCBI fastacmd tool installed.
!   #
!   # For example, suppose the original input file looks like:
!   #  >my_seq_1
!   #  ACCGACCTCCGGAACGGATAGCCCGACCTACG
!   #  >my_seq_2
!   #  TCCGACCTTTCCTACCGCACACCTACGCCATCAC
!   #  ...
!   # and you've created a BLASTable database from that with the command
!   #  cd /my_dir/
!   #  formatdb -i my_input_file -t Test -n Test -o T
!   # then you can get a handle to this database with the command
!   #  fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
!   # ---
!   # *Arguments*:
!   # * _database_:: path and name of BLASTable database
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
***************
*** 93,117 ****
  
  
!   # get an entry_id and returns a Bio::FastaFormat object.
!   #
!   #   entry_id = "sp:128U_DROME"
!   #   entry = fastacmd.get_by_id(entry_id)
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
!   #
!   # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
!   # object even when the result is a single entry.
!   #
!   #    p fastacmd.fetch(entry_id)
    #
!   # Fastacmd#fetch method also accepts a list of entry_id and returns
!   # an Array of Bio::FastaFormat objects.
!   #    
!   #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
!   #    p fastacmd.fetch(ent_list)
    #
    def fetch(list)
      if list.respond_to?(:join)
--- 106,131 ----
  
  
!   # Get the sequence of a specific entry in the BLASTable database.
!   # For example:
!   #  entry = fastacmd.get_by_id("sp:128U_DROME")
!   # ---
!   # *Arguments*:
!   # * _id_: id of an entry in the BLAST database
!   # *Returns*:: a Bio::FastaFormat object
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # Get the sequence for a _list_ of IDs in the database.
    #
!   # For example:
!   #  p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
    #
+   # This method always returns an array of Bio::FastaFormat objects, even when 
+   # the result is a single entry.
+   # ---
+   # *Arguments*:
+   # * _ids_: list of IDs to retrieve from the database
+   # *Returns*:: array of Bio::FastaFormat objects
    def fetch(list)
      if list.respond_to?(:join)
***************
*** 128,138 ****
    end
  
!   # Iterates each entry.
!   #
!   # You can also iterate on all sequences in the database!
!   #    fastacmd.each do |fasta|
!   #      p [ fasta.definition[0..30], fasta.seq.size ]
!   #    end
    #
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
    end
  
!   # Iterates over _all_ sequences in the database.
    #
+   #  fastacmd.each_entry do |fasta|
+   #    p [ fasta.definition[0..30], fasta.seq.size ]
+   #  end
+   # ---
+   # *Returns*:: a Bio::FastaFormat object for each iteration
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
  end # module Bio
  
+ if $0 == __FILE__
+   fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+   seq = fastacmd.get_by_id('id_of_entry1')
+   puts seq.class
+   puts seq
+   
+   seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+   seqs.each do |seq|
+     puts seq
+   end
  
+   fastacmd.each_entry do |fasta|
+     puts fasta.seq.size.to_s + "\t" + fasta.definition
+   end
+ end


From ngoto at pub.open-bio.org  Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.48,1.49
Message-ID: <200603221019.k2MAJOVL005746@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -d -r1.48 -r1.49
*** flatfile.rb	3 Mar 2006 09:31:57 -0000	1.48
--- flatfile.rb	22 Mar 2006 10:19:22 -0000	1.49
***************
*** 262,265 ****
--- 262,268 ----
          attr_reader :entry
  
+         # a flag to write down entry start and end positions
+         attr_accessor :entry_pos_flag
+ 
          # start position of the entry
          attr_reader :entry_start_pos
***************
*** 290,293 ****
--- 293,297 ----
            end
            @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
+           @entry_pos_flag = nil
          end
  
***************
*** 330,334 ****
          # gets a entry
          def get_entry
!           p0 = @stream.pos
            e  = @stream.gets(@delimiter)
            if e and @delimiter_overrun then
--- 334,338 ----
          # gets a entry
          def get_entry
!           p0 = @entry_pos_flag ? @stream.pos : nil
            e  = @stream.gets(@delimiter)
            if e and @delimiter_overrun then
***************
*** 339,343 ****
              end
            end
!           p1 = @stream.pos
            @entry_start_pos = p0
            @entry = e
--- 343,347 ----
              end
            end
!           p1 = @entry_pos_flag ? @stream.pos : nil
            @entry_start_pos = p0
            @entry = e
***************
*** 585,588 ****
--- 589,602 ----
      def entry_raw
        @splitter.entry
+     end
+ 
+     # a flag to write down entry start and end positions
+     def entry_pos_flag
+       @splitter.entry_pos_flag
+     end
+ 
+     # Sets flag to write down entry start and end positions
+     def entry_pos_flag=(x)
+       @splitter.entry_pos_flag = x
      end
  

From ngoto at pub.open-bio.org  Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io/flatfile indexer.rb,1.23,1.24
Message-ID: <200603221019.k2MAJOVL005748@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io/flatfile
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io/flatfile

Modified Files:
	indexer.rb 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: indexer.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile/indexer.rb,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** indexer.rb	22 Feb 2006 08:41:03 -0000	1.23
--- indexer.rb	22 Mar 2006 10:19:22 -0000	1.24
***************
*** 115,118 ****
--- 115,119 ----
              @flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
              @flatfile.raw = nil
+             @flatfile.entry_pos_flag = true
              @entry = nil
            end


From ngoto at pub.open-bio.org  Wed Mar 22 05:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/doc Changes-0.7.rd,1.16,1.17
Message-ID: <200603221019.k2MAJOVL005750@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/doc
In directory pub.open-bio.org:/tmp/cvs-serv5657/doc

Modified Files:
	Changes-0.7.rd 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: Changes-0.7.rd
===================================================================
RCS file: /home/repository/bioruby/bioruby/doc/Changes-0.7.rd,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** Changes-0.7.rd	27 Feb 2006 11:38:14 -0000	1.16
--- Changes-0.7.rd	22 Mar 2006 10:19:22 -0000	1.17
***************
*** 262,265 ****
--- 262,270 ----
    structure (which is not recommended) would not work.
  
+ In 1.0.1:
+ 
+ * Bio::FlatFile#entry_start_pos and #entry_ended_pos are enabled
+   only when Bio::FlatFile#entry_pos_flag is true.
+ 
  === Deleted files
  

From k at portal.open-bio.org  Sat Mar 25 19:38:12 2006
From: k at portal.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 00:38:12 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell demo.rb,1.1,1.2
Message-ID: <200603260038.k2Q0cCgZ028442@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv28438/lib/bio/shell

Modified Files:
	demo.rb 
Log Message:
* 1st commit test on dev.open-bio.org after the server migration
* fixed some typos in BioRuby shell demo


Index: demo.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/demo.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** demo.rb	27 Feb 2006 09:33:22 -0000	1.1
--- demo.rb	26 Mar 2006 00:38:10 -0000	1.2
***************
*** 90,94 ****
        run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
        run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
!       run(%q[less "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
        run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
        run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
--- 90,94 ----
        run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
        run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
!       run(%q[disp "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
        run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
        run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
***************
*** 98,112 ****
  
      def pdb_hetdic
!       run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
!           "Retrieving the het_dic database ...", false) &&
!       run(%q[savefile("data/het_dictionary.txt", het_dic)],
!           "Saving the file ... ", false) &&
        run(%q[het_dic.size], "Bytes of the file ...", true) &&
!       run(%q[less "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
        run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
            "Creating index to make the seaarchable database ...", false) &&
        run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
        run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
!       run(%q[sake.conect], "Showing connect table (conect) of the molecule ...", true) &&
        true
      end
--- 98,112 ----
  
      def pdb_hetdic
! #      run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! #          "Retrieving the het_dic database ...", false) &&
! #      run(%q[savefile("data/het_dictionary.txt", het_dic)],
! #          "Saving the file ... ", false) &&
        run(%q[het_dic.size], "Bytes of the file ...", true) &&
!       run(%q[disp "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
        run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
            "Creating index to make the seaarchable database ...", false) &&
        run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
        run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
!       run(%q[osake.conect], "Showing connect table (conect) of the molecule ...", true) &&
        true
      end


From ngoto at dev.open-bio.org  Tue Mar 28 09:00:50 2006
From: ngoto at dev.open-bio.org (Naohisa Goto)
Date: Tue, 28 Mar 2006 14:00:50 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.4,1.5
Message-ID: <200603281400.k2SE0oK6024842@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv24822

Modified Files:
	command.rb 
Log Message:
* added "require 'open-uri'"
* removed complicated hacks for open-uri


Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** command.rb	20 Mar 2006 10:34:57 -0000	1.4
--- command.rb	28 Mar 2006 14:00:48 -0000	1.5
***************
*** 12,15 ****
--- 12,16 ----
  require 'open3'
  require 'uri'
+ require 'open-uri'
  
  module Bio
***************
*** 157,289 ****
  module NetTools
  
-   # Same as OpenURI.open_uri(*arg).
-   # If open-uri.rb is already loaded, ::OpenURI is used.
-   # Otherwise, internal OpenURI in sandbox is used because
-   # open-uri.rb redefines Kernel.open.
-   def self.open_uri(uri, *arg)
-     if defined? ::OpenURI
-       ::OpenURI.open_uri(uri, *arg)
-     else
-       SandBox.load_openuri_in_sandbox
-       uri = uri.to_s if ::URI::Generic === uri
-       SandBox::OpenURI.open_uri(uri, *arg)
-     end
-   end
- 
    # Same as OpenURI.open_uri(uri).read.
-   # If open-uri.rb is already loaded, ::OpenURI is used.
-   # Otherwise, internal OpenURI in sandbox is used becase
-   # open-uri.rb redefines Kernel.open.
    def self.read_uri(uri)
!     self.open_uri(uri).read
    end
- 
-   # Sandbox to load open-uri.rb.
-   # Internal use only.
-   module SandBox #:nodoc:
- 
-     # Dummy module definition.
-     module Kernel #:nodoc:
-       # dummy method
-       def open(*arg); end #:nodoc:
-     end #module Kernel
-     
-     # a method to find proxy. dummy definition
-     module FindProxy; end #:nodoc:
-     
-     # dummy module definition
-     module OpenURI #:nodoc:
-       module OpenRead; end #:nodoc:
-     end #module OpenURI
-     
-     # Dummy module definition.
-     module URI #:nodoc:
-       class Generic < ::URI::Generic #:nodoc:
-         include SandBox::FindProxy
-       end
-       
-       class HTTPS < ::URI::HTTPS #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       class HTTP  < ::URI::HTTP  #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       class FTP  < ::URI::FTP    #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       # parse and new. internal use only.
-       def self.__parse_and_new__(klass, uri) #:nodoc:
-         scheme, userinfo, host, port,
-         registry, path, opaque, query, fragment = ::URI.split(uri)
-         klass.new(scheme, userinfo, host, port,
-                   registry, path, opaque, query,
-                   fragment)
-       end
-       private_class_method :__parse_and_new__
-       
-       # same as ::URI.parse. internal use only.
-       def self.parse(uri) #:nodoc:
-         r = ::URI.parse(uri)
-         case r
-         when ::URI::HTTPS
-           __parse_and_new__(HTTPS, uri)
-         when ::URI::HTTP
-           __parse_and_new__(HTTP, uri)
-         when ::URI::FTP
-           __parse_and_new__(FTP, uri)
-         else
-           r
-         end
-       end
-     end #module URI
-     
-     @load_openuri = nil
-     # load open-uri.rb in SandBox module.
-     def self.load_openuri_in_sandbox #:nodoc:
-       return if @load_openuri
-       fn = nil
-       unless $:.find do |x|
-           fn = File.join(x, 'open-uri.rb')
-           FileTest.exist?(fn)
-         end then
-         warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
-       else
-         # reading open-uri.rb
-         str = File.read(fn)
-         # eval open-uri.rb contents in SandBox module
-         module_eval(str)
-         
-         # finds 'find_proxy' method
-         find_proxy_lines = nil
-         flag = nil
-         endstr = nil
-         str.each do |line|
-           if flag then
-             find_proxy_lines << line
-             if endstr == line[0, endstr.length] and
-                 /^\s+end(\s+.*)?$/ =~ line then
-               break
-             end
-           elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
-             flag = true
-             endstr = "#{$1}end"
-             find_proxy_lines = line 
-           end
-         end
-         if find_proxy_lines
-           module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
-         else
-           warn('Warning: cannot find find_proxy method in open-uri.rb.')
-         end
-         @load_openuri = true
-       end
-     end
-   end #module SandBox
  end #module NetTools
  
--- 158,165 ----
  module NetTools
  
    # Same as OpenURI.open_uri(uri).read.
    def self.read_uri(uri)
!     OpenURI.open_uri(uri).read
    end
  end #module NetTools
  

From k at dev.open-bio.org  Sat Mar 25 21:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio sequence.rb,0.56,0.57
Message-ID: <200603260228.k2Q2S1uq028859@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28853

Modified Files:
	sequence.rb 
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
  * Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess.  (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
  * Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block).  With this argument set, blocks will be properly passed through to the encapsulated object.


Index: sequence.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence.rb,v
retrieving revision 0.56
retrieving revision 0.57
diff -C2 -d -r0.56 -r0.57
*** sequence.rb	17 Feb 2006 17:15:08 -0000	0.56
--- sequence.rb	26 Mar 2006 02:27:59 -0000	0.57
***************
*** 5,9 ****
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Yoshinori K. Okuji <okuji at enbug.org>,
! #               Naohisa Goto <ng at bioruby.org>
  # License::     Ruby's
  #
--- 5,11 ----
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Yoshinori K. Okuji <okuji at enbug.org>,
! #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>,
! #               Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::     Ruby's
  #
***************
*** 15,18 ****
--- 17,67 ----
  module Bio
  
+ # = DESCRIPTION
+ # Bio::Sequence objects represent annotated sequences in bioruby.
+ # A Bio::Sequence object is a wrapper around the actual sequence, 
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
+ # For most users, this encapsulation will be completely transparent.
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
+ # objects using the same arguments and returning the same values (even though 
+ # these methods are not documented specifically for Bio::Sequence).
+ #
+ # = USAGE
+ #   # Create a nucleic or amino acid sequence
+ #   dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
+ #   rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
+ #   aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
+ # 
+ #   # Print it out
+ #   puts dna.to_s
+ #   puts aa.to_s
+ # 
+ #   # Get a subsequence, bioinformatics style (first nucleotide is '1')
+ #   puts dna.subseq(2,6)
+ # 
+ #   # Get a subsequence, informatics style (first nucleotide is '0')
+ #   puts dna[2,6]
+ # 
+ #   # Print in FASTA format
+ #   puts dna.output(:fasta)
+ # 
+ #   # Print all codons
+ #   dna.window_search(3,3) do |codon|
+ #     puts codon
+ #   end
+ # 
+ #   # Splice or otherwise mangle your sequence
+ #   puts dna.splicing("complement(join(1..5,16..20))")
+ #   puts rna.splicing("complement(join(1..5,16..20))")
+ # 
+ #   # Convert a sequence containing ambiguity codes into a 
+ #   # regular expression you can use for subsequent searching
+ #   puts aa.to_re
+ # 
+ #   # These should speak for themselves
+ #   puts dna.complement
+ #   puts dna.composition
+ #   puts dna.molecular_weight
+ #   puts dna.translate
+ #   puts dna.gc_percent
  class Sequence
  
***************
*** 23,37 ****
    autoload :Format,  'bio/sequence/format'
  
    def initialize(str)
      @seq = str
    end
  
!   def method_missing(*arg)
!     @seq.send(*arg)
    end
! 
!   attr_accessor :entry_id, :definition, :features, :references, :comments,
!     :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
! 
    def output(style)
      extend Bio::Sequence::Format
--- 72,151 ----
    autoload :Format,  'bio/sequence/format'
  
+   # Create a new Bio::Sequence object
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s                                  #=> 'atgc'
+   #
+   # Note that this method does not initialize the contained sequence
+   # as any kind of bioruby object, only as a simple string
+   #
+   #   puts s.seq.class                        #=> String
+   #
+   # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto 
+   # for methods to transform the basic String of a just created 
+   # Bio::Sequence object to a proper bioruby object
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String or Bio::Sequence::NA/AA object
+   # *Returns*:: Bio::Sequence object
    def initialize(str)
      @seq = str
    end
  
!   # Pass any unknown method calls to the wrapped sequence object.  see
!   # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
!   def method_missing(sym, *args, &block) #:nodoc:
!     @seq.send(sym, *args, &block)
    end
!   
!   # The sequence identifier.  For example, for a sequence
!   # of Genbank origin, this is the accession number.
!   attr_accessor :entry_id
!   
!   # A String with a description of the sequence
!   attr_accessor :definition
!   
!   # An Array of Bio::Feature objects
!   attr_accessor :features
!   
!   # An Array of Bio::Reference objects
!   attr_accessor :references
!   
!   # A comment String
!   attr_accessor :comments
!   
!   # Date from sequence source. Often date of deposition.
!   attr_accessor :date
!   
!   # An Array of Strings
!   attr_accessor :keywords
!   
!   # An Array of Strings; links to other database entries.
!   attr_accessor :dblinks
!   
!   # A taxonomy String
!   attr_accessor :taxonomy
!   
!   # Bio::Sequence::NA/AA
!   attr_accessor :moltype
!   
!   # The sequence object, usually Bio::Sequence::NA/AA, 
!   # but could be a simple String
!   attr_accessor :seq
!   
!   # Using Bio::Sequence::Format, return a String with the Bio::Sequence
!   # object formatted in the given style.
!   #
!   # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
!   #
!   #   s = Bio::Sequence.new('atgc')
!   #   puts s.output(:fasta)                   #=> "> \natgc\n"
!   #
!   # The style argument is given as a Ruby 
!   # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
!   # ---
!   # *Arguments*: 
!   # * (required) _style_: :fasta, :genbank, *or* :embl
!   # *Returns*:: String object
    def output(style)
      extend Bio::Sequence::Format
***************
*** 48,51 ****
--- 162,175 ----
    end
  
+   # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a 
+   # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
+   # of this guess.  This method will change the current Bio::Sequence object.
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.seq.class                        #=> String
+   #   s.auto
+   #   puts s.seq.class                        #=> Bio::Sequence::NA
+   # ---
+   # *Returns*:: Bio::Sequence::NA/AA object
    def auto
      @moltype = guess
***************
*** 57,60 ****
--- 181,194 ----
    end
  
+   # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
+   # return a new Bio::Sequence object wrapping a sequence of the guessed type
+   # (either Bio::Sequence::AA or Bio::Sequence::NA)
+   # 
+   #   s = Bio::Sequence.auto('atgc')
+   #   puts s.seq.class                        #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+   # *Returns*:: Bio::Sequence object
    def self.auto(str)
      seq = self.new(str)
***************
*** 63,74 ****
    end
  
    def guess(threshold = 0.9, length = 10000, index = 0)
      str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
      cmp = str.composition
  
!     bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + 
!             cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
  
!     total = @seq.length - cmp['N'] - cmp['n']
  
      if bases.to_f / total > threshold
--- 197,247 ----
    end
  
+   # Guess the class of the current sequence.  Returns the class
+   # (Bio::Sequence::AA or Bio::Sequence::NA) guessed.  In general, used by
+   # developers only, but if you know what you are doing, feel free.
+   # 
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.guess                            #=> Bio::Sequence::NA
+   #
+   # There are three parameters: `threshold`, `length`, and `index`.  
+   #
+   # The `threshold` value (defaults to 0.9) is the frequency of 
+   # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+   # to produce a Bio::Sequence::NA "guess".  In the default case, if less
+   # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+   # then the guess is Bio::Sequence::AA.
+   # 
+   #   s = Bio::Sequence.new('atgcatgcqq')
+   #   puts s.guess                            #=> Bio::Sequence::AA
+   #   puts s.guess(0.8)                       #=> Bio::Sequence::AA
+   #   puts s.guess(0.7)                       #=> Bio::Sequence::NA
+   #
+   # The `length` value is how much of the total sequence to use in the
+   # guess (default 10000).  If your sequence is very long, you may 
+   # want to use a smaller amount to reduce the computational burden.
+   #
+   #   s = Bio::Sequence.new(A VERY LONG SEQUENCE)
+   #   puts s.guess(0.9, 1000)  # limit the guess to the first 1000 positions
+   #
+   # The `index` value is where to start the guess.  Perhaps you know there
+   # are a lot of gaps at the start...
+   #
+   #   s = Bio::Sequence.new('-----atgcc')
+   #   puts s.guess                            #=> Bio::Sequence::AA
+   #   puts s.guess(0.9,10000,5)               #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+   # * (optional) _length_: Fixnum (default 10000)
+   # * (optional) _index_: Fixnum (default 0)
+   # *Returns*:: Bio::Sequence::NA/AA
    def guess(threshold = 0.9, length = 10000, index = 0)
      str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
      cmp = str.composition
  
!     bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
!             cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u']
  
!     total = str.length - cmp['N'] - cmp['n']
  
      if bases.to_f / total > threshold
***************
*** 79,86 ****
--- 252,312 ----
    end 
  
+   # Guess the class of a given sequence.  Returns the class
+   # (Bio::Sequence::AA or Bio::Sequence::NA) guessed.  In general, used by
+   # developers only, but if you know what you are doing, feel free.
+   # 
+   #   puts Bio::Sequence.guess('atgc')   #=> Bio::Sequence::NA
+   #
+   # There are three optional parameters: `threshold`, `length`, and `index`.  
+   #
+   # The `threshold` value (defaults to 0.9) is the frequency of 
+   # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+   # to produce a Bio::Sequence::NA "guess".  In the default case, if less
+   # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+   # then the guess is Bio::Sequence::AA.
+   # 
+   #   puts Bio::Sequence.guess('atgcatgcqq')      #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
+   #
+   # The `length` value is how much of the total sequence to use in the
+   # guess (default 10000).  If your sequence is very long, you may 
+   # want to use a smaller amount to reduce the computational burden.
+   #
+   #   # limit the guess to the first 1000 positions
+   #   puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)  
+   #
+   # The `index` value is where to start the guess.  Perhaps you know there
+   # are a lot of gaps at the start...
+   #
+   #   puts Bio::Sequence.guess('-----atgcc')             #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+   # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+   # * (optional) _length_: Fixnum (default 10000)
+   # * (optional) _index_: Fixnum (default 0)
+   # *Returns*:: Bio::Sequence::NA/AA
    def self.guess(str, *args)
      self.new(str).guess(*args)
    end
  
+   # Transform the sequence wrapped in the current Bio::Sequence object
+   # into a Bio::Sequence::NA object.  This method will change the current
+   # object.  This method does not validate your choice, so be careful!
+   #
+   #   s = Bio::Sequence.new('RRLE')
+   #   puts s.seq.class                        #=> String
+   #   s.na
+   #   puts s.seq.class                        #=> Bio::Sequence::NA !!!
+   #
+   # However, if you know your sequence type, this method may be 
+   # constructively used after initialization,
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   s.na
+   # ---
+   # *Returns*:: Bio::Sequence::NA
    def na
      @seq = NA.new(@seq)
***************
*** 88,96 ****
    end
  
    def aa
      @seq = AA.new(@seq)
      @moltype = AA
    end
! 
  end # Sequence
  
--- 314,338 ----
    end
  
+   # Transform the sequence wrapped in the current Bio::Sequence object
+   # into a Bio::Sequence::AA object.  This method will change the current
+   # object.  This method does not validate your choice, so be careful!
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.seq.class                        #=> String
+   #   s.aa
+   #   puts s.seq.class                        #=> Bio::Sequence::AA !!!
+   #
+   # However, if you know your sequence type, this method may be 
+   # constructively used after initialization,
+   #
+   #   s = Bio::Sequence.new('RRLE')
+   #   s.aa
+   # ---
+   # *Returns*:: Bio::Sequence::AA
    def aa
      @seq = AA.new(@seq)
      @moltype = AA
    end
!   
  end # Sequence
  

From k at dev.open-bio.org  Sat Mar 25 21:32:58 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:32:58 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio reference.rb,1.21,1.22
Message-ID: <200603260232.k2Q2Ww61028892@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28888

Modified Files:
	reference.rb 
Log Message:
* comprehensive documentation contributed by Ryan Raaum is added


Index: reference.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/reference.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** reference.rb	8 Feb 2006 15:06:26 -0000	1.21
--- reference.rb	26 Mar 2006 02:32:56 -0000	1.22
***************
*** 2,45 ****
  # = bio/reference.rb - Journal reference classes
  #
! # Copyright::   Copyright (C) 2001 
! #               KATAYAMA Toshiaki <k at bioruby.org>
! # Lisence::     LGPL
  #
  # $Id$
  #
- # == Description
- # 
- # Journal reference classes.
- #
- # == Examples
- #
- # == References
- #
- # 
- #
- #--
- #
- #  This library is free software; you can redistribute it and/or
- #  modify it under the terms of the GNU Lesser General Public
- #  License as published by the Free Software Foundation; either
- #  version 2 of the License, or (at your option) any later version.
- #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public
- #  License along with this library; if not, write to the Free Software
- #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
- #
- #++
- #
  
  module Bio
  
    # A class for journal reference information.
    #
!   # === Examples
    # 
    #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
--- 2,20 ----
  # = bio/reference.rb - Journal reference classes
  #
! # Copyright::   Copyright (C) 2001, 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
! # License::     Ruby's
  #
  # $Id$
  #
  
  module Bio
  
+   # = DESCRIPTION
+   #
    # A class for journal reference information.
    #
!   # = USAGE
    # 
    #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
***************
*** 69,100 ****
      attr_reader :authors
  
!     # "Title of the study."
      attr_reader :title
  
!     # "Theor. J. Hoge"
      attr_reader :journal
  
!     # 12
      attr_reader :volume
      
!     # 3
      attr_reader :issue
  
!     # "123-145"
      attr_reader :pages
  
!     # 2001
      attr_reader :year
  
!     # 12345678
      attr_reader :pubmed
  
!     # 98765432
      attr_reader :medline
      
!     # Abstract test in String.
      attr_reader :abstract
  
!     # A URL String.
      attr_reader :url
  
--- 44,75 ----
      attr_reader :authors
  
!     # String with title of the study
      attr_reader :title
  
!     # String with journal name
      attr_reader :journal
  
!     # volume number (typically Fixnum)
      attr_reader :volume
      
!     # issue number (typically Fixnum)
      attr_reader :issue
  
!     # page range (typically String, e.g. "123-145")
      attr_reader :pages
  
!     # year of publication (typically Fixnum)
      attr_reader :year
  
!     # pubmed identifier (typically Fixnum)
      attr_reader :pubmed
  
!     # medline identifier (typically Fixnum)
      attr_reader :medline
      
!     # Abstract text in String.
      attr_reader :abstract
  
!     # An URL String.
      attr_reader :url
  
***************
*** 105,109 ****
      attr_reader :affiliations
  
!     # 
      def initialize(hash)
        hash.default = ''
--- 80,119 ----
      attr_reader :affiliations
  
!     # Create a new Bio::Reference object from a Hash of values. 
!     # Data is extracted from the values for keys:
!     #
!     # * authors - expected value: Array of Strings
!     # * title - expected value: String
!     # * journal - expected value: String
!     # * volume - expected value: Fixnum or String
!     # * issue - expected value: Fixnum or String
!     # * pages - expected value: String
!     # * year - expected value: Fixnum or String
!     # * pubmed - expected value: Fixnum or String
!     # * medline - expected value: Fixnum or String
!     # * abstract - expected value: String
!     # * url - expected value: String
!     # * mesh - expected value: Array of Strings
!     # * affiliations - expected value: Array of Strings
!     #
!     #
!     #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
!     #            'title' => "Title of the study.",
!     #            'journal' => "Theor. J. Hoge", 
!     #            'volume' => 12, 
!     #            'issue' => 3, 
!     #            'pages' => "123-145",
!     #            'year' => 2001, 
!     #            'pubmed' => 12345678, 
!     #            'medline' => 98765432, 
!     #            'abstract' => "Hoge fuga. ...",
!     #            'url' => "http://example.com", 
!     #            'mesh' => [], 
!     #            'affiliations' => []}
!     #    ref = Bio::Reference.new(hash)
!     # ---
!     # *Arguments*:
!     # * (required) _hash_: Hash
!     # *Returns*:: Bio::Reference object
      def initialize(hash)
        hash.default = ''
***************
*** 131,138 ****
      # 0. nil - general
      # 1. endnote - Endnote
!     # 2. bibitem - Bibitem (option acceptable)
!     # 3. bibtex - BiBTeX (option acceptable)
!     # 4. rd - rd (option acceptable)
!     # 5. nature - Nature (option acceptable)
      # 6. science - Science
      # 7. genome_biol - Genome Biology
--- 141,148 ----
      # 0. nil - general
      # 1. endnote - Endnote
!     # 2. bibitem - Bibitem (option available)
!     # 3. bibtex - BiBTeX (option available)
!     # 4. rd - rd (option available)
!     # 5. nature - Nature (option available)
      # 6. science - Science
      # 7. genome_biol - Genome Biology
***************
*** 142,145 ****
--- 152,172 ----
      # 11. trends - Trends in *
      # 12. cell - Cell Press
+     #
+     # See individual methods for details. Basic usage is:
+     #
+     #   # ref is Bio::Reference object
+     #   # using simplest possible call (for general style)
+     #   puts ref.format
+     #   
+     #   # output in Nature style
+     #   puts ref.format("nature")      # alternatively, puts ref.nature
+     #
+     #   # output in Nature short style (see Bio::Reference#nature)
+     #   puts ref.format("nature",true) # alternatively, puts ref.nature(true)
+     # ---
+     # *Arguments*:
+     # * (optional) _style_: String with style identifier
+     # * (optional) _option_: Option for styles accepting one
+     # *Returns*:: String
      def format(style = nil, option = nil)
        case style
***************
*** 173,177 ****
      end
  
!     # Formats in the Endonote style.
      def endnote
        lines = []
--- 200,222 ----
      end
  
!     # Returns reference formatted in the Endnote style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.endnote
!     #
!     #     %0 Journal Article
!     #     %A Hoge, J.P.
!     #     %A Fuga, F.B.
!     #     %D 2001
!     #     %T Title of the study.
!     #     %J Theor. J. Hoge
!     #     %V 12
!     #     %N 3
!     #     %P 123-145
!     #     %M 12345678
!     #     %U http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=12345678
!     #     %X Hoge fuga. ...
!     # ---
!     # *Returns*:: String
      def endnote
        lines = []
***************
*** 201,205 ****
      end
  
!     # Formats in the bibitem.
      def bibitem(item = nil)
        item  = "PMID:#{@pubmed}" unless item
--- 246,260 ----
      end
  
!     # Returns reference formatted in the bibitem style
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.bibitem
!     #
!     #     \bibitem{PMID:12345678}
!     #     Hoge, J.P., Fuga, F.B.
!     #     Title of the study.,
!     #     {\em Theor. J. Hoge}, 12(3):123--145, 2001.
!     # ---
!     # *Returns*:: String
      def bibitem(item = nil)
        item  = "PMID:#{@pubmed}" unless item
***************
*** 213,217 ****
      end
  
!     # Formats in the BiBTeX style.
      def bibtex(section = nil)
        section = "article" unless section
--- 268,303 ----
      end
  
!     # Returns reference formatted in the BiBTeX style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.bibtex
!     #
!     #     @article{PMID:12345678,
!     #       author  = {Hoge, J.P. and Fuga, F.B.},
!     #       title   = {Title of the study.},
!     #       journal = {Theor. J. Hoge},
!     #       year    = {2001},
!     #       volume  = {12},
!     #       number  = {3},
!     #       pages   = {123--145},
!     #     }
!     #
!     #   # using a different section (e.g. "book")
!     #   # (but not really configured for anything other than articles)
!     #   puts ref.bibtex("book")
!     #
!     #     @book{PMID:12345678,
!     #       author  = {Hoge, J.P. and Fuga, F.B.},
!     #       title   = {Title of the study.},
!     #       journal = {Theor. J. Hoge},
!     #       year    = {2001},
!     #       volume  = {12},
!     #       number  = {3},
!     #       pages   = {123--145},
!     #     }    
!     # ---
!     # *Arguments*:
!     # * (optional) _section_: BiBTeX section as String
!     # *Returns*:: String
      def bibtex(section = nil)
        section = "article" unless section
***************
*** 231,235 ****
      end
  
!     # Formats in a general style.                
      def general
        authors = @authors.join(', ')
--- 317,328 ----
      end
  
!     # Returns reference formatted in a general/generic style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.general
!     #
!     #     Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.
!     # ---
!     # *Returns*:: String
      def general
        authors = @authors.join(', ')
***************
*** 237,241 ****
      end
  
!     # Formats in the RD style.
      def rd(str = nil)
        @abstract ||= str
--- 330,351 ----
      end
  
!     # Returns reference formatted in the RD style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.rd
!     #
!     #     == Title of the study.
!     #     
!     #     * Hoge, J.P. and Fuga, F.B.
!     #     
!     #     * Theor. J. Hoge 2001 12:123-145 [PMID:12345678]
!     #     
!     #     Hoge fuga. ...
!     #
!     # An optional string argument can be supplied, but does nothing.
!     # ---
!     # *Arguments*:
!     # * (optional) _str_: String (default nil)
!     # *Returns*:: String
      def rd(str = nil)
        @abstract ||= str
***************
*** 248,253 ****
      end
  
!     # Formats in the Nature Publish Group style.
!     # * http://www.nature.com
      def nature(short = false)
        if short
--- 358,377 ----
      end
  
!     # Formats in the Nature Publishing Group 
!     # (http://www.nature.com) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.nature
!     #
!     #     Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).
!     #
!     #   # optionally, output short version
!     #   puts ref.nature(true)  # or puts ref.nature(short=true)
!     #
!     #     Hoge, J.P. & Fuga, F.B. Theor. J. Hoge 12, 123-145 (2001).
!     # ---
!     # *Arguments*:
!     # * (optional) _short_: Boolean (default false)
!     # *Returns*:: String
      def nature(short = false)
        if short
***************
*** 266,271 ****
      end
  
!     # Formats in the Science style.
!     # * http://www.siencemag.com/
      def science
        if @authors.size > 4
--- 390,402 ----
      end
  
!     # Returns reference formatted in the 
!     # Science[http://www.sciencemag.org] style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.science
!     #
!     #     J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).
!     # ---
!     # *Returns*:: String
      def science
        if @authors.size > 4
***************
*** 278,283 ****
      end
  
!     # Formats in the Genome Biology style.
!     # * http://genomebiology.com/
      def genome_biol
        authors = @authors.collect {|name| strip_dots(name)}.join(', ')
--- 409,421 ----
      end
  
!     # Returns reference formatted in the Genome Biology 
!     # (http://genomebiology.com) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.genome_biol
!     #
!     #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
!     # ---
!     # *Returns*:: String
      def genome_biol
        authors = @authors.collect {|name| strip_dots(name)}.join(', ')
***************
*** 285,294 ****
        "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
      end
!     # Formats in the Current Biology style.
!     # * http://www.current-biology.com/
!     alias current genome_biol
  
!     # Formats in the Genome Research style.
!     # * http://genome.org/
      def genome_res
        authors = authors_join(' and ')
--- 423,450 ----
        "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
      end
!     
!     # Returns reference formatted in the Current Biology 
!     # (http://current-biology.com) style. (Same as the Genome Biology style)
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.current
!     #
!     #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
!     # ---
!     # *Returns*:: String
!     def current 
!       self.genome_biol
!     end
  
!     # Returns reference formatted in the Genome Research 
!     # (http://genome.org) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.genome_res
!     #
!     #     Hoge, J.P. and Fuga, F.B. 2001.
!     #       Title of the study. Theor. J. Hoge 12: 123-145.
!     # ---
!     # *Returns*:: String
      def genome_res
        authors = authors_join(' and ')
***************
*** 296,301 ****
      end
  
!     # Formats in the Nucleic Acids Reseach style.
!     # * http://nar.oxfordjournals.org/
      def nar
        authors = authors_join(' and ')
--- 452,464 ----
      end
  
!     # Returns reference formatted in the Nucleic Acids Research
!     # (http://nar.oxfordjournals.org) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.nar
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.
!     # ---
!     # *Returns*:: String
      def nar
        authors = authors_join(' and ')
***************
*** 303,308 ****
      end
  
!     # Formats in the CELL Press style.
!     # http://www.cell.com/
      def cell
        authors = authors_join(' and ')
--- 466,478 ----
      end
  
!     # Returns reference formatted in the 
!     # CELL[http://www.cell.com] Press style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.cell
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.
!     # ---
!     # *Returns*:: String
      def cell
        authors = authors_join(' and ')
***************
*** 310,315 ****
      end
      
!     # Formats in the TRENDS Journals.
!     # * http://www.trends.com/
      def trends
        if @authors.size > 2
--- 480,492 ----
      end
      
!     # Returns reference formatted in the 
!     # TRENDS[http://www.trends.com] style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.trends
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145
!     # ---
!     # *Returns*:: String
      def trends
        if @authors.size > 2
***************
*** 352,358 ****
    end
  
!   # Set of Bio::Reference.
    #
!   # === Examples
    #
    #   refs = Bio::References.new
--- 529,537 ----
    end
  
!   # = DESCRIPTION
    #
!   # A container class for Bio::Reference objects.
!   #
!   # = USAGE
    #
    #   refs = Bio::References.new
***************
*** 364,371 ****
    class References
  
!     # Array of Bio::Reference.
      attr_accessor :references
  
      # 
      def initialize(ary = [])
        @references = ary
--- 543,556 ----
    class References
  
!     # Array of Bio::Reference objects
      attr_accessor :references
  
+     # Create a new Bio::References object
      # 
+     #   refs = Bio::References.new
+     # ---
+     # *Arguments*:
+     # * (optional) __: Array of Bio::Reference objects
+     # *Returns*:: Bio::References object
      def initialize(ary = [])
        @references = ary
***************
*** 373,377 ****
  
  
!     # Append a Bio::Reference object.
      def append(reference)
        @references.push(reference) if reference.is_a? Reference
--- 558,568 ----
  
  
!     # Add a Bio::Reference object to the container.
!     #
!     #   refs.append(reference)
!     # ---
!     # *Arguments*:
!     # * (required) _reference_: Bio::Reference object
!     # *Returns*:: current Bio::References object
      def append(reference)
        @references.push(reference) if reference.is_a? Reference
***************
*** 379,383 ****
      end
  
!     # Iterates each Bio::Reference object.
      def each
        @references.each do |reference|
--- 570,580 ----
      end
  
!     # Iterate through Bio::Reference objects.
!     #
!     #   refs.each do |reference|
!     #     ...
!     #   end
!     # ---
!     # *Block*:: yields each Bio::Reference object
      def each
        @references.each do |reference|


From k at dev.open-bio.org  Sat Mar 25 21:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/sequence aa.rb, 1.2, 1.3 common.rb,
	1.2, 1.3 compat.rb, 1.2, 1.3 format.rb, 1.2, 1.3 generic.rb,
	1.3, 1.4 na.rb, 1.2, 1.3
Message-ID: <200603260228.k2Q2S12v028863@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv28853/sequence

Modified Files:
	aa.rb common.rb compat.rb format.rb generic.rb na.rb 
Log Message:
* Comprehensive documentation contributed by Ryan Raaum and Jan Aerts has been added.
* bug fixes in sequence.rb contributed by Ryan Raaum
  * Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess.  (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
  * Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block).  With this argument set, blocks will be properly passed through to the encapsulated object.


Index: compat.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/compat.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** compat.rb	6 Feb 2006 14:18:03 -0000	1.2
--- compat.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 18,21 ****
--- 19,33 ----
    autoload :AA,     'bio/sequence/aa'
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(@seq)
***************
*** 26,32 ****
--- 38,51 ----
  module Common
  
+   # *DEPRECATED* Do not use! Use Bio::Sequence#output instead.
+   # 
    # Output the FASTA format string of the sequence.  The 1st argument is
    # used as the comment string.  If the 2nd option is given, the output
    # sequence will be folded.
+   # ---
+   # *Arguments*:
+   # * (optional) _header_: String object
+   # * (optional) _width_: Fixnum object (default nil)
+   # *Returns*:: String
    def to_fasta(header = '', width = nil)
      warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
***************
*** 44,52 ****
  class NA
  
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
--- 63,89 ----
  class NA
  
+   # Generate a new random sequence with the given frequency of bases.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the base composition of an existing 
+   # sequence instance).
+   #
+   #   counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
+   #   puts Bio::Sequence::NA.randomize(counts)  #=> "ggcttgttac" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
+   #   Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::NA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu #:nodoc:
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
***************
*** 57,60 ****
--- 94,115 ----
  class AA
  
+   # Generate a new random sequence with the given frequency of residues.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the residue composition of an existing
+   # sequence instance).
+   #
+   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
+   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
+   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)

Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** common.rb	6 Feb 2006 14:16:17 -0000	1.2
--- common.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 15,22 ****
  class Sequence
  
! # This module provides common methods for biological sequence classes
! # which must inherit String.
  module Common
  
    def to_s
      String.new(self)
--- 16,53 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::Common is a 
! # Mixin[http://www.rubycentral.com/book/tut_modules.html]
! # implementing methods common to
! # Bio::Sequence::AA and Bio::Sequence::NA.  All of these methods
! # are available to either Amino Acid or Nucleic Acid sequences, and
! # by encapsulation are also available to Bio::Sequence objects.
! #
! # = USAGE
! #
! #   # Create a sequence
! #   dna = Bio::Sequence.auto('atgcatgcatgc')
! #
! #   # Splice out a subsequence using a Genbank-style location string
! #   puts dna.splice('complement(1..4)')
! #
! #   # What is the base composition?
! #   puts dna.composition
! #
! #   # Create a random sequence with the composition of a current sequence
! #   puts dna.randomize
  module Common
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
!   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence::NA
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(self)
***************
*** 24,34 ****
    alias to_str to_s
  
!   # Force self to re-initialize for clean up (remove white spaces,
!   # case unification).
    def seq
      self.class.new(self)
    end
  
!   # Similar to the 'seq' method, but changes the self object destructively.
    def normalize!
      initialize(self)
--- 55,79 ----
    alias to_str to_s
  
!   # Create a new sequence based on the current sequence.
!   # The original sequence is unchanged.
!   # 
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s2 = s.seq
!   #   puts s2                                 #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def seq
      self.class.new(self)
    end
  
!   # Normalize the current sequence, removing all whitespace and 
!   # transforming all positions to uppercase if the sequence is AA or
!   # transforming all positions to lowercase if the sequence is NA.
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s.normalize!
!   # ---
!   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def normalize!
      initialize(self)
***************
*** 37,40 ****
--- 82,95 ----
    alias seq! normalize!
  
+   # Add new data to the end of the current sequence.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s << 'atgc'
+   #   puts s                                  #=> "atgcatgc"
+   #   s << s
+   #   puts s                                  #=> "atgcatgcatgcatgc"
+   # ---
+   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def <<(*arg)
      super(self.class.new(*arg))
***************
*** 42,50 ****
    alias concat <<
  
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns the subsequence of the self string.
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
--- 97,141 ----
    alias concat <<
  
+   # Create a new sequence by adding to an existing sequence.
+   # The existing sequence is not modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s2 = s + 'atgc'
+   #   puts s2                                 #=> "atgcatgc"
+   #   puts s                                  #=> "atgc"
+   #
+   # The new sequence is of the same class as the existing sequence if 
+   # the new data was added to an existing sequence,
+   #
+   #   puts s2.class == s.class                #=> true
+   #
+   # but if an existing sequence is added to a String, the result is a String
+   #
+   #   s3 = 'atgc' + s
+   #   puts s3.class                           #=> String
+   # ---
+   # *Returns*:: new Bio::Sequence::NA/AA *or* String object
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns a new sequence containing the subsequence identified by the 
!   # start and end numbers given as parameters.  *Important:* Biological 
!   # sequence numbering conventions (one-based) rather than ruby's 
!   # (zero-based) numbering conventions are used.  
!   #
!   #   s = Bio::Sequence::NA.new('atggaatga')
!   #   puts s.subseq(1,3)                      #=> "atg"
!   #
!   # Start defaults to 1 and end defaults to the entire existing string, so
!   # subseq called without any parameters simply returns a new sequence 
!   # identical to the existing sequence.
!   #
!   #   puts s.subseq                           #=> "atggaatga"
!   # ---
!   # *Arguments*:
!   # * (optional) _s_(start): Integer (default 1)
!   # * (optional) _e_(end): Integer (default current sequence length)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
***************
*** 54,80 ****
    end
  
!   # This method iterates on sub string with specified length 'window_size'.
!   # By specifing 'step_size', codon sized shifting or spliting genome
!   # sequence with ovelapping each end can easily be yielded.
    #
!   # The remainder sequence at the terminal end will be returned.
    #
!   # Example:
!   #   # prints average GC% on each 100bp
!   #   seq.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   # prints every translated peptide (length 5aa) in the same frame
!   #   seq.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #   # split genome sequence by 10000bp with 1000bp overlap in fasta format
    #   i = 1
!   #   remainder = seq.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   #
    def window_search(window_size, step_size = 1)
      i = 0
--- 145,177 ----
    end
  
!   # This method steps through a sequence in steps of 'step_size' by
!   # subsequences of 'window_size'. Typically used with a block.
!   # Any remaining sequence at the terminal end will be returned.
    #
!   # Prints average GC% on each 100bp
    #
!   #   s.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   
!   # Prints every translated peptide (length 5aa) in the same frame
!   #
!   #   s.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #
!   # Split genome sequence by 10000bp with 1000bp overlap in fasta format
!   #
    #   i = 1
!   #   remainder = s.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   # ---
!   # *Arguments*:
!   # * (required) _window_size_: Fixnum
!   # * (optional) _step_size_: Fixnum (default 1)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def window_search(window_size, step_size = 1)
      i = 0
***************
*** 85,91 ****
    end
  
!   # This method receive a hash of residues/bases to the particular values,
!   # and sum up the value along with the self sequence.  Especially useful
!   # to use with the window_search method and amino acid indices etc.
    def total(hash)
      hash.default = 0.0 unless hash.default
--- 182,195 ----
    end
  
!   # Returns a float total value for the sequence given a hash of
!   # base or residue values,
!   #
!   #   values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.total(values)                    #=> 1.0
!   # ---
!   # *Arguments*:
!   # * (required) _hash_: Hash object
!   # *Returns*:: Float object
    def total(hash)
      hash.default = 0.0 unless hash.default
***************
*** 100,103 ****
--- 204,212 ----
  
    # Returns a hash of the occurrence counts for each residue or base.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.composition              #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
+   # ---
+   # *Returns*:: Hash object
    def composition
      count = Hash.new(0)
***************
*** 108,118 ****
    end
  
!   # Returns a randomized sequence keeping its composition by default.
!   # The argument is required when generating a random sequence from the empty
!   # sequence (used by the class methods NA.randomize, AA.randomize).
!   # If the block is given, yields for each random residue/base.
    def randomize(hash = nil)
      length = self.length
      if hash
        count = hash.clone
        count.each_value {|x| length += x}
--- 217,244 ----
    end
  
!   # Returns a randomized sequence. The default is to retain the same 
!   # base/residue composition as the original.  If a hash of base/residue 
!   # counts is given, the new sequence will be based on that hash 
!   # composition.  If a block is given, each new randomly selected 
!   # position will be passed into the block.  In all cases, the
!   # original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.randomize                        #=> "tcag"  (for example)
!   #
!   #   new_composition = {'a' => 2, 't' => 2}
!   #   puts s.randomize(new_composition)       #=> "ttaa"  (for example)
!   #
!   #   count = 0
!   #   s.randomize { |x| count += 1 }
!   #   puts count                              #=> 4
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def randomize(hash = nil)
      length = self.length
      if hash
+       length = 0
        count = hash.clone
        count.each_value {|x| length += x}
***************
*** 139,151 ****
    end
  
!   # Generate a new random sequence with the given frequency of bases
!   # or residues.  The sequence length is determined by the sum of each
!   # base/residue occurences.
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Receive a GenBank style position string and convert it to the Locations
!   # objects to splice the sequence itself.  See also: bio/location.rb
    def splice(position)
      unless position.is_a?(Locations) then
--- 265,305 ----
    end
  
!   # Generate a new random sequence with the given frequency of bases.
!   # The sequence length is determined by their cumulative sum.
!   # (See also Bio::Sequence::Common#randomize which creates a new
!   # randomized sequence object using the base composition of an existing 
!   # sequence instance).
!   #
!   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
!   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
!   #
!   # You may also feed the output of randomize into a block
!   #
!   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
!   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
!   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Return a new sequence extracted from the original using a GenBank style 
!   # position string.  See also documentation for the Bio::Location class.
!   #
!   #   s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
!   #   puts s.splice('1..3')                           #=> "atg"
!   #   puts s.splice('join(1..3,8..10)')               #=> "atgcat"
!   #   puts s.splice('complement(1..3)')               #=> "cat"
!   #   puts s.splice('complement(join(1..3,8..10))')   #=> "atgcat"
!   #
!   # Note that 'complement'ed Genbank position strings will have no 
!   # effect on Bio::Sequence::AA objects.
!   # ---
!   # *Arguments*:
!   # * (required) _position_: String *or* Bio::Locations object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def splice(position)
      unless position.is_a?(Locations) then

Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** format.rb	6 Feb 2006 14:20:35 -0000	1.2
--- format.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 4,8 ****
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>
  # License::     Ruby's
  #
--- 4,9 ----
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 21,29 ****
  class Sequence
  
  module Format
  
!   # Output the FASTA format string of the sequence.  The 1st argument is
!   # used in the comment line.  If the 2nd argument (integer) is given,
!   # the output sequence will be folded.
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
--- 22,56 ----
  class Sequence
  
+ # = DESCRIPTION
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
+ # of methods used by Bio::Sequence#output to output sequences in 
+ # common bioinformatic formats.  These are not called in isolation.
+ #
+ # = USAGE
+ #   # Given a Bio::Sequence object,
+ #   puts s.output(:fasta)
+ #   puts s.output(:genbank)
+ #   puts s.output(:embl)
  module Format
  
!   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
!   # case, it would be difficult to successfully call this method outside
!   # its expected context).
!   #
!   # Output the FASTA format string of the sequence.  
!   #
!   # UNFORTUNATELY, the current implementation of Bio::Sequence is incapable of
!   # using either the header or width arguments.  So something needs to be
!   # changed...
!   #
!   # Currently, this method is used in Bio::Sequence#output like so,
!   #
!   #   s = Bio::Sequence.new('atgc')
!   #   puts s.output(:fasta)                   #=> "> \natgc\n"
!   # ---
!   # *Arguments*:
!   # * (optional) _header_: String (default nil)
!   # * (optional) _width_: Fixnum (default nil)
!   # *Returns*:: String object
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
***************
*** 37,44 ****
    end
  
!   def format_gff
      raise NotImplementedError
    end
  
    def format_genbank
      prefix = ' ' * 5
--- 64,83 ----
    end
  
!   # Not yet implemented :)
!   # Remove the nodoc command after implementation!
!   # ---
!   # *Returns*:: String object
!   def format_gff #:nodoc:
      raise NotImplementedError
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the Genbank format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_genbank
      prefix = ' ' * 5
***************
*** 49,52 ****
--- 88,99 ----
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the EMBL format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_embl
      prefix = 'FT   '

Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/aa.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** aa.rb	6 Feb 2006 14:11:31 -0000	1.2
--- aa.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 17,27 ****
  class Sequence
  
! 
! # Amino Acid sequence
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate a amino acid sequence object from a string.
    def initialize(str)
      super
--- 18,61 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create an Amino Acid sequence.
! #   aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
! #
! #   # What are the three-letter codes for all the residues?
! #   puts aa.codes
! #
! #   # What are the names of all the residues?
! #   puts aa.names
! #
! #   # What is the molecular weight of this peptide?
! #   puts aa.molecular_weight
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate an amino acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
!   #
!   # or maybe (if you have an amino acid sequence in a file)
!   #
!   #   s = Bio::Sequence::AA.new(File.open('aa.txt').read)
!   #
!   # Amino Acid sequences are *always* all uppercase in bioruby
!   #
!   #   s = Bio::Sequence::AA.new("rrLeHtfV")
!   #   puts s                                  #=> "RRLEHTFV"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::AA.new("RRL\nELA\tRG\r  RL")
!   #   puts s                                  #=> "RRLELARGRL"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::AA object
    def initialize(str)
      super
***************
*** 31,45 ****
  
  
!   # Estimate the weight of this protein.
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of the each residue along with the
!   # sequence (3 letters code).
    def codes
      array = []
--- 65,98 ----
  
  
!   # Estimate molecular weight based on 
!   # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.molecular_weight             #=> 572.655
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
+   # Create a ruby regular expression instance 
+   # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html])
+   #
+   #   s = Bio::Sequence::AA.new("RRLE")
+   #   puts s.to_re                        #=> /RRLE/
+   # ---
+   # *Returns*:: Regexp object
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (3 letters code).  Codes used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.codes                        #=> ["Arg", "Arg", "Leu", "Glu"]
!   # ---
!   # *Returns*:: Array object
    def codes
      array = []
***************
*** 50,54 ****
    end
  
!   # Similar to codes but returns long names.
    def names
      self.codes.map do |x|
--- 103,115 ----
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (full name).  Names used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.names  
!   #               #=> ["arginine", "arginine", "leucine", "glutamic acid"]
!   # ---
!   # *Returns*:: Array object
    def names
      self.codes.map do |x|

Index: generic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/generic.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** generic.rb	6 Feb 2006 14:26:04 -0000	1.3
--- generic.rb	26 Mar 2006 02:27:59 -0000	1.4
***************
*** 14,18 ****
  class Sequence
  
! class Generic < String
  
    include Bio::Sequence::Common
--- 14,18 ----
  class Sequence
  
! class Generic < String #:nodoc:
  
    include Bio::Sequence::Common

Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/na.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** na.rb	6 Feb 2006 14:13:52 -0000	1.2
--- na.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 19,28 ****
  
  
! # Nucleic Acid sequence
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
    def initialize(str)
      super
--- 20,78 ----
  
  
! # = DESCRIPTION
! # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create a Nucleic Acid sequence.
! #   dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
! #   rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
! #
! #   # What are the names of all the bases?
! #   puts dna.names
! #   puts rna.names
! #
! #   # What is the GC percentage?
! #   puts dna.gc_percent
! #   puts rna.gc_percent
! #
! #   # What is the molecular weight?
! #   puts dna.molecular_weight
! #   puts rna.molecular_weight
! #
! #   # What is the reverse complement?
! #   puts dna.reverse_complement
! #   puts dna.complement
! #
! #   # Is this sequence DNA or RNA?
! #   puts dna.rna?
! #
! #   # Translate my sequence (see method docs for many options)
! #   puts dna.translate
! #   puts rna.translate
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
!   #
!   # or maybe (if you have a nucleic acid sequence in a file)
!   #
!   #   s = Bio::Sequence::NA.new(File.open('dna.txt').read)
!   #
!   # Nucleic Acid sequences are *always* all lowercase in bioruby
!   #
!   #   s = Bio::Sequence::NA.new("AAGcTtGG")
!   #   puts s                                  #=> "aagcttgg"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::NA.new("atg\nggg\ttt\r  gc")
!   #   puts s                                  #=> "atggggttgc"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::NA object
    def initialize(str)
      super
***************
*** 31,36 ****
    end
  
!   # This method depends on Locations class, see bio/location.rb
!   def splicing(position)
      mRNA = super
      if mRNA.rna?
--- 81,86 ----
    end
  
!   # Alias of Bio::Sequence::Common splice method, documented there.
!   def splicing(position) #:nodoc:
      mRNA = super
      if mRNA.rna?
***************
*** 42,46 ****
    end
  
!   # Returns complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement
      s = self.class.new(self)
--- 92,103 ----
    end
  
!   # Returns a new complementary sequence object (without reversing).
!   # The original sequence object is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement               #=> 'tacg'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def forward_complement
      s = self.class.new(self)
***************
*** 49,53 ****
    end
  
!   # Convert to complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement!
      if self.rna?
--- 106,117 ----
    end
  
!   # Converts the current sequence into its complement (without reversing).
!   # The original sequence object is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement!              #=> 'tacg'
!   #   puts s                                  #=> 'tacg'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def forward_complement!
      if self.rna?
***************
*** 59,63 ****
    end
  
!   # Returns reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement
      s = self.class.new(self)
--- 123,134 ----
    end
  
!   # Returns a new sequence object with the reverse complement 
!   # sequence to the original.  The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement               #=> 'gcat'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def reverse_complement
      s = self.class.new(self)
***************
*** 66,70 ****
    end
  
!   # Convert to reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement!
      self.reverse!
--- 137,148 ----
    end
  
!   # Converts the original sequence into its reverse complement.  
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement               #=> 'gcat'
!   #   puts s                                  #=> 'gcat'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def reverse_complement!
      self.reverse!
***************
*** 72,87 ****
    end
  
!   # Aliases for short
    alias complement reverse_complement
    alias complement! reverse_complement!
  
  
!   # Translate into the amino acid sequence from the given frame and the
!   # selected codon table.  The table also can be a Bio::CodonTable object.
!   # The 'unknown' character is used for invalid/unknown codon (can be
!   # used for 'nnn' and/or gap translation in practice).
    #
!   # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
!   # (4, 5 or 6 is also accepted) for the reverse strand.
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
--- 150,235 ----
    end
  
!   # Alias for Bio::Sequence::NA#reverse_complement
    alias complement reverse_complement
+   
+   # Alias for Bio::Sequence::NA#reverse_complement!
    alias complement! reverse_complement!
  
  
!   # Translate into an amino acid sequence.
!   #   
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.translate                        #=> "MA*"
    #
!   # By default, translate starts in reading frame position 1, but you
!   # can start in either 2 or 3 as well,
!   #
!   #   puts s.translate(2)                     #=> "WR"
!   #   puts s.translate(3)                     #=> "GV"
!   #
!   # You may also translate the reverse complement in one step by using frame
!   # values of -1, -2, and -3 (or 4, 5, and 6)
!   #
!   #   puts s.translate(-1)                    #=> "SRH"
!   #   puts s.translate(4)                     #=> "SRH"
!   #   puts s.reverse_complement.translate(1)  #=> "SRH"
!   #
!   # The default codon table in the translate function is the Standard
!   # Eukaryotic codon table.  The translate function takes either a 
!   # number or a Bio::CodonTable object for its table argument. 
!   # The available tables are 
!   # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
!   #
!   #   1. "Standard (Eukaryote)"
!   #   2. "Vertebrate Mitochondrial"
!   #   3. "Yeast Mitochondorial"
!   #   4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
!   #   5. "Invertebrate Mitochondrial"
!   #   6. "Ciliate Macronuclear and Dasycladacean"
!   #   9. "Echinoderm Mitochondrial"
!   #   10. "Euplotid Nuclear"
!   #   11. "Bacteria"
!   #   12. "Alternative Yeast Nuclear"
!   #   13. "Ascidian Mitochondrial"
!   #   14. "Flatworm Mitochondrial"
!   #   15. "Blepharisma Macronuclear"
!   #   16. "Chlorophycean Mitochondrial"
!   #   21. "Trematode Mitochondrial"
!   #   22. "Scenedesmus obliquus mitochondrial"
!   #   23. "Thraustochytrium Mitochondrial"
!   #
!   # If you are using anything other than the default table, you must specify 
!   # frame in the translate method call,
!   #
!   #   puts s.translate                #=> "MA*"  (using defaults)
!   #   puts s.translate(1,1)           #=> "MA*"  (same as above, but explicit)
!   #   puts s.translate(1,2)           #=> "MAW"  (different codon table)
!   #
!   # and using a Bio::CodonTable instance in the translate method call,
!   #
!   #   mt_table = Bio::CodonTable[2]
!   #   puts s.translate(1, mt_table)           #=> "MAW"
!   #
!   # By default, any invalid or unknown codons (as could happen if the 
!   # sequence contains ambiguities) will be represented by 'X' in the 
!   # translated sequence. 
!   # You may change this to any character of your choice.
!   #
!   #   s = Bio::Sequence::NA.new('atgcNNtga')
!   #   puts s.translate                        #=> "MX*"
!   #   puts s.translate(1,1,'9')               #=> "M9*"
!   #
!   # The translate method considers gaps to be unknown characters and treats 
!   # them as such (i.e. does not collapse sequences prior to translation), so
!   #
!   #   s = Bio::Sequence::NA.new('atgc--tga')
!   #   puts s.translate                        #=> "MX*"
!   # ---
!   # *Arguments*:
!   # * (optional) _frame_:  one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
!   # * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
!   #   (default 1)
!   # * (optional) _unknown_: Character (default 'X')
!   # *Returns*:: Bio::Sequence::AA object
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
***************
*** 109,113 ****
    end
  
!   # Returns counts of the each codon in the sequence by Hash.
    def codon_usage
      hash = Hash.new(0)
--- 257,276 ----
    end
  
!   # Returns counts of each codon in the sequence in a hash.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.codon_usage                #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
!   #
!   # This method does not validate codons!  Any three letter group is a 'codon'. So,
!   #
!   #   s = Bio::Sequence::NA.new('atggNNtga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
!   #
!   #   s = Bio::Sequence::NA.new('atgg--tga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "g--"=>1, "atg"=>1}
!   #
!   # Also, there is no option to work in any frame other than the first.
!   # ---
!   # *Returns*:: Hash object
    def codon_usage
      hash = Hash.new(0)
***************
*** 118,122 ****
    end
  
!   # Calculate the ratio of GC / ATGC bases in percent.
    def gc_percent
      count = self.composition
--- 281,291 ----
    end
  
!   # Calculate the ratio of GC / ATGC bases as a percentage rounded to 
!   # the nearest whole number.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.gc_percent                       #=> 55
!   # ---
!   # *Returns*:: Fixnum
    def gc_percent
      count = self.composition
***************
*** 127,136 ****
    end
  
!   # Show abnormal bases other than 'atgcu'.
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate the weight of this biological string molecule.
    def molecular_weight
      if self.rna?
--- 296,322 ----
    end
  
!   # Returns an alphabetically sorted array of any non-standard bases 
!   # (other than 'atgcu').
!   #
!   #   s = Bio::Sequence::NA.new('atgStgQccR')
!   #   puts s.illegal_bases                    #=> ["q", "r", "s"]
!   # ---
!   # *Returns*:: Array object
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate molecular weight (using the values from BioPerl's 
!   # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.molecular_weight                 #=> 2841.00708
!   #
!   # RNA and DNA do not have the same molecular weights,
!   #
!   #   s = Bio::Sequence::NA.new('auggcguga')
!   #   puts s.molecular_weight                 #=> 2956.94708
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      if self.rna?
***************
*** 141,145 ****
    end
  
!   # Convert the universal code string into the regular expression.
    def to_re
      if self.rna?
--- 327,337 ----
    end
  
!   # Create a ruby regular expression instance 
!   # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html])
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.to_re                            #=> /atggcgtga/
!   # ---
!   # *Returns*:: Regexp object
    def to_re
      if self.rna?
***************
*** 150,154 ****
    end
  
!   # Convert the self string into the list of the names of the each base.
    def names
      array = []
--- 342,353 ----
    end
  
!   # Generate the list of the names of each nucleotide along with the
!   # sequence (full name).  Names used in bioruby are found in the
!   # Bio::NucleicAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::NA.new('atg')
!   #   puts s.names                    #=> ["Adenine", "Thymine", "Guanine"]
!   # ---
!   # *Returns*:: Array object
    def names
      array = []
***************
*** 159,176 ****
    end
  
!   # Output a DNA string by substituting 'u' to 't'.
    def dna
      self.tr('u', 't')
    end
  
    def dna!
      self.tr!('u', 't')
    end
  
!   # Output a RNA string by substituting 't' to 'u'.
    def rna
      self.tr('t', 'u')
    end
  
    def rna!
      self.tr!('t', 'u')
--- 358,405 ----
    end
  
!   # Returns a new sequence object with any 'u' bases changed to 't'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('augc')
!   #   puts s.dna                              #=> 'atgc'
!   #   puts s                                  #=> 'augc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def dna
      self.tr('u', 't')
    end
  
+   # Changes any 'u' bases in the original sequence to 't'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('augc')
+   #   puts s.dna!                             #=> 'atgc'
+   #   puts s                                  #=> 'atgc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def dna!
      self.tr!('u', 't')
    end
  
!   # Returns a new sequence object with any 't' bases changed to 'u'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.rna                              #=> 'augc'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def rna
      self.tr('t', 'u')
    end
  
+   # Changes any 't' bases in the original sequence to 'u'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.rna!                             #=> 'augc'
+   #   puts s                                  #=> 'augc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def rna!
      self.tr!('t', 'u')


From aerts at dev.open-bio.org  Mon Mar 27 13:34:37 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Mon, 27 Mar 2006 18:34:37 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.6,1.7
Message-ID: <200603271834.k2RIYb5l020081@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv20061/io

Modified Files:
	fetch.rb 
Log Message:
Updated docs.


Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** fetch.rb	20 Mar 2006 12:40:13 -0000	1.6
--- fetch.rb	27 Mar 2006 18:34:35 -0000	1.7
***************
*** 31,36 ****
    # = DESCRIPTION
    # The Bio::Fetch class provides an interface to dbfetch servers. Given
!   # a database name and an accession number, these servers return the nucleic
!   # or amino acid sequence for that accession number in that database.
    #
    # Possible dbfetch servers include:
--- 31,37 ----
    # = DESCRIPTION
    # The Bio::Fetch class provides an interface to dbfetch servers. Given
!   # a database name and an accession number, these servers return the associated
!   # record. For example, for the embl database on the EBI, that would be a
!   # nucleic or amino acid sequence.
    #
    # Possible dbfetch servers include:
***************
*** 82,86 ****
    
      # Get raw database entry by id. This method lets the Bio::Registry class
!     # use Bio::Fetch objects and should probably not be used directly.
      def get_by_id(id)
        fetch(@database, id)
--- 83,87 ----
    
      # Get raw database entry by id. This method lets the Bio::Registry class
!     # use Bio::Fetch objects.
      def get_by_id(id)
        fetch(@database, id)


From aerts at dev.open-bio.org  Tue Mar 28 08:42:34 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Tue, 28 Mar 2006 13:42:34 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db gff.rb,1.5,1.6
Message-ID: <200603281342.k2SDgYvl024727@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv24707

Modified Files:
	gff.rb 
Log Message:
Added documentation.


Index: gff.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/gff.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** gff.rb	18 Dec 2005 15:58:41 -0000	1.5
--- gff.rb	28 Mar 2006 13:42:32 -0000	1.6
***************
*** 4,21 ****
  # Copyright::  Copyright (C) 2003, 2005
  #              Toshiaki Katayama <k at bioruby.org>
  # License::    LGPL
  #
  # $Id$
  #
- # == Description
- #
- #
- # == Example
- #
- #
- # == References
- #
- # * http://www.sanger.ac.uk/Software/formats/GFF/
- #
  #--
  #
--- 4,12 ----
  # Copyright::  Copyright (C) 2003, 2005
  #              Toshiaki Katayama <k at bioruby.org>
+ #              2006  Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::    LGPL
  #
  # $Id$
  #
  #--
  #
***************
*** 38,46 ****
  
  module Bio
! 
  class GFF
! 
!   attr_accessor :records
! 
    def initialize(str = '')
      @records = Array.new
--- 29,78 ----
  
  module Bio
! # == DESCRIPTION
! # The Bio::GFF and Bio::GFF::Record classes describe data contained in a 
! # GFF-formatted file. For information on the GFF format, see 
! # http://www.sanger.ac.uk/Software/formats/GFF/. Data are represented in tab- 
! # delimited format, including
! # * seqname
! # * source
! # * feature
! # * start
! # * end
! # * score
! # * strand
! # * frame
! # * attributes (optional)
! # 
! # For example:
! #  SEQ1     EMBL        atg       103   105     .       +       0
! #  SEQ1     EMBL        exon      103   172     .       +       0
! #  SEQ1     EMBL        splice5   172   173     .       +       .
! #  SEQ1     netgene     splice5   172   173     0.94    +       .
! #  SEQ1     genie       sp5-20    163   182     2.3     +       .
! #  SEQ1     genie       sp5-10    168   177     2.1     +       .
! #  SEQ1     grail       ATG       17    19      2.1     -       0
! #
! # The Bio::GFF object is a container for Bio::GFF::Record objects, each 
! # representing a single line in the GFF file.
  class GFF
!   # Creates a Bio::GFF object by building a collection of Bio::GFF::Record
!   # objects.
!   # 
!   # Create a Bio::GFF object the hard way
!   #  this_gff =  "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
!   #  this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
!   #  this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
!   #  this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
!   #  this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
!   #  this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
!   #  this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
!   #  p Bio::GFF.new(this_gff)
!   #  
!   # or create one based on a GFF-formatted file:
!   #  p Bio::GFF.new(File.open('my_data.gff'))
!   # ---
!   # *Arguments*:
!   # * _str_: string in GFF format
!   # *Returns*:: Bio::GFF object
    def initialize(str = '')
      @records = Array.new
***************
*** 50,66 ****
--- 82,127 ----
    end
  
+   # An array of Bio::GFF::Record objects.
+   attr_accessor :records
+ 
+   # Represents a single line of a GFF-formatted file. See Bio::GFF for more
+   # information.
    class Record
  
+     # Name of the reference sequence
      attr_accessor :seqname
+     
+     # Name of the source of the feature (e.g. program that did prediction)
      attr_accessor :source
+     
+     # Name of the feature
      attr_accessor :feature
+     
+     # Start position of feature on reference sequence
      attr_accessor :start
+     
+     # End position of feature on reference sequence
      attr_accessor :end
+     
+     # Score of annotation (e.g. e-value for BLAST search)
      attr_accessor :score
+     
+     # Strand that feature is located on
      attr_accessor :strand
+     
+     # For features of type 'exon': indicates where feature begins in the reading frame
      attr_accessor :frame
+     
+     # List of tag=value pairs (e.g. to store name of the feature: ID=my_id)
      attr_accessor :attributes
+     
+     # Comments for the GFF record
      attr_accessor :comments
  
+     # Creates a Bio::GFF::Record object. Is typically not called directly, but
+     # is called automatically when creating a Bio::GFF object.
+     # ---
+     # *Arguments*:
+     # * _str_: a tab-delimited line in GFF format
      def initialize(str)
        @comments = str.chomp[/#.*/]
***************
*** 83,90 ****
--- 144,158 ----
    end
  
+   # = DESCRIPTION
+   # Represents version 2 of GFF specification. Is completely implemented by the
+   # Bio::GFF class.
    class GFF2 < GFF
      VERSION = 2
    end
  
+   # = DESCRIPTION
+   # Represents version 3 of GFF specification. Is completely implemented by the
+   # Bio::GFF class. For more information on version GFF3, see
+   # http://flybase.bio.indiana.edu/annot/gff3.html
    class GFF3 < GFF
      VERSION = 3
***************
*** 103,106 ****
    end
  
!   p Bio::GFF.new(ARGF.read)
  end
--- 171,181 ----
    end
  
!   this_gff =  "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
!   this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
!   this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
!   this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
!   this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
!   this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
!   this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
!   p Bio::GFF.new(this_gff)
  end


From trevor at pub.open-bio.org  Wed Mar  1 01:40:03 2006
From: trevor at pub.open-bio.org (Trevor Wennblom)
Date: Wed, 01 Mar 2006 01:40:03 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme analysis.rb,
	1.4, 1.5
Message-ID: <200603010140.k211e3VL013061@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory pub.open-bio.org:/tmp/cvs-serv13046

Modified Files:
	analysis.rb 
Log Message:
Huge optimization by getting rid of unnecessary permutations.


Index: analysis.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** analysis.rb	28 Feb 2006 22:21:48 -0000	1.4
--- analysis.rb	1 Mar 2006 01:40:00 -0000	1.5
***************
*** 71,77 ****
  
    def cut_without_permutations( sequence, *args )
!     return nil if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
!     enzyme_actions = create_enzyme_actions( sequence, *args )
      sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
      enzyme_actions.each do |id, enzyme_action|
--- 71,81 ----
  
    def cut_without_permutations( sequence, *args )
!     return {} if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
! 
!     #enzyme_actions = create_enzyme_actions( sequence, *args )
!     tmp = create_enzyme_actions( sequence, *args )
!     enzyme_actions = tmp[0].merge(tmp[1])
! 
      sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
      enzyme_actions.each do |id, enzyme_action|
***************
*** 90,105 ****
  
    def cut_and_return_by_permutations( sequence, *args )
!     return nil if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
!     enzyme_actions = create_enzyme_actions( sequence, *args )
!     return nil if enzyme_actions.empty?
!     permutations = permute(enzyme_actions.size)
  
      # Indexed by permutation.
      hash_of_sequence_ranges_with_cuts = {}
  
      permutations.each do |permutation|
        previous_cut_ranges = []
        sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
  
        permutation.each do |id|
--- 94,121 ----
  
    def cut_and_return_by_permutations( sequence, *args )
!     return {} if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
!     enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
!     return {} if enzyme_actions.empty? and initial_cuts.empty?
! 
!     if enzyme_actions.size > 1
!       permutations = permute(enzyme_actions.size)
!     else
!       permutations = []
!     end
  
      # Indexed by permutation.
      hash_of_sequence_ranges_with_cuts = {}
  
+     if permutations.empty?
+       sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
+       initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
+       hash_of_sequence_ranges_with_cuts[0] = sr_with_cuts
+     end
+ 
      permutations.each do |permutation|
        previous_cut_ranges = []
        sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
+       initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
  
        permutation.each do |id|
***************
*** 251,265 ****
    def create_enzyme_actions( sequence, *args )
      id = 0
!     enzyme_actions = {}
  
      args.each do |enzyme|
        enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
        find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
!         enzyme_actions[id] = enzyme_to_enzyme_action( enzyme, offset )
          id += 1
        end
      end
  
!     enzyme_actions
    end
  
--- 267,338 ----
    def create_enzyme_actions( sequence, *args )
      id = 0
!     enzyme_actions_that_sometimes_cut = {}
!     enzyme_actions_that_always_cut = {}
!     indicies_of_sometimes_cut = []
  
      args.each do |enzyme|
        enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
        find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
!         enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
          id += 1
        end
      end
  
!     # enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
! 
!     max = enzyme_actions_that_always_cut.size - 1
!     0.upto(max) do |i|
!       enzyme_action = enzyme_actions_that_always_cut[i]
!       conflict = false
!       other_cut_ranges = {}
!       #enzyme_actions.each { |key,enzyme_action| next if i == key; puts "i: #{i}, key: #{key}"; previous_cut_ranges += enzyme_action.cut_ranges }
! #      enzyme_actions_that_always_cut.each { |key,i_ea|  next if i == key; puts "i: #{i}, key: #{key}"; other_cut_ranges[key] = i_ea.cut_ranges }
!       enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
! #      puts "Enzyme action #{i}:"
! #      pp enzyme_actions[i]
! #      pp enzyme_action
! #      puts "Previous cut ranges:"
! #      pp previous_cut_ranges
! 
!       other_cut_ranges.each do |key, cut_ranges|
!         cut_ranges.each do |cut_range|
!           next unless cut_range.class == VerticalCutRange  # we aren't concerned with horizontal cuts
!           previous_cut_left = cut_range.range.first 
!           previous_cut_right = cut_range.range.last
! 
!           if (enzyme_action.right <= previous_cut_left) or
!              (enzyme_action.left > previous_cut_right) or
!              (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
!             # no conflict
! #  puts "no conflict"
! 
!           else
!             conflict = true
! #  puts "conflict"
!   #puts "cut range:"
!   #pp cut_range
!   #puts "enzyme action:"
!   #pp enzyme_action
!           end
! 
!           indicies_of_sometimes_cut += [i, key] if conflict == true
!         end
!       end
! 
!       # We don't need to make permutations with this enzyme action if it always cuts
! #      indicies << i if conflict == false
!     end
! #    pp indicies_of_sometimes_cut
! 
!     indicies_of_sometimes_cut.uniq.each do |i|
!       enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
!       enzyme_actions_that_always_cut.delete(i)
!     end
! #puts 'Always cut:'
! #pp enzyme_actions_that_always_cut
! #puts 'Permute:'
! #pp enzyme_actions_that_sometimes_cut
! 
!     [enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
    end
  

From ngoto at pub.open-bio.org  Fri Mar  3 08:18:51 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 08:18:51 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.46,1.47
Message-ID: <200603030818.k238IpVL028555@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv28535/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
* Removed duplicated initializing of @path in BufferedInputStream#initialize.
* Fixed a bug in which the buffered input stream was nested.


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.46
retrieving revision 1.47
diff -C2 -d -r1.46 -r1.47
*** flatfile.rb	22 Feb 2006 10:01:27 -0000	1.46
--- flatfile.rb	3 Mar 2006 08:18:49 -0000	1.47
***************
*** 35,39 ****
          # initialize prefetch buffer
          @buffer = ''
-         @path = path
        end
  
--- 35,38 ----
***************
*** 519,529 ****
      def initialize(dbclass, stream)
        # 2nd arg: IO object
!       if @stream.kind_of?(BufferedInputStream)
          @stream = stream
        else
          @stream = BufferedInputStream.for_io(stream)
        end
-       # default is raw mode
-       self.raw = false
        # 1st arg: database class (or file format autodetection)
        if dbclass then
--- 518,526 ----
      def initialize(dbclass, stream)
        # 2nd arg: IO object
!       if stream.kind_of?(BufferedInputStream)
          @stream = stream
        else
          @stream = BufferedInputStream.for_io(stream)
        end
        # 1st arg: database class (or file format autodetection)
        if dbclass then
***************
*** 535,538 ****
--- 532,537 ----
        @skip_leader_mode = :firsttime
        @firsttime_flag = true
+       # default raw mode is false
+       self.raw = false
      end
  
***************
*** 743,747 ****
            self.new(*arg)
          end
!           
          # Creates a new element.
          def initialize
--- 742,746 ----
            self.new(*arg)
          end
!         
          # Creates a new element.
          def initialize


From ngoto at pub.open-bio.org  Fri Mar  3 09:31:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Fri, 03 Mar 2006 09:31:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48
Message-ID: <200603030931.k239VxVL029035@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
* added RulesArray class only for inspect
* changed constant (like Bio::GenBank) to String (like "Bio::GenBank")
  to avoid doing require almost all files when using autodetect


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb	3 Mar 2006 08:18:49 -0000	1.47
--- flatfile.rb	3 Mar 2006 09:31:57 -0000	1.48
***************
*** 736,739 ****
--- 736,748 ----
        include TSort
  
+       # Array to store autodetection rules.
+       # This is defined only for inspect.
+       class RulesArray < Array
+         # visualize contents
+         def inspect
+           "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+         end
+       end #class RulesArray
+ 
        # Template of a single rule of autodetection
        class RuleTemplate
***************
*** 745,754 ****
          # Creates a new element.
          def initialize
!           a = Array.new
!           def a.inspect
!             "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
!           end
!           @higher_priority_elements = a.clone
!           @lower_priority_elements  = a.clone
            @name = nil
          end
--- 754,759 ----
          # Creates a new element.
          def initialize
!           @higher_priority_elements = RulesArray.new
!           @lower_priority_elements  = RulesArray.new
            @name = nil
          end
***************
*** 784,787 ****
--- 789,810 ----
            nil
          end
+ 
+         private
+         # Gets constant from constant name given as a string.
+         def str2const(str)
+           const = Object
+           str.split(/\:\:/).each do |x|
+             const = const.const_get(x)
+           end
+           const
+         end
+ 
+         # Gets database class from given object.
+         # Current implementation is: 
+         # if _obj_ is kind of String, regarded as a constant.
+         # Otherwise, returns _obj_ as is.
+         def get_dbclass(obj)
+           obj.kind_of?(String) ? str2const(obj) : obj
+         end
        end #class Rule_Template
  
***************
*** 835,841 ****
            super()
            @re = re
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
            @name = dbclass.to_s
          end
  
--- 858,878 ----
            super()
            @re = re
            @name = dbclass.to_s
+           @dbclass = nil
+           @dbclass_lazy = dbclass
+         end
+ 
+         # database class (lazy evaluation)
+         def dbclass
+           unless @dbclass
+             @dbclass = get_dbclass(@dbclass_lazy)
+           end
+           @dbclass
+         end
+         private :dbclass
+ 
+         # returns database classes
+         def dbclasses
+           [ dbclass ]
          end
  
***************
*** 844,872 ****
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? @dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       class RuleRegexp2 < RuleTemplate
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super()
            @regexps = regexps
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
-           if name
-             @name = name
-           else
-             @name = @dbclass.to_s
-           end
          end
  
!         # If given text matches the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return @dbclass if re =~ text
            end
            nil
--- 881,904 ----
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       # If given string matches one of the regular expressions,
!       # returns the database class.
!       class RuleRegexp2 < RuleRegexp
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super(dbclass, nil)
            @regexps = regexps
          end
  
!         # If given text matches one of the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return dbclass if re =~ text
            end
            nil
***************
*** 880,887 ****
            super()
            @proc = proc
!           @dbclasses = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
          # If given text (and/or meta information) is known, returns
          # the database class.
--- 912,928 ----
            super()
            @proc = proc
!           @dbclasses = nil
!           @dbclasses_lazy = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
+         # database classes (lazy evaluation)
+         def dbclasses
+           unless @dbclasses
+             @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+           end
+           @dbclasses
+         end
+ 
          # If given text (and/or meta information) is known, returns
          # the database class.
***************
*** 1039,1058 ****
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ Bio::GenBank,
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ Bio::GenPept,
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ Bio::MEDLINE,
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ Bio::EMBL,
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ Bio::SPTR,
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ Bio::PROSITE,
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ Bio::TRANSFAC,
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
--- 1080,1099 ----
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ 'Bio::GenBank',
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ 'Bio::GenPept',
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ 'Bio::MEDLINE',
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ 'Bio::SPTR',
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ 'Bio::PROSITE',
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ 'Bio::TRANSFAC',
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
            end,
  
!           litdb    = RuleRegexp[ Bio::LITDB,
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ Bio::KEGG::BRITE,
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ Bio::KEGG::KO,
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ Bio::KEGG::GLYCAN,
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ Bio::KEGG::ENZYME,
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ Bio::KEGG::REACTION,
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ Bio::KEGG::GENES,
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ Bio::KEGG::GENOME,
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
!                                 Bio::FANTOM::MaXML::Sequence) do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
--- 1109,1139 ----
            end,
  
!           litdb    = RuleRegexp[ 'Bio::LITDB',
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ 'Bio::KEGG::BRITE',
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ 'Bio::KEGG::KO',
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ 'Bio::KEGG::GLYCAN',
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ 'Bio::KEGG::ENZYME',
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ 'Bio::KEGG::GENES',
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ 'Bio::KEGG::GENOME',
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
!                                 'Bio::FANTOM::MaXML::Sequence') do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
***************
*** 1109,1143 ****
            end,
  
!           pdb = RuleRegexp[ Bio::PDB,
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ Bio::PDB::ChemicalComponent,
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ Bio::ClustalW::Report,
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ Bio::Blast::Report,
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ Bio::Blast::WU::Report,
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ Bio::Blast::Default::Report,
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ Bio::Blat::Report,
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ Bio::Spidey::Report,
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ Bio::HMMER::Report,
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ Bio::Sim4::Report,
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new(Bio::FastaFormat,
!                                      Bio::NBRF,
!                                      Bio::FastaNumericFormat) do |text|
              if /^>.+$/ =~ text
                case text
--- 1150,1184 ----
            end,
  
!           pdb = RuleRegexp[ 'Bio::PDB',
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ 'Bio::ClustalW::Report',
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ 'Bio::Blast::Report',
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ 'Bio::Blast::WU::Report',
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ 'Bio::Blast::Default::Report',
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ 'Bio::Blat::Report',
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ 'Bio::Spidey::Report',
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ 'Bio::HMMER::Report',
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ 'Bio::Sim4::Report',
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new('Bio::FastaFormat',
!                                      'Bio::NBRF',
!                                      'Bio::FastaNumericFormat') do |text|
              if /^>.+$/ =~ text
                case text


From pjotr at pub.open-bio.org  Fri Mar  3 14:52:00 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 14:52:00 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta - New directory
Message-ID: <200603031452.k23Eq0VL029679@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29669/fasta

Log Message:
Directory /home/repository/bioruby/bioruby/test/data/fasta added to the repository


From pjotr at pub.open-bio.org  Fri Mar  3 15:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/test/data/fasta example1.txt, NONE,
	1.1 example2.txt, NONE, 1.1
Message-ID: <200603031531.k23FV8VL029797@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/test/data/fasta
In directory pub.open-bio.org:/tmp/cvs-serv29781/test/data/fasta

Added Files:
	example1.txt example2.txt 
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing


--- NEW FILE: example2.txt ---
>At1g11545.1 68414.m01326 xyloglucan:xyloglucosyl transferase, putative / xyloglucan endotransglycosylase, putative / endo-xyloglucan transferase, putative similar to endo-xyloglucan transferase GI:2244732 from [Gossypium hirsutum]
actcacggaacaagtgtagattgcattacctctctctctctctctcttcgaaatattcga
agtagagacaaccaATGGAGACGGAAAGGAGGATCATAACGAGCTGTTCTGCCATGACGG
CTCTGTTCTTGTTCATGACGGCTCTAATGGCGTCGTCCTCTATCGCAGCAACACCGACAC
AATCGTTTGAAGATAATTTCAACATTATGTGGTCTGAAAATCACTTCACGACTTCCGATG
ATGGAGAGATCTGGAATCTTTCCTTAGATAACGACACCGGATGTGGATTTCAGACAAAGC
ACATGTATAGATTCGGATGGTTTAGTATGAAGCTAAAGCTCGTCGGAGGCGACTCCGCCG
GCGTCGTCACCGCTTACTACATGTGTTCGGAGAATGGGGCAGGACCGGAGAGAGACGAGA
TAGATTTCGAATTTCTAGGGAACCGAACCGGACAGCCTTACATTATTCAGACCAATGTGT
ATAAGAACGGAACCGGGAATCGGGAGATGCGACATTCCCTCTGGTTCGACCCGACCAAGG
ATTATCACACCTACTCAATTCTTTGGAATAACCACCAGCTTGTGTTCTTCGTGGATAGGG
TACCAATTCGAGTATACAAGAACAGTGATAAGGTACCAAACAACGACTTCTTCCCGAACC
AGAAGCCGATGTACTTGTTCTCCAGCATTTGGAACGCTGACGATTGGGCTACACGTGGTG
GTCTGGAGAAGACTGACTGGAAAAAAGCTCCATTCGTCTCTTCTTACAAGGACTTCGCCG
TCGAAGGCTGCCGTTGGAAGGATCCATTCCCTGCATGCGTCTCTACCACAACAGAGAATT
GGTGGGATCAGTACGACGCGTGGCATTTGTCCAAGACACAGAAGATGGATTATGCGTGGG
TGCAGCGTAATCTCGTCGTATACGATTATTGCAAAGACAGTGAGAGGTTCCCTACTCTTC
CTTGGGAGTGTTCCATTAGCCCTTGGGCTTAAaatcaattttgttttgagtgtattaaag
tggaaatggtttatgtaataattttactctcttttttttggcatttcttattttgttatg
gactatatcctctgtttatttatttaattaattatttatttagtcggctat


--- NEW FILE: example1.txt ---
>At1g02580 mRNA (2291 bp) UTR's and CDS
aggcgagtggttaatggagaaggaaaaccatgaggacgatggtgagggtttgccacccgaactaaatcagataaaa
gagcaaatcgaaaaggagagatttctgcatatcaagagaaaattcgagctgagatacattccaagtgtggctactc
atgcttcacaccatcaatcgtttgacttaaaccagcccgctgcagaggatgataatggaggagacaacaaatcact
tttgtcgagaatgcaaaacccacttcgtcatttcagtgcctcatctgattataattcttacgaagatcaaggttat
gttcttgatgaggatcaagattatgctcttgaagaagatgtaccattatttcttgatgaagatgtaccattattac
caagtgtcaagcttccaattgttgagaagctaccacgatccattacatgggtcttcaccaaaagtagccagctgat
ggctgaaagtgattctgtgattggtaagagacaaatctattatttgaatggtgaggcactagaattgagcagtgaa
gaagatgaggaagatgaagaagaagatgaggaagaaatcaagaaagaaaaatgcgaattttctgaagatgtagacc
gatttatatggacggttgggcaggactatggtttggatgatctggtcgtgcggcgtgctctcgccaagtacctcga
agtggatgtttcggacatattggaaagatacaatgaactcaagcttaagaatgatggaactgctggtgaggcttct
gatttgacatccaagacaataactactgctttccaggattttgctgatagacgtcattgccgtcgttgcatgatat
tcgattgtcatatgcatgagaagtatgagcccgagtctagatccagcgaagacaaatctagtttgtttgaggatga
agatagacaaccatgcagtgagcattgttacctcaaggtgaggagtgtgacagaagctgatcatgtgatggataat
gataactctatatcaaacaagattgtggtctcagatccaaacaacactatgtggacgcctgtagagaaggatcttt
acttgaaaggaattgagatatttgggagaaacagttgtgatgttgcattaaacatacttcgggggcttaagacgtg
cctagagatttacaattacatgcgcgaacaagatcaatgtactatgtcattagaccttaacaaaactacacaaaga
cacaatcaggttaccaaaaaagtatctcgaaaaagtagtaggtcggtccgcaaaaaatcgagactccgaaaatatg
ctcgttatccgcctgctttaaagaaaacaactagtggagaagctaagttttataagcactacacaccatgcacttg
caagtcaaaatgtggacagcaatgcccttgtttaactcacgaaaattgctgcgagaaatattgcgggtgctcaaag
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca  gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaattcctccttcaaaccaataaaaagattctcattggaaagtctgatgtt
catggatggggtgcatttacatgggactctct  taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca
gctcgaaatcgatgctcgccgtaaaggaaacgagttcaaatttctcaatcactcagcaagacctaactgctacgcc
aagttgatgattgtgagaggagatcagaggattggtctatttgcggagagagcaatcgaagaaggtgaggagcttt
tcttcgactactgctatggaccagaacatgcggattggtcgcgtggtcgagaacctagaaagactggtgcttctaa
aaggtctaaggaagcccgtccagctcgttagtttttgatctgaggagaagcagcaattcaagcagtccttttttta
tgttatggtatatcaattaataatgtaatgctattttgtgttactaaaccaaaacttaagtttctgttttatttgt
tttagggtgttttgtttgtatcatatgtgtcttaactttcaaagttttctttttgtatttcaatttaaaaacaatg
tttatgttgtt

>At1g65300: mRNA 837bp
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
ga  gatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttt  taatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g

>At1g65300: mRNA 837bp (shortened at end)
atgaagagaaagatgaagttatcgttaatagaaaacagtgtatcgaggaaaacaacattcaccaaaaggaagaaag
ggatgacgaagaaactaaccgagctagtcactctatgtggtgttgaagcatgtgcggtcgtctatagtccgttcaa
ctcgatcccggaggcttggccgtcaagggaaggcgttgaagacgtggtgtcgaaatttatggagttgtcggtgttg
gaccggaccaagaagatggtggatcaagagacttttataagtcaaaggatcgccaaagaaaaagagcagctgcaga
agctacgtgatgagaaccataattctcagattcgggagttaatgtttggttgtctcaaaggggagacgaatgtgta
taatcttgatggaagggatcttcaagatttgagtttatatattgataagtatcttaatggtcttactcgcaggatt
gagatcctTAttgagaacggtgagtcttcttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcag


>At1g65300: mRNA 837bp (shortened from start)
ttcatctttacctcttcctattgttgcgaatgcagctgcaccagtcg
gatttgatggtcctatgtttcaatatcataatcaaaatcagcaaaagccggttcaattccaatatcaggctcttta
tgatttttatgatcagattccaaagaaaattcatggttttaatatgaatatgaataaggattcgaatcaaagtatg
gttttggatttgaatcaaaatcttaatgatggagaggacgagggcattccttgcatggacaacaacaactaccacc
ccgaaatcgattgtctcgctaccgtcaccactgcccccactgatgtttgtgctcctaacatcaccaatgatctcta
g


>At1g02580 - shortened for test - inserted cutpoint
gattgcaacaatcgctttggaggatgtaattgtgcaattggccaatgcacaaatcgacaatgtccttgttttgctg
ctaatcgtgaatgcgatcca  gatctttgtcggagttgtcctcttagctgtggagatggcactcttggtgagacacc
agtgcaaatccaatgcaagaacatgcaataataaaaagattctcattggaaagtctgatgttcatggattcatggt
tttaattggggtgcatttacatgggactctct  taaaaagaatgagtatctcggagaatatactggagaactgatca
ctcatgatgaagctaatgagcgtgggagaatagaagatcggattggttcttcctacctctttaccttgaatgatca


From pjotr at pub.open-bio.org  Fri Mar  3 15:31:08 2006
From: pjotr at pub.open-bio.org (Pjotr Prins)
Date: Fri, 03 Mar 2006 15:31:08 +0000
Subject: [BioRuby-cvs] bioruby/sample enzymes.rb,NONE,1.1
Message-ID: <200603031531.k23FV8VL029793@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/sample
In directory pub.open-bio.org:/tmp/cvs-serv29781/sample

Added Files:
	enzymes.rb 
Log Message:
Added example of enzyme cuts using Trevor's libs - and two short
FASTA data files for testing


--- NEW FILE: enzymes.rb ---
#!/usr/bin/env ruby
#
# enzymes.rb - cut input file using enzyme on command line
#
#   Copyright (C) 2006 Pjotr Prins <p at bioruby.org> and Trevor Wennblom <trevor at corevx.com>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  $Id: enzymes.rb,v 1.1 2006/03/03 15:31:06 pjotr Exp $
#

require 'bio/io/flatfile'
require 'bio/util/restriction_enzyme'

include Bio

usage = <<USAGE

Usage: enzymes.rb enzyme1 [enzyme2] infiles

  Examples:

    Output the primary sequences cut using both BstYI and MseI:
		
	    ./enzymes.rb BstYI MseI *.seq

    or using the actual formats

	    ./enzymes.rb "r^gatcy" "t^taa" *.seq
		
USAGE

# At least one enzyme and one input file are required; otherwise show the
# usage text and stop with a non-zero exit status.
unless ARGV.size >= 2
  print usage
  exit 1
end

# The first argument is always an enzyme.  The next argument is taken as a
# second enzyme unless it names an existing file, in which case it is
# treated as the first input file and left in ARGV.
enzyme1 = ARGV.shift
candidate = ARGV[0]
enzyme2 = ARGV.shift if candidate && !File.exist?(candidate)

# Echo every enzyme given on the command line together with its primary
# strand pattern including cut symbols (e.g. r^gatcy, t^taa).
[enzyme1, enzyme2].compact.each do |enzyme_name|
  double_stranded = Bio::RestrictionEnzyme::DoubleStranded.new(enzyme_name)
  puts "Enzyme #{enzyme_name}: " + double_stranded.primary.with_cut_symbols
end

# Remaining arguments are input files.  Every entry of every file is cut
# with the first enzyme.  When a second enzyme was given, each resulting
# fragment is cut again and only the first piece of that second digest is
# printed; fragments whose second digest yields nothing are skipped.
ARGV.each do |path|
  Bio::FlatFile.auto(path).each_entry do |entry|
    sequence = Bio::Sequence::NA.new(entry.seq)
    sequence.cut_with_enzyme(enzyme1).each do |first_cut|
      fragment =
        if enzyme2
          recut = Bio::Sequence::NA.new(first_cut.primary)
          second_cuts = recut.cut_with_enzyme(enzyme2)
          next if second_cuts.size.zero?
          second_cuts.shift # pick up first fragment
        else
          first_cut
        end
      # FASTA-like output: definition line followed by the primary strand.
      print '> ' + entry.definition + "\n"
      print fragment.primary, "\n"
    end
  end
end


From aerts at pub.open-bio.org  Thu Mar 16 17:29:07 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Thu, 16 Mar 2006 17:29:07 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io pubmed.rb, 1.12, 1.13 fetch.rb, 1.4,
	1.5
Message-ID: <200603161729.k2GHT7VL007097@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv7087

Modified Files:
	pubmed.rb fetch.rb 
Log Message:
* Added documentation to pubmed.rb and fetch.rb
* For fetch.rb: replaced 'net/http' with 'open-uri' to allow people behind a proxy to use this class.


Index: pubmed.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/pubmed.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** pubmed.rb	8 Sep 2005 01:22:12 -0000	1.12
--- pubmed.rb	16 Mar 2006 17:29:05 -0000	1.13
***************
*** 3,6 ****
--- 3,7 ----
  #
  #   Copyright (C) 2001 KATAYAMA Toshiaki <k at bioruby.org>
+ #                 2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 26,61 ****
  module Bio
  
    class PubMed
  
!     def self.query(id)
!       host = "www.ncbi.nlm.nih.gov"
!       path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
! 
!       http = Net::HTTP.new(host)
!       response, = http.get(path + id.to_s)
!       result = response.body
!       if result =~ /#{id}\s+Error/
!         raise( result )
!       else
!         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!         return result
!       end
!     end
! 
!     def self.pmfetch(id)
!       host = "www.ncbi.nlm.nih.gov"
!       path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
! 
!       http = Net::HTTP.new(host)
!       response, = http.get(path + id.to_s)
!       result = response.body
!       if result =~ /#{id}\s+Error/
!         raise( result )
!       else
!         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
!         return result
!       end
!     end
! 
      def self.search(str)
        host = "www.ncbi.nlm.nih.gov"
--- 27,85 ----
  module Bio
  
+   # = DESCRIPTION
+   # The Bio::PubMed class provides several ways to retrieve bibliographic
+   # information from the PubMed database at
+   # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed. Basically, two
+   # types of queries are possible:
+   # * searching for PubMed IDs given a query string:
+   #   * Bio::PubMed#search
+   #   * Bio::PubMed#esearch
+   # * retrieving the MEDLINE text (i.e. authors, journal, abstract, ...) given a PubMed ID
+   #   * Bio::PubMed#query
+   #   * Bio::PubMed#pmfetch
+   #   * Bio::PubMed#efetch
+   #
+   # The different methods within the same group are interchangeable and should
+   # return the same result.
+   # 
+   # Additional information about the MEDLINE format and PubMed programmable
+   # APIs can be found on the following websites:
+   # * Overview: http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html
+   # * How to link: http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html
+   # * MEDLINE format: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat
+   # * Search field descriptions and tags: http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags
+   # * Entrez utilities index: http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html
+   # * PmFetch CGI help: http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html
+   # * E-Utilities CGI help: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
+   #
+   # = USAGE
+   #  require 'bio'
+   #
+   #  # If you don't know the pubmed ID:
+   #  Bio::PubMed.search("(genome AND analysis) OR bioinformatics").each do |x|
+   #    p x
+   #  end
+   #  Bio::PubMed.esearch("(genome AND analysis) OR bioinformatics").each do |x|
+   #    p x
+   #  end
+   #  
+   #  # To retrieve the MEDLINE entry for a given PubMed ID:
+   #  puts Bio::PubMed.query("10592173")
+   #  puts Bio::PubMed.pmfetch("10592173")
+   #  puts Bio::PubMed.efetch("10592173", "14693808")
+   #  # This can be converted into a Bio::MEDLINE object:
+   #  manuscript = Bio::PubMed.query("10592173")
+   #  medline = Bio::MEDLINE.new(manuscript)
+   #  
+   # = REMARK
+   # This class can not be used at the moment if you're behind a proxy server. This will be solved in the near future.
    class PubMed
  
!     # Search the PubMed database by given keywords using entrez query and returns
!     # an array of PubMed IDs.
!     # ---
!     # *Arguments*:
!     # * _str_: query string (required)
!     # *Returns*:: array of PubMed IDs
      def self.search(str)
        host = "www.ncbi.nlm.nih.gov"
***************
*** 70,73 ****
--- 94,115 ----
      end
  
+     # Search the PubMed database by given keywords using E-Utils and returns 
+     # an array of PubMed IDs.
+     # 
+     # For information on the possible arguments, see
+     # http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed
+     # ---
+     # *Arguments*:
+     # * _str_: query string (required)
+     # * _field_
+     # * _reldate_
+     # * _mindate_
+     # * _maxdate_
+     # * _datetype_
+     # * _retstart_
+     # * _retmax_ (default 100)
+     # * _retmode_
+     # * _rettype_
+     # *Returns*:: array of PubMed IDs
      def self.esearch(str, hash = {})
        hash['retmax'] = 100 unless hash['retmax']
***************
*** 88,91 ****
--- 130,184 ----
      end
  
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez query.
+     # ---
+     # *Arguments*:
+     # * _id_: PubMed ID (required)
+     # *Returns*:: MEDLINE formatted String
+     def self.query(id)
+       host = "www.ncbi.nlm.nih.gov"
+       path = "/entrez/query.fcgi?tool=bioruby&cmd=Text&dopt=MEDLINE&db=PubMed&uid="
+ 
+       http = Net::HTTP.new(host)
+       response, = http.get(path + id.to_s)
+       result = response.body
+       if result =~ /#{id}\s+Error/
+         raise( result )
+       else
+         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+         return result
+       end
+     end
+ 
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez pmfetch.
+     # ---
+     # *Arguments*:
+     # * _id_: PubMed ID (required)
+     # *Returns*:: MEDLINE formatted String
+     def self.pmfetch(id)
+       host = "www.ncbi.nlm.nih.gov"
+       path = "/entrez/utils/pmfetch.fcgi?tool=bioruby&mode=text&report=medline&db=PubMed&id="
+ 
+       http = Net::HTTP.new(host)
+       response, = http.get(path + id.to_s)
+       result = response.body
+       if result =~ /#{id}\s+Error/
+         raise( result )
+       else
+         result = result.gsub("\r", "\n").squeeze("\n").gsub(/<\/?pre>/, '')
+         return result
+       end
+     end
+ 
+     # Retrieve PubMed entry by PMID and returns MEDLINE formatted string using
+     # entrez efetch. Multiple PubMed IDs can be provided:
+     #   Bio::PubMed.efetch(123)
+     #   Bio::PubMed.efetch(123,456,789)
+     #   Bio::PubMed.efetch([123,456,789])
+     # ---
+     # *Arguments*:
+     # * _ids_: list of PubMed IDs (required)
+     # *Returns*:: MEDLINE formatted String
      def self.efetch(*ids)
        return [] if ids.empty?
***************
*** 125,189 ****
  
  end
- 
- =begin
- 
- = Bio::PubMed
- 
- These class methods access NCBI/PubMed database via HTTP.
- 
- --- Bio::PubMed.esearch(str, options)
- 
-       Search keywords in PubMed by E-Utils and returns an array of PubMed IDs.
-       Options can be a hash containing keys include 'field', 'reldate',
-       'mindate', 'maxdate', 'datetype', 'retstart', 'retmax', 'retmode',
-       and 'rettype' as specified in the following URL:
- 
-         ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html#PubMed>))
- 
-      Default 'retmax' is 100.
- 
- --- Bio::PubMed.efetch(pmids)
- 
-       Returns an array of MEDLINE records.  A list of PubMed IDs can be
-       supplied as following:
- 
-         Bio::PubMed.efetch(123)
-         Bio::PubMed.efetch(123,456,789)
-         Bio::PubMed.efetch([123,456,789])
- 
- --- Bio::PubMed.query(pmid)
- 
-       Retrieve PubMed entry by PMID and returns MEDLINE format string (can
-       be parsed by the Bio::MEDLINE and can be converted into Bio::Reference
-       object).
- 
- --- Bio::PubMed.pmfetch(pmid)
- 
-       Just another query method (by pmfetch).
- 
- --- Bio::PubMed.search(str)
- 
-       Search the PubMed database by given keywords and returns the list of
-       matched records in MEDLINE format.
- 
- 
- = For more informations
- 
- * Overview
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/overview.html>))
- * How to link
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/linking.html>))
- * MEDLINE format
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#MEDLINEDisplayFormat>))
- * Search field descriptions and tags
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query/static/help/pmhelp.html#SearchFieldDescriptionsandTags>))
- * Entrez utilities index
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/utils_index.html>))
- * PmFetch CGI help
-   * ((<URL:http://www.ncbi.nlm.nih.gov/entrez/utils/pmfetch_help.html>))
- * E-Utilities CGI help
-   * ((<URL:http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html>))
- 
- =end
- 
- 
--- 218,219 ----

Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** fetch.rb	18 Dec 2005 15:58:42 -0000	1.4
--- fetch.rb	16 Mar 2006 17:29:05 -0000	1.5
***************
*** 1,12 ****
  #
! # = bio/io/biofetch.rb - BioFetch access module
! #
! # Copyright::   Copyright (C) 2002, 2005
! #               Toshiaki Katayama <k at bioruby.org>
! # License::     LGPL
  #
! # $Id$
  #
- #--
  #
  #  This library is free software; you can redistribute it and/or
--- 1,10 ----
  #
! # bio/io/biofetch.rb - BioFetch access module
  #
! #  Copyright (C) 2002, 2005 Toshiaki Katayama <k at bioruby.org>
! #               2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
!            
! #  License: LGPL
  #
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 24,95 ****
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #++
  #
  
  require 'uri'
! require 'net/http'
  
  module Bio
  
! class Fetch
! 
!   # Create a new Bio::Fetch server object.
!   # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
!   # to EBI BioFetch server.
!   def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
!     schema, user, @host, @port, reg, @path, = URI.split(url)
!   end
! 
!   # Set default database to dbname (prepare for get_by_id).
!   attr_accessor :database
! 
!   # Get raw database entry by id (mainly used by Bio::Registry).
!   def get_by_id(id)
!     fetch(@database, id)
!   end
! 
!   # Fetch a database entry as specified by database (db), entry id (id),
!   # 'raw' text or 'html' (style), and format.  When using BioRuby's
!   # BioFetch server, value for the format should not be set.
!   def fetch(db, id, style = 'raw', format = nil)
!     data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
!     data.push("format=#{format}") if format
!     data = data.join('&')
! 
!     responce, result = Net::HTTP.new(@host, @port).post(@path, data)
!     return result
!   end
! 
!   # Short cut for using BioRuby's BioFetch server.  You can fetch an entry
!   # without creating instance of BioFetch server.
!   def self.query(*args)
!     self.new.fetch(*args)
!   end
  
!   # What databases are available?
!   def databases
!     query = "info=dbs"
!     responce, result = Net::HTTP.new(@host, @port).post(@path, query)
!     return result
!   end
  
!   # What formats does the database X have?
!   def formats(database = @database)
!     if database
!       query = "info=formats;db=#{database}"
!       responce, result = Net::HTTP.new(@host, @port).post(@path, query)
        return result
      end
    end
  
-   # How many entries can be retrieved simultaneously?
-   def maxids
-     query = "info=maxids"
-     responce, result = Net::HTTP.new(@host, @port).post(@path, query)
-     return result
-   end
- 
- end
- 
  end # module Bio
  
--- 22,183 ----
  #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
  #
! #  $Id$
  #
  
  require 'uri'
! require 'open-uri'
  
  module Bio
+   # = DESCRIPTION
+   # The Bio::Fetch class provides an interface to dbfetch servers. Given
+   # a database name and an accession number, these servers return the nucleic
+   # or amino acid sequence for that accession number in that database.
+   #
+   # Possible dbfetch servers include:
+   # * http://bioruby.org/cgi-bin/biofetch.rb (default)
+   # * http://www.ebi.ac.uk/cgi-bin/dbfetch
+   #
+   # If you're behind a proxy server, be sure to set your HTTP_PROXY
+   # environment variable accordingly.
+   #
+   # = USAGE
+   #  require 'bio'
+   #
+   #  # Retrieve the sequence of accession number M33388 from the EMBL
+   #  # database.
+   #  server = Bio::Fetch.new()  #uses default server
+   #  puts server.fetch('embl','M33388')
+   #  
+   #  # Do the same thing without creating a Bio::Fetch object. This method always
+   #  # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
+   #  puts Bio::Fetch.query('embl','M33388')
+   #
+   #  # To know what databases are available on the bioruby dbfetch server:
+   #  server = Bio::Fetch.new()
+   #  puts server.databases
+   #
+   #  # Some databases provide their data in different formats (e.g. 'fasta',
+   #  # 'genbank' or 'embl'). To check which formats are supported by a given
+   #  # database:
+   #  puts server.formats('embl')
+   #
+   class Fetch
+   
+     # Create a new Bio::Fetch server object that can subsequently be queried
+     # using the Bio::Fetch#fetch method
+     # ---
+     # *Arguments*:
+     # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
+     # *Returns*:: Bio::Fetch object
+     def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
+       @url = url
+       schema, user, @host, @port, reg, @path, = URI.split(@url)
+     end
+   
+     # The default database to query
+     #--
+     # This will be used by the get_by_id method
+     #++
+     attr_accessor :database
+   
+     # Get raw database entry by id. This method lets the Bio::Registry class
+     # use Bio::Fetch objects and should probably not be used directly.
+     def get_by_id(id)
+       fetch(@database, id)
+     end
+   
+     # Fetch a database entry as specified by database (db), entry id (id),
+     # 'raw' text or 'html' (style), and format.  When using BioRuby's
+     # BioFetch server, value for the format should not be set.
+     # Examples:
+     #   server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+     #   puts server.fetch('embl','M33388','raw','fasta')
+     #   puts server.fetch('refseq','NM_12345','html','embl')
+     # ---
+     # *Arguments*:
+     # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+     # * _id_: single ID or ID list separated by commas or white space
+     # * _style_: [raw|html] (default = 'raw')
+     # * _format_: name of output format (see Bio::Fetch#formats)
+     def fetch(db, id, style = 'raw', format = nil)
+       query = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
+       query.push("format=#{format}") if format
+       query = query.join('&')
+   
+       result = open(@url + '?' + query).readlines.join('')
+       return result
+     end
+   
+     # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
+     # without creating an instance of BioFetch server. This method uses the 
+     # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
+     # 
+     # Example:
+     #   puts Bio::Fetch.query('refseq','NM_12345')
+     #
+     # ---
+     # *Arguments*:
+     # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+     # * _id_: single ID or ID list separated by commas or white space
+     # * _style_: [raw|html] (default = 'raw')
+     # * _format_: name of output format (see Bio::Fetch#formats)
+     def self.query(*args)
+       self.new.fetch(*args)
+     end
+   
+     # Using this method, the user can ask a dbfetch server what databases
+     # it supports. This would normally be the first step you'd take when
+     # you use a dbfetch server for the first time.
+     # Example:
+     #  server = Bio::Fetch.new()
+     #  puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
+     #
+     # This method only works for the bioruby dbfetch server. For a list
+     # of databases available from the EBI, see the EBI website at 
+     # http://www.ebi.ac.uk/cgi-bin/dbfetch/
+     # ---
+     # *Returns*:: array of database names
+     def databases
+       query = "info=dbs"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
!     end
!   
!     # Lists the formats that are available for a given database. Like the
!     # Bio::Fetch#databases method, this method is only available on 
!     # the bioruby dbfetch server.
!     # Example:
!     #  server = Bio::Fetch.new()
!     #  puts server.formats('embl') # returns "default fasta"
!     # ---
!     # *Arguments*:
!     # * _database_:: name of database you want the supported formats for
!     # *Returns*:: array of formats
!     def formats(database = @database)
!       if database
!         query = "info=formats;db=#{database}"
  
!         result = open(@url + '?' + query).readlines.join('')
!         return result
!       end
!     end
!   
!     # A dbfetch server will only return entries up to a given maximum number.
!     # This method retrieves that number from the server. As for the databases
!     # and formats methods, the maxids method only works for the bioruby
!     # dbfetch server.
!     # ---
!     # *Arguments*: none
!     # *Returns*:: number
!     def maxids
!       query = "info=maxids"
  
!       result = open(@url + '?' + query).readlines.join('')
        return result
      end
+   
    end
  
  end # module Bio
  
***************
*** 98,113 ****
  if __FILE__ == $0
  
- # bfserv = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
-   bfserv = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
    puts "# test 1"
!   puts bfserv.fetch('embl', 'J00231', 'raw')
    puts "# test 2"
!   puts bfserv.fetch('embl', 'J00231', 'html')
! 
    puts "# test 3"
!   puts Bio::Fetch.query('genbank', 'J00231')
    puts "# test 4"
    puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
! 
  end
  
--- 186,204 ----
  if __FILE__ == $0
  
    puts "# test 1"
!   br_server = Bio::Fetch.new()
!   puts br_server.databases
!   puts br_server.formats('embl')
!   puts br_server.maxids
!   ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
    puts "# test 2"
!   puts ebi_server.fetch('embl', 'J00231', 'raw')
    puts "# test 3"
!   puts ebi_server.fetch('embl', 'J00231', 'html')
    puts "# test 4"
+   puts Bio::Fetch.query('genbank', 'J00231')
+   puts "# test 5"
    puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
!  
  end
  

From ngoto at pub.open-bio.org  Mon Mar 20 10:34:59 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 10:34:59 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.3,1.4
Message-ID: <200603201035.k2KAYxVL030067@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory pub.open-bio.org:/tmp/cvs-serv30042/lib/bio

Modified Files:
	command.rb 
Log Message:
* New module Bio::Command::NetTools for miscellaneous network methods.
  Currently, this module is intended to be used only inside the
  BioRuby library. Please do not use it in user programs for now.
* New methods: Bio::Command::NetTools.open_uri(uri, *arg) and
  Bio::Command::NetTools.read_uri(uri).
* Changed license to Ruby's.


Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** command.rb	4 Nov 2005 17:36:00 -0000	1.3
--- command.rb	20 Mar 2006 10:34:57 -0000	1.4
***************
*** 2,32 ****
  # = bio/command.rb - general methods for external command execution
  #
! # Copyright::	Copyright (C) 2003-2005
  # 		Naohisa Goto <ng at bioruby.org>,
  #		Toshiaki Katayama <k at bioruby.org>
! # License::	LGPL
  #
  #  $Id$
  #
- #--
- #
- #  This library is free software; you can redistribute it and/or
- #  modify it under the terms of the GNU Lesser General Public
- #  License as published by the Free Software Foundation; either
- #  version 2 of the License, or (at your option) any later version.
- #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public
- #  License along with this library; if not, write to the Free Software
- #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
- #
- #++
- #
  
  require 'open3'
  
  module Bio
--- 2,15 ----
  # = bio/command.rb - general methods for external command execution
  #
! # Copyright::	Copyright (C) 2003-2006
  # 		Naohisa Goto <ng at bioruby.org>,
  #		Toshiaki Katayama <k at bioruby.org>
! # License::	Ruby's
  #
  #  $Id$
  #
  
  require 'open3'
+ require 'uri'
  
  module Bio
***************
*** 162,165 ****
--- 145,291 ----
  
  end # module Tools
+ 
+ 
+ # = Bio::Command::NetTools
+ #
+ # Bio::Command::NetTools is a collection of miscellaneous methods
+ # for data transport through network.
+ #
+ # Library internal use only. Users should not directly use it.
+ #
+ # Note that it is under construction.
+ module NetTools
+ 
+   # Same as OpenURI.open_uri(*arg).
+   # If open-uri.rb is already loaded, ::OpenURI is used.
+   # Otherwise, internal OpenURI in sandbox is used because
+   # open-uri.rb redefines Kernel.open.
+   def self.open_uri(uri, *arg)
+     if defined? ::OpenURI
+       ::OpenURI.open_uri(uri, *arg)
+     else
+       SandBox.load_openuri_in_sandbox
+       uri = uri.to_s if ::URI::Generic === uri
+       SandBox::OpenURI.open_uri(uri, *arg)
+     end
+   end
+ 
+   # Same as OpenURI.open_uri(uri).read.
+   # If open-uri.rb is already loaded, ::OpenURI is used.
+   # Otherwise, internal OpenURI in sandbox is used because
+   # open-uri.rb redefines Kernel.open.
+   def self.read_uri(uri)
+     self.open_uri(uri).read
+   end
+ 
+   # Sandbox to load open-uri.rb.
+   # Internal use only.
+   module SandBox #:nodoc:
+ 
+     # Dummy module definition.
+     module Kernel #:nodoc:
+       # dummy method
+       def open(*arg); end #:nodoc:
+     end #module Kernel
+     
+     # a method to find proxy. dummy definition
+     module FindProxy; end #:nodoc:
+     
+     # dummy module definition
+     module OpenURI #:nodoc:
+       module OpenRead; end #:nodoc:
+     end #module OpenURI
+     
+     # Dummy module definition.
+     module URI #:nodoc:
+       class Generic < ::URI::Generic #:nodoc:
+         include SandBox::FindProxy
+       end
+       
+       class HTTPS < ::URI::HTTPS #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       class HTTP  < ::URI::HTTP  #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       class FTP  < ::URI::FTP    #:nodoc:
+         include SandBox::FindProxy
+         include SandBox::OpenURI::OpenRead
+       end
+       
+       # parse and new. internal use only.
+       def self.__parse_and_new__(klass, uri) #:nodoc:
+         scheme, userinfo, host, port,
+         registry, path, opaque, query, fragment = ::URI.split(uri)
+         klass.new(scheme, userinfo, host, port,
+                   registry, path, opaque, query,
+                   fragment)
+       end
+       private_class_method :__parse_and_new__
+       
+       # same as ::URI.parse. internal use only.
+       def self.parse(uri) #:nodoc:
+         r = ::URI.parse(uri)
+         case r
+         when ::URI::HTTPS
+           __parse_and_new__(HTTPS, uri)
+         when ::URI::HTTP
+           __parse_and_new__(HTTP, uri)
+         when ::URI::FTP
+           __parse_and_new__(FTP, uri)
+         else
+           r
+         end
+       end
+     end #module URI
+     
+     @load_openuri = nil
+     # load open-uri.rb in SandBox module.
+     def self.load_openuri_in_sandbox #:nodoc:
+       return if @load_openuri
+       fn = nil
+       unless $:.find do |x|
+           fn = File.join(x, 'open-uri.rb')
+           FileTest.exist?(fn)
+         end then
+         warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
+       else
+         # reading open-uri.rb
+         str = File.read(fn)
+         # eval open-uri.rb contents in SandBox module
+         module_eval(str)
+         
+         # finds 'find_proxy' method
+         find_proxy_lines = nil
+         flag = nil
+         endstr = nil
+         str.each do |line|
+           if flag then
+             find_proxy_lines << line
+             if endstr == line[0, endstr.length] and
+                 /^\s+end(\s+.*)?$/ =~ line then
+               break
+             end
+           elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
+             flag = true
+             endstr = "#{$1}end"
+             find_proxy_lines = line 
+           end
+         end
+         if find_proxy_lines
+           module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
+         else
+           warn('Warning: cannot find find_proxy method in open-uri.rb.')
+         end
+         @load_openuri = true
+       end
+     end
+   end #module SandBox
+ end #module NetTools
+ 
  end # module Command
  end # module Bio


From ngoto at pub.open-bio.org  Mon Mar 20 12:40:16 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Mon, 20 Mar 2006 12:40:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.5,1.6
Message-ID: <200603201240.k2KCeGVL030358@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv30167/lib/bio/io

Modified Files:
	fetch.rb 
Log Message:
* "require 'open-uri'" is removed because open-uri.rb changes Kernel#open.
  Instead, Bio::Command::NetTools.read_uri is used.
* query should be escaped by using URI.escape.
* Bio::Fetch#databases and #formats are changed to return an array of strings,
  as described in the documentation.
* Bio::Fetch#maxids is changed to return an Integer,
  as described in the documentation.


Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** fetch.rb	16 Mar 2006 17:29:05 -0000	1.5
--- fetch.rb	20 Mar 2006 12:40:13 -0000	1.6
***************
*** 26,30 ****
  
  require 'uri'
! require 'open-uri'
  
  module Bio
--- 26,30 ----
  
  require 'uri'
! require 'bio/command'
  
  module Bio
***************
*** 105,110 ****
        query = query.join('&')
    
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 105,109 ----
        query = query.join('&')
    
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query))
      end
    
***************
*** 141,146 ****
        query = "info=dbs"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 140,144 ----
        query = "info=dbs"
  
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
      end
    
***************
*** 159,164 ****
          query = "info=formats;db=#{database}"
  
!         result = open(@url + '?' + query).readlines.join('')
!         return result
        end
      end
--- 157,161 ----
          query = "info=formats;db=#{database}"
  
!         Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).strip.split(/\s+/)
        end
      end
***************
*** 174,179 ****
        query = "info=maxids"
  
!       result = open(@url + '?' + query).readlines.join('')
!       return result
      end
    
--- 171,175 ----
        query = "info=maxids"
  
!       Bio::Command::NetTools.read_uri(@url + '?' + URI.escape(query)).to_i
      end
    

From aerts at pub.open-bio.org  Tue Mar 21 12:18:16 2006
From: aerts at pub.open-bio.org (Jan Aerts)
Date: Tue, 21 Mar 2006 12:18:16 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fastacmd.rb,1.10,1.11
Message-ID: <200603211218.k2LCIGVL001647@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv1637

Modified Files:
	fastacmd.rb 
Log Message:
Added/reformatted documentation.


Index: fastacmd.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fastacmd.rb,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** fastacmd.rb	28 Jan 2006 08:12:21 -0000	1.10
--- fastacmd.rb	21 Mar 2006 12:18:14 -0000	1.11
***************
*** 5,45 ****
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>
  # Lisence::    LGPL
  #
  # $Id$
  #
- # == Description
- #
- # Retrives FASTA formatted sequences from a blast database using 
- # NCBI fastacmd command.
- # 
- # This class requires 'fastacmd' command and a blast database  
- # (formatted using the '-o' option of 'formatdb').
- #
- # == Examples
- #
- #    database = ARGV.shift || "/db/myblastdb"
- #    entry_id = ARGV.shift || "sp:128U_DROME"
- #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
- #
- #    fastacmd = Bio::Blast::Fastacmd.new(database)
- #
- #    entry = fastacmd.get_by_id(entry_id)
- #    fastacmd.fetch(entry_id)
- #    fastacmd.fetch(ent_list)
- #
- #    fastacmd.fetch(ent_list).each do |fasta|
- #      puts fasta
- #    end
- #
- # == References
- #
- # * NCBI tool
- #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
- #
- # * fastacmd.html
- #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
- #
  #--
  #
--- 5,14 ----
  #              Shuji SHIGENOBU <shige at nibb.ac.jp>,
  #              Toshiaki Katayama <k at bioruby.org>,
! #              Mitsuteru C. Nakao <n at bioruby.org>,
! #              Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # Lisence::    LGPL
  #
  # $Id$
  #
  #--
  #
***************
*** 68,72 ****
  class Blast
  
! # NCBI fastacmd wrapper class
  #
  class Fastacmd
--- 37,68 ----
  class Blast
  
! # = DESCRIPTION
! #
! # Retrieves FASTA formatted sequences from a blast database using 
! # NCBI fastacmd command.
! # 
! # This class requires 'fastacmd' command and a blast database  
! # (formatted using the '-o' option of 'formatdb').
! #
! # = USAGE
! #  require 'bio'
! #  
! #  fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
! #
! #  entry = fastacmd.get_by_id("sp:128U_DROME")
! #  fastacmd.fetch("sp:128U_DROME")
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
! #
! #  fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"]).each do |fasta|
! #    puts fasta
! #  end
! #
! # = REFERENCES
! #
! # * NCBI tool
! #   ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
! #
! # * fastacmd.html
! #   http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
  #
  class Fastacmd
***************
*** 78,90 ****
    attr_accessor :database
  
!   # fastcmd command file path.
    attr_accessor :fastacmd
  
-   # 
    attr_accessor :errorlog
  
!   # Initalize a fastacmd object.
!   #    
!   #    fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
--- 74,103 ----
    attr_accessor :database
  
!   # fastacmd command file path.
    attr_accessor :fastacmd
  
    attr_accessor :errorlog
  
!   # This method provides a handle to a BLASTable database, which you can then
!   # use to retrieve sequences.
!   # 
!   # Prerequisites:
!   # * You have created a BLASTable database with the '-o T' option.
!   # * You have the NCBI fastacmd tool installed.
!   #
!   # For example, suppose the original input file looks like:
!   #  >my_seq_1
!   #  ACCGACCTCCGGAACGGATAGCCCGACCTACG
!   #  >my_seq_2
!   #  TCCGACCTTTCCTACCGCACACCTACGCCATCAC
!   #  ...
!   # and you've created a BLASTable database from that with the command
!   #  cd /my_dir/
!   #  formatdb -i my_input_file -t Test -n Test -o T
!   # then you can get a handle to this database with the command
!   #  fastacmd = Bio::Blast::Fastacmd.new("/my_dir/Test")
!   # ---
!   # *Arguments*:
!   # * _database_:: path and name of BLASTable database
    def initialize(blast_database_file_path)
      @database = blast_database_file_path
***************
*** 93,117 ****
  
  
!   # get an entry_id and returns a Bio::FastaFormat object.
!   #
!   #   entry_id = "sp:128U_DROME"
!   #   entry = fastacmd.get_by_id(entry_id)
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # get one or more entry_id and returns an Array of Bio::FastaFormat objects.
!   #
!   # Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
!   # object even when the result is a single entry.
!   #
!   #    p fastacmd.fetch(entry_id)
    #
!   # Fastacmd#fetch method also accepts a list of entry_id and returns
!   # an Array of Bio::FastaFormat objects.
!   #    
!   #    ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
!   #    p fastacmd.fetch(ent_list)
    #
    def fetch(list)
      if list.respond_to?(:join)
--- 106,131 ----
  
  
!   # Get the sequence of a specific entry in the BLASTable database.
!   # For example:
!   #  entry = fastacmd.get_by_id("sp:128U_DROME")
!   # ---
!   # *Arguments*:
!   # * _id_: id of an entry in the BLAST database
!   # *Returns*:: a Bio::FastaFormat object
    def get_by_id(entry_id)
      fetch(entry_id).shift
    end
  
!   # Get the sequence for a _list_ of IDs in the database.
    #
!   # For example:
!   #  p fastacmd.fetch(["sp:1433_SPIOL", "sp:1432_MAIZE"])
    #
+   # This method always returns an array of Bio::FastaFormat objects, even when 
+   # the result is a single entry.
+   # ---
+   # *Arguments*:
+   # * _list_: list of IDs to retrieve from the database
+   # *Returns*:: array of Bio::FastaFormat objects
    def fetch(list)
      if list.respond_to?(:join)
***************
*** 128,138 ****
    end
  
!   # Iterates each entry.
!   #
!   # You can also iterate on all sequences in the database!
!   #    fastacmd.each do |fasta|
!   #      p [ fasta.definition[0..30], fasta.seq.size ]
!   #    end
    #
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
--- 142,152 ----
    end
  
!   # Iterates over _all_ sequences in the database.
    #
+   #  fastacmd.each_entry do |fasta|
+   #    p [ fasta.definition[0..30], fasta.seq.size ]
+   #  end
+   # ---
+   # *Returns*:: a Bio::FastaFormat object for each iteration
    def each_entry
      cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
***************
*** 154,156 ****
--- 168,184 ----
  end # module Bio
  
+ if $0 == __FILE__
+   fastacmd = Bio::Blast::Fastacmd.new("/path_to_my_db/db_name")
+   seq = fastacmd.get_by_id('id_of_entry1')
+   puts seq.class
+   puts seq
+   
+   seqs = fastacmd.fetch(['id_of_entry1','id_of_entry2'])
+   seqs.each do |seq|
+     puts seq
+   end
  
+   fastacmd.each_entry do |fasta|
+     puts fasta.seq.size.to_s + "\t" + fasta.definition
+   end
+ end


From ngoto at pub.open-bio.org  Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.48,1.49
Message-ID: <200603221019.k2MAJOVL005746@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.48
retrieving revision 1.49
diff -C2 -d -r1.48 -r1.49
*** flatfile.rb	3 Mar 2006 09:31:57 -0000	1.48
--- flatfile.rb	22 Mar 2006 10:19:22 -0000	1.49
***************
*** 262,265 ****
--- 262,268 ----
          attr_reader :entry
  
+         # a flag to write down entry start and end positions
+         attr_accessor :entry_pos_flag
+ 
          # start position of the entry
          attr_reader :entry_start_pos
***************
*** 290,293 ****
--- 293,297 ----
            end
            @delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
+           @entry_pos_flag = nil
          end
  
***************
*** 330,334 ****
          # gets a entry
          def get_entry
!           p0 = @stream.pos
            e  = @stream.gets(@delimiter)
            if e and @delimiter_overrun then
--- 334,338 ----
          # gets a entry
          def get_entry
!           p0 = @entry_pos_flag ? @stream.pos : nil
            e  = @stream.gets(@delimiter)
            if e and @delimiter_overrun then
***************
*** 339,343 ****
              end
            end
!           p1 = @stream.pos
            @entry_start_pos = p0
            @entry = e
--- 343,347 ----
              end
            end
!           p1 = @entry_pos_flag ? @stream.pos : nil
            @entry_start_pos = p0
            @entry = e
***************
*** 585,588 ****
--- 589,602 ----
      def entry_raw
        @splitter.entry
+     end
+ 
+     # a flag to write down entry start and end positions
+     def entry_pos_flag
+       @splitter.entry_pos_flag
+     end
+ 
+     # Sets flag to write down entry start and end positions
+     def entry_pos_flag=(x)
+       @splitter.entry_pos_flag = x
      end
  

From ngoto at pub.open-bio.org  Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io/flatfile indexer.rb,1.23,1.24
Message-ID: <200603221019.k2MAJOVL005748@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io/flatfile
In directory pub.open-bio.org:/tmp/cvs-serv5657/lib/bio/io/flatfile

Modified Files:
	indexer.rb 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: indexer.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile/indexer.rb,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** indexer.rb	22 Feb 2006 08:41:03 -0000	1.23
--- indexer.rb	22 Mar 2006 10:19:22 -0000	1.24
***************
*** 115,118 ****
--- 115,119 ----
              @flatfile = Bio::FlatFile.open(@dbclass, file, 'rb')
              @flatfile.raw = nil
+             @flatfile.entry_pos_flag = true
              @entry = nil
            end


From ngoto at pub.open-bio.org  Wed Mar 22 10:19:24 2006
From: ngoto at pub.open-bio.org (Naohisa Goto)
Date: Wed, 22 Mar 2006 10:19:24 +0000
Subject: [BioRuby-cvs] bioruby/doc Changes-0.7.rd,1.16,1.17
Message-ID: <200603221019.k2MAJOVL005750@pub.open-bio.org>

Update of /home/repository/bioruby/bioruby/doc
In directory pub.open-bio.org:/tmp/cvs-serv5657/doc

Modified Files:
	Changes-0.7.rd 
Log Message:
Bio::FlatFile did not work correctly for pipes.
Bio::FlatFile#entry_start_pos and #entry_ended_pos are changed to be enabled
only when Bio::FlatFile#entry_pos_flag is true.


Index: Changes-0.7.rd
===================================================================
RCS file: /home/repository/bioruby/bioruby/doc/Changes-0.7.rd,v
retrieving revision 1.16
retrieving revision 1.17
diff -C2 -d -r1.16 -r1.17
*** Changes-0.7.rd	27 Feb 2006 11:38:14 -0000	1.16
--- Changes-0.7.rd	22 Mar 2006 10:19:22 -0000	1.17
***************
*** 262,265 ****
--- 262,270 ----
    structure (which is not recommended) would not work.
  
+ In 1.0.1:
+ 
+ * Bio::FlatFile#entry_start_pos and #entry_ended_pos are enabled
+   only when Bio::FlatFile#entry_pos_flag is true.
+ 
  === Deleted files
  

From k at portal.open-bio.org  Sun Mar 26 00:38:12 2006
From: k at portal.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 00:38:12 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/shell demo.rb,1.1,1.2
Message-ID: <200603260038.k2Q0cCgZ028442@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/shell
In directory dev.open-bio.org:/tmp/cvs-serv28438/lib/bio/shell

Modified Files:
	demo.rb 
Log Message:
* 1st commit test on dev.open-bio.org after the server migration
* fixed some typos in BioRuby shell demo


Index: demo.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/shell/demo.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** demo.rb	27 Feb 2006 09:33:22 -0000	1.1
--- demo.rb	26 Mar 2006 00:38:10 -0000	1.2
***************
*** 90,94 ****
        run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
        run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
!       run(%q[less "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
        run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
        run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
--- 90,94 ----
        run(%q[head ent_1bl8], "Head part of the entry ...", false) &&
        run(%q[savefile("1bl8.pdb", ent_1bl8)], "Saving the original entry in file ...", false) &&
!       run(%q[disp "data/1bl8.pdb"], "Look through the entire entry ...", false) &&
        run(%q[pdb_1bl8 = flatparse(ent_1bl8)], "Parsing the entry ...", false) &&
        run(%q[pdb_1bl8.entry_id], "Showing the entry ID ...", true) &&
***************
*** 98,112 ****
  
      def pdb_hetdic
!       run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
!           "Retrieving the het_dic database ...", false) &&
!       run(%q[savefile("data/het_dictionary.txt", het_dic)],
!           "Saving the file ... ", false) &&
        run(%q[het_dic.size], "Bytes of the file ...", true) &&
!       run(%q[less "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
        run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
            "Creating index to make the seaarchable database ...", false) &&
        run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
        run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
!       run(%q[sake.conect], "Showing connect table (conect) of the molecule ...", true) &&
        true
      end
--- 98,112 ----
  
      def pdb_hetdic
! #      run(%q[het_dic = open("http://deposit.pdb.org/het_dictionary.txt").read],
! #          "Retrieving the het_dic database ...", false) &&
! #      run(%q[savefile("data/het_dictionary.txt", het_dic)],
! #          "Saving the file ... ", false) &&
        run(%q[het_dic.size], "Bytes of the file ...", true) &&
!       run(%q[disp "data/het_dictionary.txt"], "Take a look on the contents ...", true) &&
        run(%q[flatindex("het_dic", "data/het_dictionary.txt")],
            "Creating index to make the seaarchable database ...", false) &&
        run(%q[ethanol = flatsearch("het_dic", "EOH")], "Search an ethanol entry ...", true) &&
        run(%q[osake = flatparse(ethanol)], "Parse the entry ...", true) &&
!       run(%q[osake.conect], "Showing connect table (conect) of the molecule ...", true) &&
        true
      end


From ngoto at dev.open-bio.org  Tue Mar 28 14:00:50 2006
From: ngoto at dev.open-bio.org (Naohisa Goto)
Date: Tue, 28 Mar 2006 14:00:50 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio command.rb,1.4,1.5
Message-ID: <200603281400.k2SE0oK6024842@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv24822

Modified Files:
	command.rb 
Log Message:
* added "require 'open-uri'"
* removed complicated hacks for open-uri


Index: command.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/command.rb,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** command.rb	20 Mar 2006 10:34:57 -0000	1.4
--- command.rb	28 Mar 2006 14:00:48 -0000	1.5
***************
*** 12,15 ****
--- 12,16 ----
  require 'open3'
  require 'uri'
+ require 'open-uri'
  
  module Bio
***************
*** 157,289 ****
  module NetTools
  
-   # Same as OpenURI.open_uri(*arg).
-   # If open-uri.rb is already loaded, ::OpenURI is used.
-   # Otherwise, internal OpenURI in sandbox is used because
-   # open-uri.rb redefines Kernel.open.
-   def self.open_uri(uri, *arg)
-     if defined? ::OpenURI
-       ::OpenURI.open_uri(uri, *arg)
-     else
-       SandBox.load_openuri_in_sandbox
-       uri = uri.to_s if ::URI::Generic === uri
-       SandBox::OpenURI.open_uri(uri, *arg)
-     end
-   end
- 
    # Same as OpenURI.open_uri(uri).read.
-   # If open-uri.rb is already loaded, ::OpenURI is used.
-   # Otherwise, internal OpenURI in sandbox is used becase
-   # open-uri.rb redefines Kernel.open.
    def self.read_uri(uri)
!     self.open_uri(uri).read
    end
- 
-   # Sandbox to load open-uri.rb.
-   # Internal use only.
-   module SandBox #:nodoc:
- 
-     # Dummy module definition.
-     module Kernel #:nodoc:
-       # dummy method
-       def open(*arg); end #:nodoc:
-     end #module Kernel
-     
-     # a method to find proxy. dummy definition
-     module FindProxy; end #:nodoc:
-     
-     # dummy module definition
-     module OpenURI #:nodoc:
-       module OpenRead; end #:nodoc:
-     end #module OpenURI
-     
-     # Dummy module definition.
-     module URI #:nodoc:
-       class Generic < ::URI::Generic #:nodoc:
-         include SandBox::FindProxy
-       end
-       
-       class HTTPS < ::URI::HTTPS #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       class HTTP  < ::URI::HTTP  #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       class FTP  < ::URI::FTP    #:nodoc:
-         include SandBox::FindProxy
-         include SandBox::OpenURI::OpenRead
-       end
-       
-       # parse and new. internal use only.
-       def self.__parse_and_new__(klass, uri) #:nodoc:
-         scheme, userinfo, host, port,
-         registry, path, opaque, query, fragment = ::URI.split(uri)
-         klass.new(scheme, userinfo, host, port,
-                   registry, path, opaque, query,
-                   fragment)
-       end
-       private_class_method :__parse_and_new__
-       
-       # same as ::URI.parse. internal use only.
-       def self.parse(uri) #:nodoc:
-         r = ::URI.parse(uri)
-         case r
-         when ::URI::HTTPS
-           __parse_and_new__(HTTPS, uri)
-         when ::URI::HTTP
-           __parse_and_new__(HTTP, uri)
-         when ::URI::FTP
-           __parse_and_new__(FTP, uri)
-         else
-           r
-         end
-       end
-     end #module URI
-     
-     @load_openuri = nil
-     # load open-uri.rb in SandBox module.
-     def self.load_openuri_in_sandbox #:nodoc:
-       return if @load_openuri
-       fn = nil
-       unless $:.find do |x|
-           fn = File.join(x, 'open-uri.rb')
-           FileTest.exist?(fn)
-         end then
-         warn('Warning: cannot find open-uri.rb in $LOAD_PATH')
-       else
-         # reading open-uri.rb
-         str = File.read(fn)
-         # eval open-uri.rb contents in SandBox module
-         module_eval(str)
-         
-         # finds 'find_proxy' method
-         find_proxy_lines = nil
-         flag = nil
-         endstr = nil
-         str.each do |line|
-           if flag then
-             find_proxy_lines << line
-             if endstr == line[0, endstr.length] and
-                 /^\s+end(\s+.*)?$/ =~ line then
-               break
-             end
-           elsif /^(\s+)def\s+find_proxy(\s+.*)?$/ =~ line then
-             flag = true
-             endstr = "#{$1}end"
-             find_proxy_lines = line 
-           end
-         end
-         if find_proxy_lines
-           module_eval("module FindProxy;\n#{find_proxy_lines}\n;end\n")
-         else
-           warn('Warning: cannot find find_proxy method in open-uri.rb.')
-         end
-         @load_openuri = true
-       end
-     end
-   end #module SandBox
  end #module NetTools
  
--- 158,165 ----
  module NetTools
  
    # Same as OpenURI.open_uri(uri).read.
    def self.read_uri(uri)
!     OpenURI.open_uri(uri).read
    end
  end #module NetTools
  

From k at dev.open-bio.org  Sun Mar 26 02:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio sequence.rb,0.56,0.57
Message-ID: <200603260228.k2Q2S1uq028859@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28853

Modified Files:
	sequence.rb 
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
  * Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess.  (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
  * Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block).  With this argument set, blocks will be properly passed through to the encapsulated object.


Index: sequence.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence.rb,v
retrieving revision 0.56
retrieving revision 0.57
diff -C2 -d -r0.56 -r0.57
*** sequence.rb	17 Feb 2006 17:15:08 -0000	0.56
--- sequence.rb	26 Mar 2006 02:27:59 -0000	0.57
***************
*** 5,9 ****
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Yoshinori K. Okuji <okuji at enbug.org>,
! #               Naohisa Goto <ng at bioruby.org>
  # License::     Ruby's
  #
--- 5,11 ----
  #               Toshiaki Katayama <k at bioruby.org>,
  #               Yoshinori K. Okuji <okuji at enbug.org>,
! #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>,
! #               Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::     Ruby's
  #
***************
*** 15,18 ****
--- 17,67 ----
  module Bio
  
+ # = DESCRIPTION
+ # Bio::Sequence objects represent annotated sequences in bioruby.
+ # A Bio::Sequence object is a wrapper around the actual sequence, 
+ # represented as either a Bio::Sequence::NA or a Bio::Sequence::AA object.
+ # For most users, this encapsulation will be completely transparent.
+ # Bio::Sequence responds to all methods defined for Bio::Sequence::NA/AA
+ # objects using the same arguments and returning the same values (even though 
+ # these methods are not documented specifically for Bio::Sequence).
+ #
+ # = USAGE
+ #   # Create a nucleic or amino acid sequence
+ #   dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
+ #   rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
+ #   aa = Bio::Sequence.auto('ACDEFGHIKLMNPQRSTVWYU')
+ # 
+ #   # Print it out
+ #   puts dna.to_s
+ #   puts aa.to_s
+ # 
+ #   # Get a subsequence, bioinformatics style (first nucleotide is '1')
+ #   puts dna.subseq(2,6)
+ # 
+ #   # Get a subsequence, informatics style (first nucleotide is '0')
+ #   puts dna[2,6]
+ # 
+ #   # Print in FASTA format
+ #   puts dna.output(:fasta)
+ # 
+ #   # Print all codons
+ #   dna.window_search(3,3) do |codon|
+ #     puts codon
+ #   end
+ # 
+ #   # Splice or otherwise mangle your sequence
+ #   puts dna.splicing("complement(join(1..5,16..20))")
+ #   puts rna.splicing("complement(join(1..5,16..20))")
+ # 
+ #   # Convert a sequence containing ambiguity codes into a 
+ #   # regular expression you can use for subsequent searching
+ #   puts aa.to_re
+ # 
+ #   # These should speak for themselves
+ #   puts dna.complement
+ #   puts dna.composition
+ #   puts dna.molecular_weight
+ #   puts dna.translate
+ #   puts dna.gc_percent
  class Sequence
  
***************
*** 23,37 ****
    autoload :Format,  'bio/sequence/format'
  
    def initialize(str)
      @seq = str
    end
  
!   def method_missing(*arg)
!     @seq.send(*arg)
    end
! 
!   attr_accessor :entry_id, :definition, :features, :references, :comments,
!     :date, :keywords, :dblinks, :taxonomy, :moltype, :seq
! 
    def output(style)
      extend Bio::Sequence::Format
--- 72,151 ----
    autoload :Format,  'bio/sequence/format'
  
+   # Create a new Bio::Sequence object
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s                                  #=> 'atgc'
+   #
+   # Note that this method does not initialize the contained sequence
+   # as any kind of bioruby object, only as a simple string
+   #
+   #   puts s.seq.class                        #=> String
+   #
+   # See Bio::Sequence#na, Bio::Sequence#aa, and Bio::Sequence#auto 
+   # for methods to transform the basic String of a just created 
+   # Bio::Sequence object to a proper bioruby object
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String or Bio::Sequence::NA/AA object
+   # *Returns*:: Bio::Sequence object
    def initialize(str)
      @seq = str
    end
  
!   # Pass any unknown method calls to the wrapped sequence object.  see
!   # http://www.rubycentral.com/book/ref_c_object.html#Object.method_missing
!   def method_missing(sym, *args, &block) #:nodoc:
!     @seq.send(sym, *args, &block)
    end
!   
!   # The sequence identifier.  For example, for a sequence
!   # of Genbank origin, this is the accession number.
!   attr_accessor :entry_id
!   
!   # A String with a description of the sequence
!   attr_accessor :definition
!   
!   # An Array of Bio::Feature objects
!   attr_accessor :features
!   
!   # An Array of Bio::Reference objects
!   attr_accessor :references
!   
!   # A comment String
!   attr_accessor :comments
!   
!   # Date from sequence source. Often date of deposition.
!   attr_accessor :date
!   
!   # An Array of Strings
!   attr_accessor :keywords
!   
!   # An Array of Strings; links to other database entries.
!   attr_accessor :dblinks
!   
!   # A taxonomy String
!   attr_accessor :taxonomy
!   
!   # Bio::Sequence::NA/AA
!   attr_accessor :moltype
!   
!   # The sequence object, usually Bio::Sequence::NA/AA, 
!   # but could be a simple String
!   attr_accessor :seq
!   
!   # Using Bio::Sequence::Format, return a String with the Bio::Sequence
!   # object formatted in the given style.
!   #
!   # Formats currently implemented are: 'fasta', 'genbank', and 'embl'
!   #
!   #   s = Bio::Sequence.new('atgc')
!   #   puts s.output(:fasta)                   #=> "> \natgc\n"
!   #
!   # The style argument is given as a Ruby 
!   # Symbol(http://www.ruby-doc.org/core/classes/Symbol.html)
!   # ---
!   # *Arguments*: 
!   # * (required) _style_: :fasta, :genbank, *or* :embl
!   # *Returns*:: String object
    def output(style)
      extend Bio::Sequence::Format
***************
*** 48,51 ****
--- 162,175 ----
    end
  
+   # Guess the type of sequence, Amino Acid or Nucleic Acid, and create a 
+   # new sequence object (Bio::Sequence::AA or Bio::Sequence::NA) on the basis
+   # of this guess.  This method will change the current Bio::Sequence object.
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.seq.class                        #=> String
+   #   s.auto
+   #   puts s.seq.class                        #=> Bio::Sequence::NA
+   # ---
+   # *Returns*:: Bio::Sequence::NA/AA object
    def auto
      @moltype = guess
***************
*** 57,60 ****
--- 181,194 ----
    end
  
+   # Given a sequence String, guess its type, Amino Acid or Nucleic Acid, and
+   # return a new Bio::Sequence object wrapping a sequence of the guessed type
+   # (either Bio::Sequence::AA or Bio::Sequence::NA)
+   # 
+   #   s = Bio::Sequence.auto('atgc')
+   #   puts s.seq.class                        #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+   # *Returns*:: Bio::Sequence object
    def self.auto(str)
      seq = self.new(str)
***************
*** 63,74 ****
    end
  
    def guess(threshold = 0.9, length = 10000, index = 0)
      str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
      cmp = str.composition
  
!     bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + 
!             cmp['a'] + cmp['t'] + cmp['g'] + cmp['c']
  
!     total = @seq.length - cmp['N'] - cmp['n']
  
      if bases.to_f / total > threshold
--- 197,247 ----
    end
  
+   # Guess the class of the current sequence.  Returns the class
+   # (Bio::Sequence::AA or Bio::Sequence::NA) guessed.  In general, used by
+   # developers only, but if you know what you are doing, feel free.
+   # 
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.guess                            #=> Bio::Sequence::NA
+   #
+   # There are three parameters: `threshold`, `length`, and `index`.  
+   #
+   # The `threshold` value (defaults to 0.9) is the frequency of 
+   # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+   # to produce a Bio::Sequence::NA "guess".  In the default case, if less
+   # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+   # then the guess is Bio::Sequence::AA.
+   # 
+   #   s = Bio::Sequence.new('atgcatgcqq')
+   #   puts s.guess                            #=> Bio::Sequence::AA
+   #   puts s.guess(0.8)                       #=> Bio::Sequence::AA
+   #   puts s.guess(0.7)                       #=> Bio::Sequence::NA
+   #
+   # The `length` value is how much of the total sequence to use in the
+   # guess (default 10000).  If your sequence is very long, you may 
+   # want to use a smaller amount to reduce the computational burden.
+   #
+   #   s = Bio::Sequence.new(A VERY LONG SEQUENCE)
+   #   puts s.guess(0.9, 1000)  # limit the guess to the first 1000 positions
+   #
+   # The `index` value is where to start the guess.  Perhaps you know there
+   # are a lot of gaps at the start...
+   #
+   #   s = Bio::Sequence.new('-----atgcc')
+   #   puts s.guess                            #=> Bio::Sequence::AA
+   #   puts s.guess(0.9,10000,5)               #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+   # * (optional) _length_: Fixnum (default 10000)
+   # * (optional) _index_: Fixnum (default 0)
+   # *Returns*:: Bio::Sequence::NA/AA
    def guess(threshold = 0.9, length = 10000, index = 0)
      str = @seq.to_s[index,length].to_s.extend Bio::Sequence::Common
      cmp = str.composition
  
!     bases = cmp['A'] + cmp['T'] + cmp['G'] + cmp['C'] + cmp['U'] +
!             cmp['a'] + cmp['t'] + cmp['g'] + cmp['c'] + cmp['u']
  
!     total = str.length - cmp['N'] - cmp['n']
  
      if bases.to_f / total > threshold
***************
*** 79,86 ****
--- 252,312 ----
    end 
  
+   # Guess the class of a given sequence.  Returns the class
+   # (Bio::Sequence::AA or Bio::Sequence::NA) guessed.  In general, used by
+   # developers only, but if you know what you are doing, feel free.
+   # 
+   #   puts Bio::Sequence.guess('atgc')        #=> Bio::Sequence::NA
+   #
+   # There are three optional parameters: `threshold`, `length`, and `index`.  
+   #
+   # The `threshold` value (defaults to 0.9) is the frequency of 
+   # nucleic acid bases [AGCTUagctu] required in the sequence for this method
+   # to produce a Bio::Sequence::NA "guess".  In the default case, if less
+   # than 90% of the bases (after excluding [Nn]) are in the set [AGCTUagctu],
+   # then the guess is Bio::Sequence::AA.
+   # 
+   #   puts Bio::Sequence.guess('atgcatgcqq')      #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('atgcatgcqq', 0.8) #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('atgcatgcqq', 0.7) #=> Bio::Sequence::NA
+   #
+   # The `length` value is how much of the total sequence to use in the
+   # guess (default 10000).  If your sequence is very long, you may 
+   # want to use a smaller amount to reduce the computational burden.
+   #
+   #   # limit the guess to the first 1000 positions
+   #   puts Bio::Sequence.guess('A VERY LONG SEQUENCE', 0.9, 1000)  
+   #
+   # The `index` value is where to start the guess.  Perhaps you know there
+   # are a lot of gaps at the start...
+   #
+   #   puts Bio::Sequence.guess('-----atgcc')             #=> Bio::Sequence::AA
+   #   puts Bio::Sequence.guess('-----atgcc',0.9,10000,5) #=> Bio::Sequence::NA
+   # ---
+   # *Arguments*:
+   # * (required) _str_: String *or* Bio::Sequence::NA/AA object
+   # * (optional) _threshold_: Float in range 0,1 (default 0.9)
+   # * (optional) _length_: Fixnum (default 10000)
+   # * (optional) _index_: Fixnum (default 0)
+   # *Returns*:: Bio::Sequence::NA/AA
    def self.guess(str, *args)
      self.new(str).guess(*args)
    end
  
+   # Transform the sequence wrapped in the current Bio::Sequence object
+   # into a Bio::Sequence::NA object.  This method will change the current
+   # object.  This method does not validate your choice, so be careful!
+   #
+   #   s = Bio::Sequence.new('RRLE')
+   #   puts s.seq.class                        #=> String
+   #   s.na
+   #   puts s.seq.class                        #=> Bio::Sequence::NA !!!
+   #
+   # However, if you know your sequence type, this method may be 
+   # constructively used after initialization,
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   s.na
+   # ---
+   # *Returns*:: Bio::Sequence::NA
    def na
      @seq = NA.new(@seq)
***************
*** 88,96 ****
    end
  
    def aa
      @seq = AA.new(@seq)
      @moltype = AA
    end
! 
  end # Sequence
  
--- 314,338 ----
    end
  
+   # Transform the sequence wrapped in the current Bio::Sequence object
+   # into a Bio::Sequence::AA object.  This method will change the current
+   # object.  This method does not validate your choice, so be careful!
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.seq.class                        #=> String
+   #   s.aa
+   #   puts s.seq.class                        #=> Bio::Sequence::AA !!!
+   #
+   # However, if you know your sequence type, this method may be 
+   # constructively used after initialization,
+   #
+   #   s = Bio::Sequence.new('RRLE')
+   #   s.aa
+   # ---
+   # *Returns*:: Bio::Sequence::AA
    def aa
      @seq = AA.new(@seq)
      @moltype = AA
    end
!   
  end # Sequence
  

From k at dev.open-bio.org  Sun Mar 26 02:32:58 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:32:58 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio reference.rb,1.21,1.22
Message-ID: <200603260232.k2Q2Ww61028892@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv28888

Modified Files:
	reference.rb 
Log Message:
* comprehensive documentation contributed by Ryan Raaum is added


Index: reference.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/reference.rb,v
retrieving revision 1.21
retrieving revision 1.22
diff -C2 -d -r1.21 -r1.22
*** reference.rb	8 Feb 2006 15:06:26 -0000	1.21
--- reference.rb	26 Mar 2006 02:32:56 -0000	1.22
***************
*** 2,45 ****
  # = bio/reference.rb - Journal reference classes
  #
! # Copyright::   Copyright (C) 2001 
! #               KATAYAMA Toshiaki <k at bioruby.org>
! # Lisence::     LGPL
  #
  # $Id$
  #
- # == Description
- # 
- # Journal reference classes.
- #
- # == Examples
- #
- # == References
- #
- # 
- #
- #--
- #
- #  This library is free software; you can redistribute it and/or
- #  modify it under the terms of the GNU Lesser General Public
- #  License as published by the Free Software Foundation; either
- #  version 2 of the License, or (at your option) any later version.
- #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public
- #  License along with this library; if not, write to the Free Software
- #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
- #
- #++
- #
  
  module Bio
  
    # A class for journal reference information.
    #
!   # === Examples
    # 
    #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
--- 2,20 ----
  # = bio/reference.rb - Journal reference classes
  #
! # Copyright::   Copyright (C) 2001, 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
! # License::     Ruby's
  #
  # $Id$
  #
  
  module Bio
  
+   # = DESCRIPTION
+   #
    # A class for journal reference information.
    #
!   # = USAGE
    # 
    #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
***************
*** 69,100 ****
      attr_reader :authors
  
!     # "Title of the study."
      attr_reader :title
  
!     # "Theor. J. Hoge"
      attr_reader :journal
  
!     # 12
      attr_reader :volume
      
!     # 3
      attr_reader :issue
  
!     # "123-145"
      attr_reader :pages
  
!     # 2001
      attr_reader :year
  
!     # 12345678
      attr_reader :pubmed
  
!     # 98765432
      attr_reader :medline
      
!     # Abstract test in String.
      attr_reader :abstract
  
!     # A URL String.
      attr_reader :url
  
--- 44,75 ----
      attr_reader :authors
  
!     # String with title of the study
      attr_reader :title
  
!     # String with journal name
      attr_reader :journal
  
!     # volume number (typically Fixnum)
      attr_reader :volume
      
!     # issue number (typically Fixnum)
      attr_reader :issue
  
!     # page range (typically String, e.g. "123-145")
      attr_reader :pages
  
!     # year of publication (typically Fixnum)
      attr_reader :year
  
!     # pubmed identifier (typically Fixnum)
      attr_reader :pubmed
  
!     # medline identifier (typically Fixnum)
      attr_reader :medline
      
!     # Abstract text in String.
      attr_reader :abstract
  
!     # An URL String.
      attr_reader :url
  
***************
*** 105,109 ****
      attr_reader :affiliations
  
!     # 
      def initialize(hash)
        hash.default = ''
--- 80,119 ----
      attr_reader :affiliations
  
!     # Create a new Bio::Reference object from a Hash of values. 
!     # Data is extracted from the values for keys:
!     #
!     # * authors - expected value: Array of Strings
!     # * title - expected value: String
!     # * journal - expected value: String
!     # * volume - expected value: Fixnum or String
!     # * issue - expected value: Fixnum or String
!     # * pages - expected value: String
!     # * year - expected value: Fixnum or String
!     # * pubmed - expected value: Fixnum or String
!     # * medline - expected value: Fixnum or String
!     # * abstract - expected value: String
!     # * url - expected value: String
!     # * mesh - expected value: Array of Strings
!     # * affiliations - expected value: Array of Strings
!     #
!     #
!     #    hash = {'authors' => [ "Hoge, J.P.", "Fuga, F.B." ], 
!     #            'title' => "Title of the study.",
!     #            'journal' => "Theor. J. Hoge", 
!     #            'volume' => 12, 
!     #            'issue' => 3, 
!     #            'pages' => "123-145",
!     #            'year' => 2001, 
!     #            'pubmed' => 12345678, 
!     #            'medline' => 98765432, 
!     #            'abstract' => "Hoge fuga. ...",
!     #            'url' => "http://example.com", 
!     #            'mesh' => [], 
!     #            'affiliations' => []}
!     #    ref = Bio::Reference.new(hash)
!     # ---
!     # *Arguments*:
!     # * (required) _hash_: Hash
!     # *Returns*:: Bio::Reference object
      def initialize(hash)
        hash.default = ''
***************
*** 131,138 ****
      # 0. nil - general
      # 1. endnote - Endnote
!     # 2. bibitem - Bibitem (option acceptable)
!     # 3. bibtex - BiBTeX (option acceptable)
!     # 4. rd - rd (option acceptable)
!     # 5. nature - Nature (option acceptable)
      # 6. science - Science
      # 7. genome_biol - Genome Biology
--- 141,148 ----
      # 0. nil - general
      # 1. endnote - Endnote
!     # 2. bibitem - Bibitem (option available)
!     # 3. bibtex - BiBTeX (option available)
!     # 4. rd - rd (option available)
!     # 5. nature - Nature (option available)
      # 6. science - Science
      # 7. genome_biol - Genome Biology
***************
*** 142,145 ****
--- 152,172 ----
      # 11. trends - Trends in *
      # 12. cell - Cell Press
+     #
+     # See individual methods for details. Basic usage is:
+     #
+     #   # ref is Bio::Reference object
+     #   # using simplest possible call (for general style)
+     #   puts ref.format
+     #   
+     #   # output in Nature style
+     #   puts ref.format("nature")      # alternatively, puts ref.nature
+     #
+     #   # output in Nature short style (see Bio::Reference#nature)
+     #   puts ref.format("nature",true) # alternatively, puts ref.nature(true)
+     # ---
+     # *Arguments*:
+     # * (optional) _style_: String with style identifier
+     # * (optional) _option_: Option for styles accepting one
+     # *Returns*:: String
      def format(style = nil, option = nil)
        case style
***************
*** 173,177 ****
      end
  
!     # Formats in the Endonote style.
      def endnote
        lines = []
--- 200,222 ----
      end
  
!     # Returns reference formatted in the Endnote style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.endnote
!     #
!     #     %0 Journal Article
!     #     %A Hoge, J.P.
!     #     %A Fuga, F.B.
!     #     %D 2001
!     #     %T Title of the study.
!     #     %J Theor. J. Hoge
!     #     %V 12
!     #     %N 3
!     #     %P 123-145
!     #     %M 12345678
!     #     %U http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=12345678
!     #     %X Hoge fuga. ...
!     # ---
!     # *Returns*:: String
      def endnote
        lines = []
***************
*** 201,205 ****
      end
  
!     # Formats in the bibitem.
      def bibitem(item = nil)
        item  = "PMID:#{@pubmed}" unless item
--- 246,260 ----
      end
  
!     # Returns reference formatted in the bibitem style
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.bibitem
!     #
!     #     \bibitem{PMID:12345678}
!     #     Hoge, J.P., Fuga, F.B.
!     #     Title of the study.,
!     #     {\em Theor. J. Hoge}, 12(3):123--145, 2001.
!     # ---
!     # *Returns*:: String
      def bibitem(item = nil)
        item  = "PMID:#{@pubmed}" unless item
***************
*** 213,217 ****
      end
  
!     # Formats in the BiBTeX style.
      def bibtex(section = nil)
        section = "article" unless section
--- 268,303 ----
      end
  
!     # Returns reference formatted in the BiBTeX style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.bibtex
!     #
!     #     @article{PMID:12345678,
!     #       author  = {Hoge, J.P. and Fuga, F.B.},
!     #       title   = {Title of the study.},
!     #       journal = {Theor. J. Hoge},
!     #       year    = {2001},
!     #       volume  = {12},
!     #       number  = {3},
!     #       pages   = {123--145},
!     #     }
!     #
!     #   # using a different section (e.g. "book")
!     #   # (but not really configured for anything other than articles)
!     #   puts ref.bibtex("book")
!     #
!     #     @book{PMID:12345678,
!     #       author  = {Hoge, J.P. and Fuga, F.B.},
!     #       title   = {Title of the study.},
!     #       journal = {Theor. J. Hoge},
!     #       year    = {2001},
!     #       volume  = {12},
!     #       number  = {3},
!     #       pages   = {123--145},
!     #     }    
!     # ---
!     # *Arguments*:
!     # * (optional) _section_: BiBTeX section as String
!     # *Returns*:: String
      def bibtex(section = nil)
        section = "article" unless section
***************
*** 231,235 ****
      end
  
!     # Formats in a general style.                
      def general
        authors = @authors.join(', ')
--- 317,328 ----
      end
  
!     # Returns reference formatted in a general/generic style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.general
!     #
!     #     Hoge, J.P., Fuga, F.B. (2001). "Title of the study." Theor. J. Hoge 12:123-145.
!     # ---
!     # *Returns*:: String
      def general
        authors = @authors.join(', ')
***************
*** 237,241 ****
      end
  
!     # Formats in the RD style.
      def rd(str = nil)
        @abstract ||= str
--- 330,351 ----
      end
  
!     # Return reference formatted in the RD style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.rd
!     #
!     #     == Title of the study.
!     #     
!     #     * Hoge, J.P. and Fuga, F.B.
!     #     
!     #     * Theor. J. Hoge 2001 12:123-145 [PMID:12345678]
!     #     
!     #     Hoge fuga. ...
!     #
!     # An optional string argument can be supplied, but does nothing.
!     # ---
!     # *Arguments*:
!     # * (optional) str: String (default nil)
!     # *Returns*:: String
      def rd(str = nil)
        @abstract ||= str
***************
*** 248,253 ****
      end
  
!     # Formats in the Nature Publish Group style.
!     # * http://www.nature.com
      def nature(short = false)
        if short
--- 358,377 ----
      end
  
!     # Formats in the Nature Publishing Group 
!     # (http://www.nature.com) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.nature
!     #
!     #     Hoge, J.P. & Fuga, F.B. Title of the study. Theor. J. Hoge 12, 123-145 (2001).
!     #
!     #   # optionally, output short version
!     #   puts ref.nature(true)  # or puts ref.nature(short=true)
!     #
!     #     Hoge, J.P. & Fuga, F.B. Theor. J. Hoge 12, 123-145 (2001).
!     # ---
!     # *Arguments*:
!     # * (optional) _short_: Boolean (default false)
!     # *Returns*:: String
      def nature(short = false)
        if short
***************
*** 266,271 ****
      end
  
!     # Formats in the Science style.
!     # * http://www.siencemag.com/
      def science
        if @authors.size > 4
--- 390,402 ----
      end
  
!     # Returns reference formatted in the 
!     # Science[http://www.sciencemag.org] style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.science
!     #
!     #     J.P. Hoge, F.B. Fuga, Theor. J. Hoge 12 123 (2001).
!     # ---
!     # *Returns*:: String
      def science
        if @authors.size > 4
***************
*** 278,283 ****
      end
  
!     # Formats in the Genome Biology style.
!     # * http://genomebiology.com/
      def genome_biol
        authors = @authors.collect {|name| strip_dots(name)}.join(', ')
--- 409,421 ----
      end
  
!     # Returns reference formatted in the Genome Biology 
!     # (http://genomebiology.com) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.genome_biol
!     #
!     #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
!     # ---
!     # *Returns*:: String
      def genome_biol
        authors = @authors.collect {|name| strip_dots(name)}.join(', ')
***************
*** 285,294 ****
        "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
      end
!     # Formats in the Current Biology style.
!     # * http://www.current-biology.com/
!     alias current genome_biol
  
!     # Formats in the Genome Research style.
!     # * http://genome.org/
      def genome_res
        authors = authors_join(' and ')
--- 423,450 ----
        "#{authors}: #{@title} #{journal} #{@year}, #{@volume}:#{@pages}."
      end
!     
!     # Returns reference formatted in the Current Biology 
!     # (http://current-biology.com) style. (Same as the Genome Biology style)
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.current
!     #
!     #     Hoge JP, Fuga FB: Title of the study. Theor J Hoge 2001, 12:123-145.
!     # ---
!     # *Returns*:: String
!     def current 
!       self.genome_biol
!     end
  
!     # Returns reference formatted in the Genome Research 
!     # (http://genome.org) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.genome_res
!     #
!     #     Hoge, J.P. and Fuga, F.B. 2001.
!     #       Title of the study. Theor. J. Hoge 12: 123-145.
!     # ---
!     # *Returns*:: String
      def genome_res
        authors = authors_join(' and ')
***************
*** 296,301 ****
      end
  
!     # Formats in the Nucleic Acids Reseach style.
!     # * http://nar.oxfordjournals.org/
      def nar
        authors = authors_join(' and ')
--- 452,464 ----
      end
  
!     # Returns reference formatted in the Nucleic Acids Research 
!     # (http://nar.oxfordjournals.org) style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.nar
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge, 12, 123-145.
!     # ---
!     # *Returns*:: String
      def nar
        authors = authors_join(' and ')
***************
*** 303,308 ****
      end
  
!     # Formats in the CELL Press style.
!     # http://www.cell.com/
      def cell
        authors = authors_join(' and ')
--- 466,478 ----
      end
  
!     # Returns reference formatted in the 
!     # CELL[http://www.cell.com] Press style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.cell
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001). Title of the study. Theor. J. Hoge 12, 123-145.
!     # ---
!     # *Returns*:: String
      def cell
        authors = authors_join(' and ')
***************
*** 310,315 ****
      end
      
!     # Formats in the TRENDS Journals.
!     # * http://www.trends.com/
      def trends
        if @authors.size > 2
--- 480,492 ----
      end
      
!     # Returns reference formatted in the 
!     # TRENDS[http://www.trends.com] style.
!     #
!     #   # ref is a Bio::Reference object
!     #   puts ref.trends
!     #
!     #     Hoge, J.P. and Fuga, F.B. (2001) Title of the study. Theor. J. Hoge 12, 123-145
!     # ---
!     # *Returns*:: String
      def trends
        if @authors.size > 2
***************
*** 352,358 ****
    end
  
!   # Set of Bio::Reference.
    #
!   # === Examples
    #
    #   refs = Bio::References.new
--- 529,537 ----
    end
  
!   # = DESCRIPTION
    #
!   # A container class for Bio::Reference objects.
!   #
!   # = USAGE
    #
    #   refs = Bio::References.new
***************
*** 364,371 ****
    class References
  
!     # Array of Bio::Reference.
      attr_accessor :references
  
      # 
      def initialize(ary = [])
        @references = ary
--- 543,556 ----
    class References
  
!     # Array of Bio::Reference objects
      attr_accessor :references
  
+     # Create a new Bio::References object
      # 
+     #   refs = Bio::References.new
+     # ---
+     # *Arguments*:
+     # * (optional) _ary_: Array of Bio::Reference objects
+     # *Returns*:: Bio::References object
      def initialize(ary = [])
        @references = ary
***************
*** 373,377 ****
  
  
!     # Append a Bio::Reference object.
      def append(reference)
        @references.push(reference) if reference.is_a? Reference
--- 558,568 ----
  
  
!     # Add a Bio::Reference object to the container.
!     #
!     #   refs.append(reference)
!     # ---
!     # *Arguments*:
!     # * (required) _reference_: Bio::Reference object
!     # *Returns*:: current Bio::References object
      def append(reference)
        @references.push(reference) if reference.is_a? Reference
***************
*** 379,383 ****
      end
  
!     # Iterates each Bio::Reference object.
      def each
        @references.each do |reference|
--- 570,580 ----
      end
  
!     # Iterate through Bio::Reference objects.
!     #
!     #   refs.each do |reference|
!     #     ...
!     #   end
!     # ---
!     # *Block*:: yields each Bio::Reference object
      def each
        @references.each do |reference|


From k at dev.open-bio.org  Sun Mar 26 02:28:01 2006
From: k at dev.open-bio.org (Katayama Toshiaki)
Date: Sun, 26 Mar 2006 02:28:01 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/sequence aa.rb, 1.2, 1.3 common.rb,
	1.2, 1.3 compat.rb, 1.2, 1.3 format.rb, 1.2, 1.3 generic.rb,
	1.3, 1.4 na.rb, 1.2, 1.3
Message-ID: <200603260228.k2Q2S12v028863@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv28853/sequence

Modified Files:
	aa.rb common.rb compat.rb format.rb generic.rb na.rb 
Log Message:
* comprehensive documentations contributed by Ryan Raaum and Jan Aerts are added.
* bug fixes in sequence.rb contributed by Ryan Raaum
  * Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess.  (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
  * Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block).  With this argument set, blocks will be properly passed through to the encapsulated object.


Index: compat.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/compat.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** compat.rb	6 Feb 2006 14:18:03 -0000	1.2
--- compat.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 18,21 ****
--- 19,33 ----
    autoload :AA,     'bio/sequence/aa'
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(@seq)
***************
*** 26,32 ****
--- 38,51 ----
  module Common
  
+   # *DEPRECATED* Do not use! Use Bio::Sequence#output instead. 
+   # 
    # Output the FASTA format string of the sequence.  The 1st argument is
    # used as the comment string.  If the 2nd option is given, the output
    # sequence will be folded.
+   # ---
+   # *Arguments*:
+   # * (optional) _header_: String object
+   # * (optional) _width_: Fixnum object (default nil)
+   # *Returns*:: String
    def to_fasta(header = '', width = nil)
      warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
***************
*** 44,52 ****
  class NA
  
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
--- 63,89 ----
  class NA
  
+   # Generate a new random sequence with the given frequency of bases.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the base composition of an existing 
+   # sequence instance).
+   #
+   #   counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
+   #   puts Bio::Sequence::NA.randomize(counts)  #=> "ggcttgttac" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
+   #   Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::NA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu #:nodoc:
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
***************
*** 57,60 ****
--- 94,115 ----
  class AA
  
+   # Generate a new random sequence with the given frequency of residues.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the residue composition of an existing 
+   # sequence instance).
+   #
+   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
+   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
+   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)

Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** common.rb	6 Feb 2006 14:16:17 -0000	1.2
--- common.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 15,22 ****
  class Sequence
  
! # This module provides common methods for biological sequence classes
! # which must inherit String.
  module Common
  
    def to_s
      String.new(self)
--- 16,53 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::Common is a 
! # Mixin[http://www.rubycentral.com/book/tut_modules.html]
! # implementing methods common to
! # Bio::Sequence::AA and Bio::Sequence::NA.  All of these methods
! # are available to either Amino Acid or Nucleic Acid sequences, and
! # by encapsulation are also available to Bio::Sequence objects.
! #
! # = USAGE
! #
! #   # Create a sequence
! #   dna = Bio::Sequence.auto('atgcatgcatgc')
! #
! #   # Splice out a subsequence using a Genbank-style location string
! #   puts dna.splice('complement(1..4)')
! #
! #   # What is the base composition?
! #   puts dna.composition
! #
! #   # Create a random sequence with the composition of a current sequence
! #   puts dna.randomize
  module Common
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
!   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence::NA
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(self)
***************
*** 24,34 ****
    alias to_str to_s
  
!   # Force self to re-initialize for clean up (remove white spaces,
!   # case unification).
    def seq
      self.class.new(self)
    end
  
!   # Similar to the 'seq' method, but changes the self object destructively.
    def normalize!
      initialize(self)
--- 55,79 ----
    alias to_str to_s
  
!   # Create a new sequence based on the current sequence.
!   # The original sequence is unchanged.
!   # 
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s2 = s.seq
!   #   puts s2                                 #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def seq
      self.class.new(self)
    end
  
!   # Normalize the current sequence, removing all whitespace and 
!   # transforming all positions to uppercase if the sequence is AA or
!   # transforming all positions to lowercase if the sequence is NA.
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s.normalize!
!   # ---
!   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def normalize!
      initialize(self)
***************
*** 37,40 ****
--- 82,95 ----
    alias seq! normalize!
  
+   # Add new data to the end of the current sequence.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s << 'atgc'
+   #   puts s                                  #=> "atgcatgc"
+   #   s << s
+   #   puts s                                  #=> "atgcatgcatgcatgc"
+   # ---
+   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def <<(*arg)
      super(self.class.new(*arg))
***************
*** 42,50 ****
    alias concat <<
  
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns the subsequence of the self string.
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
--- 97,141 ----
    alias concat <<
  
+   # Create a new sequence by adding to an existing sequence.
+   # The existing sequence is not modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s2 = s + 'atgc'
+   #   puts s2                                 #=> "atgcatgc"
+   #   puts s                                  #=> "atgc"
+   #
+   # The new sequence is of the same class as the existing sequence if 
+   # the new data was added to an existing sequence,
+   #
+   #   puts s2.class == s.class                #=> true
+   #
+   # but if an existing sequence is added to a String, the result is a String
+   #
+   #   s3 = 'atgc' + s
+   #   puts s3.class                           #=> String
+   # ---
+   # *Returns*:: new Bio::Sequence::NA/AA *or* String object
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns a new sequence containing the subsequence identified by the 
!   # start and end numbers given as parameters.  *Important:* Biological 
!   # sequence numbering conventions (one-based) rather than ruby's 
!   # (zero-based) numbering conventions are used.  
!   #
!   #   s = Bio::Sequence::NA.new('atggaatga')
!   #   puts s.subseq(1,3)                      #=> "atg"
!   #
!   # Start defaults to 1 and end defaults to the entire existing string, so
!   # subseq called without any parameters simply returns a new sequence 
!   # identical to the existing sequence.
!   #
!   #   puts s.subseq                           #=> "atggaatga"
!   # ---
!   # *Arguments*:
!   # * (optional) _s_(start): Integer (default 1)
!   # * (optional) _e_(end): Integer (default current sequence length)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
***************
*** 54,80 ****
    end
  
!   # This method iterates on sub string with specified length 'window_size'.
!   # By specifing 'step_size', codon sized shifting or spliting genome
!   # sequence with ovelapping each end can easily be yielded.
    #
!   # The remainder sequence at the terminal end will be returned.
    #
!   # Example:
!   #   # prints average GC% on each 100bp
!   #   seq.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   # prints every translated peptide (length 5aa) in the same frame
!   #   seq.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #   # split genome sequence by 10000bp with 1000bp overlap in fasta format
    #   i = 1
!   #   remainder = seq.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   #
    def window_search(window_size, step_size = 1)
      i = 0
--- 145,177 ----
    end
  
!   # This method steps through a sequence in steps of 'step_size' by 
!   # subsequences of 'window_size'. Typically used with a block.
!   # Any remaining sequence at the terminal end will be returned.
    #
!   # Prints average GC% on each 100bp
    #
!   #   s.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   
!   # Prints every translated peptide (length 5aa) in the same frame
!   #
!   #   s.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #
!   # Split genome sequence by 10000bp with 1000bp overlap in fasta format
!   #
    #   i = 1
!   #   remainder = s.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   # ---
!   # *Arguments*:
!   # * (required) _window_size_: Fixnum
!   # * (optional) _step_size_: Fixnum (default 1)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def window_search(window_size, step_size = 1)
      i = 0
***************
*** 85,91 ****
    end
  
!   # This method receive a hash of residues/bases to the particular values,
!   # and sum up the value along with the self sequence.  Especially useful
!   # to use with the window_search method and amino acid indices etc.
    def total(hash)
      hash.default = 0.0 unless hash.default
--- 182,195 ----
    end
  
!   # Returns a float total value for the sequence given a hash of
!   # base or residue values,
!   #
!   #   values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.total(values)                    #=> 1.0
!   # ---
!   # *Arguments*:
!   # * (required) _hash_: Hash object
!   # *Returns*:: Float object
    def total(hash)
      hash.default = 0.0 unless hash.default
***************
*** 100,103 ****
--- 204,212 ----
  
    # Returns a hash of the occurrence counts for each residue or base.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.composition              #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
+   # ---
+   # *Returns*:: Hash object
    def composition
      count = Hash.new(0)
***************
*** 108,118 ****
    end
  
!   # Returns a randomized sequence keeping its composition by default.
!   # The argument is required when generating a random sequence from the empty
!   # sequence (used by the class methods NA.randomize, AA.randomize).
!   # If the block is given, yields for each random residue/base.
    def randomize(hash = nil)
      length = self.length
      if hash
        count = hash.clone
        count.each_value {|x| length += x}
--- 217,244 ----
    end
  
!   # Returns a randomized sequence. The default is to retain the same 
!   # base/residue composition as the original.  If a hash of base/residue 
!   # counts is given, the new sequence will be based on that hash 
!   # composition.  If a block is given, each new randomly selected 
!   # position will be passed into the block.  In all cases, the
!   # original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.randomize                        #=> "tcag"  (for example)
!   #
!   #   new_composition = {'a' => 2, 't' => 2}
!   #   puts s.randomize(new_composition)       #=> "ttaa"  (for example)
!   #
!   #   count = 0
!   #   s.randomize { |x| count += 1 }
!   #   puts count                              #=> 4
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def randomize(hash = nil)
      length = self.length
      if hash
+       length = 0
        count = hash.clone
        count.each_value {|x| length += x}
***************
*** 139,151 ****
    end
  
!   # Generate a new random sequence with the given frequency of bases
!   # or residues.  The sequence length is determined by the sum of each
!   # base/residue occurences.
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Receive a GenBank style position string and convert it to the Locations
!   # objects to splice the sequence itself.  See also: bio/location.rb
    def splice(position)
      unless position.is_a?(Locations) then
--- 265,305 ----
    end
  
!   # Generate a new random sequence with the given frequency of bases.
!   # The sequence length is determined by their cumulative sum.
!   # (See also Bio::Sequence::Common#randomize which creates a new
!   # randomized sequence object using the base composition of an existing 
!   # sequence instance).
!   #
!   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
!   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
!   #
!   # You may also feed the output of randomize into a block
!   #
!   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
!   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
!   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Return a new sequence extracted from the original using a GenBank style 
!   # position string.  See also documentation for the Bio::Location class.
!   #
!   #   s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
!   #   puts s.splice('1..3')                           #=> "atg"
!   #   puts s.splice('join(1..3,8..10)')               #=> "atgcat"
!   #   puts s.splice('complement(1..3)')               #=> "cat"
!   #   puts s.splice('complement(join(1..3,8..10))')   #=> "atgcat"
!   #
!   # Note that 'complement'ed Genbank position strings will have no 
!   # effect on Bio::Sequence::AA objects.
!   # ---
!   # *Arguments*:
!   # * (required) _position_: String *or* Bio::Location object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def splice(position)
      unless position.is_a?(Locations) then

Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** format.rb	6 Feb 2006 14:20:35 -0000	1.2
--- format.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 4,8 ****
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>
  # License::     Ruby's
  #
--- 4,9 ----
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 21,29 ****
  class Sequence
  
  module Format
  
!   # Output the FASTA format string of the sequence.  The 1st argument is
!   # used in the comment line.  If the 2nd argument (integer) is given,
!   # the output sequence will be folded.
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
--- 22,56 ----
  class Sequence
  
+ # = DESCRIPTION
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
+ # of methods used by Bio::Sequence#output to output sequences in 
+ # common bioinformatic formats.  These are not called in isolation.
+ #
+ # = USAGE
+ #   # Given a Bio::Sequence object,
+ #   puts s.output(:fasta)
+ #   puts s.output(:genbank)
+ #   puts s.output(:embl)
  module Format
  
!   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
!   # case, it would be difficult to successfully call this method outside
!   # its expected context).
!   #
!   # Output the FASTA format string of the sequence.  
!   #
!   # UNFORTUNATELY, the current implementation of Bio::Sequence is incapable of 
!   # using either the header or width arguments.  So something needs to be
!   # changed...
!   #
!   # Currently, this method is used in Bio::Sequence#output like so,
!   #
!   #   s = Bio::Sequence.new('atgc')
!   #   puts s.output(:fasta)                   #=> "> \natgc\n"
!   # ---
!   # *Arguments*:
!   # * (optional) _header_: String (default nil)
!   # * (optional) _width_: Fixnum (default nil)
!   # *Returns*:: String object
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
***************
*** 37,44 ****
    end
  
!   def format_gff
      raise NotImplementedError
    end
  
    def format_genbank
      prefix = ' ' * 5
--- 64,83 ----
    end
  
!   # Not yet implemented :)
!   # Remove the nodoc command after implementation!
!   # ---
!   # *Returns*:: String object
!   def format_gff #:nodoc:
      raise NotImplementedError
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the Genbank format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_genbank
      prefix = ' ' * 5
***************
*** 49,52 ****
--- 88,99 ----
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the EMBL format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_embl
      prefix = 'FT   '

Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/aa.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** aa.rb	6 Feb 2006 14:11:31 -0000	1.2
--- aa.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 17,27 ****
  class Sequence
  
! 
! # Amino Acid sequence
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate a amino acid sequence object from a string.
    def initialize(str)
      super
--- 18,61 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create an Amino Acid sequence.
! #   aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
! #
! #   # What are the three-letter codes for all the residues?
! #   puts aa.codes
! #
! #   # What are the names of all the residues?
! #   puts aa.names
! #
! #   # What is the molecular weight of this peptide?
! #   puts aa.molecular_weight
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate an amino acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
!   #
!   # or maybe (if you have an amino acid sequence in a file)
!   #
!   #   s = Bio::Sequence::AA.new(File.open('aa.txt').read)
!   #
!   # Amino Acid sequences are *always* all uppercase in bioruby
!   #
!   #   s = Bio::Sequence::AA.new("rrLeHtfV")
!   #   puts s                                  #=> "RRLEHTFV"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::AA.new("RRL\nELA\tRG\r  RL")
!   #   puts s                                  #=> "RRLELARGRL"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::AA object
    def initialize(str)
      super
***************
*** 31,45 ****
  
  
!   # Estimate the weight of this protein.
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of the each residue along with the
!   # sequence (3 letters code).
    def codes
      array = []
--- 65,98 ----
  
  
!   # Estimate molecular weight based on 
!   # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.molecular_weight             #=> 572.655
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
+   # Create a Ruby regular expression instance
+   # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html]).
+   #
+   #   s = Bio::Sequence::AA.new("RRLE")
+   #   puts s.to_re                        #=> /RRLE/
+   # ---
+   # *Returns*:: Regexp object
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (3 letters code).  Codes used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.codes                        #=> ["Arg", "Arg", "Leu", "Glu"]
!   # ---
!   # *Returns*:: Array object
    def codes
      array = []
***************
*** 50,54 ****
    end
  
!   # Similar to codes but returns long names.
    def names
      self.codes.map do |x|
--- 103,115 ----
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (full name).  Names used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.names  
!   #               #=> ["arginine", "arginine", "leucine", "glutamic acid"]
!   # ---
!   # *Returns*:: Array object
    def names
      self.codes.map do |x|

Index: generic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/generic.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** generic.rb	6 Feb 2006 14:26:04 -0000	1.3
--- generic.rb	26 Mar 2006 02:27:59 -0000	1.4
***************
*** 14,18 ****
  class Sequence
  
! class Generic < String
  
    include Bio::Sequence::Common
--- 14,18 ----
  class Sequence
  
! class Generic < String #:nodoc:
  
    include Bio::Sequence::Common

Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/na.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** na.rb	6 Feb 2006 14:13:52 -0000	1.2
--- na.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 19,28 ****
  
  
! # Nucleic Acid sequence
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
    def initialize(str)
      super
--- 20,78 ----
  
  
! # = DESCRIPTION
! # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create a Nucleic Acid sequence.
! #   dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
! #   rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
! #
! #   # What are the names of all the bases?
! #   puts dna.names
! #   puts rna.names
! #
! #   # What is the GC percentage?
! #   puts dna.gc_percent
! #   puts rna.gc_percent
! #
! #   # What is the molecular weight?
! #   puts dna.molecular_weight
! #   puts rna.molecular_weight
! #
! #   # What is the reverse complement?
! #   puts dna.reverse_complement
! #   puts dna.complement
! #
! #   # Is this sequence DNA or RNA?
! #   puts dna.rna?
! #
! #   # Translate my sequence (see method docs for many options)
! #   puts dna.translate
! #   puts rna.translate
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
!   #
!   # or maybe (if you have a nucleic acid sequence in a file)
!   #
!   #   s = Bio::Sequence::NA.new(File.open('dna.txt').read)
!   #
!   # Nucleic Acid sequences are *always* all lowercase in bioruby
!   #
!   #   s = Bio::Sequence::NA.new("AAGcTtGG")
!   #   puts s                                  #=> "aagcttgg"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::NA.new("atg\nggg\ttt\r  gc")
!   #   puts s                                  #=> "atggggttgc"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::NA object
    def initialize(str)
      super
***************
*** 31,36 ****
    end
  
!   # This method depends on Locations class, see bio/location.rb
!   def splicing(position)
      mRNA = super
      if mRNA.rna?
--- 81,86 ----
    end
  
!   # Alias of Bio::Sequence::Common splice method, documented there.
!   def splicing(position) #:nodoc:
      mRNA = super
      if mRNA.rna?
***************
*** 42,46 ****
    end
  
!   # Returns complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement
      s = self.class.new(self)
--- 92,103 ----
    end
  
!   # Returns a new complementary sequence object (without reversing).
!   # The original sequence object is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement               #=> 'tacg'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def forward_complement
      s = self.class.new(self)
***************
*** 49,53 ****
    end
  
!   # Convert to complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement!
      if self.rna?
--- 106,117 ----
    end
  
!   # Converts the current sequence into its complement (without reversing).
!   # The original sequence object is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement!              #=> 'tacg'
!   #   puts s                                  #=> 'tacg'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def forward_complement!
      if self.rna?
***************
*** 59,63 ****
    end
  
!   # Returns reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement
      s = self.class.new(self)
--- 123,134 ----
    end
  
!   # Returns a new sequence object with the reverse complement 
!   # sequence to the original.  The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement               #=> 'gcat'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def reverse_complement
      s = self.class.new(self)
***************
*** 66,70 ****
    end
  
!   # Convert to reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement!
      self.reverse!
--- 137,148 ----
    end
  
!   # Converts the original sequence into its reverse complement.  
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement!              #=> 'gcat'
!   #   puts s                                  #=> 'gcat'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def reverse_complement!
      self.reverse!
***************
*** 72,87 ****
    end
  
!   # Aliases for short
    alias complement reverse_complement
    alias complement! reverse_complement!
  
  
!   # Translate into the amino acid sequence from the given frame and the
!   # selected codon table.  The table also can be a Bio::CodonTable object.
!   # The 'unknown' character is used for invalid/unknown codon (can be
!   # used for 'nnn' and/or gap translation in practice).
    #
!   # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
!   # (4, 5 or 6 is also accepted) for the reverse strand.
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
--- 150,235 ----
    end
  
!   # Alias for Bio::Sequence::NA#reverse_complement
    alias complement reverse_complement
+   
+   # Alias for Bio::Sequence::NA#reverse_complement!
    alias complement! reverse_complement!
  
  
!   # Translate into an amino acid sequence.
!   #   
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.translate                        #=> "MA*"
    #
!   # By default, translate starts in reading frame position 1, but you
!   # can start in either 2 or 3 as well,
!   #
!   #   puts s.translate(2)                     #=> "WR"
!   #   puts s.translate(3)                     #=> "GV"
!   #
!   # You may also translate the reverse complement in one step by using frame
!   # values of -1, -2, and -3 (or 4, 5, and 6)
!   #
!   #   puts s.translate(-1)                    #=> "SRH"
!   #   puts s.translate(4)                     #=> "SRH"
!   #   puts s.reverse_complement.translate(1)  #=> "SRH"
!   #
!   # The default codon table in the translate function is the Standard
!   # Eukaryotic codon table.  The translate function takes either a 
!   # number or a Bio::CodonTable object for its table argument. 
!   # The available tables are 
!   # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
!   #
!   #   1. "Standard (Eukaryote)"
!   #   2. "Vertebrate Mitochondrial"
!   #   3. "Yeast Mitochondrial"
!   #   4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
!   #   5. "Invertebrate Mitochondrial"
!   #   6. "Ciliate Macronuclear and Dasycladacean"
!   #   9. "Echinoderm Mitochondrial"
!   #   10. "Euplotid Nuclear"
!   #   11. "Bacteria"
!   #   12. "Alternative Yeast Nuclear"
!   #   13. "Ascidian Mitochondrial"
!   #   14. "Flatworm Mitochondrial"
!   #   15. "Blepharisma Macronuclear"
!   #   16. "Chlorophycean Mitochondrial"
!   #   21. "Trematode Mitochondrial"
!   #   22. "Scenedesmus obliquus mitochondrial"
!   #   23. "Thraustochytrium Mitochondrial"
!   #
!   # If you are using anything other than the default table, you must specify 
!   # frame in the translate method call,
!   #
!   #   puts s.translate                #=> "MA*"  (using defaults)
!   #   puts s.translate(1,1)           #=> "MA*"  (same as above, but explicit)
!   #   puts s.translate(1,2)           #=> "MAW"  (different codon table)
!   #
!   # and using a Bio::CodonTable instance in the translate method call,
!   #
!   #   mt_table = Bio::CodonTable[2]
!   #   puts s.translate(1, mt_table)           #=> "MAW"
!   #
!   # By default, any invalid or unknown codons (as could happen if the 
!   # sequence contains ambiguities) will be represented by 'X' in the 
!   # translated sequence. 
!   # You may change this to any character of your choice.
!   #
!   #   s = Bio::Sequence::NA.new('atgcNNtga')
!   #   puts s.translate                        #=> "MX*"
!   #   puts s.translate(1,1,'9')               #=> "M9*"
!   #
!   # The translate method considers gaps to be unknown characters and treats 
!   # them as such (i.e. does not collapse sequences prior to translation), so
!   #
!   #   s = Bio::Sequence::NA.new('atgc--tga')
!   #   puts s.translate                        #=> "MX*"
!   # ---
!   # *Arguments*:
!   # * (optional) _frame_:  one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
!   # * (optional) _table_: Fixnum in range 1,23 or Bio::CodonTable object
!   #   (default 1)
!   # * (optional) _unknown_: Character (default 'X')
!   # *Returns*:: Bio::Sequence::AA object
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
***************
*** 109,113 ****
    end
  
!   # Returns counts of the each codon in the sequence by Hash.
    def codon_usage
      hash = Hash.new(0)
--- 257,276 ----
    end
  
!   # Returns counts of each codon in the sequence in a hash.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.codon_usage                #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
!   #
!   # This method does not validate codons!  Any three letter group is a 'codon'. So,
!   #
!   #   s = Bio::Sequence::NA.new('atggNNtga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
!   #
!   #   s = Bio::Sequence::NA.new('atgg--tga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "g--"=>1, "atg"=>1}
!   #
!   # Also, there is no option to work in any frame other than the first.
!   # ---
!   # *Returns*:: Hash object
    def codon_usage
      hash = Hash.new(0)
***************
*** 118,122 ****
    end
  
!   # Calculate the ratio of GC / ATGC bases in percent.
    def gc_percent
      count = self.composition
--- 281,291 ----
    end
  
!   # Calculate the ratio of GC / ATGC bases as a percentage, truncated to
!   # a whole number (in the example below, 5 of 9 bases is reported as 55).
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.gc_percent                       #=> 55
!   # ---
!   # *Returns*:: Fixnum
    def gc_percent
      count = self.composition
***************
*** 127,136 ****
    end
  
!   # Show abnormal bases other than 'atgcu'.
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate the weight of this biological string molecule.
    def molecular_weight
      if self.rna?
--- 296,322 ----
    end
  
!   # Returns an alphabetically sorted array of any non-standard bases 
!   # (other than 'atgcu').
!   #
!   #   s = Bio::Sequence::NA.new('atgStgQccR')
!   #   puts s.illegal_bases                    #=> ["q", "r", "s"]
!   # ---
!   # *Returns*:: Array object
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate molecular weight (using the values from BioPerl's 
!   # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.molecular_weight                 #=> 2841.00708
!   #
!   # RNA and DNA do not have the same molecular weights,
!   #
!   #   s = Bio::Sequence::NA.new('auggcguga')
!   #   puts s.molecular_weight                 #=> 2956.94708
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      if self.rna?
***************
*** 141,145 ****
    end
  
!   # Convert the universal code string into the regular expression.
    def to_re
      if self.rna?
--- 327,337 ----
    end
  
!   # Create a Ruby regular expression instance
!   # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html]).
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.to_re                            #=> /atggcgtga/
!   # ---
!   # *Returns*:: Regexp object
    def to_re
      if self.rna?
***************
*** 150,154 ****
    end
  
!   # Convert the self string into the list of the names of the each base.
    def names
      array = []
--- 342,353 ----
    end
  
!   # Generate the list of the names of each nucleotide along with the
!   # sequence (full name).  Names used in bioruby are found in the
!   # Bio::NucleicAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::NA.new('atg')
!   #   puts s.names                    #=> ["Adenine", "Thymine", "Guanine"]
!   # ---
!   # *Returns*:: Array object
    def names
      array = []
***************
*** 159,176 ****
    end
  
!   # Output a DNA string by substituting 'u' to 't'.
    def dna
      self.tr('u', 't')
    end
  
    def dna!
      self.tr!('u', 't')
    end
  
!   # Output a RNA string by substituting 't' to 'u'.
    def rna
      self.tr('t', 'u')
    end
  
    def rna!
      self.tr!('t', 'u')
--- 358,405 ----
    end
  
!   # Returns a new sequence object with any 'u' bases changed to 't'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('augc')
!   #   puts s.dna                              #=> 'atgc'
!   #   puts s                                  #=> 'augc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def dna
      self.tr('u', 't')
    end
  
+   # Changes any 'u' bases in the original sequence to 't'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('augc')
+   #   puts s.dna!                             #=> 'atgc'
+   #   puts s                                  #=> 'atgc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def dna!
      self.tr!('u', 't')
    end
  
!   # Returns a new sequence object with any 't' bases changed to 'u'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.rna                              #=> 'augc'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def rna
      self.tr('t', 'u')
    end
  
+   # Changes any 't' bases in the original sequence to 'u'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.dna!                             #=> 'augc'
+   #   puts s                                  #=> 'augc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def rna!
      self.tr!('t', 'u')


From aerts at dev.open-bio.org  Mon Mar 27 18:34:37 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Mon, 27 Mar 2006 18:34:37 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/io fetch.rb,1.6,1.7
Message-ID: <200603271834.k2RIYb5l020081@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv20061/io

Modified Files:
	fetch.rb 
Log Message:
Updated docs.


Index: fetch.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/fetch.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** fetch.rb	20 Mar 2006 12:40:13 -0000	1.6
--- fetch.rb	27 Mar 2006 18:34:35 -0000	1.7
***************
*** 31,36 ****
    # = DESCRIPTION
    # The Bio::Fetch class provides an interface to dbfetch servers. Given
!   # a database name and an accession number, these servers return the nucleic
!   # or amino acid sequence for that accession number in that database.
    #
    # Possible dbfetch servers include:
--- 31,37 ----
    # = DESCRIPTION
    # The Bio::Fetch class provides an interface to dbfetch servers. Given
!   # a database name and an accession number, these servers return the associated
!   # record. For example, for the embl database on the EBI, that would be a
!   # nucleic or amino acid sequence.
    #
    # Possible dbfetch servers include:
***************
*** 82,86 ****
    
      # Get raw database entry by id. This method lets the Bio::Registry class
!     # use Bio::Fetch objects and should probably not be used directly.
      def get_by_id(id)
        fetch(@database, id)
--- 83,87 ----
    
      # Get raw database entry by id. This method lets the Bio::Registry class
!     # use Bio::Fetch objects.
      def get_by_id(id)
        fetch(@database, id)


From aerts at dev.open-bio.org  Tue Mar 28 13:42:34 2006
From: aerts at dev.open-bio.org (Jan Aerts)
Date: Tue, 28 Mar 2006 13:42:34 +0000
Subject: [BioRuby-cvs] bioruby/lib/bio/db gff.rb,1.5,1.6
Message-ID: <200603281342.k2SDgYvl024727@dev.open-bio.org>

Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory dev.open-bio.org:/tmp/cvs-serv24707

Modified Files:
	gff.rb 
Log Message:
Added documentation.


Index: gff.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/gff.rb,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** gff.rb	18 Dec 2005 15:58:41 -0000	1.5
--- gff.rb	28 Mar 2006 13:42:32 -0000	1.6
***************
*** 4,21 ****
  # Copyright::  Copyright (C) 2003, 2005
  #              Toshiaki Katayama <k at bioruby.org>
  # License::    LGPL
  #
  # $Id$
  #
- # == Description
- #
- #
- # == Example
- #
- #
- # == References
- #
- # * http://www.sanger.ac.uk/Software/formats/GFF/
- #
  #--
  #
--- 4,12 ----
  # Copyright::  Copyright (C) 2003, 2005
  #              Toshiaki Katayama <k at bioruby.org>
+ #              2006  Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::    LGPL
  #
  # $Id$
  #
  #--
  #
***************
*** 38,46 ****
  
  module Bio
! 
  class GFF
! 
!   attr_accessor :records
! 
    def initialize(str = '')
      @records = Array.new
--- 29,78 ----
  
  module Bio
! # == DESCRIPTION
! # The Bio::GFF and Bio::GFF::Record classes describe data contained in a 
! # GFF-formatted file. For information on the GFF format, see 
! # http://www.sanger.ac.uk/Software/formats/GFF/. Data are represented in tab- 
! # delimited format, including
! # * seqname
! # * source
! # * feature
! # * start
! # * end
! # * score
! # * strand
! # * frame
! # * attributes (optional)
! # 
! # For example:
! #  SEQ1     EMBL        atg       103   105     .       +       0
! #  SEQ1     EMBL        exon      103   172     .       +       0
! #  SEQ1     EMBL        splice5   172   173     .       +       .
! #  SEQ1     netgene     splice5   172   173     0.94    +       .
! #  SEQ1     genie       sp5-20    163   182     2.3     +       .
! #  SEQ1     genie       sp5-10    168   177     2.1     +       .
! #  SEQ1     grail       ATG       17    19      2.1     -       0
! #
! # The Bio::GFF object is a container for Bio::GFF::Record objects, each 
! # representing a single line in the GFF file.
  class GFF
!   # Creates a Bio::GFF object by building a collection of Bio::GFF::Record
!   # objects.
!   # 
!   # Create a Bio::GFF object the hard way
!   #  this_gff =  "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
!   #  this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
!   #  this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
!   #  this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
!   #  this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
!   #  this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
!   #  this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
!   #  p Bio::GFF.new(this_gff)
!   #  
!   # or create one based on a GFF-formatted file:
!   #  p Bio::GFF.new(File.open('my_data.gff'))
!   # ---
!   # *Arguments*:
!   # * _str_: string in GFF format
!   # *Returns*:: Bio::GFF object
    def initialize(str = '')
      @records = Array.new
***************
*** 50,66 ****
--- 82,127 ----
    end
  
+   # An array of Bio::GFF::Record objects.
+   attr_accessor :records
+ 
+   # Represents a single line of a GFF-formatted file. See Bio::GFF for more
+   # information.
    class Record
  
+     # Name of the reference sequence
      attr_accessor :seqname
+     
+     # Name of the source of the feature (e.g. program that did prediction)
      attr_accessor :source
+     
+     # Name of the feature
      attr_accessor :feature
+     
+     # Start position of feature on reference sequence
      attr_accessor :start
+     
+     # End position of feature on reference sequence
      attr_accessor :end
+     
+     # Score of annotation (e.g. e-value for BLAST search)
      attr_accessor :score
+     
+     # Strand that feature is located on
      attr_accessor :strand
+     
+     # For features of type 'exon': indicates where feature begins in the reading frame
      attr_accessor :frame
+     
+     # List of tag=value pairs (e.g. to store name of the feature: ID=my_id)
      attr_accessor :attributes
+     
+     # Comments for the GFF record
      attr_accessor :comments
  
+     # Creates a Bio::GFF::Record object. Is typically not called directly, but
+     # is called automatically when creating a Bio::GFF object.
+     # ---
+     # *Arguments*:
+     # * _str_: a tab-delimited line in GFF format
      def initialize(str)
        @comments = str.chomp[/#.*/]
***************
*** 83,90 ****
--- 144,158 ----
    end
  
+   # = DESCRIPTION
+   # Represents version 2 of GFF specification. Is completely implemented by the
+   # Bio::GFF class.
    class GFF2 < GFF
      VERSION = 2
    end
  
+   # = DESCRIPTION
+   # Represents version 3 of GFF specification. Is completely implemented by the
+   # Bio::GFF class. For more information on version GFF3, see
+   # http://flybase.bio.indiana.edu/annot/gff3.html
    class GFF3 < GFF
      VERSION = 3
***************
*** 103,106 ****
    end
  
!   p Bio::GFF.new(ARGF.read)
  end
--- 171,181 ----
    end
  
!   this_gff =  "SEQ1\tEMBL\tatg\t103\t105\t.\t+\t0\n"
!   this_gff << "SEQ1\tEMBL\texon\t103\t172\t.\t+\t0\n"
!   this_gff << "SEQ1\tEMBL\tsplice5\t172\t173\t.\t+\t.\n"
!   this_gff << "SEQ1\tnetgene\tsplice5\t172\t173\t0.94\t+\t.\n"
!   this_gff << "SEQ1\tgenie\tsp5-20\t163\t182\t2.3\t+\t.\n"
!   this_gff << "SEQ1\tgenie\tsp5-10\t168\t177\t2.1\t+\t.\n"
!   this_gff << "SEQ1\tgrail\tATG\t17\t19\t2.1\t-\t0\n"
!   p Bio::GFF.new(this_gff)
  end