[BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48

Fri Mar 3 09:31:59 UTC 2006

Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io

Modified Files:
	flatfile.rb 
Log Message:
* added RulesArray class, defined only to customize inspect output
* changed constants (like Bio::GenBank) to Strings (like "Bio::GenBank")
  to avoid requiring almost all files when using autodetect


Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb	3 Mar 2006 08:18:49 -0000	1.47
--- flatfile.rb	3 Mar 2006 09:31:57 -0000	1.48
***************
*** 736,739 ****
--- 736,748 ----
        include TSort
  
+       # Array to store autodetection rules.
+       # This is defined only for inspect.
+       class RulesArray < Array
+         # visualize contents
+         def inspect
+           "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+         end
+       end #class RulesArray
+ 
        # Template of a single rule of autodetection
        class RuleTemplate
***************
*** 745,754 ****
          # Creates a new element.
          def initialize
!           a = Array.new
!           def a.inspect
!             "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
!           end
!           @higher_priority_elements = a.clone
!           @lower_priority_elements  = a.clone
            @name = nil
          end
--- 754,759 ----
          # Creates a new element.
          def initialize
!           @higher_priority_elements = RulesArray.new
!           @lower_priority_elements  = RulesArray.new
            @name = nil
          end
***************
*** 784,787 ****
--- 789,810 ----
            nil
          end
+ 
+         private
+         # Gets constant from constant name given as a string.
+         def str2const(str)
+           const = Object
+           str.split(/\:\:/).each do |x|
+             const = const.const_get(x)
+           end
+           const
+         end
+ 
+         # Gets database class from given object.
+         # Current implementation is: 
+         # if _obj_ is kind of String, regarded as a constant.
+         # Otherwise, returns _obj_ as is.
+         def get_dbclass(obj)
+           obj.kind_of?(String) ? str2const(obj) : obj
+         end
        end #class Rule_Template
  
***************
*** 835,841 ****
            super()
            @re = re
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
            @name = dbclass.to_s
          end
  
--- 858,878 ----
            super()
            @re = re
            @name = dbclass.to_s
+           @dbclass = nil
+           @dbclass_lazy = dbclass
+         end
+ 
+         # database class (lazy evaluation)
+         def dbclass
+           unless @dbclass
+             @dbclass = get_dbclass(@dbclass_lazy)
+           end
+           @dbclass
+         end
+         private :dbclass
+ 
+         # returns database classes
+         def dbclasses
+           [ dbclass ]
          end
  
***************
*** 844,872 ****
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? @dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       class RuleRegexp2 < RuleTemplate
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super()
            @regexps = regexps
-           @dbclass = dbclass
-           @dbclasses = [ dbclass ]
-           if name
-             @name = name
-           else
-             @name = @dbclass.to_s
-           end
          end
  
!         # If given text matches the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return @dbclass if re =~ text
            end
            nil
--- 881,904 ----
          # _meta_ is ignored.
          def guess(text, meta)
!           @re =~ text ? dbclass : nil
          end
        end #class RuleRegexp
  
        # A autodetection rule to use more than two regular expressions.
!       # If given string matches one of the regular expressions,
!       # returns the database class.
!       class RuleRegexp2 < RuleRegexp
          # Creates a new instance.
          def initialize(dbclass, *regexps)
!           super(dbclass, nil)
            @regexps = regexps
          end
  
!         # If given text matches one of the regexp, returns the database class.
          # Otherwise, returns nil or false.
          # _meta_ is ignored.
          def guess(text, meta)
            @regexps.each do |re|
!             return dbclass if re =~ text
            end
            nil
***************
*** 880,887 ****
            super()
            @proc = proc
!           @dbclasses = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
          # If given text (and/or meta information) is known, returns
          # the database class.
--- 912,928 ----
            super()
            @proc = proc
!           @dbclasses = nil
!           @dbclasses_lazy = dbclasses
            @name = dbclasses.collect { |x| x.to_s }.join('|')
          end
  
+         # database classes (lazy evaluation)
+         def dbclasses
+           unless @dbclasses
+             @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+           end
+           @dbclasses
+         end
+ 
          # If given text (and/or meta information) is known, returns
          # the database class.
***************
*** 1039,1058 ****
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ Bio::GenBank,
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ Bio::GenPept,
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ Bio::MEDLINE,
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ Bio::EMBL,
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ Bio::SPTR,
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ Bio::PROSITE,
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ Bio::TRANSFAC,
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
--- 1080,1099 ----
        def self.make_default
          a = self[
!           genbank  = RuleRegexp[ 'Bio::GenBank',
              /^LOCUS       .+ bp .*[a-z]*[DR]?NA/ ],
!           genpept  = RuleRegexp[ 'Bio::GenPept',
              /^LOCUS       .+ aa .+/ ],
!           medline  = RuleRegexp[ 'Bio::MEDLINE',
              /^UI  \- [0-9]+$/ ],
!           embl     = RuleRegexp[ 'Bio::EMBL',
              /^ID   .+\; .*(DNA|RNA|XXX)\;/ ],
!           sptr     = RuleRegexp[ 'Bio::SPTR',
              /^ID   .+\; *PRT\;/ ],
!           prosite  = RuleRegexp[ 'Bio::PROSITE',
              /^ID   [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
!           transfac = RuleRegexp[ 'Bio::TRANSFAC',
              /^AC  [-A-Za-z0-9_\.]+$/ ],
  
!           aaindex  = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
              if /^H [-A-Z0-9_\.]+$/ =~ text then
                if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
            end,
  
!           litdb    = RuleRegexp[ Bio::LITDB,
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ Bio::KEGG::BRITE,
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ Bio::KEGG::KO,
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ Bio::KEGG::GLYCAN,
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ Bio::KEGG::ENZYME,
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ Bio::KEGG::REACTION,
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ Bio::KEGG::GENES,
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ Bio::KEGG::GENOME,
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
!                                 Bio::FANTOM::MaXML::Sequence) do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
--- 1109,1139 ----
            end,
  
!           litdb    = RuleRegexp[ 'Bio::LITDB',
              /^CODE        [0-9]+$/ ],
!           brite    = RuleRegexp[ 'Bio::KEGG::BRITE',
              /^Entry           [A-Z0-9]+/ ],
!           ko       = RuleRegexp[ 'Bio::KEGG::KO',
              /^ENTRY       .+ KO\s*/ ],
!           glycan   = RuleRegexp[ 'Bio::KEGG::GLYCAN',
              /^ENTRY       .+ Glycan\s*/ ],
!           enzyme   = RuleRegexp2[ 'Bio::KEGG::ENZYME',
              /^ENTRY       EC [0-9\.]+$/,
              /^ENTRY       .+ Enzyme\s*/
            ],
!           compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
              /^ENTRY       C[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Compound\s*/
            ],
!           reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
              /^ENTRY       R[A-Za-z0-9\._]+$/,
              /^ENTRY       .+ Reaction\s*/
            ],
!           genes    = RuleRegexp[ 'Bio::KEGG::GENES',
              /^ENTRY       .+ (CDS|gene|.*RNA) / ],
!           genome   = RuleRegexp[ 'Bio::KEGG::GENOME',
              /^ENTRY       [a-z]+$/ ],
  
!           fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
!                                 'Bio::FANTOM::MaXML::Sequence') do |text|
              if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
                case $1
***************
*** 1109,1143 ****
            end,
  
!           pdb = RuleRegexp[ Bio::PDB,
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ Bio::PDB::ChemicalComponent,
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ Bio::ClustalW::Report,
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ Bio::Blast::Report,
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ Bio::Blast::WU::Report,
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ Bio::Blast::Default::Report,
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ Bio::Blat::Report,
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ Bio::Spidey::Report,
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ Bio::HMMER::Report,
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ Bio::Sim4::Report,
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new(Bio::FastaFormat,
!                                      Bio::NBRF,
!                                      Bio::FastaNumericFormat) do |text|
              if /^>.+$/ =~ text
                case text
--- 1150,1184 ----
            end,
  
!           pdb = RuleRegexp[ 'Bio::PDB',
              /^HEADER    .{40}\d\d\-[A-Z]{3}\-\d\d   [0-9A-Z]{4}/ ],
!           het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
              /^RESIDUE +.+ +\d+\s*$/ ],
  
!           clustal = RuleRegexp[ 'Bio::ClustalW::Report',
            /^CLUSTAL .*\(.*\).*sequence +alignment/ ],
  
!           blastxml = RuleRegexp[ 'Bio::Blast::Report',
              /\<\!DOCTYPE BlastOutput PUBLIC / ],
!           wublast  = RuleRegexp[ 'Bio::Blast::WU::Report',
              /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
!           blast    = RuleRegexp[ 'Bio::Blast::Default::Report',
              /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
!           tblast   = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
              /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
  
!           blat   = RuleRegexp[ 'Bio::Blat::Report',
              /^psLayout version \d+\s*$/ ],
!           spidey = RuleRegexp[ 'Bio::Spidey::Report',
              /^\-\-SPIDEY version .+\-\-$/ ],
!           hmmer  = RuleRegexp[ 'Bio::HMMER::Report',
              /^HMMER +\d+\./ ],
!           sim4   = RuleRegexp[ 'Bio::Sim4::Report',
              /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
  
!           fastaformat = RuleProc.new('Bio::FastaFormat',
!                                      'Bio::NBRF',
!                                      'Bio::FastaNumericFormat') do |text|
              if /^>.+$/ =~ text
                case text