[BioRuby-cvs] bioruby/lib/bio/io flatfile.rb,1.47,1.48
Naohisa Goto
ngoto at pub.open-bio.org
Fri Mar 3 09:31:59 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory pub.open-bio.org:/tmp/cvs-serv29020/lib/bio/io
Modified Files:
flatfile.rb
Log Message:
* Added the RulesArray class, which exists only to provide a custom inspect
* Changed constants (like Bio::GenBank) to Strings (like "Bio::GenBank")
to avoid having to require almost all files when using autodetection
Index: flatfile.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/flatfile.rb,v
retrieving revision 1.47
retrieving revision 1.48
diff -C2 -d -r1.47 -r1.48
*** flatfile.rb 3 Mar 2006 08:18:49 -0000 1.47
--- flatfile.rb 3 Mar 2006 09:31:57 -0000 1.48
***************
*** 736,739 ****
--- 736,748 ----
include TSort
+ # Array to store autodetection rules.
+ # This is defined only for inspect.
+ class RulesArray < Array
+ # visualize contents
+ def inspect
+ "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
+ end
+ end #class RulesArray
+
# Template of a single rule of autodetection
class RuleTemplate
***************
*** 745,754 ****
# Creates a new element.
def initialize
! a = Array.new
! def a.inspect
! "[#{self.collect { |e| e.name.inspect }.join(' ')}]"
! end
! @higher_priority_elements = a.clone
! @lower_priority_elements = a.clone
@name = nil
end
--- 754,759 ----
# Creates a new element.
def initialize
! @higher_priority_elements = RulesArray.new
! @lower_priority_elements = RulesArray.new
@name = nil
end
***************
*** 784,787 ****
--- 789,810 ----
nil
end
+
+ private
+ # Gets constant from constant name given as a string.
+ def str2const(str)
+ const = Object
+ str.split(/\:\:/).each do |x|
+ const = const.const_get(x)
+ end
+ const
+ end
+
+ # Gets database class from given object.
+ # Current implementation is:
+ # if _obj_ is kind of String, regarded as a constant.
+ # Otherwise, returns _obj_ as is.
+ def get_dbclass(obj)
+ obj.kind_of?(String) ? str2const(obj) : obj
+ end
end #class Rule_Template
***************
*** 835,841 ****
super()
@re = re
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
@name = dbclass.to_s
end
--- 858,878 ----
super()
@re = re
@name = dbclass.to_s
+ @dbclass = nil
+ @dbclass_lazy = dbclass
+ end
+
+ # database class (lazy evaluation)
+ def dbclass
+ unless @dbclass
+ @dbclass = get_dbclass(@dbclass_lazy)
+ end
+ @dbclass
+ end
+ private :dbclass
+
+ # returns database classes
+ def dbclasses
+ [ dbclass ]
end
***************
*** 844,872 ****
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? @dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! class RuleRegexp2 < RuleTemplate
# Creates a new instance.
def initialize(dbclass, *regexps)
! super()
@regexps = regexps
- @dbclass = dbclass
- @dbclasses = [ dbclass ]
- if name
- @name = name
- else
- @name = @dbclass.to_s
- end
end
! # If given text matches the regexp, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return @dbclass if re =~ text
end
nil
--- 881,904 ----
# _meta_ is ignored.
def guess(text, meta)
! @re =~ text ? dbclass : nil
end
end #class RuleRegexp
# A autodetection rule to use more than two regular expressions.
! # If given string matches one of the regular expressions,
! # returns the database class.
! class RuleRegexp2 < RuleRegexp
# Creates a new instance.
def initialize(dbclass, *regexps)
! super(dbclass, nil)
@regexps = regexps
end
! # If given text matches one of the regexp, returns the database class.
# Otherwise, returns nil or false.
# _meta_ is ignored.
def guess(text, meta)
@regexps.each do |re|
! return dbclass if re =~ text
end
nil
***************
*** 880,887 ****
super()
@proc = proc
! @dbclasses = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
# If given text (and/or meta information) is known, returns
# the database class.
--- 912,928 ----
super()
@proc = proc
! @dbclasses = nil
! @dbclasses_lazy = dbclasses
@name = dbclasses.collect { |x| x.to_s }.join('|')
end
+ # database classes (lazy evaluation)
+ def dbclasses
+ unless @dbclasses
+ @dbclasses = @dbclasses_lazy.collect { |x| get_dbclass(x) }
+ end
+ @dbclasses
+ end
+
# If given text (and/or meta information) is known, returns
# the database class.
***************
*** 1039,1058 ****
def self.make_default
a = self[
! genbank = RuleRegexp[ Bio::GenBank,
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ Bio::GenPept,
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ Bio::MEDLINE,
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ Bio::EMBL,
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ Bio::SPTR,
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ Bio::PROSITE,
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ Bio::TRANSFAC,
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
--- 1080,1099 ----
def self.make_default
a = self[
! genbank = RuleRegexp[ 'Bio::GenBank',
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
! genpept = RuleRegexp[ 'Bio::GenPept',
/^LOCUS .+ aa .+/ ],
! medline = RuleRegexp[ 'Bio::MEDLINE',
/^UI \- [0-9]+$/ ],
! embl = RuleRegexp[ 'Bio::EMBL',
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
! sptr = RuleRegexp[ 'Bio::SPTR',
/^ID .+\; *PRT\;/ ],
! prosite = RuleRegexp[ 'Bio::PROSITE',
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
! transfac = RuleRegexp[ 'Bio::TRANSFAC',
/^AC [-A-Za-z0-9_\.]+$/ ],
! aaindex = RuleProc.new('Bio::AAindex1', 'Bio::AAindex2') do |text|
if /^H [-A-Z0-9_\.]+$/ =~ text then
if text =~ /^M [rc]/ then
***************
*** 1068,1098 ****
end,
! litdb = RuleRegexp[ Bio::LITDB,
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ Bio::KEGG::BRITE,
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ Bio::KEGG::KO,
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ Bio::KEGG::REACTION,
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ Bio::KEGG::GENES,
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ Bio::KEGG::GENOME,
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
! Bio::FANTOM::MaXML::Sequence) do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
--- 1109,1139 ----
end,
! litdb = RuleRegexp[ 'Bio::LITDB',
/^CODE [0-9]+$/ ],
! brite = RuleRegexp[ 'Bio::KEGG::BRITE',
/^Entry [A-Z0-9]+/ ],
! ko = RuleRegexp[ 'Bio::KEGG::KO',
/^ENTRY .+ KO\s*/ ],
! glycan = RuleRegexp[ 'Bio::KEGG::GLYCAN',
/^ENTRY .+ Glycan\s*/ ],
! enzyme = RuleRegexp2[ 'Bio::KEGG::ENZYME',
/^ENTRY EC [0-9\.]+$/,
/^ENTRY .+ Enzyme\s*/
],
! compound = RuleRegexp2[ 'Bio::KEGG::COMPOUND',
/^ENTRY C[A-Za-z0-9\._]+$/,
/^ENTRY .+ Compound\s*/
],
! reaction = RuleRegexp2[ 'Bio::KEGG::REACTION',
/^ENTRY R[A-Za-z0-9\._]+$/,
/^ENTRY .+ Reaction\s*/
],
! genes = RuleRegexp[ 'Bio::KEGG::GENES',
/^ENTRY .+ (CDS|gene|.*RNA) / ],
! genome = RuleRegexp[ 'Bio::KEGG::GENOME',
/^ENTRY [a-z]+$/ ],
! fantom = RuleProc.new('Bio::FANTOM::MaXML::Cluster',
! 'Bio::FANTOM::MaXML::Sequence') do |text|
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
case $1
***************
*** 1109,1143 ****
end,
! pdb = RuleRegexp[ Bio::PDB,
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ Bio::PDB::ChemicalComponent,
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ Bio::ClustalW::Report,
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ Bio::Blast::Report,
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ Bio::Blast::WU::Report,
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ Bio::Blast::Default::Report,
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ Bio::Blat::Report,
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ Bio::Spidey::Report,
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ Bio::HMMER::Report,
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ Bio::Sim4::Report,
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new(Bio::FastaFormat,
! Bio::NBRF,
! Bio::FastaNumericFormat) do |text|
if /^>.+$/ =~ text
case text
--- 1150,1184 ----
end,
! pdb = RuleRegexp[ 'Bio::PDB',
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
! het = RuleRegexp[ 'Bio::PDB::ChemicalComponent',
/^RESIDUE +.+ +\d+\s*$/ ],
! clustal = RuleRegexp[ 'Bio::ClustalW::Report',
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
! blastxml = RuleRegexp[ 'Bio::Blast::Report',
/\<\!DOCTYPE BlastOutput PUBLIC / ],
! wublast = RuleRegexp[ 'Bio::Blast::WU::Report',
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! wutblast = RuleRegexp[ 'Bio::Blast::WU::Report_TBlast',
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
! blast = RuleRegexp[ 'Bio::Blast::Default::Report',
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! tblast = RuleRegexp[ 'Bio::Blast::Default::Report_TBlast',
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
! blat = RuleRegexp[ 'Bio::Blat::Report',
/^psLayout version \d+\s*$/ ],
! spidey = RuleRegexp[ 'Bio::Spidey::Report',
/^\-\-SPIDEY version .+\-\-$/ ],
! hmmer = RuleRegexp[ 'Bio::HMMER::Report',
/^HMMER +\d+\./ ],
! sim4 = RuleRegexp[ 'Bio::Sim4::Report',
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
! fastaformat = RuleProc.new('Bio::FastaFormat',
! 'Bio::NBRF',
! 'Bio::FastaNumericFormat') do |text|
if /^>.+$/ =~ text
case text
More information about the bioruby-cvs
mailing list