[BioRuby-cvs] bioruby/lib/bio/db aaindex.rb, 1.12, 1.13 fantom.rb, 1.9, 1.10 fasta.rb, 1.19, 1.20 gff.rb, 1.1, 1.2 go.rb, 1.6, 1.7 medline.rb, 1.11, 1.12 nbrf.rb, 1.2, 1.3 prosite.rb, 0.9, 0.10 transfac.rb, 1.8, 1.9

Wed Sep 7 21:22:13 EDT 2005

Update of /home/repository/bioruby/bioruby/lib/bio/db
In directory pub.open-bio.org:/tmp/cvs-serv9021/lib/bio/db

Modified Files:
	aaindex.rb fantom.rb fasta.rb gff.rb go.rb medline.rb nbrf.rb 
	prosite.rb transfac.rb 
Log Message:
* expanded tab at the line head

Index: prosite.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/prosite.rb,v
retrieving revision 0.9
retrieving revision 0.10
diff -C2 -d -r0.9 -r0.10
*** prosite.rb	11 Dec 2004 03:44:37 -0000	0.9
--- prosite.rb	8 Sep 2005 01:22:11 -0000	0.10
***************
*** 41,45 ****
      def name
        unless @data['ID']
! 	@data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
        end
        @data['ID']
--- 41,45 ----
      def name
        unless @data['ID']
!         @data['ID'], @data['TYPE'] = fetch('ID').chomp('.').split('; ')
        end
        @data['ID']
***************
*** 47,51 ****
      def division
        unless @data['TYPE']
! 	name
        end
        @data['TYPE']
--- 47,51 ----
      def division
        unless @data['TYPE']
!         name
        end
        @data['TYPE']
***************
*** 59,63 ****
      def ac
        unless @data['AC']
! 	@data['AC'] = fetch('AC').chomp(';')
        end
        @data['AC']
--- 59,63 ----
      def ac
        unless @data['AC']
!         @data['AC'] = fetch('AC').chomp(';')
        end
        @data['AC']
***************
*** 140,159 ****
      def nr
        unless @data['NR']
! 	hash = {}			# temporal hash
! 	fetch('NR').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
! 	  if v =~ /^(\d+)\((\d+)\)$/
! 	    hits = $1.to_i		# the number of hits
! 	    seqs = $2.to_i		# the number of sequences
! 	    v = [hits, seqs]
! 	  elsif v =~ /([\d\.]+),(\d+)/
! 	    sprel = $1			# the number of SWISS-PROT release
! 	    spseq = $2.to_i		# the number of SWISS-PROT sequences
! 	    v = [sprel, spseq]
! 	  else
! 	    v = v.to_i
! 	  end
! 	  hash[k] = v
! 	end
! 	@data['NR'] = hash
        end
        @data['NR']
--- 140,159 ----
      def nr
        unless @data['NR']
!         hash = {}			# temporal hash
!         fetch('NR').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
!           if v =~ /^(\d+)\((\d+)\)$/
!             hits = $1.to_i		# the number of hits
!             seqs = $2.to_i		# the number of sequences
!             v = [hits, seqs]
!           elsif v =~ /([\d\.]+),(\d+)/
!             sprel = $1			# the number of SWISS-PROT release
!             spseq = $2.to_i		# the number of SWISS-PROT sequences
!             v = [sprel, spseq]
!           else
!             v = v.to_i
!           end
!           hash[k] = v
!         end
!         @data['NR'] = hash
        end
        @data['NR']
***************
*** 244,252 ****
      def cc
        unless @data['CC']
! 	hash = {}			# temporal hash
! 	fetch('CC').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
! 	  hash[k] = v
! 	end
! 	@data['CC'] = hash
        end
        @data['CC']
--- 244,252 ----
      def cc
        unless @data['CC']
!         hash = {}			# temporal hash
!         fetch('CC').scan(%r{/(\S+)=([^;]+);}).each do |k, v|
!           hash[k] = v
!         end
!         @data['CC'] = hash
        end
        @data['CC']
***************
*** 257,271 ****
        range = comment['TAXO-RANGE']
        if range and expand
! 	expand = []
! 	range.scan(/./) do |x|
! 	  case x
! 	  when 'A'; expand.push('archaebacteria')
! 	  when 'B'; expand.push('bacteriophages')
! 	  when 'E'; expand.push('eukaryotes')
! 	  when 'P'; expand.push('prokaryotes')
! 	  when 'V'; expand.push('eukaryotic viruses')
! 	  end
! 	end
! 	range = expand
        end
        return range
--- 257,271 ----
        range = comment['TAXO-RANGE']
        if range and expand
!         expand = []
!         range.scan(/./) do |x|
!           case x
!           when 'A'; expand.push('archaebacteria')
!           when 'B'; expand.push('bacteriophages')
!           when 'E'; expand.push('eukaryotes')
!           when 'P'; expand.push('prokaryotes')
!           when 'V'; expand.push('eukaryotic viruses')
!           end
!         end
!         range = expand
        end
        return range
***************
*** 278,282 ****
      def site
        if comment['SITE']
! 	num, desc = comment['SITE'].split(',')
        end
        return [num.to_i, desc]
--- 278,282 ----
      def site
        if comment['SITE']
!         num, desc = comment['SITE'].split(',')
        end
        return [num.to_i, desc]
***************
*** 285,289 ****
      def skip_flag
        if comment['SKIP-FLAG'] == 'TRUE'
! 	return true
        end
      end
--- 285,289 ----
      def skip_flag
        if comment['SKIP-FLAG'] == 'TRUE'
!         return true
        end
      end
***************
*** 314,324 ****
      def dr
        unless @data['DR']
! 	hash = {}			# temporal hash
! 	if fetch('DR')
! 	  fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/).each do |a, e, c|
! 	    hash[a] = [e, c]	# SWISS-PROT : accession, entry, true/false
! 	  end
! 	end
! 	@data['DR'] = hash
        end
        @data['DR']
--- 314,324 ----
      def dr
        unless @data['DR']
!         hash = {}			# temporal hash
!         if fetch('DR')
!           fetch('DR').scan(/(\w+)\s*, (\w+)\s*, (.);/).each do |a, e, c|
!             hash[a] = [e, c]	# SWISS-PROT : accession, entry, true/false
!           end
!         end
!         @data['DR'] = hash
        end
        @data['DR']
***************
*** 329,340 ****
        ary = []
        sp_xref.each do |sp_acc, value|
! 	if value[1] == flag
! 	  if by_name
! 	    sp_name = value[0]
! 	    ary.push(sp_name)
! 	  else
! 	    ary.push(sp_acc)
! 	  end
! 	end
        end
        return ary
--- 329,340 ----
        ary = []
        sp_xref.each do |sp_acc, value|
!         if value[1] == flag
!           if by_name
!             sp_name = value[0]
!             ary.push(sp_name)
!           else
!             ary.push(sp_acc)
!           end
!         end
        end
        return ary
***************
*** 368,372 ****
      def pdb_xref
        unless @data['3D']
! 	@data['3D'] = fetch('3D').split(/; /)
        end
        @data['3D']
--- 368,372 ----
      def pdb_xref
        unless @data['3D']
!         @data['3D'] = fetch('3D').split(/; /)
        end
        @data['3D']
***************
*** 427,434 ****
        pattern.sub!(/>$/, '$')	# (2) restricted to the C-terminal : `>'
        pattern.gsub!(/\{(\w+)\}/) { |m|
! 	'[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
        }
        pattern.gsub!(/\(([\d,]+)\)/) { |m|
! 	'{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
        }
        pattern.tr!('x', '.')	# (5) any amino acid is accepted : 'x'
--- 427,434 ----
        pattern.sub!(/>$/, '$')	# (2) restricted to the C-terminal : `>'
        pattern.gsub!(/\{(\w+)\}/) { |m|
!         '[^' + $1 + ']'		# (3) not accepted at a given position : '{}'
        }
        pattern.gsub!(/\(([\d,]+)\)/) { |m|
!         '{' + $1 + '}'		# (4) repetition of an element : (n), (n,m)
        }
        pattern.tr!('x', '.')	# (5) any amino acid is accepted : 'x'

Index: go.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/go.rb,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** go.rb	4 Apr 2005 11:06:47 -0000	1.6
--- go.rb	8 Sep 2005 01:22:11 -0000	1.7
***************
*** 66,70 ****
      end

! 	
      # Bio::GO::Ontology.goid2term(goid)
      def goid2term(goid)
--- 66,70 ----
      end

!         
      # Bio::GO::Ontology.goid2term(goid)
      def goid2term(goid)
***************
*** 110,114 ****
              adj_list << Bio::Relation.new(stack[depth - 1], goid, rel)
            }
! 	    
            if en == ""
              loop {
--- 110,114 ----
              adj_list << Bio::Relation.new(stack[depth - 1], goid, rel)
            }
!             
            if en == ""
              loop {

Index: aaindex.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/aaindex.rb,v
retrieving revision 1.12
retrieving revision 1.13
diff -C2 -d -r1.12 -r1.13
*** aaindex.rb	21 Feb 2004 19:43:50 -0000	1.12
--- aaindex.rb	8 Sep 2005 01:22:11 -0000	1.13
***************
*** 81,85 ****

        if values.size != 20
! 	raise "Invalid format in #{entry_id} : #{values.inspect}"
        end

--- 81,85 ----

        if values.size != 20
!         raise "Invalid format in #{entry_id} : #{values.inspect}"
        end

***************
*** 107,120 ****

        aa.each_with_index do |a, i|
! 	case type
! 	when :string
! 	  hash[a] = values[i]
! 	when :float
! 	  hash[a] = values[i].to_f
! 	when :zscore
! 	  hash[a] = (values[i].to_f - mean) / sd
! 	when :integer
! 	  hash[a] = (values[i].to_f * 10 ** figure).to_i
! 	end
        end
        return hash
--- 107,120 ----

        aa.each_with_index do |a, i|
!         case type
!         when :string
!           hash[a] = values[i]
!         when :float
!           hash[a] = values[i].to_f
!         when :zscore
!           hash[a] = (values[i].to_f - mean) / sd
!         when :integer
!           hash[a] = (values[i].to_f * 10 ** figure).to_i
!         end
        end
        return hash
***************
*** 161,188 ****
        case field
        when / (ARNDCQEGHILKMFPSTWYV)\s+(.*)/	# 20x19/2 matrix
! 	aalist = $1
! 	values = $2.split(/\s+/)

! 	0.upto(aalist.length - 1) do |i|
! 	  @aa[aalist[i].chr] = i
! 	end

! 	ma = Array.new
! 	20.times do
! 	  ma.push(Array.new(20))		# 2D array of 20x(20)
! 	end

! 	for i in 0 .. 19 do
! 	  for j in i .. 19 do
! 	    ma[i][j] = values[i + j*(j+1)/2].to_f
! 	    ma[j][i] = ma[i][j]
! 	  end
! 	end
! 	Matrix[*ma]

        when / -ARNDCQEGHILKMFPSTWYV /		# 21x20/2 matrix (with gap)
! 	raise NotImplementedError
        when / ACDEFGHIKLMNPQRSTVWYJ- /		# 21x21 matrix (with gap)
! 	raise NotImplementedError
        end
      end
--- 161,188 ----
        case field
        when / (ARNDCQEGHILKMFPSTWYV)\s+(.*)/	# 20x19/2 matrix
!         aalist = $1
!         values = $2.split(/\s+/)

!         0.upto(aalist.length - 1) do |i|
!           @aa[aalist[i].chr] = i
!         end

!         ma = Array.new
!         20.times do
!           ma.push(Array.new(20))		# 2D array of 20x(20)
!         end

!         for i in 0 .. 19 do
!           for j in i .. 19 do
!             ma[i][j] = values[i + j*(j+1)/2].to_f
!             ma[j][i] = ma[i][j]
!           end
!         end
!         Matrix[*ma]

        when / -ARNDCQEGHILKMFPSTWYV /		# 21x20/2 matrix (with gap)
!         raise NotImplementedError
        when / ACDEFGHIKLMNPQRSTVWYJ- /		# 21x21 matrix (with gap)
!         raise NotImplementedError
        end
      end

Index: medline.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/medline.rb,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** medline.rb	7 Aug 2005 08:13:42 -0000	1.11
--- medline.rb	8 Sep 2005 01:22:11 -0000	1.12
***************
*** 32,39 ****
        tag = ''
        entry.each_line do |line|
! 	if line =~ /^\w/
! 	  tag = line[0,4].strip
! 	end
! 	@pubmed[tag] += line[6..-1] if line.length > 6
        end
      end
--- 32,39 ----
        tag = ''
        entry.each_line do |line|
!         if line =~ /^\w/
!           tag = line[0,4].strip
!         end
!         @pubmed[tag] += line[6..-1] if line.length > 6
        end
      end
***************
*** 109,117 ****
        pages = pg
        if pages =~ /-/
! 	from, to = pages.split('-')
! 	if (len = from.length - to.length) > 0
! 	  to = from[0,len] + to
! 	end
! 	pages = "#{from}-#{to}"
        end
        return pages
--- 109,117 ----
        pages = pg
        if pages =~ /-/
!         from, to = pages.split('-')
!         if (len = from.length - to.length) > 0
!           to = from[0,len] + to
!         end
!         pages = "#{from}-#{to}"
        end
        return pages
***************
*** 152,165 ****
        authors = []
        au.split(/\n/).each do |author|
! 	if author =~ / /
! 	  name = author.split(/\s+/)
! 	  suffix = name[-2] =~ /^[A-Z]+$/ ? name.pop : nil	# Jr etc.
! 	  initial = name.pop.split(//).join('. ')
! 	  author = "#{name.join(' ')}, #{initial}."
! 	end
! 	if suffix
! 	  author << " " + suffix
! 	end
! 	authors.push(author)
        end
        return authors
--- 152,165 ----
        authors = []
        au.split(/\n/).each do |author|
!         if author =~ / /
!           name = author.split(/\s+/)
!           suffix = name[-2] =~ /^[A-Z]+$/ ? name.pop : nil	# Jr etc.
!           initial = name.pop.split(//).join('. ')
!           author = "#{name.join(' ')}, #{initial}."
!         end
!         if suffix
!           author << " " + suffix
!         end
!         authors.push(author)
        end
        return authors

Index: gff.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/gff.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** gff.rb	21 Feb 2003 04:11:58 -0000	1.1
--- gff.rb	8 Sep 2005 01:22:11 -0000	1.2
***************
*** 27,31 ****
        @records = Array.new
        str.each_line do |line|
! 	@records << Record.new(line)
        end
      end
--- 27,31 ----
        @records = Array.new
        str.each_line do |line|
!         @records << Record.new(line)
        end
      end
***************
*** 34,42 ****
      class Record
        def initialize(str)
! 	@comments = str.chomp[/#.*/]
! 	return if /^#/.match(str)
! 	@seqname, @source, @feature, @start, @end, @score, @strand, @frame,
! 	  attributes, = str.chomp.split("\t")
! 	@attributes = parse_attributes(attributes) if attributes
        end
        attr_accessor :seqname, :source, :feature, :start, :end, :score,
--- 34,42 ----
      class Record
        def initialize(str)
!         @comments = str.chomp[/#.*/]
!         return if /^#/.match(str)
!         @seqname, @source, @feature, @start, @end, @score, @strand, @frame,
!           attributes, = str.chomp.split("\t")
!         @attributes = parse_attributes(attributes) if attributes
        end
        attr_accessor :seqname, :source, :feature, :start, :end, :score,
***************
*** 46,54 ****

        def parse_attributes(attributes)
! 	hash = Hash.new
! 	attributes.split(/[^\\];/).each do |atr|
! 	  key, value = atr.split(' ', 2)
! 	  hash[key] = value
! 	end
          return hash
        end
--- 46,54 ----

        def parse_attributes(attributes)
!         hash = Hash.new
!         attributes.split(/[^\\];/).each do |atr|
!           key, value = atr.split(' ', 2)
!           hash[key] = value
!         end
          return hash
        end

Index: fasta.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/fasta.rb,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** fasta.rb	15 Apr 2004 16:04:39 -0000	1.19
--- fasta.rb	8 Sep 2005 01:22:11 -0000	1.20
***************
*** 53,75 ****
      def seq
        unless defined?(@seq)
! 	unless /\A\s*^\#/ =~ @data then
! 	  @seq = Sequence.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
! 	else
! 	  a = @data.split(/(^\#.*$)/)
! 	  i = 0
! 	  cmnt = {}
! 	  s = []
! 	  a.each do |x|
              if /^# ?(.*)$/ =~ x then
! 	      cmnt[i] ? cmnt[i] << "\n" << $1 : cmnt[i] = $1
! 	    else
! 	      x.tr!(" \t\r\n0-9", '') # lazy clean up
! 	      i += x.length
! 	      s << x
! 	    end
! 	  end
! 	  @comment = cmnt
! 	  @seq = Bio::Sequence.new(s.join(''))
! 	end
        end
        @seq
--- 53,75 ----
      def seq
        unless defined?(@seq)
!         unless /\A\s*^\#/ =~ @data then
!           @seq = Sequence.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
!         else
!           a = @data.split(/(^\#.*$)/)
!           i = 0
!           cmnt = {}
!           s = []
!           a.each do |x|
              if /^# ?(.*)$/ =~ x then
!               cmnt[i] ? cmnt[i] << "\n" << $1 : cmnt[i] = $1
!             else
!               x.tr!(" \t\r\n0-9", '') # lazy clean up
!               i += x.length
!               s << x
!             end
!           end
!           @comment = cmnt
!           @seq = Bio::Sequence.new(s.join(''))
!         end
        end
        @seq
***************
*** 103,107 ****
      def identifiers
        unless defined?(@ids) then
! 	@ids = FastaDefline.new(@definition)
        end
        @ids
--- 103,107 ----
      def identifiers
        unless defined?(@ids) then
!         @ids = FastaDefline.new(@definition)
        end
        @ids
***************
*** 138,142 ****
      def data
        unless @list
! 	@list = @data.strip.split(/\s+/).map {|x| x.to_i}
        end
        @list
--- 138,142 ----
      def data
        unless @list
!         @list = @data.strip.split(/\s+/).map {|x| x.to_i}
        end
        @list
***************
*** 207,211 ****
        lines = str.split("\x01")
        lines.each do |line|
! 	add_defline(line)
        end
      end #def initialize
--- 207,211 ----
        lines = str.split("\x01")
        lines.each do |line|
!         add_defline(line)
        end
      end #def initialize
***************
*** 217,275 ****
        case str
        when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
! 	# NSIDs
! 	# examples:
! 	# >gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P
! 	#
! 	# note: regexp (:?) means grouping without backreferences
! 	i = $1
! 	d = $2
! 	tks = i.split('|')
! 	tks << '' if i[-1,1] == '|'
! 	a = parse_NSIDs(tks)
! 	i = a[0].join('|')
! 	a.unshift('|')
! 	d = tks.join('|') + ' ' + d unless tks.empty?
! 	a << d
! 	this_line = a
! 	match_EC(d)
! 	parse_square_brackets(d).each do |x|
! 	  if !match_EC(x, false) and x =~ /\A[A-Z]/ then
! 	    di = [  x ]
! 	    @list_ids << di
! 	    @info['organism'] = x unless @info['organism']
! 	  end
! 	end

        when /^\>?\s*([a-zA-Z0-9]+\:[^\s]+)\s*(.*)$/
! 	# examples:
! 	# >sce:YBR160W  CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
! 	# >emb:CACDC28 [X80034] C.albicans CDC28 gene 
! 	i = $1
! 	d = $2
! 	a = parse_ColonSepID(i)
! 	i = a.join(':')
! 	this_line = [ ':', a , d ]
! 	match_EC(d)
! 	parse_square_brackets(d).each do |x|
! 	  if !match_EC(x, false) and x =~ /:/ then
! 	    parse_ColonSepID(x)
! 	  elsif x =~ /\A\s*([A-Z][A-Z0-9_\.]+)\s*\z/ then
! 	    @list_ids << [ $1 ]
! 	  end
! 	end

        when /^\>?\s*(\S+)(?:\s+(.+))?$/
! 	# examples:
! 	# >ABC12345 this is test
! 	i = $1
! 	d = $2.to_s
! 	@list_ids << [ i.chomp('.') ]
! 	this_line = [  '', [ i ], d ]
! 	match_EC(d)
        else
! 	i = str
! 	d = ''
! 	match_EC(i)
! 	this_line = [ '', [ i ], d ]
        end

--- 217,275 ----
        case str
        when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
!         # NSIDs
!         # examples:
!         # >gi|9910844|sp|Q9UWG2|RL3_METVA 50S ribosomal protein L3P
!         #
!         # note: regexp (:?) means grouping without backreferences
!         i = $1
!         d = $2
!         tks = i.split('|')
!         tks << '' if i[-1,1] == '|'
!         a = parse_NSIDs(tks)
!         i = a[0].join('|')
!         a.unshift('|')
!         d = tks.join('|') + ' ' + d unless tks.empty?
!         a << d
!         this_line = a
!         match_EC(d)
!         parse_square_brackets(d).each do |x|
!           if !match_EC(x, false) and x =~ /\A[A-Z]/ then
!             di = [  x ]
!             @list_ids << di
!             @info['organism'] = x unless @info['organism']
!           end
!         end

        when /^\>?\s*([a-zA-Z0-9]+\:[^\s]+)\s*(.*)$/
!         # examples:
!         # >sce:YBR160W  CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
!         # >emb:CACDC28 [X80034] C.albicans CDC28 gene 
!         i = $1
!         d = $2
!         a = parse_ColonSepID(i)
!         i = a.join(':')
!         this_line = [ ':', a , d ]
!         match_EC(d)
!         parse_square_brackets(d).each do |x|
!           if !match_EC(x, false) and x =~ /:/ then
!             parse_ColonSepID(x)
!           elsif x =~ /\A\s*([A-Z][A-Z0-9_\.]+)\s*\z/ then
!             @list_ids << [ $1 ]
!           end
!         end

        when /^\>?\s*(\S+)(?:\s+(.+))?$/
!         # examples:
!         # >ABC12345 this is test
!         i = $1
!         d = $2.to_s
!         @list_ids << [ i.chomp('.') ]
!         this_line = [  '', [ i ], d ]
!         match_EC(d)
        else
!         i = str
!         d = ''
!         match_EC(i)
!         this_line = [ '', [ i ], d ]
        end

***************
*** 281,289 ****
        di = nil
        str.scan(/EC\:((:?[\-\d]+\.){3}(:?[\-\d]+))/i) do |x|
! 	di = [ 'EC', $1 ]
! 	if write_flag then
! 	  @info['ec'] = di[1] if (!@info['ec'] or @info['ec'].to_s =~ /\-/)
! 	  @list_ids << di
! 	end
        end
        di
--- 281,289 ----
        di = nil
        str.scan(/EC\:((:?[\-\d]+\.){3}(:?[\-\d]+))/i) do |x|
!         di = [ 'EC', $1 ]
!         if write_flag then
!           @info['ec'] = di[1] if (!@info['ec'] or @info['ec'].to_s =~ /\-/)
!           @list_ids << di
!         end
        end
        di
***************
*** 294,298 ****
        r = []
        str.scan(/\[([^\]]*)\]/) do |x|
! 	r << x[0]
        end
        r
--- 294,298 ----
        r = []
        str.scan(/\[([^\]]*)\]/) do |x|
!         r << x[0]
        end
        r
***************
*** 312,341 ****
        data = []
        while token = ary.shift
! 	if labels = self.class::NSIDs[token] then
! 	  di = [ token ]
! 	  idtype = token
! 	  labels.each do |x|
! 	    token = ary.shift
! 	    break unless token
! 	    if self.class::NSIDs[token] then
! 	      ary.unshift(token)
! 	      break #each
! 	    end
! 	    if token.length > 0 then
! 	      di << token
! 	    else
! 	      di << nil
! 	    end
! 	  end
! 	  data << di
! 	else
! 	  if token.length > 0 then
! 	    # UCID (uncontrolled identifiers)
! 	    di = [ token ]
! 	    data << di
! 	    @info['ucid'] = token unless @info['ucid']
! 	  end
! 	  break #while
! 	end
        end #while
        @list_ids.concat data
--- 312,341 ----
        data = []
        while token = ary.shift
!         if labels = self.class::NSIDs[token] then
!           di = [ token ]
!           idtype = token
!           labels.each do |x|
!             token = ary.shift
!             break unless token
!             if self.class::NSIDs[token] then
!               ary.unshift(token)
!               break #each
!             end
!             if token.length > 0 then
!               di << token
!             else
!               di << nil
!             end
!           end
!           data << di
!         else
!           if token.length > 0 then
!             # UCID (uncontrolled identifiers)
!             di = [ token ]
!             data << di
!             @info['ucid'] = token unless @info['ucid']
!           end
!           break #while
!         end
        end #while
        @list_ids.concat data
***************
*** 346,351 ****
      def to_s
        @deflines.collect { |a|
! 	s = a[0]
! 	(a[1..-2].collect { |x| x.join(s) }.join(s) + ' ' + a[-1]).strip
        }.join("\x01")
      end
--- 346,351 ----
      def to_s
        @deflines.collect { |a|
!         s = a[0]
!         (a[1..-2].collect { |x| x.join(s) }.join(s) + ' ' + a[-1]).strip
        }.join("\x01")
      end
***************
*** 357,361 ****
      def descriptions
        @deflines.collect do |a|
! 	a[-1]
        end
      end
--- 357,361 ----
      def descriptions
        @deflines.collect do |a|
!         a[-1]
        end
      end
***************
*** 364,379 ****
        r = []
        @list_ids.each do |a|
! 	if a.size >= 2 then
! 	  r.concat a[1..-1].find_all { |x| x }
! 	else
! 	  if a[0].to_s.size > 0 and a[0] =~ /\A[A-Za-z0-9\.\-\_]+\z/
! 	    r << a[0]
! 	  end
! 	end
        end
        r.concat( words(true, []).find_all do |x|
! 		 x =~ /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/ or
! 		   x =~ /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
! 	       end)
        r
      end
--- 364,379 ----
        r = []
        @list_ids.each do |a|
!         if a.size >= 2 then
!           r.concat a[1..-1].find_all { |x| x }
!         else
!           if a[0].to_s.size > 0 and a[0] =~ /\A[A-Za-z0-9\.\-\_]+\z/
!             r << a[0]
!           end
!         end
        end
        r.concat( words(true, []).find_all do |x|
!                  x =~ /\A[A-Z][A-Za-z0-9\_]*[0-9]+[A-Za-z0-9\_]+\z/ or
!                    x =~ /\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
!                end)
        r
      end
***************
*** 403,422 ****

      def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
! 	      kwhash = self.class::KillWordsHash)
        a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
        a.collect! do |x|
! 	x.sub!(/\A[\$\*\-\+]+/, '')
! 	x.sub!(/[\$\*\-\=]+\z/, '')
! 	if x.size <= 1 then
! 	  nil
! 	elsif kwhash[x.downcase] then
! 	  nil
! 	else
! 	  if kill_regexp.find { |expr| expr =~ x } then
! 	    nil
! 	  else
! 	    x
! 	  end
! 	end
        end
        a.compact!
--- 403,422 ----

      def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
!               kwhash = self.class::KillWordsHash)
        a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
        a.collect! do |x|
!         x.sub!(/\A[\$\*\-\+]+/, '')
!         x.sub!(/[\$\*\-\=]+\z/, '')
!         if x.size <= 1 then
!           nil
!         elsif kwhash[x.downcase] then
!           nil
!         else
!           if kill_regexp.find { |expr| expr =~ x } then
!             nil
!           else
!             x
!           end
!         end
        end
        a.compact!
***************
*** 431,449 ****
        r = nil
        unless r = @info[db] then
! 	di = @list_ids.find { |x| x[0] == db.to_s }
! 	if di and di.size <= 2 then
! 	  r = di[-1]
! 	elsif di then
! 	  labels = self.class::NSIDs[db]
! 	  [ 'acc_version', 'entry_id',
! 	    'locus', 'accession', 'number'].each do |x|
! 	    if i = labels.index(x) then
! 	      r = di[i+1]
! 	      break if r
! 	    end
! 	  end
! 	  r = di[1..-1].find { |x| x } unless r
! 	end
! 	@info[db] = r if r
        end
        r
--- 431,449 ----
        r = nil
        unless r = @info[db] then
!         di = @list_ids.find { |x| x[0] == db.to_s }
!         if di and di.size <= 2 then
!           r = di[-1]
!         elsif di then
!           labels = self.class::NSIDs[db]
!           [ 'acc_version', 'entry_id',
!             'locus', 'accession', 'number'].each do |x|
!             if i = labels.index(x) then
!               r = di[i+1]
!               break if r
!             end
!           end
!           r = di[1..-1].find { |x| x } unless r
!         end
!         @info[db] = r if r
        end
        r
***************
*** 452,460 ****
      def get_by_type(tstr)
        @list_ids.each do |x|
! 	if labels = self.class::NSIDs[x[0]] then
! 	  if i = labels.index(tstr) then
! 	    return x[i+1]
! 	  end
! 	end
        end
        nil
--- 452,460 ----
      def get_by_type(tstr)
        @list_ids.each do |x|
!         if labels = self.class::NSIDs[x[0]] then
!           if i = labels.index(tstr) then
!             return x[i+1]
!           end
!         end
        end
        nil
***************
*** 464,474 ****
        d = []
        @list_ids.each do |x|
! 	if labels = self.class::NSIDs[x[0]] then
! 	  tstrarg.each do |y|
! 	    if i = labels.index(y) then
! 	      d << x[i+1] if x[i+1]
! 	    end
! 	  end
! 	end
        end
        d
--- 464,474 ----
        d = []
        @list_ids.each do |x|
!         if labels = self.class::NSIDs[x[0]] then
!           tstrarg.each do |y|
!             if i = labels.index(y) then
!               d << x[i+1] if x[i+1]
!             end
!           end
!         end
        end
        d
***************
*** 477,481 ****
      def locus
        unless defined?(@locus)
! 	@locus = get_by_type('locus')
        end
        @locus
--- 477,481 ----
      def locus
        unless defined?(@locus)
!         @locus = get_by_type('locus')
        end
        @locus
***************
*** 484,488 ****
      def gi
        unless defined?(@gi) then
! 	@gi = get_by_type('gi')
        end
        @gi
--- 484,488 ----
      def gi
        unless defined?(@gi) then
!         @gi = get_by_type('gi')
        end
        @gi
***************
*** 491,495 ****
      def acc_version
        unless defined?(@acc_version) then
! 	@acc_version = get_by_type('acc_version')
        end
        @acc_version
--- 491,495 ----
      def acc_version
        unless defined?(@acc_version) then
!         @acc_version = get_by_type('acc_version')
        end
        @acc_version
***************
*** 498,503 ****
      def accessions
        unless defined?(@accessions) then
! 	@accessions = get_all_by_type('accession', 'acc_version')
! 	@accessions.collect! { |x| x.sub(/\..*\z/, '') }
        end
        @accessions
--- 498,503 ----
      def accessions
        unless defined?(@accessions) then
!         @accessions = get_all_by_type('accession', 'acc_version')
!         @accessions.collect! { |x| x.sub(/\..*\z/, '') }
        end
        @accessions
***************
*** 506,514 ****
      def accession
        unless defined?(@accession) then
! 	if acc_version then
! 	  @accession = acc_version.split('.')[0]
! 	else
! 	  @accession = accessions[0]
! 	end
        end
        @accession
--- 506,514 ----
      def accession
        unless defined?(@accession) then
!         if acc_version then
!           @accession = acc_version.split('.')[0]
!         else
!           @accession = accessions[0]
!         end
        end
        @accession
***************
*** 520,524 ****
        r = get(name, *args)
        if !r and !(self.class::NSIDs[name.to_s]) then
! 	raise "NameError: undefined method `#{name.inspect}'"
        end
        r
--- 520,524 ----
        r = get(name, *args)
        if !r and !(self.class::NSIDs[name.to_s]) then
!         raise "NameError: undefined method `#{name.inspect}'"
        end
        r

Index: fantom.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/fantom.rb,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** fantom.rb	28 Jul 2003 08:32:10 -0000	1.9
--- fantom.rb	8 Sep 2005 01:22:11 -0000	1.10
***************
*** 49,54 ****
        xml = ''
        Net::HTTP.start(addr, port, proxy.host, proxy.port) do |http|
! 	response, = http.get(path)
! 	xml = response.body
        end
        xml
--- 49,54 ----
        xml = ''
        Net::HTTP.start(addr, port, proxy.host, proxy.port) do |http|
!         response, = http.get(path)
!         xml = response.body
        end
        xml
***************
*** 68,385 ****

        def initialize(x)
! 	if x.is_a?(REXML::Element) then
! 	  @elem = x
! 	else
! 	  if x.is_a?(String) then
! 	    x = x.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n")
! 	  end
! 	  doc = REXML::Document.new(x)
! 	  @elem = doc.elements[self.class::Data_XPath]
! 	  #raise 'element is null' unless @elem
! 	  @elem = REXML::Document.new('') unless @elem
! 	end
        end
        attr_reader :elem

        def to_s
! 	@elem.to_s
        end

        def gsub_entities(str)
! 	# workaround for bug?
! 	if str then
! 	  str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) }
! 	else
! 	  str
! 	end
        end

        def entry_id
! 	unless defined?(@entry_id)
! 	  @entry_id = @elem.attributes['id']
! 	end
! 	@entry_id
        end
        def self.define_element_text_method(array)
! 	array.each do |tagstr|
! 	  module_eval("
! 	    def #{tagstr}
! 	      unless defined?(@#{tagstr})
! 		@#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
! 	      end
! 	      @#{tagstr}
! 	    end
! 	  ")
! 	end
        end
        private_class_method :define_element_text_method

        class Cluster < MaXML
! 	# (MaXML cluster)
! 	# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz

! 	Data_XPath = 'maxml-clusters/cluster'

! 	def representative_seqid
! 	  unless defined?(@representative_seqid)
! 	    @representative_seqid =
! 	      gsub_entities(@elem.text('representative-seqid'))
! 	  end
! 	  @representative_seqid
! 	end

! 	def sequences
! 	  unless defined?(@sequences)
! 	    @sequences = MaXML::Sequences.new(@elem)
! 	  end
! 	  @sequences
! 	end

! 	def sequence(idstr = nil)
! 	  idstr ? sequences[idstr] : representative_sequence
! 	end

! 	def representative_sequence
! 	  unless defined?(@representative_sequence)
! 	    rid = representative_seqid
   	    @representative_sequence =
! 	      rid ? sequences[representative_seqid] : nil
! 	  end
! 	  @representative_sequence
! 	end
! 	alias :representative_clone :representative_sequence

! 	def representative_annotations
! 	  e = representative_sequence
! 	  e ? e.annotations : nil
! 	end

! 	def representative_cloneid
! 	  e = representative_sequence
! 	  e ? e.cloneid : nil
! 	end

! 	define_element_text_method(%w(fantomid))
        end #class MaXML::Cluster

        class Sequences < MaXML
! 	Data_XPath = 'maxml-sequences'

! 	include Enumerable
! 	def each
! 	  to_a.each { |x| yield x }
! 	end

! 	def to_a
! 	  unless defined?(@sequences)
! 	    @sequences = @elem.get_elements('sequence')
! 	    @sequences.collect! { |e| MaXML::Sequence.new(e) }
! 	  end
! 	  @sequences
! 	end

! 	def get(idstr)
! 	  unless defined?(@hash)
! 	    @hash = {}
! 	  end
! 	  unless @hash.member?(idstr) then
! 	    @hash[idstr] = self.find do |x|
! 	      x.altid.values.index(idstr)
! 	    end
! 	  end
! 	  @hash[idstr]
! 	end

! 	def [](*arg)
! 	  if arg[0].is_a?(String) and arg.size == 1 then
! 	    get(arg[0])
! 	  else
! 	    to_a[*arg]
! 	  end
! 	end

! 	def cloneids
! 	  unless defined?(@cloneids)
! 	    @cloneids = to_a.collect { |x| x.cloneid }
! 	  end
! 	  @cloneids
! 	end

! 	def id_strings
! 	  unless defined?(@id_strings)
! 	    @id_strings = to_a.collect { |x| x.id_strings }
! 	    @id_strings.flatten!
! 	    @id_strings.sort!
! 	    @id_strings.uniq!
! 	  end
! 	  @id_strings
! 	end
        end #class MaXML::Sequences

        class Sequence < MaXML
! 	# (MaXML sequence)
! 	# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz
! 	# ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz
! 	
! 	Data_XPath = 'maxml-sequences/sequence'

! 	def altid(t = nil)
! 	  unless defined?(@altid)
! 	    @altid = {}
! 	    @elem.each_element('altid') do |e|
! 	      @altid[e.attributes['type']] = gsub_entities(e.text)
! 	    end
! 	  end
! 	  if t then
! 	    @altid[t]
! 	  else
! 	    @altid
! 	  end
! 	end

! 	def id_strings
! 	  altid.values.sort.uniq
! 	end

! 	def library_id
! 	  entry_id[0,2]
! 	end

! 	def annotations
! 	  unless defined?(@annotations)
! 	    @annotations =
! 	      MaXML::Annotations.new(@elem.elements['annotations'])
! 	  end
! 	  @annotations
! 	end

! 	define_element_text_method(%w(annotator version modified_time comment))

! 	def self.define_id_method(array)
! 	  array.each do |tagstr|
! 	    module_eval("
! 	      def #{tagstr}
! 	        unless defined?(@#{tagstr})
! 		  @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
! 		  @#{tagstr} = altid('#{tagstr}') unless @#{tagstr}
! 	        end
! 	        @#{tagstr}
! 	      end
! 	    ")
! 	  end
          end
          private_class_method :define_id_method

! 	define_id_method(%w(seqid fantomid cloneid rearrayid accession))
        end #class MaXML::Sequence

        class Annotations < MaXML
! 	Data_XPath = nil

! 	include Enumerable
! 	def each
! 	  to_a.each { |x| yield x }
! 	end

! 	def to_a
! 	  unless defined?(@a)
! 	    @a = @elem.get_elements('annotation')
! 	    @a.collect! { |e| MaXML::Annotation.new(e) }
! 	  end
! 	  @a
! 	end

! 	def get_all_by_qualifier(qstr)
! 	  unless defined?(@hash)
! 	    @hash = {}
! 	  end
! 	  unless @hash.member?(qstr) then
! 	    @hash[qstr] = self.find_all do |x|
! 	      x.qualifier == qstr
! 	    end
! 	  end
! 	  @hash[qstr]
! 	end

! 	def get_by_qualifier(qstr)
! 	  a = get_all_by_qualifier(qstr)
! 	  a ? a[0] : nil
! 	end

! 	def [](*arg)
! 	  if arg[0].is_a?(String) and arg.size == 1 then
! 	    get_by_qualifier(arg[0])
! 	  else
! 	    to_a[*arg]
! 	  end
! 	end

! 	def cds_start
! 	  unless defined?(@cds_start)
! 	    e = get_by_qualifier('cds_start')
! 	    @cds_start = e ? e.anntext.to_i : nil
! 	  end
! 	  @cds_start
! 	end

! 	def cds_stop
! 	  unless defined?(@cds_stop)
! 	    e = get_by_qualifier('cds_stop')
! 	    @cds_stop = e ? e.anntext.to_i : nil
! 	  end
! 	  @cds_stop
! 	end

! 	def gene_name
! 	  unless defined?(@gene_name)
! 	    e = get_by_qualifier('gene_name')
! 	    @gene_name = e ? e.anntext : nil
! 	  end
! 	  @gene_name
! 	end

! 	def data_source
! 	  unless defined?(@data_source)
! 	    e = get_by_qualifier('gene_name')
! 	    @data_source = e ? e.datasrc[0] : nil
! 	  end
! 	  @data_source
! 	end

! 	def evidence
! 	  unless defined?(@evidence)
! 	    e = get_by_qualifier('gene_name')
! 	    @evidence = e ? e.evidence : nil
! 	  end
! 	  @evidence
! 	end
        end #class MaXML::Annotations

        class Annotation < MaXML
! 	def entry_id
! 	  nil
! 	end

! 	class DataSrc < String
! 	  def initialize(text, href)
! 	    super(text)
! 	    @href = href
! 	  end
! 	  attr_reader :href
! 	end

! 	def datasrc
! 	  unless defined?(@datasrc)
! 	    @datasrc = []
! 	    @elem.each_element('datasrc') do |e|
! 	      text = e.text
! 	      href = e.attributes['href']
! 	      @datasrc << DataSrc.new(gsub_entities(text), gsub_entities(href))
! 	    end
! 	  end
! 	  @datasrc
! 	end

! 	define_element_text_method(%w(qualifier srckey anntext evidence))
        end #class MaXML::Annotation

--- 68,385 ----

        def initialize(x)
!         if x.is_a?(REXML::Element) then
!           @elem = x
!         else
!           if x.is_a?(String) then
!             x = x.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n")
!           end
!           doc = REXML::Document.new(x)
!           @elem = doc.elements[self.class::Data_XPath]
!           #raise 'element is null' unless @elem
!           @elem = REXML::Document.new('') unless @elem
!         end
        end
        attr_reader :elem

        def to_s
!         @elem.to_s
        end

        def gsub_entities(str)
!         # workaround for bug?
!         if str then
!           str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) }
!         else
!           str
!         end
        end

        def entry_id
!         unless defined?(@entry_id)
!           @entry_id = @elem.attributes['id']
!         end
!         @entry_id
        end
        def self.define_element_text_method(array)
!         array.each do |tagstr|
!           module_eval("
!             def #{tagstr}
!               unless defined?(@#{tagstr})
!                 @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
!               end
!               @#{tagstr}
!             end
!           ")
!         end
        end
        private_class_method :define_element_text_method

        class Cluster < MaXML
!         # (MaXML cluster)
!         # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz

!         Data_XPath = 'maxml-clusters/cluster'

!         def representative_seqid
!           unless defined?(@representative_seqid)
!             @representative_seqid =
!               gsub_entities(@elem.text('representative-seqid'))
!           end
!           @representative_seqid
!         end

!         def sequences
!           unless defined?(@sequences)
!             @sequences = MaXML::Sequences.new(@elem)
!           end
!           @sequences
!         end

!         def sequence(idstr = nil)
!           idstr ? sequences[idstr] : representative_sequence
!         end

!         def representative_sequence
!           unless defined?(@representative_sequence)
!             rid = representative_seqid
   	    @representative_sequence =
!               rid ? sequences[representative_seqid] : nil
!           end
!           @representative_sequence
!         end
!         alias :representative_clone :representative_sequence

!         def representative_annotations
!           e = representative_sequence
!           e ? e.annotations : nil
!         end

!         def representative_cloneid
!           e = representative_sequence
!           e ? e.cloneid : nil
!         end

!         define_element_text_method(%w(fantomid))
        end #class MaXML::Cluster

        class Sequences < MaXML
!         Data_XPath = 'maxml-sequences'

!         include Enumerable
!         def each
!           to_a.each { |x| yield x }
!         end

!         def to_a
!           unless defined?(@sequences)
!             @sequences = @elem.get_elements('sequence')
!             @sequences.collect! { |e| MaXML::Sequence.new(e) }
!           end
!           @sequences
!         end

!         def get(idstr)
!           unless defined?(@hash)
!             @hash = {}
!           end
!           unless @hash.member?(idstr) then
!             @hash[idstr] = self.find do |x|
!               x.altid.values.index(idstr)
!             end
!           end
!           @hash[idstr]
!         end

!         def [](*arg)
!           if arg[0].is_a?(String) and arg.size == 1 then
!             get(arg[0])
!           else
!             to_a[*arg]
!           end
!         end

!         def cloneids
!           unless defined?(@cloneids)
!             @cloneids = to_a.collect { |x| x.cloneid }
!           end
!           @cloneids
!         end

!         def id_strings
!           unless defined?(@id_strings)
!             @id_strings = to_a.collect { |x| x.id_strings }
!             @id_strings.flatten!
!             @id_strings.sort!
!             @id_strings.uniq!
!           end
!           @id_strings
!         end
        end #class MaXML::Sequences

        class Sequence < MaXML
!         # (MaXML sequence)
!         # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz
!         # ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz
!         
!         Data_XPath = 'maxml-sequences/sequence'

!         def altid(t = nil)
!           unless defined?(@altid)
!             @altid = {}
!             @elem.each_element('altid') do |e|
!               @altid[e.attributes['type']] = gsub_entities(e.text)
!             end
!           end
!           if t then
!             @altid[t]
!           else
!             @altid
!           end
!         end

!         def id_strings
!           altid.values.sort.uniq
!         end

!         def library_id
!           entry_id[0,2]
!         end

!         def annotations
!           unless defined?(@annotations)
!             @annotations =
!               MaXML::Annotations.new(@elem.elements['annotations'])
!           end
!           @annotations
!         end

!         define_element_text_method(%w(annotator version modified_time comment))

!         def self.define_id_method(array)
!           array.each do |tagstr|
!             module_eval("
!               def #{tagstr}
!                 unless defined?(@#{tagstr})
!                   @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
!                   @#{tagstr} = altid('#{tagstr}') unless @#{tagstr}
!                 end
!                 @#{tagstr}
!               end
!             ")
!           end
          end
          private_class_method :define_id_method

!         define_id_method(%w(seqid fantomid cloneid rearrayid accession))
        end #class MaXML::Sequence

        class Annotations < MaXML
!         Data_XPath = nil

!         include Enumerable
!         def each
!           to_a.each { |x| yield x }
!         end

!         def to_a
!           unless defined?(@a)
!             @a = @elem.get_elements('annotation')
!             @a.collect! { |e| MaXML::Annotation.new(e) }
!           end
!           @a
!         end

!         def get_all_by_qualifier(qstr)
!           unless defined?(@hash)
!             @hash = {}
!           end
!           unless @hash.member?(qstr) then
!             @hash[qstr] = self.find_all do |x|
!               x.qualifier == qstr
!             end
!           end
!           @hash[qstr]
!         end

!         def get_by_qualifier(qstr)
!           a = get_all_by_qualifier(qstr)
!           a ? a[0] : nil
!         end

!         def [](*arg)
!           if arg[0].is_a?(String) and arg.size == 1 then
!             get_by_qualifier(arg[0])
!           else
!             to_a[*arg]
!           end
!         end

!         def cds_start
!           unless defined?(@cds_start)
!             e = get_by_qualifier('cds_start')
!             @cds_start = e ? e.anntext.to_i : nil
!           end
!           @cds_start
!         end

!         def cds_stop
!           unless defined?(@cds_stop)
!             e = get_by_qualifier('cds_stop')
!             @cds_stop = e ? e.anntext.to_i : nil
!           end
!           @cds_stop
!         end

!         def gene_name
!           unless defined?(@gene_name)
!             e = get_by_qualifier('gene_name')
!             @gene_name = e ? e.anntext : nil
!           end
!           @gene_name
!         end

!         def data_source
!           unless defined?(@data_source)
!             e = get_by_qualifier('gene_name')
!             @data_source = e ? e.datasrc[0] : nil
!           end
!           @data_source
!         end

!         def evidence
!           unless defined?(@evidence)
!             e = get_by_qualifier('gene_name')
!             @evidence = e ? e.evidence : nil
!           end
!           @evidence
!         end
        end #class MaXML::Annotations

        class Annotation < MaXML
!         def entry_id
!           nil
!         end

!         class DataSrc < String
!           def initialize(text, href)
!             super(text)
!             @href = href
!           end
!           attr_reader :href
!         end

!         def datasrc
!           unless defined?(@datasrc)
!             @datasrc = []
!             @elem.each_element('datasrc') do |e|
!               text = e.text
!               href = e.attributes['href']
!               @datasrc << DataSrc.new(gsub_entities(text), gsub_entities(href))
!             end
!           end
!           @datasrc
!         end

!         define_element_text_method(%w(qualifier srckey anntext evidence))
        end #class MaXML::Annotation

Index: transfac.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/transfac.rb,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** transfac.rb	30 Aug 2002 06:31:47 -0000	1.8
--- transfac.rb	8 Sep 2005 01:22:11 -0000	1.9
***************
*** 46,50 ****
      def ac
        unless @data['AC']
! 	@data['AC'] = fetch('AC')
        end
        @data['AC']
--- 46,50 ----
      def ac
        unless @data['AC']
!         @data['AC'] = fetch('AC')
        end
        @data['AC']
***************
*** 127,142 ****
        key = ''
        @orig.each do |k, v|
! 	if k =~ /^0*(\d+)/
! 	  key = $1.to_i
! 	  ma_dat[key] = fetch(k) unless ma_dat[key]
! 	end
        end
        ma_dat.keys.sort.each_with_index do |k, i|
! 	rep_nt = ma_dat[k].slice!(-1, 1)
! 	ma_dat[k].slice!(-1, 1)
! 	ma_ary[i] = ma_dat[k].split(/\s+/)
! 	ma_ary[i].each_with_index do |x, j|
! 	  ma_ary[i][j] = x.to_i
! 	end
        end
        Matrix[*ma_ary]
--- 127,142 ----
        key = ''
        @orig.each do |k, v|
!         if k =~ /^0*(\d+)/
!           key = $1.to_i
!           ma_dat[key] = fetch(k) unless ma_dat[key]
!         end
        end
        ma_dat.keys.sort.each_with_index do |k, i|
!         rep_nt = ma_dat[k].slice!(-1, 1)
!         ma_dat[k].slice!(-1, 1)
!         ma_ary[i] = ma_dat[k].split(/\s+/)
!         ma_ary[i].each_with_index do |x, j|
!           ma_ary[i][j] = x.to_i
!         end
        end
        Matrix[*ma_ary]

Index: nbrf.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/nbrf.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** nbrf.rb	10 Oct 2003 11:49:43 -0000	1.2
--- nbrf.rb	8 Sep 2005 01:22:11 -0000	1.3
***************
*** 43,48 ****
        @definition = line2.to_s.chomp
        if /^>?([A-Za-z0-9]{2})\;(.*)/ =~ line1.to_s then
! 	@seq_type = $1
! 	@entry_id = $2
        end
      end
--- 43,48 ----
        @definition = line2.to_s.chomp
        if /^>?([A-Za-z0-9]{2})\;(.*)/ =~ line1.to_s then
!         @seq_type = $1
!         @entry_id = $2
        end
      end
***************
*** 60,70 ****
        case @seq_type
        when /[PF]1/
! 	# protein
! 	Sequence::AA
        when /[DR][LC]/, /N[13]/
! 	# nucleic
! 	Sequence::NA
        else
! 	Sequence
        end
      end
--- 60,70 ----
        case @seq_type
        when /[PF]1/
!         # protein
!         Sequence::AA
        when /[DR][LC]/, /N[13]/
!         # nucleic
!         Sequence::NA
        else
!         Sequence
        end
      end
***************
*** 72,76 ****
      def seq
        unless defined?(@seq)
! 	@seq = seq_class.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
        end
        @seq
--- 72,76 ----
      def seq
        unless defined?(@seq)
!         @seq = seq_class.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
        end
        @seq
***************
*** 83,91 ****
      def naseq
        if seq.is_a?(Bio::Sequence::AA) then
! 	raise 'not nucleic but protein sequence'
        elsif seq.is_a?(Bio::Sequence::NA) then
! 	seq
        else
! 	Bio::Sequence::NA.new(seq)
        end
      end
--- 83,91 ----
      def naseq
        if seq.is_a?(Bio::Sequence::AA) then
!         raise 'not nucleic but protein sequence'
        elsif seq.is_a?(Bio::Sequence::NA) then
!         seq
        else
!         Bio::Sequence::NA.new(seq)
        end
      end
***************
*** 97,105 ****
      def aaseq
        if seq.is_a?(Bio::Sequence::NA) then
! 	raise 'not nucleic but protein sequence'
        elsif seq.is_a?(Bio::Sequence::AA) then
! 	seq
        else
! 	Bio::Sequence::AA.new(seq)
        end
      end
--- 97,105 ----
      def aaseq
        if seq.is_a?(Bio::Sequence::NA) then
!         raise 'not nucleic but protein sequence'
        elsif seq.is_a?(Bio::Sequence::AA) then
!         seq
        else
!         Bio::Sequence::AA.new(seq)
        end
      end
***************
*** 114,131 ****
        seq = hash[:seq]
        unless seq_type
! 	if seq.is_a?(Bio::Sequence::AA) then
! 	  seq_type = 'P1'
! 	elsif seq.is_a?(Bio::Sequence::NA) then
! 	  seq_type = /u/i =~ seq ? 'RL' : 'DL'
! 	else
! 	  seq_type = 'XX'
! 	end
        end
        width = hash.has_key?(:width) ? hash[:width] : 70
        if width then
! 	seq = seq.to_s + "*"
! 	seq.gsub!(Regexp.new(".{1,#{width}}"), "\\0\n")
        else
! 	seq = seq.to_s + "*\n"
        end
        ">#{seq_type};#{hash[:entry_id]}\n#{hash[:definition]}\n#{seq}"
--- 114,131 ----
        seq = hash[:seq]
        unless seq_type
!         if seq.is_a?(Bio::Sequence::AA) then
!           seq_type = 'P1'
!         elsif seq.is_a?(Bio::Sequence::NA) then
!           seq_type = /u/i =~ seq ? 'RL' : 'DL'
!         else
!           seq_type = 'XX'
!         end
        end
        width = hash.has_key?(:width) ? hash[:width] : 70
        if width then
!         seq = seq.to_s + "*"
!         seq.gsub!(Regexp.new(".{1,#{width}}"), "\\0\n")
        else
!         seq = seq.to_s + "*\n"
        end
        ">#{seq_type};#{hash[:entry_id]}\n#{hash[:definition]}\n#{seq}"