[BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.31,1.32

Fri Jun 16 17:01:03 UTC 2006

Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv1306

Modified Files:
	sptr.rb 
Log Message:
* fixed a PROPEP parsing bug in Bio::SPTR#ft (reported by Makoto 
  Hamaguchi-san).
* changed the code in Bio::SPTR#id_line and Bio::SPTR#dt. [refactoring] 


Index: sptr.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** sptr.rb	14 Apr 2006 05:52:28 -0000	1.31
--- sptr.rb	16 Jun 2006 17:01:01 -0000	1.32
***************
*** 2,6 ****
  # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
  # 
! # Copyright::   Copyright (C) 2001-2006 Mitsuteru C. Nakao <n at bioruby.org>
  # License::     Ruby's
  #
--- 2,6 ----
  # = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
  # 
! # Copyright::   Copyright (C) 2001-2006  Mitsuteru C. Nakao <n at bioruby.org>
  # License::     Ruby's
  #
***************
*** 46,50 ****
    @@data_class = ["STANDARD", "PRELIMINARY"]
  
-   
    # returns a Hash of the ID line.
    #
--- 46,49 ----
***************
*** 57,84 ****
    #
    # === Examples
!   #   obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
    #
    #   obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
    #
    def id_line(key = nil)
!     unless @data['ID']
!       tmp = Hash.new
!       a = @orig['ID'].split(/ +/)         
!       tmp['ENTRY_NAME']      = a[1]
!       tmp['DATA_CLASS']      = a[2].sub(/;/,'') 
!       tmp['MOLECULE_TYPE']   = a[3].sub(/;/,'')
!       tmp['SEQUENCE_LENGTH'] = a[4].to_i
!       @data['ID'] = tmp
!     end
  
!     if key
!       @data['ID'][key] # String/Int
!     else
!       @data['ID']      # Hash
!     end
    end
  
  
- 
    # returns a ENTRY_NAME in the ID line. 
    #
--- 56,78 ----
    #
    # === Examples
!   #   obj.id_line  #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", 
!   #                     "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
    #
    #   obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
    #
    def id_line(key = nil)
!     return id_line[key] if key
!     return @data['ID'] if @data['ID']
  
!     part = @orig['ID'].split(/ +/)         
!     @data['ID'] = {
!       'ENTRY_NAME'      => part[1],
!       'DATA_CLASS'      => part[2].sub(/;/,''),
!       'MOLECULE_TYPE'   => part[3].sub(/;/,''),
!       'SEQUENCE_LENGTH' => part[4].to_i 
!     }
    end
  
  
    # returns a ENTRY_NAME in the ID line. 
    #
***************
*** 128,145 ****
    #   DT DD-MMM-YYY (rel. NN, Last annotation update)
    def dt(key = nil)
!     unless @data['DT']
!       tmp = Hash.new
!       a = self.get('DT').split(/\n/)
!       tmp['created']    = a[0].sub(/\w{2}   /,'').strip
!       tmp['sequence']   = a[1].sub(/\w{2}   /,'').strip
!       tmp['annotation'] = a[2].sub(/\w{2}   /,'').strip
!       @data['DT'] = tmp
!     end
  
!     if key
!       @data['DT'][key]
!     else
!       @data['DT']
!     end
    end
  
--- 122,134 ----
    #   DT DD-MMM-YYY (rel. NN, Last annotation update)
    def dt(key = nil)
!     return dt[key] if key
!     return @data['DT'] if @data['DT']
  
!     part = self.get('DT').split(/\n/)
!     @data['DT'] = {
!       'created'    => part[0].sub(/\w{2}   /,'').strip,
!       'sequence'   => part[1].sub(/\w{2}   /,'').strip,
!       'annotation' => part[2].sub(/\w{2}   /,'').strip
!     }
    end
  
***************
*** 451,455 ****
  
        # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
!       tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 'Variants'  => []}
  
        if /Event=(.+?);/ =~ ap
--- 440,445 ----
  
        # Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
!       tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 
!              'Variants'  => []}
  
        if /Event=(.+?);/ =~ ap
***************
*** 499,503 ****
  
        ms.each do |m|
!         mass = {'MW'=>nil,'MW_ERR'=>nil,'METHOD'=>nil,'RANGE'=>nil}
          m.sub(/.$/,'').split(/;/).each do |line|
            case line
--- 489,493 ----
  
        ms.each do |m|
!         mass = {'MW' => nil,'MW_ERR' => nil,'METHOD' => nil,'RANGE' => nil}
          m.sub(/.$/,'').split(/;/).each do |line|
            case line
***************
*** 550,554 ****
      it.map {|ent|
        {:partner_id => ent[0].strip,
!        :nbexp => ent[1].strip, 
         :intact_acc => ent[2].split(', ') }
      }
--- 540,544 ----
      it.map {|ent|
        {:partner_id => ent[0].strip,
!        :nbexp      => ent[1].strip, 
         :intact_acc => ent[2].split(', ') }
      }
***************
*** 577,583 ****
  
    # returns conteins in the feature table.
    # * Bio::SPTR#ft -> Hash
!   #    {'feature_name' => [{'From' => str, 'To' => str,
!   #                         'Description' => str, 'FTId' => str}],...}
    #
    # returns an Array of the information about the feature_name in the feature table.
--- 567,589 ----
  
    # returns conteins in the feature table.
+   #
+   # == Examples
+   #
+   #  sp = Bio::SPTR.new(entry)
+   #  ft = sp.ft
+   #  ft.class #=> Hash
+   #  ft.keys.each do |feature_key|
+   #    ft[feature_key].each do |feature|
+   #      feature['From'] #=> '1'
+   #      feature['To']   #=> '21'
+   #      feature['Description'] #=> ''
+   #      feature['FTId'] #=> ''
+   #      feature['diff'] #=> []
+   #    end
+   #  end
+   #
    # * Bio::SPTR#ft -> Hash
!   #    {FEATURE_KEY => [{'From' => int, 'To' => int, 'diff' => [],
!   #                      'Description' => aStr, 'FTId' => aStr}],...}
    #
    # returns an Array of the information about the feature_name in the feature table.
***************
*** 595,692 ****
    #   35-75   Description (>=0 per key)
    #   -----   -----------------
!   def ft(feature_name = nil)
!     unless @data['FT']
!       table        = Hash.new()
!       last_feature = nil
! 
!       begin
!         get('FT').split(/\n/).each {|line|
! 
!           feature = line[5..12].strip
! 
!           if feature == '' and line[34..74]
!             tmp = ' ' + line[34..74].strip 
!             table[last_feature].last['Description'] << tmp
!             
!             next unless /\.$/ =~ line
!           else
!             from = line[14..19].strip
!             to   = line[21..26].strip
!             desc = line[34..74].strip if line[34..74]
  
!             table[feature] = [] unless table[feature]
!             table[feature] << {
!               'From'        => from.to_i, 
!               'To'          => to.to_i, 
!               'Description' => desc,
!               'diff'        => [],
!               'FTId'        => nil }
!             last_feature = feature
!             next
!           end
  
!           case last_feature
!           when 'VARSPLIC', 'VARIANT', 'CONFLICT'
!             if /FTId=(.+?)\./ =~ line   # version 41 >
!               ftid = $1
!               table[last_feature].last['FTId'] = ftid
!               table[last_feature].last['Description'].sub!(/ \/FTId=#{ftid}./,'') 
!             end
  
!             case table[last_feature].last['Description']
!             when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
!               original = $1
!               swap = $2
!               original = original.gsub(/ /,'').strip
!               swap = swap.gsub(/ /,'').strip
!             when /Missing/i
!               original = seq.subseq(table[last_feature].last['From'],
!                                     table[last_feature].last['To'])
!               swap = ''
!             else
!               raise line
!             end
!             table[last_feature].last['diff'] = [original, swap]
!           end
!         }
  
!       rescue
!         raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n" + 
!                   "'#{self.get('FT')}'\n"
!       end
  
!       table.each_key do |k|
!         table[k].each do |e|
!           if / -> / =~ e['Description']
!             pattern = /([A-Z][A-Z ]*[A-Z]*) -> ([A-Z][A-Z ]*[A-Z]*)/
!             e['Description'].sub!(pattern) {  
!               a = $1
!               b = $2
!               a.gsub(/ /,'') + " -> " + b.gsub(/ /,'') 
!             }
!           end
!           if /- [\w\d]/ =~ e['Description']
!             e['Description'].gsub!(/([\w\d]- [\w\d]+)/) { 
!               a = $1
!               if /- AND/ =~ a
!                 a
!               else
!                 a.sub(/ /,'') 
!               end
!             }
!           end
          end
        end
-       @data['FT'] = table
      end
! 
!     if feature_name
!       @data['FT'][feature_name]
!     else
!       @data['FT']
      end
    end
  
  
    # returns a Hash of conteins in the SQ lines.
    # * Bio::SPTRL#sq  -> hsh
--- 601,670 ----
    #   35-75   Description (>=0 per key)
    #   -----   -----------------
!   #
!   def ft(feature_key = nil)
!     return ft[feature_key] if feature_key
!     return @data['FT'] if @data['FT']
  
!     table = []
!     begin
!     get('FT').split("\n").each do |line|
!       if line =~ /^FT   \w/
!         feature = line.chomp.ljust(74)
!         table << [feature[ 5..12].strip,   # Feature Name
!                   feature[14..19].strip,   # From
!                   feature[21..26].strip,   # To
!                   feature[34..74].strip ]  # Description
!       else
!         table.last << line.chomp.sub!(/^FT +/, '')
!       end
!     end
  
!     # Join Desctiption lines
!     table = table.map { |feature| 
!       ftid = feature.pop if feature.last =~ /FTId=/
!       if feature.size > 4
!         feature = [feature[0], feature[1], feature[2], 
!                    feature[3, feature.size - 3].join(" ")]
!       end
!       feature << ftid
!     }
  
!     hash = {}
!     table.each do |feature|
!       hash[feature[0]] = [] unless hash[feature[0]]
  
!       hash[feature[0]] << {
!         'From' => feature[1].to_i, 
!         'To'   => feature[2].to_i, 
!         'Description' => feature[3], 
!         'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
!         'diff' => []
!       }
  
!       case feature[0]
!       when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
!         case hash[feature[0]].last['Description']
!         when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
!           original_res = $1
!           changed_res = $2
!           original_res = original_res.gsub(/ /,'').strip
!           chenged_res = changed_res.gsub(/ /,'').strip
!         when /Missing/i
!           original_res = seq.subseq(hash[feature[0]].last['From'],
!                                     hash[feature[0]].last['To'])
!           changed_res = ''
          end
+         hash[feature[0]].last['diff'] = [original_res, chenged_res]
        end
      end
!     rescue
!       raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
      end
+ 
+     @data['FT'] = hash
    end
  
  
+ 
    # returns a Hash of conteins in the SQ lines.
    # * Bio::SPTRL#sq  -> hsh
***************
*** 694,698 ****
    # returns a value of a key given in the SQ lines.
    # * Bio::SPTRL#sq(key)  -> int or str
!   # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 'CRC64']
    #
    # === SQ Line; sequence header (1/entry)
--- 672,677 ----
    # returns a value of a key given in the SQ lines.
    # * Bio::SPTRL#sq(key)  -> int or str
!   # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 
!   #          'CRC64']
    #
    # === SQ Line; sequence header (1/entry)