[BioRuby-cvs] bioruby/lib/bio/db/embl sptr.rb,1.31,1.32
Mitsuteru C. Nakao
nakao at dev.open-bio.org
Fri Jun 16 17:01:03 UTC 2006
Update of /home/repository/bioruby/bioruby/lib/bio/db/embl
In directory dev.open-bio.org:/tmp/cvs-serv1306
Modified Files:
sptr.rb
Log Message:
* fixed a PROPEP parsing bug in Bio::SPTR#ft. (reported by Makoto
Hamaguchi-san).
* refactored the code of Bio::SPTR#id_line and Bio::SPTR#dt (no behavior change intended).
Index: sptr.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/db/embl/sptr.rb,v
retrieving revision 1.31
retrieving revision 1.32
diff -C2 -d -r1.31 -r1.32
*** sptr.rb 14 Apr 2006 05:52:28 -0000 1.31
--- sptr.rb 16 Jun 2006 17:01:01 -0000 1.32
***************
*** 2,6 ****
# = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
#
! # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n at bioruby.org>
# License:: Ruby's
#
--- 2,6 ----
# = bio/db/embl/sptr.rb - UniProt/SwissProt and TrEMBL database class
#
! # Copyright:: Copyright (C) 2001-2006 Mitsuteru C. Nakao <n at bioruby.org>
# License:: Ruby's
#
***************
*** 46,50 ****
@@data_class = ["STANDARD", "PRELIMINARY"]
-
# returns a Hash of the ID line.
#
--- 46,49 ----
***************
*** 57,84 ****
#
# === Examples
! # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD", "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
#
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
#
def id_line(key = nil)
! unless @data['ID']
! tmp = Hash.new
! a = @orig['ID'].split(/ +/)
! tmp['ENTRY_NAME'] = a[1]
! tmp['DATA_CLASS'] = a[2].sub(/;/,'')
! tmp['MOLECULE_TYPE'] = a[3].sub(/;/,'')
! tmp['SEQUENCE_LENGTH'] = a[4].to_i
! @data['ID'] = tmp
! end
! if key
! @data['ID'][key] # String/Int
! else
! @data['ID'] # Hash
! end
end
-
# returns a ENTRY_NAME in the ID line.
#
--- 56,78 ----
#
# === Examples
! # obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
! # "SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
#
# obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
#
def id_line(key = nil)
! return id_line[key] if key
! return @data['ID'] if @data['ID']
! part = @orig['ID'].split(/ +/)
! @data['ID'] = {
! 'ENTRY_NAME' => part[1],
! 'DATA_CLASS' => part[2].sub(/;/,''),
! 'MOLECULE_TYPE' => part[3].sub(/;/,''),
! 'SEQUENCE_LENGTH' => part[4].to_i
! }
end
# returns a ENTRY_NAME in the ID line.
#
***************
*** 128,145 ****
# DT DD-MMM-YYY (rel. NN, Last annotation update)
def dt(key = nil)
! unless @data['DT']
! tmp = Hash.new
! a = self.get('DT').split(/\n/)
! tmp['created'] = a[0].sub(/\w{2} /,'').strip
! tmp['sequence'] = a[1].sub(/\w{2} /,'').strip
! tmp['annotation'] = a[2].sub(/\w{2} /,'').strip
! @data['DT'] = tmp
! end
! if key
! @data['DT'][key]
! else
! @data['DT']
! end
end
--- 122,134 ----
# DT DD-MMM-YYY (rel. NN, Last annotation update)
def dt(key = nil)
! return dt[key] if key
! return @data['DT'] if @data['DT']
! part = self.get('DT').split(/\n/)
! @data['DT'] = {
! 'created' => part[0].sub(/\w{2} /,'').strip,
! 'sequence' => part[1].sub(/\w{2} /,'').strip,
! 'annotation' => part[2].sub(/\w{2} /,'').strip
! }
end
***************
*** 451,455 ****
# Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
! tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil, 'Variants' => []}
if /Event=(.+?);/ =~ ap
--- 440,445 ----
# Event, Named isoforms, Comment, [Name, Synonyms, IsoId, Sequnce]+
! tmp = {'Event' => nil, 'Named isoforms' => nil, 'Comment' => nil,
! 'Variants' => []}
if /Event=(.+?);/ =~ ap
***************
*** 499,503 ****
ms.each do |m|
! mass = {'MW'=>nil,'MW_ERR'=>nil,'METHOD'=>nil,'RANGE'=>nil}
m.sub(/.$/,'').split(/;/).each do |line|
case line
--- 489,493 ----
ms.each do |m|
! mass = {'MW' => nil,'MW_ERR' => nil,'METHOD' => nil,'RANGE' => nil}
m.sub(/.$/,'').split(/;/).each do |line|
case line
***************
*** 550,554 ****
it.map {|ent|
{:partner_id => ent[0].strip,
! :nbexp => ent[1].strip,
:intact_acc => ent[2].split(', ') }
}
--- 540,544 ----
it.map {|ent|
{:partner_id => ent[0].strip,
! :nbexp => ent[1].strip,
:intact_acc => ent[2].split(', ') }
}
***************
*** 577,583 ****
# returns conteins in the feature table.
# * Bio::SPTR#ft -> Hash
! # {'feature_name' => [{'From' => str, 'To' => str,
! # 'Description' => str, 'FTId' => str}],...}
#
# returns an Array of the information about the feature_name in the feature table.
--- 567,589 ----
# returns conteins in the feature table.
+ #
+ # == Examples
+ #
+ # sp = Bio::SPTR.new(entry)
+ # ft = sp.ft
+ # ft.class #=> Hash
+ # ft.keys.each do |feature_key|
+ # ft[feature_key].each do |feature|
+ # feature['From'] #=> '1'
+ # feature['To'] #=> '21'
+ # feature['Description'] #=> ''
+ # feature['FTId'] #=> ''
+ # feature['diff'] #=> []
+ # end
+ # end
+ #
# * Bio::SPTR#ft -> Hash
! # {FEATURE_KEY => [{'From' => int, 'To' => int, 'diff' => [],
! # 'Description' => aStr, 'FTId' => aStr}],...}
#
# returns an Array of the information about the feature_name in the feature table.
***************
*** 595,692 ****
# 35-75 Description (>=0 per key)
# ----- -----------------
! def ft(feature_name = nil)
! unless @data['FT']
! table = Hash.new()
! last_feature = nil
!
! begin
! get('FT').split(/\n/).each {|line|
!
! feature = line[5..12].strip
!
! if feature == '' and line[34..74]
! tmp = ' ' + line[34..74].strip
! table[last_feature].last['Description'] << tmp
!
! next unless /\.$/ =~ line
! else
! from = line[14..19].strip
! to = line[21..26].strip
! desc = line[34..74].strip if line[34..74]
! table[feature] = [] unless table[feature]
! table[feature] << {
! 'From' => from.to_i,
! 'To' => to.to_i,
! 'Description' => desc,
! 'diff' => [],
! 'FTId' => nil }
! last_feature = feature
! next
! end
! case last_feature
! when 'VARSPLIC', 'VARIANT', 'CONFLICT'
! if /FTId=(.+?)\./ =~ line # version 41 >
! ftid = $1
! table[last_feature].last['FTId'] = ftid
! table[last_feature].last['Description'].sub!(/ \/FTId=#{ftid}./,'')
! end
! case table[last_feature].last['Description']
! when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
! original = $1
! swap = $2
! original = original.gsub(/ /,'').strip
! swap = swap.gsub(/ /,'').strip
! when /Missing/i
! original = seq.subseq(table[last_feature].last['From'],
! table[last_feature].last['To'])
! swap = ''
! else
! raise line
! end
! table[last_feature].last['diff'] = [original, swap]
! end
! }
! rescue
! raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n" +
! "'#{self.get('FT')}'\n"
! end
! table.each_key do |k|
! table[k].each do |e|
! if / -> / =~ e['Description']
! pattern = /([A-Z][A-Z ]*[A-Z]*) -> ([A-Z][A-Z ]*[A-Z]*)/
! e['Description'].sub!(pattern) {
! a = $1
! b = $2
! a.gsub(/ /,'') + " -> " + b.gsub(/ /,'')
! }
! end
! if /- [\w\d]/ =~ e['Description']
! e['Description'].gsub!(/([\w\d]- [\w\d]+)/) {
! a = $1
! if /- AND/ =~ a
! a
! else
! a.sub(/ /,'')
! end
! }
! end
end
end
- @data['FT'] = table
end
!
! if feature_name
! @data['FT'][feature_name]
! else
! @data['FT']
end
end
# returns a Hash of conteins in the SQ lines.
# * Bio::SPTRL#sq -> hsh
--- 601,670 ----
# 35-75 Description (>=0 per key)
# ----- -----------------
! #
! def ft(feature_key = nil)
! return ft[feature_key] if feature_key
! return @data['FT'] if @data['FT']
! table = []
! begin
! get('FT').split("\n").each do |line|
! if line =~ /^FT \w/
! feature = line.chomp.ljust(74)
! table << [feature[ 5..12].strip, # Feature Name
! feature[14..19].strip, # From
! feature[21..26].strip, # To
! feature[34..74].strip ] # Description
! else
! table.last << line.chomp.sub!(/^FT +/, '')
! end
! end
! # Join Desctiption lines
! table = table.map { |feature|
! ftid = feature.pop if feature.last =~ /FTId=/
! if feature.size > 4
! feature = [feature[0], feature[1], feature[2],
! feature[3, feature.size - 3].join(" ")]
! end
! feature << ftid
! }
! hash = {}
! table.each do |feature|
! hash[feature[0]] = [] unless hash[feature[0]]
! hash[feature[0]] << {
! 'From' => feature[1].to_i,
! 'To' => feature[2].to_i,
! 'Description' => feature[3],
! 'FTId' => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
! 'diff' => []
! }
! case feature[0]
! when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
! case hash[feature[0]].last['Description']
! when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
! original_res = $1
! changed_res = $2
! original_res = original_res.gsub(/ /,'').strip
! chenged_res = changed_res.gsub(/ /,'').strip
! when /Missing/i
! original_res = seq.subseq(hash[feature[0]].last['From'],
! hash[feature[0]].last['To'])
! changed_res = ''
end
+ hash[feature[0]].last['diff'] = [original_res, chenged_res]
end
end
! rescue
! raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{self.get('FT')}'\n"
end
+
+ @data['FT'] = hash
end
+
# returns a Hash of conteins in the SQ lines.
# * Bio::SPTRL#sq -> hsh
***************
*** 694,698 ****
# returns a value of a key given in the SQ lines.
# * Bio::SPTRL#sq(key) -> int or str
! # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length', 'CRC64']
#
# === SQ Line; sequence header (1/entry)
--- 672,677 ----
# returns a value of a key given in the SQ lines.
# * Bio::SPTRL#sq(key) -> int or str
! # * Keys: ['MW', 'mw', 'molecular', 'weight', 'aalen', 'len', 'length',
! # 'CRC64']
#
# === SQ Line; sequence header (1/entry)
More information about the bioruby-cvs
mailing list