[BioRuby-cvs] bioruby/lib/bio/data aa.rb,0.7,0.8 na.rb,0.8,0.9
Katayama Toshiaki
k at pub.open-bio.org
Sun Aug 7 04:19:30 EDT 2005
Update of /home/repository/bioruby/bioruby/lib/bio/data
In directory pub.open-bio.org:/tmp/cvs-serv18079/lib/bio/data
Modified Files:
aa.rb na.rb
Log Message:
* Bio::NucleicAcid and Bio::AminoAcid classes are refactored to have a Data
module; this module is both included and extended so that its methods are
available as both instance methods and class methods
* Bio::Sequence::NA class is rewritten (molecular_weight, to_re methods)
to use Bio::NucleicAcid
* molecular_weight method is fixed to subtract two hydrogens for each base
Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/data/aa.rb,v
retrieving revision 0.7
retrieving revision 0.8
diff -C2 -d -r0.7 -r0.8
*** aa.rb 14 May 2004 03:03:40 -0000 0.7
--- aa.rb 7 Aug 2005 08:19:28 -0000 0.8
***************
*** 2,6 ****
# bio/data/aa.rb - Amino Acids
#
! # Copyright (C) 2001 KATAYAMA Toshiaki <k at bioruby.org>
#
# This library is free software; you can redistribute it and/or
--- 2,6 ----
# bio/data/aa.rb - Amino Acids
#
! # Copyright (C) 2001, 2005 KATAYAMA Toshiaki <k at bioruby.org>
#
# This library is free software; you can redistribute it and/or
***************
*** 25,81 ****
class AminoAcid
! # IUPAC code
! # * http://www.iupac.org/
! # * http://www.chem.qmw.ac.uk/iubmb/newsletter/1999/item3.html
!
! Names= {
! 'A' => 'Ala',
! 'C' => 'Cys',
! 'D' => 'Asp',
! 'E' => 'Glu',
! 'F' => 'Phe',
! 'G' => 'Gly',
! 'H' => 'His',
! 'I' => 'Ile',
! 'K' => 'Lys',
! 'L' => 'Leu',
! 'M' => 'Met',
! 'N' => 'Asn',
! 'P' => 'Pro',
! 'Q' => 'Gln',
! 'R' => 'Arg',
! 'S' => 'Ser',
! 'T' => 'Thr',
! 'V' => 'Val',
! 'W' => 'Trp',
! 'Y' => 'Tyr',
! 'U' => 'Sec',
! 'Ala' => 'alanine',
! 'Cys' => 'cysteine',
! 'Asp' => 'aspartic acid',
! 'Glu' => 'glutamic acid',
! 'Phe' => 'phenylalanine',
! 'Gly' => 'glycine',
! 'His' => 'histidine',
! 'Ile' => 'isoleucine',
! 'Lys' => 'lysine',
! 'Leu' => 'leucine',
! 'Met' => 'methionine',
! 'Asn' => 'asparagine',
! 'Pro' => 'proline',
! 'Gln' => 'glutamine',
! 'Arg' => 'arginine',
! 'Ser' => 'serine',
! 'Thr' => 'threonine',
! 'Val' => 'valine',
! 'Trp' => 'tryptophan',
! 'Tyr' => 'tyrosine',
! 'Sec' => 'selenocysteine',
! }
! Weight = {
# AAindex FASG760101 - Molecular weight (Fasman, 1976)
--- 25,87 ----
class AminoAcid
! module Data
! # IUPAC code
! # * http://www.iupac.org/
! # * http://www.chem.qmw.ac.uk/iubmb/newsletter/1999/item3.html
! Names= {
! 'A' => 'Ala',
! 'C' => 'Cys',
! 'D' => 'Asp',
! 'E' => 'Glu',
! 'F' => 'Phe',
! 'G' => 'Gly',
! 'H' => 'His',
! 'I' => 'Ile',
! 'K' => 'Lys',
! 'L' => 'Leu',
! 'M' => 'Met',
! 'N' => 'Asn',
! 'P' => 'Pro',
! 'Q' => 'Gln',
! 'R' => 'Arg',
! 'S' => 'Ser',
! 'T' => 'Thr',
! 'V' => 'Val',
! 'W' => 'Trp',
! 'Y' => 'Tyr',
! 'B' => 'Asx', # D/N
! 'Z' => 'Glx', # E/Q
! 'U' => 'Sec', # 'uga' (stop)
! '?' => 'Pyl', # 'uag' (stop)
!
! 'Ala' => 'alanine',
! 'Cys' => 'cysteine',
! 'Asp' => 'aspartic acid',
! 'Glu' => 'glutamic acid',
! 'Phe' => 'phenylalanine',
! 'Gly' => 'glycine',
! 'His' => 'histidine',
! 'Ile' => 'isoleucine',
! 'Lys' => 'lysine',
! 'Leu' => 'leucine',
! 'Met' => 'methionine',
! 'Asn' => 'asparagine',
! 'Pro' => 'proline',
! 'Gln' => 'glutamine',
! 'Arg' => 'arginine',
! 'Ser' => 'serine',
! 'Thr' => 'threonine',
! 'Val' => 'valine',
! 'Trp' => 'tryptophan',
! 'Tyr' => 'tyrosine',
! 'Asx' => 'asparagine/aspartic acid',
! 'Glx' => 'glutamine/glutamic acid',
! 'Sec' => 'selenocysteine',
! 'Pyl' => 'pyrrolysine',
! }
# AAindex FASG760101 - Molecular weight (Fasman, 1976)
***************
*** 84,109 ****
# Proteins - Volume 1, CRC Press, Cleveland (1976)
! 'A' => 89.09,
! 'C' => 121.15,
! 'D' => 133.10,
! 'E' => 147.13,
! 'F' => 165.19,
! 'G' => 75.07,
! 'H' => 155.16,
! 'I' => 131.17,
! 'K' => 146.19,
! 'L' => 131.17,
! 'M' => 149.21,
! 'N' => 132.12,
! 'P' => 115.13,
! 'Q' => 146.15,
! 'R' => 174.20,
! 'S' => 105.09,
! 'T' => 119.12,
! 'U' => 168.06,
! 'V' => 117.15,
! 'W' => 204.23,
! 'Y' => 181.19,
! }
def aa
--- 90,229 ----
# Proteins - Volume 1, CRC Press, Cleveland (1976)
! Weight = {
!
! 'A' => 89.09,
! 'C' => 121.15, # 121.16 according to the Wikipedia
! 'D' => 133.10,
! 'E' => 147.13,
! 'F' => 165.19,
! 'G' => 75.07,
! 'H' => 155.16,
! 'I' => 131.17,
! 'K' => 146.19,
! 'L' => 131.17,
! 'M' => 149.21,
! 'N' => 132.12,
! 'P' => 115.13,
! 'Q' => 146.15,
! 'R' => 174.20,
! 'S' => 105.09,
! 'T' => 119.12,
! 'U' => 168.06,
! 'V' => 117.15,
! 'W' => 204.23,
! 'Y' => 181.19,
! }
!
! def weight(x = nil)
! if x
! Weight[x]
! else
! Weight
! end
! end
!
! def [](x)
! Names[x]
! end
!
! def name(x)
! str = Names[x]
! if str and str.length == 3
! Names[str]
! else
! str
! end
! end
!
! def to_1(x)
! case x.to_s.length
! when 1
! x
! when 3
! three2one(x)
! else
! name2one(x)
! end
! end
!
! def to_3(x)
! case x.to_s.length
! when 1
! one2three(x)
! when 3
! x
! else
! name2three(x)
! end
! end
!
! def one2three(x)
! if x and x.length != 1
! raise ArgumentError
! else
! Names[x]
! end
! end
!
! def three2one(x)
! if x and x.length != 3
! raise ArgumentError
! else
! reverse[x]
! end
! end
!
! def one2name(x)
! if x and x.length != 1
! raise ArgumentError
! else
! Names[x]
! end
! end
!
! def name2one(x)
! str = reverse[x.to_s.downcase]
! if str and str.length == 3
! reverse[str]
! else
! str
! end
! end
!
! def three2name(x)
! if x and x.length != 3
! raise ArgumentError
! else
! Names[x]
! end
! end
!
! def name2three(x)
! reverse[x.downcase]
! end
!
! private
!
! def reverse
! hash = Hash.new
! Names.each do |k, v|
! hash[v] = k
! end
! hash
! end
!
! end
!
!
! # as instance methods
! include Data
!
! # as class methods
! extend Data
!
!
! # backward compatibility
! Names = Data::Names
! Weight = Data::Weight
def aa
***************
*** 115,123 ****
end
! def self.weight
! Weight
end
end
end
--- 235,324 ----
end
! private
!
! alias :orig_reverse :reverse
! def reverse
! unless @reverse
! @reverse = orig_reverse
! end
! @reverse
end
end
+
+ end
+
+
+ if __FILE__ == $0
+
+ puts "### aa = Bio::AminoAcid.new"
+ aa = Bio::AminoAcid.new
+
+ puts "# Bio::AminoAcid['A']"
+ p Bio::AminoAcid['A']
+ puts "# aa['A']"
+ p aa['A']
+
+ puts "# Bio::AminoAcid.name('A')"
+ p Bio::AminoAcid.name('A')
+ puts "# aa.name('A')"
+ p aa.name('A')
+
+ puts "# Bio::AminoAcid.to_1('alanine')"
+ p Bio::AminoAcid.to_1('alanine')
+ puts "# aa.to_1('alanine')"
+ p aa.to_1('alanine')
+ puts "# Bio::AminoAcid.to_1('Ala')"
+ p Bio::AminoAcid.to_1('Ala')
+ puts "# aa.to_1('Ala')"
+ p aa.to_1('Ala')
+ puts "# Bio::AminoAcid.to_1('A')"
+ p Bio::AminoAcid.to_1('A')
+ puts "# aa.to_1('A')"
+ p aa.to_1('A')
+
+ puts "# Bio::AminoAcid.to_3('alanine')"
+ p Bio::AminoAcid.to_3('alanine')
+ puts "# aa.to_3('alanine')"
+ p aa.to_3('alanine')
+ puts "# Bio::AminoAcid.to_3('Ala')"
+ p Bio::AminoAcid.to_3('Ala')
+ puts "# aa.to_3('Ala')"
+ p aa.to_3('Ala')
+ puts "# Bio::AminoAcid.to_3('A')"
+ p Bio::AminoAcid.to_3('A')
+ puts "# aa.to_3('A')"
+ p aa.to_3('A')
+
+
+ puts "# Bio::AminoAcid.one2three('A')"
+ p Bio::AminoAcid.one2three('A')
+ puts "# aa.one2three('A')"
+ p aa.one2three('A')
+
+ puts "# Bio::AminoAcid.three2one('Ala')"
+ p Bio::AminoAcid.three2one('Ala')
+ puts "# aa.three2one('Ala')"
+ p aa.three2one('Ala')
+
+ puts "# Bio::AminoAcid.one2name('A')"
+ p Bio::AminoAcid.one2name('A')
+ puts "# aa.one2name('A')"
+ p aa.one2name('A')
+
+ puts "# Bio::AminoAcid.name2one('alanine')"
+ p Bio::AminoAcid.name2one('alanine')
+ puts "# aa.name2one('alanine')"
+ p aa.name2one('alanine')
+
+ puts "# Bio::AminoAcid.three2name('Ala')"
+ p Bio::AminoAcid.three2name('Ala')
+ puts "# aa.three2name('Ala')"
+ p aa.three2name('Ala')
+
+ puts "# Bio::AminoAcid.name2three('alanine')"
+ p Bio::AminoAcid.name2three('alanine')
+ puts "# aa.name2three('alanine')"
+ p aa.name2three('alanine')
end
Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/data/na.rb,v
retrieving revision 0.8
retrieving revision 0.9
diff -C2 -d -r0.8 -r0.9
*** na.rb 22 Nov 2002 23:10:12 -0000 0.8
--- na.rb 7 Aug 2005 08:19:28 -0000 0.9
***************
*** 25,78 ****
class NucleicAcid
! # IUPAC code
! # * Faisst and Meyer (Nucleic Acids Res. 20:3-26, 1992)
! # * http://www.ncbi.nlm.nih.gov/collab/FT/
! Names = {
! 'y' => '[tc]', # pYrimidine
! 'r' => '[ag]', # puRine
! 'w' => '[at]', # Weak
! 's' => '[gc]', # Strong
! 'k' => '[tg]', # Keto
! 'm' => '[ac]', # aMino
! 'b' => '[tgc]', # not A
! 'd' => '[atg]', # not C
! 'h' => '[atc]', # not G
! 'v' => '[agc]', # not T
! 'n' => '[atgc]',
! 'a' => 'a',
! 't' => 't',
! 'g' => 'g',
! 'c' => 'c',
! 'u' => 'u',
! 'A' => 'adenine',
! 'T' => 'thymine',
! 'G' => 'guanine',
! 'C' => 'cytosine',
! 'U' => 'uracil',
! }
! Weight = {
! # Calculated by BioPerl's Bio::Tools::SeqStats.pm :-)
! :adenine => 135.15,
! :thymine => 126.13,
! :guanine => 151.15,
! :cytosine => 111.12,
! :uracil => 112.10,
! :deoxyribose_phosphate => 196.11,
! :ribose_phosphate => 212.11,
! :water => 18.015,
! }
def na
--- 25,146 ----
class NucleicAcid
! module Data
! # IUPAC code
! # * Faisst and Meyer (Nucleic Acids Res. 20:3-26, 1992)
! # * http://www.ncbi.nlm.nih.gov/collab/FT/
! Names = {
! 'y' => '[tc]', # pYrimidine
! 'r' => '[ag]', # puRine
! 'w' => '[at]', # Weak
! 's' => '[gc]', # Strong
! 'k' => '[tg]', # Keto
! 'm' => '[ac]', # aMino
! 'b' => '[tgc]', # not A
! 'd' => '[atg]', # not C
! 'h' => '[atc]', # not G
! 'v' => '[agc]', # not T
! 'n' => '[atgc]',
! 'a' => 'a',
! 't' => 't',
! 'g' => 'g',
! 'c' => 'c',
! 'u' => 'u',
! 'A' => 'adenine',
! 'T' => 'thymine',
! 'G' => 'guanine',
! 'C' => 'cytosine',
! 'U' => 'uracil',
! }
! Weight = {
! # Calculated by BioPerl's Bio::Tools::SeqStats.pm :-)
! 'a' => 135.15,
! 't' => 126.13,
! 'g' => 151.15,
! 'c' => 111.12,
! 'u' => 112.10,
! :adenine => 135.15,
! :thymine => 126.13,
! :guanine => 151.15,
! :cytosine => 111.12,
! :uracil => 112.10,
! :deoxyribose_phosphate => 196.11,
! :ribose_phosphate => 212.11,
!
! :hydrogen => 1.00,
! :water => 18.015,
!
! }
!
! def weight(x = nil, rna = nil)
! if x
! if x.length > 1
! if rna
! phosphate = Weight[:ribose_phosphate]
! else
! phosphate = Weight[:deoxyribose_phosphate]
! end
! hydrogen = Weight[:hydrogen]
! water = Weight[:water]
!
! total = 0.0
! x.each_byte do |byte|
! base = byte.chr.downcase
! total += Weight[base] + phosphate - hydrogen * 2
! end
! total -= water * (x.length - 1)
! else
! Weight[x.to_s.downcase]
! end
! else
! Weight
! end
! end
!
! def [](x)
! Names[x]
! end
!
! def name(x)
! Names[x.to_s.upcase]
! end
!
! def to_re(seq)
! str = ""
! seq.to_s.downcase.each_byte do |base|
! if re = Names[base.chr]
! str += re
! else
! str += "."
! end
! end
! Regexp.new(str)
! end
!
! end
!
!
! # as instance methods
! include Data
!
! # as class methods
! extend Data
!
!
! # backward compatibility
! Names = Data::Names
! Weight = Data::Weight
def na
***************
*** 84,93 ****
end
- def self.weight
- Weight
- end
-
end
end
--- 152,182 ----
end
end
end
+
+ if __FILE__ == $0
+
+ puts "### na = Bio::NucleicAcid.new"
+ na = Bio::NucleicAcid.new
+
+ puts "# na.to_re('yrwskmbdhvnatgc')"
+ p na.to_re('yrwskmbdhvnatgc')
+
+ puts "# Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')"
+ p Bio::NucleicAcid.to_re('yrwskmbdhvnatgc')
+
+ puts "# na.weight('A')"
+ p na.weight('A')
+
+ puts "# Bio::NucleicAcid.weight('A')"
+ p Bio::NucleicAcid.weight('A')
+
+ puts "# na.weight('atgc')"
+ p na.weight('atgc')
+
+ puts "# Bio::NucleicAcid.weight('atgc')"
+ p Bio::NucleicAcid.weight('atgc')
+
+ end
More information about the bioruby-cvs
mailing list