[BioRuby-cvs] bioruby/lib/bio/sequence aa.rb, 1.2, 1.3 common.rb, 1.2, 1.3 compat.rb, 1.2, 1.3 format.rb, 1.2, 1.3 generic.rb, 1.3, 1.4 na.rb, 1.2, 1.3

Katayama Toshiaki k at dev.open-bio.org
Sun Mar 26 02:28:01 UTC 2006


Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv28853/sequence

Modified Files:
	aa.rb common.rb compat.rb format.rb generic.rb na.rb 
Log Message:
* Comprehensive documentation contributed by Ryan Raaum and Jan Aerts has been added.
* bug fixes in sequence.rb contributed by Ryan Raaum
  * Added 'U' and 'u' to the bases counted towards the nucleic acid total in Bio::Sequence#guess.  (Without this, RNA sequences were "guessed" to be Amino Acid sequences).
  * Changed the arguments for method_missing in Bio::Sequence from (*arg) to (sym, *args, &block).  With this argument set, blocks will be properly passed through to the encapsulated object.


Index: compat.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/compat.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** compat.rb	6 Feb 2006 14:18:03 -0000	1.2
--- compat.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 18,21 ****
--- 19,33 ----
    autoload :AA,     'bio/sequence/aa'
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
+   #   s = Bio::Sequence.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(@seq)
***************
*** 26,32 ****
--- 38,51 ----
  module Common
  
+   # *DEPRECATED* Do not use! Use Bio::Sequence#output instead. 
+   # 
    # Output the FASTA format string of the sequence.  The 1st argument is
    # used as the comment string.  If the 2nd option is given, the output
    # sequence will be folded.
+   # ---
+   # *Arguments*:
+   # * (optional) _header_: String object
+   # * (optional) _width_: Fixnum object (default nil)
+   # *Returns*:: String
    def to_fasta(header = '', width = nil)
      warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG
***************
*** 44,52 ****
  class NA
  
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
--- 63,89 ----
  class NA
  
+   # Generate a new random sequence with the given frequency of bases.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the base composition of an existing 
+   # sequence instance).
+   #
+   #   counts = {'a'=>1,'c'=>2,'g'=>3,'t'=>4}
+   #   puts Bio::Sequence::NA.randomize(counts)  #=> "ggcttgttac" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'a'=>0, 'c'=>0, 'g'=>0, 't'=>0}
+   #   Bio::Sequence::NA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"a"=>1, "c"=>2, "g"=>3, "t"=>4}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::NA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   def pikachu #:nodoc:
      self.dna.tr("atgc", "pika") # joke, of course :-)
    end
***************
*** 57,60 ****
--- 94,115 ----
  class AA
  
+   # Generate a new random sequence with the given frequency of residues.
+   # The sequence length is determined by their cumulative sum.
+   # (See also Bio::Sequence::Common#randomize which creates a new
+   # randomized sequence object using the residue composition of an existing 
+   # sequence instance).
+   #
+   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
+   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
+   #
+   # You may also feed the output of randomize into a block
+   #
+   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
+   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
+   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
+   # ---
+   # *Arguments*:
+   # * (optional) _hash_: Hash object
+   # *Returns*:: Bio::Sequence::AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)

Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** common.rb	6 Feb 2006 14:16:17 -0000	1.2
--- common.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 15,22 ****
  class Sequence
  
! # This module provides common methods for biological sequence classes
! # which must inherit String.
  module Common
  
    def to_s
      String.new(self)
--- 16,53 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::Common is a 
! # Mixin[http://www.rubycentral.com/book/tut_modules.html]
! # implementing methods common to
! # Bio::Sequence::AA and Bio::Sequence::NA.  All of these methods
! # are available to either Amino Acid or Nucleic Acid sequences, and
! # by encapsulation are also available to Bio::Sequence objects.
! #
! # = USAGE
! #
! #   # Create a sequence
! #   dna = Bio::Sequence.auto('atgcatgcatgc')
! #
! #   # Splice out a subsequence using a Genbank-style location string
! #   puts dna.splice('complement(1..4)')
! #
! #   # What is the base composition?
! #   puts dna.composition
! #
! #   # Create a random sequence with the composition of a current sequence
! #   puts dna.randomize
  module Common
  
+   # Return sequence as 
+   # String[http://corelib.rubyonrails.org/classes/String.html].
+   # The original sequence is unchanged.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.to_s                             #=> 'atgc'
+   #   puts s.to_s.class                       #=> String
+   #   puts s                                  #=> 'atgc'
+   #   puts s.class                            #=> Bio::Sequence::NA
+   # ---
+   # *Returns*:: String object
    def to_s
      String.new(self)
***************
*** 24,34 ****
    alias to_str to_s
  
!   # Force self to re-initialize for clean up (remove white spaces,
!   # case unification).
    def seq
      self.class.new(self)
    end
  
!   # Similar to the 'seq' method, but changes the self object destructively.
    def normalize!
      initialize(self)
--- 55,79 ----
    alias to_str to_s
  
!   # Create a new sequence based on the current sequence.
!   # The original sequence is unchanged.
!   # 
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s2 = s.seq
!   #   puts s2                                 #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def seq
      self.class.new(self)
    end
  
!   # Normalize the current sequence, removing all whitespace and 
!   # transforming all positions to uppercase if the sequence is AA or
!   # transforming all positions to lowercase if the sequence is NA.
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   s.normalize!
!   # ---
!   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def normalize!
      initialize(self)
***************
*** 37,40 ****
--- 82,95 ----
    alias seq! normalize!
  
+   # Add new data to the end of the current sequence.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s << 'atgc'
+   #   puts s                                  #=> "atgcatgc"
+   #   s << s
+   #   puts s                                  #=> "atgcatgcatgcatgc"
+   # ---
+   # *Returns*:: current Bio::Sequence::NA/AA object (modified)
    def <<(*arg)
      super(self.class.new(*arg))
***************
*** 42,50 ****
    alias concat <<
  
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns the subsequence of the self string.
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
--- 97,141 ----
    alias concat <<
  
+   # Create a new sequence by adding to an existing sequence.
+   # The existing sequence is not modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   s2 = s + 'atgc'
+   #   puts s2                                 #=> "atgcatgc"
+   #   puts s                                  #=> "atgc"
+   #
+   # The new sequence is of the same class as the existing sequence if 
+   # the new data was added to an existing sequence,
+   #
+   #   puts s2.class == s.class                #=> true
+   #
+   # but if an existing sequence is added to a String, the result is a String
+   #
+   #   s3 = 'atgc' + s
+   #   puts s3.class                           #=> String
+   # ---
+   # *Returns*:: new Bio::Sequence::NA/AA *or* String object
    def +(*arg)
      self.class.new(super(*arg))
    end
  
!   # Returns a new sequence containing the subsequence identified by the 
!   # start and end numbers given as parameters.  *Important:* Biological 
!   # sequence numbering conventions (one-based) rather than ruby's 
!   # (zero-based) numbering conventions are used.  
!   #
!   #   s = Bio::Sequence::NA.new('atggaatga')
!   #   puts s.subseq(1,3)                      #=> "atg"
!   #
!   # Start defaults to 1 and end defaults to the entire existing string, so
!   # subseq called without any parameters simply returns a new sequence 
!   # identical to the existing sequence.
!   #
!   #   puts s.subseq                           #=> "atggaatga"
!   # ---
!   # *Arguments*:
!   # * (optional) _s_(start): Integer (default 1)
!   # * (optional) _e_(end): Integer (default current sequence length)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def subseq(s = 1, e = self.length)
      raise "Error: start/end position must be a positive integer" unless s > 0 and e > 0
***************
*** 54,80 ****
    end
  
!   # This method iterates on sub string with specified length 'window_size'.
!   # By specifing 'step_size', codon sized shifting or spliting genome
!   # sequence with ovelapping each end can easily be yielded.
    #
!   # The remainder sequence at the terminal end will be returned.
    #
!   # Example:
!   #   # prints average GC% on each 100bp
!   #   seq.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   # prints every translated peptide (length 5aa) in the same frame
!   #   seq.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #   # split genome sequence by 10000bp with 1000bp overlap in fasta format
    #   i = 1
!   #   remainder = seq.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   #
    def window_search(window_size, step_size = 1)
      i = 0
--- 145,177 ----
    end
  
!   # This method steps through a sequence in steps of 'step_size' by 
!   # subsequences of 'window_size'. Typically used with a block.
!   # Any remaining sequence at the terminal end will be returned.
    #
!   # Prints average GC% on each 100bp
    #
!   #   s.window_search(100) do |subseq|
    #     puts subseq.gc
    #   end
!   #   
!   # Prints every translated peptide (length 5aa) in the same frame
!   #
!   #   s.window_search(15, 3) do |subseq|
    #     puts subseq.translate
    #   end
!   #
!   # Split genome sequence by 10000bp with 1000bp overlap in fasta format
!   #
    #   i = 1
!   #   remainder = s.window_search(10000, 9000) do |subseq|
    #     puts subseq.to_fasta("segment #{i}", 60)
    #     i += 1
    #   end
    #   puts remainder.to_fasta("segment #{i}", 60)
!   # ---
!   # *Arguments*:
!   # * (required) _window_size_: Fixnum
!   # * (optional) _step_size_: Fixnum (default 1)
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def window_search(window_size, step_size = 1)
      i = 0
***************
*** 85,91 ****
    end
  
!   # This method receive a hash of residues/bases to the particular values,
!   # and sum up the value along with the self sequence.  Especially useful
!   # to use with the window_search method and amino acid indices etc.
    def total(hash)
      hash.default = 0.0 unless hash.default
--- 182,195 ----
    end
  
!   # Returns a float total value for the sequence given a hash of
!   # base or residue values,
!   #
!   #   values = {'a' => 0.1, 't' => 0.2, 'g' => 0.3, 'c' => 0.4}
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.total(values)                    #=> 1.0
!   # ---
!   # *Arguments*:
!   # * (required) _hash_: Hash object
!   # *Returns*:: Float object
    def total(hash)
      hash.default = 0.0 unless hash.default
***************
*** 100,103 ****
--- 204,212 ----
  
    # Returns a hash of the occurrence counts for each residue or base.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.composition              #=> {"a"=>1, "c"=>1, "g"=>1, "t"=>1}
+   # ---
+   # *Returns*:: Hash object
    def composition
      count = Hash.new(0)
***************
*** 108,118 ****
    end
  
!   # Returns a randomized sequence keeping its composition by default.
!   # The argument is required when generating a random sequence from the empty
!   # sequence (used by the class methods NA.randomize, AA.randomize).
!   # If the block is given, yields for each random residue/base.
    def randomize(hash = nil)
      length = self.length
      if hash
        count = hash.clone
        count.each_value {|x| length += x}
--- 217,244 ----
    end
  
!   # Returns a randomized sequence. The default is to retain the same 
!   # base/residue composition as the original.  If a hash of base/residue 
!   # counts is given, the new sequence will be based on that hash 
!   # composition.  If a block is given, each new randomly selected 
!   # position will be passed into the block.  In all cases, the
!   # original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.randomize                        #=> "tcag"  (for example)
!   #
!   #   new_composition = {'a' => 2, 't' => 2}
!   #   puts s.randomize(new_composition)       #=> "ttaa"  (for example)
!   #
!   #   count = 0
!   #   s.randomize { |x| count += 1 }
!   #   puts count                              #=> 4
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: new Bio::Sequence::NA/AA object
    def randomize(hash = nil)
      length = self.length
      if hash
+       length = 0
        count = hash.clone
        count.each_value {|x| length += x}
***************
*** 139,151 ****
    end
  
!   # Generate a new random sequence with the given frequency of bases
!   # or residues.  The sequence length is determined by the sum of each
!   # base/residue occurences.
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Receive a GenBank style position string and convert it to the Locations
!   # objects to splice the sequence itself.  See also: bio/location.rb
    def splice(position)
      unless position.is_a?(Locations) then
--- 265,305 ----
    end
  
!   # Generate a new random sequence with the given frequency of bases.
!   # The sequence length is determined by their cumulative sum.
!   # (See also Bio::Sequence::Common#randomize which creates a new
!   # randomized sequence object using the base composition of an existing 
!   # sequence instance).
!   #
!   #   counts = {'R'=>1,'L'=>2,'E'=>3,'A'=>4}
!   #   puts Bio::Sequence::AA.randomize(counts)  #=> "AAEAELALRE" (for example)
!   #
!   # You may also feed the output of randomize into a block
!   #
!   #   actual_counts = {'R'=>0,'L'=>0,'E'=>0,'A'=>0}
!   #   Bio::Sequence::AA.randomize(counts) {|x| actual_counts[x] += 1}
!   #   actual_counts                     #=> {"A"=>4, "L"=>2, "E"=>3, "R"=>1}
!   # ---
!   # *Arguments*:
!   # * (optional) _hash_: Hash object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def self.randomize(*arg, &block)
      self.new('').randomize(*arg, &block)
    end
  
!   # Return a new sequence extracted from the original using a GenBank style 
!   # position string.  See also documentation for the Bio::Location class.
!   #
!   #   s = Bio::Sequence::NA.new('atgcatgcatgcatgc')
!   #   puts s.splice('1..3')                           #=> "atg"
!   #   puts s.splice('join(1..3,8..10)')               #=> "atgcat"
!   #   puts s.splice('complement(1..3)')               #=> "cat"
!   #   puts s.splice('complement(join(1..3,8..10))')   #=> "atgcat"
!   #
!   # Note that 'complement'ed Genbank position strings will have no 
!   # effect on Bio::Sequence::AA objects.
!   # ---
!   # *Arguments*:
!   # * (required) _position_: String *or* Bio::Locations object
!   # *Returns*:: Bio::Sequence::NA/AA object
    def splice(position)
      unless position.is_a?(Locations) then

Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** format.rb	6 Feb 2006 14:20:35 -0000	1.2
--- format.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 4,8 ****
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>
  # License::     Ruby's
  #
--- 4,9 ----
  # Copyright::   Copyright (C) 2006
  #               Toshiaki Katayama <k at bioruby.org>,
! #               Naohisa Goto <ng at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 21,29 ****
  class Sequence
  
  module Format
  
!   # Output the FASTA format string of the sequence.  The 1st argument is
!   # used in the comment line.  If the 2nd argument (integer) is given,
!   # the output sequence will be folded.
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
--- 22,56 ----
  class Sequence
  
+ # = DESCRIPTION
+ # A Mixin[http://www.rubycentral.com/book/tut_modules.html]
+ # of methods used by Bio::Sequence#output to output sequences in 
+ # common bioinformatic formats.  These are not called in isolation.
+ #
+ # = USAGE
+ #   # Given a Bio::Sequence object,
+ #   puts s.output(:fasta)
+ #   puts s.output(:genbank)
+ #   puts s.output(:embl)
  module Format
  
!   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
!   # case, it would be difficult to successfully call this method outside
!   # its expected context).
!   #
!   # Output the FASTA format string of the sequence.  
!   #
!   # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of 
!   # using either the header or width arguments.  So something needs to be
!   # changed...
!   #
!   # Currently, this method is used in Bio::Sequence#output like so,
!   #
!   #   s = Bio::Sequence.new('atgc')
!   #   puts s.output(:fasta)                   #=> "> \natgc\n"
!   # ---
!   # *Arguments*:
!   # * (optional) _header_: String (default nil)
!   # * (optional) _width_: Fixnum (default nil)
!   # *Returns*:: String object
    def format_fasta(header = nil, width = nil)
      header ||= "#{@entry_id} #{@definition}"
***************
*** 37,44 ****
    end
  
!   def format_gff
      raise NotImplementedError
    end
  
    def format_genbank
      prefix = ' ' * 5
--- 64,83 ----
    end
  
!   # Not yet implemented :)
!   # Remove the nodoc command after implementation!
!   # ---
!   # *Returns*:: String object
!   def format_gff #:nodoc:
      raise NotImplementedError
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the Genbank format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_genbank
      prefix = ' ' * 5
***************
*** 49,52 ****
--- 88,99 ----
    end
  
+   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
+   # case, it would be difficult to successfully call this method outside
+   # its expected context).
+   #
+   # Output the EMBL format string of the sequence.  
+   # Used in Bio::Sequence#output.
+   # ---
+   # *Returns*:: String object
    def format_embl
      prefix = 'FT   '

Index: aa.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/aa.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** aa.rb	6 Feb 2006 14:11:31 -0000	1.2
--- aa.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 17,27 ****
  class Sequence
  
! 
! # Amino Acid sequence
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate a amino acid sequence object from a string.
    def initialize(str)
      super
--- 18,61 ----
  class Sequence
  
! # = DESCRIPTION
! # Bio::Sequence::AA represents a bare Amino Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create an Amino Acid sequence.
! #   aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
! #
! #   # What are the three-letter codes for all the residues?
! #   puts aa.codes
! #
! #   # What are the names of all the residues?
! #   puts aa.names
! #
! #   # What is the molecular weight of this peptide?
! #   puts aa.molecular_weight
  class AA < String
  
    include Bio::Sequence::Common
  
!   # Generate an amino acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::AA.new("RRLEHTFVFLRNFSLMLLRY")
!   #
!   # or maybe (if you have an amino acid sequence in a file)
!   #
!   #   s = Bio::Sequence::AA.new(File.open('aa.txt').read)
!   #
!   # Amino Acid sequences are *always* all uppercase in bioruby
!   #
!   #   s = Bio::Sequence::AA.new("rrLeHtfV")
!   #   puts s                                  #=> "RRLEHTFV"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::AA.new("RRL\nELA\tRG\r  RL")
!   #   puts s                                  #=> "RRLELARGRL"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::AA object
    def initialize(str)
      super
***************
*** 31,45 ****
  
  
!   # Estimate the weight of this protein.
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of the each residue along with the
!   # sequence (3 letters code).
    def codes
      array = []
--- 65,98 ----
  
  
!   # Estimate molecular weight based on 
!   # Fasman1976[http://www.genome.ad.jp/dbget-bin/www_bget?aaindex+FASG760101]
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.molecular_weight             #=> 572.655
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      Bio::AminoAcid.weight(self)
    end
  
+   # Create a ruby regular expression instance 
+   # (Regexp)[http://corelib.rubyonrails.org/classes/Regexp.html]  
+   #
+   #   s = Bio::Sequence::AA.new("RRLE")
+   #   puts s.to_re                        #=> /RRLE/
+   # ---
+   # *Returns*:: Regexp object
    def to_re
      Bio::AminoAcid.to_re(self)
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (3 letters code).  Codes used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.codes                        #=> ["Arg", "Arg", "Leu", "Glu"]
!   # ---
!   # *Returns*:: Array object
    def codes
      array = []
***************
*** 50,54 ****
    end
  
!   # Similar to codes but returns long names.
    def names
      self.codes.map do |x|
--- 103,115 ----
    end
  
!   # Generate the list of the names of each residue along with the
!   # sequence (full name).  Names used in bioruby are found in the
!   # Bio::AminoAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::AA.new("RRLE")
!   #   puts s.names  
!   #               #=> ["arginine", "arginine", "leucine", "glutamic acid"]
!   # ---
!   # *Returns*:: Array object
    def names
      self.codes.map do |x|

Index: generic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/generic.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** generic.rb	6 Feb 2006 14:26:04 -0000	1.3
--- generic.rb	26 Mar 2006 02:27:59 -0000	1.4
***************
*** 14,18 ****
  class Sequence
  
! class Generic < String
  
    include Bio::Sequence::Common
--- 14,18 ----
  class Sequence
  
! class Generic < String #:nodoc:
  
    include Bio::Sequence::Common

Index: na.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/na.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** na.rb	6 Feb 2006 14:13:52 -0000	1.2
--- na.rb	26 Mar 2006 02:27:59 -0000	1.3
***************
*** 3,7 ****
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>
  # License::     Ruby's
  #
--- 3,8 ----
  #
  # Copyright::   Copyright (C) 2006
! #               Toshiaki Katayama <k at bioruby.org>,
! #               Ryan Raaum <ryan at raaum.org>
  # License::     Ruby's
  #
***************
*** 19,28 ****
  
  
! # Nucleic Acid sequence
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
    def initialize(str)
      super
--- 20,78 ----
  
  
! # = DESCRIPTION
! # Bio::Sequence::NA represents a bare Nucleic Acid sequence in bioruby.
! #
! # = USAGE
! #   # Create a Nucleic Acid sequence.
! #   dna = Bio::Sequence.auto('atgcatgcATGCATGCAAAA')
! #   rna = Bio::Sequence.auto('augcaugcaugcaugcaaaa')
! #
! #   # What are the names of all the bases?
! #   puts dna.names
! #   puts rna.names
! #
! #   # What is the GC percentage?
! #   puts dna.gc_percent
! #   puts rna.gc_percent
! #
! #   # What is the molecular weight?
! #   puts dna.molecular_weight
! #   puts rna.molecular_weight
! #
! #   # What is the reverse complement?
! #   puts dna.reverse_complement
! #   puts dna.complement
! #
! #   # Is this sequence DNA or RNA?
! #   puts dna.rna?
! #
! #   # Translate my sequence (see method docs for many options)
! #   puts dna.translate
! #   puts rna.translate
  class NA < String
  
    include Bio::Sequence::Common
  
!   # Generate a nucleic acid sequence object from a string.
!   #
!   #   s = Bio::Sequence::NA.new("aagcttggaccgttgaagt")
!   #
!   # or maybe (if you have a nucleic acid sequence in a file)
!   #
!   #   s = Bio::Sequence::NA.new(File.open('dna.txt').read)
!   #
!   # Nucleic Acid sequences are *always* all lowercase in bioruby
!   #
!   #   s = Bio::Sequence::NA.new("AAGcTtGG")
!   #   puts s                                  #=> "aagcttgg"
!   #
!   # Whitespace is stripped from the sequence
!   #
!   #   s = Bio::Sequence::NA.new("atg\nggg\ttt\r  gc")
!   #   puts s                                  #=> "atggggttgc"
!   # ---
!   # *Arguments*:
!   # * (required) _str_: String
!   # *Returns*:: Bio::Sequence::NA object
    def initialize(str)
      super
***************
*** 31,36 ****
    end
  
!   # This method depends on Locations class, see bio/location.rb
!   def splicing(position)
      mRNA = super
      if mRNA.rna?
--- 81,86 ----
    end
  
!   # Alias of Bio::Sequence::Common splice method, documented there.
!   def splicing(position) #:nodoc:
      mRNA = super
      if mRNA.rna?
***************
*** 42,46 ****
    end
  
!   # Returns complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement
      s = self.class.new(self)
--- 92,103 ----
    end
  
!   # Returns a new complementary sequence object (without reversing).
!   # The original sequence object is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement               #=> 'tacg'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def forward_complement
      s = self.class.new(self)
***************
*** 49,53 ****
    end
  
!   # Convert to complement sequence without reversing ("atgc" -> "tacg")
    def forward_complement!
      if self.rna?
--- 106,117 ----
    end
  
!   # Converts the current sequence into its complement (without reversing).
!   # The original sequence object is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.forward_complement!              #=> 'tacg'
!   #   puts s                                  #=> 'tacg'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def forward_complement!
      if self.rna?
***************
*** 59,63 ****
    end
  
!   # Returns reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement
      s = self.class.new(self)
--- 123,134 ----
    end
  
!   # Returns a new sequence object with the reverse complement 
!   # sequence to the original.  The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement               #=> 'gcat'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def reverse_complement
      s = self.class.new(self)
***************
*** 66,70 ****
    end
  
!   # Convert to reverse complement sequence ("atgc" -> "gcat")
    def reverse_complement!
      self.reverse!
--- 137,148 ----
    end
  
!   # Converts the original sequence into its reverse complement.  
!   # The original sequence is modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.reverse_complement!              #=> 'gcat'
!   #   puts s                                  #=> 'gcat'
!   # ---
!   # *Returns*:: current Bio::Sequence::NA object (modified)
    def reverse_complement!
      self.reverse!
***************
*** 72,87 ****
    end
  
!   # Aliases for short
    alias complement reverse_complement
    alias complement! reverse_complement!
  
  
!   # Translate into the amino acid sequence from the given frame and the
!   # selected codon table.  The table also can be a Bio::CodonTable object.
!   # The 'unknown' character is used for invalid/unknown codon (can be
!   # used for 'nnn' and/or gap translation in practice).
    #
!   # Frame can be 1, 2 or 3 for the forward strand and -1, -2 or -3
!   # (4, 5 or 6 is also accepted) for the reverse strand.
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
--- 150,235 ----
    end
  
!   # Alias for Bio::Sequence::NA#reverse_complement
    alias complement reverse_complement
+   
+   # Alias for Bio::Sequence::NA#reverse_complement!
    alias complement! reverse_complement!
  
  
!   # Translate into an amino acid sequence.
!   #   
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.translate                        #=> "MA*"
    #
!   # By default, translate starts in reading frame position 1, but you
!   # can start in either 2 or 3 as well,
!   #
!   #   puts s.translate(2)                     #=> "WR"
!   #   puts s.translate(3)                     #=> "GV"
!   #
!   # You may also translate the reverse complement in one step by using frame
!   # values of -1, -2, and -3 (or 4, 5, and 6)
!   #
!   #   puts s.translate(-1)                    #=> "SRH"
!   #   puts s.translate(4)                     #=> "SRH"
!   #   puts s.reverse_complement.translate(1)  #=> "SRH"
!   #
!   # The default codon table in the translate function is the Standard
!   # Eukaryotic codon table.  The translate function takes either a 
!   # number or a Bio::CodonTable object for its table argument. 
!   # The available tables are 
!   # (NCBI[http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t]):
!   #
!   #   1. "Standard (Eukaryote)"
!   #   2. "Vertebrate Mitochondrial"
!   #   3. "Yeast Mitochondrial"
!   #   4. "Mold, Protozoan, Coelenterate Mitochondrial and Mycoplasma/Spiroplasma"
!   #   5. "Invertebrate Mitochondrial"
!   #   6. "Ciliate Macronuclear and Dasycladacean"
!   #   9. "Echinoderm Mitochondrial"
!   #   10. "Euplotid Nuclear"
!   #   11. "Bacteria"
!   #   12. "Alternative Yeast Nuclear"
!   #   13. "Ascidian Mitochondrial"
!   #   14. "Flatworm Mitochondrial"
!   #   15. "Blepharisma Macronuclear"
!   #   16. "Chlorophycean Mitochondrial"
!   #   21. "Trematode Mitochondrial"
!   #   22. "Scenedesmus obliquus mitochondrial"
!   #   23. "Thraustochytrium Mitochondrial"
!   #
!   # If you are using anything other than the default table, you must specify 
!   # frame in the translate method call,
!   #
!   #   puts s.translate                #=> "MA*"  (using defaults)
!   #   puts s.translate(1,1)           #=> "MA*"  (same as above, but explicit)
!   #   puts s.translate(1,2)           #=> "MAW"  (different codon table)
!   #
!   # and using a Bio::CodonTable instance in the translate method call,
!   #
!   #   mt_table = Bio::CodonTable[2]
!   #   puts s.translate(1, mt_table)           #=> "MAW"
!   #
!   # By default, any invalid or unknown codons (as could happen if the 
!   # sequence contains ambiguities) will be represented by 'X' in the 
!   # translated sequence. 
!   # You may change this to any character of your choice.
!   #
!   #   s = Bio::Sequence::NA.new('atgcNNtga')
!   #   puts s.translate                        #=> "MX*"
!   #   puts s.translate(1,1,'9')               #=> "M9*"
!   #
!   # The translate method considers gaps to be unknown characters and treats 
!   # them as such (i.e. does not collapse sequences prior to translation), so
!   #
!   #   s = Bio::Sequence::NA.new('atgc--tga')
!   #   puts s.translate                        #=> "MX*"
!   # ---
!   # *Arguments*:
!   # * (optional) _frame_:  one of 1,2,3,4,5,6,-1,-2,-3 (default 1)
!   # * (optional) _table_: Fixnum (one of the table numbers listed above)
!   #   or Bio::CodonTable object (default 1)
!   # * (optional) _unknown_: Character (default 'X')
!   # *Returns*:: Bio::Sequence::AA object
    def translate(frame = 1, table = 1, unknown = 'X')
      if table.is_a?(Bio::CodonTable)
***************
*** 109,113 ****
    end
  
!   # Returns counts of the each codon in the sequence by Hash.
    def codon_usage
      hash = Hash.new(0)
--- 257,276 ----
    end
  
!   # Returns counts of each codon in the sequence in a hash.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.codon_usage                #=> {"gcg"=>1, "tga"=>1, "atg"=>1}
!   #
!   # This method does not validate codons!  Any three letter group is a 'codon'. So,
!   #
!   #   s = Bio::Sequence::NA.new('atggNNtga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "gnn"=>1, "atg"=>1}
!   #
!   #   s = Bio::Sequence::NA.new('atgg--tga')
!   #   puts s.codon_usage                #=> {"tga"=>1, "g--"=>1, "atg"=>1}
!   #
!   # Also, there is no option to work in any frame other than the first.
!   # ---
!   # *Returns*:: Hash object
    def codon_usage
      hash = Hash.new(0)
***************
*** 118,122 ****
    end
  
!   # Calculate the ratio of GC / ATGC bases in percent.
    def gc_percent
      count = self.composition
--- 281,291 ----
    end
  
!   # Calculate the ratio of GC / ATGC bases as a percentage, truncated
!   # (rounded down) to a whole number.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.gc_percent                       #=> 55
!   # ---
!   # *Returns*:: Fixnum
    def gc_percent
      count = self.composition
***************
*** 127,136 ****
    end
  
!   # Show abnormal bases other than 'atgcu'.
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate the weight of this biological string molecule.
    def molecular_weight
      if self.rna?
--- 296,322 ----
    end
  
!   # Returns an alphabetically sorted array of any non-standard bases 
!   # (other than 'atgcu').
!   #
!   #   s = Bio::Sequence::NA.new('atgStgQccR')
!   #   puts s.illegal_bases                    #=> ["q", "r", "s"]
!   # ---
!   # *Returns*:: Array object
    def illegal_bases
      self.scan(/[^atgcu]/).sort.uniq
    end
  
!   # Estimate molecular weight (using the values from BioPerl's 
!   # SeqStats.pm[http://doc.bioperl.org/releases/bioperl-1.0.1/Bio/Tools/SeqStats.html] module).
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.molecular_weight                 #=> 2841.00708
!   #
!   # RNA and DNA do not have the same molecular weights,
!   #
!   #   s = Bio::Sequence::NA.new('auggcguga')
!   #   puts s.molecular_weight                 #=> 2956.94708
!   # ---
!   # *Returns*:: Float object
    def molecular_weight
      if self.rna?
***************
*** 141,145 ****
    end
  
!   # Convert the universal code string into the regular expression.
    def to_re
      if self.rna?
--- 327,337 ----
    end
  
!   # Create a Ruby regular expression instance
!   # (Regexp[http://corelib.rubyonrails.org/classes/Regexp.html]) from the sequence.
!   #
!   #   s = Bio::Sequence::NA.new('atggcgtga')
!   #   puts s.to_re                            #=> /atggcgtga/
!   # ---
!   # *Returns*:: Regexp object
    def to_re
      if self.rna?
***************
*** 150,154 ****
    end
  
!   # Convert the self string into the list of the names of the each base.
    def names
      array = []
--- 342,353 ----
    end
  
!   # Generate the list of the full names of each nucleotide along the
!   # sequence.  Names used in bioruby are found in the
!   # Bio::NucleicAcid::NAMES hash.
!   #
!   #   s = Bio::Sequence::NA.new('atg')
!   #   puts s.names                    #=> ["Adenine", "Thymine", "Guanine"]
!   # ---
!   # *Returns*:: Array object
    def names
      array = []
***************
*** 159,176 ****
    end
  
!   # Output a DNA string by substituting 'u' to 't'.
    def dna
      self.tr('u', 't')
    end
  
    def dna!
      self.tr!('u', 't')
    end
  
!   # Output a RNA string by substituting 't' to 'u'.
    def rna
      self.tr('t', 'u')
    end
  
    def rna!
      self.tr!('t', 'u')
--- 358,405 ----
    end
  
!   # Returns a new sequence object with any 'u' bases changed to 't'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('augc')
!   #   puts s.dna                              #=> 'atgc'
!   #   puts s                                  #=> 'augc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def dna
      self.tr('u', 't')
    end
  
+   # Changes any 'u' bases in the original sequence to 't'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('augc')
+   #   puts s.dna!                             #=> 'atgc'
+   #   puts s                                  #=> 'atgc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def dna!
      self.tr!('u', 't')
    end
  
!   # Returns a new sequence object with any 't' bases changed to 'u'.
!   # The original sequence is not modified.
!   #
!   #   s = Bio::Sequence::NA.new('atgc')
!   #   puts s.rna                              #=> 'augc'
!   #   puts s                                  #=> 'atgc'
!   # ---
!   # *Returns*:: new Bio::Sequence::NA object
    def rna
      self.tr('t', 'u')
    end
  
+   # Changes any 't' bases in the original sequence to 'u'.
+   # The original sequence is modified.
+   #
+   #   s = Bio::Sequence::NA.new('atgc')
+   #   puts s.rna!                             #=> 'augc'
+   #   puts s                                  #=> 'augc'
+   # ---
+   # *Returns*:: current Bio::Sequence::NA object (modified)
    def rna!
      self.tr!('t', 'u')




More information about the bioruby-cvs mailing list