[BioRuby-cvs] bioruby/lib/bio location.rb,0.22,0.23

Jan Aerts aerts at dev.open-bio.org
Thu Apr 20 15:58:36 UTC 2006


Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv14566

Modified Files:
	location.rb 
Log Message:
Added and reformatted documentation.


Index: location.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/location.rb,v
retrieving revision 0.22
retrieving revision 0.23
diff -C2 -d -r0.22 -r0.23
*** location.rb	18 Dec 2005 15:50:06 -0000	0.22
--- location.rb	20 Apr 2006 15:58:34 -0000	0.23
***************
*** 2,16 ****
  # = bio/location.rb - Locations/Location class (GenBank location format)
  #
! # Copyright::	Copyright (C) 2001, 2005
! #		KATAYAMA Toshiaki <k at bioruby.org>
  # License::	LGPL
  #
  # $Id$
  #
! # == Appendix : GenBank location descriptor classification
  # 
  # === Definition of the position notation of the GenBank location format
  # 
! # According to the GenBank manual 'gbrel.txt', I classified position notations
  # into 10 patterns - (A) to (J).
  # 
--- 2,167 ----
  # = bio/location.rb - Locations/Location class (GenBank location format)
  #
! # Copyright::	Copyright (C) 2001, 2005 KATAYAMA Toshiaki <k at bioruby.org>
! #                             2006 Jan Aerts <jan.aerts at bbsrc.ac.uk>
  # License::	LGPL
  #
  # $Id$
  #
! # 
! #--
! #
! #  This library is free software; you can redistribute it and/or
! #  modify it under the terms of the GNU Lesser General Public
! #  License as published by the Free Software Foundation; either
! #  version 2 of the License, or (at your option) any later version.
! #
! #  This library is distributed in the hope that it will be useful,
! #  but WITHOUT ANY WARRANTY; without even the implied warranty of
! #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
! #  Lesser General Public License for more details.
! #
! #  You should have received a copy of the GNU Lesser General Public
! #  License along with this library; if not, write to the Free Software
! #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
! #
! #++
! #
! 
! module Bio
! 
! # = DESCRIPTION
! # The Bio::Location class describes the position of a genomic locus. Typically,
! # Bio::Location objects are created automatically when the user creates a
! # Bio::Locations object, instead of initialized directly.
! #
! # = USAGE
! #   location = Bio::Location.new('500..550')
! #   puts "start=" + location.from.to_s + ";end=" + location.to.to_s
! #
! #   #, or better: through Bio::Locations
! #   locations = Bio::Locations.new('500..550')
! #   locations.each do |location|
! #     puts "start=" + location.from.to_s + ";end=" + location.to.to_s
! #   end
! class Location
! 
!   # Parses a 'location' segment, which can be 'ID:' + ('n' or 'n..m' or 'n^m'
!   # or "seq") with '<' or '>', and returns a Bio::Location object.
!   #   location = Bio::Location.new('500..550')
!   # 
!   # ---
!   # *Arguments*:
!   # * (required) _location_: GenBank style position string (see Bio::Locations documentation)
!   # *Returns*:: Bio::Location object
!   def initialize(location = nil)
! 
!     if location
!       if location =~ /:/				# (G) ID:location
!         xref_id, location = location.split(':')
!       end
!       if location =~ /</				# (I) <,>
!         lt = true
!       end
!       if location =~ />/
!         gt = true
!       end
!     end
! 
!     # s : start base, e : end base => from, to
!     case location
!     when /^[<>]?(\d+)$/				# (A, I) n
!       s = e = $1.to_i
!     when /^[<>]?(\d+)\.\.[<>]?(\d+)$/			# (B, I) n..m
!       s = $1.to_i
!       e = $2.to_i
!       if e - s < 0
! #       raise "Error: invalid range : #{location}"
!         $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
!       end
!     when /^[<>]?(\d+)\^[<>]?(\d+)$/			# (C, I) n^m
!       s = $1.to_i
!       e = $2.to_i
!       if e - s != 1
! #       raise "Error: invalid range : #{location}"
!         $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
!       end
!     when /^"?([ATGCatgc]+)"?$/                  # (H) literal sequence
!       sequence = $1.downcase
!       s = e = nil
!     when nil
!       ;
!     else
!       raise "Error: unknown location format : #{location}"
!     end
! 
!     @from       = s             # start position of the location
!     @to         = e             # end position of the location
!     @strand     = 1             # strand direction of the location
!                                 #   forward => 1 or complement => -1
!     @sequence   = sequence      # literal sequence of the location
!     @lt         = lt            # true if the position contains '<'
!     @gt         = gt            # true if the position contains '>'
!     @xref_id    = xref_id       # link to the external entry as GenBank ID
!   end
! 
!   attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
! 
!   # Complements the sequence (i.e. alternates the strand).
!   # ---
!   # *Returns*:: the Bio::Location object
!   def complement
!     @strand *= -1
!     self					# return Location object
!   end
! 
!   # Replaces the sequence of the location.
!   # ---
!   # *Arguments*:
!   # * (required) _sequence_: sequence to be used to replace the sequence at the location
!   # *Returns*:: the Bio::Location object
!   def replace(sequence)
!     @sequence = sequence.downcase
!     self					# return Location object
!   end
! 
!   # Returns the range (from..to) of the location as a Range object.
!   def range
!     @from..@to
!   end
! 
! end # class location
! 
! # = DESCRIPTION
! # The Bio::Locations class is a container for Bio::Location objects: creating a 
! # Bio::Locations object (based on a GenBank style position string) will
! # spawn an array of Bio::Location objects.
! #
! # = USAGE
! #   locations = Bio::Locations.new('join(complement(500..550), 600..625)')
! #   locations.each do |location|
! #     puts "class=" + location.class.to_s
! #     puts "start=" + location.from.to_s + ";end=" + location.to.to_s \
! #          + ";strand=" + location.strand.to_s
! #   end
! #     # Output would be:
! #     # class=Bio::Location
! #     # start=500;end=550;strand=-1
! #     # class=Bio::Location
! #     # start=600;end=625;strand=1
! # 
! #  # For the following three location strings, print the span and range
! #  ['one-of(898,900)..983',
! #   'one-of(5971..6308,5971..6309)',
! #   '8050..one-of(10731,10758,10905,11242)'].each do |loc|
! #      location = Bio::Locations.new(loc)
! #      puts location.span
! #      puts location.range
! #  end
! #
! # = GENBANK LOCATION DESCRIPTOR CLASSIFICATION
  # 
  # === Definition of the position notation of the GenBank location format
  # 
! # According to the GenBank manual 'gbrel.txt', position notations were classified
  # into 10 patterns - (A) to (J).
  # 
***************
*** 84,115 ****
  # === Reduction strategy of the position notations
  # 
! #   (A) Location n
! #   
! #   (B) Location n..m
! #   
! #   (C) Location n^m
! #   
! #   (D) (n.m)		=> Location n
! #   
! #   (E) one-of(n,m,..)	=> Location n
! #       one-of(n..m,..)	=> Location n..m
! #   
! #   (F) order(loc,loc,..)	=> join(loc, loc,..)
! #       group(loc,loc,..)	=> join(loc, loc,..)
! #       join(loc,loc,..)	=> Sequence
! #   
! #   (G) ID:loc		=> Location with ID
! #   
! #   (H) "atgc"		=> Location only with Sequence
! #   
! #   (I) <n			=> Location n with lt flag
! #       >n			=> Location n with gt flag
! #       <n..m		=> Location n..m with lt flag
! #       n..>m		=> Location n..m with gt flag
! #       <n..>m		=> Location n..m with lt, gt flag
! #   
! #   (J) complement(loc)	=> Sequence
! #   
! #   (K) replace(loc, str)	=> Location with replacement Sequence
  # 
  # === GenBank location examples
--- 235,259 ----
  # === Reduction strategy of the position notations
  # 
! # * (A) Location n
! # * (B) Location n..m
! # * (C) Location n^m
! # * (D) (n.m)		=> Location n
! # * (E)
! #   * one-of(n,m,..)	=> Location n
! #   * one-of(n..m,..)	=> Location n..m
! # * (F)
! #   * order(loc,loc,..)	=> join(loc, loc,..)
! #   * group(loc,loc,..)	=> join(loc, loc,..)
! #   * join(loc,loc,..)	=> Sequence
! # * (G) ID:loc		=> Location with ID
! # * (H) "atgc"		=> Location only with Sequence
! # * (I)
! #   * <n			=> Location n with lt flag
! #   * >n			=> Location n with gt flag
! #   * <n..m		=> Location n..m with lt flag
! #   * n..>m		=> Location n..m with gt flag
! #   * <n..>m		=> Location n..m with lt, gt flag
! # * (J) complement(loc)	=> Sequence
! # * (K) replace(loc, str)	=> Location with replacement Sequence
  # 
  # === GenBank location examples
***************
*** 232,340 ****
  # * [ADR40FIB]	replace(510..520,	<= replace(510..520, "taatcctaccg")
  # * [RATDYIIAAB]	replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
- # 
- #--
- #
- #  This library is free software; you can redistribute it and/or
- #  modify it under the terms of the GNU Lesser General Public
- #  License as published by the Free Software Foundation; either
- #  version 2 of the License, or (at your option) any later version.
- #
- #  This library is distributed in the hope that it will be useful,
- #  but WITHOUT ANY WARRANTY; without even the implied warranty of
- #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- #  Lesser General Public License for more details.
- #
- #  You should have received a copy of the GNU Lesser General Public
- #  License along with this library; if not, write to the Free Software
- #  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
- #
- #++
- #
- 
- module Bio
- 
- class Location
- 
-   # Pass a range of the 'location' segment.  The 'location' segment can be
-   # 'ID:' + ('n' or 'n..m' or 'n^m' or "seq") with '<' or '>'.
-   def initialize(location = nil)
- 
-     if location
-       if location =~ /:/				# (G) ID:location
-         xref_id, location = location.split(':')
-       end
-       if location =~ /</				# (I) <,>
-         lt = true
-       end
-       if location =~ />/
-         gt = true
-       end
-     end
- 
-     # s : start base, e : end base => from, to
-     case location
-     when /^[<>]?(\d+)$/				# (A, I) n
-       s = e = $1.to_i
-     when /^[<>]?(\d+)\.\.[<>]?(\d+)$/			# (B, I) n..m
-       s = $1.to_i
-       e = $2.to_i
-       if e - s < 0
- #       raise "Error: invalid range : #{location}"
-         $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
-       end
-     when /^[<>]?(\d+)\^[<>]?(\d+)$/			# (C, I) n^m
-       s = $1.to_i
-       e = $2.to_i
-       if e - s != 1
- #       raise "Error: invalid range : #{location}"
-         $stderr.puts "[Warning] invalid range : #{location}" if $DEBUG
-       end
-     when /^"?([ATGCatgc]+)"?$/                  # (H) literal sequence
-       sequence = $1.downcase
-       s = e = nil
-     when nil
-       ;
-     else
-       raise "Error: unknown location format : #{location}"
-     end
- 
-     @from       = s             # start position of the location
-     @to         = e             # end position of the location
-     @strand     = 1             # strand direction of the location
-                                 #   forward => 1 or complement => -1
-     @sequence   = sequence      # literal sequence of the location
-     @lt         = lt            # true if the position contains '<'
-     @gt         = gt            # true if the position contains '>'
-     @xref_id    = xref_id       # link to the external entry as GenBank ID
-   end
- 
-   attr_accessor :from, :to, :strand, :sequence, :lt, :gt, :xref_id
- 
-   # Complement the sequence from outside.
-   def complement
-     @strand *= -1
-     self					# return Location object
-   end
- 
-   # Replace the sequence from outside.
-   def replace(sequence)
-     @sequence	= sequence.downcase
-     self					# return Location object
-   end
- 
-   # Returns a range (from..to) of the segment as a Range object.
-   def range
-     @from..@to
-   end
- 
- end # class location
- 
- 
  class Locations
- 
    include Enumerable
  
!   # Parse a GenBank style position string and returns a Locations object,
!   # which contains a list of Location objects.
    def initialize(position)
      if position.is_a? Array
--- 376,390 ----
  # * [ADR40FIB]	replace(510..520,	<= replace(510..520, "taatcctaccg")
  # * [RATDYIIAAB]	replace(1306..1443,"aagaacatccacggagtcagaactgggctcttcacgccggatttggcgttcgaggccattgtgaaaaagcaggcaatgcaccagcaagctcagttcctacccctgcgtggacctggttatccaggagctaatcagtacagttaggtggtcaagctgaaagagccctgtctgaaa")
  class Locations
    include Enumerable
  
!   # Parses a GenBank style position string and returns a Bio::Locations object,
!   # which contains a list of Bio::Location objects.
!   #   locations = Bio::Locations.new('join(complement(500..550), 600..625)')
!   #
!   # ---
!   # *Arguments*:
!   # * (required) _position_: GenBank style position string
!   # *Returns*:: Bio::Locations object
    def initialize(position)
      if position.is_a? Array
***************
*** 342,351 ****
      else
        position   = gbl_cleanup(position)	# preprocessing
!       @locations = gbl_pos2loc(position)	# create an Array of Location
      end
    end
    attr_accessor :locations
  
!   # Iterates on each Location object.
    def each
      @locations.each do |x|
--- 392,403 ----
      else
        position   = gbl_cleanup(position)	# preprocessing
!       @locations = gbl_pos2loc(position)	# create an Array of Bio::Location objects
      end
    end
+ 
+   # An Array of Bio::Location objects
    attr_accessor :locations
  
!   # Iterates on each Bio::Location object.
    def each
      @locations.each do |x|
***************
*** 354,368 ****
    end
  
!   # Returns nth Location object.
    def [](n)
      @locations[n]
    end
  
!   # Returns first Location object.
    def first
      @locations.first
    end
  
!   # Returns last Location object.
    def last
      @locations.last
--- 406,420 ----
    end
  
!   # Returns nth Bio::Location object.
    def [](n)
      @locations[n]
    end
  
!   # Returns first Bio::Location object.
    def first
      @locations.first
    end
  
!   # Returns last Bio::Location object.
    def last
      @locations.last
***************
*** 370,374 ****
  
    # Returns an Array containing overall min and max position [min, max]
!   # of this Locations object.
    def span
      span_min = @locations.min { |a,b| a.from <=> b.from }
--- 422,426 ----
  
    # Returns an Array containing overall min and max position [min, max]
!   # of this Bio::Locations object.
    def span
      span_min = @locations.min { |a,b| a.from <=> b.from }
***************
*** 397,403 ****
    alias size length
  
!   # Convert absolute position in DNA (na) to relative position in RNA (na).
!   # If type == :aa,
!   # convert absolute position in DNA (na) to relative position in Protein (aa).
    def relative(n, type = nil)
      case type
--- 449,466 ----
    alias size length
  
!   # Converts absolute position in the whole of the DNA sequence to relative 
!   # position in the locus.
!   # 
!   # This method can for example be used to relate positions in a DNA-sequence
!   # with those in RNA. In this use, the optional ':aa'-flag returns the position
!   # of the associated amino-acid rather than the nucleotide.
!   #   loc = Bio::Locations.new('complement(12838..13533)')
!   #   puts loc.relative(13524)        # => 10
!   #   puts loc.relative(13506, :aa)   # => 10
!   # ---
!   # *Arguments*:
!   # * (required) _position_: nucleotide position within whole of the sequence
!   # * _:aa_: flag that lets method return position in aminoacid coordinates
!   # *Returns*:: position within the location
    def relative(n, type = nil)
      case type
***************
*** 415,430 ****
    end
  
!   # Convert relative position in RNA (na) to absolute position in DNA (na).
!   # If type == :aa,
!   # convert relative position in Protein (aa) -> absolute position in DNA (na).
!   #
!   # * Examples
!   #
!   #     loc = Bio::Locations.new('complement(12838..13533)')
!   #     loc.absolute(10)          #=> 13524 (rel2abs)
!   #     loc.relative(13524)       #=> 10    (abs2rel)
!   #     loc.absolute(10, :aa)     #=> 13506 (rel2abs)
!   #     loc.relative(13506, :aa)  #=> 10    (abs2rel)
!   #
    def absolute(n, type = nil)
      case type
--- 478,495 ----
    end
  
!   # Converts relative position in the locus to position in the whole of the
!   # DNA sequence.
!   # 
!   # This method can for example be used to relate positions in a DNA-sequence
!   # with those in RNA. With the optional ':aa'-flag, the given position is
!   # interpreted as an amino-acid position rather than a nucleotide position.
!   #   loc = Bio::Locations.new('complement(12838..13533)')
!   #   puts loc.absolute(10)          # => 13524
!   #   puts loc.absolute(10, :aa)     # => 13506
!   # ---
!   # *Arguments*:
!   # * (required) _position_: nucleotide position within locus
!   # * _:aa_: flag to be used if _position_ is an amino-acid position rather than a nucleotide position
!   # *Returns*:: position within the whole of the sequence
    def absolute(n, type = nil)
      case type
***************
*** 591,594 ****
--- 656,664 ----
    puts "Test new & span methods"
    [
+     '450',
+     '500..600',
+     'join(500..550, 600..625)',
+     'complement(join(500..550, 600..625))',
+     'join(complement(500..550), 600..625)',
      '754^755',
      'complement(53^54)',
***************
*** 618,624 ****
    ].each do |pos|
      p pos
!     p Bio::Locations.new(pos).span
!     p Bio::Locations.new(pos).range
!     p Bio::Locations.new(pos)
    end
  
--- 688,699 ----
    ].each do |pos|
      p pos
! #    p Bio::Locations.new(pos)
! #    p Bio::Locations.new(pos).span
! #    p Bio::Locations.new(pos).range
!     Bio::Locations.new(pos).each do |location|
!       puts "class=" + location.class.to_s
!       puts "start=" + location.from.to_s + "\tend=" + location.to.to_s + "\tstrand=" + location.strand.to_s
!     end
! 
    end
  




More information about the bioruby-cvs mailing list