[BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme README, NONE, 1.1 analysis.rb, NONE, 1.1 cut_symbol.rb, NONE, 1.1 double_stranded.rb, NONE, 1.1 enzymes.yaml, NONE, 1.1 integer.rb, NONE, 1.1 single_strand.rb, NONE, 1.1 single_strand_complement.rb, NONE, 1.1 string_formatting.rb, NONE, 1.1
Trevor Wennblom
trevor at pub.open-bio.org
Wed Feb 1 07:27:28 UTC 2006
- Previous message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/analysis calculated_cuts.rb, NONE, 1.1 cut_range.rb, NONE, 1.1 cut_ranges.rb, NONE, 1.1 fragment.rb, NONE, 1.1 fragments.rb, NONE, 1.1 horizontal_cut_range.rb, NONE, 1.1 permutation.rb, NONE, 1.1 sequence_range.rb, NONE, 1.1 tags.rb, NONE, 1.1 vertical_cut_range.rb, NONE, 1.1
- Next message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/double_stranded aligned_strands.rb, NONE, 1.1 cut_location_pair.rb, NONE, 1.1 cut_location_pair_in_enzyme_notation.rb, NONE, 1.1 cut_locations.rb, NONE, 1.1 cut_locations_in_enzyme_notation.rb, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory pub.open-bio.org:/tmp/cvs-serv28844
Added Files:
README analysis.rb cut_symbol.rb double_stranded.rb
enzymes.yaml integer.rb single_strand.rb
single_strand_complement.rb string_formatting.rb
Log Message:
Bio::RestrictionEnzyme
--- NEW FILE: integer.rb ---
#
# bio/util/restrction_enzyme/integer.rb -
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: integer.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
#
=begin rdoc
bio/util/restrction_enzyme/integer.rb -
=end
class Integer
def negative?
self < 0
end
end
--- NEW FILE: string_formatting.rb ---
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio/util/restriction_enzyme/cut_symbol'
module Bio; end
class Bio::RestrictionEnzyme
#
# bio/util/restriction_enzyme/string_formatting.rb - Useful functions for string manipulation
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: string_formatting.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
=begin rdoc
bio/util/restriction_enzyme/string_formatting.rb - Useful functions for string manipulation
=end
module StringFormatting
include CutSymbol
extend CutSymbol
# Return the sequence with spacing for alignment. Does not add whitespace
# around cut symbols.
#
# Example:
# pattern = 'n^ng^arraxt^n'
# add_spacing( pattern )
#
# Returns:
# "n^n g^a r r a x t^n"
#
def add_spacing( seq, cs = cut_symbol )
str = ''
flag = false
seq.each_byte do |c|
c = c.chr
if c == cs
str += c
flag = false
elsif flag
str += ' ' + c
else
str += c
flag = true
end
end
str
end
# Remove extraneous nucleic acid wildcards ('n' padding) from the
# left and right sides
def strip_padding( s )
if s[0].chr == 'n'
s =~ %r{(n+)(.+)}
s = $2
end
if s[-1].chr == 'n'
s =~ %r{(.+?)(n+)$}
s = $1
end
s
end
# Remove extraneous nucleic acid wildcards ('n' padding) from the
# left and right sides and remove cut symbols
def strip_cuts_and_padding( s )
strip_padding( s.tr(cut_symbol, '') )
end
# Return the 'n' padding on the left side of the strand
def left_padding( s )
s =~ %r{^n+}
ret = $&
ret ? ret : '' # Don't pass nil values
end
# Return the 'n' padding on the right side of the strand
def right_padding( s )
s =~ %r{n+$}
ret = $&
ret ? ret : '' # Don't pass nil values
end
end
end
--- NEW FILE: single_strand.rb ---
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation'
require 'bio/util/restriction_enzyme/cut_symbol'
require 'bio/util/restriction_enzyme/string_formatting'
require 'bio/sequence'
class Bio::RestrictionEnzyme
#
# bio/util/restriction_enzyme/single_strand.rb - Single strand of a restriction enzyme sequence
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: single_strand.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
=begin rdoc
bio/util/restriction_enzyme/single_strand.rb - Single strand of a restriction enzyme sequence
A single strand of restriction enzyme sequence pattern with a 5' to 3'
orientation.
DoubleStranded puts the SingleStrand and SingleStrandComplement together to
create the sequence pattern with cuts on both strands.
=end
class SingleStrand < Bio::Sequence::NA
include CutSymbol
include StringFormatting
# The cut locations in enzyme notation. Contains a
# CutLocationsInEnzymeNotation object.
attr_reader :cut_locations_in_enzyme_notation
# The cut locations transformed from enzyme index notation to 0-based
# array index notation. Contains an Array
attr_reader :cut_locations
# Orientation of the strand, 5' to 3'
def orientation; [5,3]; end
# +sequence+:: The enzyme sequence.
# +c+:: Cut locations in enzyme notation. See CutLocationsInEnzymeNotation.
#
# * +sequence+ is required, +c+ is optional
# * You cannot provide a sequence with cut symbols and provide cut locations.
# * If +c+ is omitted, +input_pattern+ must contain a cut symbol.
# * +sequence+ cannot contain adjacent cut symbols.
# * +c+ is in enzyme index notation and therefore cannot contain a 0.
#
# +sequence+ must be a kind of:
# * String
# * Bio::Sequence::NA
# * Bio::RestrictionEnzyme::SingleStrand
#
# +c+ must be a kind of:
# * Integer, one or more
# * Array
# * CutLocationsInEnzymeNotation
#
def initialize( sequence, *c )
c.flatten! # if an array was supplied as an argument
validate_args(sequence, c)
sequence.downcase!
if sequence =~ re_cut_symbol
@cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( strip_padding(sequence) )
else
@cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( c )
end
@stripped = Bio::Sequence::NA.new( strip_cuts_and_padding( sequence ) )
super( pattern )
@cut_locations = @cut_locations_in_enzyme_notation.to_array_index
end
# Returns true if this enzyme is palindromic with its reverse complement.
# Does not report if the +cut_locations+ are palindromic or not.
#
# Examples:
# * This would be palindromic:
# 5' - ATGCAT - 3'
# TACGTA
#
# * This would not be palindromic:
# 5' - ATGCGTA - 3'
# TACGCAT
#
def palindromic?
@stripped.reverse_complement == @stripped
end
# Pattern with no cut symbols and no 'n' padding.
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).stripped # "garraxt"</code>
attr_reader :stripped
# The sequence with 'n' padding and cut symbols.
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).with_cut_symbols # => "n^ng^arraxt^n"</code>
def with_cut_symbols
s = pattern
@cut_locations_in_enzyme_notation.to_array_index.sort.reverse.each { |c| s.insert(c+1, cut_symbol) }
s
end
# The sequence with 'n' padding on the left and right for cuts larger than the sequence.
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).pattern # => "nngarraxtn"</code>
def pattern
return stripped if @cut_locations_in_enzyme_notation.min == nil
left = (@cut_locations_in_enzyme_notation.min.negative? ? 'n' * @cut_locations_in_enzyme_notation.min.abs : '')
# Add one more 'n' if a cut is at the last position
right = (@cut_locations_in_enzyme_notation.max >= @stripped.length ? 'n' * (@cut_locations_in_enzyme_notation.max - @stripped.length + 1) : '')
[left, stripped, right].to_s
end
# The sequence with 'n' pads, cut symbols, and spacing for alignment.
# * <code>SingleStrand.new('garraxt', [-2, 1, 7]).with_spaces # => "n^n g^a r r a x t^n"</code>
def with_spaces
add_spacing( with_cut_symbols )
end
# NOTE: BEING WORKED ON, BUG EXISTS IN Bio::NucleicAcid
=begin
to_re - important
example z = [agc]
z must match [agcz]
not just [agc]
=end
#########
protected
#########
def validate_args( input_pattern, input_cut_locations )
unless input_pattern.kind_of?(String)
err = "input_pattern is not a String, Bio::Sequence::NA, or Bio::RestrictionEnzyme::SingleStrand::Sequence object\n"
err += "pattern: #{input_pattern}\n"
err += "class: #{input_pattern.class}"
raise ArgumentError, err
end
if ( input_pattern =~ re_cut_symbol ) and !input_cut_locations.empty?
err = "Cut symbol found in sequence, but cut locations were also supplied. Ambiguous.\n"
err += "pattern: #{input_pattern}\n"
err += "symbol: #{cut_symbol}\n"
err += "locations: #{input_cut_locations.inspect}"
raise ArgumentError, err
end
input_pattern.each_byte do |c|
c = c.chr.downcase
unless Bio::NucleicAcid::NAMES.has_key?(c) or c == 'x' or c == 'X' or c == cut_symbol
err = "Invalid character in pattern.\n"
err += "Not a nucleotide or representation of possible nucleotides. See Bio::NucleicAcid::NAMES for more information.\n"
err += "char: #{c}\n"
err += "input_pattern: #{input_pattern}"
raise ArgumentError, err
end
end
end
# Tadayoshi Funaba's method as discussed in Programming Ruby 2ed, p390
def self.once(*ids)
for id in ids
module_eval <<-"end;"
alias_method :__#{id.to_i}__, :#{id.to_s}
private :__#{id.to_i}__
def #{id.to_s}(*args, &block)
(@__#{id.to_i}__ ||= [__#{id.to_i}__(*args, &block)])[0]
end
end;
end
end
once :pattern, :with_cut_symbols, :with_spaces, :to_re
end
end
--- NEW FILE: README ---
todo
--- NEW FILE: enzymes.yaml ---
---
TspRI:
:c4: "0"
:c1: "7"
:len: "5"
:c2: "-3"
:c3: "0"
:pattern: CASTG
:name: TspRI
:blunt: "0"
:ncuts: "2"
MvnI:
:c4: "0"
:c1: "2"
:len: "4"
:c2: "2"
:c3: "0"
:pattern: CGCG
:name: MvnI
[...6692 lines suppressed...]
BseMI:
:c4: "0"
:c1: "8"
:len: "6"
:c2: "6"
:c3: "0"
:pattern: GCAATG
:name: BseMI
:blunt: "0"
:ncuts: "2"
EcoRII:
:c4: "0"
:c1: "-1"
:len: "5"
:c2: "5"
:c3: "0"
:pattern: CCWGG
:name: EcoRII
:blunt: "0"
:ncuts: "2"
--- NEW FILE: single_strand_complement.rb ---
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio/util/restriction_enzyme/single_strand'
module Bio; end
class Bio::RestrictionEnzyme
#
# bio/util/restriction_enzyme/single_strand_complement.rb - Single strand restriction enzyme sequence in complement orientation
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: single_strand_complement.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
=begin rdoc
bio/util/restriction_enzyme/single_strand_complement.rb - Single strand restriction enzyme sequence in complement orientation
A single strand of restriction enzyme sequence pattern with a 3' to 5' orientation.
=end
class SingleStrandComplement < SingleStrand
# Orientation of the strand, 3' to 5'
def orientation; [3, 5]; end
end
end
--- NEW FILE: analysis.rb ---
#if RUBY_VERSION[0..2] == '1.9' or RUBY_VERSION == '2.0'
# err = "This class makes use of 'include' on ranges quite a bit. Possibly unstable in development Ruby. 2005/12/20."
# err += "http://blade.nagaokaut.ac.jp/cgi-bin/vframe.rb/ruby/ruby-talk/167182?167051-169742"
# raise err
#end
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio'
class Bio::Sequence::NA
def cut_with_enzyme(*args)
Bio::RestrictionEnzyme::Analysis.cut(self, *args)
end
alias cut_with_enzymes cut_with_enzyme
end
require 'pp'
require 'bio/util/restriction_enzyme'
require 'bio/util/restriction_enzyme/analysis/sequence_range.rb'
require 'bio/util/restriction_enzyme/analysis/permutation.rb'
class Bio::RestrictionEnzyme
#
# bio/util/restrction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: analysis.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
#
=begin rdoc
bio/util/restrction_enzyme/analysis.rb - Does the work of fragmenting the DNA from the enzymes
=end
class Analysis
def self.cut( sequence, *args )
self.new.cut( sequence, *args )
end
def self.cut_without_permutations( sequence, *args )
self.new.cut_without_permutations( sequence, *args )
end
def self.cut_and_return_by_permutations( sequence, *args )
self.new.cut_and_return_by_permutations( sequence, *args )
end
def cut_without_permutations( sequence, *args )
sequence = Bio::Sequence::NA.new( sequence )
enzyme_actions = create_enzyme_actions( sequence, *args )
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
enzyme_actions.each do |id, enzyme_action|
enzyme_action.cut_ranges.each do |cut_range|
sr_with_cuts.add_cut_range(cut_range)
end
end
sr_with_cuts.fragments.primary = sequence
sr_with_cuts.fragments.complement = sequence.forward_complement
tmp = {}
tmp[0] = sr_with_cuts
unique_fragments_for_display( tmp )
end
def cut_and_return_by_permutations( sequence, *args )
sequence = Bio::Sequence::NA.new( sequence )
enzyme_actions = create_enzyme_actions( sequence, *args )
permutations = Permutation.new(enzyme_actions.size).map { |p| p.value }
# Indexed by permutation.
hash_of_sequence_ranges_with_cuts = {}
permutations.each do |permutation|
previous_cut_ranges = []
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
permutation.each do |id|
enzyme_action = enzyme_actions[id]
# conflict is false if the current enzyme action may cut in it's range.
# conflict is true if it cannot do to a previous enzyme action making
# a cut where this enzyme action needs a whole recognition site.
conflict = false
# If current size of enzyme_action overlaps with previous cut_range, don't cut
# note that the enzyme action may fall in the middle of a previous enzyme action
# so all cut locations must be checked that would fall underneath.
previous_cut_ranges.each do |cut_range|
next unless cut_range.class == VerticalCutRange # we aren't concerned with horizontal cuts
previous_cut_left = cut_range.range.first
previous_cut_right = cut_range.range.last
=begin
puts "--- #{permutation.inspect} ---"
puts "Previous cut left: #{previous_cut_left}"
puts "EA.left #{enzyme_action.left}"
puts "Previous cut right: #{previous_cut_right}"
puts "EA.right: #{enzyme_action.right}"
=end
# Keep in mind:
# * The cut location is to the immediate right of the base located at the index.
# ex: at^gc -- the cut location is at index 1
# * The enzyme action location is located at the base of the index.
# ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
if (enzyme_action.right <= previous_cut_left) or
(enzyme_action.left > previous_cut_right) or
(enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
# no conflict
#puts "no conflict"
else
conflict = true
#puts "conflict"
end
end
next if conflict == true
enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) }
previous_cut_ranges += enzyme_action.cut_ranges
end
hash_of_sequence_ranges_with_cuts[permutation] = sr_with_cuts
end
hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
sr_with_cuts.fragments.primary = sequence
sr_with_cuts.fragments.complement = sequence.forward_complement
end
#pp hash_of_sequence_ranges_with_cuts
hash_of_sequence_ranges_with_cuts
end
def cut( sequence, *args )
hash_of_sequence_ranges_with_cuts = cut_and_return_by_permutations( sequence, *args )
unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
end
#########
protected
#########
UniqueFragment = Struct.new(:primary, :complement)
class UniqueFragments < Array
def primary
tmp = []
self.each { |uf| tmp << uf.primary }
tmp.sort.map { |e| e.tr(' ', '') }
end
def complement
tmp = []
self.each { |uf| tmp << uf.complement }
tmp.sort.map { |e| e.tr(' ', '') }
end
end
def unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
uf_ary = UniqueFragments.new
hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
sr_with_cuts.fragments.for_display.each do |fragment|
uf = UniqueFragment.new
uf.primary = fragment.primary
uf.complement = fragment.complement
duplicate = false
uf_ary.each do |element|
if (uf.primary == element.primary) and (uf.complement == element.complement)
duplicate = true
break
end
end
uf_ary << uf unless duplicate
end
end
uf_ary
end
=begin
Strand = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right)
def ts_fragments_to_strands( sequence, fragments )
sequence = Bio::Sequence::NA.new( sequence )
strands = []
fragments.each do |f|
p = sequence[f.p_left..f.p_right]
c = sequence[f.c_left..f.c_right]
strands << Strand.new(p, c, f.p_left, f.p_right, f.c_left, f.c_right)
end
strands
end
=end
# Defines a single enzyme action, in this case being a range that correlates
# to the DNA sequence that may contain it's own internal cuts.
class EnzymeAction < SequenceRange
end
# Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
#
# +sequence+:: The string of DNA to match the enzyme recognition sites against
# +args+:: The enzymes to use.
def create_enzyme_actions( sequence, *args )
id = 0
enzyme_actions = {}
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
enzyme_actions[id] = enzyme_to_enzyme_action( enzyme, offset )
id += 1
end
end
enzyme_actions
end
# Returns the offsets of the match of a RegExp to a string.
#
# +string+:: The string to scan.
# +re+:: A regexp to use.
def find_match_locations( string, re )
md = string.match( re )
locations = []
location = -1
while md
location += md.pre_match.size + 1
locations << location
# md[0] is the same as $&, or "the match" itself
md = (md[0][1..-1] + md.post_match).match( re )
end
locations
end
# Takes a RestrictionEnzyme and a numerical offset to the sequence and
# returns an EnzymeAction
#
# +enzyme+:: RestrictionEnzyme
# +offset+:: Numerical offset of where the enzyme action occurs on the seqeunce
def enzyme_to_enzyme_action( enzyme, offset )
enzyme_action = EnzymeAction.new(offset, offset + enzyme.primary.size-1, offset, offset + enzyme.complement.size-1)
enzyme.cut_locations.each do |cut_pair|
p = cut_pair[0]
c = cut_pair[1]
if c >= p
enzyme_action.add_cut_range(offset+p, nil, nil, offset+c)
else
enzyme_action.add_cut_range(nil, offset+p, offset+c, nil)
end
end
enzyme_action
end
end
end
--- NEW FILE: cut_symbol.rb ---
module Bio; end
class Bio::RestrictionEnzyme
#
# bio/util/restrction_enzyme/cut_symbol.rb -
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: cut_symbol.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
#
=begin rdoc
bio/util/restrction_enzyme/cut_symbol.rb -
=end
module CutSymbol
require 'singleton'
class CutSymbol__
include Singleton
attr_accessor :cut_symbol
attr_accessor :escaped_cut_symbol
end
# NOTE verify this sometime
def cut_symbol=(c)
CutSymbol__.instance.cut_symbol = c
end
def cut_symbol
CutSymbol__.instance.cut_symbol ||= '^'
end
def escaped_cut_symbol
CutSymbol__.instance.escaped_cut_symbol ||= "\\#{cut_symbol}" # \^
end
# Used to check if multiple cut symbols are next to each other
def re_cut_symbol_adjacent
%r"#{escaped_cut_symbol}{2}"
end
# A Regexp of the cut_symbol. Convenience method.
def re_cut_symbol
%r"#{escaped_cut_symbol}"
end
end
end
--- NEW FILE: double_stranded.rb ---
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio/db/rebase'
require 'bio/util/restriction_enzyme'
require 'bio/util/restriction_enzyme/cut_symbol'
require 'bio/util/restriction_enzyme/single_strand'
require 'bio/util/restriction_enzyme/single_strand_complement'
require 'bio/util/restriction_enzyme/double_stranded/aligned_strands'
require 'bio/util/restriction_enzyme/double_stranded/cut_locations'
require 'bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation'
module Bio; end
class Bio::RestrictionEnzyme
#
# bio/util/restriction_enzyme/double_stranded.rb - DoubleStranded restriction enzyme sequence
#
# Copyright:: Copyright (C) 2006 Trevor Wennblom <trevor at corevx.com>
# License:: LGPL
#
# $Id: double_stranded.rb,v 1.1 2006/02/01 07:34:11 trevor Exp $
#
#
#--
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#++
#
=begin rdoc
bio/util/restriction_enzyme/double_stranded.rb - DoubleStranded restriction enzyme sequence
A pair of +SingleStrand+ and +SingleStrandComplement+ objects with methods to
add utility to their relation.
== Notes
* This is created by Bio::RestrictionEnzyme.new for convenience.
* The two strands accessible are +primary+ and +complement+.
* SingleStrand methods may be used on DoubleStranded and they will be passed to +primary+.
=end
class DoubleStranded
include CutSymbol
extend CutSymbol
include StringFormatting
extend StringFormatting
# The primary strand
attr_reader :primary
# The complement strand
attr_reader :complement
# Cut locations in 0-based index format, DoubleStranded::CutLocations object
attr_reader :cut_locations
# Cut locations in enzyme index notation, DoubleStranded::CutLocationsInEnzymeNotation object
attr_reader :cut_locations_in_enzyme_notation
# +erp+:: Enzyme or Rebase or Pattern. One of three: The name of an enzyme. A REBASE::EnzymeEntry object. A nucleotide pattern.
# +raw_cut_pairs+:: The cut locations in enzyme index notation.
#
# Enzyme index notation:: 1.._n_, value before 1 is -1
#
# Examples of the allowable cut locations for +raw_cut_pairs+ follows. 'p' and
# 'c' refer to a cut location on the 'p'rimary and 'c'omplement strands.
#
# 1, [3,2], [20,22], 57
# p, [p,c], [p, c], p
#
# Which is the same as:
#
# 1, (3..2), (20..22), 57
# p, (p..c), (p..c), p
#
# Examples of partial cuts:
# 1, [nil,2], [20,nil], 57
# p, [p, c], [p, c], p
#
def initialize(erp, *raw_cut_pairs)
k = erp.class
if k == Bio::REBASE::EnzymeEntry
# Passed a Bio::REBASE::EnzymeEntry object
unless raw_cut_pairs.empty?
err = "A Bio::REBASE::EnzymeEntry object was passed, however the cut locations contained values. Ambiguous or redundant.\n"
err += "inspect = #{raw_cut_pairs.inspect}"
raise ArgumentError, err
end
initialize_with_rebase( erp )
elsif erp.kind_of? String
# Passed something that could be an enzyme pattern or an anzyme name
# Decide if this String is an enzyme name or a pattern
if Bio::RestrictionEnzyme.enzyme_name?( erp )
# Check if it's a known name
known_enzyme = false
known_enzyme = true if Bio::RestrictionEnzyme.rebase[ erp ]
# Try harder to find the enzyme
unless known_enzyme
re = %r"^#{erp}$"i
Bio::RestrictionEnzyme.rebase.each { |name, v| (known_enzyme = true; erp = name; break) if name =~ re }
end
if known_enzyme
initialize_with_rebase( Bio::RestrictionEnzyme.rebase[erp] )
else
raise IndexError, "No entry found for enzyme named '#{erp}'"
end
else
# Not an enzyme name, so a pattern is assumed
if erp =~ re_cut_symbol
initialize_with_pattern_and_cut_symbols( erp )
else
initialize_with_pattern_and_cut_locations( erp, raw_cut_pairs )
end
end
elsif k == NilClass
err = "Passed a nil value. Perhaps you tried to pass a Bio::REBASE::EnzymeEntry that does not exist?\n"
err += "inspect = #{erp.inspect}"
raise ArgumentError, err
else
err = "I don't know what to do with class #{k} for erp.\n"
err += "inspect = #{erp.inspect}"
raise ArgumentError, err
end
end
# See AlignedStrands.align
def aligned_strands
AlignedStrands.align(@primary.pattern, @complement.pattern)
end
# See AlignedStrands.align_with_cuts
def aligned_strands_with_cuts
AlignedStrands.align_with_cuts(@primary.pattern, @complement.pattern, @primary.cut_locations, @complement.cut_locations)
end
# Returns +true+ if the cut pattern creates blunt fragments
def blunt?
as = aligned_strands_with_cuts
ary = [as.primary, as.complement]
ary.collect! { |seq| seq.split( cut_symbol ) }
# convert the cut sections to their lengths
ary.each { |i| i.collect! { |c| c.length } }
ary[0] == ary[1]
end
# Returns +true+ if the cut pattern creates sticky fragments
def sticky?
!blunt?
end
#########
protected
#########
def initialize_with_pattern_and_cut_symbols( s )
p_cl = SingleStrand::CutLocationsInEnzymeNotation.new( strip_padding(s) )
s = Bio::Sequence::NA.new( strip_cuts_and_padding(s) )
# * Reflect cuts that are in enzyme notation
# * 0 is not a valid enzyme index, decrement 0 and all negative
# c_cl = p_cl.collect { |n| s.length - n }.collect { |n| n <= 0 ? n - 1 : n } #wrong.
c_cl = p_cl.collect {|n| (n >= s.length or n < 1) ? ((s.length - n) - 1) : (s.length - n)}
create_cut_locations( p_cl.zip(c_cl) )
create_primary_and_complement( s, p_cl, c_cl )
end
def initialize_with_pattern_and_cut_locations( s, raw_cl )
create_cut_locations(raw_cl)
create_primary_and_complement( Bio::Sequence::NA.new(s), @cut_locations_in_enzyme_notation.primary, @cut_locations_in_enzyme_notation.complement )
end
def create_primary_and_complement(primary_seq, p_cuts, c_cuts)
@primary = SingleStrand.new( primary_seq, p_cuts )
@complement = SingleStrandComplement.new( primary_seq.forward_complement, c_cuts )
end
def create_cut_locations(raw_cl)
@cut_locations_in_enzyme_notation = CutLocationsInEnzymeNotation.new( *raw_cl.collect {|cl| CutLocationPairInEnzymeNotation.new(cl)} )
@cut_locations = @cut_locations_in_enzyme_notation.to_array_index
end
def initialize_with_rebase( e )
p_cl = [e.primary_strand_cut1, e.primary_strand_cut2]
c_cl = [e.complementary_strand_cut1, e.complementary_strand_cut2]
# If there's no cut in REBASE it's represented as a 0.
# 0 is an invalid index, it just means no cut.
p_cl.delete(0)
c_cl.delete(0)
raise IndexError unless p_cl.size == c_cl.size
initialize_with_pattern_and_cut_locations( e.pattern, p_cl.zip(c_cl) )
end
end
end
- Previous message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/analysis calculated_cuts.rb, NONE, 1.1 cut_range.rb, NONE, 1.1 cut_ranges.rb, NONE, 1.1 fragment.rb, NONE, 1.1 fragments.rb, NONE, 1.1 horizontal_cut_range.rb, NONE, 1.1 permutation.rb, NONE, 1.1 sequence_range.rb, NONE, 1.1 tags.rb, NONE, 1.1 vertical_cut_range.rb, NONE, 1.1
- Next message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/double_stranded aligned_strands.rb, NONE, 1.1 cut_location_pair.rb, NONE, 1.1 cut_location_pair_in_enzyme_notation.rb, NONE, 1.1 cut_locations.rb, NONE, 1.1 cut_locations_in_enzyme_notation.rb, NONE, 1.1
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the bioruby-cvs
mailing list