[BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme analysis_basic.rb, NONE, 1.1 analysis.rb, 1.7, 1.8 double_stranded.rb, 1.3, 1.4
Trevor Wennblom
trevor at dev.open-bio.org
Mon Jan 1 23:47:30 UTC 2007
- Previous message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/analysis calculated_cuts.rb, 1.3, 1.4 cut_range.rb, 1.2, 1.3 cut_ranges.rb, 1.3, 1.4 horizontal_cut_range.rb, 1.2, 1.3 sequence_range.rb, 1.4, 1.5 vertical_cut_range.rb, 1.2, 1.3
- Next message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/range - New directory
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory dev.open-bio.org:/tmp/cvs-serv2413/restriction_enzyme
Modified Files:
analysis.rb double_stranded.rb
Added Files:
analysis_basic.rb
Log Message:
--- NEW FILE: analysis_basic.rb ---
#
# bio/util/restrction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
#
# Author:: Trevor Wennblom <mailto:trevor at corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License:: Distributes under the same terms as Ruby
#
# $Id: analysis_basic.rb,v 1.1 2007/01/01 23:47:27 trevor Exp $
#
#--
#if RUBY_VERSION[0..2] == '1.9' or RUBY_VERSION == '2.0'
# err = "This class makes use of 'include' on ranges quite a bit. Possibly unstable in development Ruby. 2005/12/20."
# err += "http://blade.nagaokaut.ac.jp/cgi-bin/vframe.rb/ruby/ruby-talk/167182?167051-169742"
# raise err
#end
#++
require 'pathname'
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
$:.unshift(libpath) unless $:.include?(libpath)
require 'bio'
class Bio::Sequence::NA
# Example:
#
# seq = Bio::Sequence::NA.new('gaattc')
# cuts = seq.cut_with_enzyme('EcoRI')
#
# _or_
#
# seq = Bio::Sequence::NA.new('gaattc')
# cuts = seq.cut_with_enzyme('g^aattc')
# ---
# See Bio::RestrictionEnzyme::Analysis.cut
def cut_with_enzyme(*args)
Bio::RestrictionEnzyme::Analysis.cut(self, *args)
end
alias cut_with_enzymes cut_with_enzyme
end
require 'pp'
require 'bio/util/restriction_enzyme'
require 'bio/util/restriction_enzyme/analysis/sequence_range'
class Bio::RestrictionEnzyme
#
# bio/util/restrction_enzyme/analysis_basic.rb - Does the work of fragmenting the DNA from the enzymes
#
# Author:: Trevor Wennblom <mailto:trevor at corevx.com>
# Copyright:: Copyright (c) 2005-2007 Midwinter Laboratories, LLC (http://midwinterlabs.com)
# License:: Distributes under the same terms as Ruby
#
class Analysis
def self.cut( sequence, *args )
# self.new.cut( sequence, *args )
end
def self.cut_without_permutations( sequence, *args )
self.new.cut_without_permutations( sequence, *args )
end
# Example:
#
# Analysis.cut_without_permutations('gaattc', 'EcoRI')
#
# _same as:_
#
# Analysis.cut_without_permutations('gaattc', 'g^aattc')
# ---
# *Arguments*
# * +sequence+: +String+ kind of object that will be used as a nucleic acid sequence
# * +args+: Series of
# *Returns*:: +Hash+ ?(array?) of Bio::RestrictionEnzyme::Analysis::UniqueFragment objects
def cut_without_permutations( sequence, *args )
return {} if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
tmp = create_enzyme_actions( sequence, *args )
enzyme_actions = tmp[0].merge(tmp[1])
sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
enzyme_actions.each do |id, enzyme_action|
enzyme_action.cut_ranges.each do |cut_range|
sr_with_cuts.add_cut_range(cut_range)
end
end
sr_with_cuts.fragments.primary = sequence
sr_with_cuts.fragments.complement = sequence.forward_complement
unique_fragments_for_display( {0 => sr_with_cuts} )
end
#########
protected
#########
UniqueFragment = Struct.new(:primary, :complement)
class UniqueFragments < Array
def primary
tmp = []
self.each { |uf| tmp << uf.primary }
tmp.sort.map { |e| e.tr(' ', '') }
end
def complement
tmp = []
self.each { |uf| tmp << uf.complement }
tmp.sort.map { |e| e.tr(' ', '') }
end
end
def unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
uf_ary = UniqueFragments.new
return uf_ary if hash_of_sequence_ranges_with_cuts == nil or hash_of_sequence_ranges_with_cuts.empty?
hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
sr_with_cuts.fragments.for_display.each do |fragment|
uf = UniqueFragment.new
uf.primary = fragment.primary
uf.complement = fragment.complement
duplicate = false
uf_ary.each do |element|
if (uf.primary == element.primary) and (uf.complement == element.complement)
duplicate = true
break
end
end
uf_ary << uf unless duplicate
end
end
uf_ary
end
# Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
#
# +sequence+:: The string of DNA to match the enzyme recognition sites against
# +args+:: The enzymes to use.
def create_enzyme_actions( sequence, *args )
id = 0
enzyme_actions_that_sometimes_cut = {}
enzyme_actions_that_always_cut = {}
indicies_of_sometimes_cut = []
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
#enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
enzyme_actions_that_always_cut[id] = enzyme.create_action_at( offset )
id += 1
end
end
# enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
max = enzyme_actions_that_always_cut.size - 1
0.upto(max) do |i|
enzyme_action = enzyme_actions_that_always_cut[i]
conflict = false
other_cut_ranges = {}
enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
other_cut_ranges.each do |key, cut_ranges|
cut_ranges.each do |cut_range|
next unless cut_range.class == VerticalCutRange # we aren't concerned with horizontal cuts
previous_cut_left = cut_range.range.first
previous_cut_right = cut_range.range.last
if (enzyme_action.right <= previous_cut_left) or
(enzyme_action.left > previous_cut_right) or
(enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
# no conflict
else
conflict = true
end
indicies_of_sometimes_cut += [i, key] if conflict == true
end
end
end
indicies_of_sometimes_cut.uniq.each do |i|
enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
enzyme_actions_that_always_cut.delete(i)
end
[enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
end
# Returns an +Array+ of the match indicies of a RegExp to a string.
#
# ---
# *Arguments*
# * +string+: The string to scan
# * +re+: A RegExp to use
# *Returns*:: +Array+ with indicies of match locations
def find_match_locations( string, re )
md = string.match( re )
locations = []
counter = 0
while md
# save the match index relative to the original string
locations << (counter += md.begin(0))
# find the next match
md = string[ (counter += 1)..-1 ].match( re )
end
locations
end
end # Analysis
end # Bio::RestrictionEnzyme
Index: analysis.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis.rb,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** analysis.rb 1 Jan 2007 02:16:05 -0000 1.7
--- analysis.rb 1 Jan 2007 23:47:27 -0000 1.8
***************
*** 21,37 ****
$:.unshift(libpath) unless $:.include?(libpath)
! require 'bio'
! class Bio::Sequence::NA
! # See Bio::RestrictionEnzyme::Analysis.cut
! def cut_with_enzyme(*args)
! Bio::RestrictionEnzyme::Analysis.cut(self, *args)
! end
! alias cut_with_enzymes cut_with_enzyme
! end
!
! require 'pp'
!
! require 'bio/util/restriction_enzyme'
! require 'bio/util/restriction_enzyme/analysis/sequence_range'
class Bio::RestrictionEnzyme
--- 21,25 ----
$:.unshift(libpath) unless $:.include?(libpath)
! require 'bio/util/restriction_enzyme/analysis_basic'
class Bio::RestrictionEnzyme
***************
*** 50,84 ****
end
! def self.cut_without_permutations( sequence, *args )
! self.new.cut_without_permutations( sequence, *args )
! end
!
! def self.cut_and_return_by_permutations( sequence, *args )
! self.new.cut_and_return_by_permutations( sequence, *args )
! end
!
! def cut_without_permutations( sequence, *args )
! return {} if !sequence.kind_of?(String) or sequence.empty?
! sequence = Bio::Sequence::NA.new( sequence )
!
! #enzyme_actions = create_enzyme_actions( sequence, *args )
! tmp = create_enzyme_actions( sequence, *args )
! enzyme_actions = tmp[0].merge(tmp[1])
!
! sr_with_cuts = SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! enzyme_actions.each do |id, enzyme_action|
! enzyme_action.cut_ranges.each do |cut_range|
! sr_with_cuts.add_cut_range(cut_range)
! end
! end
!
! sr_with_cuts.fragments.primary = sequence
! sr_with_cuts.fragments.complement = sequence.forward_complement
!
! tmp = {}
! tmp[0] = sr_with_cuts
! unique_fragments_for_display( tmp )
end
def cut_and_return_by_permutations( sequence, *args )
return {} if !sequence.kind_of?(String) or sequence.empty?
--- 38,51 ----
end
! def cut( sequence, *args )
! return nil if !sequence.kind_of?(String) or sequence.empty?
! hash_of_sequence_ranges_with_cuts = cut_and_return_by_permutations( sequence, *args )
! unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
end
+ #########
+ protected
+ #########
+
def cut_and_return_by_permutations( sequence, *args )
return {} if !sequence.kind_of?(String) or sequence.empty?
***************
*** 123,134 ****
previous_cut_right = cut_range.range.last
- =begin
- puts "--- #{permutation.inspect} ---"
- puts "Previous cut left: #{previous_cut_left}"
- puts "EA.left #{enzyme_action.left}"
- puts "Previous cut right: #{previous_cut_right}"
- puts "EA.right: #{enzyme_action.right}"
- =end
-
# Keep in mind:
# * The cut location is to the immediate right of the base located at the index.
--- 90,93 ----
***************
*** 140,148 ****
(enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
# no conflict
- #puts "no conflict"
-
else
conflict = true
- #puts "conflict"
end
end
--- 99,104 ----
***************
*** 161,178 ****
end
- #pp hash_of_sequence_ranges_with_cuts
hash_of_sequence_ranges_with_cuts
end
- def cut( sequence, *args )
- return nil if !sequence.kind_of?(String) or sequence.empty?
- hash_of_sequence_ranges_with_cuts = cut_and_return_by_permutations( sequence, *args )
- unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
- end
-
- #########
- protected
- #########
-
def permute(count, permutations = [[0]])
return permutations if count <= 1
--- 117,123 ----
***************
*** 190,366 ****
end
- UniqueFragment = Struct.new(:primary, :complement)
- class UniqueFragments < Array
- def primary
- tmp = []
- self.each { |uf| tmp << uf.primary }
- tmp.sort.map { |e| e.tr(' ', '') }
- end
- def complement
- tmp = []
- self.each { |uf| tmp << uf.complement }
- tmp.sort.map { |e| e.tr(' ', '') }
- end
- end
-
- def unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
- uf_ary = UniqueFragments.new
- return uf_ary if hash_of_sequence_ranges_with_cuts == nil or hash_of_sequence_ranges_with_cuts.empty?
-
- hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
- sr_with_cuts.fragments.for_display.each do |fragment|
- uf = UniqueFragment.new
- uf.primary = fragment.primary
- uf.complement = fragment.complement
-
- duplicate = false
- uf_ary.each do |element|
- if (uf.primary == element.primary) and (uf.complement == element.complement)
- duplicate = true
- break
- end
- end
-
- uf_ary << uf unless duplicate
- end
- end
- uf_ary
- end
-
-
- =begin
- Strand = Struct.new(:primary, :complement, :p_left, :p_right, :c_left, :c_right)
-
- def ts_fragments_to_strands( sequence, fragments )
- sequence = Bio::Sequence::NA.new( sequence )
- strands = []
- fragments.each do |f|
- p = sequence[f.p_left..f.p_right]
- c = sequence[f.c_left..f.c_right]
- strands << Strand.new(p, c, f.p_left, f.p_right, f.c_left, f.c_right)
- end
- strands
- end
- =end
-
- # Defines a single enzyme action, in this case being a range that correlates
- # to the DNA sequence that may contain it's own internal cuts.
- class EnzymeAction < SequenceRange
- end
-
- # Creates an array of EnzymeActions based on the DNA sequence and supplied enzymes.
- #
- # +sequence+:: The string of DNA to match the enzyme recognition sites against
- # +args+:: The enzymes to use.
- def create_enzyme_actions( sequence, *args )
- id = 0
- enzyme_actions_that_sometimes_cut = {}
- enzyme_actions_that_always_cut = {}
- indicies_of_sometimes_cut = []
-
- args.each do |enzyme|
- enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
- find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
- enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
- id += 1
- end
- end
-
- # enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
-
- max = enzyme_actions_that_always_cut.size - 1
- 0.upto(max) do |i|
- enzyme_action = enzyme_actions_that_always_cut[i]
- conflict = false
- other_cut_ranges = {}
- #enzyme_actions.each { |key,enzyme_action| next if i == key; puts "i: #{i}, key: #{key}"; previous_cut_ranges += enzyme_action.cut_ranges }
- # enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; puts "i: #{i}, key: #{key}"; other_cut_ranges[key] = i_ea.cut_ranges }
- enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
- # puts "Enzyme action #{i}:"
- # pp enzyme_actions[i]
- # pp enzyme_action
- # puts "Previous cut ranges:"
- # pp previous_cut_ranges
-
- other_cut_ranges.each do |key, cut_ranges|
- cut_ranges.each do |cut_range|
- next unless cut_range.class == VerticalCutRange # we aren't concerned with horizontal cuts
- previous_cut_left = cut_range.range.first
- previous_cut_right = cut_range.range.last
-
- if (enzyme_action.right <= previous_cut_left) or
- (enzyme_action.left > previous_cut_right) or
- (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
- # no conflict
- # puts "no conflict"
-
- else
- conflict = true
- # puts "conflict"
- #puts "cut range:"
- #pp cut_range
- #puts "enzyme action:"
- #pp enzyme_action
- end
-
- indicies_of_sometimes_cut += [i, key] if conflict == true
- end
- end
-
- # We don't need to make permutations with this enzyme action if it always cuts
- # indicies << i if conflict == false
- end
- # pp indicies_of_sometimes_cut
-
- indicies_of_sometimes_cut.uniq.each do |i|
- enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
- enzyme_actions_that_always_cut.delete(i)
- end
- #puts 'Always cut:'
- #pp enzyme_actions_that_always_cut
- #puts 'Permute:'
- #pp enzyme_actions_that_sometimes_cut
-
- [enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
- end
-
- # Returns the offsets of the match of a RegExp to a string.
- #
- # +string+:: The string to scan.
- # +re+:: A regexp to use.
- def find_match_locations( string, re )
- md = string.match( re )
- locations = []
- location = -1
- while md
- location += md.pre_match.size + 1
- locations << location
- # md[0] is the same as $&, or "the match" itself
- md = (md[0][1..-1] + md.post_match).match( re )
- end
- locations
- end
-
- # Takes a RestrictionEnzyme and a numerical offset to the sequence and
- # returns an EnzymeAction
- #
- # +enzyme+:: RestrictionEnzyme
- # +offset+:: Numerical offset of where the enzyme action occurs on the seqeunce
- def enzyme_to_enzyme_action( enzyme, offset )
- enzyme_action = EnzymeAction.new(offset, offset + enzyme.primary.size-1, offset, offset + enzyme.complement.size-1)
-
- enzyme.cut_locations.each do |cut_pair|
- p = cut_pair[0]
- c = cut_pair[1]
- if c >= p
- enzyme_action.add_cut_range(offset+p, nil, nil, offset+c)
- else
- enzyme_action.add_cut_range(nil, offset+p, offset+c, nil)
- end
- end
-
- enzyme_action
- end
-
end # Analysis
end # Bio::RestrictionEnzyme
--- 135,138 ----
Index: double_stranded.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/double_stranded.rb,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** double_stranded.rb 1 Jan 2007 05:07:04 -0000 1.3
--- double_stranded.rb 1 Jan 2007 23:47:27 -0000 1.4
***************
*** 14,17 ****
--- 14,19 ----
require 'bio/db/rebase'
require 'bio/util/restriction_enzyme'
+ require 'bio/util/restriction_enzyme/analysis/sequence_range'
+
require 'bio/util/restriction_enzyme/cut_symbol'
require 'bio/util/restriction_enzyme/single_strand'
***************
*** 96,99 ****
--- 98,102 ----
# Decide if this String is an enzyme name or a pattern
if Bio::RestrictionEnzyme.enzyme_name?( erp )
+ # FIXME we added this to rebase...
# Check if it's a known name
known_enzyme = false
***************
*** 157,160 ****
--- 160,252 ----
!blunt?
end
+
+ # Takes a RestrictionEnzyme object and a numerical offset to the sequence and
+ # returns an EnzymeAction
+ #
+ # +restriction_enzyme+:: RestrictionEnzyme
+ # +offset+:: Numerical offset of where the enzyme action occurs on the seqeunce
+ def create_action_at( offset )
+ #def enzyme_to_enzyme_action( restriction_enzyme, offset )
+ enzyme_action = EnzymeAction.new( offset,
+ offset + @primary.size-1,
+ offset,
+ offset + @complement.size-1)
+
+ @cut_locations.each do |cut_location_pair|
+ # cut_pair is a DoubleStranded::CutLocationPair
+ p, c = cut_location_pair.primary, cut_location_pair.complement
+ if c >= p
+ enzyme_action.add_cut_range(offset+p, nil, nil, offset+c)
+ else
+ enzyme_action.add_cut_range(nil, offset+p, offset+c, nil)
+ end
+ end
+
+ enzyme_action
+ end
+
+ # An EnzymeAction is a way of representing a potential effect that a
+ # RestrictionEnzyme may have on a nucleotide sequence, an 'action'.
+ #
+ # Multiple cuts in multiple locations on a sequence may occur in one
+ # 'action' if it is done by a single enzyme.
+ #
+ # An EnzymeAction is a series of locations that represents where the restriction
+ # enzyme will bind on the sequence, as well as what ranges are cut on the
+ # sequence itself. The complexity is due to the fact that our virtual
+ # restriction enzyme may create multiple segments from its cutting action,
+ # on which another restriction enzyme may operate upon.
+ #
+ # For example, the DNA sequence:
+ #
+ # 5' - G A A T A A A C G A - 3'
+ # 3' - C T T A T T T G C T - 5'
+ #
+ # When mixed with the restriction enzyme with the following cut pattern:
+ #
+ # 5' - A|A T A A A C|G - 3'
+ # +-+ +
+ # 3' - T T|A T T T G|C - 5'
+ #
+ # And also mixed with the restriction enzyme of the following cut pattern:
+ #
+ # 5' - A A|A C - 3'
+ # +-+
+ # 3' - T|T T G - 5'
+ #
+ # Would result in a DNA sequence with these cuts:
+ #
+ # 5' - G A|A T A A|A C|G A - 3'
+ # +-+ +-+ +
+ # 3' - C T T|A T|T T G|C T - 5'
+ #
+ # Or these separate "free-floating" sequences:
+ #
+ # 5' - G A - 3'
+ # 3' - C T T - 5'
+ #
+ # 5' - A T A A - 3'
+ # 3' - A T - 5'
+ #
+ # 5' - A C - 3'
+ # 3' - T T G - 5'
+ #
+ # 5' - G A - 3'
+ # 3' - C T - 5'
+ #
+ # This would be represented by two EnzymeActions - one for each
+ # RestrictionEnzyme.
+ #
+ # To initialize an EnzymeAction you must first instantiate it with the
+ # beginning and ending locations of where it will operate on a nucleotide
+ # sequence.
+ #
+ # Next the ranges of cu
+ #
+ # An EnzymeAction is
+ # Defines a single enzyme action, in this case being a range that correlates
+ # to the DNA sequence that may contain it's own internal cuts.
+ class EnzymeAction < Bio::RestrictionEnzyme::Analysis::SequenceRange
+ end
#########
- Previous message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/analysis calculated_cuts.rb, 1.3, 1.4 cut_range.rb, 1.2, 1.3 cut_ranges.rb, 1.3, 1.4 horizontal_cut_range.rb, 1.2, 1.3 sequence_range.rb, 1.4, 1.5 vertical_cut_range.rb, 1.2, 1.3
- Next message: [BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme/range - New directory
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the bioruby-cvs
mailing list