[BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme analysis.rb, 1.9, 1.10 analysis_basic.rb, 1.2, 1.3
Trevor Wennblom
trevor at dev.open-bio.org
Tue Jan 2 06:18:40 UTC 2007
Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory dev.open-bio.org:/tmp/cvs-serv3143/restriction_enzyme
Modified Files:
analysis.rb analysis_basic.rb
Log Message:
Index: analysis_basic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis_basic.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** analysis_basic.rb 2 Jan 2007 00:13:07 -0000 1.2
--- analysis_basic.rb 2 Jan 2007 06:18:38 -0000 1.3
***************
*** 81,137 ****
tmp = create_enzyme_actions( sequence, *args )
! enzyme_actions = tmp[0].merge(tmp[1])
! sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! enzyme_actions.each do |id, enzyme_action|
enzyme_action.cut_ranges.each do |cut_range|
! sr_with_cuts.add_cut_range(cut_range)
end
end
! sr_with_cuts.fragments.primary = sequence
! sr_with_cuts.fragments.complement = sequence.forward_complement
! unique_fragments_for_display( {0 => sr_with_cuts} )
end
- #########
- protected
- #########
-
UniqueFragment = Struct.new(:primary, :complement)
class UniqueFragments < Array
! def primary
! tmp = []
! self.each { |uf| tmp << uf.primary }
! tmp.sort.map { |e| e.tr(' ', '') }
! end
! def complement
! tmp = []
! self.each { |uf| tmp << uf.complement }
! tmp.sort.map { |e| e.tr(' ', '') }
end
end
- def unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
- uf_ary = UniqueFragments.new
- return uf_ary if hash_of_sequence_ranges_with_cuts == nil or hash_of_sequence_ranges_with_cuts.empty?
-
- hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
- sr_with_cuts.fragments.for_display.each do |fragment|
- uf = UniqueFragment.new
- uf.primary = fragment.primary
- uf.complement = fragment.complement
! duplicate = false
! uf_ary.each do |element|
! if (uf.primary == element.primary) and (uf.complement == element.complement)
! duplicate = true
! break
! end
! end
! uf_ary << uf unless duplicate
end
end
uf_ary
end
--- 81,130 ----
tmp = create_enzyme_actions( sequence, *args )
! #enzyme_actions = tmp[0].merge(tmp[1])
! enzyme_actions = tmp[0] + tmp[1]
! sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! enzyme_actions.each do |enzyme_action|
enzyme_action.cut_ranges.each do |cut_range|
! sequence_range.add_cut_range(cut_range)
end
end
! sequence_range.fragments.primary = sequence
! sequence_range.fragments.complement = sequence.forward_complement
! unique_fragments_for_display( {0 => sequence_range} )
end
UniqueFragment = Struct.new(:primary, :complement)
+
class UniqueFragments < Array
! def primary; strip_and_sort(:primary); end
! def complement; strip_and_sort(:complement); end
!
! protected
!
! def strip_and_sort( sym_strand )
! self.map {|uf| uf.send( sym_strand ).tr(' ', '') }.sort
end
end
+
+ #########
+ protected
+ #########
! # * +hsh+: +Hash+ Key is a permutation ID, if any. Value is SequenceRange object that has cuts.
! #
! def unique_fragments_for_display( hsh )
! uf_ary = UniqueFragments.new
! return uf_ary if hsh == nil
! hsh.each do |permutation_id, sequence_range|
! sequence_range.fragments.for_display.each do |fragment|
! # NOTE might not need tr here
! uf_ary << UniqueFragment.new(fragment.primary.tr(' ', ''), fragment.complement.tr(' ', ''))
end
end
+ uf_ary.uniq!
uf_ary
end
***************
*** 142,193 ****
# +args+:: The enzymes to use.
def create_enzyme_actions( sequence, *args )
! id = 0
! enzyme_actions_that_sometimes_cut = {}
! enzyme_actions_that_always_cut = {}
! indicies_of_sometimes_cut = []
!
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
! #enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
! enzyme_actions_that_always_cut[id] = enzyme.create_action_at( offset )
! id += 1
end
end
# enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
! max = enzyme_actions_that_always_cut.size - 1
! 0.upto(max) do |i|
! enzyme_action = enzyme_actions_that_always_cut[i]
conflict = false
other_cut_ranges = {}
- enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
other_cut_ranges.each do |key, cut_ranges|
cut_ranges.each do |cut_range|
next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange # we aren't concerned with horizontal cuts
! previous_cut_left = cut_range.range.first
! previous_cut_right = cut_range.range.last
! if (enzyme_action.right <= previous_cut_left) or
! (enzyme_action.left > previous_cut_right) or
! (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
# no conflict
else
conflict = true
end
!
! indicies_of_sometimes_cut += [i, key] if conflict == true
end
end
end
! indicies_of_sometimes_cut.uniq.each do |i|
! enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
! enzyme_actions_that_always_cut.delete(i)
! end
!
! [enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
end
--- 135,209 ----
# +args+:: The enzymes to use.
def create_enzyme_actions( sequence, *args )
! require 'set'
! always_cut = []
! indicies_of_sometimes_cut = Set.new
!
args.each do |enzyme|
enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
+
find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
! always_cut << enzyme.create_action_at( offset )
end
end
+
+ # VerticalCutRange should really be called VerticalAndHorizontalCutRange
+
+ # * always_cut is now full of EnzymeActions at specific locations across
+ # the sequence.
+ # * always_cut will now be examined to see if any EnzymeActions may
+ # conflict with one another, and if they do they'll be made note of in
+ # indicies_of_sometimes_cut. They will then be remove FIXME
+ # * a conflict occurs if another enzyme's bind site is compromised do due
+ # to another enzyme's cut. Enzyme's bind sites may overlap and not be
+ # competitive, however neither bind site may be part of the other
+ # enzyme's cut or else they do become competitive.
+ # * note that a small enzyme may possibly cut inbetween two cuts far apart
+ # made by a larger enzyme, this would be a "sometimes" cut since it's
+ # not guaranteed that the larger enzyme will cut first, therefore there
+ # is competition.
+
+ dirty = Set.new
+ =begin
+ always_cut.each_with_index do |ea, index|
+
+ end
+ =end
# enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
! always_cut.each_with_index do |ea, index1|
conflict = false
other_cut_ranges = {}
+ always_cut.each_with_index do |i_ea, index2|
+ next if index1 == index2
+ other_cut_ranges[index2] = i_ea.cut_ranges
+ end
+
other_cut_ranges.each do |key, cut_ranges|
+
cut_ranges.each do |cut_range|
next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange # we aren't concerned with horizontal cuts
! previous_cut_left, previous_cut_right = cut_range.min, cut_range.max
! if (ea.right <= previous_cut_left) or
! (ea.left > previous_cut_right) or
! (ea.left > previous_cut_left and ea.right <= previous_cut_right) # in-between cuts
# no conflict
else
conflict = true
end
! indicies_of_sometimes_cut += [index1, key] if conflict == true
end
end
end
+
+ sometimes_cut = always_cut.values_at( *indicies_of_sometimes_cut )
+ always_cut.delete_if {|x| sometimes_cut.include? x }
! #puts "Sometimes cut: #{sometimes_cut.size}"
! #puts "Always cut: #{always_cut.size}"
! #puts
! [sometimes_cut, always_cut]
end
Index: analysis.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis.rb,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** analysis.rb 2 Jan 2007 00:13:07 -0000 1.9
--- analysis.rb 2 Jan 2007 06:18:38 -0000 1.10
***************
*** 40,45 ****
def cut( sequence, *args )
return nil if !sequence.kind_of?(String) or sequence.empty?
! hash_of_sequence_ranges_with_cuts = cut_and_return_by_permutations( sequence, *args )
! unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
end
--- 40,44 ----
def cut( sequence, *args )
return nil if !sequence.kind_of?(String) or sequence.empty?
! unique_fragments_for_display( cut_and_return_by_permutations( sequence, *args ) )
end
***************
*** 51,121 ****
return {} if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
! return {} if enzyme_actions.empty? and initial_cuts.empty?
if enzyme_actions.size > 1
permutations = permute(enzyme_actions.size)
! else
! permutations = []
! end
!
! # Indexed by permutation.
! hash_of_sequence_ranges_with_cuts = {}
!
! if permutations.empty?
! sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
! hash_of_sequence_ranges_with_cuts[0] = sr_with_cuts
! end
!
! permutations.each do |permutation|
! previous_cut_ranges = []
! sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
! permutation.each do |id|
! enzyme_action = enzyme_actions[id]
! # conflict is false if the current enzyme action may cut in it's range.
! # conflict is true if it cannot do to a previous enzyme action making
! # a cut where this enzyme action needs a whole recognition site.
! conflict = false
! # If current size of enzyme_action overlaps with previous cut_range, don't cut
! # note that the enzyme action may fall in the middle of a previous enzyme action
! # so all cut locations must be checked that would fall underneath.
! previous_cut_ranges.each do |cut_range|
! next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange # we aren't concerned with horizontal cuts
! previous_cut_left = cut_range.range.first
! previous_cut_right = cut_range.range.last
! # Keep in mind:
! # * The cut location is to the immediate right of the base located at the index.
! # ex: at^gc -- the cut location is at index 1
! # * The enzyme action location is located at the base of the index.
! # ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
! if (enzyme_action.right <= previous_cut_left) or
! (enzyme_action.left > previous_cut_right) or
! (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
! # no conflict
! else
! conflict = true
end
end
! next if conflict == true
! enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) }
! previous_cut_ranges += enzyme_action.cut_ranges
end
! hash_of_sequence_ranges_with_cuts[permutation] = sr_with_cuts
! end
! hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
! sr_with_cuts.fragments.primary = sequence
! sr_with_cuts.fragments.complement = sequence.forward_complement
end
! hash_of_sequence_ranges_with_cuts
end
--- 50,129 ----
return {} if !sequence.kind_of?(String) or sequence.empty?
sequence = Bio::Sequence::NA.new( sequence )
+ sequence.freeze
+
+ # +Hash+ Key is permutation ID, value is SequenceRange
+ my_hash = {}
+
enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
! return my_hash if enzyme_actions.empty? and initial_cuts.empty?
if enzyme_actions.size > 1
permutations = permute(enzyme_actions.size)
!
! permutations.each do |permutation|
! previous_cut_ranges = []
! sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0,
! 0,
! sequence.size-1,
! sequence.size-1 )
!
! initial_cuts.each { |enzyme_action|
! raise initial_cuts.inspect
!
! enzyme_action.cut_ranges.each { |cut_range|
! sequence_range.add_cut_range(cut_range) } }
! permutation.each do |id|
! enzyme_action = enzyme_actions[id]
! # conflict is false if the current enzyme action may cut in it's range.
! # conflict is true if it cannot due to a previous enzyme action making
! # a cut where this enzyme action needs a whole recognition site.
! conflict = false
! # If current size of enzyme_action overlaps with previous cut_range, don't cut
! # note that the enzyme action may fall in the middle of a previous enzyme action
! # so all cut locations must be checked that would fall underneath.
! previous_cut_ranges.each do |cut_range|
! next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange # we aren't concerned with horizontal cuts
! previous_cut_left = cut_range.range.first
! previous_cut_right = cut_range.range.last
! # Keep in mind:
! # * The cut location is to the immediate right of the base located at the index.
! # ex: at^gc -- the cut location is at index 1
! # * The enzyme action location is located at the base of the index.
! # ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
! if (enzyme_action.right <= previous_cut_left) or
! (enzyme_action.left > previous_cut_right) or
! (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
! # no conflict
! else
! conflict = true
! end
end
+
+ next if conflict == true
+ enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
+ previous_cut_ranges += enzyme_action.cut_ranges
end
! sequence_range.fragments.primary = sequence
! sequence_range.fragments.complement = sequence.forward_complement
! my_hash[permutation] = sequence_range
end
+
+ else # !if enzyme_actions.size > 1
+ sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
! #initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
! initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
! sequence_range.fragments.primary = sequence
! sequence_range.fragments.complement = sequence.forward_complement
! my_hash[0] = sequence_range
end
! my_hash
end
More information about the bioruby-cvs
mailing list