[BioRuby-cvs] bioruby/lib/bio/util/restriction_enzyme analysis.rb, 1.9, 1.10 analysis_basic.rb, 1.2, 1.3

Trevor Wennblom trevor at dev.open-bio.org
Tue Jan 2 06:18:40 UTC 2007


Update of /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme
In directory dev.open-bio.org:/tmp/cvs-serv3143/restriction_enzyme

Modified Files:
	analysis.rb analysis_basic.rb 
Log Message:


Index: analysis_basic.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis_basic.rb,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** analysis_basic.rb	2 Jan 2007 00:13:07 -0000	1.2
--- analysis_basic.rb	2 Jan 2007 06:18:38 -0000	1.3
***************
*** 81,137 ****
  
      tmp = create_enzyme_actions( sequence, *args )
!     enzyme_actions = tmp[0].merge(tmp[1])
  
!     sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
!     enzyme_actions.each do |id, enzyme_action|
        enzyme_action.cut_ranges.each do |cut_range|
!         sr_with_cuts.add_cut_range(cut_range)
        end
      end
  
!     sr_with_cuts.fragments.primary = sequence
!     sr_with_cuts.fragments.complement = sequence.forward_complement
!     unique_fragments_for_display( {0 => sr_with_cuts} )
    end
  
-   #########
-   protected
-   #########
-   
    UniqueFragment = Struct.new(:primary, :complement)
    class UniqueFragments < Array
!     def primary
!       tmp = []
!       self.each { |uf| tmp << uf.primary }
!       tmp.sort.map { |e| e.tr(' ', '') }
!     end
!     def complement
!       tmp = []
!       self.each { |uf| tmp << uf.complement }
!       tmp.sort.map { |e| e.tr(' ', '') }
      end
    end
  
-   def unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
-     uf_ary = UniqueFragments.new
-     return uf_ary if hash_of_sequence_ranges_with_cuts == nil or hash_of_sequence_ranges_with_cuts.empty?
- 
-     hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
-       sr_with_cuts.fragments.for_display.each do |fragment|
-         uf = UniqueFragment.new
-         uf.primary = fragment.primary
-         uf.complement = fragment.complement
  
!         duplicate = false
!         uf_ary.each do |element|
!           if (uf.primary == element.primary) and (uf.complement == element.complement)
!             duplicate = true
!             break
!           end
!         end
  
!         uf_ary << uf unless duplicate
        end
      end
      uf_ary
    end
--- 81,130 ----
  
      tmp = create_enzyme_actions( sequence, *args )
!     #enzyme_actions = tmp[0].merge(tmp[1])
!     enzyme_actions = tmp[0] + tmp[1]
  
!     sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
!     enzyme_actions.each do |enzyme_action|
        enzyme_action.cut_ranges.each do |cut_range|
!         sequence_range.add_cut_range(cut_range)
        end
      end
  
!     sequence_range.fragments.primary = sequence
!     sequence_range.fragments.complement = sequence.forward_complement
!     unique_fragments_for_display( {0 => sequence_range} )
    end
  
    UniqueFragment = Struct.new(:primary, :complement)
+   
    class UniqueFragments < Array
!     def primary; strip_and_sort(:primary); end
!     def complement; strip_and_sort(:complement); end
!     
!     protected
!     
!     def strip_and_sort( sym_strand )
!       self.map {|uf| uf.send( sym_strand ).tr(' ', '') }.sort
      end
    end
+   
+   #########
+   protected
+   #########
  
  
!   # * +hsh+: +Hash+  Key is a permutation ID, if any.  Value is SequenceRange object that has cuts.
!   # 
!   def unique_fragments_for_display( hsh )
!     uf_ary = UniqueFragments.new
!     return uf_ary if hsh == nil
  
!     hsh.each do |permutation_id, sequence_range|
!       sequence_range.fragments.for_display.each do |fragment|
!         # NOTE might not need tr here
!         uf_ary << UniqueFragment.new(fragment.primary.tr(' ', ''), fragment.complement.tr(' ', ''))
        end
      end
+     uf_ary.uniq!
      uf_ary
    end
***************
*** 142,193 ****
    # +args+:: The enzymes to use.
    def create_enzyme_actions( sequence, *args )
!     id = 0
!     enzyme_actions_that_sometimes_cut = {}
!     enzyme_actions_that_always_cut = {}
!     indicies_of_sometimes_cut = []
! 
      args.each do |enzyme|
        enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
        find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
!         #enzyme_actions_that_always_cut[id] = enzyme_to_enzyme_action( enzyme, offset )
!         enzyme_actions_that_always_cut[id] = enzyme.create_action_at( offset )
!         id += 1
        end
      end
  
      # enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
  
!     max = enzyme_actions_that_always_cut.size - 1
!     0.upto(max) do |i|
!       enzyme_action = enzyme_actions_that_always_cut[i]
        conflict = false
        other_cut_ranges = {}
-       enzyme_actions_that_always_cut.each { |key,i_ea| next if i == key; other_cut_ranges[key] = i_ea.cut_ranges }
  
        other_cut_ranges.each do |key, cut_ranges|
          cut_ranges.each do |cut_range|
            next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange  # we aren't concerned with horizontal cuts
!           previous_cut_left = cut_range.range.first 
!           previous_cut_right = cut_range.range.last
  
!           if (enzyme_action.right <= previous_cut_left) or
!              (enzyme_action.left > previous_cut_right) or
!              (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
              # no conflict
            else
              conflict = true
            end
! 
!           indicies_of_sometimes_cut += [i, key] if conflict == true
          end
        end
      end
  
!     indicies_of_sometimes_cut.uniq.each do |i|
!       enzyme_actions_that_sometimes_cut[i] = enzyme_actions_that_always_cut[i]
!       enzyme_actions_that_always_cut.delete(i)
!     end
! 
!     [enzyme_actions_that_sometimes_cut, enzyme_actions_that_always_cut]
    end
  
--- 135,209 ----
    # +args+:: The enzymes to use.
    def create_enzyme_actions( sequence, *args )
!     require 'set'
!     always_cut = []
!     indicies_of_sometimes_cut = Set.new
!     
      args.each do |enzyme|
        enzyme = Bio::RestrictionEnzyme.new(enzyme) unless enzyme.class == Bio::RestrictionEnzyme::DoubleStranded
+ 
        find_match_locations( sequence, enzyme.primary.to_re ).each do |offset|
!         always_cut << enzyme.create_action_at( offset )
        end
      end
+     
+     # VerticalCutRange should really be called VerticalAndHorizontalCutRange
+     
+     # * always_cut is now full of EnzymeActions at specific locations across 
+     #   the sequence.
+     # * always_cut will now be examined to see if any EnzymeActions may
+     #   conflict with one another, and if they do they'll be made note of in
+     #   indicies_of_sometimes_cut.  They will then be remove FIXME
+     # * a conflict occurs if another enzyme's bind site is compromised do due
+     #   to another enzyme's cut.  Enzyme's bind sites may overlap and not be
+     #   competitive, however neither bind site may be part of the other
+     #   enzyme's cut or else they do become competitive.
+     # * note that a small enzyme may possibly cut inbetween two cuts far apart
+     #   made by a larger enzyme, this would be a "sometimes" cut since it's
+     #   not guaranteed that the larger enzyme will cut first, therefore there
+     #   is competition.
+     
+ dirty = Set.new
  
+ =begin
+     always_cut.each_with_index do |ea, index|
+       
+     end
+ =end
      # enzyme_actions_that_always_cut may lose members, the members to be lost are recorded in indicies_of_sometimes_cut
  
!     always_cut.each_with_index do |ea, index1|
        conflict = false
        other_cut_ranges = {}
  
+       always_cut.each_with_index do |i_ea, index2|
+         next if index1 == index2
+         other_cut_ranges[index2] = i_ea.cut_ranges 
+       end
+       
        other_cut_ranges.each do |key, cut_ranges|
+         
          cut_ranges.each do |cut_range|
            next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange  # we aren't concerned with horizontal cuts
!           previous_cut_left, previous_cut_right = cut_range.min, cut_range.max
  
!           if (ea.right <= previous_cut_left) or
!              (ea.left > previous_cut_right) or
!              (ea.left > previous_cut_left and ea.right <= previous_cut_right) # in-between cuts
              # no conflict
            else
              conflict = true
            end
!           indicies_of_sometimes_cut += [index1, key] if conflict == true
          end
        end
      end
+     
+     sometimes_cut = always_cut.values_at( *indicies_of_sometimes_cut )
+     always_cut.delete_if {|x| sometimes_cut.include? x }
  
! #puts "Sometimes cut: #{sometimes_cut.size}"
! #puts "Always cut: #{always_cut.size}"
! #puts
!     [sometimes_cut, always_cut]
    end
  

Index: analysis.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/restriction_enzyme/analysis.rb,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -d -r1.9 -r1.10
*** analysis.rb	2 Jan 2007 00:13:07 -0000	1.9
--- analysis.rb	2 Jan 2007 06:18:38 -0000	1.10
***************
*** 40,45 ****
    def cut( sequence, *args )
      return nil if !sequence.kind_of?(String) or sequence.empty?
!     hash_of_sequence_ranges_with_cuts = cut_and_return_by_permutations( sequence, *args )
!     unique_fragments_for_display( hash_of_sequence_ranges_with_cuts )
    end
  
--- 40,44 ----
    def cut( sequence, *args )
      return nil if !sequence.kind_of?(String) or sequence.empty?
!     unique_fragments_for_display( cut_and_return_by_permutations( sequence, *args ) )
    end
  
***************
*** 51,121 ****
      return {} if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
      enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
!     return {} if enzyme_actions.empty? and initial_cuts.empty?
  
      if enzyme_actions.size > 1
        permutations = permute(enzyme_actions.size)
!     else
!       permutations = []
!     end
! 
!     # Indexed by permutation.
!     hash_of_sequence_ranges_with_cuts = {}
! 
!     if permutations.empty?
!       sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
!       initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
!       hash_of_sequence_ranges_with_cuts[0] = sr_with_cuts
!     end
! 
!     permutations.each do |permutation|
!       previous_cut_ranges = []
!       sr_with_cuts = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
!       initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) } }
  
!       permutation.each do |id|
!         enzyme_action = enzyme_actions[id]
  
!         # conflict is false if the current enzyme action may cut in it's range.
!         # conflict is true if it cannot do to a previous enzyme action making
!         # a cut where this enzyme action needs a whole recognition site.
!         conflict = false
  
!         # If current size of enzyme_action overlaps with previous cut_range, don't cut
!         # note that the enzyme action may fall in the middle of a previous enzyme action
!         # so all cut locations must be checked that would fall underneath.
!         previous_cut_ranges.each do |cut_range|
!           next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange  # we aren't concerned with horizontal cuts
!           previous_cut_left = cut_range.range.first 
!           previous_cut_right = cut_range.range.last
  
!           # Keep in mind: 
!           # * The cut location is to the immediate right of the base located at the index.
!           #   ex: at^gc -- the cut location is at index 1
!           # * The enzyme action location is located at the base of the index.
!           #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
!           if (enzyme_action.right <= previous_cut_left) or
!              (enzyme_action.left > previous_cut_right) or
!              (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
!             # no conflict
!           else
!             conflict = true
            end
          end
  
!         next if conflict == true
!         enzyme_action.cut_ranges.each { |cut_range| sr_with_cuts.add_cut_range(cut_range) }
!         previous_cut_ranges += enzyme_action.cut_ranges
        end
  
!       hash_of_sequence_ranges_with_cuts[permutation] = sr_with_cuts
!     end
  
!     hash_of_sequence_ranges_with_cuts.each do |permutation, sr_with_cuts|
!       sr_with_cuts.fragments.primary = sequence
!       sr_with_cuts.fragments.complement = sequence.forward_complement
      end
  
!     hash_of_sequence_ranges_with_cuts
    end
  
--- 50,129 ----
      return {} if !sequence.kind_of?(String) or sequence.empty?
      sequence = Bio::Sequence::NA.new( sequence )
+     sequence.freeze
+     
+     # +Hash+ Key is permutation ID, value is SequenceRange
+     my_hash = {}
+     
      enzyme_actions, initial_cuts = create_enzyme_actions( sequence, *args )
!     return my_hash if enzyme_actions.empty? and initial_cuts.empty?
  
      if enzyme_actions.size > 1
        permutations = permute(enzyme_actions.size)
!       
!       permutations.each do |permutation|
!         previous_cut_ranges = []
!         sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new(  0, 
!                                                                             0, 
!                                                                             sequence.size-1, 
!                                                                             sequence.size-1 )
!                                                                             
!         initial_cuts.each { |enzyme_action|
!           raise initial_cuts.inspect
!            
!                             enzyme_action.cut_ranges.each { |cut_range| 
!                                                             sequence_range.add_cut_range(cut_range) } }
  
!         permutation.each do |id|
!           enzyme_action = enzyme_actions[id]
  
!           # conflict is false if the current enzyme action may cut in it's range.
!           # conflict is true if it cannot due to a previous enzyme action making
!           # a cut where this enzyme action needs a whole recognition site.
!           conflict = false
  
!           # If current size of enzyme_action overlaps with previous cut_range, don't cut
!           # note that the enzyme action may fall in the middle of a previous enzyme action
!           # so all cut locations must be checked that would fall underneath.
!           previous_cut_ranges.each do |cut_range|
!             next unless cut_range.class == Bio::RestrictionEnzyme::Range::VerticalCutRange  # we aren't concerned with horizontal cuts
!             previous_cut_left = cut_range.range.first 
!             previous_cut_right = cut_range.range.last
  
!             # Keep in mind: 
!             # * The cut location is to the immediate right of the base located at the index.
!             #   ex: at^gc -- the cut location is at index 1
!             # * The enzyme action location is located at the base of the index.
!             #   ex: atgc -- 0 => 'a', 1 => 't', 2 => 'g', 3 => 'c'
!             if (enzyme_action.right <= previous_cut_left) or
!                (enzyme_action.left > previous_cut_right) or
!                (enzyme_action.left > previous_cut_left and enzyme_action.right <= previous_cut_right) # in between cuts
!               # no conflict
!             else
!               conflict = true
!             end
            end
+ 
+           next if conflict == true
+           enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) }
+           previous_cut_ranges += enzyme_action.cut_ranges        
          end
  
!         sequence_range.fragments.primary = sequence
!         sequence_range.fragments.complement = sequence.forward_complement
!         my_hash[permutation] = sequence_range
        end
+       
+     else # !if enzyme_actions.size > 1
+       sequence_range = Bio::RestrictionEnzyme::Range::SequenceRange.new( 0, 0, sequence.size-1, sequence.size-1 )
  
!       #initial_cuts.each { |key, enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
!       initial_cuts.each { |enzyme_action| enzyme_action.cut_ranges.each { |cut_range| sequence_range.add_cut_range(cut_range) } }
  
!       sequence_range.fragments.primary = sequence
!       sequence_range.fragments.complement = sequence.forward_complement
!       my_hash[0] = sequence_range
      end
  
!     my_hash
    end
  




More information about the bioruby-cvs mailing list