[BioRuby-cvs] bioruby/lib/bio/util sirna.rb,1.1,1.2

Katayama Toshiaki k at pub.open-bio.org
Tue Aug 9 01:39:28 EDT 2005


Update of /home/repository/bioruby/bioruby/lib/bio/util
In directory pub.open-bio.org:/tmp/cvs-serv625

Modified Files:
	sirna.rb 
Log Message:
* clean up by katayama


Index: sirna.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/util/sirna.rb,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** sirna.rb	9 Aug 2005 03:50:36 -0000	1.1
--- sirna.rb	9 Aug 2005 05:39:26 -0000	1.2
***************
*** 1,6 ****
  #
! # bio/util/sirna.rb - Class for Designing small inhibitory RNAs
  #
! #   Copyright (C) 2004 Itoshi NIKAIDO <itoshi.nikaido at nifty.com>
  #
  #  This library is free software; you can redistribute it and/or
--- 1,6 ----
  #
! # bio/util/sirna.rb - Class for designing small inhibitory RNAs
  #
! #   Copyright (C) 2004, 2005  Itoshi NIKAIDO <dritoshi at gmail.com>
  #
  #  This library is free software; you can redistribute it and/or
***************
*** 24,27 ****
--- 24,28 ----
  
  module Bio
+ 
    class SiRNA
  
***************
*** 33,149 ****
        @min_gc_percent = min_gc_percent
      end
!     attr_accessor :antisense_size
!     
      def uitei?(target)
!       return false unless /^.{2}[G|C]/i =~ target.to_s
!       return false unless /[A|U].{2}$/i =~ target.to_s
  
        one_third  = target.size * 1 / 3
!       start = @target_size - one_third - 1
!       one_third_seq = target.subseq(start, @target_size - 2)
!       gc = 0
!       one_third_seq.scan(/[A|U]/i) { gc += 1 }
!       return false if gc < 5
    
-       return false if /[G|C]{9}/i =~ target
-       return false if /[G|C]{9}/i =~ target.complement.rna
        return true
      end
!     
      def reynolds?(target)
!       return false if /[G|C]{9}/i =~ target
!       return false if /[G|C]{9}/i =~ target.complement.rna
!       if /^.{4}A.{6}U.{2}[A|U|C].{5}[A|U].{2}$/i =~ target.to_s
!         return true
!       else
! 	return false
!       end
      end
  
      def design(rule = 'uitei')
        @target_size = @antisense_size + 2
-       bp = 0
-       @seq.window_search(@target_size) do |target|
-         bp += 1
  
          antisense = target.subseq(1, @target_size - 2).complement.rna
          sense     = target.subseq(3, @target_size)
  
!         target_start = bp
!         target_stop  = bp + @target_size
  	antisense_gc_percent = antisense.gc_percent
  	next if antisense_gc_percent > @max_gc_percent
  	next if antisense_gc_percent < @min_gc_percent
  	
!         if rule == 'uitei'
!   	  next if uitei?(target) == false
! 	elsif rule == 'reynolds'
!     	  next if reynolds?(target) == false
          else
! 	  next
  	end
  
          pair = Bio::SiRNA::Pair.new(target, sense, antisense, target_start, target_stop, rule, antisense_gc_percent)
  	@pairs.push(pair)
! 	
!       end #window_search
        return @pairs
!     end #design
  
      class Pair
  
        def initialize(target, sense, antisense, start, stop, rule, gc_percent)
! 	@target    = target
! 	@sense     = sense
! 	@antisense = antisense
! 	@start = start
! 	@stop  = stop
! 	@rule  = rule
  	@gc_percent = gc_percent
        end
!       
!       attr_accessor :target
!       attr_accessor :sense
!       attr_accessor :antisense
!       attr_accessor :start, :stop
!       attr_accessor :rule
!       attr_accessor :gc_percent
! 
!       def as_human_readable_text
!         # human readable report
! 	report =  "--\n"
! 	report << 'start: ' + @start.to_s + "\n"
! 	report << 'stop:  ' + @stop.to_s  + "\n"
! 	report << 'rule:  ' + @rule.to_s  + "\n"
! 	report << 'gc_percent:  ' + @gc_percent.to_s  + "\n"
! 	report << 'target:    '        + @target.upcase + "\n"
! 	report << 'sense:     ' + '  ' + @sense.upcase  + "\n"
! 	report << 'antisense: '        + @antisense.reverse.upcase + "\n"
  
!         # computer parseble
!         # puts antisense
!         # puts target_start
!         # puts target_stop
        end
-       alias :to_s :as_human_readable_text
  
!     end #class Bio::SiRNA::Pair
      
      class ShRNA
      
!       def initialize(pair, method_name)
          @pair = pair
-         @method_name = method_name		
- 	@top_strand_shrna    = nil
- 	@bottom_strand_shrna = nil
- 	@loop = nil
        end
!       attr_accessor :method_name
!       attr_accessor :top_strand_shrna
!       attr_accessor :bottom_strand_shrna
!       attr_accessor :loop
!       
!       def design
!         if @method_name == 'BLOCK-iT'
  	  block_it
  	else
--- 34,143 ----
        @min_gc_percent = min_gc_percent
      end
!     attr_accessor :antisense_size, :max_gc_percent, :min_gc_percent
! 
      def uitei?(target)
!       return false unless /^.{2}[GC]/i =~ target
!       return false unless /[AU].{2}$/i =~ target
!       return false if     /[GC]{9}/i   =~ target
!       return false if     /[GC]{9}/i   =~ target.complement.rna # same as above?
  
        one_third  = target.size * 1 / 3
!       start_pos  = @target_size - one_third - 1
!       remain_seq = target.subseq(start_pos, @target_size - 2)
!       gc_number  = remain_seq.scan(/[AU]/i).size
!       return false if gc_number < 5
    
        return true
      end
! 
      def reynolds?(target)
!       return false if /[GC]{9}/i =~ target
!       return false if /[GC]{9}/i =~ target.complement.rna # means same as above?
!       return false unless /^.{4}A.{6}U.{2}[AUC].{5}[AU].{2}$/i =~ target
!       return true
!     end
! 
!     def uitei
!       design('uitei')
!     end
! 
!     def reynolds
!       design('reynolds')
      end
  
      def design(rule = 'uitei')
        @target_size = @antisense_size + 2
  
+       target_start = 0
+       @seq.window_search(@target_size) do |target|
          antisense = target.subseq(1, @target_size - 2).complement.rna
          sense     = target.subseq(3, @target_size)
  
!         target_start += 1
!         target_stop  = target_start + @target_size
! 
  	antisense_gc_percent = antisense.gc_percent
  	next if antisense_gc_percent > @max_gc_percent
  	next if antisense_gc_percent < @min_gc_percent
  	
!         case rule
!         when 'uitei'
!   	  next unless uitei?(target)
!         when 'reynolds'
!     	  next unless reynolds?(target)
          else
!           raise NotImplementedError
  	end
  
          pair = Bio::SiRNA::Pair.new(target, sense, antisense, target_start, target_stop, rule, antisense_gc_percent)
  	@pairs.push(pair)
!       end
        return @pairs
!     end
! 
  
      class Pair
  
        def initialize(target, sense, antisense, start, stop, rule, gc_percent)
! 	@target     = target
! 	@sense      = sense
! 	@antisense  = antisense
! 	@start      = start
! 	@stop       = stop
! 	@rule       = rule
  	@gc_percent = gc_percent
        end
!       attr_accessor :target, :sense, :antisense, :start, :stop, :rule, :gc_percent
  
!       # human readable report
!       def report
! 	report =  "### siRNA\n"
! 	report << 'Start: ' + @start.to_s + "\n"
! 	report << 'Stop:  ' + @stop.to_s  + "\n"
! 	report << 'Rule:  ' + @rule.to_s  + "\n"
! 	report << 'GC %:  ' + @gc_percent.to_s  + "\n"
! 	report << 'Target:    '        + @target.upcase + "\n"
! 	report << 'Sense:     ' + '  ' + @sense.upcase  + "\n"
! 	report << 'Antisense: '        + @antisense.reverse.upcase + "\n"
        end
  
!       # computer parsable report
!       #def to_s
!       #  [ @antisense, @start, @stop ].join("\t")
!       #end
! 
!     end #class Pair
! 
      
      class ShRNA
      
!       def initialize(pair)
          @pair = pair
        end
!       attr_accessor :top_strand, :bottom_strand
! 
!       def design(method = 'BLOCK-iT')
!         case method
!         when 'BLOCK-iT'
  	  block_it
  	else
***************
*** 151,203 ****
          end
        end
-       
-       def block_it
-         top_strand_shrna_overhang    = Bio::Sequence::NA.new('CACC')
-         bottom_strand_shrna_overhang = Bio::Sequence::NA.new('AAAA')
- #        loop = Bio::Sequence::NA.new('CGAA')  # From BLOCK-iT's manual
-         @loop = Bio::Sequence::NA.new('GTGTGCTGTCC')   # From piGENE document
  
!         if /^[G|g]/ =~ @pair.sense
!   	  @top_strand_shrna    = top_strand_shrna_overhang    + @pair.sense + loop + @pair.sense.complement
! 	  @bottom_strand_shrna = bottom_strand_shrna_overhang + @pair.sense + loop.complement + @pair.sense.complement
  	else
!   	  @top_strand_shrna    = top_strand_shrna_overhang    + 'G' + @pair.sense + loop            + @pair.sense.complement
! 	  @bottom_strand_shrna = bottom_strand_shrna_overhang +       @pair.sense + loop.complement + @pair.sense.complement + 'C'
  	end
- 	
- #	@top_strand_shrna    = Bio::Sequence::NA.new(@top_strand_shrna).dna!
- #	@bottom_strand_shrna = Bio::Sequence::NA.new(@bottom_strand_shrna).dna!
        end
        
!       def as_human_readable_text
!         report = ''
! #        report << 'Top Strand shRNA:    ' + @top_strand_shrna.upcase.gsub(/G/, 'g') + "\n"
! #        report << 'Bottom Strand shRNA: ' + @bottom_strand_shrna.upcase.gsub(/G/, 'g') + "\n"
!         report << 'Top Strand shRNA:    ' + @top_strand_shrna.upcase    + "\n"
!         report << 'Bottom Strand shRNA: ' + @bottom_strand_shrna.upcase + "\n"
!         report << 'Size of Top Strand shRNA:    ' + @top_strand_shrna.size.to_s    + ' nt' + "\n"
!         report << 'Size of Bottom Strand shRNA: ' + @bottom_strand_shrna.size.to_s + ' nt' + "\n"
!         report << "5'-" + @top_strand_shrna.upcase            + "-3'" + "\n"
!         report << "    3'-" + @bottom_strand_shrna.reverse.upcase + "-5'" + "\n"
        end
-       alias :to_s :as_human_readable_text
  
!     end #class Bio::SiRNA::ShRNA
    end #class SiRNA
  end #module bio
  
  if __FILE__ == $0
    
!   input_seq = ARGF.read
!   seq = Bio::Sequence::NA.new(input_seq)
!   sirna_designer = Bio::SiRNA.new(seq)
!   pairs = sirna_designer.design(rule = 'uitei') # or (rule = 'reynolds')  
    pairs.each do |pair|
!     shRNA = Bio::SiRNA::ShRNA.new(pair, 'BLOCK-iT')
!     shRNA.design
  
!     puts pair.as_human_readable_text            
!     puts shRNA.as_human_readable_text
!     puts [shRNA.top_strand_shrna.dna!, shRNA.bottom_strand_shrna.dna!].join("\t")
    end
  
--- 145,209 ----
          end
        end
  
!       def block_it(method = 'piGENE')
!         top = Bio::Sequence::NA.new('CACC')	# top_strand_shrna_overhang
!         bot = Bio::Sequence::NA.new('AAAA')	# bottom_strand_shrna_overhang
!         fwd = @pair.sense
!         rev = @pair.sense.complement
! 
!         case method
!         when 'BLOCK-iT'
!           # From BLOCK-iT's manual
!           loop_fwd = Bio::Sequence::NA.new('CGAA')
!           loop_rev = loop_fwd.complement
!         when 'piGENE'
!           # From piGENE document
!           loop_fwd = Bio::Sequence::NA.new('GTGTGCTGTCC')
!           loop_rev = loop_fwd.complement
!         else
!           raise NotImplementedError
!         end
! 
!         if /^G/i =~ fwd
!   	  @top_strand    = top + fwd + loop_fwd + rev
! 	  @bottom_strand = bot + fwd + loop_rev + rev
  	else
!   	  @top_strand    = top + 'G' + fwd + loop_fwd + rev
! 	  @bottom_strand = bot + fwd + loop_rev + rev + 'C'
  	end
        end
        
!       def report
!         report = "### shRNA\n"
!         report << "Top strand shRNA (#{@top_strand.length} nt):\n"
!         report << "  5'-#{@top_strand.upcase}-3'\n"
!         report << "Bottom strand shRNA (#{@bottom_strand.length} nt):\n"
!         report << "      3'-#{@bottom_strand.reverse.upcase}-5'\n"
        end
  
!     end #class ShRNA
! 
    end #class SiRNA
+ 
  end #module bio
  
+ 
  if __FILE__ == $0
    
!   seq = Bio::Sequence::NA.new(ARGF.read)
! 
!   sirna = Bio::SiRNA.new(seq)
!   pairs = sirna.design		# or .design('uitei') or .uitei or .reynolds
! 
    pairs.each do |pair|
!     puts pair.report
  
!     shrna = Bio::SiRNA::ShRNA.new(pair)
!     shrna.design		# or .design('BLOCK-iT') or .block_it
!     puts shrna.report
! 
!     puts "# as DNA"
!     puts shrna.top_strand.dna
!     puts shrna.bottom_strand.dna
    end
  
***************
*** 223,232 ****
      Nature Biotech. 2004 22: 326-330.
  
! -- Bio::SiRNA.new(seq, antisense_size, max_gc_percent, min_gc_percent)
  
! -- Bio::SiRNA#design(rule)
! -- Bio::SiRNA#antisense_size
! -- Bio::SiRNA#max_gc_percent
! -- Bio::SiRNA#min_gc_percent
  
  == Bio::SiRNA::Pair
--- 229,249 ----
      Nature Biotech. 2004 22: 326-330.
  
! --- Bio::SiRNA.new(seq, antisense_size, max_gc_percent, min_gc_percent)
  
! --- Bio::SiRNA#design(rule)
! 
!   rule can be one of 'uitei' (default) and 'reynolds'.
! 
! --- Bio::SiRNA#uitei
! 
!   same as design('uitei')
! 
! --- Bio::SiRNA#reynolds
! 
!   same as design('reynolds')
! 
! --- Bio::SiRNA#antisense_size
! --- Bio::SiRNA#max_gc_percent
! --- Bio::SiRNA#min_gc_percent
  
  == Bio::SiRNA::Pair
***************
*** 239,258 ****
  --- Bio::SiRNA::Pair#start
  --- Bio::SiRNA::Pair#stop
! --- Bio::SiRNA::Pair#as_human_readable_text
  
! = Bio::ShRNA
  
      Input is a Bio::SiRNA::Pair object (the target sequence).
-     Output is a list of Bio::SiRNA::Pair object.
  
! -- Bio::ShRNA.new(pair, 'design rule name')
  
! -- Bio::ShRNA#design(rule)
! -- Bio::ShRNA#antisense_size
! -- Bio::ShRNA#max_gc_percent
! -- Bio::ShRNA#min_gc_percent
  
  
! -- ChangeLog
  
    2005/03/21 Itoshi NIKAIDO <itoshi.nikaido at nifty.com>
--- 256,283 ----
  --- Bio::SiRNA::Pair#start
  --- Bio::SiRNA::Pair#stop
! --- Bio::SiRNA::Pair#rule
! --- Bio::SiRNA::Pair#report
  
! == Bio::SiRNA::ShRNA
  
      Input is a Bio::SiRNA::Pair object (the target sequence).
  
! --- Bio::ShRNA.new(pair)
  
! --- Bio::ShRNA#design(rule)
  
+   only the 'BLOCK-iT' rule is implemented for now
  
! --- Bio::ShRNA#block_it(method)
! 
!   same as design('BLOCK-iT').
!   method can be one of 'piGENE' (default) and 'BLOCK-iT'.
! 
! --- Bio::ShRNA#top_strand
! --- Bio::ShRNA#bottom_strand
! --- Bio::ShRNA#report
! 
! 
! === ChangeLog
  
    2005/03/21 Itoshi NIKAIDO <itoshi.nikaido at nifty.com>



More information about the bioruby-cvs mailing list