[BioRuby-cvs] bioruby/lib/bio/sequence common.rb, 1.6, 1.6.2.1 format.rb, 1.4.2.3, 1.4.2.4

Jan Aerts aerts at dev.open-bio.org
Wed Feb 20 09:56:24 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv15755/lib/bio/sequence

Modified Files:
      Tag: BRANCH-biohackathon2008
	common.rb format.rb 
Log Message:
* Rewrote some of the code for converting EMBL files into Bio::Sequence.
* Added functionality to export Bio::Sequence to EMBL format.

Changes:
* moved Sequence::Format#wrap and Sequence::Format#fold to String#wrap and String#fold (now defined in bio.rb)
* lib/bio/db/common.rb:
    - rewrote def ref and def references
    - added to_biosequence
    - def references now returns an Array instead of a Bio::References object (tests changed accordingly)
* lib/bio/db/embl/embl.rb
    - def ft now returns Array instead of Bio::Features object (tests changed accordingly)
* lib/bio/db/embl/format.erb
* lib/bio/sequence/common.rb
    - added def format_embl


Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.4.2.3
retrieving revision 1.4.2.4
diff -C2 -d -r1.4.2.3 -r1.4.2.4
*** format.rb	15 Feb 2008 02:18:21 -0000	1.4.2.3
--- format.rb	20 Feb 2008 09:56:22 -0000	1.4.2.4
***************
*** 31,106 ****
  #   puts s.output(:embl)
  module Format
- 
-   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
-   # case, it would be difficult to successfully call this method outside
-   # its expected context).
-   #
-   # Output the FASTA format string of the sequence.  
-   #
-   # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of 
-   # using either the header or width arguments.  So something needs to be
-   # changed...
-   #
-   # Currently, this method is used in Bio::Sequence#output like so,
-   #
-   #   s = Bio::Sequence.new('atgc')
-   #   puts s.output(:fasta)                   #=> "> \natgc\n"
-   # ---
-   # *Arguments*:
-   # * (optional) _header_: String (default nil)
-   # * (optional) _width_: Fixnum (default nil)
-   # *Returns*:: String object
-   def format_fasta(header = nil, width = nil)
-     header ||= "#{@entry_id} #{@definition}"
- 
-     ">#{header}\n" +
-     if width
-       @seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
-     else
-       @seq.to_s + "\n"
-     end
-   end
- 
-   # Not yet implemented :)
-   # Remove the nodoc command after implementation!
-   # ---
-   # *Returns*:: String object
-   def format_gff #:nodoc:
-     raise NotImplementedError
-   end
- 
-   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
-   # case, it would be difficult to successfully call this method outside
-   # its expected context).
-   #
-   # Output the Genbank format string of the sequence.  
-   # Used in Bio::Sequence#output.
-   # ---
-   # *Returns*:: String object
-   def format_genbank
-     prefix = ' ' * 5
-     indent = prefix + ' ' * 16
-     fwidth = 79 - indent.length
- 
-     format_features(prefix, indent, fwidth)
-   end
- 
-   # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
-   # case, it would be difficult to successfully call this method outside
-   # its expected context).
-   #
-   # Output the EMBL format string of the sequence.  
-   # Used in Bio::Sequence#output.
-   # ---
-   # *Returns*:: String object
-   def format_embl
-     prefix = 'FT   '
-     indent = prefix + ' ' * 16
-     fwidth = 80 - indent.length
- 
-     format_features(prefix, indent, fwidth)
-   end
- 
- 
    private
  
--- 31,34 ----
***************
*** 114,123 ****
  
        head = ''
!       wrap(position, width).each_line do |line|
          result << head << line
          head = indent
        end
  
!       result << format_qualifiers(feature.qualifiers, width)
      end
      return result
--- 42,51 ----
  
        head = ''
!       (position).wrap(width).each_line do |line|
          result << head << line
          head = indent
        end
  
!       result << format_qualifiers(feature.qualifiers, indent, width)
      end
      return result
***************
*** 130,136 ****
  
        if v == true
!         lines = wrap('/' + q, width)
        elsif q == 'translation'
!         lines = fold('/' + q + '=' + v, width)
        else
          if v[/\D/]
--- 58,64 ----
  
        if v == true
!         lines =('/' + q).wrap(width)
        elsif q == 'translation'
!         lines = ('/' + q + '="' + v + '"').fold(width)
        else
          if v[/\D/]
***************
*** 139,143 ****
            v = '"' + v + '"'
          end
!         lines = wrap('/' + q + '=' + v, width)
        end
  
--- 67,71 ----
            v = '"' + v + '"'
          end
!         lines = ('/' + q + '=' + v).wrap(width)
        end
  
***************
*** 147,177 ****
    end
  
-   def fold(str, width)
-     str.gsub(Regexp.new("(.{1,#{width}})"), "\\1\n")
-   end
- 
-   def wrap(str, width)
-     result = []
-     left = str.dup
-     while left and left.length > width
-       line = nil
-       width.downto(1) do |i|
-         if left[i..i] == ' ' or /[\,\;]/ =~ left[(i-1)..(i-1)]  then
-           line = left[0..(i-1)].sub(/ +\z/, '')
-           left = left[i..-1].sub(/\A +/, '')
-           break
-         end
-       end
-       if line.nil? then
-         line = left[0..(width-1)]
-         left = left[width..-1]
-       end
-       result << line
-     end
-     result << left if left
-     result_string = result.join("\n")
-     result_string << "\n" unless result_string.empty?
-     return result_string
-   end
  
  end # Format
--- 75,78 ----

Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.6
retrieving revision 1.6.2.1
diff -C2 -d -r1.6 -r1.6.2.1
*** common.rb	27 Dec 2007 17:36:02 -0000	1.6
--- common.rb	20 Feb 2008 09:56:22 -0000	1.6.2.1
***************
*** 38,42 ****
  #   puts dna.randomize
  module Common
! 
    # Return sequence as 
    # String[http://corelib.rubyonrails.org/classes/String.html].
--- 38,42 ----
  #   puts dna.randomize
  module Common
!   
    # Return sequence as 
    # String[http://corelib.rubyonrails.org/classes/String.html].
***************
*** 66,69 ****
--- 66,86 ----
      self.class.new(self)
    end
+   
+   def format_embl
+     output_lines = Array.new
+     counter = 0
+     remainder = self.window_search(60,60) do |subseq|
+       counter += 60
+       subseq.gsub!(/(.{10})/, '\1 ')
+       output_lines.push(' '*5 + subseq + counter.to_s.rjust(9))
+     end
+     counter += remainder.length
+     remainder = (remainder.to_s + ' '*(60-remainder.length))
+     remainder.gsub!(/(.{10})/, '\1 ')
+     output_lines.push(' '*5 + remainder + counter.to_s.rjust(9))
+     return output_lines.join("\n")
+   end
+ 
+ 
  
    # Normalize the current sequence, removing all whitespace and 




More information about the bioruby-cvs mailing list