[BioRuby-cvs] bioruby/lib/bio/sequence common.rb, 1.6, 1.6.2.1 format.rb, 1.4.2.3, 1.4.2.4
Jan Aerts
aerts at dev.open-bio.org
Wed Feb 20 09:56:24 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/sequence
In directory dev.open-bio.org:/tmp/cvs-serv15755/lib/bio/sequence
Modified Files:
Tag: BRANCH-biohackathon2008
common.rb format.rb
Log Message:
* Rewrote some of the code for converting EMBL files into Bio::Sequence.
* Added functionality to export Bio::Sequence to EMBL format.
Changes:
* renamed Sequence::Format#wrap and #fold to String#wrap and #fold (stored in bio.rb)
* lib/bio/db/common.rb:
  - rewrote def ref and def references
  - added to_biosequence
  - def references now returns an Array instead of a Bio::References object (tests changed accordingly)
* lib/bio/db/embl/embl.rb
  - def ft now returns Array instead of Bio::Features object (tests changed accordingly)
* lib/bio/db/embl/format.erb
* lib/bio/sequence/common.rb
  - added def format_embl
Index: format.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/format.rb,v
retrieving revision 1.4.2.3
retrieving revision 1.4.2.4
diff -C2 -d -r1.4.2.3 -r1.4.2.4
*** format.rb 15 Feb 2008 02:18:21 -0000 1.4.2.3
--- format.rb 20 Feb 2008 09:56:22 -0000 1.4.2.4
***************
*** 31,106 ****
# puts s.output(:embl)
module Format
-
- # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
- # case, it would be difficult to successfully call this method outside
- # its expected context).
- #
- # Output the FASTA format string of the sequence.
- #
- # UNFORTUNATLY, the current implementation of Bio::Sequence is incapable of
- # using either the header or width arguments. So something needs to be
- # changed...
- #
- # Currently, this method is used in Bio::Sequence#output like so,
- #
- # s = Bio::Sequence.new('atgc')
- # puts s.output(:fasta) #=> "> \natgc\n"
- # ---
- # *Arguments*:
- # * (optional) _header_: String (default nil)
- # * (optional) _width_: Fixnum (default nil)
- # *Returns*:: String object
- def format_fasta(header = nil, width = nil)
- header ||= "#{@entry_id} #{@definition}"
-
- ">#{header}\n" +
- if width
- @seq.to_s.gsub(Regexp.new(".{1,#{width}}"), "\\0\n")
- else
- @seq.to_s + "\n"
- end
- end
-
- # Not yet implemented :)
- # Remove the nodoc command after implementation!
- # ---
- # *Returns*:: String object
- def format_gff #:nodoc:
- raise NotImplementedError
- end
-
- # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
- # case, it would be difficult to successfully call this method outside
- # its expected context).
- #
- # Output the Genbank format string of the sequence.
- # Used in Bio::Sequence#output.
- # ---
- # *Returns*:: String object
- def format_genbank
- prefix = ' ' * 5
- indent = prefix + ' ' * 16
- fwidth = 79 - indent.length
-
- format_features(prefix, indent, fwidth)
- end
-
- # INTERNAL USE ONLY, YOU SHOULD NOT CALL THIS METHOD. (And in any
- # case, it would be difficult to successfully call this method outside
- # its expected context).
- #
- # Output the EMBL format string of the sequence.
- # Used in Bio::Sequence#output.
- # ---
- # *Returns*:: String object
- def format_embl
- prefix = 'FT '
- indent = prefix + ' ' * 16
- fwidth = 80 - indent.length
-
- format_features(prefix, indent, fwidth)
- end
-
-
private
--- 31,34 ----
***************
*** 114,123 ****
head = ''
! wrap(position, width).each_line do |line|
result << head << line
head = indent
end
! result << format_qualifiers(feature.qualifiers, width)
end
return result
--- 42,51 ----
head = ''
! (position).wrap(width).each_line do |line|
result << head << line
head = indent
end
! result << format_qualifiers(feature.qualifiers, indent, width)
end
return result
***************
*** 130,136 ****
if v == true
! lines = wrap('/' + q, width)
elsif q == 'translation'
! lines = fold('/' + q + '=' + v, width)
else
if v[/\D/]
--- 58,64 ----
if v == true
! lines =('/' + q).wrap(width)
elsif q == 'translation'
! lines = ('/' + q + '="' + v + '"').fold(width)
else
if v[/\D/]
***************
*** 139,143 ****
v = '"' + v + '"'
end
! lines = wrap('/' + q + '=' + v, width)
end
--- 67,71 ----
v = '"' + v + '"'
end
! lines = ('/' + q + '=' + v).wrap(width)
end
***************
*** 147,177 ****
end
- def fold(str, width)
- str.gsub(Regexp.new("(.{1,#{width}})"), "\\1\n")
- end
-
- def wrap(str, width)
- result = []
- left = str.dup
- while left and left.length > width
- line = nil
- width.downto(1) do |i|
- if left[i..i] == ' ' or /[\,\;]/ =~ left[(i-1)..(i-1)] then
- line = left[0..(i-1)].sub(/ +\z/, '')
- left = left[i..-1].sub(/\A +/, '')
- break
- end
- end
- if line.nil? then
- line = left[0..(width-1)]
- left = left[width..-1]
- end
- result << line
- end
- result << left if left
- result_string = result.join("\n")
- result_string << "\n" unless result_string.empty?
- return result_string
- end
end # Format
--- 75,78 ----
Index: common.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence/common.rb,v
retrieving revision 1.6
retrieving revision 1.6.2.1
diff -C2 -d -r1.6 -r1.6.2.1
*** common.rb 27 Dec 2007 17:36:02 -0000 1.6
--- common.rb 20 Feb 2008 09:56:22 -0000 1.6.2.1
***************
*** 38,42 ****
# puts dna.randomize
module Common
!
# Return sequence as
# String[http://corelib.rubyonrails.org/classes/String.html].
--- 38,42 ----
# puts dna.randomize
module Common
!
# Return sequence as
# String[http://corelib.rubyonrails.org/classes/String.html].
***************
*** 66,69 ****
--- 66,86 ----
self.class.new(self)
end
+
+ def format_embl
+ output_lines = Array.new
+ counter = 0
+ remainder = self.window_search(60,60) do |subseq|
+ counter += 60
+ subseq.gsub!(/(.{10})/, '\1 ')
+ output_lines.push(' '*5 + subseq + counter.to_s.rjust(9))
+ end
+ counter += remainder.length
+ remainder = (remainder.to_s + ' '*(60-remainder.length))
+ remainder.gsub!(/(.{10})/, '\1 ')
+ output_lines.push(' '*5 + remainder + counter.to_s.rjust(9))
+ return output_lines.join("\n")
+ end
+
+
# Normalize the current sequence, removing all whitespace and
More information about the bioruby-cvs mailing list