[BioRuby-cvs] bioruby/lib/bio sequence.rb,0.58.2.4,0.58.2.5
Jan Aerts
aerts at dev.open-bio.org
Fri Feb 15 04:49:39 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio
In directory dev.open-bio.org:/tmp/cvs-serv26608
Modified Files:
Tag: BRANCH-biohackathon2008
sequence.rb
Log Message:
Added functionality to convert a Bio::EMBL object into a full-blown Bio::Sequence object that contains features, references and other additional
information.
Index: sequence.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/sequence.rb,v
retrieving revision 0.58.2.4
retrieving revision 0.58.2.5
diff -C2 -d -r0.58.2.4 -r0.58.2.5
*** sequence.rb 15 Feb 2008 03:33:51 -0000 0.58.2.4
--- sequence.rb 15 Feb 2008 04:49:37 -0000 0.58.2.5
***************
*** 13,16 ****
--- 13,17 ----
#
+ require 'erb'
require 'bio/sequence/compat'
***************
*** 73,76 ****
--- 74,79 ----
include Format
+ attr_accessor :sequence_version, :topology, :molecule_type, :data_class, :division, :primary_accession, :secondary_accessions, :date_created, :date_modified, :species, :classification
+
# Create a new Bio::Sequence object
#
***************
*** 165,181 ****
# ---
# *Arguments*:
! # * (required) _style_: :fasta, :genbank, *or* :embl
# *Returns*:: String object
! def output(style)
! case style
! when :fasta
! format_fasta
! when :gff
! format_gff
! when :genbank
! format_genbank
! when :embl
! format_embl
! end
end
--- 168,176 ----
# ---
# *Arguments*:
! # * (optional) _format_: :fasta, :genbank, *or* :embl (default: :fasta)
# *Returns*:: String object
! def output(format = :fasta)
! record_template = ERB.new(File.read(File.dirname(__FILE__) + "/db/#{format.to_s}/format.erb"))
! record_template.result(binding)
end
***************
*** 372,375 ****
--- 367,375 ----
end
+
+ def accessions
+ return [@primary_accession, @secondary_accessions].flatten
+ end
+
end # Sequence
***************
*** 380,510 ****
if __FILE__ == $0
! puts "== Test Bio::Sequence::NA.new"
! p Bio::Sequence::NA.new('')
! p na = Bio::Sequence::NA.new('atgcatgcATGCATGCAAAA')
! p rna = Bio::Sequence::NA.new('augcaugcaugcaugcaaaa')
!
! puts "\n== Test Bio::Sequence::AA.new"
! p Bio::Sequence::AA.new('')
! p aa = Bio::Sequence::AA.new('ACDEFGHIKLMNPQRSTVWYU')
!
! puts "\n== Test Bio::Sequence#to_s"
! p na.to_s
! p aa.to_s
!
! puts "\n== Test Bio::Sequence#subseq(2,6)"
! p na
! p na.subseq(2,6)
!
! puts "\n== Test Bio::Sequence#[2,6]"
! p na
! p na[2,6]
!
! puts "\n== Test Bio::Sequence#to_fasta('hoge', 8)"
! puts na.to_fasta('hoge', 8)
!
! puts "\n== Test Bio::Sequence#window_search(15)"
! p na
! na.window_search(15) {|x| p x}
!
! puts "\n== Test Bio::Sequence#total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})"
! p na.total({'a'=>0.1,'t'=>0.2,'g'=>0.3,'c'=>0.4})
!
! puts "\n== Test Bio::Sequence#composition"
! p na
! p na.composition
! p rna
! p rna.composition
!
! puts "\n== Test Bio::Sequence::NA#splicing('complement(join(1..5,16..20))')"
! p na
! p na.splicing("complement(join(1..5,16..20))")
! p rna
! p rna.splicing("complement(join(1..5,16..20))")
!
! puts "\n== Test Bio::Sequence::NA#complement"
! p na.complement
! p rna.complement
! p Bio::Sequence::NA.new('tacgyrkmhdbvswn').complement
! p Bio::Sequence::NA.new('uacgyrkmhdbvswn').complement
!
! puts "\n== Test Bio::Sequence::NA#translate"
! p na
! p na.translate
! p rna
! p rna.translate
!
! puts "\n== Test Bio::Sequence::NA#gc_percent"
! p na.gc_percent
! p rna.gc_percent
!
! puts "\n== Test Bio::Sequence::NA#illegal_bases"
! p na.illegal_bases
! p Bio::Sequence::NA.new('tacgyrkmhdbvswn').illegal_bases
! p Bio::Sequence::NA.new('abcdefghijklmnopqrstuvwxyz-!%#$@').illegal_bases
!
! puts "\n== Test Bio::Sequence::NA#molecular_weight"
! p na
! p na.molecular_weight
! p rna
! p rna.molecular_weight
!
! puts "\n== Test Bio::Sequence::NA#to_re"
! p Bio::Sequence::NA.new('atgcrymkdhvbswn')
! p Bio::Sequence::NA.new('atgcrymkdhvbswn').to_re
! p Bio::Sequence::NA.new('augcrymkdhvbswn')
! p Bio::Sequence::NA.new('augcrymkdhvbswn').to_re
!
! puts "\n== Test Bio::Sequence::NA#names"
! p na.names
!
! puts "\n== Test Bio::Sequence::NA#pikachu"
! p na.pikachu
!
! puts "\n== Test Bio::Sequence::NA#randomize"
! print "Orig : "; p na
! print "Rand : "; p na.randomize
! print "Rand : "; p na.randomize
! print "Rand : "; p na.randomize.randomize
! print "Block : "; na.randomize do |x| print x end; puts
!
! print "Orig : "; p rna
! print "Rand : "; p rna.randomize
! print "Rand : "; p rna.randomize
! print "Rand : "; p rna.randomize.randomize
! print "Block : "; rna.randomize do |x| print x end; puts
!
! puts "\n== Test Bio::Sequence::NA.randomize(counts)"
! print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'t'=>40}
! print "Rand : "; p Bio::Sequence::NA.randomize(counts)
! print "Count : "; p counts = {'a'=>10,'c'=>20,'g'=>30,'u'=>40}
! print "Rand : "; p Bio::Sequence::NA.randomize(counts)
! print "Block : "; Bio::Sequence::NA.randomize(counts) {|x| print x}; puts
!
! puts "\n== Test Bio::Sequence::AA#codes"
! p aa
! p aa.codes
!
! puts "\n== Test Bio::Sequence::AA#names"
! p aa
! p aa.names
!
! puts "\n== Test Bio::Sequence::AA#molecular_weight"
! p aa.subseq(1,20)
! p aa.subseq(1,20).molecular_weight
!
! puts "\n== Test Bio::Sequence::AA#randomize"
! aaseq = 'MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDA'
! s = Bio::Sequence::AA.new(aaseq)
! print "Orig : "; p s
! print "Rand : "; p s.randomize
! print "Rand : "; p s.randomize
! print "Rand : "; p s.randomize.randomize
! print "Block : "; s.randomize {|x| print x}; puts
! puts "\n== Test Bio::Sequence::AA.randomize(counts)"
! print "Count : "; p counts = s.composition
! print "Rand : "; puts Bio::Sequence::AA.randomize(counts)
! print "Block : "; Bio::Sequence::AA.randomize(counts) {|x| print x}; puts
end
--- 380,404 ----
if __FILE__ == $0
! require 'bio'
! seq = Bio::Sequence.new('aattaaaacgccacgcaaggcgattctaggaaatcaaaacgacacgaaatgtggggtgggtgtttgggtaggaaagacagttgtcaacatcagggatttggattgaatcaaaaaaaaagtccttagatttcataaaagctaatcacgcctcaaaactggggcctatctcttcttttttgtcgcttcctgtcggtccttctctatttcttctccaacccctcatttttgaatatttacataacaaaccgttttactttctttggtcaaaattagacccaaaattctatattagtttaagatatgtggtctgtaatttattgttgtattgatataaaaattagttataagcgattatatttttatgctcaagtaactggtgttagttaactatattccaccacgataacctgattacataaaatatgattttaatcattttagtaaaccatatcgcacgttggatgattaattttaacggtttaataacacgtgattaaattatttttagaatgattatttacaaacggaaaagctatatgtgacacaataactcgtgcagtattgttagtttgaaaagtgtatttggtttcttatatttggcctcgattttcagtttatgtgctttttacaaagttttattttcgttatctgtttaacgcgacatttgttgtatggctttaccgatttgagaataaaatcatattacctttatgtagccatgtgtggtgtaatatataataatggtccttctacgaaaaaagcagatcacaattgaaataaagggtgaaatttggtgtcccttttcttcgtcgaaataacagaactaaataaaagaaagtgttatagtatattacgtccgaagaataatccatattcctgaaatacagtcaacatattatatatttagtactttatataaagttaggaattaaatcatatgttttatcgaccatattaagt!
cacaactttatcataaattaatctgtaattagaattccaagttcgccaccgaatttcgtaacctaatctacatataatagataaaatatatatatgtagagtaattatgatatctatgtatgtagtcatggtatatgaattttgaaattggcaaggtaacattgacggatcgtaacccaacaaataatattaattacaaaatgggtgggcgggaatagtatacaactcataattccactcactttttgtattattaggatatgaaataagagtaatcaacatgcataataaagatgtataatttcttcatcttaaaaaacataactacatggtttaatacacaattttaccttttatcaaaaaagtatttcacaattcactcgcaaattacgaaatgatggctagtgcttcaactccaaatttcgaatattttaaatcacgatgtgtagaaccttttatttactggatactaatcactagtttattgagccaaccaattagttaaatagaacaatcaatattatagccagatattttttcctttaaaaatatttaaaagaggggccagaaaagaaccagagagggaggccatgagacattattatcactagtcaaaaacaacaaaccctccttttgctttttcatataaattattatattttattttgcaggtttcttctcttcttcttcttcttcttcttcttcttcctcttggctgctttctttcatcatccataaagtgaaagctaacgcatagagagagccatatcgtcccaaaaaaagcaaaagtccaaaaaaaaacaactccaaaacattctctcttagctctttactctttagtttctctctctctctctgcctttctctttgttgaagttcatggatgctacgaagtggactcaggtacgtaaaaagatatctctctgctatatctgtttgtttgtagcttctccccgactctcacgctctctctctctctctctctctc!
tttgtgtatctctctactcacataaatatatacatgtgtgtgtatgcatgtttatatgtatgtatgaaac
cagtagtggttatacagatagtctatatagagatatcaatatgatgtgttttaatttagactttttatatatccgtttgaaacttccgaagttctcgaatggagttaaggaagttttgttctctacaagttcaatttttcttgtcattaattataaaactctgataactaatggataaaaaaggtatgctttgttagttaccttttgttcttggtgctcaggtcttaccatttttttcctaaattttaattagtctcctttctttaattaattttatgttaacgcactgacgatttaacgttaacaaaaaaacctagattctttttcttttcaatagagcataattattacttcaatttcatttatctcacactaaaccctaatcttggcgaaattccttttatatatataaatttaattaatttttccacaatcttggcggaattcaggactcggttttgcttgttattgttctctcttttaatttgacatggttagggaatacttaaagtatgtcttaattttatagggttttcaagaaatgataaacgtaaagccaatggagcaaatgatttctagcaccaacaacaacacaccgcaacaacaaccaacattcatcgccaccaacacaaggccaaacgccaccgcatccaatggtggctccggaggaaataccaacaacacggctacgatggaaactagaaaggcgaggccacaagagaaagtaaattgtccaagatgcaactcaacaaacacaaagttctgttattacaacaactacagtctcacgcaaccaagatacttctgcaaaggttgtcgaaggtattggaccgaaggtggctctcttcgtaacgtcccagtcggaggtagctcaagaaagaacaagagatcctctacacctttagcttcaccttctaatcccaaacttccagatctaaacccaccgattcttttctcaagccaaatccctaataagtcaaataaagatc!
tcaacttgctatctttcccggtcatgcaagatcatcatcatcatggtatgtctcatttttttcatatgcccaagatagagaacaacaatacttcatcctcaatctatgcttcatcatctcctgtctcagctcttgagcttctaagatccaatggagtctcttcaagaggcatgaacacgttcttgcctggtcaaatgatggattcaaactcagtcctgtactcatctttagggtttccaacaatgcctgattacaaacagagtaataacaacctttcattctccattgatcatcatcaagggattggacataacaccatcaacagtaaccaaagagctcaagataacaatgatgacatgaatggagcaagtagggttttgttccctttttcagacatgaaagagctttcaagcacaacccaagagaagagtcatggtaataatacatattggaatgggatgttcagtaatacaggaggatcttcatggtgaaaaaaggttaaaaagagctcatgaactatcagctttcttctctttttctgtttttttctcctattttattatagtttttactttgatgatcttttgttttttctcacatggggaactttacttaaagttgtcagaacttagtttacagattgtctttttattccttctttctggttttccttttttcctttttttatcagtctttttaaaatatgtatttcataattgggtttgatcattcatatttattagtatcaaaatagagtctatgttcatgagggagtgttaaggggtgtgagggtagaagaataagtgaatacgggggcccg')
! seq.entry_id = 'AJ224122'
! seq.sequence_version = 3
! seq.topology = 'linear'
! seq.molecule_type = 'genomic DNA'
! seq.data_class = 'STD'
! seq.division = 'PLN'
! seq.primary_accession = 'AJ224122'
! seq.secondary_accessions = []
! seq.date_created = '27-FEB-1998 (Rel. 54, Created)'
! seq.date_modified = '14-NOV-2006 (Rel. 89, Last updated, Version 6)'
! seq.definition = 'Arabidopsis thaliana DAG1 gene'
! seq.keywords = ['BBFa gene', 'transcription factor']
! seq.species = 'Arabidopsis thaliana (thale cress)'
! seq.classification = ['Eukaryota', 'Viridiplantae', 'Streptophyta', 'Embryophyta', 'Tracheophyta',
! 'Spermatophyta', 'Magnoliophyta', 'eudicotyledons', 'core eudicotyledons', 'rosids',
! 'eurosids II', 'Brassicales', 'Brassicaceae', 'Arabidopsis']
! # puts seq.output(:embl)
! puts seq.output(:fasta)
end
More information about the bioruby-cvs
mailing list