[BioRuby-cvs] bioruby/lib/bio/db/biosql sequence.rb,NONE,1.1.2.1

Raoul Jean Pierre Bonnal helios at dev.open-bio.org
Tue Mar 25 15:47:04 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/db/biosql
In directory dev.open-bio.org:/tmp/cvs-serv13290/lib/bio/db/biosql

Added Files:
      Tag: BRANCH-biohackathon2008
	sequence.rb 
Log Message:
BioSQL release "MIFI". biosql->biosequence, biosequence->biosql. Supported formats: Embl, Genbank; support sql stransactions creating new sequences on biosql; does not support references and comments for genbank and embl. Fasta->biosequence->biosql dosn't work.

--- NEW FILE: sequence.rb ---

#TODO save on db reading from a genbank or embl object
module Bio
  class SQL 
    
    class Sequence
      private
      #      example
      #      bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
      #      this function creates other 3 functions, molecule_type, molecule_type=, molecule_type_update
      #molecule_type => return an array of strings, where each string is the value associated with the qualifier, ordered by rank.
      #molecule_type=value add a bioentry_qualifier value to the table
      #molecule_type_update(value, rank) update an entry of the table with an existing rank
      #the method inferr the qualifier term from the name of the first symbol, or you can specify a synonym to use
      
      #creating an object with to_biosql is transaction safe.
      
      #TODO: implement setting for more than a qualifier-vale. 
      def self.bioentry_qualifier_anchor(sym, *args)
        options = args.first || Hash.new
        #options.assert_valid_keys(:rank,:synonym,:multi)
        method_reader = sym.to_s.to_sym
        method_writer_operator = (sym.to_s+"=").to_sym
        method_writer_modder = (sym.to_s+"_update").to_sym
        synonym = options[:synonym].nil? ? sym.to_s : options[:synonym]
        
        #Bio::SQL::Term.create(:name=>synonym, :ontology=> Bio::SQL::Ontology.find_by_name('Annotation Tags')) unless Bio::SQL::Term.exists?(:name =>synonym)
        send :define_method, method_reader do
          #return an array of bioentry_qualifier_values
          begin
            term  = Term.find_or_create_by_name(:name => synonym, :ontology=> Ontology.find_by_name('Annotation Tags'))
            bioentry_qualifier_values = @entry.bioentry_qualifier_values.find_all_by_term_id(term)          
            bioentry_qualifier_values.map{|row| row.value} unless bioentry_qualifier_values.nil?
          rescue Exception => e 
            puts "Reader Error: #{synonym} #{e.message}"
          end
        end
        
        send :define_method, method_writer_operator do |value|
          begin
            term  = Term.find_or_create_by_name(:name => synonym, :ontology=> Ontology.find_by_name('Annotation Tags'))
            datas = @entry.bioentry_qualifier_values.find_all_by_term_id(term.term_id)
            #add an element incrementing the rank or setting the first to 1
            @entry.bioentry_qualifier_values.create(:term_id=>term.term_id, :rank=>datas.empty? ? 1 : datas.last.rank.succ, :value=>value)
          rescue Exception => e 
            puts "WriterOperator= Error: #{synonym} #{e.message}"
          end
        end
        
        send :define_method, method_writer_modder do |value, rank|
          begin
            term  = Term.find_or_create_by_name(:name => synonym, :ontology=> Ontology.find_by_name('Annotation Tags'))
            data = @entry.bioentry_qualifier_values.find_by_term_id_and_rank(term.term_id, rank)
            if data.nil?
              send method_writer_operator, value
            else
              data.value=value
              data.save!
            end
          rescue Exception => e
            puts "WriterModder Error: #{synonym} #{e.message}"
          end
        end
        
      end
      public
      attr_reader :entry
      
      def delete
        @entry.destroy
      end
      
      def get_seqfeature(sf)
        
        #in seqfeature BioSQL class
        locations_str = sf.locations.map{|loc| loc.to_s}.join(',')
        #pp sf.locations.inspect
        locations_str = "join(#{locations_str})" if sf.locations.count>1 
        Bio::Feature.new(sf.type_term.name, locations_str,sf.seqfeature_qualifier_values.collect{|sfqv| Bio::Feature::Qualifier.new(sfqv.term.name,sfqv.value)})
      end
      
      def length=(len)
        @entry.biosequence.length=len
      end
      
      def initialize(options={})
        options.assert_valid_keys(:entry, :biodatabase_id,:biosequence)
        return @entry = options[:entry] unless options[:entry].nil?
        return to_biosql(options[:biosequence], options[:biodatabase_id]) unless options[:biosequence].nil? or options[:biodatabase_id].nil?
      end
      
      def to_biosql(bs,biodatabase_id)
        #Transcaction works greatly!!!
        
        #
        begin
          Bioentry.transaction do           
            @entry = Bioentry.new(:biodatabase_id=>biodatabase_id, :name=>bs.entry_id)
            #            pp "primary"
            self.primary_accession = bs.primary_accession
            #            pp "def"
            self.definition = bs.definition unless bs.definition.nil?
            #            pp "seqver"
            self.sequence_version = bs.sequence_version
            #            pp "divi"
            self.division = bs.division unless bs.division.nil?
            @entry.save!
            #            pp "secacc"
            bs.secondary_accessions.each do |sa|
              #write as qualifier every secondary accession into the array
              self.secondary_accessions = sa
            end
            #to create the sequence entry needs to exists
            #            pp "seq"
            self.seq = bs.seq unless bs.seq.nil?
            #            pp "mol"
            self.molecule_type = bs.molecule_type unless bs.molecule_type.nil?
            #            pp "dc"
            self.data_class = bs.data_class unless bs.data_class.nil?
            #            pp "top"
            self.topology = bs.topology unless bs.topology.nil?
            #            pp "datec"
            self.date_created = bs.date_created unless bs.date_created.nil?
            #            pp "datemod"
            self.date_modified = bs.date_modified unless bs.date_modified.nil?
            #            pp "key"
            bs.keywords.each do |kw|
              #write as qualifier every secondary accessions into the array
              self.keywords = kw
            end
            #FIX: problem settinf texon_name: embl has "Arabidopsis thaliana (thale cress)" but in taxon_name table there isn't this name. I must check if there is a new version of the table
            #pp "spec"        
            self.species = bs.species unless bs.species.nil?
            #            pp "Debug: #{bs.species}"
            #            pp "feat"
            bs.features.each do |feat|
              self.feature=feat
            end
            
            #TODO: add comments and references
            
          end #transaction
          return self
        rescue Exception => e
          pp "to_biosql exception: #{e}"
        end
      end #to_biosql
      
      
      def name 
        @entry.name
      end
      alias entry_id name
      
      def name=(value)
        @entry.name=value
      end
      alias entry_id= name=
      
      def primary_accession
        @entry.accession
      end
      
      def primary_accession=(value)
        @entry.accession=value
      end
      
      #TODO      def secondary_accession
      #        @entry.bioentry_qualifier_values
      #      end
      
      def organism
        @entry.taxon.nil? ? "" : @entry.taxon.taxon_scientific_name.name
      end
      alias species organism
      
      def organism=(value)
        taxon_name=TaxonName.find_by_name_and_name_class(value,'scientific name')
        if taxon_name.nil?
          puts "Error value doesn't exists in taxon_name table with scientific name constraint."
        else
          @entry.taxon_id=taxon_name.taxon_id
          @entry.save!
        end
      end
      alias species= organism=
      
      def database
        @entry.biodatabase.name
      end
      
      def database_desc
        @entry.biodatabase.description
      end
      
      def version
        @entry.version
      end
      alias sequence_version version 
      
      def version=(value)
        @entry.version=value
      end
      alias sequence_version= version=
      
      def division
        @entry.division
      end
      def division=(value)
        @entry.division=value
      end
      
      def description
        @entry.description
      end
      alias definition description
      
      def description=(value)
        @entry.description=value
      end
      alias definition= description=
      
      def identifier
        @entry.identifier
      end
      
      def identifier=(value)
        @entry.identifier=value
      end
      
      bioentry_qualifier_anchor :data_class
      bioentry_qualifier_anchor :molecule_type, :synonym=>'mol_type'
      bioentry_qualifier_anchor :topology
      bioentry_qualifier_anchor :date_created      
      bioentry_qualifier_anchor :date_modified, :synonym=>'date_changed'
      bioentry_qualifier_anchor :keywords, :synonym=>'keyword'
      bioentry_qualifier_anchor :secondary_accessions, :synonym=>'secondary_accession'
      
      def features
        Bio::Features.new(@entry.seqfeatures.collect {|sf|
          self.get_seqfeature(sf)})
      end
      
      def feature=(feat)
        #TODO: fix ontology_id and source_term_id 
        type_term = Term.find_or_create_by_name(:name=>feat.feature, :ontology_id=>1)
        seqfeature = Seqfeature.new(:bioentry=>@entry, :source_term_id=>2, :type_term=>type_term, :rank=>@entry.seqfeatures.count.succ, :display_name=>'')
        seqfeature.save!        
        feat.locations.each do |loc|
          location = Location.new(:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand, :rank=>seqfeature.locations.count.succ)
          location.save!
        end
        feat.each do |qualifier|
          qual_term = Term.find_or_create_by_name(:name=>qualifier.qualifier, :ontology_id=>3)
          qual = SeqfeatureQualifierValue.new(:seqfeature=>seqfeature, :term=>qual_term, :value=>qualifier.value, :rank=>seqfeature.seqfeature_qualifier_values.count.succ)
          qual.save!          
        end
      end
      
      
      def seq
        Bio::Sequence.auto(@entry.biosequence.seq) unless @entry.biosequence.nil?
      end	
      
      def seq=(value)
        #chk which type of alphabet is, NU/NA/nil
        #value could be nil ? I think no.
        if @entry.biosequence.nil?
          @entry.biosequence = Biosequence.new(:seq=>value)
          @entry.biosequence.save!
        else
          @entry.biosequence.seq=value
        end
        
        self.length=value.length
      end
      
      def taxonomy
        tax = []
        taxon = @entry.taxon
        while taxon and taxon.taxon_id != taxon.parent_taxon_id
          tax << taxon.taxon_scientific_name.name
          #Note: I don't like this call very much, correct with a relationship in the ref class.
          taxon = Taxon.find(taxon.parent_taxon_id)
        end
        tax.reverse
      end
      
      def length 
        @entry.biosequence.length
      end
      
      def references
        #return and array of hash, hash has these keys ["title", "dbxref_id", "reference_id", "authors", "crc", "location"]
        #probably would be better to d a class refrence to collect these informations
        @entry.bioentry_references.collect do |ref|
          hash = Hash.new
          hash['authors'] = ref.reference.authors
          hash['title'] = ref.reference.title
          hash['embl_gb_record_number'] = ref.reference.rank
          #about location/journal take a look to hilmar' schema overview. 
          #TODO: solve the problem with specific comment per reference.
          #TODO: get dbxref
          hash['journal'] = ref.reference.location
          hash['xrefs'] = "#{ref.reference.dbxref.dbname}; #{ref.reference.dbxref.accession}."
          Bio::Reference.new(hash)
        end        
      end
      
      def comments
        @entry.comments.map do |comment|
          comment.comment_text
        end
      end
      
      
      def save
        #I should add chks for SQL errors
        @entry.biosequence.save
        @entry.save
      end
      def to_fasta
        #prima erano 2 print in stdout, meglio ritornare una stringa in modo che poi ci si possa fare quello che si vuole
        #print ">" + accession + "\n" 
        #print seq.gsub(Regexp.new(".{1,#{60}}"), "\\0\n")
				">" + accession + "\n" + seq.gsub(Regexp.new(".{1,#{60}}"), "\\0\n")
      end
      
      def to_fasta_reverse_complememt
				">" + accession + "\n" + seq.reverse_complement.gsub(Regexp.new(".{1,#{60}}"), "\\0\n")
      end
      
      
      # converts Bio::SQL::Sequence to Bio::Sequence
      # ---
      # *Arguments*: 
      # *Returns*:: Bio::Sequence object
      #TODO:      def to_biosequence
      #        sequence = Bio::Sequence.new(seq)
      #        sequence.entry_id = entry_id
      #        
      #        sequence.primary_accession = accession
      #        sequence.secondary_accessions = accession
      #        
      #        sequence.molecule_type = natype
      #        sequence.division = division
      #        sequence.topology = circular
      #        
      #        sequence.sequence_version = version
      #        #sequence.date_created = nil #????
      #        sequence.date_modified = date
      #        
      #        sequence.definition = definition
      #        sequence.keywords = keywords
      #        sequence.species = organism
      #        sequence.classification = self.taxonomy.to_s.sub(/\.\z/, '').split(/\s*\;\s*/)
      #        #sequence.organnella = nil # not used
      #        sequence.comments = comment
      #        sequence.references = references
      #        sequence.features = features
      #        return sequence
      #      end
      #
      #			    def load_fasta(entry, biodatabase)
      #				result=nil
      #			#	    if !entry.accession.nil? then
      #				    ##	pp biodatabase
      #					begin 
      #						Bioentry.transaction do 
      #							bioentry=Bioentry.new(:biodatabase=>biodatabase, :name=>entry.accession, :accession=>entry.accession, \
      #							  :description=>entry.definition, :version=>0)
      #						
      #			#				bioentry=Bioentry.new(:biodatabase=>biodatabase, :name=>entry.accession, :accession=>entry.accession, \
      #			#				  :description=>entry.definition, :version=>entry.acc_version.split(/\./).last, :identifier=>entry.gi)
      #					##		pp bioentry
      #							bioentry.save!
      #							result=bioentry
      #							begin
      #								Biosequence.transaction do
      #									bioentry.biosequence = Biosequence.new(:seq=>entry.seq, :version=>0, :length=>entry.seq.length, :alphabet=>'')
      #									bioentry.biosequence.save!
      #								end #Bioseqence.transaction
      #							rescue Exception => exc
      #								puts "Error Biosequence: #{exc.message}"
      #							end #Rescue Biosequence
      #						end #Bioentry.transaction
      #					rescue ActiveRecord::RecordInvalid => e
      #						puts "Error: Transaction Aborted on class #{e.record.class}, table #{e.record.class.table_name} due to:"
      #						e.record.errors.each{|att, msg|
      #							puts "#{att} => #{msg}" 
      #						}
      #					rescue Exception => exc 
      #						puts "Errore Bioentry: #{exc.message}"
      #					end #Resce Bioentry
      #			#	    end #entry chk
      #				return result
      #			    end #load_fasta
      #
      #			    def load_gb(entry, biodatabase)
      #			##	pp biodatabase
      #				result=nil
      #
      #				begin 
      #				Bioentry.transaction do 
      #					bioentry=Bioentry.new(:biodatabase=>biodatabase, :name=>entry.entry_id, :accession=>entry.entry_id, :division=>entry.division, \
      #				  :description=>entry.definition, :version=>entry.version, :identifier=>entry.gi.split(/:/).last.to_i)
      #			##		pp bioentry
      #					bioentry.save!
      #
      #					result=bioentry
      #
      #			#		end #Bioentry.transaction
      #			##debug	pp ["Bioentry", [:name=>entry.entry_id, :accession=>entry.entry_id, :division=>entry.division,
      #			##  :description=>entry.definition, :version=>entry.version, :identifier=>entry.gi.split(/:/).last.to_i]] 
      #
      #			#delete	biodatabase.bioentries << bioentry
      #				#note Alphabet not defined
      #
      #				begin
      #				rank_comment=1
      #				Comment.transaction do 
      #					if !entry.comment.empty? then
      #						bioentry.comment = Comment.new(:comment_text=>entry.comment, :rank=>rank_comment)
      #						bioentry.comment.save!
      #						rank_comment=rank_comment.next
      #					end
      #				end #Comment.transaction
      #				rescue Exception => exc
      #					puts "Error Comment: #{exc.message}"
      #				end #Rescue Command
      #			#debug	pp "Comment"
      #			##debug  	pp ["Comment", [:comment_text=>entry.comment]] if !entry.comment.empty?
      #				begin
      #				Biosequence.transaction do
      #					bioentry.biosequence = Biosequence.new(:seq=>entry.seq, :version=>0, :length=>entry.seq.length, :alphabet=>'')
      #					bioentry.biosequence.save!
      #				end #Bioseqence.transaction
      #				rescue Exception => exc
      #					puts "Error Biosequence: #{exc.message}"
      #				end #Rescue Biosequence
      #			#debug	pp "Biosequence"
      #			##debug  	pp ["Biosequence", :seq=>entry.seq, :version=>0, :length=>entry.seq.length, :alphabet=>'']
      #				begin
      #				rank_seqfeature=1
      #				Seqfeature.transaction do 
      #					entry.features.each do |feature|
      #					#note Rank default to ZERO, display_name String empty
      #					#note Chek if exists term name
      ##delete puts "Feature #{feature.inspect}"			
      ##delete puts "FeatureFeature #{feature.feature.inspect}"		
      #
      #						type_term = Term.exists?(:name=>feature.feature) ? Term.find_by_name(feature.feature) : Term.create!(:name=>feature.feature, :ontology_id=>1)
      #			#			seqfeature = Seqfeature.new(:bioentry=>bioentry, :source_term_id=>2, :typeterm=>Term.find_by_name(feature.feature), :rank=>rank_seqfeature, :display_name=>'')
      ##delete puts "Type Term #{type_term.inspect}"			
      #						seqfeature = Seqfeature.new(:bioentry=>bioentry, :source_term_id=>2, :type_term=>type_term, :rank=>rank_seqfeature, :display_name=>'')
      ##delete puts "Seqfeature #{seqfeature.inspect}"						
      #						seqfeature.save!
      #			##debug		pp ["Seqfeature", [:source_term_id=>2, :typeterm=>Term.find_by_name(feature.feature), :rank=>0, :display_name=>'']]
      #						begin
      #						Location.transaction do 	
      #							feature.locations.each do |loc|
      #								location = Location.new(:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand)
      #								location.save!
      #			##debug			pp ["Location",[:start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand]]
      #							end #locations
      #						end #Location.transaction
      #						rescue Exception => exc
      #							puts "Error Location: #{exc.message}"
      #						end #Rescue Location
      #			#debug			pp "Locations"
      #			#delete			bioentry.seqfeatures << seqfeature
      ##delete if nil
      #						begin
      #						rank_seqfeaturequalifiervalue=0
      #						rank_qual_qualifier=""
      #						SeqfeatureQualifierValue.transaction do
      #							feature.each do |qual|
      #	
      #							#gestisce il livello dei qualificatori...
      #								if (rank_qual_qualifier==qual.qualifier) then 
      #									rank_seqfeaturequalifiervalue=rank_seqfeaturequalifiervalue.next
      #								else
      #									rank_seqfeaturequalifiervalue=1
      #									rank_qual_qualifier=qual.qualifier
      #								end
      #
      #			##debug			pp ["SeqfeatureQualifierValue",  qual.qualifier, [ :term=>Term.find_by_name(qual.qualifier), :value=>qual.value]]
      #								term = Term.exists?(:name=>qual.qualifier) ? Term.find_by_name(qual.qualifier) : Term.create!(:name=>qual.qualifier, :ontology_id=>3)
      #
      #			#				qual = SeqfeatureQualifierValue.new(:seqfeature=>seqfeature, :term=>Term.find_by_name(qual.qualifier), :value=>qual.value, :rank=>rank_seqfeaturequalifiervalue)
      #								qual = SeqfeatureQualifierValue.new(:seqfeature=>seqfeature, :term=>term, :value=>qual.value, :rank=>rank_seqfeaturequalifiervalue)
      #								qual.save!
      #							end #qualifiers
      #						end #SeqfeatureQualifierValue.transaction
      #						rescue Exception => exc
      #							puts "Error SeqfeatureQualifierValue: #{exc.message}"
      #						end #Rescue SeqfeatureQualifierValue
      ###delete end #debug if nil
      #			#debug			pp "SeqfeatureQualifierValue"
      #					rank_seqfeature=rank_seqfeature.next
      #					end #features
      #				end #Seqfeature.transaction
      #				rescue Exception => exc
      #					puts "Error Seqfeature: #{exc.message}"
      #				end #Rescue Seqfeature
      #
      #				end #Bioentry.transaction
      #				rescue ActiveRecord::RecordInvalid => e
      #					puts "Error: Transaction Aborted on class #{e.record.class}, table #{e.record.class.table_name} due to:"
      #					e.record.errors.each{|att, msg|
      #						puts "#{att} => #{msg}" 
      #					}
      #				rescue Exception => exc 
      #					puts "Errore Bioentry: #{exc.message}"
      #				end #Resce Bioentry
      #				return result
      #			    end #load_gb
      #			    
      #			    def load_embl(entry, biodatabase)
      #			    
      #			#	puts biodatabase
      #				result=nil
      #
      #				begin 
      #				Bioentry.transaction do 
      #					bioentry=Bioentry.new(:biodatabase=>biodatabase, :name=>entry.entry_id, :accession=>entry.entry_id, :division=>entry.division, \
      #				  :description=>entry.definition, :version=>entry.version, :identifier=>entry.entry_id)
      #			#		puts bioentry
      #					bioentry.save!
      #					result=bioentry
      #
      #			#		end #Bioentry.transaction
      #			#	puts ["Bioentry", [:name=>entry.entry_id, :accession=>entry.entry_id, :division=>entry.division,\
      #			#  :description=>entry.definition, :version=>entry.version, :identifier=>entry.entry_id]] 
      #
      #			#delete	biodatabase.bioentries << bioentry
      #				#note Alphabet not defined
      #				begin
      #				rank_comment=1
      #				#qui potrebbero essercene di più
      #				Comment.transaction do 
      #					if !entry.cc.empty?
      #						bioentry.comment = Comment.new(:comment_text=>entry.cc, :rank=>rank_comment)
      #						bioentry.comment.save!
      #						rank_comment=rank_comment.next
      #					end
      #				end #Comment.transaction
      #				rescue Exception => exc
      #					puts "Error Comment: #{exc.message}"
      #				end #Rescue Command
      #			#	puts "Comment"
      #			 # 	puts ["Comment", [:comment_text=>entry.cc]] if !entry.cc.empty?
      #				begin
      #				Biosequence.transaction do
      #					bioentry.biosequence = Biosequence.new(:seq=>entry.seq, :version=>0, :length=>entry.seq.length, :alphabet=>entry.molecule_type)
      #					bioentry.biosequence.save!
      #				end #Bioseqence.transaction
      #				rescue Exception => exc
      #					puts "Error Biosequence: #{exc.message}"
      #				end #Rescue Biosequence
      #			#debug	pp "Biosequence"
      #			##debug  	pp ["Biosequence", :seq=>entry.seq, :version=>0, :length=>entry.seq.length, :alphabet=>'']
      #				begin
      #				rank_seqfeature=1
      #				Seqfeature.transaction do 
      #					entry.features.each do |feature|
      #					#note Rank default to ZERO, display_name String empty
      #					#note Chek if exists term name
      #						type_term = Term.exists?(:name=>feature.feature) ? Term.find_by_name(feature.feature) : Term.create!(:name=>feature.feature, :ontology_id=>1)
      #			#			seqfeature = Seqfeature.new(:bioentry=>bioentry, :source_term_id=>2, :typeterm=>Term.find_by_name(feature.feature), :rank=>rank_seqfeature, :display_name=>'')
      #						seqfeature = Seqfeature.new(:bioentry=>bioentry, :source_term_id=>2, :type_term=>type_term, :rank=>rank_seqfeature, :display_name=>'')
      #						seqfeature.save!
      #			##debug		pp ["Seqfeature", [:source_term_id=>2, :typeterm=>Term.find_by_name(feature.feature), :rank=>0, :display_name=>'']]
      #						begin
      #						Location.transaction do 	
      #							feature.locations.each do |loc|
      #								location = Location.new(:seqfeature=>seqfeature, :start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand)
      #								location.save!
      #			##debug			pp ["Location",[:start_pos=>loc.from, :end_pos=>loc.to, :strand=>loc.strand]]
      #							end #locations
      #						end #Location.transaction
      #						rescue Exception => exc
      #							puts "Error Location: #{exc.message}"
      #						end #Rescue Location
      #			#debug			pp "Locations"
      #			#delete			bioentry.seqfeatures << seqfeature
      #						begin
      #						rank_seqfeaturequalifiervalue=0
      #						rank_qual_qualifier=""
      #						SeqfeatureQualifierValue.transaction do
      #							feature.each do |qual|
      #							#gestisce il livello dei qualificatori...
      #								if (rank_qual_qualifier==qual.qualifier) then 
      #									rank_seqfeaturequalifiervalue=rank_seqfeaturequalifiervalue.next
      #								else
      #									rank_seqfeaturequalifiervalue=1
      #									rank_qual_qualifier=qual.qualifier
      #								end
      #
      #			##debug			pp ["SeqfeatureQualifierValue",  qual.qualifier, [ :term=>Term.find_by_name(qual.qualifier), :value=>qual.value]]
      #								term = Term.exists?(:name=>qual.qualifier) ? Term.find_by_name(qual.qualifier) : Term.create!(:name=>qual.qualifier, :ontology_id=>3)
      #			#				qual = SeqfeatureQualifierValue.new(:seqfeature=>seqfeature, :term=>Term.find_by_name(qual.qualifier), :value=>qual.value, :rank=>rank_seqfeaturequalifiervalue)
      #								qual = SeqfeatureQualifierValue.new(:seqfeature=>seqfeature, :term=>term, :value=>qual.value, :rank=>rank_seqfeaturequalifiervalue)
      #
      #								qual.save!
      #							end #qualifiers
      #						end #SeqfeatureQualifierValue.transaction
      #						rescue Exception => exc
      #							puts "Error SeqfeatureQualifierValue: #{exc.message}"
      #						end #Rescue SeqfeatureQualifierValue
      #			#debug			pp "SeqfeatureQualifierValue"
      #					rank_seqfeature=rank_seqfeature.next
      #					end #features
      #				end #Seqfeature.transaction
      #				rescue Exception => exc
      #					puts "Error Seqfeature: #{exc.message}"
      #				end #Rescue Seqfeature
      #				end #Bioentry.transaction
      #				rescue ActiveRecord::RecordInvalid => e
      #					puts "Error: Transaction Aborted on class #{e.record.class}, table #{e.record.class.table_name} due to:"
      #					e.record.errors.each{|att, msg|
      #						puts "#{att} => #{msg}" 
      #					}
      #				rescue Exception => exc 
      #					puts "Errore Bioentry: #{exc.message}"
      #				end #Resce Bioentry
      #				
      #				return result
      #			    end #load_embl
      
      
      def to_biosequence
        bio_seq = Bio::Sequence.new(seq)
        bio_seq.entry_id = entry_id
        bio_seq.primary_accession = primary_accession
        bio_seq.secondary_accessions = secondary_accessions
        bio_seq.molecule_type = molecule_type
        #TODO: identify where is stored data_class in biosql      
        bio_seq.data_class = data_class
        bio_seq.definition = description
        bio_seq.topology = topology
        bio_seq.date_created = date_created
        bio_seq.date_modified = date_modified
        bio_seq.division = division
        bio_seq.sequence_version = sequence_version
        bio_seq.keywords = keywords
        bio_seq.species = species
        bio_seq.classification = taxonomy
        bio_seq.references = references
        bio_seq.features = features       
        return bio_seq
      end
    end #Sequence
    
    #gb=Bio::FlatcFile.open(Bio::GenBank, "/Development/Projects/Cocco/Data/Riferimenti/Genomi/NC_003098_Cocco_R6.gb")
    #db=Biodatabase.find_by_name('fake')
    #gb.each_entry {|entry| Sequence.new(:entry=>entry, :biodatabase=>db)}
    
    
  end #SQL
end #Bio

#TODO create tests for sequence object, roundtrip of informations

if __FILE__ == $0
  
  require 'bio'
  require 'bio/io/sql'
  require 'pp'
  
  #  connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
  connection = Bio::SQL.establish_connection({'development'=>{'database'=>"biorails_development", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development') 
  databases = Bio::SQL.list_databases
  
  #  parser = Bio::FlatFile.auto('/home/febo/Desktop/aj224122.embl')
  #  parser = Bio::FlatFile.auto('/home/febo/Desktop/aj224122.gb')
  parser = Bio::FlatFile.auto('/home/febo/Desktop/aj224122.fasta')
  
  parser.each do |entry|
    biosequence = entry.to_biosequence
    result = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>databases.first[:id]) unless Bio::SQL.exists_accession(biosequence.primary_accession) 
    
    if result.nil?
      pp "The sequence is already present into the biosql database"
    else
      #      pp "Sequence"
      puts result.to_biosequence.output(:genbank) #:embl
    end   
  end
  #NOTE: ho sistemato le features e le locations, mancano le references e i comments. poi credo che il tutto sia a posto.
  
  
  if false
    sqlseq = Bio::SQL.fetch_accession('AJ224122')
    #only output tests.
    pp "Connection"
    pp connection
    pp "Seq in dbs"
    pp Bio::SQL.list_entries
    #; NC_003098
    
    
    #pp sqlseq
    pp sqlseq.entry.inspect
    pp "sequence"
    #pp Bio::Sequence.auto(sqlseq.seq)
    pp "entry_id"
    pp sqlseq.entry_id
    
    pp "primary"
    pp sqlseq.accession
    pp "secondary_accessions"
    pp sqlseq.secondary_accessions
    pp "molecule type"
    pp sqlseq.molecule_type
    pp "data_class"
    pp sqlseq.data_class
    pp "division"
    pp sqlseq.division
    # NOTE : Topology is not represented in biosql?
    pp "topology"
    #TODO:  CIRCULAR   this at present maps to bioentry_qualifier_value, though there are plans to make it a column in table biosequence.
    pp sqlseq.topology
    pp "version"
    pp sqlseq.version
    #sequence.date_created = nil #????
    pp "date modified"
    pp sqlseq.date_modified
    pp "definition"
    pp sqlseq.definition
    pp "keywords"
    pp sqlseq.keywords
    pp "species"
    pp sqlseq.organism
    #sequence.classification = self.taxonomy.to_s.sub(/\.\z/, '').split(/\s*\;\s*/)"
    pp "classification"
    pp sqlseq.taxonomy
    #sequence.organnella = nil # not used
    pp "comments"
    pp sqlseq.comments
    pp "references"
    pp sqlseq.references
    pp "features"
    pp sqlseq.features
    puts sqlseq.to_biosequence.output(:embl)
  end
  ##
end



More information about the bioruby-cvs mailing list