[BioRuby-cvs] bioruby/lib/bio/io sql.rb,1.8,1.8.2.1

Raoul Jean Pierre Bonnal helios at dev.open-bio.org
Tue Mar 25 15:47:01 UTC 2008


Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv13290/lib/bio/io

Modified Files:
      Tag: BRANCH-biohackathon2008
	sql.rb 
Log Message:
BioSQL release "MIFI". biosql->biosequence, biosequence->biosql. Supported formats: Embl, Genbank; support sql stransactions creating new sequences on biosql; does not support references and comments for genbank and embl. Fasta->biosequence->biosql dosn't work.

Index: sql.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/sql.rb,v
retrieving revision 1.8
retrieving revision 1.8.2.1
diff -C2 -d -r1.8 -r1.8.2.1
*** sql.rb	5 Apr 2007 23:35:41 -0000	1.8
--- sql.rb	25 Mar 2008 15:46:32 -0000	1.8.2.1
***************
*** 1,365 ****
- #
- # = bio/io/sql.rb - BioSQL access module
- #
- # Copyright::  Copyright (C) 2002 Toshiaki Katayama <k at bioruby.org>
- # Copyright::  Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal at itb.cnr.it>
- # License::    The Ruby License
- #
- # $Id$
- #
- 
- begin
-   require 'dbi'
- rescue LoadError
- end
- require 'bio/sequence'
- require 'bio/feature'
- 
- 
- module Bio
- 
- class SQL
- 
-   def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
-     @dbh = DBI.connect(db, user, pass)
-   end
- 
-   def close
-     @dbh.disconnect
-   end
- 
-   # Returns Bio::SQL::Sequence object.
-   def fetch(accession)	# or display_id for fall back
-     query = "select * from bioentry where accession = ?"
-     entry = @dbh.execute(query, accession).fetch
-     return Sequence.new(@dbh, entry) if entry
- 
-     query = "select * from bioentry where display_id = ?"
-     entry = @dbh.execute(query, accession).fetch
-     return Sequence.new(@dbh, entry) if entry
-   end
-   alias get_by_id fetch
- 
- 
-   # for lazy fetching
- 
-   class Sequence
- 
-     def initialize(dbh, entry)
-       @dbh = dbh
-       @bioentry_id = entry['bioentry_id']
-       @database_id = entry['biodatabase_id']
-       @entry_id = entry['display_id']
-       @accession = entry['accession']
-       @version = entry['entry_version']
-       @division = entry['division']
-     end
-     attr_reader :accession, :division, :entry_id, :version
- 
- 
-     def to_fasta
-       if seq = seq
-         return seq.to_fasta(@accession)
-       end
-     end
- 
-     # Returns Bio::Sequence::NA or AA object.
-     def seq
-       query = "select * from biosequence where bioentry_id = ?"
-       row = @dbh.execute(query, @bioentry_id).fetch
-       return unless row
- 
-       mol = row['alphabet']
-       seq = row['seq']
- 
-       case mol
-       when /.na/i			# 'dna' or 'rna'
-         Bio::Sequence::NA.new(seq)
-       else				# 'protein'
-         Bio::Sequence::AA.new(seq)
-       end
-     end
- 
-     # Returns Bio::Sequence::NA or AA object (by lazy fetching).
-     def subseq(from, to)
-       length = to - from + 1
-       query = "select alphabet, substring(seq, ?, ?) as subseq" +
-               " from biosequence where bioentry_id = ?"
-       row = @dbh.execute(query, from, length, @bioentry_id).fetch
-       return unless row
- 
-       mol = row['alphabet']
-       seq = row['subseq']
- 
-       case mol
-       when /.na/i			# 'dna' or 'rna'
-         Bio::Sequence::NA.new(seq)
-       else				# 'protein'
-         Bio::Sequence::AA.new(seq)
-       end
-     end
- 
- 
-     # Returns Bio::Features object.
-     def features
-       array = []
-       query = "select * from seqfeature where bioentry_id = ?"
-       @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-         next unless row
- 
-         f_id = row['seqfeature_id']
-         k_id = row['type_term_id']
-         s_id = row['source_term_id']
-         rank = row['rank'].to_i - 1
  
!         # key : type (gene, CDS, ...)
!         type = feature_key(k_id)
! 
!         # source : database (EMBL/GenBank/SwissProt)
!         database = feature_source(s_id)
! 
!         # location : position
!         locations = feature_locations(f_id)
! 
!         # qualifier
!         qualifiers = feature_qualifiers(f_id)
! 
!         # rank
!         array[rank] = Bio::Feature.new(type, locations, qualifiers)
!       end
!       return Bio::Features.new(array)
!     end
! 
! 
!     # Returns reference informations in Array of Hash (not Bio::Reference).
!     def references
!       array = []
!       query = <<-END
!         select * from bioentry_reference, reference
!         where bioentry_id = ? and
!         bioentry_reference.reference_id = reference.reference_id
!       END
!       @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
!         next unless row
! 
!         hash = {
!           'start'	=> row['start_pos'],
!           'end'		=> row['end_pos'],
!           'journal'	=> row['location'],
!           'title'	=> row['title'],
!           'authors'	=> row['authors'],
!           'medline'	=> row['crc']
!         }
!         hash.default = ''
! 
!         rank = row['rank'].to_i - 1
!         array[rank] = hash
!       end
!       return array
!     end
! 
! 
!     # Returns the first comment.  For complete comments, use comments method.
!     def comment
!       query = "select * from comment where bioentry_id = ?"
!       row = @dbh.execute(query, @bioentry_id).fetch
!       row ? row['comment_text'] : ''
!     end
! 
!     # Returns comments in an Array of Strings.
!     def comments
!       array = []
!       query = "select * from comment where bioentry_id = ?"
!       @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
!         next unless row
!         rank = row['rank'].to_i - 1
!         array[rank] = row['comment_text']
!       end
!       return array
!     end
! 
!     def database
!       query = "select * from biodatabase where biodatabase_id = ?"
!       row = @dbh.execute(query, @database_id).fetch
!       row ? row['name'] : ''
!     end
! 
!     def date
!       query = "select * from bioentry_date where bioentry_id = ?"
!       row = @dbh.execute(query, @bioentry_id).fetch
!       row ? row['date'] : ''
!     end
! 
!     def dblink
!       query = "select * from bioentry_direct_links where source_bioentry_id = ?"
!       row = @dbh.execute(query, @bioentry_id).fetch
!       row ? [row['dbname'], row['accession']] : []
!     end
! 
!     def definition
!       query = "select * from bioentry_description where bioentry_id = ?"
!       row = @dbh.execute(query, @bioentry_id).fetch
!       row ? row['description'] : ''
!     end
! 
!     def keyword
!       query = "select * from bioentry_keywords where bioentry_id = ?"
!       row = @dbh.execute(query, @bioentry_id).fetch
!       row ? row['keywords'] : ''
!     end
! 
!     # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
!     def taxonomy
!       query = <<-END
!         select taxon_name.name, taxon.ncbi_taxon_id from bioentry
!         join taxon_name using(taxon_id) join taxon using (taxon_id)
!         where bioentry_id = ?
!       END
!       row = @dbh.execute(query, @bioentry_id).fetch
! #     @lineage = row ? row['full_lineage'] : ''
!       @common_name = row ? row['name'] : ''
!       @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
!       row ? [@lineage, @common_name, @ncbi_taxa_id] : []
!     end
  
!     def lineage
!       taxonomy unless @lineage
!       return @lineage
!     end
  
-     def common_name
-       taxonomy unless @common_name
-       return @common_name
-     end
  
!     def ncbi_taxa_id
!       taxonomy unless @ncbi_taxa_id
!       return @ncbi_taxa_id
      end
! 
! 
!     private
! 
!     def feature_key(k_id)
!       query = "select * from term where term_id= ?"
!       row = @dbh.execute(query, k_id).fetch
!       row ? row['name'] : ''
      end
! 
!     def feature_source(s_id)
!       query = "select * from term where term_id = ?"
!       row = @dbh.execute(query, s_id).fetch
!       row ? row['name'] : ''
      end
! 
!     def feature_locations(f_id)
!       locations = []
!       query = "select * from location where seqfeature_id = ?"
!       @dbh.execute(query, f_id).fetch_all.each do |row|
!         next unless row
! 
!         location = Bio::Location.new
!         location.strand = row['strand']
!         location.from = row['start_pos']
!         location.to = row['end_pos']
! 
!         xref = feature_locations_remote(row['dbxref_if'])
!         location.xref_id = xref.shift unless xref.empty?
! 
!         # just omit fuzzy location for now...
!         #feature_locations_qv(row['seqfeature_location_id'])
! 
!         rank = row['rank'].to_i - 1
!         locations[rank] = location
!       end
!       return Bio::Locations.new(locations)
      end
! 
!     def feature_locations_remote(l_id)
!       query = "select * from  dbxref where dbxref_id = ?"
!       row = @dbh.execute(query, l_id).fetch
!       row ? [row['accession'], row['version']] : []
      end
! 
!     def feature_locations_qv(l_id)
!       query = "select * from location_qualifier_value where location_id = ?"
!       row = @dbh.execute(query, l_id).fetch
!       row ? [row['value'], row['int_value']] : []
      end
! 
!     def feature_qualifiers(f_id)
!       qualifiers = []
!       query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
!       @dbh.execute(query, f_id).fetch_all.each do |row|
!         next unless row
! 
!         key = feature_qualifiers_key(row['seqfeature_id'])
!         value = row['value']
!         qualifier = Bio::Feature::Qualifier.new(key, value)
! 
!         rank = row['rank'].to_i - 1
!         qualifiers[rank] = qualifier
!       end
!       return qualifiers.compact	# .compact is nasty hack for a while
      end
! 
!     def feature_qualifiers_key(q_id)
!       query = <<-END
!         select * from seqfeature_qualifier_value
!         join term using(term_id) where seqfeature_id = ?
!       END
!       row = @dbh.execute(query, q_id).fetch
!       row ? row['name'] : ''
      end
!   end
! 
! end # SQL
! 
! end # Bio
! 
  
  if __FILE__ == $0
!   begin
!     require 'pp'
!     alias p pp
!   rescue LoadError
    end
! 
!   db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
!   serv = Bio::SQL.new(db, 'root')
! 
!   ent0 = serv.fetch('X76706')
!   ent0 = serv.fetch('A15H9FIB')
!   ent1 = serv.fetch('J01902')
!   ent2 = serv.fetch('X04311')
! 
!   pp ent0.features
!   pp ent0.references
! 
!   pp ent1.seq
!   pp ent1.seq.translate
!   pp ent1.seq.gc
!   pp ent1.subseq(1,20)
! 
!   pp ent2.accession
!   pp ent2.comment
!   pp ent2.comments
!   pp ent2.common_name
!   pp ent2.database
!   pp ent2.date
!   pp ent2.dblink
!   pp ent2.definition
!   pp ent2.division
!   pp ent2.entry_id
!   pp ent2.features
!   pp ent2.keyword
!   pp ent2.lineage
!   pp ent2.ncbi_taxa_id
!   pp ent2.references
!   pp ent2.seq
!   pp ent2.subseq(1,10)
!   pp ent2.taxonomy
!   pp ent2.version
! 
  end
- 
--- 1,145 ----
  
! require 'rubygems'
! require 'erb'
! require 'composite_primary_keys'
! # BiosqlPlug
  
! =begin
! Ok Hilmar gives to me some clarification
! 1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
!    If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source. 
  
  
! =end
! =begin
! TODO:
! 1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
! 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file. 
! 3) Chk Locations in Biofeatures ArSQL
! =end
! module Bio
!   class SQL	
!     #no check is made
!     def self.establish_connection(configurations, env)
!       #configurations is an hash similar what YAML returns.
!       #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
!       configurations.assert_valid_keys('development', 'production','test')
!       configurations[env].assert_valid_keys('database','adapter','username','password')
!       DummyBase.configurations = configurations
!       DummyBase.establish_connection "#{env}"
      end
!     
!     def self.fetch_id(id)
!       Bio::SQL::Bioentry.find(id)
      end
!     
!     def self.fetch_accession(accession)
!       accession.upcase!
!       Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil			
      end
!     
!     def self.exists_accession(accession)
!       Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
      end
!     
!     def self.list_entries
!       Bio::SQL::Bioentry.find(:all).collect{|entry|
!         {:id=>entry.bioentry_id, :accession=>entry.accession}
!       }
      end
!     
!     def self.list_databases
!       Bio::SQL::Biodatabase.find(:all).collect{|entry|
!         {:id=>entry.biodatabase_id, :name => entry.name}
!       }
      end
!     
!     def self.delete_entry_id(id)
!       Bioentry.delete(id)
      end
!     
!     def self.delete_entry_accession(accession)
!       Bioentry.delete(Bioentry.find_by_accession(accession))
      end
!     
!     
!     class DummyBase <  ActiveRecord::Base
!       #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
!       #NOTE: this class will not establish the connection automatically
!       self.abstract_class = true			
!       self.pluralize_table_names = false
!       #prepend table name to the usual id, avoid to specify primary id for every table
!       self.primary_key_prefix_type = :table_name_with_underscore
!       #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
!       #self.configurations=biosql_configurations
!       #self.establish_connection "development"
!     end #DummyBase
!     
!     autoload :Biodatabase, 'bio/io/biosql/biodatabase'
!     autoload :Bioentry, 'bio/io/biosql/bioentry'
!     autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
!     autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
!     autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
!     autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
!     autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
!     autoload :Biosequence, 'bio/io/biosql/biosequence'
!     autoload :Comment, 'bio/io/biosql/comment'
!     autoload :Dbxref, 'bio/io/biosql/dbxref'
!     autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
!     autoload :Location, 'bio/io/biosql/location'
!     autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
!     autoload :Ontology, 'bio/io/biosql/ontology'
!     autoload :Reference, 'bio/io/biosql/reference'
!     autoload :Seqfeature, 'bio/io/biosql/seqfeature'
!     autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
!     autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
!     autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
!     autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
!     autoload :Taxon, 'bio/io/biosql/taxon'
!     autoload :TaxonName, 'bio/io/biosql/taxon_name'
!     autoload :Term, 'bio/io/biosql/term'
!     autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
!     autoload :TermPath, 'bio/io/biosql/term_path'
!     autoload :TermRelationship, 'bio/io/biosql/term_relationship'
!     autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
!     autoload :Sequence, 'bio/db/biosql/sequence'
!   end #biosql
!   
! end #Bio
  
  if __FILE__ == $0
!   require 'rubygems'
!   require 'composite_primary_keys'
!   require 'bio'
!   require 'pp'
!   
!   #  pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
!   pp connection = Bio::SQL.establish_connection({'development'=>{'database'=>"biorails_development", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
!   #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
!   
!   if nil
!     pp Bio::SQL.list_entries
!     bioseq = Bio::SQL.fetch_accession('AJ224122')   
!     pp bioseq
!     pp bioseq.entry_id    
!     #TODO create a test only for tables not sequence here
!     pp bioseq.molecule_type
!     #pp  bioseq.molecule_type.class
!     #bioseq.molecule_type_update('dna', 1)
!     pp Bio::SQL::Taxon.find(8121).taxon_names
    end
!   #pp  bioseq.molecule_type
!   #term = Bio::SQL::Term.find_by_name('mol_type')
!   #pp term
!   #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
!   #pp bioseq.entry.bioentry_qualifier_values.inspect
!   #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
!   #pp primo.class
!   #  pp primo.value='dna'
!   #  pp primo.save
!   #pp bioseq.molecule_type= 'prova'
!   
!   #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
!   
!   
  end




More information about the bioruby-cvs mailing list