[BioRuby-cvs] bioruby/lib/bio/io sql.rb,1.8,1.8.2.1
Raoul Jean Pierre Bonnal
helios at dev.open-bio.org
Tue Mar 25 15:47:01 UTC 2008
Update of /home/repository/bioruby/bioruby/lib/bio/io
In directory dev.open-bio.org:/tmp/cvs-serv13290/lib/bio/io
Modified Files:
Tag: BRANCH-biohackathon2008
sql.rb
Log Message:
BioSQL release "MIFI". biosql->biosequence, biosequence->biosql. Supported formats: Embl, Genbank; support sql stransactions creating new sequences on biosql; does not support references and comments for genbank and embl. Fasta->biosequence->biosql dosn't work.
Index: sql.rb
===================================================================
RCS file: /home/repository/bioruby/bioruby/lib/bio/io/sql.rb,v
retrieving revision 1.8
retrieving revision 1.8.2.1
diff -C2 -d -r1.8 -r1.8.2.1
*** sql.rb 5 Apr 2007 23:35:41 -0000 1.8
--- sql.rb 25 Mar 2008 15:46:32 -0000 1.8.2.1
***************
*** 1,365 ****
- #
- # = bio/io/sql.rb - BioSQL access module
- #
- # Copyright:: Copyright (C) 2002 Toshiaki Katayama <k at bioruby.org>
- # Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal at itb.cnr.it>
- # License:: The Ruby License
- #
- # $Id$
- #
-
- begin
- require 'dbi'
- rescue LoadError
- end
- require 'bio/sequence'
- require 'bio/feature'
-
-
- module Bio
-
- class SQL
-
- def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
- @dbh = DBI.connect(db, user, pass)
- end
-
- def close
- @dbh.disconnect
- end
-
- # Returns Bio::SQL::Sequence object.
- def fetch(accession) # or display_id for fall back
- query = "select * from bioentry where accession = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
-
- query = "select * from bioentry where display_id = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
- end
- alias get_by_id fetch
-
-
- # for lazy fetching
-
- class Sequence
-
- def initialize(dbh, entry)
- @dbh = dbh
- @bioentry_id = entry['bioentry_id']
- @database_id = entry['biodatabase_id']
- @entry_id = entry['display_id']
- @accession = entry['accession']
- @version = entry['entry_version']
- @division = entry['division']
- end
- attr_reader :accession, :division, :entry_id, :version
-
-
- def to_fasta
- if seq = seq
- return seq.to_fasta(@accession)
- end
- end
-
- # Returns Bio::Sequence::NA or AA object.
- def seq
- query = "select * from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- return unless row
-
- mol = row['alphabet']
- seq = row['seq']
-
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
- end
-
- # Returns Bio::Sequence::NA or AA object (by lazy fetching).
- def subseq(from, to)
- length = to - from + 1
- query = "select alphabet, substring(seq, ?, ?) as subseq" +
- " from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, from, length, @bioentry_id).fetch
- return unless row
-
- mol = row['alphabet']
- seq = row['subseq']
-
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
- end
-
-
- # Returns Bio::Features object.
- def features
- array = []
- query = "select * from seqfeature where bioentry_id = ?"
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
-
- f_id = row['seqfeature_id']
- k_id = row['type_term_id']
- s_id = row['source_term_id']
- rank = row['rank'].to_i - 1
! # key : type (gene, CDS, ...)
! type = feature_key(k_id)
!
! # source : database (EMBL/GenBank/SwissProt)
! database = feature_source(s_id)
!
! # location : position
! locations = feature_locations(f_id)
!
! # qualifier
! qualifiers = feature_qualifiers(f_id)
!
! # rank
! array[rank] = Bio::Feature.new(type, locations, qualifiers)
! end
! return Bio::Features.new(array)
! end
!
!
! # Returns reference informations in Array of Hash (not Bio::Reference).
! def references
! array = []
! query = <<-END
! select * from bioentry_reference, reference
! where bioentry_id = ? and
! bioentry_reference.reference_id = reference.reference_id
! END
! @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
! next unless row
!
! hash = {
! 'start' => row['start_pos'],
! 'end' => row['end_pos'],
! 'journal' => row['location'],
! 'title' => row['title'],
! 'authors' => row['authors'],
! 'medline' => row['crc']
! }
! hash.default = ''
!
! rank = row['rank'].to_i - 1
! array[rank] = hash
! end
! return array
! end
!
!
! # Returns the first comment. For complete comments, use comments method.
! def comment
! query = "select * from comment where bioentry_id = ?"
! row = @dbh.execute(query, @bioentry_id).fetch
! row ? row['comment_text'] : ''
! end
!
! # Returns comments in an Array of Strings.
! def comments
! array = []
! query = "select * from comment where bioentry_id = ?"
! @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
! next unless row
! rank = row['rank'].to_i - 1
! array[rank] = row['comment_text']
! end
! return array
! end
!
! def database
! query = "select * from biodatabase where biodatabase_id = ?"
! row = @dbh.execute(query, @database_id).fetch
! row ? row['name'] : ''
! end
!
! def date
! query = "select * from bioentry_date where bioentry_id = ?"
! row = @dbh.execute(query, @bioentry_id).fetch
! row ? row['date'] : ''
! end
!
! def dblink
! query = "select * from bioentry_direct_links where source_bioentry_id = ?"
! row = @dbh.execute(query, @bioentry_id).fetch
! row ? [row['dbname'], row['accession']] : []
! end
!
! def definition
! query = "select * from bioentry_description where bioentry_id = ?"
! row = @dbh.execute(query, @bioentry_id).fetch
! row ? row['description'] : ''
! end
!
! def keyword
! query = "select * from bioentry_keywords where bioentry_id = ?"
! row = @dbh.execute(query, @bioentry_id).fetch
! row ? row['keywords'] : ''
! end
!
! # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
! def taxonomy
! query = <<-END
! select taxon_name.name, taxon.ncbi_taxon_id from bioentry
! join taxon_name using(taxon_id) join taxon using (taxon_id)
! where bioentry_id = ?
! END
! row = @dbh.execute(query, @bioentry_id).fetch
! # @lineage = row ? row['full_lineage'] : ''
! @common_name = row ? row['name'] : ''
! @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
! row ? [@lineage, @common_name, @ncbi_taxa_id] : []
! end
! def lineage
! taxonomy unless @lineage
! return @lineage
! end
- def common_name
- taxonomy unless @common_name
- return @common_name
- end
! def ncbi_taxa_id
! taxonomy unless @ncbi_taxa_id
! return @ncbi_taxa_id
end
!
!
! private
!
! def feature_key(k_id)
! query = "select * from term where term_id= ?"
! row = @dbh.execute(query, k_id).fetch
! row ? row['name'] : ''
end
!
! def feature_source(s_id)
! query = "select * from term where term_id = ?"
! row = @dbh.execute(query, s_id).fetch
! row ? row['name'] : ''
end
!
! def feature_locations(f_id)
! locations = []
! query = "select * from location where seqfeature_id = ?"
! @dbh.execute(query, f_id).fetch_all.each do |row|
! next unless row
!
! location = Bio::Location.new
! location.strand = row['strand']
! location.from = row['start_pos']
! location.to = row['end_pos']
!
! xref = feature_locations_remote(row['dbxref_if'])
! location.xref_id = xref.shift unless xref.empty?
!
! # just omit fuzzy location for now...
! #feature_locations_qv(row['seqfeature_location_id'])
!
! rank = row['rank'].to_i - 1
! locations[rank] = location
! end
! return Bio::Locations.new(locations)
end
!
! def feature_locations_remote(l_id)
! query = "select * from dbxref where dbxref_id = ?"
! row = @dbh.execute(query, l_id).fetch
! row ? [row['accession'], row['version']] : []
end
!
! def feature_locations_qv(l_id)
! query = "select * from location_qualifier_value where location_id = ?"
! row = @dbh.execute(query, l_id).fetch
! row ? [row['value'], row['int_value']] : []
end
!
! def feature_qualifiers(f_id)
! qualifiers = []
! query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
! @dbh.execute(query, f_id).fetch_all.each do |row|
! next unless row
!
! key = feature_qualifiers_key(row['seqfeature_id'])
! value = row['value']
! qualifier = Bio::Feature::Qualifier.new(key, value)
!
! rank = row['rank'].to_i - 1
! qualifiers[rank] = qualifier
! end
! return qualifiers.compact # .compact is nasty hack for a while
end
!
! def feature_qualifiers_key(q_id)
! query = <<-END
! select * from seqfeature_qualifier_value
! join term using(term_id) where seqfeature_id = ?
! END
! row = @dbh.execute(query, q_id).fetch
! row ? row['name'] : ''
end
! end
!
! end # SQL
!
! end # Bio
!
if __FILE__ == $0
! begin
! require 'pp'
! alias p pp
! rescue LoadError
end
!
! db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
! serv = Bio::SQL.new(db, 'root')
!
! ent0 = serv.fetch('X76706')
! ent0 = serv.fetch('A15H9FIB')
! ent1 = serv.fetch('J01902')
! ent2 = serv.fetch('X04311')
!
! pp ent0.features
! pp ent0.references
!
! pp ent1.seq
! pp ent1.seq.translate
! pp ent1.seq.gc
! pp ent1.subseq(1,20)
!
! pp ent2.accession
! pp ent2.comment
! pp ent2.comments
! pp ent2.common_name
! pp ent2.database
! pp ent2.date
! pp ent2.dblink
! pp ent2.definition
! pp ent2.division
! pp ent2.entry_id
! pp ent2.features
! pp ent2.keyword
! pp ent2.lineage
! pp ent2.ncbi_taxa_id
! pp ent2.references
! pp ent2.seq
! pp ent2.subseq(1,10)
! pp ent2.taxonomy
! pp ent2.version
!
end
-
--- 1,145 ----
! require 'rubygems'
! require 'erb'
! require 'composite_primary_keys'
! # BiosqlPlug
! =begin
! Ok Hilmar gives to me some clarification
! 1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
! If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
! =end
! =begin
! TODO:
! 1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
! 2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
! 3) Chk Locations in Biofeatures ArSQL
! =end
! module Bio
! class SQL
! #no check is made
! def self.establish_connection(configurations, env)
! #configurations is an hash similar what YAML returns.
! #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
! configurations.assert_valid_keys('development', 'production','test')
! configurations[env].assert_valid_keys('database','adapter','username','password')
! DummyBase.configurations = configurations
! DummyBase.establish_connection "#{env}"
end
!
! def self.fetch_id(id)
! Bio::SQL::Bioentry.find(id)
end
!
! def self.fetch_accession(accession)
! accession.upcase!
! Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
end
!
! def self.exists_accession(accession)
! Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
end
!
! def self.list_entries
! Bio::SQL::Bioentry.find(:all).collect{|entry|
! {:id=>entry.bioentry_id, :accession=>entry.accession}
! }
end
!
! def self.list_databases
! Bio::SQL::Biodatabase.find(:all).collect{|entry|
! {:id=>entry.biodatabase_id, :name => entry.name}
! }
end
!
! def self.delete_entry_id(id)
! Bioentry.delete(id)
end
!
! def self.delete_entry_accession(accession)
! Bioentry.delete(Bioentry.find_by_accession(accession))
end
!
!
! class DummyBase < ActiveRecord::Base
! #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
! #NOTE: this class will not establish the connection automatically
! self.abstract_class = true
! self.pluralize_table_names = false
! #prepend table name to the usual id, avoid to specify primary id for every table
! self.primary_key_prefix_type = :table_name_with_underscore
! #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
! #self.configurations=biosql_configurations
! #self.establish_connection "development"
! end #DummyBase
!
! autoload :Biodatabase, 'bio/io/biosql/biodatabase'
! autoload :Bioentry, 'bio/io/biosql/bioentry'
! autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
! autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
! autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
! autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
! autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
! autoload :Biosequence, 'bio/io/biosql/biosequence'
! autoload :Comment, 'bio/io/biosql/comment'
! autoload :Dbxref, 'bio/io/biosql/dbxref'
! autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
! autoload :Location, 'bio/io/biosql/location'
! autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
! autoload :Ontology, 'bio/io/biosql/ontology'
! autoload :Reference, 'bio/io/biosql/reference'
! autoload :Seqfeature, 'bio/io/biosql/seqfeature'
! autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
! autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
! autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
! autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
! autoload :Taxon, 'bio/io/biosql/taxon'
! autoload :TaxonName, 'bio/io/biosql/taxon_name'
! autoload :Term, 'bio/io/biosql/term'
! autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
! autoload :TermPath, 'bio/io/biosql/term_path'
! autoload :TermRelationship, 'bio/io/biosql/term_relationship'
! autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
! autoload :Sequence, 'bio/db/biosql/sequence'
! end #biosql
!
! end #Bio
if __FILE__ == $0
! require 'rubygems'
! require 'composite_primary_keys'
! require 'bio'
! require 'pp'
!
! # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
! pp connection = Bio::SQL.establish_connection({'development'=>{'database'=>"biorails_development", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
! #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
!
! if nil
! pp Bio::SQL.list_entries
! bioseq = Bio::SQL.fetch_accession('AJ224122')
! pp bioseq
! pp bioseq.entry_id
! #TODO create a test only for tables not sequence here
! pp bioseq.molecule_type
! #pp bioseq.molecule_type.class
! #bioseq.molecule_type_update('dna', 1)
! pp Bio::SQL::Taxon.find(8121).taxon_names
end
! #pp bioseq.molecule_type
! #term = Bio::SQL::Term.find_by_name('mol_type')
! #pp term
! #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
! #pp bioseq.entry.bioentry_qualifier_values.inspect
! #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
! #pp primo.class
! # pp primo.value='dna'
! # pp primo.save
! #pp bioseq.molecule_type= 'prova'
!
! #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
!
!
end
More information about the bioruby-cvs
mailing list