[BioSQL-l] Oracle support...
Len Trigg
len at reeltwo.com
Tue Jul 29 17:49:37 EDT 2003
Hi all,
I have been using BioJava for some bioinformatics problems, and use
BioSQL for storing sequences in mysql. All in all, it's very nice.
I then had to use Oracle as the database, and ran into a whole pile of
headaches. I am definitely no expert in Oracle, so that may account
for a large part of my problems, but I guess it also says something
when the procedure for creating the biosql database in mysql pretty
much involves executing one sql file, but the oracle setup consists of
around 140 files, and the table and field names you end up with don't
match those in the BioSQL relational model described in
biosql-ERD.pdf. Trying to fathom what is going on is a nightmare for
those who have only had experience with the mysql installation.
In desperation, I actually just ported the mysql schema file to Oracle
directly and have happily been using that. There were a couple of
minor niggles that I thought I would mention because either they might
warrant changes to the BioSQL schema, or someone might know of
workarounds.
First, I discovered that "comment" and "synonym" are reserved words in
Oracle, so I had to rename the comment table, and the synonym field of
term_synonym table. Perhaps the main schema could be changed to avoid
these reserved words.
Second, to support autoincrement fields, I created an oracle sequence
and trigger for each table. Having one per table makes it easy to
access the last id assigned when doing an insert (which BioJava makes
use of).
I have attached my ported schema file (and one to drop the schema) for
those interested. It should be possible to autogenerate this file, as
I gather that is how the postgres one is created. I think that a
"vanilla" alternative to the existing oracle schema would make a nice
addition for those who don't need extra bells and whistles. These
files correspond to current BioSQL CVS.
Cheers,
Len.
-------------- next part --------------
-- conventions:
-- <table_name>_id is primary internal id (usually autogenerated)
-- Authors: Ewan Birney, Elia Stupka
-- Contributors: Hilmar Lapp, Aaron Mackey
--
-- Copyright Ewan Birney. You may use, modify, and distribute this code under
-- the same terms as Perl. See the Perl Artistic License.
--
-- comments to biosql - biosql-l at open-bio.org
--
-- Migration of the MySQL schema to InnoDB by Hilmar Lapp <hlapp at gmx.net>
-- Post-Cape Town changes by Hilmar Lapp.
-- Singapore changes by Hilmar Lapp and Aaron Mackey.
--
CREATE SEQUENCE SE_biodatabase
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_taxon
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_ontology
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_term
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_term_relationship
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_term_path
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_bioentry
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_bioentry_relationship
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_dbxref
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_reference
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_comment_
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_seqfeature
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_seqfeature_relationship
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE SE_location
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
-- database have bioentries. That is about it.
-- we do not store different versions of a database as different dbids
-- (there is no concept of versions of database). There is a concept of
-- versions of entries. Versions of databases deserve their own table and
-- join to bioentry table for tracking with versions of entries
CREATE TABLE biodatabase (
biodatabase_id int NOT NULL ,
name VARCHAR(128) NOT NULL,
authority VARCHAR(128),
description VARCHAR2(100),
PRIMARY KEY (biodatabase_id),
UNIQUE (name)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX db_auth on biodatabase(authority) TABLESPACE "SYMGENE_INDEX";
-- we could insist that taxa are NCBI taxon id, but on reflection I made this
-- an optional extra line, as many flat file formats do not have the NCBI id
--
-- no organelle/sub species
-- corresponds to the node table of the NCBI taxonomy databaase
CREATE TABLE taxon (
taxon_id int NOT NULL ,
ncbi_taxon_id int,
parent_taxon_id int ,
node_rank VARCHAR(32),
genetic_code INT ,
mito_genetic_code INT ,
left_value int ,
right_value int ,
PRIMARY KEY (taxon_id),
UNIQUE (ncbi_taxon_id),
UNIQUE (left_value),
UNIQUE (right_value)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX taxparent ON taxon(parent_taxon_id) TABLESPACE "SYMGENE_INDEX";
-- corresponds to the names table of the NCBI taxonomy databaase
CREATE TABLE taxon_name (
taxon_id int NOT NULL,
name VARCHAR(255) NOT NULL,
name_class VARCHAR(32) NOT NULL,
UNIQUE (taxon_id,name,name_class)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX taxnametaxonid ON taxon_name(taxon_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX taxnamename ON taxon_name(name) TABLESPACE "SYMGENE_INDEX";
-- this is the namespace (controlled vocabulary) ontology terms live in
-- we chose to have a separate table for this instead of reusing biodatabase
CREATE TABLE ontology (
ontology_id int NOT NULL ,
name VARCHAR(32) NOT NULL,
definition VARCHAR2(100),
PRIMARY KEY (ontology_id),
UNIQUE (name)
) TABLESPACE "SYMGENE_DATA";
-- any controlled vocab term, everything from full ontology
-- terms eg GO IDs to the various keys allowed as qualifiers
CREATE TABLE term (
term_id int NOT NULL ,
name VARCHAR(255) NOT NULL,
definition VARCHAR2(100),
identifier VARCHAR(40),
is_obsolete CHAR(1),
ontology_id int NOT NULL,
PRIMARY KEY (term_id),
UNIQUE (name,ontology_id),
UNIQUE (identifier)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX term_ont ON term(ontology_id) TABLESPACE "SYMGENE_INDEX";
-- ontology terms have synonyms, here is how to store them
CREATE TABLE term_synonym (
synonym_ VARCHAR(255) NOT NULL,
term_id int NOT NULL,
PRIMARY KEY (term_id,synonym_)
) TABLESPACE "SYMGENE_DATA";
-- ontology terms to dbxref association: ontology terms have dbxrefs
CREATE TABLE term_dbxref (
term_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (term_id, dbxref_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX trmdbxref_dbxrefid ON term_dbxref(dbxref_id) TABLESPACE "SYMGENE_INDEX";
-- relationship between controlled vocabulary / ontology term
-- we use subject/predicate/object but this could also
-- be thought of as child/relationship-type/parent.
-- the subject/predicate/object naming is better as we
-- can think of the graph as composed of statements.
--
-- we also treat the relationshiptypes / predicates as
-- controlled terms in themselves; this is quite useful
-- as a lot of systems (eg GO) will soon require
-- ontologies of relationship types (eg subtle differences
-- in the partOf relationship)
--
-- this table probably won''t be filled for a while, the core
-- will just treat ontologies as flat lists of terms
CREATE TABLE term_relationship (
term_relationship_id int NOT NULL ,
subject_term_id int NOT NULL,
predicate_term_id int NOT NULL,
object_term_id int NOT NULL,
ontology_id int NOT NULL,
PRIMARY KEY (term_relationship_id),
UNIQUE (subject_term_id,predicate_term_id,object_term_id,ontology_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX trmrel_predicateid ON term_relationship(predicate_term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX trmrel_objectid ON term_relationship(object_term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX trmrel_ontid ON term_relationship(ontology_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX ontrel_subjectid ON term_relationship(subject_term_id);
-- the infamous transitive closure table on ontology term relationships
-- this is a warehouse approach - you will need to update this regularly
--
-- the triple of (subject, predicate, object) is the same as for ontology
-- relationships, with the exception of predicate being the greatest common
-- denominator of the relationships types visited in the path (i.e., if
-- relationship type A is-a relationship type B, the greatest common
-- denominator for path containing both types A and B is B)
--
-- See the GO database or Chado schema for other (and possibly better
-- documented) implementations of the transitive closure table approach.
CREATE TABLE term_path (
term_path_id int NOT NULL ,
subject_term_id int NOT NULL,
predicate_term_id int NOT NULL,
object_term_id int NOT NULL,
ontology_id int NOT NULL,
distance int ,
PRIMARY KEY (term_path_id),
UNIQUE (subject_term_id,predicate_term_id,object_term_id,ontology_id,distance)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX trmpath_predicateid ON term_path(predicate_term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX trmpath_objectid ON term_path(object_term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX trmpath_ontid ON term_path(ontology_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);
-- we can be a bioentry without a biosequence, but not visa-versa
-- most things are going to be keyed off bioentry_id
--
-- accession is the stable id, display_id is a potentially volatile,
-- human readable name.
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
-- not all entries have a taxon, but many do.
-- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
--
-- Name maps to display_id in bioperl. We have a different column name
-- here to avoid confusion with the naming convention for foreign keys.
CREATE TABLE bioentry (
bioentry_id int NOT NULL ,
biodatabase_id int NOT NULL,
taxon_id int ,
name VARCHAR(40) NOT NULL,
accession VARCHAR(40) NOT NULL,
identifier VARCHAR(40),
division VARCHAR(6),
description VARCHAR2(100),
version SMALLINT NOT NULL,
PRIMARY KEY (bioentry_id),
UNIQUE (accession,biodatabase_id,version),
UNIQUE (identifier)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX bioentry_name ON bioentry(name) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX bioentry_db ON bioentry(biodatabase_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX bioentry_tax ON bioentry(taxon_id) TABLESPACE "SYMGENE_INDEX";
--
-- bioentry-bioentry relationships: these are typed
--
CREATE TABLE bioentry_relationship (
bioentry_relationship_id int NOT NULL ,
object_bioentry_id int NOT NULL,
subject_bioentry_id int NOT NULL,
term_id int NOT NULL,
rank INT,
PRIMARY KEY (bioentry_relationship_id),
UNIQUE (object_bioentry_id,subject_bioentry_id,term_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX bioentryrel_trm ON bioentry_relationship(term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX bioentryrel_child ON bioentry_relationship(subject_bioentry_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);
-- for deep (depth > 1) bioentry relationship trees we need a transitive
-- closure table too
CREATE TABLE bioentry_path (
object_bioentry_id int NOT NULL,
subject_bioentry_id int NOT NULL,
term_id int NOT NULL,
distance int ,
UNIQUE (object_bioentry_id,subject_bioentry_id,term_id,distance)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX bioentrypath_trm ON bioentry_path(term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX bioentrypath_child ON bioentry_path(subject_bioentry_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentrypath_parent ON bioentry_path(object_bioentry_id);
-- some bioentries will have a sequence
-- biosequence because sequence is sometimes a reserved word
CREATE TABLE biosequence (
bioentry_id int NOT NULL,
version SMALLINT,
length int,
alphabet VARCHAR(10),
seq LONG,
PRIMARY KEY (bioentry_id)
) TABLESPACE "SYMGENE_DATA";
-- add these only if you want them:
-- ALTER TABLE biosequence ADD COLUMN ( isoelec_pt NUMERIC(4,2) );
-- ALTER TABLE biosequence ADD COLUMN ( mol_wgt DOUBLE PRECISION );
-- ALTER TABLE biosequence ADD COLUMN ( perc_gc DOUBLE PRECISION );
-- database cross-references (e.g., GenBank:AC123456.1)
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
CREATE TABLE dbxref (
dbxref_id int NOT NULL ,
dbname VARCHAR(40) NOT NULL,
accession VARCHAR(40) NOT NULL,
version SMALLINT NOT NULL,
PRIMARY KEY (dbxref_id),
UNIQUE(accession, dbname, version)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX dbxref_db ON dbxref(dbname) TABLESPACE "SYMGENE_INDEX";
-- for roundtripping embl/genbank, we need to have the "optional ID"
-- for the dbxref.
--
-- another use of this table could be for storing
-- descriptive text for a dbxref. for example, we may want to
-- know stuff about the interpro accessions we store (without
-- importing all of interpro), so we can attach the text
-- description as a synonym
CREATE TABLE dbxref_qualifier_value (
dbxref_id int NOT NULL,
term_id int NOT NULL,
rank INT DEFAULT 0 NOT NULL,
value VARCHAR2(100),
PRIMARY KEY (dbxref_id,term_id,rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value(dbxref_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value(term_id) TABLESPACE "SYMGENE_INDEX";
-- Direct dblinks. It is tempting to do this
-- from bioentry_id to bioentry_id. But that wont work
-- during updates of one database - we will have to edit
-- this table each time. Better to do the join through accession
-- and db each time. Should be almost as cheap
CREATE TABLE bioentry_dbxref (
bioentry_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (bioentry_id,dbxref_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX dblink_dbx ON bioentry_dbxref(dbxref_id) TABLESPACE "SYMGENE_INDEX";
-- We can have multiple references per bioentry, but one reference
-- can also be used for the same bioentry.
--
-- No two references can reference the same reference database entry
-- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.
CREATE TABLE reference (
reference_id int NOT NULL ,
dbxref_id int ,
location VARCHAR2(100) NOT NULL,
title VARCHAR2(100),
authors VARCHAR2(100) NOT NULL,
crc VARCHAR(32),
PRIMARY KEY (reference_id),
UNIQUE (dbxref_id),
UNIQUE (crc)
) TABLESPACE "SYMGENE_DATA";
-- bioentry to reference associations
CREATE TABLE bioentry_reference (
bioentry_id int NOT NULL,
reference_id int NOT NULL,
start_pos int,
end_pos int,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY(bioentry_id,reference_id,rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX bioentryref_ref ON bioentry_reference(reference_id) TABLESPACE "SYMGENE_INDEX";
-- We can have multiple comments per seqentry, and
-- comments can have embedded '\n' characters
CREATE TABLE comment_ (
comment_id int NOT NULL ,
bioentry_id int NOT NULL,
comment_text VARCHAR2(100) NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (comment_id),
UNIQUE(bioentry_id, rank)
) TABLESPACE "SYMGENE_DATA";
-- tag/value and ontology term annotation for bioentries goes here
CREATE TABLE bioentry_qualifier_value (
bioentry_id int NOT NULL,
term_id int NOT NULL,
value VARCHAR2(100),
rank INT DEFAULT 0 NOT NULL,
UNIQUE (bioentry_id,term_id,rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value(term_id) TABLESPACE "SYMGENE_INDEX";
-- feature table. We cleanly handle
-- - simple locations
-- - split locations
-- - split locations on remote sequences
CREATE TABLE seqfeature (
seqfeature_id int NOT NULL ,
bioentry_id int NOT NULL,
type_term_id int NOT NULL,
source_term_id int NOT NULL,
display_name VARCHAR(64),
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (seqfeature_id),
UNIQUE (bioentry_id,type_term_id,source_term_id,rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX seqfeature_trm ON seqfeature(type_term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX seqfeature_fsrc ON seqfeature(source_term_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);
-- seqfeatures can be arranged in containment hierarchies.
-- one can imagine storing other relationships between features,
-- in this case the term_id can be used to type the relationship
CREATE TABLE seqfeature_relationship (
seqfeature_relationship_id int NOT NULL ,
object_seqfeature_id int NOT NULL,
subject_seqfeature_id int NOT NULL,
term_id int NOT NULL,
rank INT,
PRIMARY KEY (seqfeature_relationship_id),
UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX seqfeaturerel_trm ON seqfeature_relationship(term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX seqfeaturerel_child ON seqfeature_relationship(subject_seqfeature_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_relationship(object_seqfeature_id);
-- for deep (depth > 1) seqfeature relationship trees we need a transitive
-- closure table too
CREATE TABLE seqfeature_path (
object_seqfeature_id int NOT NULL,
subject_seqfeature_id int NOT NULL,
term_id int NOT NULL,
distance int ,
UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id,distance)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX seqfeaturepath_trm ON seqfeature_path(term_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX seqfeaturepath_child ON seqfeature_path(subject_seqfeature_id) TABLESPACE "SYMGENE_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);
-- tag/value associations - or ontology annotations
CREATE TABLE seqfeature_qualifier_value (
seqfeature_id int NOT NULL,
term_id int NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
value VARCHAR2(100) NOT NULL,
PRIMARY KEY (seqfeature_id,term_id,rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value(term_id) TABLESPACE "SYMGENE_INDEX";
-- DBXrefs for features. This is necessary for genome oriented viewpoints,
-- where you have a few have long sequences (contigs, or chromosomes) with many
-- features on them. In that case the features are the semantic scope for
-- their annotation bundles, not the bioentry they are attached to.
CREATE TABLE seqfeature_dbxref (
seqfeature_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (seqfeature_id,dbxref_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX feadblink_dbx ON seqfeature_dbxref(dbxref_id) TABLESPACE "SYMGENE_INDEX";
-- basically we model everything as potentially having
-- any number of locations, ie, a split location. SimpleLocations
-- just have one location. We need to have a location id for the qualifier
-- associations of fuzzy locations.
-- please do not try to model complex assemblies with this thing. It wont
-- work. Check out the ensembl schema for this.
-- we allow nulls for start/end - this is useful for fuzzies as
-- standard range queries will not be included
-- for remote locations, the join to make is to DBXref
-- the FK to term is a possibility to store the type of the
-- location for determining in one hit whether it's a fuzzy or not
CREATE TABLE location (
location_id int NOT NULL ,
seqfeature_id int NOT NULL,
dbxref_id int ,
term_id int ,
start_pos int,
end_pos int,
strand INT NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (location_id),
UNIQUE (seqfeature_id, rank)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX seqfeatureloc_start ON location(start_pos, end_pos) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX seqfeatureloc_dbx ON location(dbxref_id) TABLESPACE "SYMGENE_INDEX";
CREATE INDEX seqfeatureloc_trm ON location(term_id) TABLESPACE "SYMGENE_INDEX";
-- location qualifiers - mainly intended for fuzzies but anything
-- can go in here
-- some controlled vocab terms have slots;
-- fuzzies could be modeled as min_start(5), max_start(5)
--
-- there is no restriction on extending the fuzzy ontology
-- for your own nefarious aims, although the bio* apis will
-- most likely ignore these
CREATE TABLE location_qualifier_value (
location_id int NOT NULL,
term_id int NOT NULL,
value VARCHAR(255) NOT NULL,
int_value int,
PRIMARY KEY (location_id,term_id)
) TABLESPACE "SYMGENE_DATA";
CREATE INDEX locationqual_trm ON location_qualifier_value(term_id) TABLESPACE "SYMGENE_INDEX";
--
-- Create the foreign key constraints
--
-- ontology term
ALTER TABLE term ADD CONSTRAINT FKont_term
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- term synonyms
ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
FOREIGN KEY (term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
-- term_dbxref
ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
FOREIGN KEY (term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
-- term_relationship
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
FOREIGN KEY (object_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKterm_trmrel
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- term_path
ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
FOREIGN KEY (object_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- taxon, taxon_name
-- unfortunately, we can't constrain parent_taxon_id as it is violated
-- occasionally by the downloads available from NCBI
-- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
-- FOREIGN KEY (parent_taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id)
ON DELETE CASCADE;
-- bioentry
ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
FOREIGN KEY (biodatabase_id) REFERENCES biodatabase(biodatabase_id);
-- bioentry_relationship
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- bioentry_path
ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- biosequence
ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- comment
ALTER TABLE comment_ ADD CONSTRAINT FKbioentry_comment
FOREIGN KEY(bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- bioentry_dbxref
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- dbxref_qualifier_value
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- bioentry_reference
ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
FOREIGN KEY (reference_id) REFERENCES reference(reference_id)
ON DELETE CASCADE;
-- bioentry_qualifier_value
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
-- reference
ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
-- seqfeature
ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
FOREIGN KEY (type_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
FOREIGN KEY (source_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- seqfeature_relationship
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_path
ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_qualifier_value
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_dbxref
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- location
ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE location ADD CONSTRAINT FKdbxref_location
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id);
ALTER TABLE location ADD CONSTRAINT FKterm_featloc
FOREIGN KEY (term_id) REFERENCES term(term_id);
-- location_qualifier_value
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
FOREIGN KEY (location_id) REFERENCES location(location_id)
ON DELETE CASCADE;
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
--
-- Triggers for automatic primary key generation and other sanity checks
--
CREATE OR REPLACE TRIGGER BID_location
BEFORE INSERT
on location
--
for each row
BEGIN
IF :new.location_id IS NULL THEN
SELECT SE_location.nextval INTO :new.location_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_seqfeature
BEFORE INSERT
on seqfeature
--
for each row
BEGIN
IF :new.seqfeature_id IS NULL THEN
SELECT SE_seqfeature.nextval INTO :new.seqfeature_id FROM DUAL;
END IF;
END;
/
CREATE TRIGGER BID_seqfeature_relationship
BEFORE INSERT
on seqfeature_relationship
--
for each row
BEGIN
IF :new.seqfeature_relationship_id IS NULL THEN
SELECT SE_seqfeature_relationship.nextval INTO :new.seqfeature_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_comment
BEFORE INSERT
on comment_
--
for each row
BEGIN
IF :new.comment_id IS NULL THEN
SELECT SE_comment_.nextval INTO :new.comment_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_reference
BEFORE INSERT
on reference
--
for each row
BEGIN
IF :new.reference_id IS NULL THEN
SELECT SE_reference.nextval INTO :new.reference_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_bioentry_relationship
BEFORE INSERT
on bioentry_relationship
--
for each row
BEGIN
IF :new.bioentry_relationship_id IS NULL THEN
SELECT SE_bioentry_relationship.nextval INTO :new.bioentry_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_bioentry
BEFORE INSERT
on bioentry
--
for each row
BEGIN
IF :new.bioentry_id IS NULL THEN
SELECT SE_bioentry.nextval INTO :new.bioentry_id FROM DUAL;
END IF;
-- IF :new.Division IS NULL THEN
-- :new.Division := 'UNK';
-- END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term
BEFORE INSERT
on term
--
for each row
BEGIN
IF :new.term_id IS NULL THEN
SELECT SE_term.nextval INTO :new.term_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term_relationship
BEFORE INSERT
on term_relationship
--
for each row
BEGIN
IF :new.term_relationship_id IS NULL THEN
SELECT SE_term_relationship.nextval INTO :new.term_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term_path
BEFORE INSERT
on term_path
--
for each row
BEGIN
IF :new.term_path_id IS NULL THEN
SELECT SE_term_path.nextval INTO :new.term_path_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_ontology
BEFORE INSERT
on ontology
--
for each row
BEGIN
IF :new.ontology_id IS NULL THEN
SELECT SE_ontology.nextval INTO :new.ontology_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_taxon
BEFORE INSERT
on taxon
--
for each row
BEGIN
IF :new.taxon_id IS NULL THEN
SELECT SE_taxon.nextval INTO :new.taxon_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_biodatabase
BEFORE INSERT
on biodatabase
--
for each row
BEGIN
IF :new.biodatabase_id IS NULL THEN
SELECT SE_biodatabase.nextval INTO :new.biodatabase_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_dbxref
BEFORE INSERT
on dbxref
--
for each row
BEGIN
IF :new.dbxref_id IS NULL THEN
SELECT SE_dbxref.nextval INTO :new.dbxref_id FROM DUAL;
END IF;
END;
/
-------------- next part --------------
-------------- next part --------------
DROP SEQUENCE SE_biodatabase;
DROP SEQUENCE SE_taxon;
DROP SEQUENCE SE_ontology;
DROP SEQUENCE SE_term;
DROP SEQUENCE SE_term_relationship;
DROP SEQUENCE SE_term_path;
DROP SEQUENCE SE_bioentry;
DROP SEQUENCE SE_bioentry_relationship;
DROP SEQUENCE SE_dbxref;
DROP SEQUENCE SE_reference;
DROP SEQUENCE SE_comment_;
DROP SEQUENCE SE_seqfeature;
DROP SEQUENCE SE_seqfeature_relationship;
DROP SEQUENCE SE_location;
DROP TABLE biodatabase cascade constraints;
DROP INDEX db_auth ;
DROP TABLE taxon cascade constraints;
DROP INDEX taxparent ;
DROP TABLE taxon_name cascade constraints;
DROP INDEX taxnametaxonid;
DROP INDEX taxnamename;
DROP TABLE ontology cascade constraints;
DROP TABLE term cascade constraints;
DROP INDEX term_ont ;
DROP TABLE term_synonym cascade constraints;
DROP TABLE term_dbxref cascade constraints;
DROP INDEX trmdbxref_dbxrefid ;
DROP TABLE term_relationship cascade constraints;
DROP INDEX trmrel_predicateid ;
DROP INDEX trmrel_objectid;
DROP INDEX trmrel_ontid ;
--DROP INDEX ontrel_subjectid ON term_relationshipsubject_term_id);
DROP TABLE term_path cascade constraints;
DROP INDEX trmpath_predicateid ;
DROP INDEX trmpath_objectid ;
DROP INDEX trmpath_ontid ;
--DROP INDEX trmpath_subjectid ON term_pathsubject_term_id);
DROP TABLE bioentry cascade constraints;
DROP INDEX bioentry_name ;
DROP INDEX bioentry_db ;
DROP INDEX bioentry_tax ;
DROP TABLE bioentry_relationship cascade constraints;
DROP INDEX bioentryrel_trm ;
DROP INDEX bioentryrel_child ;
--DROP INDEX bioentryrel_parent ;
DROP TABLE bioentry_path cascade constraints;
DROP INDEX bioentrypath_trm ;
DROP INDEX bioentrypath_child ;
--DROP INDEX bioentrypath_parent ON bioentry_pathobject_bioentry_id);
DROP TABLE biosequence cascade constraints;
DROP TABLE dbxref cascade constraints;
DROP INDEX dbxref_db ;
DROP TABLE dbxref_qualifier_value cascade constraints;
DROP INDEX dbxrefqual_dbx ;
DROP INDEX dbxrefqual_trm ;
DROP TABLE bioentry_dbxref cascade constraints ;
DROP INDEX dblink_dbx ;
DROP TABLE reference cascade constraints;
DROP TABLE bioentry_reference cascade constraints;
DROP INDEX bioentryref_ref ;
DROP TABLE comment_ cascade constraints;
DROP TABLE bioentry_qualifier_value cascade constraints;
DROP INDEX bioentryqual_trm ;
DROP TABLE seqfeature cascade constraints;
DROP INDEX seqfeature_trm ;
DROP INDEX seqfeature_fsrc ;
--DROP INDEX seqfeature_bioentryid ON seqfeaturebioentry_id);
DROP TABLE seqfeature_relationship cascade constraints;
DROP INDEX seqfeaturerel_trm ;
DROP INDEX seqfeaturerel_child ;
--DROP INDEX seqfeaturerel_parent ON seqfeature_relationshipobject_seqfeature_id);
DROP TABLE seqfeature_path cascade constraints;
DROP INDEX seqfeaturepath_trm ;
DROP INDEX seqfeaturepath_child ;
--DROP INDEX seqfeaturerel_parent ON seqfeature_pathobject_seqfeature_id);
DROP TABLE seqfeature_qualifier_value cascade constraints;
DROP INDEX seqfeaturequal_trm ;
DROP TABLE seqfeature_dbxref cascade constraints;
DROP INDEX feadblink_dbx ;
DROP TABLE location cascade constraints;
DROP INDEX seqfeatureloc_start ;
DROP INDEX seqfeatureloc_dbx ;
DROP INDEX seqfeatureloc_trm ;
DROP TABLE location_qualifier_value cascade constraints;
DROP INDEX locationqual_trm ;
-- Create the foreign key constraints
-------------- next part --------------
More information about the BioSQL-l
mailing list