[Biojava-l] BioSQL
Len Trigg
len at reeltwo.com
Mon Nov 1 15:13:31 EST 2004
Mark Schreiber wrote:
> Does anyone have a current BioSQL schema that matches the BioJava
> bindings? Preferably one for Oracle.
AFAIK, current BioSQL CVS matches what BioJava expects -- Hilmar
recently added the extra table that BioJava uses. I've also attached
the Oracle schema that I've used (it's a bit simpler than the full
BioSQL Oracle schema).
Cheers,
Len.
-------------- next part --------------
-- conventions:
-- <table_name>_id is primary internal id (usually autogenerated)
-- Authors: Ewan Birney, Elia Stupka
-- Contributors: Hilmar Lapp, Aaron Mackey
--
-- Copyright Ewan Birney. You may use, modify, and distribute this code under
-- the same terms as Perl. See the Perl Artistic License.
--
-- comments to biosql - biosql-l at open-bio.org
--
-- Migration of the MySQL schema to InnoDB by Hilmar Lapp <hlapp at gmx.net>
-- Post-Cape Town changes by Hilmar Lapp.
-- Singapore changes by Hilmar Lapp and Aaron Mackey.
--
CREATE SEQUENCE biodatabase_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE taxon_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE ontology_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_relationship_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_path_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE bioentry_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE bioentry_relationship_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE dbxref_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE reference_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE anncomment_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE seqfeature_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE seqfeature_relationship_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE location_pk_seq
INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
-- database have bioentries. That is about it.
-- we do not store different versions of a database as different dbids
-- (there is no concept of versions of database). There is a concept of
-- versions of entries. Versions of databases deserve their own table and
-- join to bioentry table for tracking with versions of entries
CREATE TABLE biodatabase (
biodatabase_id int NOT NULL ,
name VARCHAR(128) NOT NULL,
authority VARCHAR(128),
description VARCHAR2(250),
PRIMARY KEY (biodatabase_id),
UNIQUE (name)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX db_auth on biodatabase(authority) TABLESPACE "BIOSQL_INDEX";
-- we could insist that taxa are NCBI taxon id, but on reflection I made this
-- an optional extra line, as many flat file formats do not have the NCBI id
--
-- no organelle/sub species
-- corresponds to the node table of the NCBI taxonomy databaase
CREATE TABLE taxon (
taxon_id int NOT NULL ,
ncbi_taxon_id int,
parent_taxon_id int ,
node_rank VARCHAR(32),
genetic_code INT ,
mito_genetic_code INT ,
left_value int ,
right_value int ,
PRIMARY KEY (taxon_id),
UNIQUE (ncbi_taxon_id),
UNIQUE (left_value),
UNIQUE (right_value)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX taxparent ON taxon(parent_taxon_id) TABLESPACE "BIOSQL_INDEX";
-- corresponds to the names table of the NCBI taxonomy databaase
CREATE TABLE taxon_name (
taxon_id int NOT NULL,
name VARCHAR(255) NOT NULL,
name_class VARCHAR(32) NOT NULL,
UNIQUE (taxon_id,name,name_class)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX taxnametaxonid ON taxon_name(taxon_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX taxnamename ON taxon_name(name) TABLESPACE "BIOSQL_INDEX";
-- this is the namespace (controlled vocabulary) ontology terms live in
-- we chose to have a separate table for this instead of reusing biodatabase
CREATE TABLE ontology (
ontology_id int NOT NULL ,
name VARCHAR(32) NOT NULL,
definition VARCHAR2(250),
PRIMARY KEY (ontology_id),
UNIQUE (name)
) TABLESPACE "BIOSQL_DATA";
-- any controlled vocab term, everything from full ontology
-- terms eg GO IDs to the various keys allowed as qualifiers
CREATE TABLE term (
term_id int NOT NULL ,
name VARCHAR(255) NOT NULL,
definition VARCHAR2(250),
identifier VARCHAR(40),
is_obsolete CHAR(1),
ontology_id int NOT NULL,
PRIMARY KEY (term_id),
UNIQUE (name,ontology_id),
UNIQUE (identifier)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX term_ont ON term(ontology_id) TABLESPACE "BIOSQL_INDEX";
-- ontology terms have synonyms, here is how to store them
CREATE TABLE term_synonym (
name VARCHAR(255) NOT NULL,
term_id int NOT NULL,
PRIMARY KEY (term_id,name)
) TABLESPACE "BIOSQL_DATA";
-- ontology terms to dbxref association: ontology terms have dbxrefs
CREATE TABLE term_dbxref (
term_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (term_id, dbxref_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX trmdbxref_dbxrefid ON term_dbxref(dbxref_id) TABLESPACE "BIOSQL_INDEX";
-- relationship between controlled vocabulary / ontology term
-- we use subject/predicate/object but this could also
-- be thought of as child/relationship-type/parent.
-- the subject/predicate/object naming is better as we
-- can think of the graph as composed of statements.
--
-- we also treat the relationshiptypes / predicates as
-- controlled terms in themselves; this is quite useful
-- as a lot of systems (eg GO) will soon require
-- ontologies of relationship types (eg subtle differences
-- in the partOf relationship)
--
-- this table probably won''t be filled for a while, the core
-- will just treat ontologies as flat lists of terms
CREATE TABLE term_relationship (
term_relationship_id int NOT NULL ,
subject_term_id int NOT NULL,
predicate_term_id int NOT NULL,
object_term_id int NOT NULL,
ontology_id int NOT NULL,
PRIMARY KEY (term_relationship_id),
UNIQUE (subject_term_id,predicate_term_id,object_term_id,ontology_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX trmrel_predicateid ON term_relationship(predicate_term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX trmrel_objectid ON term_relationship(object_term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX trmrel_ontid ON term_relationship(ontology_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX ontrel_subjectid ON term_relationship(subject_term_id);
-- the infamous transitive closure table on ontology term relationships
-- this is a warehouse approach - you will need to update this regularly
--
-- the triple of (subject, predicate, object) is the same as for ontology
-- relationships, with the exception of predicate being the greatest common
-- denominator of the relationships types visited in the path (i.e., if
-- relationship type A is-a relationship type B, the greatest common
-- denominator for path containing both types A and B is B)
--
-- See the GO database or Chado schema for other (and possibly better
-- documented) implementations of the transitive closure table approach.
CREATE TABLE term_path (
term_path_id int NOT NULL ,
subject_term_id int NOT NULL,
predicate_term_id int NOT NULL,
object_term_id int NOT NULL,
ontology_id int NOT NULL,
distance int ,
PRIMARY KEY (term_path_id),
UNIQUE (subject_term_id,predicate_term_id,object_term_id,ontology_id,distance)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX trmpath_predicateid ON term_path(predicate_term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX trmpath_objectid ON term_path(object_term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX trmpath_ontid ON term_path(ontology_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);
-- BioJava addition
CREATE TABLE term_relationship_term (
term_relationship_id int DEFAULT 0 NOT NULL,
term_id int DEFAULT 0 NOT NULL,
PRIMARY KEY (term_relationship_id,term_id),
) TABLESPACE "BIOSQL_DATA";
ALTER TABLE term_relationship_term ADD CONSTRAINT uni_term_relationship_id
UNIQUE (term_relationship_id) ENABLE VALIDATE;
ALTER TABLE term_relationship_term ADD CONSTRAINT uni_term_id
UNIQUE (term_id) ENABLE VALIDATE;
-- we can be a bioentry without a biosequence, but not visa-versa
-- most things are going to be keyed off bioentry_id
--
-- accession is the stable id, display_id is a potentially volatile,
-- human readable name.
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
-- not all entries have a taxon, but many do.
-- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
--
-- Name maps to display_id in bioperl. We have a different column name
-- here to avoid confusion with the naming convention for foreign keys.
CREATE TABLE bioentry (
bioentry_id int NOT NULL ,
biodatabase_id int NOT NULL,
taxon_id int ,
name VARCHAR(40) NOT NULL,
accession VARCHAR(40) NOT NULL,
identifier VARCHAR(40),
division VARCHAR(6),
description VARCHAR2(250),
version SMALLINT NOT NULL,
PRIMARY KEY (bioentry_id),
UNIQUE (accession,biodatabase_id,version),
UNIQUE (identifier)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX bioentry_name ON bioentry(name) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX bioentry_db ON bioentry(biodatabase_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX bioentry_tax ON bioentry(taxon_id) TABLESPACE "BIOSQL_INDEX";
--
-- bioentry-bioentry relationships: these are typed
--
CREATE TABLE bioentry_relationship (
bioentry_relationship_id int NOT NULL ,
object_bioentry_id int NOT NULL,
subject_bioentry_id int NOT NULL,
term_id int NOT NULL,
rank INT,
PRIMARY KEY (bioentry_relationship_id),
UNIQUE (object_bioentry_id,subject_bioentry_id,term_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX bioentryrel_trm ON bioentry_relationship(term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX bioentryrel_child ON bioentry_relationship(subject_bioentry_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);
-- for deep (depth > 1) bioentry relationship trees we need a transitive
-- closure table too
CREATE TABLE bioentry_path (
object_bioentry_id int NOT NULL,
subject_bioentry_id int NOT NULL,
term_id int NOT NULL,
distance int ,
UNIQUE (object_bioentry_id,subject_bioentry_id,term_id,distance)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX bioentrypath_trm ON bioentry_path(term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX bioentrypath_child ON bioentry_path(subject_bioentry_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentrypath_parent ON bioentry_path(object_bioentry_id);
-- some bioentries will have a sequence
-- biosequence because sequence is sometimes a reserved word
CREATE TABLE biosequence (
bioentry_id int NOT NULL,
version SMALLINT,
length int,
alphabet VARCHAR(10),
seq LONG,
PRIMARY KEY (bioentry_id)
) TABLESPACE "BIOSQL_DATA";
-- add these only if you want them:
-- ALTER TABLE biosequence ADD COLUMN ( isoelec_pt NUMERIC(4,2) );
-- ALTER TABLE biosequence ADD COLUMN ( mol_wgt DOUBLE PRECISION );
-- ALTER TABLE biosequence ADD COLUMN ( perc_gc DOUBLE PRECISION );
-- database cross-references (e.g., GenBank:AC123456.1)
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
CREATE TABLE dbxref (
dbxref_id int NOT NULL ,
dbname VARCHAR(40) NOT NULL,
accession VARCHAR(40) NOT NULL,
version SMALLINT NOT NULL,
PRIMARY KEY (dbxref_id),
UNIQUE(accession, dbname, version)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX dbxref_db ON dbxref(dbname) TABLESPACE "BIOSQL_INDEX";
-- for roundtripping embl/genbank, we need to have the "optional ID"
-- for the dbxref.
--
-- another use of this table could be for storing
-- descriptive text for a dbxref. for example, we may want to
-- know stuff about the interpro accessions we store (without
-- importing all of interpro), so we can attach the text
-- description as a synonym
CREATE TABLE dbxref_qualifier_value (
dbxref_id int NOT NULL,
term_id int NOT NULL,
rank INT DEFAULT 0 NOT NULL,
value VARCHAR2(100),
PRIMARY KEY (dbxref_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value(dbxref_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value(term_id) TABLESPACE "BIOSQL_INDEX";
-- Direct dblinks. It is tempting to do this
-- from bioentry_id to bioentry_id. But that wont work
-- during updates of one database - we will have to edit
-- this table each time. Better to do the join through accession
-- and db each time. Should be almost as cheap
CREATE TABLE bioentry_dbxref (
bioentry_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (bioentry_id,dbxref_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX dblink_dbx ON bioentry_dbxref(dbxref_id) TABLESPACE "BIOSQL_INDEX";
-- We can have multiple references per bioentry, but one reference
-- can also be used for the same bioentry.
--
-- No two references can reference the same reference database entry
-- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.
CREATE TABLE reference (
reference_id int NOT NULL ,
dbxref_id int ,
location VARCHAR2(100) NOT NULL,
title VARCHAR2(100),
authors VARCHAR2(100) NOT NULL,
crc VARCHAR(32),
PRIMARY KEY (reference_id),
UNIQUE (dbxref_id),
UNIQUE (crc)
) TABLESPACE "BIOSQL_DATA";
-- bioentry to reference associations
CREATE TABLE bioentry_reference (
bioentry_id int NOT NULL,
reference_id int NOT NULL,
start_pos int,
end_pos int,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY(bioentry_id,reference_id,rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX bioentryref_ref ON bioentry_reference(reference_id) TABLESPACE "BIOSQL_INDEX";
-- We can have multiple comments per seqentry, and
-- comments can have embedded '\n' characters
CREATE TABLE anncomment (
comment_id int NOT NULL ,
bioentry_id int NOT NULL,
comment_text VARCHAR2(100) NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (comment_id),
UNIQUE(bioentry_id, rank)
) TABLESPACE "BIOSQL_DATA";
-- tag/value and ontology term annotation for bioentries goes here
CREATE TABLE bioentry_qualifier_value (
bioentry_id int NOT NULL,
term_id int NOT NULL,
value VARCHAR2(100),
rank INT DEFAULT 0 NOT NULL,
UNIQUE (bioentry_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value(term_id) TABLESPACE "BIOSQL_INDEX";
-- feature table. We cleanly handle
-- - simple locations
-- - split locations
-- - split locations on remote sequences
CREATE TABLE seqfeature (
seqfeature_id int NOT NULL ,
bioentry_id int NOT NULL,
type_term_id int NOT NULL,
source_term_id int NOT NULL,
display_name VARCHAR(64),
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (seqfeature_id),
UNIQUE (bioentry_id,type_term_id,source_term_id,rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX seqfeature_trm ON seqfeature(type_term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX seqfeature_fsrc ON seqfeature(source_term_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);
-- seqfeatures can be arranged in containment hierarchies.
-- one can imagine storing other relationships between features,
-- in this case the term_id can be used to type the relationship
CREATE TABLE seqfeature_relationship (
seqfeature_relationship_id int NOT NULL ,
object_seqfeature_id int NOT NULL,
subject_seqfeature_id int NOT NULL,
term_id int NOT NULL,
rank INT,
PRIMARY KEY (seqfeature_relationship_id),
UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX seqfeaturerel_trm ON seqfeature_relationship(term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX seqfeaturerel_child ON seqfeature_relationship(subject_seqfeature_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_relationship(object_seqfeature_id);
-- for deep (depth > 1) seqfeature relationship trees we need a transitive
-- closure table too
CREATE TABLE seqfeature_path (
object_seqfeature_id int NOT NULL,
subject_seqfeature_id int NOT NULL,
term_id int NOT NULL,
distance int ,
UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id,distance)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX seqfeaturepath_trm ON seqfeature_path(term_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX seqfeaturepath_child ON seqfeature_path(subject_seqfeature_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);
-- tag/value associations - or ontology annotations
CREATE TABLE seqfeature_qualifier_value (
seqfeature_id int NOT NULL,
term_id int NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
value VARCHAR2(4000) NOT NULL,
PRIMARY KEY (seqfeature_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value(term_id) TABLESPACE "BIOSQL_INDEX";
-- DBXrefs for features. This is necessary for genome oriented viewpoints,
-- where you have a few have long sequences (contigs, or chromosomes) with many
-- features on them. In that case the features are the semantic scope for
-- their annotation bundles, not the bioentry they are attached to.
CREATE TABLE seqfeature_dbxref (
seqfeature_id int NOT NULL,
dbxref_id int NOT NULL,
rank SMALLINT,
PRIMARY KEY (seqfeature_id,dbxref_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX feadblink_dbx ON seqfeature_dbxref(dbxref_id) TABLESPACE "BIOSQL_INDEX";
-- basically we model everything as potentially having
-- any number of locations, ie, a split location. SimpleLocations
-- just have one location. We need to have a location id for the qualifier
-- associations of fuzzy locations.
-- please do not try to model complex assemblies with this thing. It wont
-- work. Check out the ensembl schema for this.
-- we allow nulls for start/end - this is useful for fuzzies as
-- standard range queries will not be included
-- for remote locations, the join to make is to DBXref
-- the FK to term is a possibility to store the type of the
-- location for determining in one hit whether it's a fuzzy or not
CREATE TABLE location (
location_id int NOT NULL ,
seqfeature_id int NOT NULL,
dbxref_id int ,
term_id int ,
start_pos int,
end_pos int,
strand INT NOT NULL,
rank SMALLINT DEFAULT 0 NOT NULL,
PRIMARY KEY (location_id),
UNIQUE (seqfeature_id, rank)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX seqfeatureloc_start ON location(start_pos, end_pos) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX seqfeatureloc_dbx ON location(dbxref_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX seqfeatureloc_trm ON location(term_id) TABLESPACE "BIOSQL_INDEX";
-- location qualifiers - mainly intended for fuzzies but anything
-- can go in here
-- some controlled vocab terms have slots;
-- fuzzies could be modeled as min_start(5), max_start(5)
--
-- there is no restriction on extending the fuzzy ontology
-- for your own nefarious aims, although the bio* apis will
-- most likely ignore these
CREATE TABLE location_qualifier_value (
location_id int NOT NULL,
term_id int NOT NULL,
value VARCHAR(255) NOT NULL,
int_value int,
PRIMARY KEY (location_id,term_id)
) TABLESPACE "BIOSQL_DATA";
CREATE INDEX locationqual_trm ON location_qualifier_value(term_id) TABLESPACE "BIOSQL_INDEX";
--
-- Create the foreign key constraints
--
-- ontology term
ALTER TABLE term ADD CONSTRAINT FKont_term
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- term synonyms
ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
FOREIGN KEY (term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
-- term_dbxref
ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
FOREIGN KEY (term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
-- term_relationship
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
FOREIGN KEY (object_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKterm_trmrel
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- term_path
ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
FOREIGN KEY (object_term_id) REFERENCES term(term_id)
ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
ON DELETE CASCADE;
-- taxon, taxon_name
-- unfortunately, we can't constrain parent_taxon_id as it is violated
-- occasionally by the downloads available from NCBI
-- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
-- FOREIGN KEY (parent_taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id)
ON DELETE CASCADE;
-- bioentry
ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
FOREIGN KEY (biodatabase_id) REFERENCES biodatabase(biodatabase_id);
-- bioentry_relationship
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- bioentry_path
ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- biosequence
ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- comment
ALTER TABLE anncomment ADD CONSTRAINT FKbioentry_comment
FOREIGN KEY(bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- bioentry_dbxref
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- dbxref_qualifier_value
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- bioentry_reference
ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
FOREIGN KEY (reference_id) REFERENCES reference(reference_id)
ON DELETE CASCADE;
-- bioentry_qualifier_value
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
-- reference
ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
-- seqfeature
ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
FOREIGN KEY (type_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
FOREIGN KEY (source_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
ON DELETE CASCADE;
-- seqfeature_relationship
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_path
ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_qualifier_value
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
-- seqfeature_dbxref
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
ON DELETE CASCADE;
-- location
ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
ON DELETE CASCADE;
ALTER TABLE location ADD CONSTRAINT FKdbxref_location
FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id);
ALTER TABLE location ADD CONSTRAINT FKterm_featloc
FOREIGN KEY (term_id) REFERENCES term(term_id);
-- location_qualifier_value
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
FOREIGN KEY (location_id) REFERENCES location(location_id)
ON DELETE CASCADE;
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
FOREIGN KEY (term_id) REFERENCES term(term_id);
--
-- Triggers for automatic primary key generation and other sanity checks
--
CREATE OR REPLACE TRIGGER BID_location
BEFORE INSERT
on location
--
for each row
BEGIN
IF :new.location_id IS NULL THEN
SELECT location_pk_seq.nextval INTO :new.location_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_seqfeature
BEFORE INSERT
on seqfeature
--
for each row
BEGIN
IF :new.seqfeature_id IS NULL THEN
SELECT seqfeature_pk_seq.nextval INTO :new.seqfeature_id FROM DUAL;
END IF;
END;
/
CREATE TRIGGER BID_seqfeature_relationship
BEFORE INSERT
on seqfeature_relationship
--
for each row
BEGIN
IF :new.seqfeature_relationship_id IS NULL THEN
SELECT seqfeature_relationship_pk_seq.nextval INTO :new.seqfeature_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_anncomment
BEFORE INSERT
on anncomment
--
for each row
BEGIN
IF :new.comment_id IS NULL THEN
SELECT anncomment_pk_seq.nextval INTO :new.comment_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_reference
BEFORE INSERT
on reference
--
for each row
BEGIN
IF :new.reference_id IS NULL THEN
SELECT reference_pk_seq.nextval INTO :new.reference_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_bioentry_relationship
BEFORE INSERT
on bioentry_relationship
--
for each row
BEGIN
IF :new.bioentry_relationship_id IS NULL THEN
SELECT bioentry_relationship_pk_seq.nextval INTO :new.bioentry_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_bioentry
BEFORE INSERT
on bioentry
--
for each row
BEGIN
IF :new.bioentry_id IS NULL THEN
SELECT bioentry_pk_seq.nextval INTO :new.bioentry_id FROM DUAL;
END IF;
-- IF :new.Division IS NULL THEN
-- :new.Division := 'UNK';
-- END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term
BEFORE INSERT
on term
--
for each row
BEGIN
IF :new.term_id IS NULL THEN
SELECT term_pk_seq.nextval INTO :new.term_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term_relationship
BEFORE INSERT
on term_relationship
--
for each row
BEGIN
IF :new.term_relationship_id IS NULL THEN
SELECT term_relationship_pk_seq.nextval INTO :new.term_relationship_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_term_path
BEFORE INSERT
on term_path
--
for each row
BEGIN
IF :new.term_path_id IS NULL THEN
SELECT term_path_pk_seq.nextval INTO :new.term_path_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_ontology
BEFORE INSERT
on ontology
--
for each row
BEGIN
IF :new.ontology_id IS NULL THEN
SELECT ontology_pk_seq.nextval INTO :new.ontology_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_taxon
BEFORE INSERT
on taxon
--
for each row
BEGIN
IF :new.taxon_id IS NULL THEN
SELECT taxon_pk_seq.nextval INTO :new.taxon_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_biodatabase
BEFORE INSERT
on biodatabase
--
for each row
BEGIN
IF :new.biodatabase_id IS NULL THEN
SELECT biodatabase_pk_seq.nextval INTO :new.biodatabase_id FROM DUAL;
END IF;
END;
/
CREATE OR REPLACE TRIGGER BID_dbxref
BEFORE INSERT
on dbxref
--
for each row
BEGIN
IF :new.dbxref_id IS NULL THEN
SELECT dbxref_pk_seq.nextval INTO :new.dbxref_id FROM DUAL;
END IF;
END;
/
-------------- next part --------------
More information about the Biojava-l
mailing list