[MOBY-guts] biomoby commit
Dirk Haase
d.haase at dev.open-bio.org
Tue Jan 30 14:49:20 UTC 2007
d.haase
Tue Jan 30 09:49:20 EST 2007
Update of /home/repository/moby/moby-live/Perl/MOBY
In directory dev.open-bio.org:/tmp/cvs-serv1900
Modified Files:
OntologyServer.pm
Log Message:
Re-implementation of methods Relationships and traverseDAG; reduces DB interaction
and enables expansion to 'leaves' direction in Relationships; traverseDAG now
uses Relationships call internally.
moby-live/Perl/MOBY OntologyServer.pm,1.101,1.102
===================================================================
RCS file: /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm,v
retrieving revision 1.101
retrieving revision 1.102
diff -u -r1.101 -r1.102
--- /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm 2006/03/23 18:00:50 1.101
+++ /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm 2007/01/30 14:49:20 1.102
@@ -63,6 +63,7 @@
=cut
package MOBY::OntologyServer;
+
use strict;
use Carp;
use vars qw($AUTOLOAD);
@@ -1253,122 +1254,232 @@
=cut
sub Relationships {
-
- # this entire subroutine assumes that there is NOT multiple parenting!!
- my ( $self, %args ) = @_;
- my $ontology = $args{ontology} ? $args{ontology} : $self->ontology;
-
- my $results;
- if ( $ontology eq 'service' ) {
- $results = &_ServiceRelationships($self, %args);
- } elsif ( $ontology eq 'object' ) {
- $results = &_ObjectRelationships($self, %args);
- } else {
- return {[]};
- }
- return $results;
-}
-
-sub _ServiceRelationships {
- my ($self, %args) = @_;
- my %results = [];
- my $term = $args{term};
- my $relationship = $args{relationship};
- my $direction = $args{direction} ? $args{direction} : 'root';
- my $expand = $args{expand} ? 1 : 0;
- my $ontology = "service";
- return \%results unless ($term);
-
- $term = $self->getServiceURI($term);
- $relationship ||="isa";
- my $OS = MOBY::OntologyServer->new(ontology => 'relationship');
- $relationship = $OS->getRelationshipURI("service", $relationship);
-
- while ($term && (!($term =~ /urn\:lsid\:biomoby.org\:servicetype\:Service\:/ ) )){
- my $defs = $self->_doRelationshipsQuery($ontology,$term,$relationship,$direction );
- next unless $defs; # somethig has gone terribly wrong!
- my $lsid;
- my $rel;
- my $articleName;
- foreach ( @{$defs} ) {
- $lsid = $_->[0];
- $rel = $_->[1];
- $articleName = $_->[2];
- $articleName ||="";
- my $info = $self->serviceInfo(term => $lsid); # we need to get the term name, and that doesn't come from here
- my $term = $info->{service_type};
- push @{ $results{$rel} }, {lsid => $lsid, term => $term};
- }
- last unless ($expand);
- last unless ( $direction eq "root" ); # if we aren't going to root, then be careful or we'll loop infnitely
- $term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
- }
-
- return \%results;
-}
-
-sub _ObjectRelationships {
- my ($self, %args) = @_;
- my %results = [];
- my $term = $args{term};
- my $relationship = $args{relationship};
- my $direction = $args{direction} ? $args{direction} : 'root';
- my $expand = $args{expand} ? 1 : 0;
- my $ontology = 'object';
- return \%results unless ($term);
- $term = $self->getObjectURI($term);
- $relationship ||="isa";
- my $OS = MOBY::OntologyServer->new(ontology => 'relationship');
- $relationship = $OS->getRelationshipURI("object", $relationship);
- my $isa_relationship = $OS->getRelationshipURI("object", "ISA");
- return \%results unless $isa_relationship; # we need this to do the isa traversal to root when expanding the hasa and has relationships call
-
- my @isa_hierarchy = ($term);
- while ($term && (!($term =~ /urn\:lsid\:biomoby.org\:servicetype\:Object\:/ ) )){ # first build the entire ISA hierarchy
- my $defs = $self->_doRelationshipsQuery($ontology,$term,$isa_relationship,$direction );
- next unless $defs; # somethig has gone terribly wrong!
- my $lsid;
- my $def = shift @{$defs}; # for ISA there should be only one parent
- $lsid = $def->[0]; # the lsid is in slot 0
- push @isa_hierarchy, $lsid if $lsid;
- $term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
- }
- foreach my $node(@isa_hierarchy){ # now traverse along the ISA hierarchy and get what you need
- my $defs = $self->_doRelationshipsQuery($ontology,$node,$relationship,$direction );
- next unless $defs; # somethig has gone terribly wrong!
- my $lsid;
- my $rel;
- my $articleName;
- foreach ( @{$defs} ) {
- next unless $_->[0];
- $lsid = $_->[0];
- $rel = $_->[1];
- $articleName = $_->[2];
- $articleName ||="";
- my $info = $self->objectInfo(term => $lsid); # we need to get the term name, and that doesn't come from here
- my $term = $info->{object_type};
- push @{ $results{$rel} }, {lsid => $lsid, articleName => $articleName, term => $term};
- }
- last unless ($expand);
- last unless ( $direction eq "root" ); # if we aren't going to root, then be careful or we'll loop infnitely
- $term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
- }
- return \%results;
-}
-
-sub _doRelationshipsQuery {
- my ( $self, $ontology, $term, $relationship, $direction ) = @_;
- $CONFIG ||= MOBY::Config->new; # exported by Config.pm
- my $datasource = "moby$ontology"; # like mobyobject, or mobyservice
- my $adaptor = $CONFIG->getDataAdaptor( datasource => $datasource );
- my $defs;
- # query returns a reference to an array containing array references
- $defs = $adaptor->get_relationship(direction => $direction,
- ontology => $ontology,
- term => $term,
- relationship => $relationship);
- # a very long piece of SQL statements have been refactored into Moby::Adaptor::moby::queryapi::mysql.pm
- return $defs;
+ my ($self, %args) = @_;
+ my %results;
+
+ my $term = $args{term};
+ my $ontology = $args{ontology} ? $args{ontology} : $self->ontology;
+ my $direction = $args{direction} ? $args{direction} : 'root';
+ $direction = $direction eq 'root'? 'root' : 'leaves'; # map anything else to 'leaves'
+ my $relationship = $args{relationship};
+ my $expand = $args{expand} ? 1 : 0;
+
+ # in order to make this function also usable for 'traverseDAG'
+ # we need a more precise definition what to expand. Note that
+ # the default settings assure the behaviour of the old 'expand' param.
+ # 1. expand along the isa relationship?
+ my $isaExpand = $args{isaExpand} ? $args{isaExpand} : $expand;
+ # 2. expand along the inclusion relationship types (has/hasa),
+ # i.e. get inclusions of inclusions?
+ # (Note: this is set when called by 'traverseDAG')
+ my $incExpand = $args{incExpand} ? $args{incExpand} : 0;
+ # 3. explore inclusion relationships for complete isa hierarchy?
+ # (Note: this was fix behaviour of the old 'expand',
+ # but is not used by traverseDAG)
+ my $mapIncToIsa = $args{mapIncToIsa} ? $args{mapIncToIsa} : $expand;
+
+ # first of all, get ID of query entity,
+ # internally, we will operate on pure IDs
+ # as long as possible...
+ $CONFIG ||= MOBY::Config->new; # exported by Config.pm
+ my $datasource = "moby$ontology"; # like mobyobject, or mobyservice
+ my $adaptor = $CONFIG->getDataAdaptor( datasource => $datasource );
+ my $queryId;
+ my $query_method = "query_$ontology";
+ my $result = $adaptor->$query_method(type => $term);
+ my $row = shift @$result;
+ $queryId = $row->{"${ontology}_id"};
+
+ return {} unless $queryId;
+
+ # get all relationships in the database in one query
+ my $relHash = $adaptor->get_all_relationships(direction=>$direction,ontology=>$ontology);
+
+ # find out which relationships to return
+ # use keys of %$relHash, because these are lsids:
+
+ # initialize to return all relationships (becomes effective if eg. 'all' was used)
+ my @relList = keys %$relHash;
+ if ( (not $relationship) or # ISA (and nothing else) is the default if nothing specified
+ ($relationship =~ /isa$/i) ) {
+ @relList = grep { /isa$/i } @relList;
+ }
+ elsif ( $relationship =~ /has(a)?$/i ) {
+ # if either has or hasa was specified, use only that
+ @relList = grep { /$relationship$/i } @relList;
+ }
+
+ # build the isa hierarchy, it's needed in any case...
+ my ($isaLsid) = grep { /isa$/i } keys %$relHash; # we need the lsid...
+ my $isa_hierarchy = $self->_getIsaHierarchy($relHash->{$isaLsid}, $queryId, $direction, $isaExpand);
+
+ # prepare the hash for storing HAS/HASA relationship details
+ my $hasRelDetails;
+
+ # table fields needed to get entity details:
+ my @fields = ("${ontology}_lsid","${ontology}_type");
+
+ # nodes to check for has/hasa relationship
+ my @checkNodes = ($queryId);
+ # mapIncToIsa means that has/hasa has to be checked
+ # not only for the query object alone but also for all
+ # isa ancestors/descendants
+ push @checkNodes, @$isa_hierarchy if $mapIncToIsa;
+
+ # the result hash will consist of one list for each included relationship type...
+ foreach my $rel ( @relList ) {
+ my @entityQueryList = (); # this collects the unique object ids
+ my @entityResultList = (); # this collects ids of objects to add to the result, maybe not unique
+ # the latter one is not essential to have, the only benefit is
+ # a somehow predictable order in the output...
+
+ # find out which entities we have to include in the result
+ # and how these are related to each other;
+ # Note: all needed information is present in the relationship hash %$relHash!
+
+ if ( $rel ne $isaLsid ) {
+ # either HAS or HASA
+ foreach my $node ( @checkNodes ) {
+ my $incls = $self->_getInclusions($relHash,$node,[$rel], $incExpand);
+ foreach my $triplet ( @$incls ) {
+ my ($inclId, $inclArtName, $inclAssert) = @$triplet;
+ $hasRelDetails->{$inclId}->{$inclAssert} = $inclArtName;
+ push @entityResultList, $inclId;
+ }
+ }
+ # set up list of unique object ids for the database lookup
+ @entityQueryList = keys %$hasRelDetails;
+ }
+ else {
+ # ISA
+ @entityQueryList = @$isa_hierarchy; # isa hierarchy is guaranteed to be unique...
+ @entityResultList = @$isa_hierarchy; # ... but still both variables have to be set
+ }
+
+ # now it's time to move away from pure ids, retrieve details from database:
+ my $details = $adaptor->get_details_for_id_list($ontology, \@fields, \@entityQueryList);
+
+ # enhance details with information about relationships and build result hash
+ foreach my $entityId (@entityResultList) {
+ # add articleName slot if necessary
+ if ( exists $hasRelDetails->{$entityId} ) {
+ foreach my $assert ( keys %{$hasRelDetails->{$entityId}} ) {
+ $details->{$entityId}->{'articleName'} = $hasRelDetails->{$entityId}->{$assert};
+ }
+ }
+ elsif ( $ontology eq 'object') {
+ # for isa, articleName is the empty string
+ $details->{$entityId}->{'articleName'} = '';
+ }
+
+ # map ontology specific field names to commons slots:
+ # 1. 'object_lsid'/'service_lsid' -> 'lsid'
+ $details->{$entityId}->{'lsid'} = $details->{$entityId}->{"${ontology}_lsid"}
+ unless exists $details->{$entityId}->{'lsid'}; # do just once foreach object!
+ delete $details->{$entityId}->{"${ontology}_lsid"}; # remove redundant slot
+ # 2. 'object_type'/'service_type' -> 'term'
+ $details->{$entityId}->{'term'} = $details->{$entityId}->{"${ontology}_type"}
+ unless exists $details->{$entityId}->{'term'}; # do just once foreach object!
+ delete $details->{$entityId}->{"${ontology}_type"}; # remove redundant slot
+
+ # finally, add record to the result hash
+ push @{ $results{$rel} }, $details->{$entityId};
+ }
+ }
+ return \%results;
+}
+
+sub _getIsaHierarchy {
+ # Finds out the isa hierarchy for the query entity, that is
+ # the parent (the one which it inherits from) if direction is 'root' or
+ # the children (one or more which inherit from it) if direction is 'leaves'.
+ # If 'expand' is set all deeper levels (ancestors or descendants if you like)
+ # are also included.
+ # Note 1: this implementation relies on pure single inheritance!
+ # Note 2: we can use the same method for both directions only because the
+ # provided isaHash is built with the direction in mind, make sure
+ # to have direction consistent!
+
+ # returned is a reference to a flat list
+
+ my ($self, $isaHash, $query, $direction, $expand) = @_;
+
+ my @hierarchy = ();
+ if ( exists $isaHash->{$query} ) {
+ if ( $direction eq 'root' ) {
+ # push the parent entity
+ push @hierarchy, $isaHash->{$query}; # relies on single inheritance!
+ }
+ elsif ( $direction eq 'leaves' ) {
+ # push the direct children
+ push @hierarchy, @{$isaHash->{$query}};
+ }
+ else {
+ # it has to be either 'root' or 'leaves'
+ warn "_getIsaHierarchy was called with wrong direction indicator,
+ use either 'root' or 'leaves'!\n";
+ return [];
+ }
+ if ( $expand ) {
+ my @firstLevel = @hierarchy;
+ foreach my $entity ( @firstLevel ) {
+ my $deeperLevels = $self->_getIsaHierarchy($isaHash, $entity, $direction, 1);
+ push @hierarchy, @$deeperLevels;
+ }
+ }
+ return \@hierarchy;
+ }
+ else {
+ # important: anchor the recursion!
+ return [];
+ }
+}
+
+sub _getInclusions {
+
+ # Finds out the objects related to the query by one of the inclusion
+ # relationships (HAS or HASA). This is the HAS/HASA-analogue to
+ # _getIsaHierarchy, but is more complicated, because the values in
+ # the provided relationship hash ($relHash) are not simple ids but
+ # triplets ("relationship records") in the format of:
+ # [id of relationship partner, articleName, assertion id]
+ # On the other hand, direction does not matter here, because
+ # we have to deal with multi relationships in any case.
+ # Like for ISA, be aware that the relationship hash '$relHash'
+ # is built direction dependant. Make sure to use it consistently!
+
+ # Note: third argument is a listref of relationship types, that is
+ # it could be called with HAS and HASA (expected are lsids) at
+ # the same time and in this way merge both inclusion relationship
+ # types. However, this usage is not used currently and not tested!
+
+ # Returned is a reference to a list with each element being
+ # a triplet (listref to a relationship record) as explained above.
+
+ my ($self, $relHash, $query, $relList, $expand) = @_;
+
+ my %nodeCheckDone; # for avoiding multiple check of one node (if expand is set)
+ my @allInclusions = ();
+ foreach my $relType ( @$relList ) {
+ # 'root' means: include all relationships where query is the
+ # containing (outer) object;
+ # eg. if A HAS B, and A is query, include this record
+ if ( exists $relHash->{$relType}->{$query} ) {
+ my $relRecords = $relHash->{$relType}->{$query};
+ foreach my $record ( @$relRecords ) {
+ push @allInclusions, $record;
+ if ( $expand ) {
+ my ($incId, $artName, $assert) = @$record;
+ if ( not exists $nodeCheckDone{$incId} ) {
+ my $deeperInclusions = $self->_getInclusions($relHash, $incId, $relList, 1);
+ push @allInclusions, @$deeperInclusions;
+ $nodeCheckDone{$incId}++;
+ }
+ }
+ }
+ }
+ }
+ return \@allInclusions; # empty if nothing found, this anchors the recursion
}
=head2 setURI
@@ -1402,73 +1513,40 @@
=cut
sub traverseDAG {
- my ( $self, $term, $direction ) = @_;
- my $ontology = $self->ontology;
- return {} unless $ontology;
- return {} unless $term;
- $direction = "root" unless ($direction);
- return {} unless ( ( $direction eq 'root' ) || ( $direction eq 'leaves' ) );
- if ( $ontology eq 'service' ) {
- $term = $self->getServiceURI($term);
- } elsif ( $ontology eq 'object' ) {
- $term = $self->getObjectURI($term);
- }
- return {} unless $term =~ /^urn\:lsid/; # now its a URI
- my $relhash = $self->getRelationshipTypes( ontology => $ontology ); # get teh types of relationships for the object/service ontology
- return {} unless $relhash;
- my @rels = keys %{$relhash}; #@rels is the list of relationship types for that ontology
- my %relationships;
- foreach my $relationship (@rels) {
- my %IDS;
- my $OS = MOBY::OntologyServer->new( ontology => 'relationship' );
- my $reluri =
- $OS->getRelationshipURI( $ontology, $relationship )
- ; # get the URI for that relationship type if it ins't already a URI
- $IDS{$term} = "untestedroot"; # mark the one in-hand as being untested
- while ( grep /untested/, ( values %IDS ) )
- { # now, while there are untested services in our list...
- foreach my $termthingy ( keys %IDS )
- { # start parsing through the current list (hash keys)
- $debug && _LOG("testing $relationship of $termthingy\n");
- next
- if ( $IDS{$termthingy} eq "tested" )
- ; # if it has been tested already then move on
- my $lsids = $self->Relationships(
- term => $termthingy,
- relationship => $relationship,
- direction => $direction
- )
- ; # get the related terms for this type; this should return a single hash value
- if ( $IDS{$termthingy} =~ /root/ )
- { # here is where we remove self
- delete $IDS{$termthingy};
- $debug && _LOG("deleting $termthingy\n");
- } else {
- $debug && _LOG("marking $termthingy as TESTED\n");
- $IDS{$termthingy} =
- "tested"; # mark the current one as now being "done"
- }
-
- #${$lsids}{relationshiptype}=[lsid, lsid, lsid];
- foreach my $lsid_article ( @{ $lsids->{$relationship} } )
- { # go through the related terms
- my $lsid = $lsid_article->{lsid},
- my $article = $lsid_article->{articleName};
- $debug && _LOG("found $lsid as relationship");
- next
- if ( defined $IDS{$lsid} )
- ; # if we have already seen that term, skip it
- $debug && _LOG("setting $lsid as untested\n");
- $IDS{$lsid} =
- "untested" # otherwise add it to the list and loop again.
- }
- }
- }
- my @IDS = keys %IDS;
- $relationships{$relationship} =
- \@IDS; # and associate them all with the current relationship type
- }
- return \%relationships;
+ my ( $self, $term, $direction ) = @_;
+ my $ontology = $self->ontology;
+ return {} unless $ontology;
+ return {} unless $term;
+ $direction = "root" unless ($direction);
+ return {} unless ( ( $direction eq 'root' ) || ( $direction eq 'leaves' ) );
+ if ( $ontology eq 'service' ) {
+ $term = $self->getServiceURI($term);
+ } elsif ( $ontology eq 'object' ) {
+ $term = $self->getObjectURI($term);
+ }
+ return {} unless $term; # search term not in db!
+ return {} unless $term =~ /^urn\:lsid/; # now its a URI
+
+ my $result = {};
+ # get the types of relationships for the object/service ontology
+ my $relTypeHash = $self->getRelationshipTypes( ontology => $ontology );
+ my $relHash = $self->Relationships( term => $term,
+ direction => $direction,
+ ontology => $ontology,
+ isaExpand => 1,
+ incExpand => 1,
+ mapIncToIsa => 0,
+ relationship => 'all');
+ foreach my $relType ( keys %$relTypeHash ) {
+ $result->{$relType} = [];
+ my %tmpHash; # avoid doubles!
+ my $relList = $relHash->{$relType};
+ foreach my $rel ( @$relList ) {
+ $tmpHash{$rel->{'lsid'}}++;
+ }
+ @{$result->{$relType}} = keys %tmpHash;
+ }
+ return $result;
}
sub _LOG {
More information about the MOBY-guts
mailing list