[MOBY-guts] biomoby commit

Dirk Haase d.haase at dev.open-bio.org
Tue Jan 30 14:49:20 UTC 2007


d.haase
Tue Jan 30 09:49:20 EST 2007
Update of /home/repository/moby/moby-live/Perl/MOBY
In directory dev.open-bio.org:/tmp/cvs-serv1900

Modified Files:
	OntologyServer.pm 
Log Message:
Re-implementation of methods Relationships and traverseDAG; reduces DB interaction
and enables expansion to 'leaves' direction in Relationships; traverseDAG now 
uses Relationships call internally.

moby-live/Perl/MOBY OntologyServer.pm,1.101,1.102
===================================================================
RCS file: /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm,v
retrieving revision 1.101
retrieving revision 1.102
diff -u -r1.101 -r1.102
--- /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm	2006/03/23 18:00:50	1.101
+++ /home/repository/moby/moby-live/Perl/MOBY/OntologyServer.pm	2007/01/30 14:49:20	1.102
@@ -63,6 +63,7 @@
 =cut
 
 package MOBY::OntologyServer;
+
 use strict;
 use Carp;
 use vars qw($AUTOLOAD);
@@ -1253,122 +1254,232 @@
 =cut
 
 sub Relationships {
-
-	# this entire subroutine assumes that there is NOT multiple parenting!!
-	my ( $self, %args ) = @_;
-	my $ontology     = $args{ontology} ? $args{ontology} : $self->ontology;
-
-	my $results;
-	if ( $ontology eq 'service' ) {
-		$results = &_ServiceRelationships($self, %args);
-	} elsif ( $ontology eq 'object' ) {
-		$results = &_ObjectRelationships($self, %args);
-	} else {
-		return {[]};
-	}	
-	return $results; 
-}
-
-sub _ServiceRelationships {
-	my ($self, %args) = @_;
-	my %results = [];
-	my $term         = $args{term};
-	my $relationship = $args{relationship};
-	my $direction    = $args{direction} ? $args{direction} : 'root';
-	my $expand       = $args{expand} ? 1 : 0;
-	my $ontology = "service";
-	return \%results unless ($term);
-
-	$term = $self->getServiceURI($term);
-	$relationship ||="isa";
-	my $OS = MOBY::OntologyServer->new(ontology => 'relationship');
-	$relationship = $OS->getRelationshipURI("service", $relationship);
-
-	while ($term && (!($term =~ /urn\:lsid\:biomoby.org\:servicetype\:Service\:/ ) )){
-		my $defs = $self->_doRelationshipsQuery($ontology,$term,$relationship,$direction );
-		next unless $defs; # somethig has gone terribly wrong!
-		my $lsid;
-		my $rel;
-		my $articleName;
-		foreach ( @{$defs} ) {
-			$lsid = $_->[0];
-			$rel  = $_->[1];
-			$articleName = $_->[2];
-			$articleName ||="";
-			my $info = $self->serviceInfo(term => $lsid);  # we need to get the term name, and that doesn't come from here
-			my $term = $info->{service_type};
-			push @{ $results{$rel} }, {lsid => $lsid, term => $term};
-		}
-		last unless ($expand);
-		last unless ( $direction eq "root" ); # if we aren't going to root, then be careful or we'll loop infnitely
-		$term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
-	}
-
-	return \%results;	
-}
-
-sub _ObjectRelationships {
-	my ($self, %args) = @_;
-	my %results = [];
-	my $term         = $args{term};
-	my $relationship = $args{relationship};
-	my $direction    = $args{direction} ? $args{direction} : 'root';
-	my $expand       = $args{expand} ? 1 : 0;
-	my $ontology = 'object';
-	return \%results unless ($term);
-	$term = $self->getObjectURI($term);
-	$relationship ||="isa";
-	my $OS = MOBY::OntologyServer->new(ontology => 'relationship');
-	$relationship = $OS->getRelationshipURI("object", $relationship);
-	my $isa_relationship = $OS->getRelationshipURI("object", "ISA");
-	return \%results unless $isa_relationship;  # we need this to do the isa traversal to root when expanding the hasa and has relationships call
-	
-	my @isa_hierarchy = ($term);	
-	while ($term && (!($term =~ /urn\:lsid\:biomoby.org\:servicetype\:Object\:/ ) )){  # first build the entire ISA hierarchy
-		my $defs = $self->_doRelationshipsQuery($ontology,$term,$isa_relationship,$direction );
-		next unless $defs; # somethig has gone terribly wrong!
-		my $lsid;
-		my $def = shift @{$defs};  # for ISA there should be only one parent
-		$lsid = $def->[0];  # the lsid is in slot 0
-		push @isa_hierarchy, $lsid if $lsid;
-		$term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
-	}
-	foreach my $node(@isa_hierarchy){  # now traverse along the ISA hierarchy and get what you need
-		my $defs = $self->_doRelationshipsQuery($ontology,$node,$relationship,$direction );
-		next unless $defs; # somethig has gone terribly wrong!
-		my $lsid;
-		my $rel;
-		my $articleName;
-		foreach ( @{$defs} ) {
-			next unless $_->[0];
-			$lsid = $_->[0];
-			$rel  = $_->[1];
-			$articleName = $_->[2];
-			$articleName ||="";
-			my $info = $self->objectInfo(term => $lsid);  # we need to get the term name, and that doesn't come from here
-			my $term = $info->{object_type};
-			push @{ $results{$rel} }, {lsid => $lsid, articleName => $articleName, term => $term};
-		}
-		last unless ($expand);
-		last unless ( $direction eq "root" ); # if we aren't going to root, then be careful or we'll loop infnitely
-		$term = $lsid; # this entire subroutine assumes that there is NOT multiple parenting...
-	}
-	return \%results;
-}
-
-sub _doRelationshipsQuery {
-	my ( $self, $ontology, $term, $relationship, $direction ) = @_;
-	$CONFIG ||= MOBY::Config->new;    # exported by Config.pm
-	my $datasource = "moby$ontology";  # like mobyobject, or mobyservice
-	my $adaptor = $CONFIG->getDataAdaptor( datasource => $datasource );
-	my $defs;
-	# query returns a reference to an array containing array references
-	$defs = $adaptor->get_relationship(direction => $direction,
-					   ontology => $ontology,
-					   term => $term,
-					   relationship => $relationship);
-	# a very long piece of SQL statements have been refactored into Moby::Adaptor::moby::queryapi::mysql.pm
-	return $defs;
+  my ($self, %args) = @_;
+  my %results;
+  
+  my $term         = $args{term};
+  my $ontology     = $args{ontology} ? $args{ontology} : $self->ontology;
+  my $direction    = $args{direction} ? $args{direction} : 'root';
+  $direction = $direction eq 'root'? 'root' : 'leaves'; # map anything else to 'leaves'
+  my $relationship  = $args{relationship};
+  my $expand       = $args{expand} ? 1 : 0;
+  
+  # in order to make this function also usable for 'traverseDAG'
+  # we need a more precise definition what to expand. Note that
+  # the default settings assure the behaviour of the old 'expand' param.
+  #  1. expand along the isa relationship?
+  my $isaExpand    = $args{isaExpand} ? $args{isaExpand} : $expand;
+  #  2. expand along the inclusion relationship types (has/hasa),
+  #     i.e. get inclusions of inclusions?
+  #     (Note: this is set when called by 'traverseDAG')
+  my $incExpand   = $args{incExpand} ? $args{incExpand} : 0;
+  #  3. explore inclusion relationships for complete isa hierarchy?
+  #     (Note: this was fix behaviour of the old 'expand',
+  #      but is not used by traverseDAG)
+  my $mapIncToIsa = $args{mapIncToIsa} ? $args{mapIncToIsa} : $expand;
+
+  # first of all, get ID of query entity,
+  # internally, we will operate on pure IDs
+  # as long as possible...
+  $CONFIG ||= MOBY::Config->new;     # exported by Config.pm
+  my $datasource = "moby$ontology";  # like mobyobject, or mobyservice
+  my $adaptor = $CONFIG->getDataAdaptor( datasource => $datasource );
+  my $queryId;
+  my $query_method = "query_$ontology";
+  my $result = $adaptor->$query_method(type => $term);
+  my $row = shift @$result;
+  $queryId = $row->{"${ontology}_id"};
+
+  return {} unless $queryId;
+
+  # get all relationships in the database in one query
+  my $relHash = $adaptor->get_all_relationships(direction=>$direction,ontology=>$ontology);
+
+  # find out which relationships to return
+  # use keys of %$relHash, because these are lsids:
+
+  # initialize to return all relationships (becomes effective if eg. 'all' was used)
+  my @relList = keys %$relHash;
+  if ( (not $relationship) or # ISA (and nothing else) is the default if nothing specified
+       ($relationship =~ /isa$/i) ) {
+    @relList = grep { /isa$/i } @relList;
+  }
+  elsif ( $relationship =~ /has(a)?$/i ) {
+    # if either has or hasa was specified, use only that
+    @relList = grep { /$relationship$/i } @relList;
+  }
+
+  # build the isa hierarchy, it's needed in any case...
+  my ($isaLsid) = grep { /isa$/i } keys %$relHash; # we need the lsid...
+  my $isa_hierarchy = $self->_getIsaHierarchy($relHash->{$isaLsid}, $queryId, $direction, $isaExpand);
+
+  # prepare the hash for storing HAS/HASA relationship details
+  my $hasRelDetails;
+
+  # table fields needed to get entity details:
+  my @fields = ("${ontology}_lsid","${ontology}_type");
+
+  # nodes to check for has/hasa relationship
+  my @checkNodes = ($queryId);
+  # mapIncToIsa means that has/hasa has to be checked
+  # not only for the query object alone but also for all
+  # isa ancestors/descendants
+  push @checkNodes, @$isa_hierarchy if $mapIncToIsa;
+  
+  # the result hash will consist of one list for each included relationship type...
+  foreach my $rel ( @relList ) {
+    my @entityQueryList = ();  # this collects the unique object ids
+    my @entityResultList = (); # this collects ids of objects to add to the result, maybe not unique
+    # the latter one is not essential to have, the only benefit is
+    # a somehow predictable order in the output...
+
+    # find out which entities we have to include in the result
+    # and how these are related to each other;
+    # Note: all needed information is present in the relationship hash %$relHash!
+
+    if ( $rel ne $isaLsid ) {
+      # either HAS or HASA
+      foreach my $node ( @checkNodes ) {
+	my $incls = $self->_getInclusions($relHash,$node,[$rel], $incExpand);
+	foreach my $triplet ( @$incls ) {
+	  my ($inclId, $inclArtName, $inclAssert) = @$triplet;
+	  $hasRelDetails->{$inclId}->{$inclAssert} = $inclArtName;
+	  push @entityResultList, $inclId;
+	}
+      }
+      # set up list of unique object ids for the database lookup
+      @entityQueryList = keys %$hasRelDetails;
+    }
+    else {
+      # ISA
+      @entityQueryList = @$isa_hierarchy;  # isa hierarchy is guaranteed to be unique...
+      @entityResultList = @$isa_hierarchy; # ... but still both variables have to be set
+    }
+    
+    # now it's time to move away from pure ids, retrieve details from database:
+    my $details = $adaptor->get_details_for_id_list($ontology, \@fields, \@entityQueryList);
+
+    # enhance details with information about relationships and build result hash
+    foreach my $entityId (@entityResultList) {
+      # add articleName slot if necessary
+      if ( exists $hasRelDetails->{$entityId} ) {
+	foreach my $assert ( keys %{$hasRelDetails->{$entityId}} ) {
+	  $details->{$entityId}->{'articleName'} = $hasRelDetails->{$entityId}->{$assert};
+	}
+      }
+      elsif ( $ontology eq 'object') {
+	# for isa, articleName is the empty string
+	$details->{$entityId}->{'articleName'} = '';
+      }
+
+      # map ontology specific field names to commons slots:
+      # 1. 'object_lsid'/'service_lsid' -> 'lsid'
+      $details->{$entityId}->{'lsid'} = $details->{$entityId}->{"${ontology}_lsid"} 
+	unless exists $details->{$entityId}->{'lsid'}; # do just once foreach object!
+      delete $details->{$entityId}->{"${ontology}_lsid"}; # remove redundant slot
+      # 2. 'object_type'/'service_type' -> 'term'
+      $details->{$entityId}->{'term'} = $details->{$entityId}->{"${ontology}_type"}
+	unless exists $details->{$entityId}->{'term'}; # do just once foreach object!
+      delete $details->{$entityId}->{"${ontology}_type"}; # remove redundant slot
+
+      # finally, add record to the result hash
+      push @{ $results{$rel} }, $details->{$entityId};
+    }
+  }
+  return \%results;
+}
+
+sub _getIsaHierarchy {
+  # Finds out the isa hierarchy for the query entity, that is
+  # the parent (the one which it inherits from) if direction is 'root' or
+  # the children (one or more which inherit from it) if direction is 'leaves'.
+  # If 'expand' is set all deeper levels (ancestors or descendants if you like)
+  # are also included.
+  # Note 1: this implementation relies on pure single inheritance!
+  # Note 2: we can use the same method for both directions only because the
+  #         provided isaHash is built with the direction in mind, make sure
+  #         to have direction consistent!
+
+  # returned is a reference to a flat list
+  
+  my ($self, $isaHash, $query, $direction, $expand) = @_;
+
+  my @hierarchy = ();
+  if ( exists $isaHash->{$query} ) {
+    if ( $direction eq 'root' ) {
+      # push the parent entity
+      push @hierarchy, $isaHash->{$query}; # relies on single inheritance!
+    }
+    elsif ( $direction eq 'leaves' ) {
+      # push the direct children
+      push @hierarchy, @{$isaHash->{$query}};
+    }
+    else {
+      # it has to be either 'root' or 'leaves'
+      warn "_getIsaHierarchy was called with wrong direction indicator,
+            use either 'root' or 'leaves'!\n";
+      return [];
+    }
+    if ( $expand ) {
+      my @firstLevel = @hierarchy;
+      foreach my $entity ( @firstLevel ) {
+	my $deeperLevels = $self->_getIsaHierarchy($isaHash, $entity, $direction, 1);
+	push @hierarchy, @$deeperLevels;
+      }
+    }
+    return \@hierarchy;
+  }
+  else {
+    # important: anchor the recursion!
+    return [];
+  }
+}
+
+sub _getInclusions {
+
+  # Finds out the objects related to the query by one of the inclusion
+  # relationships (HAS or HASA). This is the HAS/HASA-analogue to
+  # _getIsaHierarchy, but is more complicated, because the values in
+  # the provided relationship hash ($relHash) are not simple ids but
+  # triplets ("relationship records") in the format of:
+  # [id of relationship partner, articleName, assertion id]
+  # On the other hand, direction does not matter here, because
+  # we have to deal with multi relationships in any case.
+  # Like for ISA, be aware that the relationship hash '$relHash'
+  # is built direction dependant. Make sure to use it consistently!
+
+  # Note: third argument is a listref of relationship types, that is
+  # it could be called with HAS and HASA (expected are lsids) at
+  # the same time and in this way merge both inclusion relationship
+  # types. However, this usage is not used currently and not tested!
+  
+  # Returned is a reference to a list with each element being
+  # a triplet (listref to a relationship record) as explained above.
+  
+  my ($self, $relHash, $query, $relList, $expand) = @_;
+
+  my %nodeCheckDone; # for avoiding multiple check of one node (if expand is set)
+  my @allInclusions = ();
+  foreach my $relType ( @$relList ) {
+    # 'root' means: include all relationships where query is the
+    # containing (outer) object;
+    # eg. if A HAS B, and A is query, include this record
+    if ( exists $relHash->{$relType}->{$query} ) {
+      my $relRecords = $relHash->{$relType}->{$query};
+      foreach my $record ( @$relRecords ) {
+	push @allInclusions, $record;
+	if ( $expand ) {
+	  my ($incId, $artName, $assert) = @$record;
+	  if ( not exists $nodeCheckDone{$incId} ) {
+	    my $deeperInclusions = $self->_getInclusions($relHash, $incId, $relList, 1);
+	    push @allInclusions, @$deeperInclusions;
+	    $nodeCheckDone{$incId}++;
+	  }
+	}
+      }
+    }
+  }
+  return \@allInclusions; # empty if nothing found, this anchors the recursion
 }
 
 =head2 setURI
@@ -1402,73 +1513,40 @@
 =cut
 
 sub traverseDAG {
-	my ( $self, $term, $direction ) = @_;
-	my $ontology = $self->ontology;
-	return {} unless $ontology;
-	return {} unless $term;
-	$direction = "root" unless ($direction);
-	return {} unless ( ( $direction eq 'root' ) || ( $direction eq 'leaves' ) );
-	if ( $ontology eq 'service' ) {
-		$term = $self->getServiceURI($term);
-	} elsif ( $ontology eq 'object' ) {
-		$term = $self->getObjectURI($term);
-	}
-	return {} unless $term =~ /^urn\:lsid/;    # now its a URI
-	my $relhash = $self->getRelationshipTypes( ontology => $ontology );    # get teh types of relationships for the object/service ontology
-	return {} unless $relhash;
-	my @rels = keys %{$relhash};    #@rels is the list of relationship types for that ontology
-	my %relationships;
-	foreach my $relationship (@rels) {
-		my %IDS;
-		my $OS = MOBY::OntologyServer->new( ontology => 'relationship' );
-		my $reluri =
-		  $OS->getRelationshipURI( $ontology, $relationship )
-		  ;   # get the URI for that relationship type if it ins't already a URI
-		$IDS{$term} = "untestedroot";   # mark the one in-hand as being untested
-		while ( grep /untested/, ( values %IDS ) )
-		{    # now, while there are untested services in our list...
-			foreach my $termthingy ( keys %IDS )
-			{    # start parsing through the current list (hash keys)
-				$debug && _LOG("testing $relationship of $termthingy\n");
-				next
-				  if ( $IDS{$termthingy} eq "tested" )
-				  ;    # if it has been tested already then move on
-				my $lsids = $self->Relationships(
-					term         => $termthingy,
-					relationship => $relationship,
-					direction    => $direction
-				  )
-				  ; # get the related terms for this type; this should return a single hash value
-				if ( $IDS{$termthingy} =~ /root/ )
-				{    # here is where we remove self
-					delete $IDS{$termthingy};
-					$debug && _LOG("deleting $termthingy\n");
-				} else {
-					$debug && _LOG("marking $termthingy as TESTED\n");
-					$IDS{$termthingy} =
-					  "tested";    # mark the current one as now being "done"
-				}
-
-				#${$lsids}{relationshiptype}=[lsid, lsid, lsid];
-				foreach my $lsid_article ( @{ $lsids->{$relationship} } )
-				{                  # go through the related terms
-					my $lsid = $lsid_article->{lsid},
-					my $article = $lsid_article->{articleName};
-					$debug && _LOG("found $lsid as relationship");
-					next
-					  if ( defined $IDS{$lsid} )
-					  ;            # if we have already seen that term, skip it
-					$debug && _LOG("setting $lsid as untested\n");
-					$IDS{$lsid} =
-					  "untested"  # otherwise add it to the list and loop again.
-				}
-			}
-		}
-		my @IDS = keys %IDS;
-		$relationships{$relationship} =
-		  \@IDS;    # and associate them all with the current relationship type
-	}
-	return \%relationships;
+  my ( $self, $term, $direction ) = @_;
+  my $ontology = $self->ontology;
+  return {} unless $ontology;
+  return {} unless $term;
+  $direction = "root" unless ($direction);
+  return {} unless ( ( $direction eq 'root' ) || ( $direction eq 'leaves' ) );
+  if ( $ontology eq 'service' ) {
+    $term = $self->getServiceURI($term);
+  } elsif ( $ontology eq 'object' ) {
+    $term = $self->getObjectURI($term);
+  }
+  return {} unless $term; # search term not in db!
+  return {} unless $term =~ /^urn\:lsid/;    # now its a URI
+
+  my $result = {};
+  # get the types of relationships for the object/service ontology
+  my $relTypeHash = $self->getRelationshipTypes( ontology => $ontology );
+  my $relHash = $self->Relationships( term => $term,
+				  direction => $direction,
+				  ontology => $ontology,
+				  isaExpand => 1,
+				  incExpand => 1,
+				  mapIncToIsa => 0,
+				  relationship => 'all');
+  foreach my $relType ( keys %$relTypeHash ) {
+    $result->{$relType} = [];
+    my %tmpHash; # avoid doubles!
+    my $relList = $relHash->{$relType};
+    foreach my $rel ( @$relList ) {
+      $tmpHash{$rel->{'lsid'}}++;
+    }
+    @{$result->{$relType}} = keys %tmpHash;
+  }
+  return $result;
 }
 
 sub _LOG {




More information about the MOBY-guts mailing list