[Bioperl-l] Patch to CVS to implement ComparableI interface

Peter van Heusden pvh at egenetics.com
Tue Apr 13 09:38:49 EDT 2004


Hi Bioperlers

In February I mentioned that I was working on the code to implement an 
equals() method for Bioperl Seq objects, in order to test the SeqIO 
framework by 'roundtripping' sequences from disk, back to disk again and 
then off disk a second time and checking for any differences.

Since then, and following comments on the original thread, I've gone 
further and implemented diff() methods for Bio::Seq and related objects. 
All objects which have a diff() and an equals() method inherit from 
Bio::ComparableI (in fact, they all simply inherit the Bio::ComparableI 
equals() method, which is implemented in terms of diff()). I've attached 
a patch against the current CVS tree, as well as the ComparableI.pm file 
(which also fixes two minor bugs in Bio::Annotation::SimpleValue and 
Bio::Annotation::Comment).

Please comment and, hopefully, incorporate into the tree.

Peter
P.S. I'm not sure what the procedure for documenting derived methods 
is... at the moment I've only documented the superclass method, since 
all the ones in the derived classes have the same signature and semantics.
-------------- next part --------------
? Bio/ComparableI.pm
Index: Bio/AnnotationI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/AnnotationI.pm,v
retrieving revision 1.7
diff -u -3 -p -r1.7 AnnotationI.pm
--- Bio/AnnotationI.pm	2002/10/22 07:38:24	1.7
+++ Bio/AnnotationI.pm	2004/04/13 12:52:55
@@ -116,7 +116,8 @@ use strict;
 use Bio::Root::RootI;
 
 
- at ISA = qw(Bio::Root::RootI);
+ at ISA = qw(Bio::Root::RootI
+          Bio::ComparableI);
 
 
 =head2 as_text
@@ -175,6 +176,37 @@ sub hash_tree{
 
 sub tagname{
     shift->throw_not_implemented();
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  return [] if $self == $other;
+
+  my @diffs = ();
+
+  (ref($self) ne ref($other)) && push(@diffs, 'class') 
+    && $fastmatch && return undef;
+
+  my $self_hash = $self->hash_tree();
+  my $other_hash = $other->hash_tree();
+  foreach my $key (keys(%{$self->hash_tree()})) {
+    ($self_hash->{$key} ne $other_hash->{$key}) && push(@diffs, $key) &&
+      $fastmatch && return \@diffs;
+  }
+
+  return \@diffs;
+}
+
+sub hash_key {
+  my $self = shift;
+  
+  my $hash_tree = $self->hash_tree();
+  my @keys = sort keys(%{$hash_tree});
+  my $string = join(':', map { $hash_tree->{$_} } @keys);
+  return $string;
 }
 
 1;
Index: Bio/LocationI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/LocationI.pm,v
retrieving revision 1.18
diff -u -3 -p -r1.18 LocationI.pm
--- Bio/LocationI.pm	2002/12/01 00:05:19	1.18
+++ Bio/LocationI.pm	2004/04/13 12:52:57
@@ -66,9 +66,10 @@ use strict;
 
 use Bio::RangeI;
 use Bio::Location::WidestCoordPolicy;
+use Bio::ComparableI;
 use Carp;
 
- at ISA = qw(Bio::RangeI);
+ at ISA = qw(Bio::RangeI Bio::ComparableI);
 
 BEGIN {
     $coord_policy = Bio::Location::WidestCoordPolicy->new();
@@ -409,6 +410,38 @@ sub to_FTstring { 
 sub each_Location {
     my ($self, at args) = @_;
     $self->throw_not_implemented();
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  return [] if ($self == $other);
+
+  my @diffs = ();
+  my $simple_location = ($self->max_start() == $self->min_start() && 
+    $self->max_start() == $self->min_start()) ? 1 : 0;
+
+  my @intvals = $simple_location ? qw(start end) : 
+    qw(min_start max_start min_end max_end);
+  push(@intvals, 'is_remote');
+ 
+  foreach my $method (@intvals) {
+    ($self->$method() != $other->$method()) && push(@diffs, $method) &&
+      $fastmatch && return \@diffs;
+  }
+
+  foreach my $method (qw(start_pos_type end_pos_type location_type)) {
+    ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+      $fastmatch && return \@diffs;
+  }
+
+  #compare the classes of the coordinate policy objects
+  (ref($self->coordinate_policy()) ne ref($other->coordinate_policy())) &&
+    push(@diffs, 'coordinate_policy') && $fastmatch && return \@diffs;
+
+  return \@diffs;
 }
 
 1;
Index: Bio/PrimarySeq.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/PrimarySeq.pm,v
retrieving revision 1.81
diff -u -3 -p -r1.81 PrimarySeq.pm
--- Bio/PrimarySeq.pm	2004/01/29 03:36:49	1.81
+++ Bio/PrimarySeq.pm	2004/04/13 12:53:00
@@ -120,9 +120,10 @@ use Bio::Root::Root;
 use Bio::PrimarySeqI;
 use Bio::IdentifiableI;
 use Bio::DescribableI;
+use Bio::ComparableI;
 
 @ISA = qw(Bio::Root::Root Bio::PrimarySeqI
-	  Bio::IdentifiableI Bio::DescribableI);
+	  Bio::IdentifiableI Bio::DescribableI Bio::ComparableI);
 
 #
 # setup the allowed values for alphabet()
@@ -519,6 +520,7 @@ sub primary_id {
     my $obj = shift;
 
     if(@_) {
+	print "setting the primary id now: ", $_[0], "\n";
 	$obj->{'primary_id'} = shift;
     }
     if( ! defined($obj->{'primary_id'}) ) {
@@ -866,6 +868,33 @@ sub accession {
     $self->warn(ref($self)."::accession is deprecated, ".
 		"use accession_number() instead");
     return $self->accession_number(@_);
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  return [] if $self == $other;
+  
+  my @diffs = ();
+  (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch 
+    && return \@diffs;
+
+  foreach my $method (qw(seq display_id accession_number alphabet desc)) {
+    ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+      $fastmatch && return \@diffs;
+  }
+  # primary_id returns a stringified object id if no primary id is set
+  # we really don't want to use that for equality comparisons
+  if ($self->primary_id() ne "$self") {
+    ($self->primary_id() eq $other->primary_id()) && 
+      push(@diffs, 'primary_id') && $fastmatch && return \@diffs;
+  }
+  ($self->is_circular() != $other->is_circular()) && 
+    push(@diffs, 'is_circular') && $fastmatch && return \@diffs;
+  
+  return \@diffs;
 }
 
 1;
Index: Bio/Seq.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Seq.pm,v
retrieving revision 1.84
diff -u -3 -p -r1.84 Seq.pm
--- Bio/Seq.pm	2004/01/29 03:37:33	1.84
+++ Bio/Seq.pm	2004/04/13 12:53:04
@@ -459,11 +459,13 @@ use Bio::IdentifiableI;
 use Bio::DescribableI;
 use Bio::AnnotatableI;
 use Bio::FeatureHolderI;
+use Bio::ComparableI;
 
 
 @ISA = qw(Bio::Root::Root Bio::SeqI
 	  Bio::IdentifiableI Bio::DescribableI
-	  Bio::AnnotatableI Bio::FeatureHolderI);
+	  Bio::AnnotatableI Bio::FeatureHolderI
+	  Bio::ComparableI);
 
 =head2 new
 
@@ -1228,6 +1230,66 @@ sub species {
     } else {
         return $self->{'species'};
     }
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  # if we are the same object, we're obviously equal
+  return [] if $self eq $other;
+
+  my @diffs = ();
+  # two different classes can't be equal
+  (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch &&
+    return \@diffs;
+
+
+  # a Seq is made out of a:
+  # PrimarySeq primary_seq
+  # AnnotationCollectionI annotation->get_Annotations
+  # SeqFeatureI get_all_SeqFeatures 
+  # Species species
+
+  my $priseq_diff = $self->primary_seq()->diff($other->primary_seq(), $fastmatch);
+  if ($#{$priseq_diff} != -1) {
+    push(@diffs, 'primary_seq', $priseq_diff);
+    return \@diffs if $fastmatch;
+  }
+  my $species_diff = $self->species()->diff($other->species(), $fastmatch);
+  if ($#{$species_diff} != -1) {
+    push(@diffs, 'species', $species_diff);
+    return \@diffs if $fastmatch;
+  }
+
+  my @self_annotations = sort {$a->hash_key() cmp $b->hash_key()} $self->annotation->get_Annotations();
+  my @other_annotations = sort {$a->hash_key() cmp $b->hash_key()} $other->annotation->get_Annotations();
+  ($#self_annotations != $#other_annotations) && 
+    push(@diffs, 'num_annotations') && $fastmatch && return \@diffs;
+  foreach my $i (0..$#self_annotations) {
+    my $anno_diffs = $self_annotations[$i]->diff($other_annotations[$i], $fastmatch);
+    if ($#{$anno_diffs} != -1) {
+      my $subclass = (split(/::/,ref($self_annotations[$i])))[2];
+      push(@diffs, "annotation::${subclass}_" . $i, $anno_diffs);
+      return \@diffs if $fastmatch;
+    }
+  }
+
+  my @self_features = sort {$a->hash_key() cmp $b->hash_key()} $self->get_SeqFeatures();
+  my @other_features = sort {$a->hash_key() cmp $b->hash_key()} $other->get_SeqFeatures();
+  ($#self_features != $#other_features) && push(@diffs, 'num_features') &&
+    $fastmatch && return \@diffs;
+  for my $i (0..$#self_features) {
+    my $feature_diffs = $self_features[$i]->diff($other_features[$i], $fastmatch);
+    if ($#{$feature_diffs} != -1) {
+      push(@diffs, 'feature_' . $i, $feature_diffs);
+      return \@diffs if $fastmatch;
+    }
+  }
+
+  # ok, everything is equal
+  return \@diffs;
 }
 
 =head1 Internal methods
Index: Bio/SeqFeatureI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SeqFeatureI.pm,v
retrieving revision 1.55
diff -u -3 -p -r1.55 SeqFeatureI.pm
--- Bio/SeqFeatureI.pm	2004/03/13 01:30:02	1.55
+++ Bio/SeqFeatureI.pm	2004/04/13 12:53:07
@@ -91,11 +91,13 @@ BEGIN {
 }
 use Bio::RangeI;
 use Bio::Seq;
+use Bio::ComparableI;
 
 use Carp;
 
- at ISA = qw(Bio::RangeI);
 
+ at ISA = qw(Bio::RangeI Bio::ComparableI);
+
 =head1 SeqFeatureI specific methods
 
 New method interfaces.
@@ -366,6 +368,80 @@ sub gff_string{
 
    $formatter = $self->_static_gff_formatter unless $formatter;
    return $formatter->gff_string($self);
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  return [] if $self == $other;
+
+  my @diffs = ();
+  (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch &&
+    return \@diffs;
+
+  foreach my $method (qw(display_name primary_tag source_tag seq_id)) {
+    ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+      $fastmatch && return \@diffs;
+  }
+
+  my @self_tags = $self->get_all_tags();
+  my @other_tags = $other->get_all_tags();
+  ($#self_tags != $#other_tags) && push(@diffs, 'num_tags') &&
+    $fastmatch && return \@diffs;
+  for my $tag (@self_tags) {
+    my @self_values = sort $self->get_tag_values($tag);
+    my @other_values = sort $other->get_tag_values($tag);
+
+    ($#self_values != $#other_values) && push(@diffs, $tag . '_num_values') &&
+      $fastmatch && return \@diffs;
+    for my $i (0..$#self_values) {
+      ($self_values[$i] ne $other_values[$i]) && 
+	push(@diffs, $tag . '_value_' . $i) && $fastmatch &&
+	return \@diffs;
+    }
+  }
+
+  my $location_diffs = $self->location()->diff($other->location());
+  if ($#{$location_diffs} != -1) {
+    push(@diffs, 'location', $location_diffs);
+  }
+
+  my $equals = 1;
+  my @self_subfeatures = sort { $a->hash_key() cmp $b->hash_key() } $self->get_SeqFeatures();
+  my @other_subfeatures = sort { $a->hash_key() cmp $b->hash_key() } $self->get_SeqFeatures();
+  ($#self_subfeatures != $#other_subfeatures) && 
+    push(@diffs, 'num_subfeatures') && $fastmatch && return \@diffs;
+  for my $i (0..$#self_subfeatures) {
+    my $sub_diffs = $self_subfeatures[$i]->diff($other_subfeatures[$i], $fastmatch);
+    if (len(@{$sub_diffs}) != 0) {
+      push(@diffs, 'subfeature_' . $i, $sub_diffs);
+      return \@diffs if $fastmatch;
+    }
+  }
+
+  return \@diffs;
+}
+
+=head2 hash_key
+
+ Title   : hash_key
+ Usage   : $key = hash_key();
+ Function: Provides a string which can be used as the key for a hash
+           table. This string is composed out of attributes of this
+           object in such a way that if two SeqFeatureI objects have the
+           same hash_key(), they are equivalent.
+
+ Returns : string
+ Args    : None
+
+=cut
+
+sub hash_key {
+  my $self = shift;
+
+  return join(':', $self->primary_tag(), $self->display_name(), $self->start(), $self->end());
 }
 
 my $static_gff_formatter = undef;
Index: Bio/Species.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Species.pm,v
retrieving revision 1.29
diff -u -3 -p -r1.29 Species.pm
--- Bio/Species.pm	2003/11/14 11:38:26	1.29
+++ Bio/Species.pm	2004/04/13 12:53:20
@@ -68,8 +68,9 @@ use strict;
 # Object preamble - inherits from Bio::Root::Object
 
 use Bio::Root::Root;
+use Bio::ComparableI;
 
- at ISA = qw(Bio::Root::Root);
+ at ISA = qw(Bio::Root::Root Bio::ComparableI);
 
 =head2 new
 
@@ -364,6 +365,34 @@ sub division{
     
     return $self->{'_division'} = shift if @_;
     return $self->{'_division'};
+}
+
+sub diff {
+  my $self = shift;
+  my $other = shift;
+  my $fastmatch = shift;
+
+  return [] if $self == $other;
+
+  my @diffs = ();
+
+  ref($self) ne ref($other) && push(@diffs, 'class') && $fastmatch &&
+    return \@diffs;
+
+  for my $method (qw(common_name variant organelle species genus sub_species ncbi_taxid division)) {
+    $self->$method() ne $other->$method() && push(@diffs, $method) &&
+      $fastmatch && return \@diffs;
+  }
+  my @self_classification = $self->classification();
+  my @other_classification = $other->classification();
+  my $same_classification = 1;
+  for my $i (0..$#self_classification) {
+    $same_classification = 0 if $self_classification[$i] ne $other_classification[$i];
+  }
+  $same_classification == 0 && push(@diffs, 'classification') && 
+    $fastmatch && return \@diffs;
+
+  return \@diffs;
 }
 
 1;
Index: Bio/Annotation/Comment.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Annotation/Comment.pm,v
retrieving revision 1.9
diff -u -3 -p -r1.9 Comment.pm
--- Bio/Annotation/Comment.pm	2003/12/22 08:50:00	1.9
+++ Bio/Annotation/Comment.pm	2004/04/13 12:53:20
@@ -116,6 +116,8 @@ sub hash_tree{
    
    my $h = {};
    $h->{'text'} = $self->text;
+
+   return $h;
 }
 
 =head2 tagname
Index: Bio/Annotation/SimpleValue.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Annotation/SimpleValue.pm,v
retrieving revision 1.10
diff -u -3 -p -r1.10 SimpleValue.pm
--- Bio/Annotation/SimpleValue.pm	2003/03/10 21:53:56	1.10
+++ Bio/Annotation/SimpleValue.pm	2004/04/13 12:54:06
@@ -142,6 +142,8 @@ sub hash_tree{
    
    my $h = {};
    $h->{'value'} = $self->value;
+
+   return $h;
 }
 
 =head2 tagname
-------------- next part --------------
package Bio::ComparableI;
use vars(@ISA);
use strict;
use Bio::Root::Root;

#@ISA = qw(Bio::Root::Root);


=pod
  Title   : equals
  Usage   : $obj->equals($other_obj)
  Function: Tests if two objects are equal, and returns true if they
            are, and false otherwise.
  Example : 
  Returns : boolean
  Args    : $other_obj (same type as $obj)
=cut

sub equals {
  my $self = shift;
  my $other = shift;

  return ($#{($self->diff($other, 0))} == -1);
}

=pod
  Title   : diff
  Usage   : $obj->diff($other_obj, [$fastmatch])
  Function: Returns a list of the attributes where $self doesn't match
            $other_obj. If the $fastmatch flag is set to true, only
	    returns the first difference. In the case of complex objects,
	    differences between two components are reflected as an 
	    attribute name followed by a listref listing the attributes
	    of the component object that don't match. Thus, the listref 
	    returned by this method should strictly be interpreted as
	    a recursive data structure.

	    For example, this might be the listref returned by diff()
	    on two Bio::Seq objects: [ 'primaryseq', [ 'display_id' ]]
  Example : 
  Returns : listref of string and arrayref
  Args    : $other_obj (Bio::ComparableI subclass)
            $fastmatch (int) 
=cut

sub diff {
  shift->throw_not_implemented();
}

1;


More information about the Bioperl-l mailing list