[Bioperl-l] Patch to CVS to implement ComparableI interface
Peter van Heusden
pvh at egenetics.com
Tue Apr 13 09:38:49 EDT 2004
Hi Bioperlers
In February I mentioned that I was working on the code to implement an
equals() method for Bioperl Seq objects, in order to test the SeqIO
framework by 'roundtripping' sequences from disk, back to disk again and
then off disk a second time and checking for any differences.
Since then, and following comments on the original thread, I've gone
further and implemented diff() methods for Bio::Seq and related objects.
All objects which have a diff() and an equals() method inherit from
Bio::ComparableI (in fact, they all simply inherit the Bio::ComparableI
equals() method, which is implemented in terms of diff()). I've attached
a patch against the current CVS tree, as well as the ComparableI.pm file
(which also fixes two minor bugs in Bio::Annotation::SimpleValue and
Bio::Annotation::Comment).
Please comment and, hopefully, incorporate into the tree.
Peter
P.S. I'm not sure what the procedure for documenting derived methods
is... at the moment I've only documented the superclass method, since
all the ones in the derived classes have the same signature and semantics.
-------------- next part --------------
? Bio/ComparableI.pm
Index: Bio/AnnotationI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/AnnotationI.pm,v
retrieving revision 1.7
diff -u -3 -p -r1.7 AnnotationI.pm
--- Bio/AnnotationI.pm 2002/10/22 07:38:24 1.7
+++ Bio/AnnotationI.pm 2004/04/13 12:52:55
@@ -116,7 +116,8 @@ use strict;
use Bio::Root::RootI;
- at ISA = qw(Bio::Root::RootI);
+ at ISA = qw(Bio::Root::RootI
+ Bio::ComparableI);
=head2 as_text
@@ -175,6 +176,37 @@ sub hash_tree{
sub tagname{
shift->throw_not_implemented();
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ return [] if $self == $other;
+
+ my @diffs = ();
+
+ (ref($self) ne ref($other)) && push(@diffs, 'class')
+ && $fastmatch && return undef;
+
+ my $self_hash = $self->hash_tree();
+ my $other_hash = $other->hash_tree();
+ foreach my $key (keys(%{$self->hash_tree()})) {
+ ($self_hash->{$key} ne $other_hash->{$key}) && push(@diffs, $key) &&
+ $fastmatch && return \@diffs;
+ }
+
+ return \@diffs;
+}
+
+sub hash_key {
+ my $self = shift;
+
+ my $hash_tree = $self->hash_tree();
+ my @keys = sort keys(%{$hash_tree});
+ my $string = join(':', map { $hash_tree->{$_} } @keys);
+ return $string;
}
1;
Index: Bio/LocationI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/LocationI.pm,v
retrieving revision 1.18
diff -u -3 -p -r1.18 LocationI.pm
--- Bio/LocationI.pm 2002/12/01 00:05:19 1.18
+++ Bio/LocationI.pm 2004/04/13 12:52:57
@@ -66,9 +66,10 @@ use strict;
use Bio::RangeI;
use Bio::Location::WidestCoordPolicy;
+use Bio::ComparableI;
use Carp;
- at ISA = qw(Bio::RangeI);
+ at ISA = qw(Bio::RangeI Bio::ComparableI);
BEGIN {
$coord_policy = Bio::Location::WidestCoordPolicy->new();
@@ -409,6 +410,38 @@ sub to_FTstring {
sub each_Location {
my ($self, at args) = @_;
$self->throw_not_implemented();
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ return [] if ($self == $other);
+
+ my @diffs = ();
+ my $simple_location = ($self->max_start() == $self->min_start() &&
+ $self->max_start() == $self->min_start()) ? 1 : 0;
+
+ my @intvals = $simple_location ? qw(start end) :
+ qw(min_start max_start min_end max_end);
+ push(@intvals, 'is_remote');
+
+ foreach my $method (@intvals) {
+ ($self->$method() != $other->$method()) && push(@diffs, $method) &&
+ $fastmatch && return \@diffs;
+ }
+
+ foreach my $method (qw(start_pos_type end_pos_type location_type)) {
+ ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+ $fastmatch && return \@diffs;
+ }
+
+ #compare the classes of the coordinate policy objects
+ (ref($self->coordinate_policy()) ne ref($other->coordinate_policy())) &&
+ push(@diffs, 'coordinate_policy') && $fastmatch && return \@diffs;
+
+ return \@diffs;
}
1;
Index: Bio/PrimarySeq.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/PrimarySeq.pm,v
retrieving revision 1.81
diff -u -3 -p -r1.81 PrimarySeq.pm
--- Bio/PrimarySeq.pm 2004/01/29 03:36:49 1.81
+++ Bio/PrimarySeq.pm 2004/04/13 12:53:00
@@ -120,9 +120,10 @@ use Bio::Root::Root;
use Bio::PrimarySeqI;
use Bio::IdentifiableI;
use Bio::DescribableI;
+use Bio::ComparableI;
@ISA = qw(Bio::Root::Root Bio::PrimarySeqI
- Bio::IdentifiableI Bio::DescribableI);
+ Bio::IdentifiableI Bio::DescribableI Bio::ComparableI);
#
# setup the allowed values for alphabet()
@@ -519,6 +520,7 @@ sub primary_id {
my $obj = shift;
if(@_) {
+ print "setting the primary id now: ", $_[0], "\n";
$obj->{'primary_id'} = shift;
}
if( ! defined($obj->{'primary_id'}) ) {
@@ -866,6 +868,33 @@ sub accession {
$self->warn(ref($self)."::accession is deprecated, ".
"use accession_number() instead");
return $self->accession_number(@_);
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ return [] if $self == $other;
+
+ my @diffs = ();
+ (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch
+ && return \@diffs;
+
+ foreach my $method (qw(seq display_id accession_number alphabet desc)) {
+ ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+ $fastmatch && return \@diffs;
+ }
+ # primary_id returns a stringified object id if no primary id is set
+ # we really don't want to use that for equality comparisons
+ if ($self->primary_id() ne "$self") {
+ ($self->primary_id() eq $other->primary_id()) &&
+ push(@diffs, 'primary_id') && $fastmatch && return \@diffs;
+ }
+ ($self->is_circular() != $other->is_circular()) &&
+ push(@diffs, 'is_circular') && $fastmatch && return \@diffs;
+
+ return \@diffs;
}
1;
Index: Bio/Seq.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Seq.pm,v
retrieving revision 1.84
diff -u -3 -p -r1.84 Seq.pm
--- Bio/Seq.pm 2004/01/29 03:37:33 1.84
+++ Bio/Seq.pm 2004/04/13 12:53:04
@@ -459,11 +459,13 @@ use Bio::IdentifiableI;
use Bio::DescribableI;
use Bio::AnnotatableI;
use Bio::FeatureHolderI;
+use Bio::ComparableI;
@ISA = qw(Bio::Root::Root Bio::SeqI
Bio::IdentifiableI Bio::DescribableI
- Bio::AnnotatableI Bio::FeatureHolderI);
+ Bio::AnnotatableI Bio::FeatureHolderI
+ Bio::ComparableI);
=head2 new
@@ -1228,6 +1230,66 @@ sub species {
} else {
return $self->{'species'};
}
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ # if we are the same object, we're obviously equal
+ return [] if $self eq $other;
+
+ my @diffs = ();
+ # two different classes can't be equal
+ (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch &&
+ return \@diffs;
+
+
+ # a Seq is made out of a:
+ # PrimarySeq primary_seq
+ # AnnotationCollectionI annotation->get_Annotations
+ # SeqFeatureI get_all_SeqFeatures
+ # Species species
+
+ my $priseq_diff = $self->primary_seq()->diff($other->primary_seq(), $fastmatch);
+ if ($#{$priseq_diff} != -1) {
+ push(@diffs, 'primary_seq', $priseq_diff);
+ return \@diffs if $fastmatch;
+ }
+ my $species_diff = $self->species()->diff($other->species(), $fastmatch);
+ if ($#{$species_diff} != -1) {
+ push(@diffs, 'species', $species_diff);
+ return \@diffs if $fastmatch;
+ }
+
+ my @self_annotations = sort {$a->hash_key() cmp $b->hash_key()} $self->annotation->get_Annotations();
+ my @other_annotations = sort {$a->hash_key() cmp $b->hash_key()} $other->annotation->get_Annotations();
+ ($#self_annotations != $#other_annotations) &&
+ push(@diffs, 'num_annotations') && $fastmatch && return \@diffs;
+ foreach my $i (0..$#self_annotations) {
+ my $anno_diffs = $self_annotations[$i]->diff($other_annotations[$i], $fastmatch);
+ if ($#{$anno_diffs} != -1) {
+ my $subclass = (split(/::/,ref($self_annotations[$i])))[2];
+ push(@diffs, "annotation::${subclass}_" . $i, $anno_diffs);
+ return \@diffs if $fastmatch;
+ }
+ }
+
+ my @self_features = sort {$a->hash_key() cmp $b->hash_key()} $self->get_SeqFeatures();
+ my @other_features = sort {$a->hash_key() cmp $b->hash_key()} $other->get_SeqFeatures();
+ ($#self_features != $#other_features) && push(@diffs, 'num_features') &&
+ $fastmatch && return \@diffs;
+ for my $i (0..$#self_features) {
+ my $feature_diffs = $self_features[$i]->diff($other_features[$i], $fastmatch);
+ if ($#{$feature_diffs} != -1) {
+ push(@diffs, 'feature_' . $i, $feature_diffs);
+ return \@diffs if $fastmatch;
+ }
+ }
+
+ # ok, everything is equal
+ return \@diffs;
}
=head1 Internal methods
Index: Bio/SeqFeatureI.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SeqFeatureI.pm,v
retrieving revision 1.55
diff -u -3 -p -r1.55 SeqFeatureI.pm
--- Bio/SeqFeatureI.pm 2004/03/13 01:30:02 1.55
+++ Bio/SeqFeatureI.pm 2004/04/13 12:53:07
@@ -91,11 +91,13 @@ BEGIN {
}
use Bio::RangeI;
use Bio::Seq;
+use Bio::ComparableI;
use Carp;
- at ISA = qw(Bio::RangeI);
+ at ISA = qw(Bio::RangeI Bio::ComparableI);
+
=head1 SeqFeatureI specific methods
New method interfaces.
@@ -366,6 +368,80 @@ sub gff_string{
$formatter = $self->_static_gff_formatter unless $formatter;
return $formatter->gff_string($self);
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ return [] if $self == $other;
+
+ my @diffs = ();
+ (ref($self) ne ref($other)) && push(@diffs, 'class') && $fastmatch &&
+ return \@diffs;
+
+ foreach my $method (qw(display_name primary_tag source_tag seq_id)) {
+ ($self->$method() ne $other->$method()) && push(@diffs, $method) &&
+ $fastmatch && return \@diffs;
+ }
+
+ my @self_tags = $self->get_all_tags();
+ my @other_tags = $other->get_all_tags();
+ ($#self_tags != $#other_tags) && push(@diffs, 'num_tags') &&
+ $fastmatch && return \@diffs;
+ for my $tag (@self_tags) {
+ my @self_values = sort $self->get_tag_values($tag);
+ my @other_values = sort $other->get_tag_values($tag);
+
+ ($#self_values != $#other_values) && push(@diffs, $tag . '_num_values') &&
+ $fastmatch && return \@diffs;
+ for my $i (0..$#self_values) {
+ ($self_values[$i] ne $other_values[$i]) &&
+ push(@diffs, $tag . '_value_' . $i) && $fastmatch &&
+ return \@diffs;
+ }
+ }
+
+ my $location_diffs = $self->location()->diff($other->location());
+ if ($#{$location_diffs} != -1) {
+ push(@diffs, 'location', $location_diffs);
+ }
+
+ my $equals = 1;
+ my @self_subfeatures = sort { $a->hash_key() cmp $b->hash_key() } $self->get_SeqFeatures();
+ my @other_subfeatures = sort { $a->hash_key() cmp $b->hash_key() } $self->get_SeqFeatures();
+ ($#self_subfeatures != $#other_subfeatures) &&
+ push(@diffs, 'num_subfeatures') && $fastmatch && return \@diffs;
+ for my $i (0..$#self_subfeatures) {
+ my $sub_diffs = $self_subfeatures[$i]->diff($other_subfeatures[$i], $fastmatch);
+ if (len(@{$sub_diffs}) != 0) {
+ push(@diffs, 'subfeature_' . $i, $sub_diffs);
+ return \@diffs if $fastmatch;
+ }
+ }
+
+ return \@diffs;
+}
+
+=head2 hash_key
+
+ Title : hash_key
+ Usage : $key = hash_key();
+ Function: Provides a string which can be used as the key for a hash
+ table. This string is composed out of attributes of this
+ object in such a way that if two SeqFeatureI objects have the
+ same hash_key(), they are equivalent.
+
+ Returns : string
+ Args : None
+
+=cut
+
+sub hash_key {
+ my $self = shift;
+
+ return join(':', $self->primary_tag(), $self->display_name(), $self->start(), $self->end());
}
my $static_gff_formatter = undef;
Index: Bio/Species.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Species.pm,v
retrieving revision 1.29
diff -u -3 -p -r1.29 Species.pm
--- Bio/Species.pm 2003/11/14 11:38:26 1.29
+++ Bio/Species.pm 2004/04/13 12:53:20
@@ -68,8 +68,9 @@ use strict;
# Object preamble - inherits from Bio::Root::Object
use Bio::Root::Root;
+use Bio::ComparableI;
- at ISA = qw(Bio::Root::Root);
+ at ISA = qw(Bio::Root::Root Bio::ComparableI);
=head2 new
@@ -364,6 +365,34 @@ sub division{
return $self->{'_division'} = shift if @_;
return $self->{'_division'};
+}
+
+sub diff {
+ my $self = shift;
+ my $other = shift;
+ my $fastmatch = shift;
+
+ return [] if $self == $other;
+
+ my @diffs = ();
+
+ ref($self) ne ref($other) && push(@diffs, 'class') && $fastmatch &&
+ return \@diffs;
+
+ for my $method (qw(common_name variant organelle species genus sub_species ncbi_taxid division)) {
+ $self->$method() ne $other->$method() && push(@diffs, $method) &&
+ $fastmatch && return \@diffs;
+ }
+ my @self_classification = $self->classification();
+ my @other_classification = $other->classification();
+ my $same_classification = 1;
+ for my $i (0..$#self_classification) {
+ $same_classification = 0 if $self_classification[$i] ne $other_classification[$i];
+ }
+ $same_classification == 0 && push(@diffs, 'classification') &&
+ $fastmatch && return \@diffs;
+
+ return \@diffs;
}
1;
Index: Bio/Annotation/Comment.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Annotation/Comment.pm,v
retrieving revision 1.9
diff -u -3 -p -r1.9 Comment.pm
--- Bio/Annotation/Comment.pm 2003/12/22 08:50:00 1.9
+++ Bio/Annotation/Comment.pm 2004/04/13 12:53:20
@@ -116,6 +116,8 @@ sub hash_tree{
my $h = {};
$h->{'text'} = $self->text;
+
+ return $h;
}
=head2 tagname
Index: Bio/Annotation/SimpleValue.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Annotation/SimpleValue.pm,v
retrieving revision 1.10
diff -u -3 -p -r1.10 SimpleValue.pm
--- Bio/Annotation/SimpleValue.pm 2003/03/10 21:53:56 1.10
+++ Bio/Annotation/SimpleValue.pm 2004/04/13 12:54:06
@@ -142,6 +142,8 @@ sub hash_tree{
my $h = {};
$h->{'value'} = $self->value;
+
+ return $h;
}
=head2 tagname
-------------- next part --------------
package Bio::ComparableI;
use vars(@ISA);
use strict;
use Bio::Root::Root;
#@ISA = qw(Bio::Root::Root);
=pod
Title : equals
Usage : $obj->equals($other_obj)
Function: Tests if two objects are equal, and returns true if they
are, and false otherwise.
Example :
Returns : boolean
Args : $other_obj (same type as $obj)
=cut
sub equals {
my $self = shift;
my $other = shift;
return ($#{($self->diff($other, 0))} == -1);
}
=pod
Title : diff
Usage : $obj->diff($other_obj, [$fastmatch])
Function: Returns a list of the attributes where $self doesn't match
$other_obj. If the $fastmatch flag is set to true, only
returns the first difference. In the case of complex objects,
differences between two components are reflected as an
attribute name followed by a listref listing the attributes
of the component object that don't match. Thus, the listref
returned by this method should strictly be interpreted as
a recursive data structure.
For example, this might be the listref returned by diff()
on two Bio::Seq objects: [ 'primaryseq', [ 'display_id' ]]
Example :
Returns : listref of string and arrayref
Args : $other_obj (Bio::ComparableI subclass)
$fastmatch (int)
=cut
sub diff {
shift->throw_not_implemented();
}
1;
More information about the Bioperl-l
mailing list