[Bioperl-l] Re: Testing Bioperl objects for equality: attempt 2
Hilmar Lapp
hlapp at gmx.net
Sun May 23 18:51:08 EDT 2004
Peter, I fixed the two bugs from your diffs. Other than that, I don't
see your modules in the repository. Did you not want to commit them yet?
-hilmar
On Monday, May 3, 2004, at 03:49 AM, Peter van Heusden wrote:
> Ok, so recently I posted my diffs to the Bioperl source which added
> equals() and diff() methods to Bioperl object. Hilmar objected, citing
> two main objections:
>
> 1) The 'hash_key()' methods I added to various objects were
> inappopriately named.
> 2) Equality is not an objective measure, so a single 'equals' method
> didn't make sense.
>
> To address (2), I've pulled the equals code out of the Bioperl objects,
> and created a new interface, ComparatorI, which can test a certain set
> of Bioperl objects for equality. And I've created a SampleComparator.pm
> module implementing my version of equality.
>
> I've attached the ComparatorI.pm and SampleComparator.pm modules to
> this
> message, as well as a small patch to this morning's Bioperl which is
> necessary for them to work (it still includes the misnamed hash_key()
> stuff that will be fixed in the future).
>
> I know the name Comparator is clumsy, but its just an interim name. And
> equals() should be renamed is_equal(). But I just wanted to get
> something out there for comment before going further. Here's some
> sample
> code:
>
> $comp = Bio::SampleComparator();
>
> if ($comp->equals($seq1, $seq2)) {
> print "they are equal\n";
> }
>
> $differences = $comp->diff($seq1, $seq2);
>
> Obviously the SampleComparator.pm module would need to keep up-to-date
> with respect to the classes that it can compare, but I don't see any
> way
> around those kind of dependencies.
>
> Peter
>
> use strict;
> package Bio::SampleComparator;
>
> use Bio::ComparatorI;
> use vars qw(@ISA);
>
> @ISA = qw(Bio::ComparatorI);
>
> sub set_supported_classes {
> my $self = shift;
>
> @{$self->{'supported_classes'}} = qw(
> Bio::AnnotationI
> Bio::LocationI
> Bio::PrimarySeq
> Bio::Seq
> Bio::SeqFeatureI
> Bio::Species
> );
>
> }
>
> sub diff {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> return [] if $obj1 == $obj2;
>
> my @diffs = ();
>
> (ref($obj1) ne ref($obj2)) && push(@diffs, 'class') &&
> $fastmatch && return undef;
>
> my $diff_method = $self->can_compare($obj1);
>
> if (defined($diff_method)) {
> return $self->$diff_method($obj1, $obj2, $fastmatch);
> } else {
> $self->throw("trying to compare unsupported object type: " .
> ref($obj1));
> }
> }
>
>
> sub diff_BioAnnotationI {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> my @diffs = ();
>
> my $obj1_hash = $obj1->hash_tree();
> my $obj2_hash = $obj2->hash_tree();
> foreach my $key (keys(%{$obj1->hash_tree()})) {
> ($obj1_hash->{$key} ne $obj2_hash->{$key}) && push(@diffs, $key) &&
> $fastmatch && return \@diffs;
> }
>
> return \@diffs;
> }
>
>
> sub diff_BioLocationI {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> my @diffs = ();
> my $simple_location = ($obj1->max_start() == $obj1->min_start() &&
> $obj1->max_start() == $obj1->min_start()) ? 1 : 0;
>
> my @intvals = $simple_location ? qw(start end) :
> qw(min_start max_start min_end max_end);
> push(@intvals, 'is_remote');
>
> foreach my $method (@intvals) {
> ($obj1->$method() != $obj2->$method()) && push(@diffs, $method) &&
> $fastmatch && return \@diffs;
> }
>
> foreach my $method (qw(start_pos_type end_pos_type location_type)) {
> ($obj1->$method() ne $obj2->$method()) && push(@diffs, $method) &&
> $fastmatch && return \@diffs;
> }
>
> #compare the classes of the coordinate policy objects
> (ref($obj1->coordinate_policy()) ne ref($obj2->coordinate_policy()))
> &&
> push(@diffs, 'coordinate_policy') && $fastmatch && return \@diffs;
>
> return \@diffs;
> }
>
> sub diff_BioPrimarySeq {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> my @diffs = ();
>
> foreach my $method (qw(seq display_id accession_number alphabet
> desc)) {
> ($obj1->$method() ne $obj2->$method()) && push(@diffs, $method) &&
> $fastmatch && return \@diffs;
> }
> # primary_id returns a stringified object id if no primary id is set
> # we really don't want to use that for equality comparisons
> if ($obj1->primary_id() ne "$self") {
> ($obj1->primary_id() eq $obj2->primary_id()) &&
> push(@diffs, 'primary_id') && $fastmatch && return \@diffs;
> }
> ($obj1->is_circular() != $obj2->is_circular()) &&
> push(@diffs, 'is_circular') && $fastmatch && return \@diffs;
>
> return \@diffs;
> }
>
> sub diff_BioSeq {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> my @diffs = ();
>
>
> # a Seq is made out of a:
> # PrimarySeq primary_seq
> # AnnotationCollectionI annotation->get_Annotations
> # SeqFeatureI get_all_SeqFeatures
> # Species species
>
> my $priseq_diff = $self->diff($obj1->primary_seq(),
> $obj2->primary_seq(), $fastmatch);
> if ($#{$priseq_diff} != -1) {
> push(@diffs, 'primary_seq', $priseq_diff);
> return \@diffs if $fastmatch;
> }
> my $species_diff = $self->diff($obj1->species(), $obj2->species(),
> $fastmatch);
> if ($#{$species_diff} != -1) {
> push(@diffs, 'species', $species_diff);
> return \@diffs if $fastmatch;
> }
>
> my @obj1_annotations = sort {$a->hash_key() cmp $b->hash_key()}
> $obj1->annotation->get_Annotations();
> my @obj2_annotations = sort {$a->hash_key() cmp $b->hash_key()}
> $obj2->annotation->get_Annotations();
> ($#obj1_annotations != $#obj2_annotations) &&
> push(@diffs, 'num_annotations') && $fastmatch && return \@diffs;
> foreach my $i (0..$#obj1_annotations) {
> my $anno_diffs = $self->diff($obj1_annotations[$i],
> $obj2_annotations[$i], $fastmatch);
> if ($#{$anno_diffs} != -1) {
> my $subclass = (split(/::/,ref($obj1_annotations[$i])))[2];
> push(@diffs, "annotation::${subclass}_" . $i, $anno_diffs);
> return \@diffs if $fastmatch;
> }
> }
>
> my @obj1_features = sort {$a->hash_key() cmp $b->hash_key()}
> $obj1->get_SeqFeatures();
> my @obj2_features = sort {$a->hash_key() cmp $b->hash_key()}
> $obj2->get_SeqFeatures();
> ($#obj1_features != $#obj2_features) && push(@diffs, 'num_features')
> &&
> $fastmatch && return \@diffs;
> for my $i (0..$#obj2_features) {
> my $feature_diffs = $self->diff($obj1_features[$i],
> $obj2_features[$i], $fastmatch);
> if ($#{$feature_diffs} != -1) {
> push(@diffs, 'feature_' . $i, $feature_diffs);
> return \@diffs if $fastmatch;
> }
> }
>
> return \@diffs;
> }
>
> sub diff_BioSeqFeatureI {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> return [] if $obj1 == $obj2;
>
> my @diffs = ();
> (ref($obj1) ne ref($obj2)) && push(@diffs, 'class') && $fastmatch &&
> return \@diffs;
>
> foreach my $method (qw(display_name primary_tag source_tag seq_id)) {
> ($obj1->$method() ne $obj2->$method()) && push(@diffs, $method) &&
> $fastmatch && return \@diffs;
> }
>
> my @obj1_tags = $obj1->get_all_tags();
> my @obj2_tags = $obj2->get_all_tags();
> ($#obj1_tags != $#obj2_tags) && push(@diffs, 'num_tags') &&
> $fastmatch && return \@diffs;
> for my $tag (@obj1_tags) {
> my @obj1_values = sort $obj1->get_tag_values($tag);
> my @obj2_values = sort $obj2->get_tag_values($tag);
>
> ($#obj1_values != $#obj2_values) && push(@diffs, $tag .
> '_num_values') &&
> $fastmatch && return \@diffs;
> for my $i (0..$#obj1_values) {
> ($obj1_values[$i] ne $obj2_values[$i]) &&
> push(@diffs, $tag . '_value_' . $i) && $fastmatch &&
> return \@diffs;
> }
> }
>
> my $location_diffs = $self->diff($obj1->location(),
> $obj2->location());
> if ($#{$location_diffs} != -1) {
> push(@diffs, 'location', $location_diffs);
> }
>
> my $equals = 1;
> my @obj1_subfeatures = sort { $a->hash_key() cmp $b->hash_key() }
> $obj1->get_SeqFeatures();
> my @obj2_subfeatures = sort { $a->hash_key() cmp $b->hash_key() }
> $obj1->get_SeqFeatures();
> ($#obj1_subfeatures != $#obj2_subfeatures) &&
> push(@diffs, 'num_subfeatures') && $fastmatch && return \@diffs;
> for my $i (0..$#obj1_subfeatures) {
> my $sub_diffs = $self->diff($obj1_subfeatures[$i],
> $obj2_subfeatures[$i], $fastmatch);
> if (len(@{$sub_diffs}) != 0) {
> push(@diffs, 'subfeature_' . $i, $sub_diffs);
> return \@diffs if $fastmatch;
> }
> }
>
> return \@diffs;
> }
>
> sub diff_BioSpecies {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
> my $fastmatch = shift;
>
> return [] if $obj1 == $obj2;
>
> my @diffs = ();
>
> ref($obj1) ne ref($obj2) && push(@diffs, 'class') && $fastmatch &&
> return \@diffs;
>
> for my $method (qw(common_name variant organelle species genus
> sub_species ncbi_taxid division)) {
> $obj1->$method() ne $obj2->$method() && push(@diffs, $method) &&
> $fastmatch && return \@diffs;
> }
> my @obj1_classification = $obj1->classification();
> my @obj2_classification = $obj2->classification();
> my $same_classification = 1;
> for my $i (0..$#obj1_classification) {
> $same_classification = 0 if $obj1_classification[$i] ne
> $obj2_classification[$i];
> }
> $same_classification == 0 && push(@diffs, 'classification') &&
> $fastmatch && return \@diffs;
>
> return \@diffs;
> }
>
> 1;
> package Bio::ComparatorI;
> use vars(qw(@ISA));
> use strict;
> use Bio::Root::Root;
>
> @ISA = qw(Bio::Root::Root);
>
> my $comparatorClassVar;
>
> sub new {
> my $class = shift;
>
> my $self = bless {}, $class;
>
> $self->set_supported_classes();
> $self->{'diff_methods'} = {};
> foreach my $supported_class (@{$self->{'supported_classes'}}) {
> my $compacted_name = join('',split(/::/,$supported_class));
> $self->{'diff_methods'}->{$supported_class} = 'diff_' .
> $compacted_name;
> }
>
> return $self;
> }
>
> =pod
> Title : equals
> Usage : $obj->equals($obj, $other_obj)
> Function: Tests if two objects are equal, and returns true if they
> are, and false otherwise.
> Example :
> Returns : boolean
> Args : $other_obj (same type as $obj)
> =cut
>
> sub equals {
> my $self = shift;
> my $obj1 = shift;
> my $obj2 = shift;
>
> return ($#{($self->diff($obj1, $obj2, 0))} == -1);
> }
>
> =pod
> Title : diff
> Usage : $obj->diff($other_obj, [$fastmatch])
> Function: Returns a list of the attributes where $self doesn't match
> $other_obj. If the $fastmatch flag is set to true, only
> returns the first difference. In the case of complex objects,
> differences between two components are reflected as an
> attribute name followed by a listref listing the attributes
> of the component object that don't match. Thus, the listref
> returned by this method should strictly be interpreted as
> a recursive data structure.
>
> For example, this might be the listref returned by diff()
> on two Bio::Seq objects: [ 'primaryseq', [ 'display_id' ]]
> Example :
> Returns : listref of string and arrayref
> Args : $other_obj (Bio::ComparableI subclass)
> $fastmatch (int)
> =cut
>
> sub diff {
> my $self = shift;
>
> $self->throw_not_implemented();
> }
>
> sub set_supported_classes {
> my $self = shift;
>
> $self->throw_not_implemented();
> }
>
> sub can_compare {
> my $self = shift;
> my $obj1 = shift;
>
> my $class = ref($obj1);
> my $diff_method;
> if (!defined($self->{'diff_methods'}->{$class})) {
> for my $supported_class (@{$self->{'supported_classes'}}) {
> if ($obj1->isa($supported_class)) {
> $diff_method = $self->{'diff_methods'}->{$class} =
> $self->{'diff_methods'}->{$supported_class};
> last;
> }
> }
> } else {
> $diff_method = $self->{'diff_methods'}->{$class};
> }
>
> return $diff_method;
> }
>
> 1;
> ? ComparatorI.pm
> ? SampleComparator.pm
> ? Root/.Exception.pm.swp
> Index: AnnotationI.pm
> ===================================================================
> RCS file: /home/repository/bioperl/bioperl-live/Bio/AnnotationI.pm,v
> retrieving revision 1.7
> diff -u -3 -p -r1.7 AnnotationI.pm
> --- AnnotationI.pm 2002/10/22 07:38:24 1.7
> +++ AnnotationI.pm 2004/05/03 09:48:14
> @@ -177,4 +177,13 @@ sub tagname{
> shift->throw_not_implemented();
> }
>
> +sub hash_key {
> + my $self = shift;
> +
> + my $hash_tree = $self->hash_tree();
> + my @keys = sort keys(%{$hash_tree});
> + my $string = join(':', map { $hash_tree->{$_} } @keys);
> + return $string;
> +}
> +
> 1;
> Index: SeqFeatureI.pm
> ===================================================================
> RCS file: /home/repository/bioperl/bioperl-live/Bio/SeqFeatureI.pm,v
> retrieving revision 1.55
> diff -u -3 -p -r1.55 SeqFeatureI.pm
> --- SeqFeatureI.pm 2004/03/13 01:30:02 1.55
> +++ SeqFeatureI.pm 2004/05/03 09:48:45
> @@ -368,6 +368,26 @@ sub gff_string{
> return $formatter->gff_string($self);
> }
>
> +=head2 hash_key
> +
> + Title : hash_key
> + Usage : $key = hash_key();
> + Function: Provides a string which can be used as the key for a hash
> + table. This string is composed out of attributes of this
> + object in such a way that if two SeqFeatureI objects have
> the
> + same hash_key(), they are equivalent.
> +
> + Returns : string
> + Args : None
> +
> +=cut
> +
> +sub hash_key {
> + my $self = shift;
> +
> + return join(':', $self->primary_tag(), $self->display_name(),
> $self->start(), $self->end());
> +}
> +
> my $static_gff_formatter = undef;
>
> =head2 _static_gff_formatter
> Index: Annotation/Comment.pm
> ===================================================================
> RCS file:
> /home/repository/bioperl/bioperl-live/Bio/Annotation/Comment.pm,v
> retrieving revision 1.10
> diff -u -3 -p -r1.10 Comment.pm
> --- Annotation/Comment.pm 2004/05/03 00:28:20 1.10
> +++ Annotation/Comment.pm 2004/05/03 09:48:46
> @@ -116,6 +116,8 @@ sub hash_tree{
>
> my $h = {};
> $h->{'text'} = $self->text;
> +
> + return $h;
> }
>
> =head2 tagname
> Index: Annotation/SimpleValue.pm
> ===================================================================
> RCS file:
> /home/repository/bioperl/bioperl-live/Bio/Annotation/SimpleValue.pm,v
> retrieving revision 1.10
> diff -u -3 -p -r1.10 SimpleValue.pm
> --- Annotation/SimpleValue.pm 2003/03/10 21:53:56 1.10
> +++ Annotation/SimpleValue.pm 2004/05/03 09:49:37
> @@ -142,6 +142,8 @@ sub hash_tree{
>
> my $h = {};
> $h->{'value'} = $self->value;
> +
> + return $h;
> }
>
> =head2 tagname
>
--
-------------------------------------------------------------
Hilmar Lapp email: lapp at gnf.org
GNF, San Diego, Ca. 92121 phone: +1-858-812-1757
-------------------------------------------------------------
More information about the Bioperl-l
mailing list