[Biojava-l] subsequences

Thomas Down td2@sanger.ac.uk
Thu, 17 Jan 2002 13:18:20 +0000


--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

Following on from this discussion, there is one remaining
issue with the SubSequence code.  It's never been terribly
clear what to do when features overlap the boundary of
a SubSequence.  Currently, they're still projected onto the
subsequence -- and thus end up with coordinates outside the
SubSequence to which they're attached.

This has been discussed in the past, and the conclusion
seemed to be that partial features should be presented
as an alternative feature type (RemoteFeature), which can
(where possible) be resolved back to the underlying,
complete feature.  The RemoteFeature interface, plus a 
general-purpose implementation, have been included in the
tree for some time, but aren't being widely used.

I've now written a (currently experimental) replacement for
SubSequence which transforms overhanging features into
RemoteFeatures.  This seems to work okay, and it's a good
demonstration of RemoteFeatures in action.

I'm now wondering if it's worth committing this code before
the 1.2 branch (with very careful testing, obviously).  Does
anyone have strong feelings either way?  And is there anyone
who wants to speak out in favour of the old-style (overhanging
features) way of doing things?

Anyway, code's attached for anyone who wants to try it, plus
a test suite.  One caveat at the moment:

  - If any overhanging feature has child features, these
    are no longer projected onto the subsequence, even if
    some of the child features are fully contained within
    the subsequence.  Should those child features which
    overlap the subsequence be projected as children of
    the RemoteFeature?  I'm tending towards the view that
    they should (it's not too hard to implement this :-),
    but it's worth discussing -- I don't think I've ever
    seen anyone implement RemoteFeature with child features...

Thomas.


--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="SubSequence2.java"

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq;

import org.biojava.bio.*;
import org.biojava.bio.symbol.*;
import org.biojava.bio.seq.impl.*;
import org.biojava.bio.seq.projection.*;
import org.biojava.utils.*;

import java.util.*;

/**
 * View a sub-section of a given sequence object, including all the
 * features intersecting that region.
 *
 * <p>
 * Features which lie entirely within the viewed region are projected
 * into the coordinate system of the subsequence.  Features which
 * overhang either end of the region are presented as RemoteFeatures
 * instead: their location is clipped to the subsequence, and they
 * resolve back to the complete feature on the parent sequence.
 * </p>
 *
 * @author Thomas Down
 * @since 1.3
 */

public class SubSequence2 extends LazyFeatureHolder implements Sequence {
    private final Sequence parent;
    private final SymbolList symbols;
    private final String name;
    private final String uri;
    private final Annotation annotation;
    private final ProjectionContext projectionContext;
    private final int start;
    private final int end;

    /**
     * Construct a new SubSequence of the specified sequence.
     *
     * @param seq A sequence to view
     * @param start The start of the range to view
     * @param end The end of the range to view
     * @throws IndexOutOfBoundsException if the start or end position is illegal.
     */

    public SubSequence2(Sequence seq, final int start, final int end) {
        this.parent = seq;
        this.start = start;
        this.end = end;

        symbols = seq.subList(start, end);
        name = seq.getName() + " (" + start + " - " + end + ")";
        uri = seq.getURN() + "?start=" + start + ";end=" + end;
        annotation = seq.getAnnotation();

        // Position 'start' on the parent becomes position 1 on this view.
        final int translation = 1 - start;

        // Context used when projecting features which are fully contained
        // in the viewed region: everything is re-parented onto this
        // subsequence and shifted into its coordinate system.
        this.projectionContext = new ProjectionContext() {
            public FeatureHolder getParent(Feature f) {
                return SubSequence2.this;
            }

            public Sequence getSequence(Feature f) {
                return SubSequence2.this;
            }

            public Location getLocation(Feature f) {
                return f.getLocation().translate(translation);
            }

            public StrandedFeature.Strand getStrand(StrandedFeature f) {
                return f.getStrand();
            }

            public Annotation getAnnotation(Feature f) {
                return f.getAnnotation();
            }

            public FeatureHolder projectChildFeatures(Feature f, FeatureHolder parent) {
                return ProjectedFeatureHolder.projectFeatureHolder(f, parent, translation, false);
            }
        };
    }

    //
    // SymbolList stuff -- all delegated to the sub-list of the parent
    //

    public Symbol symbolAt(int pos) {
        return symbols.symbolAt(pos);
    }

    public Alphabet getAlphabet() {
        return symbols.getAlphabet();
    }

    public SymbolList subList(int start, int end) {
        return symbols.subList(start, end);
    }

    public String seqString() {
        return symbols.seqString();
    }

    public String subStr(int start, int end) {
        return symbols.subStr(start, end);
    }

    public List toList() {
        return symbols.toList();
    }

    public int length() {
        return symbols.length();
    }

    public Iterator iterator() {
        return symbols.iterator();
    }

    /**
     * SubSequences are not editable; every edit is vetoed.
     *
     * @param edit the requested edit (ignored)
     * @throws ChangeVetoException always
     */
    public void edit(Edit edit)
        throws ChangeVetoException
    {
        throw new ChangeVetoException("Can't edit SubSequences");
    }

    //
    // Lazy feature holder
    //

    /**
     * Build the set of features visible on this subsequence.
     *
     * <p>
     * Top-level features of the parent which overlap the viewed range
     * are collected (the filter is not recursive).  Those whose extent
     * lies entirely within the range are projected using this view's
     * projection context.  The rest are wrapped as RemoteFeatures whose
     * location is the translated location clipped to
     * <code>1 .. (end - start + 1)</code>.
     * </p>
     *
     * @return a holder containing the projected and remote features
     */
    protected FeatureHolder createFeatureHolder() {
        try {
            SimpleFeatureHolder results = new SimpleFeatureHolder();
            FeatureHolder rawFeatures = parent.filter(new FeatureFilter.OverlapsLocation(new RangeLocation(start, end)), false);
            for (Iterator i = rawFeatures.features(); i.hasNext(); ) {
                final Feature f = (Feature) i.next();
                Location l = f.getLocation();
                if (l.getMin() >= start && l.getMax() <= end) {
                    // Fully contained: a straightforward projection.
                    results.addFeature(ProjectionEngine.DEFAULT.projectFeature(f, projectionContext));
                } else {
                    // Overhangs the boundary: expose only the visible part,
                    // as a RemoteFeature pointing back at the original.
                    RemoteFeature.Template rft = new RemoteFeature.Template();
                    rft.type = f.getType();
                    rft.source = f.getSource();
                    rft.annotation = f.getAnnotation();
                    rft.location = LocationTools.intersection(l.translate(1 - start),
                                                              new RangeLocation(1, end - start + 1));
                    rft.resolver = new RemoteFeature.Resolver() {
                        public Feature resolve(RemoteFeature rFeat) {
                            return f;
                        }
                    };
                    // A single region: the whole feature, in the coordinates
                    // of the sequence it really lives on.
                    rft.regions = Collections.singletonList(new RemoteFeature.Region(f.getLocation(), f.getSequence().getName()));

                    results.addFeature(new SimpleRemoteFeature(this, this, rft));
                }
            }

            return results;
        } catch (ChangeVetoException cve) {
            // Keep the original exception as the cause so the veto can be traced.
            throw new BioError(cve, "Assertion failure: can't modify newly created feature holder");
        }
    }

    //
    // Identifiable
    //

    public String getName() {
        return name;
    }

    public String getURN() {
        return uri;
    }

    //
    // Annotatable
    //

    public Annotation getAnnotation() {
        return annotation;
    }

    /**
     * Return the sequence of which this is a sub-section.
     *
     * @return the sequence passed to the constructor
     */
    public Sequence getParent() {
        return this.parent;
    }
}

--pWyiEgJYm5f9v55/
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="SubSequenceTest2.java"

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq;

import java.util.*;
import org.biojava.bio.*;
import org.biojava.bio.symbol.*;
import org.biojava.bio.seq.impl.*;
import junit.framework.TestCase;

/**
 * Tests for SubSequence2: symbol extraction, projection of features
 * which are fully contained in the viewed region, and conversion of
 * features which overhang the region into RemoteFeatures.
 *
 * @author Thomas Down
 * @since 1.3
 */

public class SubSequenceTest2 extends TestCase
{
    protected Sequence seq;
    protected Sequence subseq;

    public SubSequenceTest2(String name) {
        super(name);
    }

    /**
     * Build a 16-base sequence carrying three features, then view
     * positions 8 to 14 of it.  Relative to that view the features are:
     * 1-3 (entirely outside), 10-12 (entirely inside) and 7-9
     * (overhanging the start).
     */
    protected void setUp() throws Exception {
        seq = new SimpleSequence(DNATools.createDNA("aacgtaggttccatgc"),
                                 "fragment1",
                                 "fragment1",
                                 Annotation.EMPTY_ANNOTATION);

        Feature.Template sft = new Feature.Template();
        sft.type = "test";
        sft.source = "test";
        sft.annotation = Annotation.EMPTY_ANNOTATION;
        sft.location = new RangeLocation(1, 3);
        seq.createFeature(sft);

        sft.location = new RangeLocation(10, 12);
        seq.createFeature(sft);

        sft.location = new RangeLocation(7, 9);
        seq.createFeature(sft);

        subseq = new SubSequence2(seq, 8, 14);
    }

    /**
     * The view must expose exactly the symbols at positions 8-14.
     */
    public void testSymbols()
        throws Exception
    {
        assertTrue(compareSymbolList(subseq,
                                     DNATools.createDNA("gttccat")));
    }

    /**
     * Only the two features which overlap the view should be visible.
     */
    public void testFeatureClipping()
        throws Exception
    {
        assertEquals(2, subseq.countFeatures());
    }

    /**
     * The contained feature (10-12 on the parent) should appear at 3-5.
     */
    public void testFeatureProjection()
        throws Exception
    {
        Feature f = (Feature) subseq.filter(new FeatureFilter.Not(new FeatureFilter.ByClass(RemoteFeature.class)), false).features().next();
        Location fl = f.getLocation();
        assertEquals(3, fl.getMin());
        assertEquals(5, fl.getMax());
    }

    /**
     * The overhanging feature (7-9 on the parent) should appear as a
     * RemoteFeature clipped to 1-2, resolving to a feature on the
     * original sequence.
     */
    public void testRemoteFeature()
        throws Exception
    {
        RemoteFeature f = (RemoteFeature) subseq.filter(new FeatureFilter.ByClass(RemoteFeature.class), false).features().next();
        Location fl = f.getLocation();
        assertEquals(1, fl.getMin());
        assertEquals(2, fl.getMax());
        assertEquals(seq.getName(), f.getRemoteFeature().getSequence().getName());
    }

    /**
     * Compare two symbol lists position by position.  Symbols are
     * compared by object identity, not with equals().
     *
     * @param sl1 the first list
     * @param sl2 the second list
     * @return true if both lists have the same length and hold the
     *         same symbol objects in the same order
     */
    private boolean compareSymbolList(SymbolList sl1, SymbolList sl2) {
        if (sl1.length() != sl2.length()) {
            return false;
        }

        Iterator si1 = sl1.iterator();
        Iterator si2 = sl2.iterator();
        while (si1.hasNext()) {
            if (si1.next() != si2.next()) {
                return false;
            }
        }

        return true;
    }
}

--pWyiEgJYm5f9v55/--