[Biojava-l] location parser JUnit test

Brian King brian.king@animorphics.net
Tue, 13 Aug 2002 20:23:04 -0700 (PDT)


--0-167519580-1029295384=:22592
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

 

I wrote the attached location parser JUnit test case
while trying some alternative location
representations.  The test case parses short EMBL
record strings and checks that the parsed
Location object is what was expected.  Please use it
in BioJava if it's suitable.

Regards,
Brian

__________________________________________________
Do You Yahoo!?
HotJobs - Search Thousands of New Jobs
http://www.hotjobs.com
--0-167519580-1029295384=:22592
Content-Type: text/x-java; name="LocationParserTest.java"
Content-Description: LocationParserTest.java
Content-Disposition: inline; filename="LocationParserTest.java"

/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

package org.biojava.bio.seq.io;

import java.io.*;
import java.util.Iterator;
import junit.framework.*;
import org.biojava.bio.seq.Feature;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.StrandedFeature;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.symbol.FuzzyLocation;
import org.biojava.bio.symbol.Location;
import org.biojava.bio.symbol.PointLocation;

/**
 * Tests parsing of GenBank/EMBL/DDBJ location strings.  Each test feeds a
 * short EMBL-format record to <code>SeqIOTools.readEmbl</code> through a
 * <code>StringReader</code> and checks the <code>Location</code> (and,
 * where applicable, the strand) of the first feature that was parsed.
 *
 * @author Brian King (brian.king@sun.com, brian.king@animorphics.net)
 */
public class LocationParserTest extends TestCase
{
    /**
     * Constructor.
     *
     * @param name The name of the test case
     * @see TestCase
     */
    public LocationParserTest(String name)
    {
        super(name);
    }

    /**
     * Runs the unit tests defined here with the JUnit text runner.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args)
    {
        junit.textui.TestRunner.run(LocationParserTest.class);
    }

    /**
     * Parses an EMBL-format record and returns the first feature of the
     * first sequence read from it.
     *
     * @param emblRecord the complete text of the record
     * @return the first feature yielded by <code>Sequence.features()</code>
     * @throws Exception if reading or parsing the record fails
     */
    private static Feature firstFeature(String emblRecord) throws Exception
    {
        BufferedReader reader =
            new BufferedReader(new StringReader(emblRecord));

        SequenceIterator sequences = SeqIOTools.readEmbl(reader);
        Sequence sequence = sequences.nextSequence();

        Iterator features = sequence.features();

        // fail with a readable message instead of a NoSuchElementException
        assertTrue("Parsed sequence should contain at least one feature.",
                   features.hasNext());
        return (Feature) features.next();
    }

    /**
     * Returns the next block of a location's block iterator, failing the
     * test with a descriptive message if the iterator is exhausted.
     *
     * @param blocks iterator obtained from <code>Location.blockIterator()</code>
     * @param what   description of the expected block, used in the
     *               failure message
     * @return the next block
     */
    private static Location nextBlock(Iterator blocks, String what)
    {
        assertTrue("Missing " + what + ".", blocks.hasNext());
        return (Location) blocks.next();
    }

    /**
     * Asserts the minimum and maximum coordinates of a location.
     *
     * @param what        description of the location, inserted into the
     *                    failure messages ("Start of ... incorrect.")
     * @param expectedMin the expected value of <code>getMin()</code>
     * @param expectedMax the expected value of <code>getMax()</code>
     * @param loc         the location to check
     */
    private static void assertBounds(String what,
                                     int expectedMin,
                                     int expectedMax,
                                     Location loc)
    {
        assertEquals("Start of " + what + " incorrect.",
                     expectedMin,
                     loc.getMin());
        assertEquals("End of " + what + " incorrect.",
                     expectedMax,
                     loc.getMax());
    }

    /**
     * Test parse of a point location: "variation       1896"
     */
    public void testPoint() throws Exception
    {
        // create initial data model from record in EMBL format
        // extracted from AB000360.
        String doc =
            "ID   AB000360   standard; DNA; HUM; 2582 BP.\n" +
            "AC   AB000360                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   variation       1896\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        PointLocation loc = (PointLocation) f.getLocation();

        assertEquals("Feature should be in POSITIVE orientation.",
                     StrandedFeature.POSITIVE,
                     f.getStrand());
        assertTrue("Location should be contiguous", loc.isContiguous());

        assertBounds("point", 1896, 1896, loc);
    }

    /**
     * Test the most common case (start..end)
     */
    public void testRange() throws Exception
    {
        // create initial data model from record in EMBL format
        String doc =
            "ID   ABCDEFGH   standard; DNA; HUM; 4238 BP.\n" +
            "AC   ABCDEFGH                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   polyA_signal    4183..4188\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        Location loc = f.getLocation();

        assertEquals("Feature should be in POSITIVE orientation.",
                     StrandedFeature.POSITIVE,
                     f.getStrand());
        assertTrue("Location should be contiguous", loc.isContiguous());

        assertBounds("range", 4183, 4188, loc);
    }

    /**
     * Test a range whose start is fuzzy: &lt;1..99
     */
    public void testFuzzy() throws Exception
    {
        // create initial data model from record in EMBL format
        String doc =
            "ID   ABCDEFGH   standard; DNA; HUM; 4238 BP.\n" +
            "AC   ABCDEFGH                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   intron          <1..99\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        FuzzyLocation loc = (FuzzyLocation) f.getLocation();

        assertEquals("Feature should be in POSITIVE orientation.",
                     StrandedFeature.POSITIVE,
                     f.getStrand());
        assertTrue("Location should be contiguous", loc.isContiguous());

        // only the "<1" end of the range is marked as fuzzy
        assertTrue("Location min should be fuzzy", loc.isMinFuzzy());
        assertTrue("Location max should not be fuzzy", !loc.isMaxFuzzy());

        assertBounds("range", 1, 99, loc);
    }

    /**
     * Test join(2762..2959,3175..3319)
     */
    public void testJoin() throws Exception
    {
        // create initial data model from record in EMBL format
        String doc =
            "ID   ABCDEFGH   standard; DNA; HUM; 4238 BP.\n" +
            "AC   ABCDEFGH                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   CDS             join(2762..2959,3175..3319)\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        Location loc = f.getLocation();

        assertEquals("Feature should be in POSITIVE orientation.",
                     StrandedFeature.POSITIVE,
                     f.getStrand());
        assertTrue("Location should not be contiguous", !loc.isContiguous());

        // test each region, in the order the block iterator returns them
        Iterator blocks = loc.blockIterator();

        assertBounds("first region of join", 2762, 2959,
                     nextBlock(blocks, "first region of join"));
        assertBounds("second region of join", 3175, 3319,
                     nextBlock(blocks, "second region of join"));
    }

    /**
     * Test complement(4183..4188)
     */
    public void testComplementRange() throws Exception
    {
        // create initial data model from record in EMBL format
        String doc =
            "ID   ABCDEFGH   standard; DNA; HUM; 4238 BP.\n" +
            "AC   ABCDEFGH                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   polyA_signal    complement(4183..4188)\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        Location loc = f.getLocation();

        assertEquals("Feature should be in NEGATIVE orientation.",
                     StrandedFeature.NEGATIVE,
                     f.getStrand());
        assertTrue("Location should be contiguous", loc.isContiguous());

        assertBounds("range", 4183, 4188, loc);
    }

    /**
     * Test for parsing of joins of remote locations
     */
    public void testRemoteJoin() throws Exception
    {
        // create initial data model.  Extracted from EMBL record
        // AB002461
        String doc =
            "ID   AB002461   standard; DNA; HUM; 4238 BP.\n" +
            "AC   AB002461;                          \n" +
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   CDS             join(AB002455.1:2762..2959,AB002456.1:175..319,\n" +
            "FT                   AB002456.1:1517..1570,AB002456.1:1661..1788,\n" +
            "FT                   AB002457.1:449..570,AB002458.1:284..554,\n" +
            "FT                   AB002459.1:309..375,AB002459.1:555..684,\n" +
            "FT                   AB002460.1:446..579,AB002460.1:672..736,100..182,602..767)\n" +
            "//";

        // The feature is read as a plain Feature and no orientation is
        // asserted: per the original author's note, remote joins don't have
        // a strand (the parsed object is presumably a SimpleRemoteFeature).
        Feature f = firstFeature(doc);
        Location loc = f.getLocation();

        assertTrue("Location should not be contiguous", !loc.isContiguous());

        // test each region
        // the given join has two local regions (100..182 and 602..767);
        // these are expected to be the first two blocks of the location
        Iterator blocks = loc.blockIterator();

        assertBounds("first region of join", 100, 182,
                     nextBlock(blocks, "first region of join"));
        assertBounds("second region of join", 602, 767,
                     nextBlock(blocks, "second region of join"));
    }

    /**
     * Test complement(join(2762..2959,3175..3319))
     */
    public void testComplementJoin() throws Exception
    {
        // create initial data model from record in EMBL format
        String doc =
            "ID   ABCDEFGH   standard; DNA; HUM; 4238 BP.\n" +
            "AC   ABCDEFGH                          \n" +  // gives Sequence object a URN
            "FH   Key             Location/Qualifiers\n" +
            "FH                                      \n" +
            "FT   CDS             complement(join(2762..2959,3175..3319))\n" +
            "//";

        StrandedFeature f = (StrandedFeature) firstFeature(doc);
        Location loc = f.getLocation();

        assertEquals("Feature should be in NEGATIVE orientation.",
                     StrandedFeature.NEGATIVE,
                     f.getStrand());
        assertTrue("Location should not be contiguous", !loc.isContiguous());

        // test each region, in the order the block iterator returns them
        Iterator blocks = loc.blockIterator();

        assertBounds("first region of join", 2762, 2959,
                     nextBlock(blocks, "first region of join"));
        assertBounds("second region of join", 3175, 3319,
                     nextBlock(blocks, "second region of join"));
    }
}

--0-167519580-1029295384=:22592--