[Biojava-l] Issues with FlexibleAlignment

Russ Kepler russ at kepler-eng.com
Thu Nov 24 09:54:35 EST 2005


In working with FlexibleAlignment I've found a problem.  When an overlap is empty, the
underlying code in AbstractULAlignment will return nulls for positions not containing
symbols, and this confuses things later on.  An example program below will demonstrate
a simple case of the problem (code stolen from TestSimpleAlignment.java and modified).

Is there a symbol that should be returned for this case?  It should be distinct from the
gap symbol, as areas outside the aligning sequence aren't gap space; they just don't exist.
Some comments in the list archive suggest that a space was going to be returned, but
I can't see an attempt to implement that (it's not added to the alphabet, for example),
and right now a null is returned.


package symbol;

import java.util.ArrayList;
import java.util.Iterator;

import org.biojava.bio.alignment.FlexibleAlignment;
import org.biojava.bio.alignment.SimpleAlignmentElement;
import org.biojava.bio.symbol.RangeLocation;
import org.biojava.bio.symbol.SymbolList;

/**
 * Stand-alone driver that builds a {@link FlexibleAlignment} from one
 * reference element spanning positions 1-30 and three further elements placed
 * at 1-10, 11-20 and 21-30, then prints the alignment row by row and column
 * by column.
 *
 * NOTE(review): the symbol lists come from Tools.createSymbolList, which is
 * not shown here; presumably it yields a random list of the requested
 * length -- confirm against that helper.
 */
public class TestFlexibleAlignment {
    /**
     * Builds the alignment and dumps it to standard output. Any exception
     * raised along the way is printed to standard error and the JVM exits
     * with status 1.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        try {
            // Three sequences to be placed along the reference.
            SymbolList pigSymbols = Tools.createSymbolList(10);
            SymbolList dogSymbols = Tools.createSymbolList(10);
            SymbolList catSymbols = Tools.createSymbolList(10);

            // The alignment is seeded with a one-element list holding the reference.
            SymbolList referenceSymbols = Tools.createSymbolList(30);
            ArrayList seedElements = new ArrayList(1);
            seedElements.add(
                new SimpleAlignmentElement("reference", referenceSymbols, new RangeLocation(1, 30)));
            FlexibleAlignment alignment = new FlexibleAlignment(seedElements);

            // Place the sequences end to end so that no two of them share a column.
            alignment.addSequence(
                new SimpleAlignmentElement("pigs", pigSymbols, new RangeLocation(1, 10)));
            alignment.addSequence(
                new SimpleAlignmentElement("dogs", dogSymbols, new RangeLocation(11, 20)));
            alignment.addSequence(
                new SimpleAlignmentElement("cats", catSymbols, new RangeLocation(21, 30)));

            // Row view: one line per label.
            System.out.println("Sequences in alignment");
            Iterator labelIterator = alignment.getLabels().iterator();
            while (labelIterator.hasNext()) {
                String rowLabel = (String) labelIterator.next();
                SymbolList rowSymbols = alignment.symbolListForLabel(rowLabel);
                System.out.println(rowLabel + ":\t" + rowSymbols.seqString());
            }
            System.out.flush();

            // Column view: alignment positions are 1-based.
            System.out.println("Columns");
            int column = 1;
            while (column <= alignment.length()) {
                System.out.println(column + ":\t" + alignment.symbolAt(column).getName());
                column++;
            }
        } catch (Exception failure) {
            failure.printStackTrace(System.err);
            System.exit(1);
        }
    }
}


More information about the Biojava-l mailing list