[Biojava-l] Issues with FlexibleAlignment
Russ Kepler
russ at kepler-eng.com
Thu Nov 24 09:54:35 EST 2005
In working with FlexibleAlignment I've found a problem. When an overlap is empty the
underlying code in AbstractULAlignment will return nulls for positions not containing
symbols, and this confuses things later on. An example program below will demonstrate
a simple case of the problem (code stolen from TestSimpleAlignment.java and modified).
Is there a symbol that should be returned for this case? It should be distinct from the
gap symbol, because areas outside the aligned sequence aren't gap space; they simply don't exist.
Some comments in the list archive suggest that a space was going to be returned, but
I can't see an attempt to implement that (for example, it's not added to the alphabet),
and right now a null is returned.
package symbol;
import java.util.ArrayList;
import java.util.Iterator;
import org.biojava.bio.alignment.FlexibleAlignment;
import org.biojava.bio.alignment.SimpleAlignmentElement;
import org.biojava.bio.symbol.RangeLocation;
import org.biojava.bio.symbol.SymbolList;
/**
 * Small command-line driver that assembles a {@code FlexibleAlignment} and dumps it.
 *
 * <p>The alignment is seeded with a 30-position element labelled "reference"; three
 * further elements ("pigs", "dogs", "cats") are then added at positions 1-10, 11-20
 * and 21-30 respectively, so no added row spans the whole reference. The program
 * prints every row via {@code seqString()} and then every column via
 * {@code symbolAt(i).getName()}.
 *
 * <p>NOTE(review): {@code Tools} is not imported here, so it is presumably a helper
 * in the same package; what {@code createSymbolList(n)} produces (the original
 * comment says "random sequences") cannot be confirmed from this file.
 */
public class TestFlexibleAlignment {

    /**
     * Builds the alignment and writes its rows and columns to standard output.
     * Any exception raised along the way is reported on standard error and the
     * JVM exits with status 1.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            // Symbol lists for the three rows; created before the reference so the
            // helper is invoked in the same order as before.
            SymbolList pigsSymbols = Tools.createSymbolList(10);
            SymbolList dogsSymbols = Tools.createSymbolList(10);
            SymbolList catsSymbols = Tools.createSymbolList(10);

            // The alignment is constructed from a one-element list holding the reference.
            ArrayList seedElements = new ArrayList(1);
            SymbolList referenceSymbols = Tools.createSymbolList(30);
            seedElements.add(
                new SimpleAlignmentElement("reference", referenceSymbols, new RangeLocation(1, 30)));
            FlexibleAlignment alignment = new FlexibleAlignment(seedElements);

            // Place each row on its own, non-overlapping third of the reference.
            alignment.addSequence(
                new SimpleAlignmentElement("pigs", pigsSymbols, new RangeLocation(1, 10)));
            alignment.addSequence(
                new SimpleAlignmentElement("dogs", dogsSymbols, new RangeLocation(11, 20)));
            alignment.addSequence(
                new SimpleAlignmentElement("cats", catsSymbols, new RangeLocation(21, 30)));

            // Row-wise dump: one line per label.
            System.out.println("Sequences in alignment");
            Iterator labels = alignment.getLabels().iterator();
            while (labels.hasNext()) {
                String rowLabel = (String) labels.next();
                SymbolList row = alignment.symbolListForLabel(rowLabel);
                System.out.println(rowLabel + ":\t" + row.seqString());
            }
            System.out.flush();

            // Column-wise dump: positions are 1-based and run up to length() inclusive.
            System.out.println("Columns");
            for (int column = 1; column <= alignment.length(); column++) {
                System.out.println(column + ":\t" + alignment.symbolAt(column).getName());
            }
        } catch (Exception failure) {
            failure.printStackTrace(System.err);
            System.exit(1);
        }
    }
}
More information about the Biojava-l
mailing list