[Biojava-dev] Creating Custom Symbols and Alphabets

Uday kamath kamathuday at gmail.com
Tue Jul 2 18:31:13 UTC 2013


Hello

I have my own character sequences using the letters A to I (9 characters). I
created my own alphabet, but when I try to do matches I get an
exception, even though I think I have created the alphabet correctly, as in the code
below. Any help?

*****************************EXCEPTION*********************************
org.biojava.bio.BioError: Internal error: failed to tokenize a Symbol
from an existing SymbolList
at org.biojava.bio.seq.io.SymbolListCharSequence.<init>(SymbolListCharSequence.java:84)
at org.biojava.utils.regex.Matcher.<init>(Matcher.java:27)
at org.biojava.utils.regex.Pattern.matcher(Pattern.java:52)
Caused by: org.biojava.bio.symbol.IllegalSymbolException: No mapping
for symbol [g h b f a]
at org.biojava.bio.seq.io.CharacterTokenization._tokenizeSymbol(CharacterTokenization.java:192)
at org.biojava.bio.seq.io.CharacterTokenization.tokenizeSymbol(CharacterTokenization.java:200)
at org.biojava.bio.seq.io.SymbolListCharSequence.<init>(SymbolListCharSequence.java:78)
... 10 more
**********************************CODE***********************************************
public class SAXAlphabets {
// Holds the Symbols of the custom alphabet; filled by the static initializer below.
static Set  symbols = new HashSet();
static {
    // Create one Symbol per letter 'a' through 'i'. Each Symbol is created
    // with its letter as the character argument, the same letter as its
    // name, and the empty annotation, then added to the set.
    for (char letter = 'a'; letter <= 'i'; letter++) {
        symbols.add(
            AlphabetManager.createSymbol(
                letter, String.valueOf(letter), Annotation.EMPTY_ANNOTATION));
    }
}
/**
 * Builds the "sax" alphabet from the class-level {@code symbols} set,
 * registers it with the {@code AlphabetManager} under its own name, and
 * attaches a character tokenization to it under the key "token".
 *
 * <p>Every Symbol returned by the alphabet's iterator is bound to the first
 * character of its name.
 *
 * <p>NOTE(review): the exception quoted with this code reports "No mapping
 * for symbol [g h b f a]", which names several symbols at once. Only the
 * Symbols yielded by the alphabet's iterator are bound here, so that
 * multi-symbol entry presumably has no binding - confirm against the
 * BioJava tokenization documentation.
 *
 * @return the newly created and registered alphabet
 * @throws Exception if any of the BioJava calls made here fails
 */
public static FiniteAlphabet getSAXAlphabets() throws Exception {
    SimpleAlphabet saxAlphabet = new SimpleAlphabet(symbols, "sax");
    CharacterTokenization charTokens = new CharacterTokenization(saxAlphabet, false);

    // Register before binding, in the same order as the original code.
    AlphabetManager.registerAlphabet(saxAlphabet.getName(), saxAlphabet);

    // Collect the alphabet's Symbols while binding each to its character.
    List members = new ArrayList();
    for (Iterator it = saxAlphabet.iterator(); it.hasNext(); ) {
        Symbol member = (Symbol) it.next();
        members.add(member);
        char[] nameChars = member.getName().toCharArray();
        charTokens.bindSymbol(member, nameChars[0]);
    }

    // The returned value is discarded; presumably this call only checks that
    // every collected Symbol can be tokenized - TODO confirm.
    SymbolList everySymbol = new SimpleSymbolList(saxAlphabet, members);
    charTokens.tokenizeSymbolList(everySymbol);

    saxAlphabet.putTokenization("token", charTokens);
    return saxAlphabet;
}



More information about the biojava-dev mailing list