[Biojava-l] Re: OrderNDistributions
Russell Smithies
russell.smithies at xtra.co.nz
Sun Feb 23 11:39:57 EST 2003
> Hi,
>
> Does anyone have some sample code showing how to use the higher-order
> distributions? I'm not quite sure how to make the counts work there.
>
> Also, is there any code to evaluate the probability of a sequence given
> a distribution? If not, where should it be added (which class) if I was
> to put it in there?
>
> Thanks,
>
> François
>
> "I know life isn't fair, but why can't it ever be un-fair in MY favor!?"
> Calvin (Calvin & Hobbes)
Here's a bit of code to show how to create a custom Alphabet and an
OrderNDistribution on it.
(Apologies to any dwarfs; I got sick of the Dice Alphabet being used as a
demo.)
----------------------------------------------------------------------------
-----------------------
import java.util.*;
import org.biojava.bio.*;
import org.biojava.bio.dist.*;
import org.biojava.bio.symbol.*;
import org.biojava.utils.*;
import java.io.FileOutputStream;
public class DistTest {
public static void main(String[] args) throws Exception{
//create a custom dwarf alphabet
String[] dNames = {
"Grumpy", "Sleepy", "Dopey", "Doc", "Happy", "Sneezy", "Bashful"
};
Symbol[] dwarfs = new Symbol[7];
SimpleAlphabet dwarfAlphabet = new SimpleAlphabet();
dwarfAlphabet.setName("Dwarf");
for (int i = 1; i <= 7; i++) {
try {
dwarfs[i - 1] = AlphabetManager.createSymbol((char) ('0' +
i),
"" + dNames[i - 1],
Annotation.EMPTY_ANNOTATION);
dwarfAlphabet.addSymbol(dwarfs[i - 1]);
} catch (Exception e) {
throw new NestedError(e, "Can't create symbols to represent
dwarf");
}
}
//create the OrderNDistribution
int order = 3;
Alphabet a =
AlphabetManager.getCrossProductAlphabet(Collections.nCopies(order,
dwarfAlphabet));
OrderNDistribution ond = (OrderNDistribution)
OrderNDistributionFactory.DEFAULT.createDistribution(a);
//create DistributionTrainer
DistributionTrainerContext dtc = new
SimpleDistributionTrainerContext();
//register the dist with the trainer
dtc.registerDistribution(ond);
//create a random symbolList of dwarves
UniformDistribution udist = new UniformDistribution((FiniteAlphabet)
dwarfAlphabet);
int size = 102;
List list = list = new ArrayList();
for (int i = 0; i < size; i++) {
list.add(udist.sampleSymbol());
}
//create a symbolList
SymbolList symbl = new SimpleSymbolList(dwarfAlphabet, list);
//make it into an orderNSymbolList
symbl = SymbolListViews.orderNSymbolList(symbl, order);
//or you could have a windowed symbolList
//symbl = SymbolListViews.windowedSymbolList(symbl, order);
//check the distributin alphabet
System.out.println(ond.getAlphabet());
//((Dwarf x Dwarf) x Dwarf)
//add counts to the distribution
for (Iterator i = symbl.iterator(); i.hasNext();) {
try {
Symbol s = (Symbol) i.next();
// System.out.println(s.getName());
dtc.addCount(ond, s, 1.0);
} catch (IllegalSymbolException ex) {
}
}
// don't forget to train or none of your weights will be added
dtc.train();
//write the distribution to XML
XMLDistributionWriter writer = new XMLDistributionWriter();
writer.writeDistribution(ond, new FileOutputStream("dwarf.xml"));
/*
<?xml version="1.0" ?>
- <Distribution type="OrderNDistribution">
<alphabet name="((Dwarf x Dwarf) x Dwarf)" />
- <conditioning_symbol name="(Grumpy Happy)">
<weight sym="Happy" prob="0.0" />
<weight sym="Sleepy" prob="0.3333333333333333" />
<weight sym="Grumpy" prob="0.6666666666666666" />
<weight sym="Sneezy" prob="0.0" />
<weight sym="Dopey" prob="0.0" />
<weight sym="Doc" prob="0.0" />
<weight sym="Bashful" prob="0.0" />
</conditioning_symbol>
- <conditioning_symbol name="(Dopey Dopey)">
<weight sym="Happy" prob="0.0" />
<weight sym="Sleepy" prob="0.0" />
<weight sym="Grumpy" prob="0.0" />
<weight sym="Sneezy" prob="0.0" />
<weight sym="Dopey" prob="0.0" />
<weight sym="Doc" prob="0.0" />
<weight sym="Bashful" prob="1.0" />
</conditioning_symbol>
- <conditioning_symbol name="(Sneezy Grumpy)">
<weight sym="Happy" prob="0.0" />
*/
}
}
----------------------------------------------------------------------
More information about the Biojava-l
mailing list