[Biojava-l] Re: OrderNDistributions

Russell Smithies russell.smithies at xtra.co.nz
Sun Feb 23 11:39:57 EST 2003


> Hi,
>
> Does anyone have some sample code on how to use the higher order
> distributions? I'm not quite sure how to make the counts work there.
>
> Also, is there any code to evaluate the probability of a sequence given
> a distribution? If not, where should it be added (which class) if I was
> to put it in there?
>
> Thanks,
>
> François
>
> "I know life isn't fair, but why can't it ever be un-fair in MY favor!?"
>                   Calvin (Calvin & Hobbes)

Here's a bit of code to show how to create a custom Alphabet and an
OrderNDistribution on it.
(Apologies to any dwarfs; I got sick of the Dice Alphabet being used as a
demo.)
----------------------------------------------------------------------------
-----------------------
import java.util.*;

import org.biojava.bio.*;
import org.biojava.bio.dist.*;
import org.biojava.bio.symbol.*;
import org.biojava.utils.*;
import java.io.FileOutputStream;


public class DistTest {
    public static void main(String[] args) throws Exception{

      //create a custom dwarf alphabet
        String[] dNames = {
            "Grumpy", "Sleepy", "Dopey", "Doc", "Happy", "Sneezy", "Bashful"
        };
        Symbol[] dwarfs = new Symbol[7];
        SimpleAlphabet dwarfAlphabet = new SimpleAlphabet();
        dwarfAlphabet.setName("Dwarf");

        for (int i = 1; i <= 7; i++) {
            try {
                dwarfs[i - 1] = AlphabetManager.createSymbol((char) ('0' +
i),
                                                "" +  dNames[i - 1],
                           Annotation.EMPTY_ANNOTATION);
                dwarfAlphabet.addSymbol(dwarfs[i - 1]);
            } catch (Exception e) {
                throw new NestedError(e, "Can't create symbols to represent
dwarf");
            }
        }

 //create the OrderNDistribution
        int order = 3;

        Alphabet a =
AlphabetManager.getCrossProductAlphabet(Collections.nCopies(order,
dwarfAlphabet));

        OrderNDistribution  ond = (OrderNDistribution)
OrderNDistributionFactory.DEFAULT.createDistribution(a);

 //create DistributionTrainer
        DistributionTrainerContext dtc = new
SimpleDistributionTrainerContext();

 //register the dist with the trainer
        dtc.registerDistribution(ond);

 //create a random symbolList of dwarves
        UniformDistribution udist = new UniformDistribution((FiniteAlphabet)
dwarfAlphabet);

        int size = 102;
        List list = list = new ArrayList();

        for (int i = 0; i < size; i++) {
            list.add(udist.sampleSymbol());
        }


       //create a symbolList
        SymbolList symbl = new SimpleSymbolList(dwarfAlphabet, list);

      //make it into an orderNSymbolList
      symbl = SymbolListViews.orderNSymbolList(symbl, order);

     //or you could have a windowed symbolList
     //symbl = SymbolListViews.windowedSymbolList(symbl, order);


        //check the distributin alphabet
 System.out.println(ond.getAlphabet());
 //((Dwarf x Dwarf) x Dwarf)


 //add counts to the distribution
        for (Iterator i = symbl.iterator(); i.hasNext();) {
            try {
                Symbol s = (Symbol) i.next();
             //   System.out.println(s.getName());
                dtc.addCount(ond, s, 1.0);
            } catch (IllegalSymbolException ex) {
            }
        }

        // don't forget to train or none of your weights will be added
 dtc.train();

 //write the distribution to XML
        XMLDistributionWriter writer = new XMLDistributionWriter();

 writer.writeDistribution(ond, new FileOutputStream("dwarf.xml"));

 /*

<?xml version="1.0" ?>
- <Distribution type="OrderNDistribution">
  <alphabet name="((Dwarf x Dwarf) x Dwarf)" />
    - <conditioning_symbol name="(Grumpy Happy)">
 <weight sym="Happy" prob="0.0" />
 <weight sym="Sleepy" prob="0.3333333333333333" />
 <weight sym="Grumpy" prob="0.6666666666666666" />
 <weight sym="Sneezy" prob="0.0" />
 <weight sym="Dopey" prob="0.0" />
 <weight sym="Doc" prob="0.0" />
 <weight sym="Bashful" prob="0.0" />
 </conditioning_symbol>
    - <conditioning_symbol name="(Dopey Dopey)">
 <weight sym="Happy" prob="0.0" />
 <weight sym="Sleepy" prob="0.0" />
 <weight sym="Grumpy" prob="0.0" />
 <weight sym="Sneezy" prob="0.0" />
 <weight sym="Dopey" prob="0.0" />
 <weight sym="Doc" prob="0.0" />
 <weight sym="Bashful" prob="1.0" />
 </conditioning_symbol>
    - <conditioning_symbol name="(Sneezy Grumpy)">
<weight sym="Happy" prob="0.0" />

 */
    }
}
----------------------------------------------------------------------



More information about the Biojava-l mailing list