[Biojava-dev] [Bug 2046] New: Cannot read in more than one serialized ProfileHMM object, attempt crashes

bugzilla-daemon at newportal.open-bio.org bugzilla-daemon at newportal.open-bio.org
Fri Jul 14 16:27:51 UTC 2006


http://bugzilla.open-bio.org/show_bug.cgi?id=2046

           Summary: Cannot read in more than one serialized ProfileHMM
                    object, attempt crashes
           Product: BioJava
           Version: unspecified
          Platform: PC
        OS/Version: Windows XP
            Status: NEW
          Severity: critical
          Priority: P1
         Component: dist/dp
        AssignedTo: biojava-dev at biojava.org
        ReportedBy: toddri at eden.rutgers.edu


The following small demo code illustrates the problem.  For me, the second
attempt to read in the same serialized ProfileHMM object fails (and the same
error occurs when the two serialized objects being read are different).
The failure is very repeatable.


------------------------------------------------------------------------

/*
 * SerializeProfile.java
 *
 * Created on April 27, 2006, 11:29 AM
 *
 * To change this template, choose Tools | Template Manager
 * and open the template in the editor.
 */

//package hmm;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import org.biojava.bio.dist.DistributionFactory ;
import org.biojava.bio.dp.BaumWelchTrainer;
import org.biojava.bio.dp.DP;
import org.biojava.bio.dp.DPFactory;
import org.biojava.bio.dp.ModelTrainer;
import org.biojava.bio.dp.ProfileHMM;
import org.biojava.bio.dp.SimpleModelTrainer ;
import org.biojava.bio.dp.StoppingCriteria;
import org.biojava.bio.dp.TrainingAlgorithm;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.db.HashSequenceDB ;
import org.biojava.bio.seq.db.SequenceDB;

/**
 *
 * @author Mark Schreiber
 */
public class SerializeProfile {

    /**
     * Creates a new instance of SerializeProfile
     */
    public SerializeProfile() {
    }

    public void testSerialize(ProfileHMM hmm) throws Exception{
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(bos);
        System.out.println("Writing HMM");
        oos.writeObject(hmm);
        oos.flush();
        System.out.println("Wrote "+bos.size()+" bytes");
        oos.close();

        ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray());
        ObjectInputStream ois = new ObjectInputStream(bis);
        System.out.println("Reading HMM");
        ProfileHMM hmm2 = (ProfileHMM)ois.readObject();
        ois.close();
        System.out.println("Read HMM");

        ByteArrayInputStream bis2 = new
ByteArrayInputStream(bos.toByteArray());
        ObjectInputStream ois2 = new ObjectInputStream(bis2);
        System.out.println("Reading HMM again!");
        ProfileHMM hmm3 = (ProfileHMM)ois2.readObject();
        ois2.close();
        System.out.println("Read HMM again.");
    }

    public ProfileHMM generateProfile() throws Exception{
               /*
                * Make a profile HMM over the DNA Alphabet with 12 'columns'
and default
                * DistributionFactories to construct the transition and
emmission
                * Distributions
                */
        ProfileHMM hmm = new ProfileHMM(DNATools.getDNA(),
                12,
                DistributionFactory.DEFAULT,
                DistributionFactory.DEFAULT,
                "my profilehmm");

        //create the Dynamic Programming matrix for the model.
        DP dp = DPFactory.DEFAULT.createDP(hmm);

        //Database to hold the training set
        SequenceDB db = new HashSequenceDB();

        //code here to load the training set
        Sequence seq = DNATools.createDNASequence("aaaggctagctg", "seq1");
        db.addSequence(seq);
        seq = DNATools.createDNASequence("aaaggcgagctg", "seq2");
        db.addSequence(seq);
        seq = DNATools.createDNASequence("aaattctagctg", "seq3");
        db.addSequence(seq);

        //train the model to have uniform parameters
        ModelTrainer mt = new SimpleModelTrainer();
        //register the model to train
        mt.registerModel(hmm);
        //as no other counts are being used the null weight will cause
everything to be uniform
        mt.setNullModelWeight(1.0);
        mt.train();

        //create a BW trainer for the dp matrix generated from the HMM
        BaumWelchTrainer bwt = new BaumWelchTrainer(dp);

        //anonymous implementation of the stopping criteria interface to stop
after 20 iterations
        StoppingCriteria stopper = new StoppingCriteria(){
            public boolean isTrainingComplete(TrainingAlgorithm ta){
                return (ta.getCycle() > 20);
            }
        };

       /*
        * optimize the dp matrix to reflect the training set in db using a null
model
        * weight of 1.0 and the Stopping criteria defined above.
        */
        bwt.train(db,1.0,stopper);


        return hmm;
    }

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws Exception{
        SerializeProfile sp = new SerializeProfile();
        ProfileHMM hmm = sp.generateProfile();
        sp.testSerialize(hmm);
    }

}


-- 
Configure bugmail: http://bugzilla.open-bio.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug, or are watching the assignee.



More information about the biojava-dev mailing list