[Biojava-dev] First draft of a remote blast service class

James Carman james at carmanconsulting.com
Thu Jun 11 14:24:44 UTC 2009


Are we allowed to use JDK5?  Why not use enums rather than int codes?

On Thu, Jun 11, 2009 at 9:52 AM, Sylvain
Foisy<sylvain.foisy at diploide.net> wrote:
> Hi to all,
>
> I've been working on this for the past week or so and after discussing this
> with Andreas, I am putting my code here for critical review. I'll put this
> stuff in biojava-live as soon as Andreas can fix my SVN access.
>
> First, an interface called RemotePairwiseAlignementService defines the basic
> components of a remote service: sequence/database/program/run options/output
> options. RemoteQBlastService implements this interface, runs remote
> QBlast requests and creates output in either text, XML or HTML. At present,
> regular blastall programs work; there is no blastpgp/megablast support yet.
>
> I'll need some guidance to make it work with other types of web services,
> such as the one at EBI.
>
> Best regards
>
> Sylvain
>
> ===================================================================
>
>  Sylvain Foisy, Ph. D.
>  Consultant Bio-informatique / Bioinformatics
>  Diploide.net - TI pour la vie / IT for Life
>
>  Courriel: sylvain.foisy at diploide.net
>  Web: http://www.diploide.net
>  Tel: (514) 893-4363
> ===================================================================
>
> import java.io.InputStream;
>
> import org.biojava.bio.BioException;
> /**
>  * Minimal contract for running a pairwise alignment on a remote service.
>  *
>  * <p>
>  * Examples of such services: the QBlast service at NCBI, the web service
>  * at EBI.
>  * </p>
>  *
>  * <p>
>  * An implementation is configured through the setters, run with
>  * {@link #executeSearch()} and read back with
>  * {@link #getAlignmentResults()}.
>  * </p>
>  *
>  * @author Sylvain Foisy
>  * @since 1.8
>  */
> public interface RemotePairwiseAlignementService {
>
>    /** Output format code: results are produced as plain text. */
>    int TEXT = 0;
>
>    /** Output format code: results are produced as XML. */
>    int XML = 1;
>
>    /** Output format code: results are produced as HTML. */
>    int HTML = 2;
>
>    /**
>     * Selects the database to run the pairwise alignment against.
>     *
>     * @param db a <code>String</code> with a database ID that is valid
>     *           for the service used.
>     */
>    void setDatabase(String db);
>
>    /**
>     * Sets the sequence to be aligned by this request.
>     *
>     * @param seq a <code>String</code> with the sequence to be aligned.
>     */
>    void setSequence(String seq);
>
>    /**
>     * Selects the program to use for this pairwise alignment.
>     *
>     * @param prog a <code>String</code> naming a program that is valid
>     *             for the service used.
>     */
>    void setProgram(String prog);
>
>    /**
>     * Sets every other option to use for this pairwise alignment.
>     *
>     * @param str a <code>String</code> holding the additional options, in
>     *            whatever form the service used expects.
>     */
>    void setAdvancedOptions(String str);
>
>    /**
>     * Runs the analysis on the service, using the values set beforehand.
>     *
>     * @throws BioException if the analysis cannot be carried out.
>     */
>    void executeSearch() throws BioException;
>
>    /**
>     * Fetches the alignment results produced by this service.
>     *
>     * @return an <code>InputStream</code> over the alignment results.
>     * @throws BioException if the results cannot be obtained.
>     */
>    InputStream getAlignmentResults() throws BioException;
> }
>
> import java.io.BufferedReader;
> import java.io.IOException;
> import java.io.InputStream;
> import java.io.InputStreamReader;
> import java.io.OutputStreamWriter;
> import java.net.MalformedURLException;
> import java.net.URL;
> import java.net.URLConnection;
>
> import org.biojava.bio.BioException;
>
> /**
>  * RemoteQBlastService - A simple way of submitting BLAST request to the
> QBlast
>  * service at NCBI.
>  *
>  * <p>
>  * NCBI provides a Blast server through a CGI-BIN interface.
> RemoteQBlastService simply
>  * encapsulates an access to it by giving users access to get/set methods to
> fix
>  * sequence, program and database as well as advanced options.
>  * </p>
>  *
>  * <p>
>  * As of version 1.0, only blastall programs are usable. blastpgp and
> megablast are high-priorities.
>  * </p>
>  *
>  * @author Sylvain Foisy
>  * @version 1.0
>  * @since 1.8
>  *
>  *
>  */
> public class RemoteQBlastService implements RemotePairwiseAlignementService{
>
> //    public static final int TEXT = 0;
> //    public static final int XML = 1;
> //    public static final int HTML = 2;
>
>    private static String baseurl =
> "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi";
>    private URL aUrl;
>    private URLConnection uConn;
>    private OutputStreamWriter fromQBlast;
>    private BufferedReader rd;
>
>    private String seq = null;
>    private String prog = null;
>    private String db = null;
>    private String outputFormat = null;
>    private String advanced = null;
>
>    private String rid;
>    private long step;
>    private boolean done = false;
>    private long start;
>
>    public RemoteQBlastService() throws BioException {
>        try {
>            aUrl = new URL(baseurl);
>            uConn = setQBlastProperties(aUrl.openConnection());
>
>            outputFormat = "Text";
>        }
>        /*
>         * Needed but should never be thrown since the URL is static and
> known to exist
>         */
>        catch (MalformedURLException e) {
>            throw new BioException("It looks like the URL for NCBI QBlast
> service is bad");
>        }
>        /*
>         * Intercept if the program can't connect to QBlast service
>         */
>        catch (IOException e) {
>            throw new BioException(
>                    "Impossible to connect to QBlast service at this time.
> Check your network connection");
>        }
>    }
>
>    /**
>     * This method execute the Blast request via the Put command of the
> CGI-BIN
>     * interface. It gets the estimated time of completion by capturing the
>     * value of the RTOE variable and sets a loop that will check for
> completion
>     * of analysis at intervals specified by RTOE.
>     *
>     * <p>
>     * It also capture the value for the RID variable, necessary for
> fetching
>     * the actual results after completion.
>     * </p>
>     *
>     * @throws BioException
>     *             if it is not possible to sent the BLAST command
>     */
>    public void executeSearch() throws BioException {
>
>        if (seq == null || db == null || prog == null) {
>            throw new BioException(
>                    "Impossible to execute QBlast request. One or more of
> seq|db|prog has not been set");
>        }
>        /*
>         * sending the command to execute the Blast analysis
>         */
>        String cmd = "CMD=Put&SERVICE=plain" + "&" + seq + "&" + prog + "&"
>                + db + "&" + "FORMAT_TYPE=HTML";
>
>        if (advanced != null) {
>            cmd += cmd + "&" + advanced;
>        }
>
>        try {
>
>            uConn = setQBlastProperties(aUrl.openConnection());
>
>            fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
>
>            fromQBlast.write(cmd);
>            fromQBlast.flush();
>
>            // Get the response
>            rd = new BufferedReader(new InputStreamReader(uConn
>                    .getInputStream()));
>
>            String line = "";
>
>            while ((line = rd.readLine()) != null) {
>                if (line.contains("RID")) {
>                    String[] arr = line.split("=");
>                    rid = arr[1].trim();
>                } else if (line.contains("RTOE")) {
>                    String[] arr = line.split("=");
>                    step = Long.parseLong(arr[1].trim()) * 1000;
>                    start = System.currentTimeMillis() + step;
>                }
>            }
>        } catch (IOException e) {
>            throw new BioException(
>                    "Can't submit sequence to BLAST server at this time.");
>        }
>        /*
>         * Getting the info out of the NCBI system
>         */
>        while (!done) {
>            long prez = System.currentTimeMillis();
>            done = isReady(rid, prez);
>        }
>    }
>
>    /**
>     * <p>This method is used only for the executeBlastSearch method to
> check for completion of
>     * request using the NCBI specified RTOE variable</p>
>     *
>     * @param id
>     * @param present
>     * @return
>     */
>    private boolean isReady(String id, long present) {
>
>        boolean ready = false;
>        String check = "CMD=Get&RID=" + id;
>        /*
>         * If present time is less than the start of the search added to
> step
>         * obtained from NCBI, just do nothing ;-)
>         */
>        if (present < start) {
>            ;
>        }
>        /*
>         * If we are at least step seconds in the future from the actual
> call of
>         * method executeBlastSearch()
>         */
>        else {
>            try {
>                uConn = setQBlastProperties(aUrl.openConnection());
>
>                fromQBlast = new
> OutputStreamWriter(uConn.getOutputStream());
>                fromQBlast.write(check);
>                fromQBlast.flush();
>
>                rd = new BufferedReader(new InputStreamReader(uConn
>                        .getInputStream()));
>
>                String line = "";
>
>                while ((line = rd.readLine()) != null) {
>                    if (line.contains("READY")) {
>                        ready = true;
>                    } else if (line.contains("WAITING")) {
>                        /*
>                         * Else, move start forward in time...
>                         */
>                        start = present + step;
>                    }
>                }
>            } catch (IOException e) {
>                e.printStackTrace();
>            }
>        }
>        return ready;
>    }
>
>    /**
>     * <p>This method extracts this actual Blast report. The default format
> is Text but can be changed before with the method
>     * setQBlastOutputFormat.</p>
>     *
>     *
>     * @return
>     * @throws BioException
>     */
>    public InputStream getAlignmentResults() throws BioException {
>        String srid = "CMD=Get&RID=" + rid;
>        srid += "&FORMAT_TYPE=" + outputFormat;
>
>        if(!this.done){
>            throw new BioException("Unable to get report at this time. Your
> Blast request has not been processed yet.");
>        }
>
>        try {
>            uConn = setQBlastProperties(aUrl.openConnection());
>
>            fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
>            fromQBlast.write(srid);
>            fromQBlast.flush();
>
>            return uConn.getInputStream();
>
>        } catch (IOException ioe) {
>            throw new BioException(
>                    "It is not possible to fetch Blast report from NCBI at
> this time");
>        }
>    }
>
>    /**
>     * <p>
>     * Set the sequence to be blasted using the String that correspond to
> the
>     * sequence.
>     * </p>
>     *
>     * <p>
>     * Take note that this method is mutually exclusive to setGIToBlast()
> for a
>     * given Blast request.
>     * </p>
>     *
>     * @param aStr
>     *            : a String with the sequence
>     */
>    public void setSequence(String aStr) {
>        this.seq = "QUERY=" + aStr;
>    }
>
>    /**
>     * Simply return a string with the blasted sequence.
>     *
>     * @return seq : a string with the sequence
>     */
>    public String getSeqToBlast() {
>        return this.seq;
>    }
>
>    /**
>     * <p>
>     * Set the sequence to be blasted using the NCBI GI value. At this time,
>     * there is no effort made to check the validity of this GI.
>     * </p>
>     *
>     * <p>
>     * Take note that this method is mutually exclusive to setSeqToBlast()
> for a
>     * given Blast request.
>     * </p>
>     *
>     * @param gi
>     *            : an integer value representing a NCBI GI
>     */
>    public void setGIToBlast(String gi) {
>        this.seq = "QUERY=" + gi;
>    }
>
>    /**
>     * <p>
>     * Simply return a string with the sequence blasted.
>     * </p>
>     *
>     * @return GI : a String with the GI of the blasted sequence
>     */
>    public String getGIToBlast() {
>        return this.seq;
>    }
>
>    /**
>     * <p>
>     * This method set the program to be used to blast the given
> sequence/GI. At
>     * this time, there is no attempt at checking the matching of sequence
> type
>     * to program.
>     * </p>
>     *
>     * @param prog
>     *            : a String representing the program specified for this
> QBlast
>     *            request.
>     *
>     */
>    public void setProgram(String prog) {
>        this.prog = "PROGRAM=" + prog;
>    }
>
>    /**
>     * <p>
>     * Simply returns the program used for the given Blast request.
>     * </p>
>     *
>     * @return prog : a String with the program used for this QBlast
> request.
>     */
>    public String getProgram() {
>        return this.prog;
>    }
>
>    /**
>     * <p>
>     * This method set the database to be used to blast the given
> sequence/GI.
>     * At this time, there is no attempt at checking the matching of
> sequence
>     * type to database.
>     * </p>
>     *
>     * @param db: a String for the database specified for this QBlast
> request
>     */
>    public void setDatabase(String db) {
>        this.db = "DATABASE=" + db;
>    }
>
>    /**
>     * <p>
>     * Simply returns the database used for the given Blast request.
>     * </p>
>     *
>     * @return db: a String with the database used for this QBlast request.
>     */
>    public String getBlastDatabase() {
>        return this.db;
>    }
>
>    /**
>     * <p>This method let the user specify which format to use for
> generating the output.</p>
>     *
>     * @param type:an integer taken from the static constant of this class,
> either be TEXT, XML or HTML
>     */
>    public void setQBlastOutputFormat(int type) {
>
>        switch (type) {
>            case 0:
>                this.outputFormat = "Text";
>                break;
>            case 1:
>                this.outputFormat = "XML";
>                break;
>            case 2:
>                this.outputFormat = "HTML";
>                break;
>        }
>    }
>
>    /**
>     * <p>
>     * Simply returns the output format used for the given Blast report.
>     * </p>
>     *
>     * @return outputFormat : a String with the format specified for the
> QBlast report.
>     */
>    public String getQBlastOutputFormat() {
>        return this.outputFormat;
>    }
>
>    /**
>     * <p>This method is to be used if a request is to use non-default
> values at submission. According to QBlast info,
>     * the accepted parameters for PUT requests are:</p>
>     *
>     * <ul>
>     * <li>-G: cost to create a gap. Default = 5 (nuc-nuc) / 11 (protein) /
> non-affine for megablast</li>
>     * <li>-E: Cost to extend a gap. Default = 2 (nuc-nuc) / 1 (protein) /
> non-affine for megablast</li>
>     * <li>-r: integer to reward for match. Default = 1</li>
>     * <li>-q: negative integer for penalty to allow mismatch. Default =
> -3</li>
>     * <li>-e: expectation value. Default = 10.0</li>
>     * <li>-W: word size. Default = 3 (proteins) / 11 (nuc-nuc) / 28
> (megablast)</li>
>     * <li>-y: dropoff for blast extensions in bits, using default if not
> specified. Default = 20 for blastn, 7 for all others
>     * (except megablast for which it is not applicable).</li>
>     * <li>-X: X dropoff value for gapped alignment, in bits. Default = 30
> for blastn/megablast, 15 for all others.</li>
>     * <li>-Z: final X dropoff value for gapped alignement, in bits. Default
> = 50 for blastn, 25 for all others
>     * (except megablast for which it is not applicable)</li>
>     * <li>-P: equals 0 for multiple hits 1-pass, 1 for single hit 1-pass.
> Does not apply to blastn ou megablast.</li>
>     * <li>-A: multiple hits window size. Default = 0 (for single hit
> algorithm)</li>
>     * <li>-I: number of database sequences to save hits for. Default =
> 500</li>
>     * <li>-Y: effective length of the search space. Default = 0 (0
> represents using the whole space)</li>
>     * <li>-z: a real specifying the effective length of the database to
> use. Default = 0 (0 represents the real size)</li>
>     * <li>-c: an integer representing pseudocount constant for PSI-BLAST.
> Default = 7</li>
>     * <li>-F: any filtering directive</li>
>     * </ul>
>     *
>     * <p>You have to be aware that at not moment is there any error
> checking on the use of these parameters by this class.</p>
>     * @param aStr: a String with any number of optional parameters with an
> associated value.
>     *
>     */
>    public void setAdvancedOptions(String aStr) {
>        this.advanced = "OTHER_ADVANCED=" + aStr;
>    }
>
>    /**
>     *
>     * Simply return the string given as argument via
> setBlastAdvancedOptions
>     *
>     * @return advanced: the string with the advanced options
>     */
>    public String getBlastAdvancedOptions() {
>        return this.advanced;
>    }
>
>    /**
>     *
>     * Simply return the QBlast RID for this specific QBlast request
>     *
>     * @return rid: the string with the RID
>     */
>    public String getBlastRID() {
>        return this.rid;
>    }
>
>    /**
>     * A simple method to check the availability of the QBlast service
>     *
>     * @throws BioException
>     */
>    public void printRemoteBlastInfo() throws BioException {
>        try {
>            OutputStreamWriter out = new OutputStreamWriter(uConn
>                    .getOutputStream());
>
>            out.write("CMD=Info");
>            out.flush();
>
>            // Get the response
>            BufferedReader rd = new BufferedReader(new
> InputStreamReader(uConn
>                    .getInputStream()));
>
>            String line = "";
>
>            while ((line = rd.readLine()) != null) {
>                System.out.println(line);
>            }
>
>            out.close();
>            rd.close();
>        } catch (IOException e) {
>            throw new BioException(
>                    "Impossible to get info from QBlast service at this
> time. Check your network connection");
>        }
>    }
>
>    private URLConnection setQBlastProperties(URLConnection conn) {
>
>        URLConnection tmp = conn;
>
>        conn.setDoOutput(true);
>        conn.setUseCaches(false);
>
>        tmp.setRequestProperty("User-Agent", "Biojava/RemoteQBlastService");
>        tmp.setRequestProperty("Connection", "Keep-Alive");
>        tmp.setRequestProperty("Content-type",
>                "application/x-www-form-urlencoded");
>        tmp.setRequestProperty("Content-length", "200");
>
>        return tmp;
>    }
> }
>
>
> _______________________________________________
> biojava-dev mailing list
> biojava-dev at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/biojava-dev
>




More information about the biojava-dev mailing list