[Biojava-dev] First draft of a remote blast service class
James Carman
james at carmanconsulting.com
Thu Jun 11 14:24:44 UTC 2009
Are we allowed to use JDK5? Why not use enums rather than int codes?
On Thu, Jun 11, 2009 at 9:52 AM, Sylvain
Foisy<sylvain.foisy at diploide.net> wrote:
> Hi to all,
>
> I've been working on this for the past week or so and after discussing this
> with Andreas, I am putting my code here for critical review. I'll put this
> stuff in biojava-live as soon as Andreas can fix my SVN access.
>
> First, an interface called RemotePairwiseAlignementService defines the basic
> components of a remote service: sequence/database/program/run options/output
> options. RemoteQBlastService implements this interface, runs remote
> QBlast requests and creates output in either text, XML or HTML. At present,
> regular blastall programs work; there is no blastpgp/megablast support yet.
>
> I'll need some guidance to make it work with other types of web services,
> like EBI.
>
> Best regards
>
> Sylvain
>
> ===================================================================
>
> Sylvain Foisy, Ph. D.
> Consultant Bio-informatique / Bioinformatics
> Diploide.net - TI pour la vie / IT for Life
>
> Courriel: sylvain.foisy at diploide.net
> Web: http://www.diploide.net
> Tel: (514) 893-4363
> ===================================================================
>
> import java.io.InputStream;
>
> import org.biojava.bio.BioException;
> /**
>  * Minimal contract for running a pairwise alignment on a remote service.
>  *
>  * <p>
>  * Examples of such services: the QBlast service at NCBI, the Web Service
>  * at EBI.
>  * </p>
>  *
>  * @author Sylvain Foisy
>  * @since 1.8
>  */
> public interface RemotePairwiseAlignementService {
>
>     /** Output format code: results are produced as plain text. */
>     int TEXT = 0;
>
>     /** Output format code: results are produced as XML. */
>     int XML = 1;
>
>     /** Output format code: results are produced as HTML. */
>     int HTML = 2;
>
>     /**
>      * Sets the database to search for this pairwise alignment.
>      *
>      * @param db a <code>String</code> with a valid database ID for the
>      *           service used.
>      */
>     void setDatabase(String db);
>
>     /**
>      * Sets the sequence to be aligned for this request.
>      *
>      * @param seq a <code>String</code> with a sequence to be aligned.
>      */
>     void setSequence(String seq);
>
>     /**
>      * Sets the program to use for this pairwise alignment.
>      *
>      * @param prog a <code>String</code> with a valid program name for the
>      *             service used.
>      */
>     void setProgram(String prog);
>
>     /**
>      * Sets all other options to use for this pairwise alignment.
>      *
>      * @param str a <code>String</code> with the additional options, in
>      *            the form expected by the service used.
>      */
>     void setAdvancedOptions(String str);
>
>     /**
>      * Runs the actual analysis on the instantiated service.
>      *
>      * @throws BioException if the search cannot be executed
>      */
>     void executeSearch() throws BioException;
>
>     /**
>      * Gets the alignment results from this instantiated service.
>      *
>      * @return an <code>InputStream</code> with the actual alignment
>      *         results
>      * @throws BioException if the results cannot be obtained
>      */
>     InputStream getAlignmentResults() throws BioException;
> }
>
> import java.io.BufferedReader;
> import java.io.IOException;
> import java.io.InputStream;
> import java.io.InputStreamReader;
> import java.io.OutputStreamWriter;
> import java.net.MalformedURLException;
> import java.net.URL;
> import java.net.URLConnection;
>
> import org.biojava.bio.BioException;
>
> /**
> * RemoteQBlastService - A simple way of submitting BLAST request to the
> QBlast
> * service at NCBI.
> *
> * <p>
> * NCBI provides a Blast server through a CGI-BIN interface.
> RemoteQBlastService simply
> * encapsulates an access to it by giving users access to get/set methods to
> fix
> * sequence, program and database as well as advanced options.
> * </p>
> *
> * <p>
> * As of version 1.0, only blastall programs are usable. blastpgp and
> megablast are high-priorities.
> * </p>
> *
> * @author Sylvain Foisy
> * @version 1.0
> * @since 1.8
> *
> *
> */
> public class RemoteQBlastService implements RemotePairwiseAlignementService{
>
> // public static final int TEXT = 0;
> // public static final int XML = 1;
> // public static final int HTML = 2;
>
> private static String baseurl =
> "http://www.ncbi.nlm.nih.gov/blast/Blast.cgi";
> private URL aUrl;
> private URLConnection uConn;
> private OutputStreamWriter fromQBlast;
> private BufferedReader rd;
>
> private String seq = null;
> private String prog = null;
> private String db = null;
> private String outputFormat = null;
> private String advanced = null;
>
> private String rid;
> private long step;
> private boolean done = false;
> private long start;
>
> public RemoteQBlastService() throws BioException {
> try {
> aUrl = new URL(baseurl);
> uConn = setQBlastProperties(aUrl.openConnection());
>
> outputFormat = "Text";
> }
> /*
> * Needed but should never be thrown since the URL is static and
> known to exist
> */
> catch (MalformedURLException e) {
> throw new BioException("It looks like the URL for NCBI QBlast
> service is bad");
> }
> /*
> * Intercept if the program can't connect to QBlast service
> */
> catch (IOException e) {
> throw new BioException(
> "Impossible to connect to QBlast service at this time.
> Check your network connection");
> }
> }
>
> /**
> * This method execute the Blast request via the Put command of the
> CGI-BIN
> * interface. It gets the estimated time of completion by capturing the
> * value of the RTOE variable and sets a loop that will check for
> completion
> * of analysis at intervals specified by RTOE.
> *
> * <p>
> * It also capture the value for the RID variable, necessary for
> fetching
> * the actual results after completion.
> * </p>
> *
> * @throws BioException
> * if it is not possible to sent the BLAST command
> */
> public void executeSearch() throws BioException {
>
> if (seq == null || db == null || prog == null) {
> throw new BioException(
> "Impossible to execute QBlast request. One or more of
> seq|db|prog has not been set");
> }
> /*
> * sending the command to execute the Blast analysis
> */
> String cmd = "CMD=Put&SERVICE=plain" + "&" + seq + "&" + prog + "&"
> + db + "&" + "FORMAT_TYPE=HTML";
>
> if (advanced != null) {
> cmd += cmd + "&" + advanced;
> }
>
> try {
>
> uConn = setQBlastProperties(aUrl.openConnection());
>
> fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
>
> fromQBlast.write(cmd);
> fromQBlast.flush();
>
> // Get the response
> rd = new BufferedReader(new InputStreamReader(uConn
> .getInputStream()));
>
> String line = "";
>
> while ((line = rd.readLine()) != null) {
> if (line.contains("RID")) {
> String[] arr = line.split("=");
> rid = arr[1].trim();
> } else if (line.contains("RTOE")) {
> String[] arr = line.split("=");
> step = Long.parseLong(arr[1].trim()) * 1000;
> start = System.currentTimeMillis() + step;
> }
> }
> } catch (IOException e) {
> throw new BioException(
> "Can't submit sequence to BLAST server at this time.");
> }
> /*
> * Getting the info out of the NCBI system
> */
> while (!done) {
> long prez = System.currentTimeMillis();
> done = isReady(rid, prez);
> }
> }
>
> /**
> * <p>This method is used only for the executeBlastSearch method to
> check for completion of
> * request using the NCBI specified RTOE variable</p>
> *
> * @param id
> * @param present
> * @return
> */
> private boolean isReady(String id, long present) {
>
> boolean ready = false;
> String check = "CMD=Get&RID=" + id;
> /*
> * If present time is less than the start of the search added to
> step
> * obtained from NCBI, just do nothing ;-)
> */
> if (present < start) {
> ;
> }
> /*
> * If we are at least step seconds in the future from the actual
> call of
> * method executeBlastSearch()
> */
> else {
> try {
> uConn = setQBlastProperties(aUrl.openConnection());
>
> fromQBlast = new
> OutputStreamWriter(uConn.getOutputStream());
> fromQBlast.write(check);
> fromQBlast.flush();
>
> rd = new BufferedReader(new InputStreamReader(uConn
> .getInputStream()));
>
> String line = "";
>
> while ((line = rd.readLine()) != null) {
> if (line.contains("READY")) {
> ready = true;
> } else if (line.contains("WAITING")) {
> /*
> * Else, move start forward in time...
> */
> start = present + step;
> }
> }
> } catch (IOException e) {
> e.printStackTrace();
> }
> }
> return ready;
> }
>
> /**
> * <p>This method extracts this actual Blast report. The default format
> is Text but can be changed before with the method
> * setQBlastOutputFormat.</p>
> *
> *
> * @return
> * @throws BioException
> */
> public InputStream getAlignmentResults() throws BioException {
> String srid = "CMD=Get&RID=" + rid;
> srid += "&FORMAT_TYPE=" + outputFormat;
>
> if(!this.done){
> throw new BioException("Unable to get report at this time. Your
> Blast request has not been processed yet.");
> }
>
> try {
> uConn = setQBlastProperties(aUrl.openConnection());
>
> fromQBlast = new OutputStreamWriter(uConn.getOutputStream());
> fromQBlast.write(srid);
> fromQBlast.flush();
>
> return uConn.getInputStream();
>
> } catch (IOException ioe) {
> throw new BioException(
> "It is not possible to fetch Blast report from NCBI at
> this time");
> }
> }
>
> /**
> * <p>
> * Set the sequence to be blasted using the String that correspond to
> the
> * sequence.
> * </p>
> *
> * <p>
> * Take note that this method is mutually exclusive to setGIToBlast()
> for a
> * given Blast request.
> * </p>
> *
> * @param aStr
> * : a String with the sequence
> */
> public void setSequence(String aStr) {
> this.seq = "QUERY=" + aStr;
> }
>
> /**
> * Simply return a string with the blasted sequence.
> *
> * @return seq : a string with the sequence
> */
> public String getSeqToBlast() {
> return this.seq;
> }
>
> /**
> * <p>
> * Set the sequence to be blasted using the NCBI GI value. At this time,
> * there is no effort made to check the validity of this GI.
> * </p>
> *
> * <p>
> * Take note that this method is mutually exclusive to setSeqToBlast()
> for a
> * given Blast request.
> * </p>
> *
> * @param gi
> * : an integer value representing a NCBI GI
> */
> public void setGIToBlast(String gi) {
> this.seq = "QUERY=" + gi;
> }
>
> /**
> * <p>
> * Simply return a string with the sequence blasted.
> * </p>
> *
> * @return GI : a String with the GI of the blasted sequence
> */
> public String getGIToBlast() {
> return this.seq;
> }
>
> /**
> * <p>
> * This method set the program to be used to blast the given
> sequence/GI. At
> * this time, there is no attempt at checking the matching of sequence
> type
> * to program.
> * </p>
> *
> * @param prog
> * : a String representing the program specified for this
> QBlast
> * request.
> *
> */
> public void setProgram(String prog) {
> this.prog = "PROGRAM=" + prog;
> }
>
> /**
> * <p>
> * Simply returns the program used for the given Blast request.
> * </p>
> *
> * @return prog : a String with the program used for this QBlast
> request.
> */
> public String getProgram() {
> return this.prog;
> }
>
> /**
> * <p>
> * This method set the database to be used to blast the given
> sequence/GI.
> * At this time, there is no attempt at checking the matching of
> sequence
> * type to database.
> * </p>
> *
> * @param db: a String for the database specified for this QBlast
> request
> */
> public void setDatabase(String db) {
> this.db = "DATABASE=" + db;
> }
>
> /**
> * <p>
> * Simply returns the database used for the given Blast request.
> * </p>
> *
> * @return db: a String with the database used for this QBlast request.
> */
> public String getBlastDatabase() {
> return this.db;
> }
>
> /**
> * <p>This method let the user specify which format to use for
> generating the output.</p>
> *
> * @param type:an integer taken from the static constant of this class,
> either be TEXT, XML or HTML
> */
> public void setQBlastOutputFormat(int type) {
>
> switch (type) {
> case 0:
> this.outputFormat = "Text";
> break;
> case 1:
> this.outputFormat = "XML";
> break;
> case 2:
> this.outputFormat = "HTML";
> break;
> }
> }
>
> /**
> * <p>
> * Simply returns the output format used for the given Blast report.
> * </p>
> *
> * @return outputFormat : a String with the format specified for the
> QBlast report.
> */
> public String getQBlastOutputFormat() {
> return this.outputFormat;
> }
>
> /**
> * <p>This method is to be used if a request is to use non-default
> values at submission. According to QBlast info,
> * the accepted parameters for PUT requests are:</p>
> *
> * <ul>
> * <li>-G: cost to create a gap. Default = 5 (nuc-nuc) / 11 (protein) /
> non-affine for megablast</li>
> * <li>-E: Cost to extend a gap. Default = 2 (nuc-nuc) / 1 (protein) /
> non-affine for megablast</li>
> * <li>-r: integer to reward for match. Default = 1</li>
> * <li>-q: negative integer for penalty to allow mismatch. Default =
> -3</li>
> * <li>-e: expectation value. Default = 10.0</li>
> * <li>-W: word size. Default = 3 (proteins) / 11 (nuc-nuc) / 28
> (megablast)</li>
> * <li>-y: dropoff for blast extensions in bits, using default if not
> specified. Default = 20 for blastn, 7 for all others
> * (except megablast for which it is not applicable).</li>
> * <li>-X: X dropoff value for gapped alignment, in bits. Default = 30
> for blastn/megablast, 15 for all others.</li>
> * <li>-Z: final X dropoff value for gapped alignement, in bits. Default
> = 50 for blastn, 25 for all others
> * (except megablast for which it is not applicable)</li>
> * <li>-P: equals 0 for multiple hits 1-pass, 1 for single hit 1-pass.
> Does not apply to blastn ou megablast.</li>
> * <li>-A: multiple hits window size. Default = 0 (for single hit
> algorithm)</li>
> * <li>-I: number of database sequences to save hits for. Default =
> 500</li>
> * <li>-Y: effective length of the search space. Default = 0 (0
> represents using the whole space)</li>
> * <li>-z: a real specifying the effective length of the database to
> use. Default = 0 (0 represents the real size)</li>
> * <li>-c: an integer representing pseudocount constant for PSI-BLAST.
> Default = 7</li>
> * <li>-F: any filtering directive</li>
> * </ul>
> *
> * <p>You have to be aware that at not moment is there any error
> checking on the use of these parameters by this class.</p>
> * @param aStr: a String with any number of optional parameters with an
> associated value.
> *
> */
> public void setAdvancedOptions(String aStr) {
> this.advanced = "OTHER_ADVANCED=" + aStr;
> }
>
> /**
> *
> * Simply return the string given as argument via
> setBlastAdvancedOptions
> *
> * @return advanced: the string with the advanced options
> */
> public String getBlastAdvancedOptions() {
> return this.advanced;
> }
>
> /**
> *
> * Simply return the QBlast RID for this specific QBlast request
> *
> * @return rid: the string with the RID
> */
> public String getBlastRID() {
> return this.rid;
> }
>
> /**
> * A simple method to check the availability of the QBlast service
> *
> * @throws BioException
> */
> public void printRemoteBlastInfo() throws BioException {
> try {
> OutputStreamWriter out = new OutputStreamWriter(uConn
> .getOutputStream());
>
> out.write("CMD=Info");
> out.flush();
>
> // Get the response
> BufferedReader rd = new BufferedReader(new
> InputStreamReader(uConn
> .getInputStream()));
>
> String line = "";
>
> while ((line = rd.readLine()) != null) {
> System.out.println(line);
> }
>
> out.close();
> rd.close();
> } catch (IOException e) {
> throw new BioException(
> "Impossible to get info from QBlast service at this
> time. Check your network connection");
> }
> }
>
> private URLConnection setQBlastProperties(URLConnection conn) {
>
> URLConnection tmp = conn;
>
> conn.setDoOutput(true);
> conn.setUseCaches(false);
>
> tmp.setRequestProperty("User-Agent", "Biojava/RemoteQBlastService");
> tmp.setRequestProperty("Connection", "Keep-Alive");
> tmp.setRequestProperty("Content-type",
> "application/x-www-form-urlencoded");
> tmp.setRequestProperty("Content-length", "200");
>
> return tmp;
> }
> }
>
>
> _______________________________________________
> biojava-dev mailing list
> biojava-dev at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/biojava-dev
>
More information about the biojava-dev
mailing list