[Biojava-l] Help on NCBIQBlastService and BlastXMLQuery

Shakuntala Baichoo shakunb at uom.ac.mu
Thu Jun 30 19:20:41 UTC 2011


Hi!
I would be grateful if anybody could help me with NCBIQBlastService.

I need to BLAST a set (in this case only 2) of nucleotide sequences and I am
using BioJava3's NCBIQBlastService. I write the results to XML files and
then try to parse each XML file to get all the results (% match, e-value,
etc.). But I am only getting the identifiers of the sequences that matched,
as follows:
...........
trying to get BLAST results for RID 0TJFFD5E01S
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Start read of 0TJFFD5E01SResults_XML.xml
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Read finished
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: Query for hits
Jun 30, 2011 11:10:03 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: 1 hits
[CP002614, CP002487, FQ312003, CP001363, FN424405, CP000857, AE006468,
AE017220, CP001138, CP001127, AM933172, AM933173, CP001144, FM200053,
CP001120, CP001113, CP000886, CP000026, FR775193, AE014613, AL627266]
***********************************************
trying to get BLAST results for RID 0TJFHZV201S
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Start read of 0TJFHZV201SResults_XML.xml
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery <init>
INFO: Read finished
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: Query for hits
Jun 30, 2011 11:10:27 PM org.biojava3.genome.query.BlastXMLQuery
getHitsQueryDef
INFO: 1 hits
[CP002614, CP002487, AP011957, FQ312003, CP001363, FN424405, AE006468,
L19338, CP001113, CP000857, CP001138, AE017220, CP001120, CP000886,
FR775195, AM933172, FM200053, AM933173, CP000026, CP001144, CP001127,
AE014613, AL627267, M90677, CP000822]
BUILD SUCCESSFUL (total time: 54 seconds)


Note that when I open the generated XML file, it does contain all the
results. Does anyone have an idea how to extract all of that information?


Here's the sample program:
--------------------------------
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package BlastPackage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import org.biojava3.core.sequence.DNASequence;
import org.biojava3.genome.query.BlastXMLQuery;
import org.biojava3.core.sequence.ProteinSequence;
import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
import org.biojava3.core.sequence.compound.NucleotideCompound;
import org.biojava3.core.sequence.io.DNASequenceCreator;
import org.biojava3.core.sequence.io.FastaReader;
import org.biojava3.core.sequence.io.FastaReaderHelper;
import org.biojava3.core.sequence.io.GenericFastaHeaderParser;
import org.biojava3.ws.alignment.qblast.NCBIQBlastService;
import org.biojava3.ws.alignment.qblast.NCBIQBlastAlignmentProperties;
import org.biojava3.ws.alignment.qblast.NCBIQBlastOutputProperties;
import org.biojava3.ws.alignment.qblast.NCBIQBlastOutputFormat;

import org.biojava.bio.program.sax.*;
import org.biojava.bio.program.ssbind.*;
import org.biojava.bio.search.*;
import org.biojava.bio.seq.db.*;
import org.xml.sax.*;
import org.biojava.bio.*;


public class NCBIQBlastServiceTest {
    /**
     * The program take only a string with a path toward a sequence file
     *
     * For this example, I keep it simple with a single FASTA formatted file
     *
     */
    public static void main(String[] args) {

              NCBIQBlastService rbw;
              NCBIQBlastAlignmentProperties rqb;
              NCBIQBlastOutputProperties rof;
              InputStream is = null;
              ArrayList<String> rid = new ArrayList<String>();

        try {

            // Let's capture the sequences in a file...
            //LinkedHashMap<String, DNASequence> a =
FastaReaderHelper.readFastaDNASequence(new File("TestBlast.fas"));
                        FileInputStream inStream = new FileInputStream(
"TestBlast.fas" );
                        FastaReader<DNASequence,NucleotideCompound>
fastaReader =
                        new FastaReader<DNASequence,NucleotideCompound>(
                    inStream,
                    new
GenericFastaHeaderParser<DNASequence,NucleotideCompound>(),
                    new
DNASequenceCreator(AmbiguityDNACompoundSet.getDNACompoundSet()));
                        LinkedHashMap<String, DNASequence> b =
fastaReader.process();
                                /*
                                 * You would imagine that one would blast a
bunch of sequences of
                                 * identical nature with identical
parameters...
                                 */
                        rbw = new NCBIQBlastService();
                        rqb = new NCBIQBlastAlignmentProperties();

                        rqb.NCBIQBlastAlignmentProperties();

                        rqb.setBlastProgram("blastn");
                        rqb.setBlastDatabase("nr");

                        /*
                         * First, let's send all the sequences to the QBlast
service and
                         * keep the RID for fetching the results at some
later moments
                         * (actually, in a few seconds :-))
                         *
                         * Using a data structure to keep track of all
request IDs is a good
                         * practice.
                         *
                         */
                        for (Entry<String, DNASequence> entry :
b.entrySet()) {
                                System.out.println(
entry.getValue().getOriginalHeader() + "\n");
                                String s = entry.getValue().toString();
                                //System.out.println("Query Sequence:");
                                System.out.println(s);
                                String request =
rbw.sendAlignmentRequest(s,rqb);
                                //request=rbw.
                                rid.add(request);
                        }

                        /*
                         * Let's check that our requests have been
processed. If completed,
                         * let's look at the alignments with my own
selection of output and
                         * alignment formats.
                         */
                        for (String aRid : rid) {

System.out.println("***********************************************");
                                System.out.println("trying to get BLAST
results for RID "
                                                + aRid);
                                boolean wasBlasted = false;

                                while (!wasBlasted) {
                                        wasBlasted = rbw.isReady(aRid,
System.currentTimeMillis());
                                }

                                rof = new NCBIQBlastOutputProperties();

rof.setOutputFormat(NCBIQBlastOutputFormat.XML);

rof.setAlignmentOutputFormat(NCBIQBlastOutputFormat.TABULAR);
                                rof.setDescriptionNumber(20);
                                rof.setAlignmentNumber(20);

                                //System.out.println("Output
Options:"+"\n"+rof.getOutputOptions());

                                is = rbw.getAlignmentResults(aRid, rof);

                                BufferedReader br = new BufferedReader(
                                                new InputStreamReader(is));

                                String line = null;

                                String
OutputFilename1=aRid+"Results_XML.xml";
                                FileOutputStream fp1=null;
                                fp1 = new FileOutputStream(OutputFilename1);

                                while ((line = br.readLine()) != null) {
                                        //System.out.println(line);
                                        new PrintStream(fp1).println(line);
                                }

                                fp1.close();

                                BlastHomologyHits BL=new
BlastHomologyHits();
                                BlastXMLQuery B=new
BlastXMLQuery(OutputFilename1);
                                LinkedHashMap<String, ArrayList<String>>
hits=B.getHitsQueryDef(1E-100);
                                //System.out.println(hits);

                                //LinkedHashMap<String, ArrayList<String>>
Homologyhits=BL.getMatches(new File(OutputFilename1), 1E-100);
                                Collection c=hits.values();
                                Iterator i=c.iterator();
                                while(i.hasNext())
                                    System.out.println(i.next());
                        }
                        is.close();

        }
        /*
         * What happens if the file can't be read
         */
        catch (IOException ioe) {
            ioe.printStackTrace();
        }
        /*
         * What happens if FastaReaderHelper hits a snag
         */
        catch (Exception bio) {
            bio.printStackTrace();
        }
    }
}
------------------------
Thanks
Shakuntala

Email Disclaimer:
This email and all its contents are subject to the disclaimer at http://www.uom.ac.mu/emaildisclaimer  




More information about the Biojava-l mailing list