[MOBY-guts] biomoby commit
Paul Gordon
gordonp at dev.open-bio.org
Fri Apr 9 15:58:23 UTC 2010
gordonp
Fri Apr 9 11:58:22 EDT 2010
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util
In directory dev.open-bio.org:/tmp/cvs-serv1902/src/main/ca/ucalgary/seahawk/util
Modified Files:
DataFlowRecorder.java
Log Message:
Added support for case sensitive searches, and 'previous input' conditional service execution
moby-live/Java/src/main/ca/ucalgary/seahawk/util DataFlowRecorder.java,1.2,1.3
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java 2010/03/29 20:53:04 1.2
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java 2010/04/09 15:58:22 1.3
@@ -31,6 +31,8 @@
public static final String T2FLOW_DISPATCHXML = "ca/ucalgary/seahawk/resources/t2flowDispatchStack.xml";
public static final String T2FLOW_REGEXFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/RegexFilterBeanShell";
public static final String T2FLOW_XPATHFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/XPathFilterBeanShell";
+ public static final String T2FLOW_PASSFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/PassFilterBeanShell"; // = if condition
+ public static final String T2FLOW_LISTFLATTEN_BEANSHELL = "ca/ucalgary/seahawk/resources/FlattenListBeanShell";
private Central mobyCentral;
private Map<String,Integer> namesUsed; // keep count of workflow element name usage so as not to duplicate an ID
@@ -39,9 +41,12 @@
// Value is processor and port names
private Map<String,String[]> filter2Processor;
private Map<String,String[]> decomp2Processor; // reuse decomposition processors if same decomp done more than once on a given doc
+ private Map<String,String[]> input2Processor; // if the same input is used more than once, create only one input port
private DocumentBuilder docBuilder;
private static String regexFilterScript = null;
private static String xpathFilterScript = null;
+ private static String passFilterScript = null;
+ private static String listFlattenScript = null;
private static Element dispatchStack = null;
private static Transformer nullTransformer = null;
private static Logger logger = Logger.getLogger(DataFlowRecorder.class.getName());
@@ -57,8 +62,9 @@
namesUsed = new HashMap<String,Integer>();
url2Processor = new HashMap<String,String>();
- filter2Processor = new HashMap<String,String[]>(); // key is compound: url \n regex \n xpath
+ filter2Processor = new HashMap<String,String[]>(); // key is compound: url \n regex \n xpath \n caseSensitivity
decomp2Processor = new HashMap<String,String[]>(); // key is url with xpath attached as ref (the way Seahawk generates them)
+ input2Processor = new HashMap<String,String[]>(); // key is input instance XML
}
/**
@@ -154,6 +160,7 @@
url2Processor.clear(); // clear map of results already produced in backtracking (handling workflow forks)
filter2Processor.clear(); // ditto
decomp2Processor.clear(); // ditto
+ input2Processor.clear(); // ditto
// Generate all of the outputs requested by the call. None should be intermediaries
// of another (the backtracking done by addWorkflowElements will capture these).
@@ -162,7 +169,7 @@
// Create the inputs, processors and data links required for the workflow to create the result doc
String ultimateProcessorName = addWorkflowElements(result.getKey(), doc, inputPorts, processors, datalinks);
- // If there is a filter active on the currently displyed doc, we need to apply it here
+ // If there is a filter active on the currently displayed doc, we need to apply it here
FilterSearch filter = result.getValue();
String[] regexProcessorAndPorts = null;
String filterKey = null;
@@ -190,9 +197,11 @@
}
else{
regexProcessorAndPorts = createRegexFilter(filter.getFilterRegex().toString(),
- filter.getSelectedXPath(),
+ filter.getSelectedXPath(),
+ filter.getCaseSensitivity(),
ultimateProcessorName,
- getPortName(outputParam, true),
+ getPortName(outputParam, true),
+ 1, // desired list depth
processors, datalinks, doc);
filter2Processor.put(filterKey, regexProcessorAndPorts);
}
@@ -275,6 +284,9 @@
MobyService service = DataUtils.getService(resultDom);
Registry registry = DataUtils.getRegistry(resultDom);
MobyDataJob sampleJob = DataUtils.getInputSample(resultDom, registry);
+ if(sampleJob == null){
+ return "foo"; //todo: doc was a loaded Moby XML, not a service output or recognized data
+ }
Element processorElement = createProcessorElement(service, sampleJob, doc);
// add processor to workflow
@@ -297,111 +309,181 @@
MobyPrimaryData sampleData = (MobyPrimaryData) sd;
// has provenance info?
+ String[] data = null;
+ String[] condPassProcessorAndPorts = null; //[procname, input port, outputport]
if(sampleData.getUserData() != null){
- // User data has the form srcURL#generalSelectionXPath <tab> actualInputDataXptr <tab> regexFilter
- // where the filter is optional
- String[] data = sampleData.getUserData().toString().split("\t");
+ // User data has the form
+ // srcURL#generalSelectionXPath <tab> actualInputDataXptr <tab> regexFilter <tab> conditionalURL
+ // where the filter and conditionalURL are optional
+ data = sampleData.getUserData().toString().split("\t");
+ //options: selection + filter + cond, selection + cond, or cond only
+ if(data.length == 11 || data.length == 7 || data.length == 5){
+ System.err.println("Adding conditional for " + resultURLString);
+ String conditionURL = data[data.length-5];
+ String conditionRegex = data[data.length-4];
+ XPathOption conditionXPath = new XPathOption(data[data.length-3], data[data.length-2]);
+ boolean caseSensitivity = Boolean.parseBoolean(data[data.length-1]);
+ condPassProcessorAndPorts = createServiceConditionFilter(new URL(conditionURL), conditionRegex, conditionXPath, caseSensitivity,
+ doc, inputPorts, processors, datalinks);
+ String[] conditionlessData = new String[data.length-5];
+ System.arraycopy(data, 0, conditionlessData, 0, data.length-5);
+ data = conditionlessData;
+ }
+ else{
+ System.err.println("Skipping conditional, only " + data.length +
+ "members in provenance data for " + resultURLString);
+ }
+ }
+
+ // true means treat as a collection if that's what the sample data is
+ String sinkPortName = getPortName(sampleData, true);
+ if(data != null && data.length != 0){
URL dataSrcURL = null;
try{
dataSrcURL = new URL(data[0]);
} catch(Exception e){
logger.log(Level.WARNING, "Ignoring unexpected UserData in Moby DOM, was not " +
"a provenance URL as expected URL (" + sampleData.getUserData().toString() + ")");
+ continue;
}
- if(dataSrcURL != null){
- // recursion for workflow creation by backtracking service input provenance
- String feedingProcessorName = null;
- String feedingProcessorPort = null;
- String dataSrcURLString = dataSrcURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
- if(url2Processor.containsKey(dataSrcURLString)){
- feedingProcessorName = url2Processor.get(dataSrcURLString);
+ // recursion for workflow creation by backtracking service input provenance
+ String feedingProcessorName = null;
+ String feedingProcessorPort = null;
+ String dataSrcURLString = dataSrcURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
+ if(url2Processor.containsKey(dataSrcURLString)){
+ feedingProcessorName = url2Processor.get(dataSrcURLString);
+ }
+ else{
+ feedingProcessorName = addWorkflowElements(dataSrcURL, doc, inputPorts, processors, datalinks);
+ }
+
+ // Take into account data[2..5] if they are present,
+ // which filter the data by a regex before any other activities happen
+ // Format of spec is regex <tab> xpath <tab> xpathTextDesc <tab> booleanForCaseSensitivity
+ if(data.length == 6){
+ String[] origFeederProcessorAndPort = getPortFromURLRef(dataSrcURL,
+ sampleData,
+ feedingProcessorName,
+ null,
+ processors,
+ datalinks,
+ doc,
+ false);
+
+ // Lookup key is url \n regex \n xpath \n caseSensitivity
+ String[] regexProcessorAndPorts = null;
+ String filterKey = dataSrcURLString+"\n"+data[2]+"\n"+data[3]+"\n"+data[5];
+ if(filter2Processor.containsKey(filterKey)){ // filter already exists from another branch
+ regexProcessorAndPorts = filter2Processor.get(filterKey);
}
else{
- feedingProcessorName = addWorkflowElements(dataSrcURL, doc, inputPorts, processors, datalinks);
- }
-
- // true means treat as a collection if that's what the sample data is
- String sinkPortName = getPortName(sampleData, true);
-
- // Take into account data[2..4] if they are present,
- // which filter the data by a regex before any other activities happen
- // Format of spec is regex <tab> xpath <tab> xpathTextDesc, so the earlier split gives use indices 1..3
- if(data.length == 5){
- String[] origFeederProcessorAndPort = getPortFromURLRef(dataSrcURL,
- sampleData,
- feedingProcessorName,
- null,
- processors,
- datalinks,
- doc,
- false);
-
- // Lookup key is url \n regex \n xpath
- String[] regexProcessorAndPorts = null;
- String filterKey = dataSrcURLString+"\n"+data[2]+"\n"+data[3];
- if(filter2Processor.containsKey(filterKey)){ // filter already exists from another branch
- regexProcessorAndPorts = filter2Processor.get(filterKey);
- }
- else{
- regexProcessorAndPorts = createRegexFilter(data[2],
- new XPathOption(data[3], data[4]),
- origFeederProcessorAndPort[0],
- origFeederProcessorAndPort[1],
- processors, datalinks, doc);
- // New filter is applied to just this branch from a service for the moment...record the filter
- // so that if same filter criteria are applied to more than one branch, we only make one filter
- filter2Processor.put(dataSrcURLString+"\n"+data[2]+"\n"+data[3], regexProcessorAndPorts);
- }
- feedingProcessorName = regexProcessorAndPorts[0];
- feedingProcessorPort = regexProcessorAndPorts[regexProcessorAndPorts.length-1]; //last one is output
+ regexProcessorAndPorts = createRegexFilter(data[2],
+ new XPathOption(data[3], data[4]),
+ Boolean.parseBoolean(data[5]),
+ origFeederProcessorAndPort[0],
+ origFeederProcessorAndPort[1],
+ 1, // desired list depth
+ processors, datalinks, doc);
+ // New filter is applied to just this branch from a service for the moment...record the filter
+ // so that if same filter criteria are applied to more than one branch, we only make one filter
+ filter2Processor.put(filterKey, regexProcessorAndPorts);
}
-
- // getPortFromURLRef() may inject extra processors between the processorName and feedingProcessorName
- // in order to maintain type safety, etc. so the data link may change (based on last arg being set to true).
- String[] feederProcessorAndPort = getPortFromURLRef(dataSrcURL,
- sampleData,
- feedingProcessorName,
- feedingProcessorPort, //if null, determined from urlref
- processors,
- datalinks,
- doc,
- true);
- datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0], feederProcessorAndPort[1],
- processorName, sinkPortName,
+ feedingProcessorName = regexProcessorAndPorts[0];
+ feedingProcessorPort = regexProcessorAndPorts[regexProcessorAndPorts.length-1]; //last one is output
+ }
+
+ // getPortFromURLRef() may inject extra processors between the processorName and feedingProcessorName
+ // in order to maintain type safety, etc. so the data link may change (based on last arg being set to true).
+ String[] feederProcessorAndPort = getPortFromURLRef(dataSrcURL,
+ sampleData,
+ feedingProcessorName,
+ feedingProcessorPort, //if null, determined from urlref
+ processors,
+ datalinks,
+ doc,
+ true);
+ if(condPassProcessorAndPorts != null){
+ // inject condition filter between feeder service and current service
+ datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0],
+ feederProcessorAndPort[1],
+ condPassProcessorAndPorts[0],
+ condPassProcessorAndPorts[1],
doc));
-
+ feederProcessorAndPort[0] = condPassProcessorAndPorts[0];
+ feederProcessorAndPort[1] = condPassProcessorAndPorts[2];
}
+
+ datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0], feederProcessorAndPort[1],
+ processorName, sinkPortName,
+ doc));
+
}
// Otherwise it's a workflow input the user will need to specify
else{
- String uniqueInputName = createUniqueName(getInputName(sampleData));
- // todo: sample data may be of more specific type than service requires
- inputPorts.appendChild(createWorkflowInputElement(uniqueInputName, sampleData, doc));
- if(sampleData.getDataType().getName().equals(MobyTags.MOBYOBJECT)){
- String mobifyingProcessorName = addIdMobifyingProcessor(processors, datalinks,
- sampleData, processorName,
- inputName, doc);
- datalinks.appendChild(createWorkflowInputLinkElement(uniqueInputName,
- mobifyingProcessorName,
- "fileurl",
- sampleData,
- doc));
+ // pName is editable name for input processor target...
+ // allows injection of condition without affecting method's return value
+ String pName = processorName;
+ if(condPassProcessorAndPorts != null){
+ // inject condition filter between data creator and current service
+ datalinks.appendChild(createDataLinkElement(condPassProcessorAndPorts[0],
+ condPassProcessorAndPorts[2],
+ pName,
+ sinkPortName,
+ doc));
+ pName = condPassProcessorAndPorts[0];
+ sinkPortName = condPassProcessorAndPorts[1];
+ }
+ // The sample data may be used in more than one branch of the flow...avoid duplication
+ // Get the real data XML, to see if we've encountered it before as input (it'd be in the hash)
+ // Remember that sd is the loop's original MobyDataInstance before it's been coerced into a MobyPrimaryData,
+ // which doesn't have XML modes, etc.
+ String inputKey = getInputKey(sd);
+ System.err.println("Input key for " + processorName + " is " + inputKey);
+
+ String[] mobifyingProcessorNameAndPorts = null;
+ if(input2Processor.containsKey(inputKey)){
+ mobifyingProcessorNameAndPorts = input2Processor.get(inputKey);
}
else{
- // TODO: Need to build complex input from MOB rule or spreadsheet fields?
+ if(sampleData.getDataType().getName().equals(MobyTags.MOBYOBJECT)){
+ mobifyingProcessorNameAndPorts = addIdMobifyingProcessor(processors, datalinks, inputPorts,
+ sampleData, doc);
+ input2Processor.put(inputKey, mobifyingProcessorNameAndPorts);
+ }
+ else{
+ // TODO: Need to build complex input from MOB rule or spreadsheet fields?
+ }
}
+ // link the created data to the workflow service
+ datalinks.appendChild(createDataLinkElement(mobifyingProcessorNameAndPorts[0],
+ mobifyingProcessorNameAndPorts[1],
+ pName, sinkPortName,
+ doc));
}
+
}
return processorName;
}
+ // returns the same value for moby sample data instances containing the same data in XML form
+ private String getInputKey(MobyDataInstance sd){
+ int oldXmlMode = sd.getXmlMode();
+ sd.setXmlMode(MobyDataInstance.SERVICE_XML_MODE);
+ String inputKey = sd.toXML();
+ sd.setXmlMode(oldXmlMode);
+
+ // get rid of the top-level article name, as this is immaterial to the
+ // value equivalence of objects (only matters to the actual service call mechanism)
+ return inputKey.replaceFirst(MobyTags.ARTICLENAME+"\\s*=\\s*(['\"]).*?\\1", "");
+ }
+
// Actually adds a few processors, but only one needs to be returned and connected to an input port.
// The others are constant values. By default we assume that you want to run a bunch of IDs
// in a file, we also link in a Taverna spreadsheet importer.
- private String addIdMobifyingProcessor(Element processors, Element datalinks,
- MobyPrimaryData sampleData, String targetProcessorName,
- String targetProcessorPort, Document doc) throws Exception{
+ // Returns [spreadsheetReadingProcName, outputPort]
+ private String[] addIdMobifyingProcessor(Element processors, Element datalinks, Element dataFlowInputPorts,
+ MobyPrimaryData sampleData, Document doc) throws Exception{
String processorName = "Create-" + sampleData.getDataType().getName();
MobyNamespace ns = null;
MobyNamespace[] nss = ((MobyPrimaryData) sampleData).getNamespaces();
@@ -518,7 +600,7 @@
Map<String,String> feeds = new HashMap<String,String>(); //what port on data creator it maps to
constants.put(ns.getName()+"-namespace-constant", ns.getName());
feeds.put(ns.getName()+"-namespace-constant", "namespace");
- constants.put("article_name-constant", targetProcessorPort);
+ constants.put("article_name-constant", "unimportant");
feeds.put("article_name-constant", "article name");
for(Map.Entry<String,String> constant: constants.entrySet()){
@@ -632,19 +714,23 @@
port.setAttribute("name", "fileurl");
port.setAttribute("depth", "0");
+ String uniqueInputName = createUniqueName(getInputName(sampleData));
+ // todo: sample data may be of more specific type than service requires
+ dataFlowInputPorts.appendChild(createWorkflowInputElement(uniqueInputName, sampleData, doc));
+ datalinks.appendChild(createWorkflowInputLinkElement(uniqueInputName,
+ importerName,
+ "fileurl",
+ sampleData,
+ doc));
+
// link the spreadsheet reader to the Moby data creator
datalinks.appendChild(createDataLinkElement(importerName, outputUniqueName,
processorName, "id",
doc));
- // link the created data to the first workflow service, which takes an id
- datalinks.appendChild(createDataLinkElement(processorName, "mobyData",
- targetProcessorName, "Object("+targetProcessorPort+")",
- doc));
-
- // return the name of the spreadsheet reading processor so it can be hooked up
- // to an input port for a Moby service
- return importerName;
+ // return the name of the spreadsheet reading processor and output port so it can be hooked up
+ // to input ports for Moby services
+ return new String[]{processorName, portNames[portNames.length-1]};
}
private Element createConstantProcessor(String constantName, String constantValue, Document doc)
@@ -884,15 +970,86 @@
processors, datalinks, doc);
}
- private String[] createRegexFilter(String regex, XPathOption xpath, String srcProcessor, String srcPort,
+ // Make the continued use of data X in the workflow conditional on the results of running X through service f
+ // and passing the filter condition set on f. Equivalent to if(f(X) matches f_filter){...}
+ // returns [proc name, input port, output port]
+ private String[] createServiceConditionFilter(URL conditionURL, String filterRegex, XPathOption filterXPath,
+ boolean caseSensitive, Document doc,
+ Element inputPorts, Element processors, Element datalinks)
+ throws Exception{
+
+ // Open the conditionURL, and find out what service result is used, and what filter applied
+ Document conditionDoc = docBuilder.parse(conditionURL.openStream());
+ MobyService service = DataUtils.getService(conditionDoc);
+ Document condServiceOutputDoc = DataUtils.getInputDoc(conditionDoc);
+
+ // The bits to run the service "f"
+
+ String dataSrcURLString = conditionURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
+ String processorName = null;
+ // See if the service creating the input for the conditional service has already been added to the workflow
+ if(url2Processor.containsKey(dataSrcURLString)){
+ processorName = url2Processor.get(dataSrcURLString);
+ }
+ else{
+ processorName = addWorkflowElements(conditionURL, doc, inputPorts, processors, datalinks);
+ }
+
+ String filterKey = dataSrcURLString+"\n"+filterRegex+"\n"+filterXPath.getXPath()+"\n"+caseSensitive;
+ String[] regexFilterProcNameAndPort = null;
+ //todo: gimpy loop below, as more than one service output would cause a trampling of regex output ports
+ for(MobyPrimaryData outputParam: service.getPrimaryOutputs()){
+ // See if the filter on the conditional service has already been used in the workflow
+ if(filter2Processor.containsKey(filterKey)){
+ regexFilterProcNameAndPort = filter2Processor.get(filterKey);
+ }
+ else{
+ regexFilterProcNameAndPort = createRegexFilter(filterRegex,
+ filterXPath,
+ caseSensitive,
+ processorName,
+ getPortName(outputParam, true),
+ 0, //depth of list desired (will return match count)
+ processors, datalinks, doc);
+ filter2Processor.put(filterKey, regexFilterProcNameAndPort);
+ }
+ }
+
+ // We need to flatten the 2-deep list generated by the regex filter's cross product
+ String[] beanShellFlattenerProcNameAndPorts = addListFlattenBeanShell(processors, doc);
+
+ datalinks.appendChild(createDataLinkElement(regexFilterProcNameAndPort[0],
+ regexFilterProcNameAndPort[1],
+ beanShellFlattenerProcNameAndPorts[0],
+ beanShellFlattenerProcNameAndPorts[1],
+ doc));
+
+ // Essentially an "if" condition, error if incoming value is empty, missing, or zero
+ String[] beanShellFilterProcNameAndPorts = addPassFilterBeanShell(processors, doc);
+
+ datalinks.appendChild(createDataLinkElement(beanShellFlattenerProcNameAndPorts[0],
+ beanShellFlattenerProcNameAndPorts[2],
+ beanShellFilterProcNameAndPorts[0],
+ beanShellFilterProcNameAndPorts[2],
+ doc));
+
+ return new String[]{beanShellFilterProcNameAndPorts[0],
+ beanShellFilterProcNameAndPorts[1],
+ beanShellFilterProcNameAndPorts[3]};
+ }
+
+ private String[] createRegexFilter(String regex, XPathOption xpath, boolean caseSensitive,
+ String srcProcessor, String srcPort, int listDepth,
Element processors, Element datalinks, Document doc)
throws Exception{
String constantXPathName = createUniqueName(xpath.toString());
processors.appendChild(createConstantProcessor(constantXPathName, xpath.getXPath(), doc));
String constantRegexName = createUniqueName(regex);
processors.appendChild(createConstantProcessor(constantRegexName, regex, doc));
+ String constantCaseSensitivityName = createUniqueName("cs_"+caseSensitive);
+ processors.appendChild(createConstantProcessor(constantCaseSensitivityName, ""+caseSensitive, doc));
- String[] beanShellFilterProcNameAndPorts = addRegexFilterBeanShell(processors, doc);
+ String[] beanShellFilterProcNameAndPorts = addRegexFilterBeanShell(listDepth, processors, doc);
datalinks.appendChild(createDataLinkElement(srcProcessor,
srcPort,
@@ -903,11 +1060,15 @@
beanShellFilterProcNameAndPorts[0],
beanShellFilterProcNameAndPorts[2],
doc));
- datalinks.appendChild(createDataLinkElement(constantXPathName, "value",
+ datalinks.appendChild(createDataLinkElement(constantCaseSensitivityName, "value",
beanShellFilterProcNameAndPorts[0],
beanShellFilterProcNameAndPorts[3],
doc));
- return new String[]{beanShellFilterProcNameAndPorts[0], beanShellFilterProcNameAndPorts[4]};
+ datalinks.appendChild(createDataLinkElement(constantXPathName, "value",
+ beanShellFilterProcNameAndPorts[0],
+ beanShellFilterProcNameAndPorts[4],
+ doc));
+ return new String[]{beanShellFilterProcNameAndPorts[0], beanShellFilterProcNameAndPorts[5]};
}
private String[] createXrefParser(MobyNamespace nsObj, String srcProcessor, String srcPort,
@@ -1231,29 +1392,76 @@
}
}
+ private String[] addListFlattenBeanShell(Element processors, Document doc)
+ throws Exception{
+ String beanShellProcName = createUniqueName("Create_Pass_Fail_List");
+
+ // Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
+ Map<String,String> inputsMap = new LinkedHashMap<String,String>();
+ // linked because order is important to line up port connections
+ Map<String,String> inputTypes = new LinkedHashMap<String,String>();
+ inputsMap.put("inputlist", "2");
+ inputTypes.put("inputlist", "text/plain");
+ Map<String,String> outputsMap = new LinkedHashMap<String,String>();
+ outputsMap.put("outputlist", "1");
+
+ return addBeanShell(beanShellProcName, "dot",
+ inputsMap, inputTypes, outputsMap,
+ getListFlattenScript(), new String[]{},
+ processors, doc);
+ }
+
+ private String[] addPassFilterBeanShell(Element processors, Document doc)
+ throws Exception{
+ String beanShellProcName = createUniqueName("IfPassesContentFilter");
+
+ // Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
+ Map<String,String> inputsMap = new LinkedHashMap<String,String>();
+ // linked because order is important to line up port connections
+ Map<String,String> inputTypes = new LinkedHashMap<String,String>();
+ inputsMap.put("dataToPassThrough", "1");
+ inputsMap.put("conditionResults", "1");
+ inputTypes.put("dataToPassThrough", "text/xml");
+ inputTypes.put("conditionResults", "text/plain");
+ Map<String,String> outputsMap = new LinkedHashMap<String,String>();
+ outputsMap.put("dataPassed", "1");
+
+ return addBeanShell(beanShellProcName, "cross",
+ inputsMap, inputTypes, outputsMap,
+ getPassFilterScript(), new String[]{},
+ processors, doc);
+ }
+
// Returns the name of the bean shell processor and its ports, so we can create the data links in the caller
// The input is the original moby xml, and the xpath to extract, and the regex to apply to the text contents of the xpath results.
// The output is a list of moby xml docs, one for each mobyData that fit the xpath and regex criteria.
- private String[] addRegexFilterBeanShell(Element processors, Document doc)
+ private String[] addRegexFilterBeanShell(int listDepth, Element processors, Document doc)
throws Exception{
- String beanShellProcName = createUniqueName("Filter_By_Content");
+ String beanShellProcName = createUniqueName("Filter"+(listDepth == 0 ? "_Match_Count" : "_By_Content"));
// Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
Map<String,String> inputsMap = new LinkedHashMap<String,String>();
- // linked because order is importtant to line up port connections
+ // linked because order is important to line up port connections
Map<String,String> inputTypes = new LinkedHashMap<String,String>();
inputsMap.put("xml_text", "0");
inputsMap.put("regex", "0");
+ inputsMap.put("case_sensitive", "0");
inputsMap.put("xpath", "0");
inputTypes.put("xml_text", "text/xml");
inputTypes.put("xpath", "text/plain");
inputTypes.put("regex", "text/plain");
+ inputTypes.put("case_sensitive", "text/plain");
Map<String,String> outputsMap = new LinkedHashMap<String,String>();
- outputsMap.put("nodelistAsXML", "1");
+ if(listDepth == 0){
+ outputsMap.put("matchCount", "0");
+ }
+ else{
+ outputsMap.put("nodelistAsXML", ""+listDepth);
+ }
- return addBeanShell(beanShellProcName,
+ return addBeanShell(beanShellProcName, "cross",
inputsMap, inputTypes, outputsMap,
- getRegexFilterScript(), new String[]{"dom4j:dom4j:1.6"},
+ getRegexFilterScript(listDepth), new String[]{"dom4j:dom4j:1.6"},
processors, doc);
}
@@ -1274,13 +1482,13 @@
Map<String,String> outputsMap = new HashMap<String,String>();
outputsMap.put("nodelistAsXML", "1");
- return addBeanShell(beanShellProcName,
+ return addBeanShell(beanShellProcName, "cross",
inputsMap, inputTypes, outputsMap,
getXPathFilterScript(), new String[]{"dom4j:dom4j:1.6"},
processors, doc);
}
- private String[] addBeanShell(String beanShellProcName,
+ private String[] addBeanShell(String beanShellProcName, String vectorComboOp,
Map<String,String> inputsMap, Map<String,String> inputTypes,
Map<String,String> outputsMap, String script, String[] dependencySpecs,
Element processors, Document doc) throws Exception{
@@ -1345,8 +1553,11 @@
iterationStrategyStack.appendChild(iteration);
Element strategy = doc.createElementNS(T2FLOW_NS, "strategy");
iteration.appendChild(strategy);
- Element cross = doc.createElementNS(T2FLOW_NS, "cross");
- strategy.appendChild(cross);
+ Element vectorOp = null;
+ if(vectorComboOp != null){
+ vectorOp = doc.createElementNS(T2FLOW_NS, vectorComboOp);
+ strategy.appendChild(vectorOp); // either cross or dot
+ }
// Lst processor name, input ports and output ports, in that order.
Vector<String> returnSpec = new Vector<String>();
@@ -1370,7 +1581,9 @@
mimeTypes.appendChild(createElWithText(doc, "", "string", inputTypes.get(input.getKey())));
Element port = doc.createElementNS(T2FLOW_NS, "port");
- cross.appendChild(port);
+ if(vectorOp != null){
+ vectorOp.appendChild(port);
+ }
port.setAttribute("depth", input.getValue());
port.setAttribute("name", input.getKey());
}
@@ -1517,7 +1730,7 @@
private String createUniqueName(String preferredName){
if(namesUsed.containsKey(preferredName)){
namesUsed.put(preferredName, namesUsed.get(preferredName).intValue()+1); //increment
- preferredName += namesUsed.get(preferredName);
+ preferredName += "_"+namesUsed.get(preferredName);
}
else{
namesUsed.put(preferredName, 1); // will be auto-boxed to Integer
@@ -1528,8 +1741,6 @@
// dataType(articleName) as required by the Taverna Moby plugin
private String getPortName(MobyPrimaryData data, boolean asCollection){
if(data instanceof MobyPrimaryDataSet){
- // todo: Hmm...don't yet handle case where collections are passed from one service to another...
- // here we assume iterating over the collection
if(asCollection){
return data.getDataType().getName()+"(Collection - '"+data.getName()+"')";
}
@@ -1680,7 +1891,7 @@
// Did we override the default value for the secondary param in the example input?
Element stringValue = doc.createElementNS("", "string");
- if(sampleInput.containsKey(secParam.getName())){
+ if(sampleInput != null && sampleInput.containsKey(secParam.getName())){
stringValue.appendChild(doc.createTextNode(((MobyDataSecondaryInstance) sampleInput.get(secParam.getName())).getValue()));
}
else{ // use the default defined in Moby Central
@@ -1739,6 +1950,28 @@
return central;
}
+ private synchronized String getListFlattenScript() throws Exception{
+ if(listFlattenScript == null){
+ URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_LISTFLATTEN_BEANSHELL);
+ if(scriptURL == null){
+ throw new Exception("Cannot find resource " + T2FLOW_LISTFLATTEN_BEANSHELL);
+ }
+ listFlattenScript = HTMLUtils.getURLContents(scriptURL);
+ }
+ return listFlattenScript;
+ }
+
+ private synchronized String getPassFilterScript() throws Exception{
+ if(passFilterScript == null){
+ URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_PASSFILTER_BEANSHELL);
+ if(scriptURL == null){
+ throw new Exception("Cannot find resource " + T2FLOW_PASSFILTER_BEANSHELL);
+ }
+ passFilterScript = HTMLUtils.getURLContents(scriptURL);
+ }
+ return passFilterScript;
+ }
+
private synchronized String getXPathFilterScript() throws Exception{
if(xpathFilterScript == null){
URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_XPATHFILTER_BEANSHELL);
@@ -1750,7 +1983,7 @@
return xpathFilterScript;
}
- private synchronized String getRegexFilterScript() throws Exception{
+ private synchronized String getRegexFilterScript(int listDepth) throws Exception{
if(regexFilterScript == null){
URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_REGEXFILTER_BEANSHELL);
if(scriptURL == null){
@@ -1758,7 +1991,12 @@
}
regexFilterScript = HTMLUtils.getURLContents(scriptURL);
}
- return regexFilterScript;
+ if(listDepth == 0){// list to scalar conversion
+ return regexFilterScript+"\nString matchCount = \"\"+nodelistAsXML.size();";
+ }
+ else{
+ return regexFilterScript;
+ }
}
private synchronized Element getDispatchStack(Document newOwnerDoc) throws Exception{
More information about the MOBY-guts
mailing list