[MOBY-guts] biomoby commit

Paul Gordon gordonp at dev.open-bio.org
Fri Apr 9 15:58:23 UTC 2010


gordonp
Fri Apr  9 11:58:22 EDT 2010
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util
In directory dev.open-bio.org:/tmp/cvs-serv1902/src/main/ca/ucalgary/seahawk/util

Modified Files:
	DataFlowRecorder.java 
Log Message:
Added support for case sensitive searches, and 'previous input' conditional service execution
moby-live/Java/src/main/ca/ucalgary/seahawk/util DataFlowRecorder.java,1.2,1.3
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java	2010/03/29 20:53:04	1.2
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/util/DataFlowRecorder.java	2010/04/09 15:58:22	1.3
@@ -31,6 +31,8 @@
     public static final String T2FLOW_DISPATCHXML = "ca/ucalgary/seahawk/resources/t2flowDispatchStack.xml";
     public static final String T2FLOW_REGEXFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/RegexFilterBeanShell";
     public static final String T2FLOW_XPATHFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/XPathFilterBeanShell";
+    public static final String T2FLOW_PASSFILTER_BEANSHELL = "ca/ucalgary/seahawk/resources/PassFilterBeanShell"; // = if condition
+    public static final String T2FLOW_LISTFLATTEN_BEANSHELL = "ca/ucalgary/seahawk/resources/FlattenListBeanShell";
 
     private Central mobyCentral;
     private Map<String,Integer> namesUsed;  // keep count of workflow element name usage so as not to duplicate an ID
@@ -39,9 +41,12 @@
     // Value is processor and port names
     private Map<String,String[]> filter2Processor;
     private Map<String,String[]> decomp2Processor; // reuse decomposition processors if same decomp done more than once on a given doc
+    private Map<String,String[]> input2Processor; // if the same input is used more than once, create only one input port
     private DocumentBuilder docBuilder;
     private static String regexFilterScript = null;
     private static String xpathFilterScript = null;
+    private static String passFilterScript = null;
+    private static String listFlattenScript = null;
     private static Element dispatchStack = null;
     private static Transformer nullTransformer = null;
     private static Logger logger = Logger.getLogger(DataFlowRecorder.class.getName());
@@ -57,8 +62,9 @@
 
 	namesUsed = new HashMap<String,Integer>();
 	url2Processor = new HashMap<String,String>();
-	filter2Processor = new HashMap<String,String[]>(); // key is compound:  url \n regex \n xpath
+	filter2Processor = new HashMap<String,String[]>(); // key is compound:  url \n regex \n xpath \n caseSensitivity
 	decomp2Processor = new HashMap<String,String[]>(); // key is url with xpath attached as ref (the way Seahawk generates them)
+	input2Processor = new HashMap<String,String[]>(); // key is input instance XML 
     }
 
     /**
@@ -154,6 +160,7 @@
 	url2Processor.clear();  // clear map of results already produced in backtracking (handling workflow forks)
 	filter2Processor.clear(); // ditto
 	decomp2Processor.clear(); // ditto
+	input2Processor.clear(); // ditto
 
 	// Generate all of the outputs requested by the call.  None should be intermediaries 
 	// of another (the backtracking done by addWorkflowElements will capture these).
@@ -162,7 +169,7 @@
 	    // Create the inputs, processors and data links required for the workflow to create the result doc
 	    String ultimateProcessorName = addWorkflowElements(result.getKey(), doc, inputPorts, processors, datalinks);
 	    
-	    // If there is a filter active on the currently displyed doc, we need to apply it here
+	    // If there is a filter active on the currently displayed doc, we need to apply it here
 	    FilterSearch filter = result.getValue();
 	    String[] regexProcessorAndPorts = null;
 	    String filterKey = null;
@@ -190,9 +197,11 @@
 		    }
 		    else{
 			regexProcessorAndPorts = createRegexFilter(filter.getFilterRegex().toString(), 
-								   filter.getSelectedXPath(), 
+								   filter.getSelectedXPath(),
+								   filter.getCaseSensitivity(),
 								   ultimateProcessorName,
-								   getPortName(outputParam, true), 
+								   getPortName(outputParam, true),
+								   1, // desired list depth
 								   processors, datalinks, doc);
 			filter2Processor.put(filterKey, regexProcessorAndPorts);
 		    }
@@ -275,6 +284,9 @@
 	MobyService service = DataUtils.getService(resultDom);
 	Registry registry = DataUtils.getRegistry(resultDom);
 	MobyDataJob sampleJob = DataUtils.getInputSample(resultDom, registry);
+	if(sampleJob == null){
+	    return "foo";  //todo: doc was a loaded Moby XML, not a service output or recognized data
+	}
 
 	Element processorElement = createProcessorElement(service, sampleJob, doc);
 	// add processor to workflow
@@ -297,111 +309,181 @@
 	    MobyPrimaryData sampleData = (MobyPrimaryData) sd;
 
 	    // has provenance info?
+	    String[] data = null;
+	    String[] condPassProcessorAndPorts = null;  //[procname, input port, outputport]
 	    if(sampleData.getUserData() != null){
-		// User data has the form srcURL#generalSelectionXPath <tab> actualInputDataXptr <tab> regexFilter 
-		// where the filter is optional 
-		String[] data = sampleData.getUserData().toString().split("\t");
+		// User data has the form 
+		// srcURL#generalSelectionXPath <tab> actualInputDataXptr <tab> regexFilter <tab> conditionalURL
+		// where the filter and conditionalURL are optional 
+		data = sampleData.getUserData().toString().split("\t");
+		//options: selection + filter + cond, selection + cond, or cond only
+		if(data.length == 11 || data.length == 7 || data.length == 5){
+		    System.err.println("Adding conditional for " + resultURLString);
+		    String conditionURL = data[data.length-5];
+		    String conditionRegex = data[data.length-4];
+		    XPathOption conditionXPath = new XPathOption(data[data.length-3], data[data.length-2]);
+		    boolean caseSensitivity = Boolean.parseBoolean(data[data.length-1]);
+		    condPassProcessorAndPorts = createServiceConditionFilter(new URL(conditionURL), conditionRegex, conditionXPath, caseSensitivity,
+									     doc, inputPorts, processors, datalinks);
+		    String[] conditionlessData = new String[data.length-5];
+		    System.arraycopy(data, 0, conditionlessData, 0, data.length-5);
+		    data = conditionlessData;
+		}
+		else{
+		    System.err.println("Skipping conditional, only " + data.length + 
+				       "members in provenance data for " + resultURLString);
+		}
+	    }
+
+	    // true means treat as a collection if that's what the sample data is
+	    String sinkPortName = getPortName(sampleData, true);
+	    if(data != null && data.length != 0){
 		URL dataSrcURL = null;
 		try{
 		    dataSrcURL = new URL(data[0]);
 		} catch(Exception e){
 		    logger.log(Level.WARNING, "Ignoring unexpected UserData in Moby DOM, was not " +
 			       "a provenance URL as expected URL (" + sampleData.getUserData().toString() + ")");
+		    continue;
 		}
-		if(dataSrcURL != null){
-		    // recursion for workflow creation by backtracking service input provenance
-		    String feedingProcessorName = null;
-		    String feedingProcessorPort = null;
-		    String dataSrcURLString = dataSrcURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
-		    if(url2Processor.containsKey(dataSrcURLString)){
-			feedingProcessorName = url2Processor.get(dataSrcURLString);
+		// recursion for workflow creation by backtracking service input provenance
+		String feedingProcessorName = null;
+		String feedingProcessorPort = null;
+		String dataSrcURLString = dataSrcURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
+		if(url2Processor.containsKey(dataSrcURLString)){
+		    feedingProcessorName = url2Processor.get(dataSrcURLString);
+		}
+		else{
+		    feedingProcessorName = addWorkflowElements(dataSrcURL, doc, inputPorts, processors, datalinks);
+		}
+		
+		// Take into account data[2..5] if they are present, 
+		// which filter the data by a regex before any other activities happen
+		// Format of spec is regex <tab> xpath <tab> xpathTextDesc <tab> booleanForCaseSensitivity
+		if(data.length == 6){
+		    String[] origFeederProcessorAndPort = getPortFromURLRef(dataSrcURL,
+									    sampleData,
+									    feedingProcessorName,
+									    null,
+									    processors, 
+									    datalinks,
+									    doc,
+									    false);
+		    
+		    // Lookup key is url \n regex \n xpath \n caseSensitivity
+		    String[] regexProcessorAndPorts = null;
+		    String filterKey = dataSrcURLString+"\n"+data[2]+"\n"+data[3]+"\n"+data[5];
+		    if(filter2Processor.containsKey(filterKey)){ // filter already exists from another branch
+			regexProcessorAndPorts = filter2Processor.get(filterKey);
 		    }
 		    else{
-			feedingProcessorName = addWorkflowElements(dataSrcURL, doc, inputPorts, processors, datalinks);
-		    }
-
-		    // true means treat as a collection if that's what the sample data is
-		    String sinkPortName = getPortName(sampleData, true);
-
-		    // Take into account data[2..4] if they are present, 
-		    // which filter the data by a regex before any other activities happen
-		    // Format of spec is regex <tab> xpath <tab> xpathTextDesc, so the earlier split gives use indices 1..3
-		    if(data.length == 5){
-			String[] origFeederProcessorAndPort = getPortFromURLRef(dataSrcURL,
-										sampleData,
-										feedingProcessorName,
-										null,
-										processors, 
-										datalinks,
-										doc,
-										false);
-
-			// Lookup key is url \n regex \n xpath
-			String[] regexProcessorAndPorts = null;
-			String filterKey = dataSrcURLString+"\n"+data[2]+"\n"+data[3];
-			if(filter2Processor.containsKey(filterKey)){ // filter already exists from another branch
-			    regexProcessorAndPorts = filter2Processor.get(filterKey);
-			}
-			else{
-			    regexProcessorAndPorts = createRegexFilter(data[2], 
-								       new XPathOption(data[3], data[4]), 
-								       origFeederProcessorAndPort[0], 
-								       origFeederProcessorAndPort[1], 
-								       processors, datalinks, doc);
-			    // New filter is applied to just this branch from a service for the moment...record the filter
-			    // so that if same filter criteria are applied to more than one branch, we only make one filter
-			    filter2Processor.put(dataSrcURLString+"\n"+data[2]+"\n"+data[3], regexProcessorAndPorts);
-			}
-			feedingProcessorName = regexProcessorAndPorts[0];
-			feedingProcessorPort = regexProcessorAndPorts[regexProcessorAndPorts.length-1];  //last one is output
+			regexProcessorAndPorts = createRegexFilter(data[2], 
+								   new XPathOption(data[3], data[4]),
+								   Boolean.parseBoolean(data[5]),
+								   origFeederProcessorAndPort[0], 
+								   origFeederProcessorAndPort[1],
+								   1, // desired list depth 
+								   processors, datalinks, doc);
+			// New filter is applied to just this branch from a service for the moment...record the filter
+			// so that if same filter criteria are applied to more than one branch, we only make one filter
+			filter2Processor.put(filterKey, regexProcessorAndPorts);
 		    }
-
-		    // getPortFromURLRef() may inject extra processors between the processorName and feedingProcessorName
-		    // in order to maintain type safety, etc. so the data link may change (based on last arg being set to true).
-		    String[] feederProcessorAndPort = getPortFromURLRef(dataSrcURL,
-									sampleData,
-									feedingProcessorName,
-									feedingProcessorPort, //if null, determined from urlref
-									processors, 
-									datalinks,
-									doc,
-									true); 
-		    datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0], feederProcessorAndPort[1],
-								processorName, sinkPortName, 
+		    feedingProcessorName = regexProcessorAndPorts[0];
+		    feedingProcessorPort = regexProcessorAndPorts[regexProcessorAndPorts.length-1];  //last one is output
+		}
+		
+		// getPortFromURLRef() may inject extra processors between the processorName and feedingProcessorName
+		// in order to maintain type safety, etc. so the data link may change (based on last arg being set to true).
+		String[] feederProcessorAndPort = getPortFromURLRef(dataSrcURL,
+								    sampleData,
+								    feedingProcessorName,
+								    feedingProcessorPort, //if null, determined from urlref
+								    processors, 
+								    datalinks,
+								    doc,
+								    true); 
+		if(condPassProcessorAndPorts != null){
+		    // inject condition filter between feeder service and current service 
+		    datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0],
+								feederProcessorAndPort[1],
+								condPassProcessorAndPorts[0], 
+								condPassProcessorAndPorts[1],
 								doc));
-		    
+		    feederProcessorAndPort[0] = condPassProcessorAndPorts[0];
+		    feederProcessorAndPort[1] = condPassProcessorAndPorts[2];
 		}
+
+		datalinks.appendChild(createDataLinkElement(feederProcessorAndPort[0], feederProcessorAndPort[1],
+							    processorName, sinkPortName, 
+							    doc));
+		
 	    }
 	    // Otherwise it's a workflow input the user will need to specify
 	    else{
-		String uniqueInputName = createUniqueName(getInputName(sampleData));
-		// todo: sample data may be of more specific type than service requires
-		inputPorts.appendChild(createWorkflowInputElement(uniqueInputName, sampleData, doc));
-		if(sampleData.getDataType().getName().equals(MobyTags.MOBYOBJECT)){
-		    String mobifyingProcessorName = addIdMobifyingProcessor(processors, datalinks, 
-									    sampleData, processorName, 
-									    inputName, doc);
-		    datalinks.appendChild(createWorkflowInputLinkElement(uniqueInputName, 
-									 mobifyingProcessorName, 
-									 "fileurl", 
-									 sampleData, 
-									 doc));
+		// pName is editable name for input processor target...
+		// allows injection of condition without affecting method's return value
+		String pName = processorName; 
+		if(condPassProcessorAndPorts != null){
+		    // inject condition filter between data creator and current service 
+		    datalinks.appendChild(createDataLinkElement(condPassProcessorAndPorts[0],
+								condPassProcessorAndPorts[2],
+								pName,
+								sinkPortName,
+								doc));
+		    pName = condPassProcessorAndPorts[0];
+		    sinkPortName = condPassProcessorAndPorts[1];
+		}
+		// The sample data may be used in more than one branch of the flow...avoid duplication
+		// Get the real data XML, to see if we've encountered it before as input (it'd be in the hash)
+		// Remember that sd is the loop's original MobyDataInstance before it's been coerced into a MobyPrimaryData, 
+		// which doesn't have XML modes, etc.
+		String inputKey = getInputKey(sd);
+		System.err.println("Input key for " + processorName + " is " + inputKey);
+
+		String[] mobifyingProcessorNameAndPorts = null;
+		if(input2Processor.containsKey(inputKey)){
+		    mobifyingProcessorNameAndPorts = input2Processor.get(inputKey);		    
 		}
 		else{
-		    // TODO: Need to build complex input from MOB rule or spreadsheet fields?
+		    if(sampleData.getDataType().getName().equals(MobyTags.MOBYOBJECT)){
+			mobifyingProcessorNameAndPorts = addIdMobifyingProcessor(processors, datalinks, inputPorts,
+										 sampleData, doc);
+			input2Processor.put(inputKey, mobifyingProcessorNameAndPorts);
+		    }
+		    else{
+			// TODO: Need to build complex input from MOB rule or spreadsheet fields?
+		    }
 		}
+		// link the created data to the workflow service
+		datalinks.appendChild(createDataLinkElement(mobifyingProcessorNameAndPorts[0], 
+							    mobifyingProcessorNameAndPorts[1], 
+							    pName, sinkPortName, 
+							    doc));
 	    }
+
 	}
 
 	return processorName;
     }
 
+    // returns the same value for moby sample data instances containing the same data in XML form
+    private String getInputKey(MobyDataInstance sd){
+	int oldXmlMode = sd.getXmlMode();
+	sd.setXmlMode(MobyDataInstance.SERVICE_XML_MODE);
+	String inputKey = sd.toXML();
+	sd.setXmlMode(oldXmlMode);
+
+	// get rid of the top-level article name, as this is immaterial to the 
+	// value equivalence of objects (only matters to the actual service call mechanism)
+	return inputKey.replaceFirst(MobyTags.ARTICLENAME+"\\s*=\\s*(['\"]).*?\\1", "");
+    }
+
     // Actually adds a few processors, but only one needs to be returned and connected to an input port.
     // The others are constant values.  By default we assume that you want to run a bunch of IDs 
     // in a file, we also link in a Taverna spreadsheet importer.
-    private String addIdMobifyingProcessor(Element processors, Element datalinks, 
-					   MobyPrimaryData sampleData, String targetProcessorName, 
-					   String targetProcessorPort, Document doc) throws Exception{
+    // Returns [dataCreatorProcName, outputPort]
+    private String[] addIdMobifyingProcessor(Element processors, Element datalinks, Element dataFlowInputPorts,
+					     MobyPrimaryData sampleData, Document doc) throws Exception{
 	String processorName = "Create-" + sampleData.getDataType().getName();
 	MobyNamespace ns = null;
 	MobyNamespace[] nss = ((MobyPrimaryData) sampleData).getNamespaces();
@@ -518,7 +600,7 @@
 	Map<String,String> feeds = new HashMap<String,String>();  //what port on data creator it maps to
 	constants.put(ns.getName()+"-namespace-constant", ns.getName());
 	feeds.put(ns.getName()+"-namespace-constant", "namespace");
-	constants.put("article_name-constant", targetProcessorPort);
+	constants.put("article_name-constant", "unimportant");
 	feeds.put("article_name-constant", "article name");
 
 	for(Map.Entry<String,String> constant: constants.entrySet()){
@@ -632,19 +714,23 @@
 	port.setAttribute("name", "fileurl");
 	port.setAttribute("depth", "0");
 
+	String uniqueInputName = createUniqueName(getInputName(sampleData));
+	// todo: sample data may be of more specific type than service requires
+	dataFlowInputPorts.appendChild(createWorkflowInputElement(uniqueInputName, sampleData, doc));
+	datalinks.appendChild(createWorkflowInputLinkElement(uniqueInputName, 
+							     importerName, 
+							     "fileurl", 
+							     sampleData, 
+							     doc));
+
 	// link the spreadsheet reader to the Moby data creator 
 	datalinks.appendChild(createDataLinkElement(importerName, outputUniqueName,
 						    processorName, "id",
 						    doc));
 
-	// link the created data to the first workflow service, which takes an id
-	datalinks.appendChild(createDataLinkElement(processorName, "mobyData", 
-						    targetProcessorName, "Object("+targetProcessorPort+")", 
-						    doc));
-
-	// return the name of the spreadsheet reading processor so it can be hooked up
-	// to an input port for a Moby service
-	return importerName;
+	// return the name of the Moby data creator processor and its output port so it can be hooked up
+	// to input ports for Moby services
+	return new String[]{processorName, portNames[portNames.length-1]};
     }
 
     private Element createConstantProcessor(String constantName, String constantValue, Document doc)
@@ -884,15 +970,86 @@
 				 processors, datalinks, doc);
     }
 
-    private String[] createRegexFilter(String regex, XPathOption xpath, String srcProcessor, String srcPort, 
+    // Make the continued use of data X in the workflow conditional on the results of running X through service f
+    // and passing the filter condition set on f.  Equivalent to if(f(X) matches f_filter){...}
+    // returns [proc name, input port, output port]
+    private String[] createServiceConditionFilter(URL conditionURL, String filterRegex, XPathOption filterXPath,
+						  boolean caseSensitive, Document doc,
+						  Element inputPorts, Element processors, Element datalinks)
+	throws Exception{
+
+	// Open the conditionURL, and find out what service result is used, and what filter applied
+ 	Document conditionDoc = docBuilder.parse(conditionURL.openStream());
+ 	MobyService service = DataUtils.getService(conditionDoc);
+ 	Document condServiceOutputDoc = DataUtils.getInputDoc(conditionDoc);
+
+	// The bits to run the service "f"
+
+ 	String dataSrcURLString = conditionURL.toString().replaceFirst("#.*$", ""); //get rid of ref part
+ 	String processorName = null;
+ 	// See if the service creating the input for the conditional service has already been added to the workflow
+ 	if(url2Processor.containsKey(dataSrcURLString)){
+ 	    processorName = url2Processor.get(dataSrcURLString);
+ 	}
+ 	else{
+	    processorName = addWorkflowElements(conditionURL, doc, inputPorts, processors, datalinks);
+ 	}
+
+ 	String filterKey = dataSrcURLString+"\n"+filterRegex+"\n"+filterXPath.getXPath()+"\n"+caseSensitive;
+ 	String[] regexFilterProcNameAndPort = null;
+ 	//todo: gimpy loop below, as more than one service output would cause a trampling of regex output ports
+ 	for(MobyPrimaryData outputParam: service.getPrimaryOutputs()){
+ 	    // See if the filter on the conditional service has already been used in the workflow
+ 	    if(filter2Processor.containsKey(filterKey)){
+ 		regexFilterProcNameAndPort = filter2Processor.get(filterKey);
+ 	    }
+ 	    else{
+ 		regexFilterProcNameAndPort = createRegexFilter(filterRegex, 
+ 							       filterXPath,
+							       caseSensitive, 
+ 							       processorName,
+ 							       getPortName(outputParam, true),
+							       0, //depth of list desired (will return match count)
+ 							       processors, datalinks, doc);
+ 		filter2Processor.put(filterKey, regexFilterProcNameAndPort);
+ 	    }
+ 	}
+
+	// We need to flatten the 2-deep list generated by the regex filter's cross product
+	String[] beanShellFlattenerProcNameAndPorts = addListFlattenBeanShell(processors, doc);
+
+	datalinks.appendChild(createDataLinkElement(regexFilterProcNameAndPort[0],
+						    regexFilterProcNameAndPort[1],
+						    beanShellFlattenerProcNameAndPorts[0], 
+						    beanShellFlattenerProcNameAndPorts[1],
+						    doc));
+
+	// Essentially an "if" condition, error if incoming value is empty, missing, or zero
+	String[] beanShellFilterProcNameAndPorts = addPassFilterBeanShell(processors, doc);
+
+	datalinks.appendChild(createDataLinkElement(beanShellFlattenerProcNameAndPorts[0],
+						    beanShellFlattenerProcNameAndPorts[2],
+						    beanShellFilterProcNameAndPorts[0], 
+						    beanShellFilterProcNameAndPorts[2],
+						    doc));
+
+	return new String[]{beanShellFilterProcNameAndPorts[0], 
+                            beanShellFilterProcNameAndPorts[1], 
+                            beanShellFilterProcNameAndPorts[3]};
+    }
+
+    private String[] createRegexFilter(String regex, XPathOption xpath, boolean caseSensitive, 
+				       String srcProcessor, String srcPort, int listDepth, 
  				       Element processors, Element datalinks, Document doc)
  	throws Exception{
 	String constantXPathName = createUniqueName(xpath.toString());
 	processors.appendChild(createConstantProcessor(constantXPathName, xpath.getXPath(), doc));
 	String constantRegexName = createUniqueName(regex);
 	processors.appendChild(createConstantProcessor(constantRegexName, regex, doc));
+	String constantCaseSensitivityName = createUniqueName("cs_"+caseSensitive);
+	processors.appendChild(createConstantProcessor(constantCaseSensitivityName, ""+caseSensitive, doc));
 
-	String[] beanShellFilterProcNameAndPorts = addRegexFilterBeanShell(processors, doc);
+	String[] beanShellFilterProcNameAndPorts = addRegexFilterBeanShell(listDepth, processors, doc);
 	
 	datalinks.appendChild(createDataLinkElement(srcProcessor,
 						    srcPort,
@@ -903,11 +1060,15 @@
 						    beanShellFilterProcNameAndPorts[0], 
 						    beanShellFilterProcNameAndPorts[2],
 						    doc)); 
-	datalinks.appendChild(createDataLinkElement(constantXPathName, "value",
+	datalinks.appendChild(createDataLinkElement(constantCaseSensitivityName, "value",
 						    beanShellFilterProcNameAndPorts[0], 
 						    beanShellFilterProcNameAndPorts[3],
 						    doc)); 
-	return new String[]{beanShellFilterProcNameAndPorts[0], beanShellFilterProcNameAndPorts[4]};	
+	datalinks.appendChild(createDataLinkElement(constantXPathName, "value",
+						    beanShellFilterProcNameAndPorts[0], 
+						    beanShellFilterProcNameAndPorts[4],
+						    doc)); 
+	return new String[]{beanShellFilterProcNameAndPorts[0], beanShellFilterProcNameAndPorts[5]};	
     }
     
     private String[] createXrefParser(MobyNamespace nsObj, String srcProcessor, String srcPort, 
@@ -1231,29 +1392,76 @@
 	}
     }
 
+   private String[] addListFlattenBeanShell(Element processors, Document doc) 
+	throws Exception{
+	String beanShellProcName = createUniqueName("Create_Pass_Fail_List");
+	
+	// Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
+	Map<String,String> inputsMap = new LinkedHashMap<String,String>();  
+	// linked because order is important to line up port connections
+	Map<String,String> inputTypes = new LinkedHashMap<String,String>();
+	inputsMap.put("inputlist", "2");
+	inputTypes.put("inputlist", "text/plain");
+	Map<String,String> outputsMap = new LinkedHashMap<String,String>();
+	outputsMap.put("outputlist", "1");
+
+	return addBeanShell(beanShellProcName, "dot",
+			    inputsMap, inputTypes, outputsMap, 
+			    getListFlattenScript(), new String[]{}, 
+			    processors, doc);	
+    }    
+
+    private String[] addPassFilterBeanShell(Element processors, Document doc) 
+	throws Exception{
+	String beanShellProcName = createUniqueName("IfPassesContentFilter");
+	
+	// Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
+	Map<String,String> inputsMap = new LinkedHashMap<String,String>();  
+	// linked because order is important to line up port connections
+	Map<String,String> inputTypes = new LinkedHashMap<String,String>();
+	inputsMap.put("dataToPassThrough", "1");
+	inputsMap.put("conditionResults", "1");
+	inputTypes.put("dataToPassThrough", "text/xml");
+	inputTypes.put("conditionResults", "text/plain");
+	Map<String,String> outputsMap = new LinkedHashMap<String,String>();
+	outputsMap.put("dataPassed", "1");
+
+	return addBeanShell(beanShellProcName, "cross",
+			    inputsMap, inputTypes, outputsMap, 
+			    getPassFilterScript(), new String[]{}, 
+			    processors, doc);	
+    }    
+
     // Returns the name of the bean shell processor and its ports, so we can create the data links in the caller
     // The input is the original moby xml, and the xpath to extract, and the regex to apply to the text contents of the xpath results.
     // The output is a list of moby xml docs, one for each mobyData that fit the xpath and regex criteria.
-    private String[] addRegexFilterBeanShell(Element processors, Document doc) 
+    private String[] addRegexFilterBeanShell(int listDepth, Element processors, Document doc) 
 	throws Exception{
-	String beanShellProcName = createUniqueName("Filter_By_Content");
+	String beanShellProcName = createUniqueName("Filter"+(listDepth == 0 ? "_Match_Count" : "_By_Content"));
 	
 	// Now do all the param-specific stuff below (inserts into various parts of the elements defined above)
 	Map<String,String> inputsMap = new LinkedHashMap<String,String>();  
-	// linked because order is importtant to line up port connections
+	// linked because order is important to line up port connections
 	Map<String,String> inputTypes = new LinkedHashMap<String,String>();
 	inputsMap.put("xml_text", "0");
 	inputsMap.put("regex", "0");
+        inputsMap.put("case_sensitive", "0");
 	inputsMap.put("xpath", "0");
 	inputTypes.put("xml_text", "text/xml");
 	inputTypes.put("xpath", "text/plain");
 	inputTypes.put("regex", "text/plain");
+	inputTypes.put("case_sensitive", "text/plain");
 	Map<String,String> outputsMap = new LinkedHashMap<String,String>();
-	outputsMap.put("nodelistAsXML", "1");
+	if(listDepth == 0){
+	    outputsMap.put("matchCount", "0");
+        }
+        else{
+            outputsMap.put("nodelistAsXML", ""+listDepth);
+        }
 
-	return addBeanShell(beanShellProcName,
+	return addBeanShell(beanShellProcName, "cross",
 			    inputsMap, inputTypes, outputsMap, 
-			    getRegexFilterScript(), new String[]{"dom4j:dom4j:1.6"}, 
+			    getRegexFilterScript(listDepth), new String[]{"dom4j:dom4j:1.6"}, 
 			    processors, doc);	
     }
 
@@ -1274,13 +1482,13 @@
 	Map<String,String> outputsMap = new HashMap<String,String>();
 	outputsMap.put("nodelistAsXML", "1");
 
-	return addBeanShell(beanShellProcName,
+	return addBeanShell(beanShellProcName, "cross",
 			    inputsMap, inputTypes, outputsMap, 
 			    getXPathFilterScript(), new String[]{"dom4j:dom4j:1.6"}, 
 			    processors, doc);
     }
 
-    private String[] addBeanShell(String beanShellProcName,
+    private String[] addBeanShell(String beanShellProcName, String vectorComboOp,
 				  Map<String,String> inputsMap, Map<String,String> inputTypes, 
 				  Map<String,String> outputsMap, String script, String[] dependencySpecs,
 				  Element processors, Document doc) throws Exception{
@@ -1345,8 +1553,11 @@
 	iterationStrategyStack.appendChild(iteration);
 	Element strategy = doc.createElementNS(T2FLOW_NS, "strategy");
 	iteration.appendChild(strategy);
-	Element cross = doc.createElementNS(T2FLOW_NS, "cross");
-	strategy.appendChild(cross);
+	Element vectorOp = null;
+        if(vectorComboOp != null){
+            vectorOp = doc.createElementNS(T2FLOW_NS, vectorComboOp);
+	    strategy.appendChild(vectorOp); // either cross or dot
+        }
 
 	// Lst processor name, input ports and output ports, in that order.
 	Vector<String> returnSpec = new Vector<String>();
@@ -1370,7 +1581,9 @@
 	    mimeTypes.appendChild(createElWithText(doc, "", "string", inputTypes.get(input.getKey())));
 
 	    Element port = doc.createElementNS(T2FLOW_NS, "port");
-	    cross.appendChild(port);
+	    if(vectorOp != null){
+                vectorOp.appendChild(port);
+            }
 	    port.setAttribute("depth", input.getValue());
 	    port.setAttribute("name", input.getKey());
 	}
@@ -1517,7 +1730,7 @@
     private String createUniqueName(String preferredName){
 	if(namesUsed.containsKey(preferredName)){
 	    namesUsed.put(preferredName, namesUsed.get(preferredName).intValue()+1); //increment
-	    preferredName += namesUsed.get(preferredName);
+	    preferredName += "_"+namesUsed.get(preferredName);
 	}
 	else{
 	    namesUsed.put(preferredName, 1); // will be auto-boxed to Integer
@@ -1528,8 +1741,6 @@
     // dataType(articleName) as required by the Taverna Moby plugin
     private String getPortName(MobyPrimaryData data, boolean asCollection){
 	if(data instanceof MobyPrimaryDataSet){
-	    // todo: Hmm...don't yet handle case where collections are passed from one service to another...
-	    // here we assume iterating over the collection
 	    if(asCollection){
 		return data.getDataType().getName()+"(Collection - '"+data.getName()+"')";
 	    }
@@ -1680,7 +1891,7 @@
 
 	    // Did we override the default value for the secondary param in the example input?
 	    Element stringValue = doc.createElementNS("", "string");
-	    if(sampleInput.containsKey(secParam.getName())){
+	    if(sampleInput != null && sampleInput.containsKey(secParam.getName())){
 		stringValue.appendChild(doc.createTextNode(((MobyDataSecondaryInstance) sampleInput.get(secParam.getName())).getValue()));
 	    }
 	    else{  // use the default defined in Moby Central
@@ -1739,6 +1950,28 @@
 	return central;
     }
 
+    private synchronized String getListFlattenScript() throws Exception{
+	if(listFlattenScript == null){
+	    URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_LISTFLATTEN_BEANSHELL);
+	    if(scriptURL == null){
+		throw new Exception("Cannot find resource " + T2FLOW_LISTFLATTEN_BEANSHELL);
+	    }
+	    listFlattenScript = HTMLUtils.getURLContents(scriptURL);
+	}
+	return listFlattenScript;
+    }
+
+    private synchronized String getPassFilterScript() throws Exception{
+	if(passFilterScript == null){
+	    URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_PASSFILTER_BEANSHELL);
+	    if(scriptURL == null){
+		throw new Exception("Cannot find resource " + T2FLOW_PASSFILTER_BEANSHELL);
+	    }
+	    passFilterScript = HTMLUtils.getURLContents(scriptURL);
+	}
+	return passFilterScript;
+    }
+
     private synchronized String getXPathFilterScript() throws Exception{
 	if(xpathFilterScript == null){
 	    URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_XPATHFILTER_BEANSHELL);
@@ -1750,7 +1983,7 @@
 	return xpathFilterScript;
     }
 
-    private synchronized String getRegexFilterScript() throws Exception{
+    private synchronized String getRegexFilterScript(int listDepth) throws Exception{
 	if(regexFilterScript == null){
 	    URL scriptURL = getClass().getClassLoader().getResource(T2FLOW_REGEXFILTER_BEANSHELL);
 	    if(scriptURL == null){
@@ -1758,7 +1991,12 @@
 	    }
 	    regexFilterScript = HTMLUtils.getURLContents(scriptURL);
 	}
-	return regexFilterScript;
+	if(listDepth == 0){// list to scalar conversion
+	    return regexFilterScript+"\nString matchCount = \"\"+nodelistAsXML.size();";
+        }
+        else{
+ 	    return regexFilterScript;
+        }
     }
 
     private synchronized Element getDispatchStack(Document newOwnerDoc) throws Exception{




More information about the MOBY-guts mailing list