[MOBY-guts] biomoby commit

Paul Gordon gordonp at dev.open-bio.org
Mon Mar 12 14:37:24 UTC 2007


gordonp
Mon Mar 12 10:37:24 EDT 2007
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services
In directory dev.open-bio.org:/tmp/cvs-serv3415/src/main/ca/ucalgary/seahawk/services

Modified Files:
	MobyClient.java 
Log Message:
Added several functions to parse multi-part data and binary data (because ACDService has these).  Also added processing attributes to member fields for whitespace stripping and Base64 encoding.
moby-live/Java/src/main/ca/ucalgary/seahawk/services MobyClient.java,1.5,1.6
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java	2007/02/08 17:05:11	1.5
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java	2007/03/12 14:37:24	1.6
@@ -35,8 +35,7 @@
  * <a href="http://biomoby.open-bio.org/CVS_CONTENT/moby-live/Java/docs/seahawkRules.html">described 
  * here</a>. Used in Seahawk to provide the service options popup items.
  */
-public class MobyClient 
-{
+public class MobyClient{
     public static final String DATA_MAPPING_XML_RESOURCE = "ca/ucalgary/seahawk/resources/mobyBuilderRules.xml";
     public static final String RESOURCE_SYSTEM_PROPERTY = "seahawk.rules";
     public static final String RULE_SET_TAG = "object";
@@ -52,12 +51,20 @@
     public static final String URL_REGEX_TAG = "url_regex";
     public static final String REGEX_TAG = "regex";
     public static final String XPATH_TAG = "xpath";
+    public static final String WHITESPACE_ATTR = "whitespace";
+    public static final String WHITESPACE_ATTR_STRIP_VAL = "strip";
+    public static final String WHITESPACE_ATTR_NORMALIZE_VAL = "normalize";
+    public static final String WHITESPACE_ATTR_KEEP_VAL = "keep";
+    public static final String ENCODING_ATTR = "encoding";
+    public static final String ENCODING_ATTR_BASE64_VAL = "Base64";
+    public static final String ENCODING_ATTR_NONE_VAL = "none";
+    public static final String SINGLE_RETURNED_VALUE_KEY = "_no_acd_param_should_have_this_name";
 
     private NamespaceContextImpl nsContext;
     private CentralImpl c;
     private HashMap xpathMap; 
     private HashMap urlRegexMap; 
-    private HashMap regexMap; 
+    private Map<Pattern,MobyComplexBuilder> regexMap; 
     private URL dataMappingXMLURL;
     private DocumentBuilder docBuilder;
     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(MobyClient.class);
@@ -68,7 +75,7 @@
         c = new CentralCachedCallsImpl();
 	xpathMap = new HashMap();
 	urlRegexMap = new HashMap();
-	regexMap = new HashMap();
+	regexMap = new HashMap<Pattern,MobyComplexBuilder>();
 	nsContext = new NamespaceContextImpl();
 
  	DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
@@ -88,7 +95,12 @@
         if(rulesResource == null){
 	  dataMappingXMLURL = cl.getResource(DATA_MAPPING_XML_RESOURCE);
         }
-        else{
+	
+	// See if it's a URL
+	try{
+	    dataMappingXMLURL = new URL(rulesResource);
+	}
+        catch(Exception e){
           dataMappingXMLURL = cl.getResource(rulesResource);
         }
 	if(dataMappingXMLURL == null){
@@ -376,6 +388,30 @@
 				"have a non-blank " + DATATYPE_RULE_ATTR + 
 				" attribute as required");
 	}
+	String memberWhitespaceSetting = memTag.getAttribute(WHITESPACE_ATTR);
+	if(memberWhitespaceSetting == null || memberWhitespaceSetting.length() == 0){
+	    memberWhitespaceSetting = WHITESPACE_ATTR_KEEP_VAL;  // default is to keep whitespace
+	}
+	else if(!memberWhitespaceSetting.equals(WHITESPACE_ATTR_KEEP_VAL) &&
+		!memberWhitespaceSetting.equals(WHITESPACE_ATTR_NORMALIZE_VAL) &&
+		!memberWhitespaceSetting.equals(WHITESPACE_ATTR_STRIP_VAL)){
+	    System.err.println("Object member " + memberNameKey + 
+			       " has an unrecognized value for the " + WHITESPACE_ATTR +
+			       " attribute, overriding with default of " + WHITESPACE_ATTR_KEEP_VAL);
+	    memberWhitespaceSetting = WHITESPACE_ATTR_KEEP_VAL;
+	}
+	String memberEncodingSetting = memTag.getAttribute(ENCODING_ATTR);
+	if(memberEncodingSetting == null || memberEncodingSetting.length() == 0){
+	    memberEncodingSetting = ENCODING_ATTR_NONE_VAL;  // default is to not encode
+	}
+	else if(!memberEncodingSetting.equals(ENCODING_ATTR_NONE_VAL) &&
+		!memberEncodingSetting.equals(ENCODING_ATTR_BASE64_VAL)){
+	    System.err.println("Object member " + memberNameKey + 
+			       " has an unrecognized value for the " + ENCODING_ATTR +
+			       " attribute, overriding with default of " + ENCODING_ATTR_NONE_VAL);
+	    memberEncodingSetting = ENCODING_ATTR_NONE_VAL;  // reset the encoding setting itself, leaving the whitespace setting alone
+	}
+
 	if(membersMap.containsKey(memberNameKey)){
 	    System.err.println("Object member " + memberNameKey + " already exists, ignoring new definition");
 	    return;
@@ -385,7 +421,9 @@
 	if(ruleValue == null || ruleValue.length() == 0){
 	    System.err.println("Object member " + memberNameKey + " has a blank value rule");
 	}
-	membersMap.put(memberNameKey, ruleValue);
+	membersMap.put(memberNameKey, new String[]{ruleValue, 
+						   memberWhitespaceSetting, 
+						   memberEncodingSetting});
     }
 
     protected void addNamespaceMapping(Element nsTag, Map namespaceStrings) throws Exception{
@@ -458,9 +496,206 @@
 
     /**
      * Using the regular expression mappings that have been set up, 
+     * maps a string using rules producing moby objects of the given type.
+     */
+    public MobyDataObject[] getMobyObjects(String textData, MobyDataType targetDataType){
+	Vector<MobyDataObject> built = new Vector<MobyDataObject>();
+
+	for(Map.Entry<Pattern,MobyComplexBuilder> mapping: regexMap.entrySet()){
+	    MobyComplexBuilder builder = mapping.getValue();
+	    if(!builder.getDataType().inheritsFrom(targetDataType)){
+		continue;  // this rule builds a data type outside the requested one
+	    }
+	    Matcher occurrence = mapping.getKey().matcher(textData);
+	    while(occurrence.find()){  // every occurrence of the pattern may yield one object
+		try{
+		    MobyDataObject candidate = builder.apply(occurrence);
+		    if(candidate != null){
+			built.add(candidate);
+		    }
+		}
+		catch(MobyException me){
+		    System.err.println("Could not build Moby object from match:" + me);
+		    me.printStackTrace();
+		}
+	    }
+	}
+
+	return built.toArray(new MobyDataObject[built.size()]);
+    }
+
+    /**
+     * Create a MOBY data instance from a map of name->bytes[] by applying this client's rules to each part.
+     * The creation of a single object or collection is done according to the dataTemplate provided.
+     * A null resultParts is treated the same as an empty map.
+     * @throws MobyServiceException NOTE: these are just warning level exceptions about collection-simple casting that you can add to the service response, or ignore if you like
+     */
+    public MobyDataInstance getMobyObject(Map<String, byte[]> resultParts, MobyPrimaryData dataTemplate) 
+	throws Exception, MobyServiceException{
+	Vector<MobyDataObject> results = new Vector<MobyDataObject>();
+	if(resultParts != null){  // tolerate a null map here, as the empty-result branch below already does
+	    // The output parameter *potentially* has multiple parts (e.g. multiple .png files from EMBOSS's
+	    // "banana" program), hence the nested for loops (part iteration, followed by created-object iteration)
+	    for(String resultPartName: resultParts.keySet()){
+		for(MobyDataObject resultPart: getMobyObjects(resultParts.get(resultPartName), dataTemplate.getDataType())){
+		    // Keep the part name on each object (banana.1.png, banana.2.png, ...): it helps the end-user relate them
+		    if(!resultPartName.equals(SINGLE_RETURNED_VALUE_KEY)){  // except for trivial single-byte-array case
+			resultPart.setName(resultPartName);
+		    }
+		    results.add(resultPart);
+		}
+	    }
+	}
+
+	if(results.size() == 0){
+	    if(dataTemplate instanceof MobyPrimaryDataSimple){
+		if(resultParts != null && resultParts.size() > 0){
+		    // Is it a single-byte-array response?
+		    if(resultParts.containsKey(SINGLE_RETURNED_VALUE_KEY)){
+			if(resultParts.get(SINGLE_RETURNED_VALUE_KEY).length != 0){
+			    throw new MobyServiceException(MobyServiceException.WARNING, 
+							   MobyServiceException.INTERNAL_PROCESSING_ERROR, 
+							   null, 
+							   dataTemplate.getName(),
+							   "The non-blank data provided " +
+							   "did not match any MOBY Object rules, " +
+							   "therefore a blank response is being returned.  Contact " +
+							   " the service provider to fix the MOBY Object rules.");
+			}
+		    }
+		    // Otherwise it's a multi-part result
+		    else{
+			for(String partName: resultParts.keySet()){
+			    if(resultParts.get(partName).length != 0){
+				throw new MobyServiceException(MobyServiceException.WARNING, 
+							       MobyServiceException.INTERNAL_PROCESSING_ERROR, 
+							       null, 
+							       dataTemplate.getName(),
+							       "The non-blank data provided (" + partName + 
+							       ") did not match any MOBY Object rules, " +
+							       "therefore a blank response is being returned.  Contact " +
+							       " the service provider to fix the MOBY Object rules.");
+			    }
+			}
+		    }  //end multi-part result
+		}  //end if some results present
+		return null; //nothing to report
+	    }
+	    else{  // Empty set
+		return new MobyDataObjectSet(dataTemplate.getName());		    
+	    }
+	}
+	else if(results.size() > 1){
+	    if(dataTemplate instanceof MobyPrimaryDataSimple){
+		System.err.println("The data map provided " +
+				   "was a collection, but the request was to return a simple. " +
+				   "Only the first value in the collection has been returned.");
+		// TODO: should we instead return the one deepest in the hierarchy (or with the most members)?
+		return results.elementAt(0);
+	    }
+	    else{
+		MobyDataObjectSet resultSet = new MobyDataObjectSet(dataTemplate.getName());
+		resultSet.addAll(results);
+		return resultSet;
+	    }
+	}
+	// One result
+	else{
+	    if(dataTemplate instanceof MobyPrimaryDataSimple){
+		return results.elementAt(0);
+	    }
+	    else{  // Collection of 1
+		MobyDataObjectSet resultSet = new MobyDataObjectSet(dataTemplate.getName());
+		resultSet.add(results.elementAt(0));
+		return resultSet;
+	    }
+	}
+    }
+
+    /**
+     * Using the regular expression mappings that have been set up, 
+     * maps raw bytes using rules producing moby objects of the given type.
+     * Particularly, the regex is <b>assumed to include only ASCII characters at first</b>,
+     * and the byte-translation of it is checked against the raw data's bytes.
+     * The net effect is that you specify magic signatures for file types as regular expression rules,
+     * and calling this method will match up the file type by byte-wise comparing the data and pattern.
+     * If the target type does not inherit from MOBY's base64 class, the bytes are instead decoded to a
+     * String and passed to getMobyObjects(String).
+     * @return the objects built by matching rules (on the binary path, an empty array if none matched)
+     */
+    public MobyDataObject[] getMobyObjects(byte[] rawData, MobyDataType targetDataType){
+	// Only do the magic check if the target data type inherits from MOBY's base64 class (i.e. might encode binary data)
+	if(!targetDataType.inheritsFrom(MobyDataType.getDataType(MobyDataBytes.BASE64_DATATYPE))){
+	    return getMobyObjects(new String(rawData));  // NOTE(review): targetDataType is not applied on this path - confirm intended
+	}
+
+	String rawDataAsString = null;
+
+	Vector<MobyDataObject> objectVector = new Vector<MobyDataObject>();
+
+	for(Pattern pattern: regexMap.keySet()){
+	    MobyComplexBuilder rule = regexMap.get(pattern);
+	    if(rule.getDataType().inheritsFrom(targetDataType)){
+		// Only build the string representation of the byte array if we found a rule
+		// that applies (the conversion is somewhat expensive). The debug preview is
+		// clamped to the data length so inputs shorter than 4 bytes do not throw.
+		if(rawDataAsString == null){
+		    rawDataAsString = bytesToString(rawData);
+		    System.err.println("Start of data is " + rawDataAsString.substring(0, Math.min(4, rawDataAsString.length())));
+		}
+		Matcher matcher = pattern.matcher(rawDataAsString);
+
+		while(matcher.find()){
+		    System.err.println("Found match for binary data");
+		    try{
+			MobyDataObject mobyObj = rule.apply(matcher, rawData);
+			if(mobyObj != null){
+			    objectVector.add(mobyObj);		
+			}
+		    }
+		    catch(MobyException me){
+			System.err.println("Could not build Moby object from match:" + me);
+			me.printStackTrace();
+		    }
+		}
+	    }
+	}
+
+	if(objectVector.size() != 0){
+	    return (MobyDataObject[]) objectVector.toArray(new MobyDataObject[objectVector.size()]);	
+	}
+	// Didn't find anything in the raw form, but there was a rule that applies to the data type, so try as a string...
+	//else if(rawDataAsString != null){
+	//    return getMobyObjects(new String(rawData));
+	//}
+	else{
+	    return new MobyDataObject[0];
+	}
+    }
+
+    // Widen each byte (8-bit) to one char (16-bit) with a zero high byte, for regex checking of the data
+    private String bytesToString(byte[] bytes){
+	// One char per input byte, so the builder can be sized exactly up front
+	StringBuilder chars = new StringBuilder(bytes.length);
+
+	for(byte b: bytes){
+	    // Mask with 0xFF before the cast: a bare (char) cast sign-extends, so a byte such as
+	    // 0x89 would become 0xFF89 instead of 0x0089 and never match a \x89 in a signature regex
+	    chars.append((char) (b & 0xFF));
+	}
+
+	return chars.toString();
+    }
+
+    /**
+     * Using the regular expression mappings that have been set up, 
      * maps a string to moby objects.
      */
     public MobyDataObject[] getMobyObjects(String textData){
+	return getMobyObjects(textData, (byte[]) null);  // cast selects the (String, byte[]) overload; a bare null would be ambiguous with (String, MobyDataType)
+    }
+
+    public MobyDataObject[] getMobyObjects(String textData, byte[] bytes){
 	if(regexMap.isEmpty()){
 	    System.out.println("The MOBY Client has not been provided any regex->moby data mappings!");
 	    return new MobyDataObject[0];
@@ -478,7 +713,7 @@
 		MobyComplexBuilder rule = (MobyComplexBuilder) regexMap.get(pattern);
 
 		try{
-		    MobyDataObject mobyObj = rule.apply(matcher);
+		    MobyDataObject mobyObj = rule.apply(matcher, bytes);
 		    if(mobyObj != null){
 			objectVector.add(mobyObj);		
 		    }
@@ -519,7 +754,7 @@
 	    Matcher matcher = pattern.matcher(urlString);
 
 	    while(matcher.find()){
-		MobyComplexBuilder rule = (MobyComplexBuilder) regexMap.get(pattern);
+		MobyComplexBuilder rule = (MobyComplexBuilder) urlRegexMap.get(pattern);
 
 		try{
 		    MobyDataObject mobyObj = rule.apply(matcher);
@@ -771,9 +1006,10 @@
     }
 
     public void addRegexMapping(String regexp, Map nsRules, String mobyDataType, Map membersMap){ //mobyObj<--mobyNamespaces
-	//System.out.println("regex addMapping: " + regexp);
 	try{
-	    Pattern pattern = Pattern.compile(processRegExp(regexp));	
+	    // Pattern.DOTALL to allow ".*" to span multiple lines, also allow comments (# to EOL) and whitespace
+	    // for better readability in the rules file.
+	    Pattern pattern = Pattern.compile(processRegExp(regexp), Pattern.DOTALL | Pattern.COMMENTS);	
 
 	    // Base object
 	    if(mobyDataType == null || mobyDataType.length() == 0){
@@ -873,6 +1109,7 @@
     public void clear(){
 	xpathMap.clear();
 	regexMap.clear();
+	(new Exception()).printStackTrace();  // NOTE(review): prints the caller's stack trace on every call (nothing is thrown); looks like a debugging aid - confirm it should stay
     }
 
     public void clearXPaths(){
@@ -881,6 +1118,7 @@
 
     public void clearRegexs(){
 	regexMap.clear();
+	(new Exception()).printStackTrace();  // NOTE(review): prints the caller's stack trace on every call (nothing is thrown); looks like a debugging aid - confirm it should stay
     }
 
     public void clearURLRegexs(){
@@ -899,5 +1137,19 @@
 	}
 	//System.out.println("the number of key-value mappings in this map: " + map.size());
     }
+
+    /**
+     * Tells whether some regex rule builds the given data type, or a data type inheriting from it.
+     */
+    public boolean canProduceDataTypeFromString(MobyDataType targetDataType){
+	boolean producible = false;
+	for(MobyComplexBuilder builder: regexMap.values()){
+	    producible = builder.getDataType().inheritsFrom(targetDataType);
+	    if(producible){
+		break;  // one applicable rule is enough
+	    }
+	}
+	return producible;
+    }
 }
 




More information about the MOBY-guts mailing list