[MOBY-guts] biomoby commit
Paul Gordon
gordonp at dev.open-bio.org
Mon Mar 12 14:37:24 UTC 2007
gordonp
Mon Mar 12 10:37:24 EDT 2007
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services
In directory dev.open-bio.org:/tmp/cvs-serv3415/src/main/ca/ucalgary/seahawk/services
Modified Files:
MobyClient.java
Log Message:
Added several functions to parse multi-part data and binary data (because ACDService has these). Also added processing attributes to member fields for whitespace stripping and Base64 encoding.
moby-live/Java/src/main/ca/ucalgary/seahawk/services MobyClient.java,1.5,1.6
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java 2007/02/08 17:05:11 1.5
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/services/MobyClient.java 2007/03/12 14:37:24 1.6
@@ -35,8 +35,7 @@
* <a href="http://biomoby.open-bio.org/CVS_CONTENT/moby-live/Java/docs/seahawkRules.html">described
* here</a>. Used in Seahawk to provide the service options popup items.
*/
-public class MobyClient
-{
+public class MobyClient{
public static final String DATA_MAPPING_XML_RESOURCE = "ca/ucalgary/seahawk/resources/mobyBuilderRules.xml";
public static final String RESOURCE_SYSTEM_PROPERTY = "seahawk.rules";
public static final String RULE_SET_TAG = "object";
@@ -52,12 +51,20 @@
public static final String URL_REGEX_TAG = "url_regex";
public static final String REGEX_TAG = "regex";
public static final String XPATH_TAG = "xpath";
+ public static final String WHITESPACE_ATTR = "whitespace"; // attribute read from a member tag to pick the whitespace setting
+ public static final String WHITESPACE_ATTR_STRIP_VAL = "strip"; // one of the three accepted WHITESPACE_ATTR values
+ public static final String WHITESPACE_ATTR_NORMALIZE_VAL = "normalize"; // one of the three accepted WHITESPACE_ATTR values
+ public static final String WHITESPACE_ATTR_KEEP_VAL = "keep"; // accepted WHITESPACE_ATTR value; also the default for a missing or unrecognized value
+ public static final String ENCODING_ATTR = "encoding"; // attribute read from a member tag to pick the encoding setting
+ public static final String ENCODING_ATTR_BASE64_VAL = "Base64"; // one of the two accepted ENCODING_ATTR values
+ public static final String ENCODING_ATTR_NONE_VAL = "none"; // accepted ENCODING_ATTR value; also the default when the attribute is missing
+ public static final String SINGLE_RETURNED_VALUE_KEY = "_no_acd_param_should_have_this_name"; // result-map key of a single byte-array response; objects built from it are not renamed
private NamespaceContextImpl nsContext;
private CentralImpl c;
private HashMap xpathMap;
private HashMap urlRegexMap;
- private HashMap regexMap;
+ private Map<Pattern,MobyComplexBuilder> regexMap;
private URL dataMappingXMLURL;
private DocumentBuilder docBuilder;
private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(MobyClient.class);
@@ -68,7 +75,7 @@
c = new CentralCachedCallsImpl();
xpathMap = new HashMap();
urlRegexMap = new HashMap();
- regexMap = new HashMap();
+ regexMap = new HashMap<Pattern,MobyComplexBuilder>();
nsContext = new NamespaceContextImpl();
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
@@ -88,7 +95,12 @@
if(rulesResource == null){
dataMappingXMLURL = cl.getResource(DATA_MAPPING_XML_RESOURCE);
}
- else{
+
+ // See if it's a URL. The attempt is skipped when no rules resource was named, so that
+ // the default mapping chosen above is kept rather than being replaced by a lookup of a
+ // null name in the fallback below.
+ try{
+ if(rulesResource != null){
+ dataMappingXMLURL = new URL(rulesResource);
+ }
+ }
+ catch(Exception e){
+ // Not usable as a URL: treat the value as a classpath resource name instead
dataMappingXMLURL = cl.getResource(rulesResource);
}
if(dataMappingXMLURL == null){
@@ -376,6 +388,30 @@
"have a non-blank " + DATATYPE_RULE_ATTR +
" attribute as required");
}
+ String memberWhitespaceSetting = memTag.getAttribute(WHITESPACE_ATTR);
+ if(memberWhitespaceSetting == null || memberWhitespaceSetting.length() == 0){
+ memberWhitespaceSetting = WHITESPACE_ATTR_KEEP_VAL; // default is to keep whitespace
+ }
+ else if(!memberWhitespaceSetting.equals(WHITESPACE_ATTR_KEEP_VAL) &&
+ !memberWhitespaceSetting.equals(WHITESPACE_ATTR_NORMALIZE_VAL) &&
+ !memberWhitespaceSetting.equals(WHITESPACE_ATTR_STRIP_VAL)){
+ System.err.println("Object member " + memberNameKey +
+ " has an unrecognized value for the " + WHITESPACE_ATTR +
+ " attribute, overriding with default of " + WHITESPACE_ATTR_KEEP_VAL);
+ memberWhitespaceSetting = WHITESPACE_ATTR_KEEP_VAL;
+ }
+ // Encoding setting for this member: only the "none" and "Base64" values are accepted,
+ // a missing or unrecognized value falls back to "none".
+ String memberEncodingSetting = memTag.getAttribute(ENCODING_ATTR);
+ if(memberEncodingSetting == null || memberEncodingSetting.length() == 0){
+     memberEncodingSetting = ENCODING_ATTR_NONE_VAL; // default is to not encode
+ }
+ else if(!memberEncodingSetting.equals(ENCODING_ATTR_NONE_VAL) &&
+         !memberEncodingSetting.equals(ENCODING_ATTR_BASE64_VAL)){
+     System.err.println("Object member " + memberNameKey +
+                        " has an unrecognized value for the " + ENCODING_ATTR +
+                        " attribute, overriding with default of " + ENCODING_ATTR_NONE_VAL);
+     // Reset the encoding setting itself. Assigning to the whitespace setting here would
+     // leave the unrecognized encoding in place and put an invalid value into the
+     // whitespace setting validated just above.
+     memberEncodingSetting = ENCODING_ATTR_NONE_VAL;
+ }
+
+
if(membersMap.containsKey(memberNameKey)){
System.err.println("Object member " + memberNameKey + " already exists, ignoring new definition");
return;
@@ -385,7 +421,9 @@
if(ruleValue == null || ruleValue.length() == 0){
System.err.println("Object member " + memberNameKey + " has a blank value rule");
}
- membersMap.put(memberNameKey, ruleValue);
+ membersMap.put(memberNameKey, new String[]{ruleValue,
+ memberWhitespaceSetting,
+ memberEncodingSetting});
}
protected void addNamespaceMapping(Element nsTag, Map namespaceStrings) throws Exception{
@@ -458,9 +496,206 @@
/**
* Using the regular expression mappings that have been set up,
+ * maps a string using rules producing moby objects of the given type.
+ */
+ public MobyDataObject[] getMobyObjects(String textData, MobyDataType targetDataType){
+     Vector<MobyDataObject> built = new Vector<MobyDataObject>();
+
+     for(Map.Entry<Pattern,MobyComplexBuilder> mapping: regexMap.entrySet()){
+         MobyComplexBuilder builder = mapping.getValue();
+         // Only rules whose data type inherits from the requested one take part
+         if(!builder.getDataType().inheritsFrom(targetDataType)){
+             continue;
+         }
+
+         // Each occurrence of the pattern in the text is handed to the rule in turn
+         Matcher occurrence = mapping.getKey().matcher(textData);
+         while(occurrence.find()){
+             try{
+                 MobyDataObject candidate = builder.apply(occurrence);
+                 if(candidate != null){
+                     built.add(candidate);
+                 }
+             }
+             catch(MobyException me){
+                 // A failing match is reported and skipped; the remaining matches are still tried
+                 System.err.println("Could not build Moby object from match:" + me);
+                 me.printStackTrace();
+             }
+         }
+     }
+
+     return built.toArray(new MobyDataObject[built.size()]);
+ }
+
+
+ /**
+  * Create a MOBY data instance from a map of name->bytes[] by applying the rules of this MobyClient.
+  * The creation of a single object or collection is done according to the dataTemplate provided:
+  * a MobyPrimaryDataSimple template yields a single object, any other template a MobyDataObjectSet.
+  *
+  * @param resultParts raw result bytes keyed by part name; a null map is handled like an empty one
+  * @param dataTemplate supplies the target data type, the name given to a returned set,
+  *                     and the choice between a simple and a collection result
+  * @return for a simple template the first object built, or null if nothing was built;
+  *         otherwise a set named after the template holding every object built (possibly none)
+  *
+  * @throws MobyServiceException warning level only: raised for a simple template when non-blank data
+  *         matched no MOBY Object rules. You can add it to the service response, or ignore it if you like
+  */
+ public MobyDataInstance getMobyObject(Map<String, byte[]> resultParts, MobyPrimaryData dataTemplate)
+     throws Exception, MobyServiceException{
+     Vector<MobyDataObject> results = new Vector<MobyDataObject>();
+     // The output parameter *potentially* has multiple parts
+     // (e.g. multiple .png file from EMBOSS's "banana" program)
+     // hence the nested for loops (part iteration, followed by created-object iteration).
+     // The null test has to happen before this first use of the map to be of any use.
+     if(resultParts != null){
+         for(Map.Entry<String, byte[]> part: resultParts.entrySet()){
+             String resultPartName = part.getKey();
+             for(MobyDataObject resultPart: getMobyObjects(part.getValue(), dataTemplate.getDataType())){
+                 // Maintain the name for the object in the collection, as banana.1.png, banana.2.png, etc.
+                 // is useful information for the end-user for making sense of them together! (left to right)
+                 if(!resultPartName.equals(SINGLE_RETURNED_VALUE_KEY)){ // except for trivial single-byte-array case
+                     resultPart.setName(resultPartName);
+                 }
+                 results.add(resultPart);
+             }
+         }
+     }
+
+     boolean simpleRequested = dataTemplate instanceof MobyPrimaryDataSimple;
+
+     // Nothing could be built
+     if(results.isEmpty()){
+         if(!simpleRequested){ // Empty set
+             return new MobyDataObjectSet(dataTemplate.getName());
+         }
+         if(resultParts != null && !resultParts.isEmpty()){
+             // Is it a single-byte-array response?
+             if(resultParts.containsKey(SINGLE_RETURNED_VALUE_KEY)){
+                 if(resultParts.get(SINGLE_RETURNED_VALUE_KEY).length != 0){
+                     throw new MobyServiceException(MobyServiceException.WARNING,
+                                                    MobyServiceException.INTERNAL_PROCESSING_ERROR,
+                                                    null,
+                                                    dataTemplate.getName(),
+                                                    "The non-blank data provided " +
+                                                    "did not match any MOBY Object rules, " +
+                                                    "therefore a blank response is being returned. Contact " +
+                                                    " the service provider to fix the MOBY Object rules.");
+                 }
+             }
+             // Otherwise it's a multi-part result: complain about the first non-blank part
+             else{
+                 for(String partName: resultParts.keySet()){
+                     if(resultParts.get(partName).length != 0){
+                         throw new MobyServiceException(MobyServiceException.WARNING,
+                                                        MobyServiceException.INTERNAL_PROCESSING_ERROR,
+                                                        null,
+                                                        dataTemplate.getName(),
+                                                        "The non-blank data provided (" + partName +
+                                                        ") did not match any MOBY Object rules, " +
+                                                        "therefore a blank response is being returned. Contact " +
+                                                        " the service provider to fix the MOBY Object rules.");
+                     }
+                 }
+             } //end multi-part result
+         } //end if some results present
+         return null; //nothing to report
+     }
+
+     // At least one object was built and a simple was asked for
+     if(simpleRequested){
+         if(results.size() > 1){
+             System.err.println("The data map provided " +
+                                "was a collection, but the request was to return a simple. " +
+                                "Only the first value in the collection has been returned.");
+             // TODO: should we instead return the one deepest in the heirarchy (or with the most members)?
+         }
+         return results.elementAt(0);
+     }
+
+     // At least one object was built and a collection was asked for
+     MobyDataObjectSet resultSet = new MobyDataObjectSet(dataTemplate.getName());
+     if(results.size() > 1){
+         resultSet.addAll(results);
+     }
+     else{ // Collection of 1
+         resultSet.add(results.elementAt(0));
+     }
+     return resultSet;
+ }
+
+ /**
+  * Using the regular expression mappings that have been set up,
+  * maps raw bytes using rules producing moby objects of the given type.
+  * Particularly, the regex is <b>assumed to include only ASCII characters</b>:
+  * every byte of the data is turned into one char, and the patterns are matched
+  * against that char sequence.
+  *
+  * The net effect is that you specify magic signatures for file types as regular expression rules,
+  * and calling this method will match up the file type by byte-wise comparing the data and pattern.
+  *
+  * This byte-wise check is only done when the target data type inherits from MOBY's base64 class.
+  * For any other target type the bytes are decoded with the platform default charset and
+  * handed to getMobyObjects(String).
+  *
+  * @return the objects built from the matches of the applicable rules; an empty array if there
+  *         were none (no fallback to plain String matching is attempted in that case)
+  */
+ public MobyDataObject[] getMobyObjects(byte[] rawData, MobyDataType targetDataType){
+     // Only do the magic check if the target data type inherits from MOBY's base64 class (i.e. might encode binary data)
+     if(!targetDataType.inheritsFrom(MobyDataType.getDataType(MobyDataBytes.BASE64_DATATYPE))){
+         return getMobyObjects(new String(rawData));
+     }
+
+     String rawDataAsString = null;
+
+     Vector<MobyDataObject> objectVector = new Vector<MobyDataObject>();
+
+     for(Pattern pattern: regexMap.keySet()){
+         MobyComplexBuilder rule = regexMap.get(pattern);
+         if(rule.getDataType().inheritsFrom(targetDataType)){
+             // Only build the string representation of the byte array if we
+             // found a rule that applies (since it's a somewhat expensive operation
+             // to do the conversion)
+             if(rawDataAsString == null){
+                 rawDataAsString = bytesToString(rawData);
+                 // Data may be shorter than the 4 chars shown, so clamp the end index
+                 // instead of letting substring() throw
+                 System.err.println("Start of data is " +
+                                    rawDataAsString.substring(0, Math.min(4, rawDataAsString.length())));
+             }
+             Matcher matcher = pattern.matcher(rawDataAsString);
+
+             while(matcher.find()){
+                 System.err.println("Found match for binary data");
+                 try{
+                     // The rule gets the original bytes along with the match
+                     MobyDataObject mobyObj = rule.apply(matcher, rawData);
+                     if(mobyObj != null){
+                         objectVector.add(mobyObj);
+                     }
+                 }
+                 catch(MobyException me){
+                     System.err.println("Could not build Moby object from match:" + me);
+                     me.printStackTrace();
+                 }
+             }
+         }
+     }
+
+     // Falling back to getMobyObjects(new String(rawData)) when nothing matched in the raw
+     // form is deliberately disabled, so an empty array is returned in that case.
+     return objectVector.toArray(new MobyDataObject[objectVector.size()]);
+ }
+
+ //Pad out the bytes (8-bit) into chars (16-bits) of the same unsigned value, for regex checking of the data
+ private String bytesToString(byte[] bytes){
+     StringBuilder chars = new StringBuilder(bytes.length);
+
+     for(int i = 0; i < bytes.length; i++){
+         // Do this because regex use unicode, not ASCII.
+         // Java bytes are signed, so a plain (char) cast sign-extends every value of 0x80
+         // and above (0x89 would become 0xFF89). Masking with 0xFF keeps the added high
+         // byte zero: 0x34 becomes 0x0034 and 0x89 becomes 0x0089.
+         chars.append((char) (bytes[i] & 0xFF));
+     }
+
+     return chars.toString();
+ }
+
+ /**
+ * Using the regular expression mappings that have been set up,
* maps a string to moby objects.
+ * Delegates to getMobyObjects(String, byte[]), passing null for the byte array.
+ *
+ * @param textData the text handed on to the two-argument variant
+ * @return whatever the two-argument variant returns for this text
*/
public MobyDataObject[] getMobyObjects(String textData){
+ return getMobyObjects(textData, (byte[]) null);
+ }
+
+ public MobyDataObject[] getMobyObjects(String textData, byte[] bytes){
if(regexMap.isEmpty()){
System.out.println("The MOBY Client has not been provided any regex->moby data mappings!");
return new MobyDataObject[0];
@@ -478,7 +713,7 @@
MobyComplexBuilder rule = (MobyComplexBuilder) regexMap.get(pattern);
try{
- MobyDataObject mobyObj = rule.apply(matcher);
+ MobyDataObject mobyObj = rule.apply(matcher, bytes);
if(mobyObj != null){
objectVector.add(mobyObj);
}
@@ -519,7 +754,7 @@
Matcher matcher = pattern.matcher(urlString);
while(matcher.find()){
- MobyComplexBuilder rule = (MobyComplexBuilder) regexMap.get(pattern);
+ MobyComplexBuilder rule = (MobyComplexBuilder) urlRegexMap.get(pattern);
try{
MobyDataObject mobyObj = rule.apply(matcher);
@@ -771,9 +1006,10 @@
}
public void addRegexMapping(String regexp, Map nsRules, String mobyDataType, Map membersMap){ //mobyObj<--mobyNamespaces
- //System.out.println("regex addMapping: " + regexp);
try{
- Pattern pattern = Pattern.compile(processRegExp(regexp));
+ // Pattern.DOTALL to allow ".*" to span multiple lines, also allow comments (# to EOL) and whitespace
+ // for better readability in the rules file.
+ Pattern pattern = Pattern.compile(processRegExp(regexp), Pattern.DOTALL | Pattern.COMMENTS);
// Base object
if(mobyDataType == null || mobyDataType.length() == 0){
@@ -873,6 +1109,7 @@
+ /**
+ * Empties the XPath and regex rule maps.
+ */
public void clear(){
xpathMap.clear();
regexMap.clear();
+ // NOTE(review): urlRegexMap is not cleared here - confirm that is intended.
+ // Trace the caller through the class logger at debug level, rather than
+ // dumping a stack trace to stderr on every call.
+ if(logger.isDebugEnabled()){
+ logger.debug("clear() called from", new Exception("caller trace"));
+ }
}
public void clearXPaths(){
@@ -881,6 +1118,7 @@
+ /**
+ * Empties the regex rule map.
+ */
public void clearRegexs(){
regexMap.clear();
+ // Trace the caller through the class logger at debug level, rather than
+ // dumping a stack trace to stderr on every call.
+ if(logger.isDebugEnabled()){
+ logger.debug("clearRegexs() called from", new Exception("caller trace"));
+ }
}
public void clearURLRegexs(){
@@ -899,5 +1137,19 @@
}
//System.out.println("the number of key-value mappings in this map: " + map.size());
}
+
+ /**
+  * Tells whether the regex rules include at least one whose data type
+  * inherits from the given data type.
+  */
+ public boolean canProduceDataTypeFromString(MobyDataType targetDataType){
+     boolean producible = false;
+     for(MobyComplexBuilder builder: regexMap.values()){
+         if(builder.getDataType().inheritsFrom(targetDataType)){
+             // One suitable rule is enough, no need to look at the rest
+             producible = true;
+             break;
+         }
+     }
+     return producible;
+ }
}
More information about the MOBY-guts
mailing list