[MOBY-guts] biomoby commit

Paul Gordon gordonp at dev.open-bio.org
Fri Jun 8 14:04:27 UTC 2007


gordonp
Fri Jun  8 10:04:27 EDT 2007
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources
In directory dev.open-bio.org:/tmp/cvs-serv28675/src/main/ca/ucalgary/services/resources

Modified Files:
	acdRules.xml 
Log Message:
Commit of Seahawk 1.0 updates and associated core updates
moby-live/Java/src/main/ca/ucalgary/services/resources acdRules.xml,1.1,1.2
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml	2007/03/12 14:33:38	1.1
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml	2007/06/08 14:04:27	1.2
@@ -35,7 +35,7 @@
 <!-- Next two: A FastA DNA record, \N is seahawk-specific regex shorthand for IUPAC nucleic acids -->
 
 <object>
-  <regex>>\s*(\S+)\s*([^\n]+)((?:\n\N+)+)</regex>
+  <regex>>\x20*(\S+)\x20+([^\n]+)((?:\n\N+)+)</regex>
   <namespace>
     <ns value="unknown">$1</ns>
   </namespace>
@@ -46,7 +46,7 @@
 </object>
 
 <object>
-  <regex>>\s*(\S*)\s*((?:\n\N+)+)</regex>
+  <regex>>\x20*(\S*)\x20*((?:\n\N+)+)</regex>
   <namespace>
     <ns value="unknown">$1</ns>
   </namespace>
@@ -58,7 +58,7 @@
 <!-- Next two: A FastA amino acid record, \P is seahawk-specific regex shorthand for IUPAC amino acid residues -->
 
 <object>
-  <regex>>\s*(\S+)\s+([^\n]+)((?:\n\P+)+)</regex>
+  <regex>>\x20*(\S+)\x20+([^\n]+)((?:\n\P+)+)</regex>
   <namespace>
     <ns value="unknown">$1</ns>
   </namespace>
@@ -69,7 +69,7 @@
 </object>
 
 <object>
-  <regex>>\s*(\S*)\s*((?:\n\P+)+)</regex>
+  <regex>>\x20*(\S*)\x20*((?:\n\P+)+)</regex>
   <namespace>
     <ns value="unknown">$1</ns>
   </namespace>
@@ -78,11 +78,58 @@
   <member value="Length" whitespace="strip">string-length('$2')</member>
 </object>
 
-<!-- PNG file magic signature at start of file -->
+<!--
+<object name="rowWithWhitespaceSeparatedColumns">
+  <!- - Any number of non-whitespace data column values, separated by spaces (at least 2, or a tab so
+       as not to capture just any random space-separated words on a line). - ->
+  <regex>\A\s*(\S+(\x20{2,}|\t+|))+\n</regex>
+  <datatype value="tableRow"/>
+  <!- - First capture group recorded as many times as it matches (HAS relationship for "cell" field) - ->
+  <member value="cell" whitespace="flanking">$1</member> 
+</object> -->
+
+<!--
+<object name="tableWithWhitespaceSeparatedColumns">
+  <!- - A table is one or more rows of data. - ->
+  <regex anchor="line">(\p{rowWithWhitespaceSeparatedColumns})+</regex>
+  <datatype value="table"/>
+  <!- - First capture group recorded as many times as it matches (HAS relationship for "tableRow" field)
+       \p{rowWithWhitespaceSeparatedColumns} in the regex is known to generate objects of data type "tableRow". - ->
+  <member value="body">$1</member>
+</object> -->
+
+<!-- Output from EMBOSS 'charge' program. 
+<object>
+  <regex anchor="line">
+         (CHARGE \s of \s .*?)\n        # Description of input (used as caption)
+         \n                             # Blank line
+         (Position \s+ Charge\n)        # Column headers
+         ((?: \d+ \s+ -?\d+\.\d+ \n)+ ) # One or more lines of (integer) (spaces) (+/-decimal number)
+                                        # Ignore any trailing garbage (no \z given to anchor the end of the input being tested)
+  </regex>
+  <datatype value="PeptideChargeTable"/>
+  <member value="caption">$1</member>
+  <!- - Create a member of data type tableRow using another rule. - ->
+  <member value="header" rule="rowWithWhitespaceSeparatedColumns">$2</member>
+  <!- - Inherit parsing from other rule to fill in some members (IS-A field completion from rule for parent data type).
+       We are stating this explicitly because we wanted to ensure the (int) (sp) (-/+dec) format in the
+       regex above instead of accepting any data matching the looser regex for rowWithWhitespaceSeparatedColumns
+       (any number of non-whitespace columns separated by spaces). - ->
+  <inheritMembers rule="tableWithWhitespaceSeparatedColumns">$3</inheritMembers> 
+</object> -->
+
+<!-- PNG image's magic signature at start of file -->
 <object>
 <!--  <regex>\A\x89PNG\r\n\x1A\n.*\z</regex>-->
   <regex>\A.PNG\r\n\x1A\n.*\z</regex>
-  <datatype value="b64_Encoded_PNG"/>
+  <datatype value="PNGFormatImage"/>
+  <member value="content" encoding="Base64">$0</member>
+</object>
+
+<!-- ABI sequence trace's magic signature at start of file -->
+<object>
+  <regex>\AABIF.*\z</regex>
+  <datatype value="ABISequenceChromatogram"/>
   <member value="content" encoding="Base64">$0</member>
 </object>
 




More information about the MOBY-guts mailing list