[MOBY-guts] biomoby commit
Paul Gordon
gordonp at dev.open-bio.org
Mon Jul 23 22:01:23 UTC 2007
gordonp
Mon Jul 23 18:01:22 EDT 2007
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources
In directory dev.open-bio.org:/tmp/cvs-serv17921/src/main/ca/ucalgary/services/resources
Modified Files:
acdRules.xml
Log Message:
Update to include nested parsing rule example
moby-live/Java/src/main/ca/ucalgary/services/resources acdRules.xml,1.2,1.3
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml 2007/06/08 14:04:27 1.2
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/services/resources/acdRules.xml 2007/07/23 22:01:22 1.3
@@ -78,45 +78,45 @@
<member value="Length" whitespace="strip">string-length('$2')</member>
</object>
-<!--
+
<object name="rowWithWhitespaceSeparatedColumns">
- <!- - Any number of non-whitespace data column values, separated by spaces (at least 2, or a tab so
- as not to capture just any random space-separated words on a line). - ->
- <regex>\A\s*(\S+(\x20{2,}|\t+|))+\n</regex>
+ <!-- Any number of non-whitespace data column values, separated by spaces (at least 2, or a tab so
+ as not to capture just any random space-separated words on a line). -->
+ <regex anchor="line">(?:\A|\n)\s*(\S+(\x20{2,}|\t+|))+(?=\n|\z)</regex>
<datatype value="tableRow"/>
- <!- - First capture group recorded as many times as it matches (HAS relationship for "cell" field) - ->
- <member value="cell" whitespace="flanking">$1</member>
-</object> -->
+ <!-- First regex capture group recorded as many times as it matches (HAS relationship for "cell" field) -->
+ <member value="cell" datatype="String" whitespace="flanking">$1</member>
+</object>
+
-<!--
<object name="tableWithWhitespaceSeparatedColumns">
- <!- - A table is one or more rows of data. - ->
- <regex anchor="line">(\p{rowWithWhitepaceSeparatedColumns})+</regex>
+ <!-- A table is one or more rows of data. -->
+ <regex>(\p{rowWithWhitespaceSeparatedColumns})+</regex>
<datatype value="table"/>
- <!- - First capture group recorded as many times as it matches (HAS relationship for "tableRow" field)
- \p{rowWithSpaceSeparatedColumns} in the regex is known to generate objects of data type "tableRow". - ->
- <member value="body">$1</member>
-</object> -->
+ <!-- First capture group recorded as many times as it matches (HAS relationship for "tableRow" field)
+ \p{rowWithWhitespaceSeparatedColumns} in the regex is known to generate objects of data type "tableRow". -->
+ <member value="row">$1</member>
+</object>
-<!-- Output from EMBOSS 'charge' program.
+<!-- Output from EMBOSS 'charge' program. -->
<object>
<regex anchor="line">
- (CHARGE \s of \s .*?)\n # Description of input (used as caption)
+ (CHARGE \s of \s .*?)\n # Description of input used as caption
\n # Blank line
(Position \s+ Charge\n) # Column headers
- ((?: \d+ \s+ -?\d+\.\d+ \n)+ ) # One or more lines of (integer) (spaces) (+/-decimal number)
- # Ignore any trailing garbage (no \z given to anchor the end of the input being tested)
+ ((?: \d+ \s+ -?\d+\.\d+ \n)+ ) # One or more lines of integer spaces +/-decimal-number
+ # Ignore any trailing data, no \z given to anchor end of input being tested
</regex>
- <datatype value="PeptideChargeTable"/>
+ <datatype value="ProteinChargeTable"/>
<member value="caption">$1</member>
- <!- - Create a member of data type tableRow using another rule. - ->
+ <!-- Create a member of data type tableRow using another rule. -->
<member value="header" rule="rowWithWhitespaceSeparatedColumns">$2</member>
- <!- - Inherit parsing from other rule to fill in some members (IS-A field completion from rule for parent data type).
+ <!-- Inherit parsing from other rule to fill in some members (IS-A field completion from rule for parent data type).
We are stating this explicitly because we wanted to ensure the (int) (sp) (-/+dec) format in the
regex above instead of accepting any data matching the looser regex for rowWithSpaceSeparatedColumns
- (any number of non-whitespace columns separated by spaces). - ->
+ (any number of non-whitespace columns separated by spaces). -->
<inheritMembers rule="tableWithWhitespaceSeparatedColumns">$3</inheritMembers>
-</object> -->
+</object>
<!-- PNG image's magic signature at start of file -->
<object>
@@ -133,4 +133,4 @@
<member value="content" encoding="Base64">$0</member>
</object>
-</mappings>
\ No newline at end of file
+</mappings>
More information about the MOBY-guts
mailing list