[MOBY-guts] biomoby commit

Mon Aug 17 21:31:53 UTC 2009

gordonp
Mon Aug 17 17:31:52 EDT 2009
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources
In directory dev.open-bio.org:/tmp/cvs-serv26241/src/main/ca/ucalgary/seahawk/resources

Modified Files:
	mobyBuilderRules.xml 
Log Message:
Added a few rules, and Dublin Core IDs for rules
moby-live/Java/src/main/ca/ucalgary/seahawk/resources mobyBuilderRules.xml,1.8,1.9
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9

--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml	2008/10/30 02:33:24	1.8
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml	2009/08/17 21:31:52	1.9
@@ -1,11 +1,44 @@
 <?xml version="1.0"?>
-<mappings>
+<mappings xmlns:dc="http://purl.org/dc/elements/1.1/"> <!-- import Dublin Core for marking systematic identifier -->
 <prefix value="agave">http://www.bioxml.info/dtd/agave.dtd</prefix>
 <prefix value="tigr">http://www.bioxml.info/dtd/tigrxml.dtd</prefix>
 <prefix value="bioseq">http://www.bioxml.info/dtd/Bioseq.dtd</prefix>
 
+<object> <!-- Builds a PDB-Text object from a flat-file record that runs from its HEADER line to its END line -->
+<regex>(HEADER\s+.*?(\S+)\n
+	(?:TITLE.*)?
+	(?:\nCOMPND.*)?
+	(?:\nSOURCE.*)?
+	(?:\nKEYWDS.*)?
+	(?:\nEXPDTA.*)?
+	(?:\nAUTHOR.*)?
+	(?:\nREVDAT.*)?
+	(?:\nREMARK.*)?
+	(?:\nSEQRES.*)?
+	(?:\nCRYST1.*)?
+	(?:\nORIGX1.*)?
+	(?:\nORIGX2.*)?
+	(?:\nORIGX3.*)?
+	(?:\nMASTER.*)?
+	\nEND)(?=\s|\n|\Z)</regex>
+  <namespace>
+    <ns value="PDB">$2</ns> <!-- group 2: the whitespace-free token right before the newline ending the HEADER line (presumably the PDB ID; confirm) -->
+  </namespace>
+  <datatype value="PDB-Text"/>
+  <member value="content">$1</member> <!-- group 1: the whole matched record, HEADER through END -->
+</object>
+
+<!-- Get gene symbols from OMIM text: on a line starting with "* ", the token between a ';' and the end of the line is captured as $1 -->
+<object>
+<regex>(?:^|\n)\* \S+[^\n]*;\s+(\S+)\s*(?=\n|$)</regex>
+  <namespace>
+    <ns value="Global_GeneSymbol">$1</ns>
+  </namespace>
+</object>
+
 <!-- Creates a DNASequence, \N is IUPAC nucleotide shorthand --> 
 <object>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareDNA2DNASequence</dc:source>
   <regex>\N*\s*(?:\N{10,}(?:\x20|\r|\t|\n)*)+\N*</regex>
   <datatype value="DNASequence"/>
   <namespace>
@@ -17,13 +50,17 @@
 
 <!-- Creates an AminoAcidSequence, \P is IUPAC amino acid residue shorthand -->
 <object>
-  <regex>\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n)*)+\P*</regex>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareAA2AminoAcidSequence</dc:source> <!-- Dublin Core identifier for this rule -->
+  <regex>(?:^|[^A-Za-z])     # some non-alphabetical boundary
+         (\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n){2,})+\P*)  # iupac or spacing
+         (?![A-Za-z])         # should not be followed by letters
+  </regex>
   <datatype value="AminoAcidSequence"/>
   <namespace>
     <ns value="unknown">''</ns>
   </namespace>
-  <member value="SequenceString" whitespace="strip">$0</member>
-  <member value="Length" whitespace="strip">string-length('$0')</member>
+  <member value="SequenceString" whitespace="strip">$1</member> <!-- $1 rather than $0: the leading boundary character is matched by the regex but is outside capture group 1 -->
+  <member value="Length" whitespace="strip">string-length('$1')</member>
 </object>
 
 <!-- ABI sequence trace's magic signature at start of file -->
@@ -113,6 +150,7 @@
 
 <!-- Digital Object Identifier, with header -->
 <object>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:prefixedDOI2DOI</dc:source>
   <regex>(?:DOI|doi|[Dd]igital\s+[Oo]bject\s+[Ii][Dd](?:entifier))\s*:?\s*(10\.\d+/[^%"\#\x20\t\r\n]+)</regex>
   <namespace>
     <ns value="DOI">$1</ns>
@@ -121,6 +159,7 @@
 
 <!-- Digital Object Identifier, likely guess since you don't divide numbers by letters -->
 <object>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareDOI2DOI</dc:source>
   <regex>(?:\s|\A|/)(10\.\d+/[A-Za-z]+[^%"\#\x20\t\r\n]+)</regex>
   <namespace>
     <ns value="DOI">$1</ns>
@@ -160,6 +199,7 @@
 </object>
 
 <object>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareEC2MobyEC</dc:source>
   <!-- have the form '6.1.99.-', where '-' is a wildcard -->
   <regex>([1-6]\.(?:[0-9]{1,2}|-)(?:\.[0-9]{1,3}|-){2})</regex>
   <namespace>
@@ -169,7 +209,8 @@
 
 <!-- NCBI Entrez gene -->
 <object>
-  <regex>GeneID:(\d+)</regex>
+  <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:GeneId2EntrezGeneID</dc:source>
+  <regex>GeneID\s*:\s*(\d+)</regex>
   <!-- readseq xml tag (usually fitem) that has a feature note containing a geneID -->
   <xpath>substring-after(./bioseq:fnote/bioseq:fval[starts-with(., "GeneID:")], "GeneID:")</xpath>
   <namespace>
@@ -219,4 +260,11 @@
   <datatype value="FASTA_AA"/>
   <member value="content">$1</member>
 </object>
+
+<object> <!-- MeSH ID taken from the numeric TermToSearch parameter of an NCBI Entrez MeSH URL; host-name dots are escaped so they match only a literal '.' -->
+  <url_regex>http://www\.ncbi\.nlm\.nih\.gov/sites/entrez\?Db=mesh&amp;TermToSearch=(\d+)</url_regex>
+  <namespace>
+    <ns value="MeSH">$1</ns>
+  </namespace>
+</object>
 </mappings>