[MOBY-guts] biomoby commit
Paul Gordon
gordonp at dev.open-bio.org
Mon Aug 17 21:31:53 UTC 2009
gordonp
Mon Aug 17 17:31:52 EDT 2009
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources
In directory dev.open-bio.org:/tmp/cvs-serv26241/src/main/ca/ucalgary/seahawk/resources
Modified Files:
mobyBuilderRules.xml
Log Message:
Added a few rules, and Dublin Core IDs for rules
moby-live/Java/src/main/ca/ucalgary/seahawk/resources mobyBuilderRules.xml,1.8,1.9
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml 2008/10/30 02:33:24 1.8
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml 2009/08/17 21:31:52 1.9
@@ -1,11 +1,44 @@
<?xml version="1.0"?>
-<mappings>
+<mappings xmlns:dc="http://purl.org/dc/elements/1.1/"> <!-- import Dublin Core for marking systematic identifier -->
<prefix value="agave">http://www.bioxml.info/dtd/agave.dtd</prefix>
<prefix value="tigr">http://www.bioxml.info/dtd/tigrxml.dtd</prefix>
<prefix value="bioseq">http://www.bioxml.info/dtd/Bioseq.dtd</prefix>
+<object> <!-- PDB flat-file text: $1 is the whole record from HEADER through END; $2 is the token immediately before the HEADER line break, used as the PDB id -->
+<regex>(HEADER\s+.*?(\S+)\n
+ (?:TITLE.*)?
+ (?:\nCOMPND.*)?
+ (?:\nSOURCE.*)?
+ (?:\nKEYWDS.*)?
+ (?:\nEXPDTA.*)?
+ (?:\nAUTHOR.*)?
+ (?:\nREVDAT.*)?
+ (?:\nREMARK.*)?
+ (?:\nSEQRES.*)?
+ (?:\nCRYST1.*)?
+ (?:\nORIGX1.*)?
+ (?:\nORIGX2.*)?
+ (?:\nORIGX3.*)?
+ (?:\nMASTER.*)?
+ \nEND)(?=\s|\n|\Z)</regex>
+ <namespace>
+ <ns value="PDB">$2</ns>
+ </namespace>
+ <datatype value="PDB-Text"/>
+ <member value="content">$1</member>
+</object>
+
+<!-- Get gene symbols from OMIM text: on an entry line starting with "* ", capture the token that follows a ';' and ends the line -->
+<object>
+<regex>(?:^|\n)\* \S+[^\n]*;\s+(\S+)\s*(?=\n|$)</regex>
+ <namespace>
+ <ns value="Global_GeneSymbol">$1</ns>
+ </namespace>
+</object>
+
<!-- Creates a DNASequence, \N is IUPAC nucleotide shorthand -->
<object>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareDNA2DNASequence</dc:source>
<regex>\N*\s*(?:\N{10,}(?:\x20|\r|\t|\n)*)+\N*</regex>
<datatype value="DNASequence"/>
<namespace>
@@ -17,13 +50,17 @@
<!-- Creates an AminoAcidSequence, \P is IUPAC amino acid residue shorthand -->
<object>
- <regex>\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n)*)+\P*</regex>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareAA2AminoAcidSequence</dc:source>
+ <regex>(?:^|[^A-Za-z]) # some non-alphabetical boundary
+ (\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n){2,})+\P*) # iupac or spacing
+ (?![A-Za-z]) # should not be followed by letters
+ </regex>
<datatype value="AminoAcidSequence"/>
<namespace>
<ns value="unknown">''</ns>
</namespace>
- <member value="SequenceString" whitespace="strip">$0</member>
- <member value="Length" whitespace="strip">string-length('$0')</member>
+ <member value="SequenceString" whitespace="strip">$1</member> <!-- $1 rather than $0: group 1 leaves out the boundary character consumed at the start of the regex -->
+ <member value="Length" whitespace="strip">string-length('$1')</member>
</object>
<!-- ABI sequence trace's magic signature at start of file -->
@@ -113,6 +150,7 @@
<!-- Digital Object Identifier, with header -->
<object>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:prefixedDOI2DOI</dc:source>
<regex>(?:DOI|doi|[Dd]igital\s+[Oo]bject\s+[Ii][Dd](?:entifier))\s*:?\s*(10\.\d+/[^%"\#\x20\t\r\n]+)</regex>
<namespace>
<ns value="DOI">$1</ns>
@@ -121,6 +159,7 @@
<!-- Digital Object Identifier, likely guess since you don't divide numbers by letters -->
<object>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareDOI2DOI</dc:source>
<regex>(?:\s|\A|/)(10\.\d+/[A-Za-z]+[^%"\#\x20\t\r\n]+)</regex>
<namespace>
<ns value="DOI">$1</ns>
@@ -160,6 +199,7 @@
</object>
<object>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareEC2MobyEC</dc:source>
<!-- have the form '6.1.99.-', where '-' is a wildcard -->
<regex>([1-6]\.(?:[0-9]{1,2}|-)(?:\.[0-9]{1,3}|-){2})</regex>
<namespace>
@@ -169,7 +209,8 @@
<!-- NCBI Entrez gene -->
<object>
- <regex>GeneID:(\d+)</regex>
+ <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:GeneId2EntrezGeneID</dc:source>
+ <regex>GeneID\s*:\s*(\d+)</regex>
<!-- readseq xml tag (usually fitem) that has a feature note containing a geneID -->
<xpath>substring-after(./bioseq:fnote/bioseq:fval[starts-with(., "GeneID:")], "GeneID:")</xpath>
<namespace>
@@ -219,4 +260,11 @@
<datatype value="FASTA_AA"/>
<member value="content">$1</member>
</object>
+
+<object> <!-- MeSH term id taken from an NCBI Entrez MeSH URL; $1 is the numeric TermToSearch value -->
+ <url_regex>http://www.ncbi.nlm.nih.gov/sites/entrez\?Db=mesh&amp;TermToSearch=(\d+)</url_regex>
+ <namespace>
+ <ns value="MeSH">$1</ns>
+ </namespace>
+</object>
</mappings>
More information about the MOBY-guts
mailing list