[MOBY-guts] biomoby commit
Paul Gordon
gordonp at dev.open-bio.org
Thu Feb 3 21:54:51 UTC 2011
gordonp
Thu Feb 3 16:54:51 EST 2011
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources
In directory dev.open-bio.org:/tmp/cvs-serv20716/src/main/ca/ucalgary/seahawk/resources
Modified Files:
mobyBuilderRules.xml
Log Message:
New and improved protein recognition rule
moby-live/Java/src/main/ca/ucalgary/seahawk/resources mobyBuilderRules.xml,1.11,1.12
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml 2010/04/11 02:20:46 1.11
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml 2011/02/03 21:54:51 1.12
@@ -55,8 +55,8 @@
<object>
<dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareAA2AminoAcidSequence</dc:source>
<regex>(?:^|[^A-Za-z]) # some non-alphabetical boundary
- (\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n){2,})+\P*) # iupac or spacing
- (?![A-Za-z]) # should not be followed by letters
+ (\P*[^ACGTNacgtn]\s*(?:\P{10,}(?:\x20|\r|\t|\n)+)+\P*) # iupac or spacing, at least one non-DNA char
+ (?![A-Za-z]|\Z) # should not be followed by letters (try not to recognize free text as protein)
</regex>
<datatype value="AminoAcidSequence"/>
<namespace>
More information about the MOBY-guts
mailing list