[MOBY-guts] biomoby commit

Paul Gordon gordonp at dev.open-bio.org
Thu Feb 3 21:54:51 UTC 2011


gordonp
Thu Feb  3 16:54:51 EST 2011
Update of /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources
In directory dev.open-bio.org:/tmp/cvs-serv20716/src/main/ca/ucalgary/seahawk/resources

Modified Files:
	mobyBuilderRules.xml 
Log Message:
New and improved protein recognition rule
moby-live/Java/src/main/ca/ucalgary/seahawk/resources mobyBuilderRules.xml,1.11,1.12
===================================================================
RCS file: /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml	2010/04/11 02:20:46	1.11
+++ /home/repository/moby/moby-live/Java/src/main/ca/ucalgary/seahawk/resources/mobyBuilderRules.xml	2011/02/03 21:54:51	1.12
@@ -55,8 +55,8 @@
 <object>
   <dc:source>urn:lsid:bioxml.info:mobyLiftingSchemaMapping:bareAA2AminoAcidSequence</dc:source>
   <regex>(?:^|[^A-Za-z])     # some non-alphabetical boundary
-         (\P*\s*(?:\P{10,}(?:\x20|\r|\t|\n){2,})+\P*)  # iupac or spacing
-         (?![A-Za-z])         # should not be followed by letters
+         (\P*[^ACGTNacgtn]\s*(?:\P{10,}(?:\x20|\r|\t|\n)+)+\P*)  # iupac or spacing, at least one non-DNA char
+         (?![A-Za-z]|\Z)         # should not be followed by letters or by the end of input (try not to recognize free text as protein)
   </regex>
   <datatype value="AminoAcidSequence"/>
   <namespace>




More information about the MOBY-guts mailing list