Split prescriptions regexes.

2015-12-02 14:20:42 +01:00
parent 2296fcfe41
commit 865e5bff1e
1 changed files with 33 additions and 8 deletions
--- a/zaaReloaded2/Medication/Prescription.cs
+++ b/zaaReloaded2/Medication/Prescription.cs
@@ -37,7 +37,7 @@ namespace zaaReloaded2.Medication
        /// <returns>True if the line contains prescriptions.</returns>
        public static bool IsPrescriptionLine(string line)
        {
-            return lineRegex.IsMatch(line);
+            return canonicalRegex.IsMatch(line);
        }

        #endregion
@@ -55,7 +55,7 @@ namespace zaaReloaded2.Medication
            // Replace any runs of whitespace with a single space
            // (from http://stackoverflow.com/a/206946/270712)
            // line = Regex.Replace(line, @"\s+", " ");
-            Match m = lineRegex.Match(line);
+            Match m = unifiedRegex.Match(line);
            int n = m.Groups[DOSE_GROUP].Captures.Count;

            return new Prescription(
@@ -76,7 +76,7 @@ namespace zaaReloaded2.Medication
        public static IList<Prescription> ManyFromLine(string line)
        {
            // line = Regex.Replace(line, @"\s+", " ");
-            MatchCollection mc = lineRegex.Matches(line);
+            MatchCollection mc = unifiedRegex.Matches(line);
            List<Prescription> list = new List<Prescription>();
            foreach (Match m in mc)
            {
@@ -197,16 +197,41 @@ namespace zaaReloaded2.Medication
        private const string DOSE = @"(\d\s+1/[234]|(\d\s?)?[\u00bd\u2153\u00bc]|\d+)";
        private const string SPACER = @"(\s*[-\u2012\u2013\u2014]+\s*)";

-        // Enclose entire regular expression in parentheses so we can use it
-        // to split a line and capture the delimiter.
-        private static readonly Regex lineRegex = new Regex(
-            @"(((?<drug>[^\t]+)\s+" +
+        /// <summary>
+        /// The 'canonical' regex matches a prescription of the form "Ramipril 5 mg 1-0-0"
+        /// with or without trailing comment.
+        /// </summary>
+        /// <remarks>
+        /// Enclose entire regular expression in parentheses so we can use it
+        /// to split a line and capture the delimiter.
+        /// </remarks>
+        private const string canonicalPattern =
+            @"((?<drug>[^\t]+)\s+" +
            @"(?<dose>" + DOSE + @")" + SPACER +
            @"(?<dose>" + DOSE + @")" + SPACER +
            @"(?<dose>" + DOSE + @")" +
            @"(" + SPACER + @"(?<dose>" + DOSE + @"))?" +
-            @"( +(?<comment>[^\t]+))?)|((?<drug>[^\t]+)(  +|\t+)(?<comment>[^\t]+)))");
+            @"( +(?<comment>[^\t]+))?)";
+        private static readonly Regex canonicalRegex = new Regex(canonicalPattern);

+        /// <summary>
+        /// The 'alternative' regex matches prescriptions that do not contain regular
+        /// dosing intervals ("1-0-0"), but free-style comments: "Cotrim forte   alle 2 Tage".
+        /// </summary>
+        /// <remarks>
+        /// Because this alternative pattern matches other lines as well (e.g. with
+        /// signature names), it requires special handling.
+        /// </remarks>
+        private const string alternativePattern =
+            @"((?<drug>[^\t]+)(  +|\t+)(?<comment>[^\t]+))";
+        private static readonly Regex alternativeRegex = new Regex(alternativePattern);
+
+        private static readonly Regex unifiedRegex = new Regex(
+            "(" + canonicalPattern + "|" + alternativePattern + ")");
+
+        /// <summary>
+        /// A 'cached', reusable regex to match runs of one or more whitespace characters.
+        /// </summary>
        private static readonly Regex spaceRegex = new Regex(@"\s+");

        #endregion