Split prescriptions regexes.

This commit is contained in:
Daniel Kraus 2015-12-02 14:20:42 +01:00
parent 2296fcfe41
commit 865e5bff1e

View File

@ -37,7 +37,7 @@ namespace zaaReloaded2.Medication
/// <returns>True if the line contains prescriptions.</returns> /// <returns>True if the line contains prescriptions.</returns>
public static bool IsPrescriptionLine(string line) public static bool IsPrescriptionLine(string line)
{ {
return lineRegex.IsMatch(line); return canonicalRegex.IsMatch(line);
} }
#endregion #endregion
@ -55,7 +55,7 @@ namespace zaaReloaded2.Medication
// Replace any runs of whitespace with a single space // Replace any runs of whitespace with a single space
// (from http://stackoverflow.com/a/206946/270712) // (from http://stackoverflow.com/a/206946/270712)
// line = Regex.Replace(line, @"\s+", " "); // line = Regex.Replace(line, @"\s+", " ");
Match m = lineRegex.Match(line); Match m = unifiedRegex.Match(line);
int n = m.Groups[DOSE_GROUP].Captures.Count; int n = m.Groups[DOSE_GROUP].Captures.Count;
return new Prescription( return new Prescription(
@ -76,7 +76,7 @@ namespace zaaReloaded2.Medication
public static IList<Prescription> ManyFromLine(string line) public static IList<Prescription> ManyFromLine(string line)
{ {
// line = Regex.Replace(line, @"\s+", " "); // line = Regex.Replace(line, @"\s+", " ");
MatchCollection mc = lineRegex.Matches(line); MatchCollection mc = unifiedRegex.Matches(line);
List<Prescription> list = new List<Prescription>(); List<Prescription> list = new List<Prescription>();
foreach (Match m in mc) foreach (Match m in mc)
{ {
@ -197,16 +197,41 @@ namespace zaaReloaded2.Medication
private const string DOSE = @"(\d\s+1/[234]|(\d\s?)?[\u00bd\u2153\u00bc]|\d+)"; private const string DOSE = @"(\d\s+1/[234]|(\d\s?)?[\u00bd\u2153\u00bc]|\d+)";
private const string SPACER = @"(\s*[-\u2012\u2013\u2014]+\s*)"; private const string SPACER = @"(\s*[-\u2012\u2013\u2014]+\s*)";
// Enclose entire regular expression in parentheses so we can use it /// <summary>
// to split a line and capture the delimiter. /// The 'canonical' regex matches a prescription of the form "Ramipril 5 mg 1-0-0"
private static readonly Regex lineRegex = new Regex( /// with or without trailing comment.
@"(((?<drug>[^\t]+)\s+" + /// </summary>
/// <remarks>
/// Enclose entire regular expression in parentheses so we can use it
/// to split a line and capture the delimiter.
/// </remarks>
private const string canonicalPattern =
@"((?<drug>[^\t]+)\s+" +
@"(?<dose>" + DOSE + @")" + SPACER + @"(?<dose>" + DOSE + @")" + SPACER +
@"(?<dose>" + DOSE + @")" + SPACER + @"(?<dose>" + DOSE + @")" + SPACER +
@"(?<dose>" + DOSE + @")" + @"(?<dose>" + DOSE + @")" +
@"(" + SPACER + @"(?<dose>" + DOSE + @"))?" + @"(" + SPACER + @"(?<dose>" + DOSE + @"))?" +
@"( +(?<comment>[^\t]+))?)|((?<drug>[^\t]+)( +|\t+)(?<comment>[^\t]+)))"); @"( +(?<comment>[^\t]+))?)";
private static readonly Regex canonicalRegex = new Regex(canonicalPattern);
/// <summary>
/// The 'alternative' regex matches prescriptions that do not contain regular
/// dosing intervals ("1-0-0"), but free-style comments: "Cotrim forte alle 2 Tage".
/// </summary>
/// <remarks>
/// Because this alternative pattern matches other lines as well (e.g. with
/// signature names), it requires special handling.
/// </remarks>
private const string alternativePattern =
@"((?<drug>[^\t]+)( +|\t+)(?<comment>[^\t]+))";
private static readonly Regex alternativeRegex = new Regex(alternativePattern);
private static readonly Regex unifiedRegex = new Regex(
"(" + canonicalPattern + "|" + alternativePattern + ")");
/// <summary>
/// A 'cached', reusable regex to match several whitespace characters.
/// </summary>
private static readonly Regex spaceRegex = new Regex(@"\s+"); private static readonly Regex spaceRegex = new Regex(@"\s+");
#endregion #endregion