Split prescriptions regexes.
This commit is contained in:
parent
2296fcfe41
commit
865e5bff1e
@ -37,7 +37,7 @@ namespace zaaReloaded2.Medication
|
||||
/// <returns>True if the line contains prescriptions.</returns>
|
||||
public static bool IsPrescriptionLine(string line)
|
||||
{
|
||||
return lineRegex.IsMatch(line);
|
||||
return canonicalRegex.IsMatch(line);
|
||||
}
|
||||
|
||||
#endregion
|
||||
@ -55,7 +55,7 @@ namespace zaaReloaded2.Medication
|
||||
// Replace any runs of whitespace with a single space
|
||||
// (from http://stackoverflow.com/a/206946/270712)
|
||||
// line = Regex.Replace(line, @"\s+", " ");
|
||||
Match m = lineRegex.Match(line);
|
||||
Match m = unifiedRegex.Match(line);
|
||||
int n = m.Groups[DOSE_GROUP].Captures.Count;
|
||||
|
||||
return new Prescription(
|
||||
@ -76,7 +76,7 @@ namespace zaaReloaded2.Medication
|
||||
public static IList<Prescription> ManyFromLine(string line)
|
||||
{
|
||||
// line = Regex.Replace(line, @"\s+", " ");
|
||||
MatchCollection mc = lineRegex.Matches(line);
|
||||
MatchCollection mc = unifiedRegex.Matches(line);
|
||||
List<Prescription> list = new List<Prescription>();
|
||||
foreach (Match m in mc)
|
||||
{
|
||||
@ -197,16 +197,41 @@ namespace zaaReloaded2.Medication
|
||||
private const string DOSE = @"(\d\s+1/[234]|(\d\s?)?[\u00bd\u2153\u00bc]|\d+)";
|
||||
private const string SPACER = @"(\s*[-\u2012\u2013\u2014]+\s*)";
|
||||
|
||||
// Enclose entire regular expression in parentheses so we can use it
|
||||
// to split a line and capture the delimiter.
|
||||
private static readonly Regex lineRegex = new Regex(
|
||||
@"(((?<drug>[^\t]+)\s+" +
|
||||
/// <summary>
|
||||
/// The 'canonical' regex matches a prescription of the form "Ramipril 5 mg 1-0-0"
|
||||
/// with or without trailing comment.
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Enclose entire regular expression in parentheses so we can use it
|
||||
/// to split a line and capture the delimiter.
|
||||
/// </remarks>
|
||||
private const string canonicalPattern =
|
||||
@"((?<drug>[^\t]+)\s+" +
|
||||
@"(?<dose>" + DOSE + @")" + SPACER +
|
||||
@"(?<dose>" + DOSE + @")" + SPACER +
|
||||
@"(?<dose>" + DOSE + @")" +
|
||||
@"(" + SPACER + @"(?<dose>" + DOSE + @"))?" +
|
||||
@"( +(?<comment>[^\t]+))?)|((?<drug>[^\t]+)( +|\t+)(?<comment>[^\t]+)))");
|
||||
@"( +(?<comment>[^\t]+))?)";
|
||||
private static readonly Regex canonicalRegex = new Regex(canonicalPattern);
|
||||
|
||||
/// <summary>
|
||||
/// The 'alternative' regex matches prescriptions that do not contain regular
|
||||
/// dosing intervals ("1-0-0"), but free-style comments: "Cotrim forte alle 2 Tage".
|
||||
/// </summary>
|
||||
/// <remarks>
|
||||
/// Because this alternative pattern matches other lines as well (e.g. with
|
||||
/// signature names), it requires special handling.
|
||||
/// </remarks>
|
||||
private const string alternativePattern =
|
||||
@"((?<drug>[^\t]+)( +|\t+)(?<comment>[^\t]+))";
|
||||
private static readonly Regex alternativeRegex = new Regex(alternativePattern);
|
||||
|
||||
private static readonly Regex unifiedRegex = new Regex(
|
||||
"(" + canonicalPattern + "|" + alternativePattern + ")");
|
||||
|
||||
/// <summary>
|
||||
/// A 'cached', reusable regex to match a run of one or more whitespace characters.
|
||||
/// </summary>
|
||||
private static readonly Regex spaceRegex = new Regex(@"\s+");
|
||||
|
||||
#endregion
|
||||
|
Loading…
Reference in New Issue
Block a user