diff --git a/zaaReloaded2/Medication/Importer.cs b/zaaReloaded2/Medication/Importer.cs index 853f64f..393ac34 100755 --- a/zaaReloaded2/Medication/Importer.cs +++ b/zaaReloaded2/Medication/Importer.cs @@ -36,40 +36,118 @@ namespace zaaReloaded2.Medication /// The detected block is selected. /// /// True if a block was detected, false if not. + /// + /// + /// Autodetection works by examining the document paragraph by + /// paragraph, starting at the end of the document. The first + /// block of at least two lines that are identified as prescription + /// lines is selected. + /// + /// + /// It should be noted that every paragraph (a.k.a. line) may + /// be regarded as one of three things: + /// + /// + /// A typical prescription line (in the form "Ramipril 5 mg 1-0-0") + /// A typical non-prescription text line + /// Something inbetween, e.g. a line with tab stops as in + /// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent" + /// + /// + /// + /// It is the third type of line that may cause confusion. If such a line + /// is encountered at the start of a putative block of prescriptions, we + /// therefore enter a "fuzzy" state in the detection algorithm and take + /// it from there, i.e. disregard the block if there are no lines that + /// are clearly prescriptions lines, or accept the block if we do detect + /// adjacent lines with unequivocal prescriptions. + /// + /// + /// public static bool AutoDetect(Document document) { Paragraph start = null; Paragraph end = null; + bool insideBlock = false; + bool fuzzy = false; + bool result = false; int i = document.Paragraphs.Count; while (i > 1) { string line = document.Paragraphs[i].Range.Text; - if (Prescription.IsPrescriptionLine(line)) + + if (Prescription.IsCanonicalPrescriptionLine(line)) { - end = document.Paragraphs[i]; - break; + // The current line is unequivocally a prescription line: + // If we're not inside a block already, mark the bottom + // of the block. + // If we are inside a block already, make sure to leave + // the 'fuzzy' state because this clearly now is a prescription + // block. + if (insideBlock) + { + fuzzy = false; + } + else + { + end = document.Paragraphs[i]; + insideBlock = true; + } } + else if (Prescription.IsPotentialPrescriptionLine(line)) + { + // The current line is a putative prescription line: + // If we're not inside a block already, enter the + // "fuzzy" state. + // If we are inside a block, no special action is + // needed, we can continue with the next paragraph. + if (!insideBlock) + { + fuzzy = true; + insideBlock = true; + end = document.Paragraphs[i]; + } + } + else + { + // The current line is not a prescription line: + // If we are currently in a definitive block of prescriptions, + // mark the line below the current line as the start of the block. + // If we're in a putative block, discard the information + // about the bottom end of the block and reset all flags. + if (insideBlock) + { + if (!fuzzy) + { + start = document.Paragraphs[i + 1]; + break; + } + else + { + fuzzy = false; + insideBlock = false; + end = null; + } + } + } + i--; } if (end != null) - { - start = end; - while (i > 2) + { + // If we don't have a start paragraph, + // but do have an end paragraph, we set the start paragraph to the + // first paragraph of the document. + if (start == null) { - if (!Prescription.IsPrescriptionLine(document.Paragraphs[i - 1].Range.Text)) - { - start = document.Paragraphs[i]; - break; - } - i--; + start = document.Paragraphs[1]; } - document.Range(start.Range.Start, end.Range.End).Select(); - return true; - } - return false; + result = true; + } + return result; } #endregion @@ -102,7 +180,7 @@ namespace zaaReloaded2.Medication string[] lines = Helpers.SplitParagraphs(text); foreach (string line in lines) { - if (Prescription.IsPrescriptionLine(line)) + if (Prescription.IsCanonicalPrescriptionLine(line)) { addition = Prescription.ManyFromLine(line); columns = System.Math.Max(columns, addition.Count); diff --git a/zaaReloaded2/Medication/Prescription.cs b/zaaReloaded2/Medication/Prescription.cs index 67bb130..dff6bf5 100755 --- a/zaaReloaded2/Medication/Prescription.cs +++ b/zaaReloaded2/Medication/Prescription.cs @@ -35,11 +35,16 @@ namespace zaaReloaded2.Medication /// /// Line to inspect. /// True if the line contains prescriptions. - public static bool IsPrescriptionLine(string line) + public static bool IsCanonicalPrescriptionLine(string line) { return canonicalRegex.IsMatch(line); } + public static bool IsPotentialPrescriptionLine(string line) + { + return alternativeRegex.IsMatch(line); + } + #endregion #region Factory