diff --git a/zaaReloaded2/Medication/Importer.cs b/zaaReloaded2/Medication/Importer.cs
index 853f64f..393ac34 100755
--- a/zaaReloaded2/Medication/Importer.cs
+++ b/zaaReloaded2/Medication/Importer.cs
@@ -36,40 +36,118 @@ namespace zaaReloaded2.Medication
/// The detected block is selected.
///
/// True if a block was detected, false if not.
+ ///
+ ///
+ /// Autodetection works by examining the document paragraph by
+ /// paragraph, starting at the end of the document. The first
+ /// block of at least two lines that are identified as prescription
+ /// lines is selected.
+ ///
+ ///
+ /// It should be noted that every paragraph (a.k.a. line) may
+ /// be regarded as one of three things:
+ ///
+ ///
+ /// - A typical prescription line (in the form "Ramipril 5 mg 1-0-0")
+ /// - A typical non-prescription text line
+ /// - Something in between, e.g. a line with tab stops as in
+ /// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"
+ ///
+ ///
+ ///
+ /// It is the third type of line that may cause confusion. If such a line
+ /// is encountered at the start of a putative block of prescriptions, we
+ /// therefore enter a "fuzzy" state in the detection algorithm and take
+ /// it from there, i.e. disregard the block if there are no lines that
+ /// are clearly prescription lines, or accept the block if we do detect
+ /// adjacent lines with unequivocal prescriptions.
+ ///
+ ///
+ ///
public static bool AutoDetect(Document document)
{
Paragraph start = null;
Paragraph end = null;
+ bool insideBlock = false;
+ bool fuzzy = false;
+ bool result = false;
int i = document.Paragraphs.Count;
- while (i > 1)
+ while (i >= 1) // Paragraphs is 1-based: examine the first paragraph too, so it is never selected unchecked.
{
string line = document.Paragraphs[i].Range.Text;
- if (Prescription.IsPrescriptionLine(line))
+
+ if (Prescription.IsCanonicalPrescriptionLine(line))
{
- end = document.Paragraphs[i];
- break;
+ // The current line is unequivocally a prescription line:
+ // If we're not inside a block already, mark the bottom
+ // of the block.
+ // If we are inside a block already, make sure to leave
+ // the 'fuzzy' state because this clearly now is a prescription
+ // block.
+ if (insideBlock)
+ {
+ fuzzy = false;
+ }
+ else
+ {
+ end = document.Paragraphs[i];
+ insideBlock = true;
+ }
}
+ else if (Prescription.IsPotentialPrescriptionLine(line))
+ {
+ // The current line is a putative prescription line:
+ // If we're not inside a block already, enter the
+ // "fuzzy" state.
+ // If we are inside a block, no special action is
+ // needed, we can continue with the next paragraph.
+ if (!insideBlock)
+ {
+ fuzzy = true;
+ insideBlock = true;
+ end = document.Paragraphs[i];
+ }
+ }
+ else
+ {
+ // The current line is not a prescription line:
+ // If we are currently in a definitive block of prescriptions,
+ // mark the line below the current line as the start of the block.
+ // If we're in a putative block, discard the information
+ // about the bottom end of the block and reset all flags.
+ if (insideBlock)
+ {
+ if (!fuzzy)
+ {
+ start = document.Paragraphs[i + 1];
+ break;
+ }
+ else
+ {
+ fuzzy = false;
+ insideBlock = false;
+ end = null;
+ }
+ }
+ }
+
i--;
}
- if (end != null)
- {
- start = end;
- while (i > 2)
+ if (end != null && !fuzzy) // A block that is still fuzzy here never saw a canonical line: reject it.
+ {
+ // If we have an end paragraph but no start paragraph, the loop
+ // ran off the top of the document while inside a definitive
+ // block, so the block starts at the first paragraph.
+ if (start == null)
{
- if (!Prescription.IsPrescriptionLine(document.Paragraphs[i - 1].Range.Text))
- {
- start = document.Paragraphs[i];
- break;
- }
- i--;
+ start = document.Paragraphs[1];
}
-
document.Range(start.Range.Start, end.Range.End).Select();
- return true;
- }
- return false;
+ result = true;
+ }
+ return result;
}
#endregion
@@ -102,7 +180,7 @@ namespace zaaReloaded2.Medication
string[] lines = Helpers.SplitParagraphs(text);
foreach (string line in lines)
{
- if (Prescription.IsPrescriptionLine(line))
+ if (Prescription.IsCanonicalPrescriptionLine(line))
{
addition = Prescription.ManyFromLine(line);
columns = System.Math.Max(columns, addition.Count);
diff --git a/zaaReloaded2/Medication/Prescription.cs b/zaaReloaded2/Medication/Prescription.cs
index 67bb130..dff6bf5 100755
--- a/zaaReloaded2/Medication/Prescription.cs
+++ b/zaaReloaded2/Medication/Prescription.cs
@@ -35,11 +35,22 @@ namespace zaaReloaded2.Medication
///
/// Line to inspect.
/// True if the line contains prescriptions.
- public static bool IsPrescriptionLine(string line)
+ public static bool IsCanonicalPrescriptionLine(string line)
{
return canonicalRegex.IsMatch(line);
}
+ /// <summary>
+ /// Tests a line against the alternative prescription pattern.
+ /// Importer.AutoDetect treats a match as a putative prescription line.
+ /// </summary>
+ /// <param name="line">Line to inspect.</param>
+ /// <returns>True if alternativeRegex matches the line.</returns>
+ public static bool IsPotentialPrescriptionLine(string line)
+ {
+ return alternativeRegex.IsMatch(line);
+ }
+
#endregion
#region Factory