Improve prescription block autodetection.
This commit is contained in:
parent
e71e69d4c4
commit
86c8a9a1b4
@ -36,40 +36,118 @@ namespace zaaReloaded2.Medication
|
||||
/// The detected block is selected.
|
||||
/// </summary>
|
||||
/// <returns>True if a block was detected, false if not.</returns>
|
||||
/// <remarks>
|
||||
/// <para>
|
||||
/// Autodetection works by examining the document paragraph by
|
||||
/// paragraph, starting at the end of the document. The first
|
||||
/// block of at least two lines that are identified as prescription
|
||||
/// lines is selected.
|
||||
/// </para>
|
||||
/// <para>
|
||||
/// It should be noted that every paragraph (a.k.a. line) may
|
||||
/// be regarded as one of three things:
|
||||
/// </para>
|
||||
/// <list type="number">
|
||||
/// <item>A typical prescription line (in the form "Ramipril 5 mg 1-0-0")</item>
|
||||
/// <item>A typical non-prescription text line</item>
|
||||
/// <item>Something in between, e.g. a line with tab stops as in
|
||||
/// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"
|
||||
/// </item>
|
||||
/// </list>
|
||||
/// <para>
|
||||
/// It is the third type of line that may cause confusion. If such a line
|
||||
/// is encountered at the start of a putative block of prescriptions, we
|
||||
/// therefore enter a "fuzzy" state in the detection algorithm and take
|
||||
/// it from there, i.e. disregard the block if there are no lines that
|
||||
/// are clearly prescription lines, or accept the block if we do detect
|
||||
/// adjacent lines with unequivocal prescriptions.
|
||||
/// </para>
|
||||
///
|
||||
/// </remarks>
|
||||
public static bool AutoDetect(Document document)
|
||||
{
|
||||
Paragraph start = null;
|
||||
Paragraph end = null;
|
||||
bool insideBlock = false;
|
||||
bool fuzzy = false;
|
||||
bool result = false;
|
||||
int i = document.Paragraphs.Count;
|
||||
|
||||
while (i > 1)
|
||||
{
|
||||
string line = document.Paragraphs[i].Range.Text;
|
||||
if (Prescription.IsPrescriptionLine(line))
|
||||
|
||||
if (Prescription.IsCanonicalPrescriptionLine(line))
|
||||
{
|
||||
end = document.Paragraphs[i];
|
||||
break;
|
||||
// The current line is unequivocally a prescription line:
|
||||
// If we're not inside a block already, mark the bottom
|
||||
// of the block.
|
||||
// If we are inside a block already, make sure to leave
|
||||
// the 'fuzzy' state because this clearly now is a prescription
|
||||
// block.
|
||||
if (insideBlock)
|
||||
{
|
||||
fuzzy = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
end = document.Paragraphs[i];
|
||||
insideBlock = true;
|
||||
}
|
||||
}
|
||||
else if (Prescription.IsPotentialPrescriptionLine(line))
|
||||
{
|
||||
// The current line is a putative prescription line:
|
||||
// If we're not inside a block already, enter the
|
||||
// "fuzzy" state.
|
||||
// If we are inside a block, no special action is
|
||||
// needed, we can continue with the next paragraph.
|
||||
if (!insideBlock)
|
||||
{
|
||||
fuzzy = true;
|
||||
insideBlock = true;
|
||||
end = document.Paragraphs[i];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// The current line is not a prescription line:
|
||||
// If we are currently in a definitive block of prescriptions,
|
||||
// mark the line below the current line as the start of the block.
|
||||
// If we're in a putative block, discard the information
|
||||
// about the bottom end of the block and reset all flags.
|
||||
if (insideBlock)
|
||||
{
|
||||
if (!fuzzy)
|
||||
{
|
||||
start = document.Paragraphs[i + 1];
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
fuzzy = false;
|
||||
insideBlock = false;
|
||||
end = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i--;
|
||||
}
|
||||
|
||||
if (end != null)
|
||||
{
|
||||
start = end;
|
||||
while (i > 2)
|
||||
{
|
||||
// If we don't have a start paragraph,
|
||||
// but do have an end paragraph, we set the start paragraph to the
|
||||
// first paragraph of the document.
|
||||
if (start == null)
|
||||
{
|
||||
if (!Prescription.IsPrescriptionLine(document.Paragraphs[i - 1].Range.Text))
|
||||
{
|
||||
start = document.Paragraphs[i];
|
||||
break;
|
||||
}
|
||||
i--;
|
||||
start = document.Paragraphs[1];
|
||||
}
|
||||
|
||||
document.Range(start.Range.Start, end.Range.End).Select();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
#endregion
|
||||
@ -102,7 +180,7 @@ namespace zaaReloaded2.Medication
|
||||
string[] lines = Helpers.SplitParagraphs(text);
|
||||
foreach (string line in lines)
|
||||
{
|
||||
if (Prescription.IsPrescriptionLine(line))
|
||||
if (Prescription.IsCanonicalPrescriptionLine(line))
|
||||
{
|
||||
addition = Prescription.ManyFromLine(line);
|
||||
columns = System.Math.Max(columns, addition.Count);
|
||||
|
@ -35,11 +35,16 @@ namespace zaaReloaded2.Medication
|
||||
/// </summary>
|
||||
/// <param name="line">Line to inspect.</param>
|
||||
/// <returns>True if the line contains prescriptions.</returns>
|
||||
public static bool IsPrescriptionLine(string line)
|
||||
public static bool IsCanonicalPrescriptionLine(string line)
|
||||
{
|
||||
return canonicalRegex.IsMatch(line);
|
||||
}
|
||||
|
||||
public static bool IsPotentialPrescriptionLine(string line)
|
||||
{
|
||||
return alternativeRegex.IsMatch(line);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Factory
|
||||
|
Loading…
Reference in New Issue
Block a user