Improve prescription block autodetection.
This commit is contained in:
parent
e71e69d4c4
commit
86c8a9a1b4
@ -36,40 +36,118 @@ namespace zaaReloaded2.Medication
|
|||||||
/// The detected block is selected.
|
/// The detected block is selected.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <returns>True if a block was detected, false if not.</returns>
|
/// <returns>True if a block was detected, false if not.</returns>
|
||||||
|
/// <remarks>
|
||||||
|
/// <para>
|
||||||
|
/// Autodetection works by examining the document paragraph by
|
||||||
|
/// paragraph, starting at the end of the document. The first
|
||||||
|
/// block of at least two lines that are identified as prescription
|
||||||
|
/// lines is selected.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// It should be noted that every paragraph (a.k.a. line) may
|
||||||
|
/// be regarded as one of three things:
|
||||||
|
/// </para>
|
||||||
|
/// <list type="number">
|
||||||
|
/// <item>A typical prescription line (in the form "Ramipril 5 mg 1-0-0")</item>
|
||||||
|
/// <item>A typical non-prescription text line</item>
|
||||||
|
/// <item>Something in between, e.g. a line with tab stops as in
|
||||||
|
/// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"
|
||||||
|
/// </item>
|
||||||
|
/// </list>
|
||||||
|
/// <para>
|
||||||
|
/// It is the third type of line that may cause confusion. If such a line
|
||||||
|
/// is encountered at the start of a putative block of prescriptions, we
|
||||||
|
/// therefore enter a "fuzzy" state in the detection algorithm and take
|
||||||
|
/// it from there, i.e. disregard the block if there are no lines that
|
||||||
|
/// are clearly prescription lines, or accept the block if we do detect
|
||||||
|
/// adjacent lines with unequivocal prescriptions.
|
||||||
|
/// </para>
|
||||||
|
///
|
||||||
|
/// </remarks>
|
||||||
public static bool AutoDetect(Document document)
|
public static bool AutoDetect(Document document)
|
||||||
{
|
{
|
||||||
Paragraph start = null;
|
Paragraph start = null;
|
||||||
Paragraph end = null;
|
Paragraph end = null;
|
||||||
|
bool insideBlock = false;
|
||||||
|
bool fuzzy = false;
|
||||||
|
bool result = false;
|
||||||
int i = document.Paragraphs.Count;
|
int i = document.Paragraphs.Count;
|
||||||
|
|
||||||
while (i > 1)
|
while (i > 1)
|
||||||
{
|
{
|
||||||
string line = document.Paragraphs[i].Range.Text;
|
string line = document.Paragraphs[i].Range.Text;
|
||||||
if (Prescription.IsPrescriptionLine(line))
|
|
||||||
|
if (Prescription.IsCanonicalPrescriptionLine(line))
|
||||||
{
|
{
|
||||||
end = document.Paragraphs[i];
|
// The current line is unequivocally a prescription line:
|
||||||
break;
|
// If we're not inside a block already, mark the bottom
|
||||||
|
// of the block.
|
||||||
|
// If we are inside a block already, make sure to leave
|
||||||
|
// the 'fuzzy' state because this clearly now is a prescription
|
||||||
|
// block.
|
||||||
|
if (insideBlock)
|
||||||
|
{
|
||||||
|
fuzzy = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
end = document.Paragraphs[i];
|
||||||
|
insideBlock = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
else if (Prescription.IsPotentialPrescriptionLine(line))
|
||||||
|
{
|
||||||
|
// The current line is a putative prescription line:
|
||||||
|
// If we're not inside a block already, enter the
|
||||||
|
// "fuzzy" state.
|
||||||
|
// If we are inside a block, no special action is
|
||||||
|
// needed, we can continue with the next paragraph.
|
||||||
|
if (!insideBlock)
|
||||||
|
{
|
||||||
|
fuzzy = true;
|
||||||
|
insideBlock = true;
|
||||||
|
end = document.Paragraphs[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// The current line is not a prescription line:
|
||||||
|
// If we are currently in a definitive block of prescriptions,
|
||||||
|
// mark the line below the current line as the start of the block.
|
||||||
|
// If we're in a putative block, discard the information
|
||||||
|
// about the bottom end of the block and reset all flags.
|
||||||
|
if (insideBlock)
|
||||||
|
{
|
||||||
|
if (!fuzzy)
|
||||||
|
{
|
||||||
|
start = document.Paragraphs[i + 1];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fuzzy = false;
|
||||||
|
insideBlock = false;
|
||||||
|
end = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
i--;
|
i--;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end != null)
|
if (end != null)
|
||||||
{
|
{
|
||||||
start = end;
|
// If we don't have a start paragraph,
|
||||||
while (i > 2)
|
// but do have an end paragraph, we set the start paragraph to the
|
||||||
|
// first paragraph of the document.
|
||||||
|
if (start == null)
|
||||||
{
|
{
|
||||||
if (!Prescription.IsPrescriptionLine(document.Paragraphs[i - 1].Range.Text))
|
start = document.Paragraphs[1];
|
||||||
{
|
|
||||||
start = document.Paragraphs[i];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i--;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
document.Range(start.Range.Start, end.Range.End).Select();
|
document.Range(start.Range.Start, end.Range.End).Select();
|
||||||
return true;
|
result = true;
|
||||||
}
|
}
|
||||||
return false;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
@ -102,7 +180,7 @@ namespace zaaReloaded2.Medication
|
|||||||
string[] lines = Helpers.SplitParagraphs(text);
|
string[] lines = Helpers.SplitParagraphs(text);
|
||||||
foreach (string line in lines)
|
foreach (string line in lines)
|
||||||
{
|
{
|
||||||
if (Prescription.IsPrescriptionLine(line))
|
if (Prescription.IsCanonicalPrescriptionLine(line))
|
||||||
{
|
{
|
||||||
addition = Prescription.ManyFromLine(line);
|
addition = Prescription.ManyFromLine(line);
|
||||||
columns = System.Math.Max(columns, addition.Count);
|
columns = System.Math.Max(columns, addition.Count);
|
||||||
|
@ -35,11 +35,16 @@ namespace zaaReloaded2.Medication
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="line">Line to inspect.</param>
|
/// <param name="line">Line to inspect.</param>
|
||||||
/// <returns>True if the line contains prescriptions.</returns>
|
/// <returns>True if the line contains prescriptions.</returns>
|
||||||
public static bool IsPrescriptionLine(string line)
|
public static bool IsCanonicalPrescriptionLine(string line)
|
||||||
{
|
{
|
||||||
return canonicalRegex.IsMatch(line);
|
return canonicalRegex.IsMatch(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static bool IsPotentialPrescriptionLine(string line)
|
||||||
|
{
|
||||||
|
return alternativeRegex.IsMatch(line);
|
||||||
|
}
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region Factory
|
#region Factory
|
||||||
|
Loading…
Reference in New Issue
Block a user