zaaReloaded2/zaaReloaded2/Medication/Importer.cs

209 lines
7.3 KiB
C#
Executable File

using Microsoft.Office.Interop.Word;
/* Importer.cs
* part of zaaReloaded2
*
* Copyright 2015-2017 Daniel Kraus
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace zaaReloaded2.Medication
{
/// <summary>
/// Imports prescriptions from a physician's letter.
/// </summary>
public class Importer
{
#region Static methods
/// <summary>
/// Attempts to automatically detect a block of prescriptions
/// in a document. The document is screened from end to start.
/// The detected block is selected.
/// </summary>
/// <returns>True if a block was detected, false if not.</returns>
/// <remarks>
/// <para>
/// Autodetection works by examining the document paragraph by
/// paragraph, starting at the end of the document. The first
/// block of at least two lines that are identified as prescription
/// lines is selected.
/// </para>
/// <para>
/// It should be noted that every paragraph (a.k.a. line) may
/// be regarded as one of three things:
/// </para>
/// <list type="ol">
/// <item>A typical prescription line (in the form "Ramipril 5 mg 1-0-0")</item>
/// <item>A typical non-prescription text line</item>
/// <item>Something inbetween, e.g. a line with tab stops as in
/// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"
/// </item>
/// </list>
/// <para>
/// It is the third type of line that may cause confusion. If such a line
/// is encountered at the start of a putative block of prescriptions, we
/// therefore enter a "fuzzy" state in the detection algorithm and take
/// it from there, i.e. disregard the block if there are no lines that
/// are clearly prescriptions lines, or accept the block if we do detect
/// adjacent lines with unequivocal prescriptions.
/// </para>
///
/// </remarks>
public static bool AutoDetect(Document document)
{
Paragraph start = null;
Paragraph end = null;
bool insideBlock = false;
bool fuzzy = false;
bool result = false;
int i = document.Paragraphs.Count;
while (i > 1)
{
string line = document.Paragraphs[i].Range.Text;
if (Prescription.IsCanonicalPrescriptionLine(line))
{
// The current line is unequivocally a prescription line:
// If we're not inside a block already, mark the bottom
// of the block.
// If we are inside a block already, make sure to leave
// the 'fuzzy' state because this clearly now is a prescription
// block.
if (insideBlock)
{
fuzzy = false;
}
else
{
end = document.Paragraphs[i];
insideBlock = true;
}
}
else if (Prescription.IsPotentialPrescriptionLine(line))
{
// The current line is a putative prescription line:
// If we're not inside a block already, enter the
// "fuzzy" state.
// If we are inside a block, no special action is
// needed, we can continue with the next paragraph.
if (!insideBlock)
{
fuzzy = true;
insideBlock = true;
end = document.Paragraphs[i];
}
}
else
{
// The current line is not a prescription line:
// If we are currently in a definitive block of prescriptions,
// mark the line below the current line as the start of the block.
// If we're in a putative block, discard the information
// about the bottom end of the block and reset all flags.
if (insideBlock)
{
if (!fuzzy)
{
start = document.Paragraphs[i + 1];
break;
}
else
{
fuzzy = false;
insideBlock = false;
end = null;
}
}
}
i--;
}
if (end != null)
{
// If we don't have a start paragraph,
// but do have an end paragraph, we set the start paragraph to the
// first paragraph of the document.
if (start == null)
{
start = document.Paragraphs[1];
}
document.Range(start.Range.Start, end.Range.End).Select();
result = true;
}
return result;
}
#endregion
#region Properties
public List<Prescription> Prescriptions { get; protected set; }
#endregion
#region Constructor
public Importer() { }
public Importer(string text)
: this()
{
Import(text);
}
#endregion
#region Private methods
protected virtual void Import(string text)
{
List<Prescription> list = new List<Prescription>();
IList<Prescription> addition;
int columns = 1;
string[] lines = Helpers.SplitParagraphs(text);
foreach (string line in lines)
{
if (Prescription.IsPotentialPrescriptionLine(line))
{
addition = Prescription.ManyFromLine(line);
columns = System.Math.Max(columns, addition.Count);
list.AddRange(addition);
}
}
// If the input had several columns, sort the prescriptions by
// column.
// TODO: Make this more generic so it works with 3 or 4 columns as well.
if (columns == 2)
{
var firstCol = list.Where((item, index) => index % 2 == 0);
var secondCol = list.Where((item, index) => index % 2 != 0);
Prescriptions = firstCol.Concat(secondCol).ToList();
}
else
{
Prescriptions = list;
}
}
#endregion
}
}