209 lines
7.3 KiB
C#
Executable File
209 lines
7.3 KiB
C#
Executable File
using Microsoft.Office.Interop.Word;
|
|
/* Importer.cs
|
|
* part of zaaReloaded2
|
|
*
|
|
* Copyright 2015-2017 Daniel Kraus
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
|
|
namespace zaaReloaded2.Medication
|
|
{
|
|
/// <summary>
/// Imports prescriptions from a physician's letter.
/// </summary>
public class Importer
{
    #region Static methods

    /// <summary>
    /// Attempts to automatically detect a block of prescriptions
    /// in a document. The document is screened from end to start.
    /// The detected block is selected.
    /// </summary>
    /// <param name="document">The Word document to screen.</param>
    /// <returns>True if a block was detected and selected, false if not.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="document"/> is null.
    /// </exception>
    /// <remarks>
    /// <para>
    /// Autodetection works by examining the document paragraph by
    /// paragraph, starting at the end of the document and including
    /// the very first paragraph. The first run of adjacent prescription
    /// lines that contains at least one unequivocal prescription line
    /// is selected. No minimum number of lines is enforced.
    /// </para>
    /// <para>
    /// It should be noted that every paragraph (a.k.a. line) may
    /// be regarded as one of three things:
    /// </para>
    /// <list type="number">
    /// <item>A typical prescription line (in the form "Ramipril 5 mg 1-0-0")</item>
    /// <item>A typical non-prescription text line</item>
    /// <item>Something in between, e.g. a line with tab stops as in
    /// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"
    /// </item>
    /// </list>
    /// <para>
    /// It is the third type of line that may cause confusion. If such a line
    /// is encountered at the bottom of a putative block of prescriptions, we
    /// therefore enter a "fuzzy" state in the detection algorithm and take
    /// it from there, i.e. disregard the block if there are no lines that
    /// are clearly prescription lines, or accept the block if we do detect
    /// adjacent lines with unequivocal prescriptions.
    /// </para>
    /// </remarks>
    public static bool AutoDetect(Document document)
    {
        if (document == null)
        {
            throw new ArgumentNullException("document");
        }

        // Fetch the collection once instead of on every access.
        Paragraphs paragraphs = document.Paragraphs;
        Paragraph start = null;
        Paragraph end = null;
        bool insideBlock = false;
        bool fuzzy = false;

        // The paragraphs collection is 1-based, so the loop must run down
        // to and including index 1; otherwise the first paragraph would
        // never be examined, yet could still end up in the selection.
        for (int i = paragraphs.Count; i >= 1; i--)
        {
            Paragraph current = paragraphs[i];
            string line = current.Range.Text;

            if (Prescription.IsCanonicalPrescriptionLine(line))
            {
                // The current line is unequivocally a prescription line:
                // If we're not inside a block already, mark the bottom
                // of the block. In any case the block is now confirmed,
                // so leave the 'fuzzy' state.
                if (!insideBlock)
                {
                    end = current;
                    insideBlock = true;
                }
                fuzzy = false;
            }
            else if (Prescription.IsPotentialPrescriptionLine(line))
            {
                // The current line is a putative prescription line:
                // If we're not inside a block already, tentatively mark
                // the bottom of a block and enter the 'fuzzy' state.
                // If we are inside a block, no special action is needed.
                if (!insideBlock)
                {
                    end = current;
                    insideBlock = true;
                    fuzzy = true;
                }
            }
            else if (insideBlock)
            {
                // The current line is not a prescription line.
                if (!fuzzy)
                {
                    // We were in a confirmed block: it starts at the line
                    // below the current one. i + 1 is always a valid index
                    // here because a block can only have been entered at
                    // a higher index.
                    start = paragraphs[i + 1];
                    break;
                }

                // We were in a putative block that was never confirmed:
                // forget about it and keep looking further up.
                end = null;
                insideBlock = false;
                fuzzy = false;
            }
        }

        // No block at all, or only a putative block that reached the top
        // of the document without ever being confirmed by an unequivocal
        // prescription line.
        if (end == null || fuzzy)
        {
            return false;
        }

        // A confirmed block that reaches the top of the document has no
        // non-prescription line above it, so it starts at paragraph 1.
        if (start == null)
        {
            start = paragraphs[1];
        }

        document.Range(start.Range.Start, end.Range.End).Select();
        return true;
    }

    #endregion

    #region Properties

    /// <summary>
    /// Gets the prescriptions produced by the most recent import, ordered
    /// column by column. The list is empty if nothing has been imported.
    /// </summary>
    public List<Prescription> Prescriptions { get; protected set; }

    #endregion

    #region Constructor

    /// <summary>
    /// Creates an importer with an empty list of prescriptions.
    /// </summary>
    public Importer()
    {
        // Never expose a null collection to callers.
        Prescriptions = new List<Prescription>();
    }

    /// <summary>
    /// Creates an importer and immediately imports the prescriptions
    /// found in <paramref name="text"/>.
    /// </summary>
    /// <param name="text">Text that is split into paragraphs and parsed.</param>
    /// <remarks>
    /// This constructor calls the virtual <see cref="Import"/> method;
    /// overrides therefore run before the derived class's own constructor
    /// body and must not rely on state initialized there.
    /// </remarks>
    public Importer(string text)
        : this()
    {
        Import(text);
    }

    #endregion

    #region Protected methods

    /// <summary>
    /// Parses <paramref name="text"/> paragraph by paragraph and stores
    /// the resulting prescriptions in <see cref="Prescriptions"/>.
    /// </summary>
    /// <param name="text">Text that is split into paragraphs and parsed.</param>
    /// <remarks>
    /// A single line may yield several prescriptions (one per column).
    /// The result is ordered column by column: all prescriptions found in
    /// the first position of their line, then all found in the second
    /// position, and so on. Each prescription's column is its position
    /// within its own line, so lines with fewer prescriptions than others
    /// do not shift subsequent prescriptions into the wrong column, and
    /// any number of columns is supported.
    /// </remarks>
    protected virtual void Import(string text)
    {
        // One bucket per column, created on demand.
        List<List<Prescription>> buckets = new List<List<Prescription>>();

        foreach (string line in Helpers.SplitParagraphs(text))
        {
            if (!Prescription.IsPotentialPrescriptionLine(line))
            {
                continue;
            }

            IList<Prescription> parsed = Prescription.ManyFromLine(line);
            for (int index = 0; index < parsed.Count; index++)
            {
                if (index == buckets.Count)
                {
                    buckets.Add(new List<Prescription>());
                }
                buckets[index].Add(parsed[index]);
            }
        }

        // Concatenate the columns: first column first, then the second, ...
        Prescriptions = buckets.SelectMany(bucket => bucket).ToList();
    }

    #endregion
}
|
|
}
|