zaaReloaded2/zaaReloaded2/Importer/AutoDetector.cs

189 lines
6.5 KiB
C#
Executable File

using Microsoft.Office.Interop.Word;
/* AutoDetector.cs
* part of zaaReloaded2
*
* Copyright 2015-2017 Daniel Kraus
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using zaaReloaded2.Importer.ZaaImporter;
using zaaReloaded2.Importer.ClinicImporter;
namespace zaaReloaded2.Importer
{
class AutoDetector
{
    #region Public methods

    /// <summary>
    /// Attempts to automatically detect laboratory data in the Word
    /// document and, if found, selects the detected block.
    /// </summary>
    /// <remarks>
    /// The search starts at the paragraph index derived from the "Labor"
    /// bookmark if the document has one, otherwise at the first paragraph.
    /// A block begins at the first time stamp paragraph and extends over
    /// all directly following laboratory paragraphs.
    /// </remarks>
    /// <param name="document">Document which to parse for laboratory
    /// data.</param>
    /// <returns>True if laboratory data was detected, false if not.</returns>
    /// <exception cref="ArgumentNullException">if <paramref name="document"/>
    /// is null.</exception>
    public bool Detect(Document document)
    {
        if (document == null)
        {
            // The single-string constructor of ArgumentNullException takes
            // the parameter name, not a message; pass both explicitly.
            throw new ArgumentNullException(
                "document",
                "Automatic laboratory detection requires a document.");
        }

        // The document is not modified while scanning, so the paragraph
        // count can be fetched once.
        int paragraphCount = document.Paragraphs.Count;

        int i = 1;
        if (document.Bookmarks.Exists("Labor"))
        {
            // Paragraph indexes are 1-based; never start below 1.
            i = Math.Max(1, GetParagraphIndex(
                document,
                document.Bookmarks["Labor"].Range.Paragraphs[1]));
        }

        // Expect the first paragraph of a laboratory block to be
        // a time stamp. This prevents erroneous detection of
        // lines such as "Tel. (09 31) 201-39432; -39126", which
        // happen to structurally resemble a paragraph with
        // laboratory items.
        Paragraph start = null;
        while (i <= paragraphCount)
        {
            Paragraph candidate = document.Paragraphs[i];
            if (IsTimeStampParagraph(candidate))
            {
                start = candidate;
                break;
            }
            i++;
        }

        if (start == null)
        {
            return false;
        }

        // Advance as long as the *next* paragraph still belongs to the
        // block. When the loop ends, i is the index of the last paragraph
        // of the block -- also if the block extends to the very end of
        // the document.
        while (i < paragraphCount && IsLabParagraph(document.Paragraphs[i + 1]))
        {
            i++;
        }
        Paragraph end = document.Paragraphs[i];

        document.Range(start.Range.Start, end.Range.End).Select();
        return true;
    }

    #endregion

    #region Private methods

    /// <summary>
    /// Returns true if a paragraph is a time stamp line in either the
    /// clinic or the ZAA (Lauris) format.
    /// </summary>
    /// <remarks>
    /// As a side effect, this method switches an undefined mode to ZAA
    /// if the line is a ZAA time stamp, but not a clinic time stamp.
    /// </remarks>
    /// <param name="paragraph">Paragraph to examine.</param>
    /// <returns>True if the paragraph text is a time stamp line.</returns>
    private bool IsTimeStampParagraph(Paragraph paragraph)
    {
        string text = paragraph.Range.Text;
        bool isClinicTimePoint = ClinicTimePoint.IsTimeStampLine(text);
        bool isZaaTimePoint = LaurisTimePoint.IsTimeStampLine(text);

        // If the line is a ZAA time point, but not a clinic timepoint, we can deduce that
        // the lab mode *must* be ZAA, because it will be a line in the form
        // "(17.09.2015-201710:44:00) Cyclosporin-A vor Gabe: 130 µg/l;" which does not
        // occur in the clinic format.
        if ((_mode == Mode.Undefined) && isZaaTimePoint && !isClinicTimePoint)
        {
            _mode = Mode.Zaa;
        }

        return isClinicTimePoint || isZaaTimePoint;
    }

    /// <summary>
    /// Returns true if a paragraph is either a time stamp line
    /// or a paragraph with laboratory items.
    /// </summary>
    /// <remarks>
    /// This method determines the mode: either ZAA-generated output or clinic system-generated
    /// output. ZAA is given priority over clinic. Once a mode is detected, it will stick to
    /// that mode.
    /// </remarks>
    /// <param name="paragraph">Paragraph to examine.</param>
    /// <returns>True if the paragraph text matches the current mode (or,
    /// while the mode is still undefined, either mode).</returns>
    private bool IsLabParagraph(Paragraph paragraph)
    {
        string text = paragraph.Range.Text;
        switch (_mode)
        {
            case Mode.Undefined:
                // ZAA is tested first and therefore wins if both match.
                if (LooksLikeZaa(text))
                {
                    _mode = Mode.Zaa;
                    return true;
                }
                if (LooksLikeClinic(text))
                {
                    _mode = Mode.Clinic;
                    return true;
                }
                return false;
            case Mode.Zaa:
                return LooksLikeZaa(text);
            case Mode.Clinic:
                return LooksLikeClinic(text);
            default:
                return false;
        }
    }

    /// <summary>
    /// Returns true if the text is a ZAA (Lauris) laboratory paragraph or
    /// a ZAA time stamp line.
    /// </summary>
    private static bool LooksLikeZaa(string text)
    {
        return LaurisParagraph.ResemblesLaurisParagraph(text)
            || LaurisTimePoint.IsTimeStampLine(text);
    }

    /// <summary>
    /// Returns true if the text is a clinic laboratory line or a clinic
    /// time stamp line.
    /// </summary>
    private static bool LooksLikeClinic(string text)
    {
        return ClinicLine.ResemblesClinicLine(text)
            || ClinicTimePoint.IsTimeStampLine(text);
    }

    /// <summary>
    /// Returns the index of a paragraph.
    /// </summary>
    /// <remarks>
    /// http://word.mvps.org/faqs/macrosvba/GetIndexNoOfPara.htm
    /// </remarks>
    /// <param name="document">Document that contains the paragraph.</param>
    /// <param name="paragraph">Paragraph whose index to return.</param>
    /// <returns>Number of paragraphs in the range from the beginning of
    /// the document to the start of <paramref name="paragraph"/>.</returns>
    private int GetParagraphIndex(Document document, Paragraph paragraph)
    {
        // NOTE(review): the range ends at the paragraph's start, not its
        // end -- confirm whether the count is meant to include the
        // paragraph itself.
        return document.Range(0, paragraph.Range.Start).Paragraphs.Count;
    }

    #endregion

    #region Fields

    /// <summary>
    /// Format of the laboratory data that is being detected.
    /// </summary>
    private enum Mode
    {
        Undefined,
        Zaa,
        Clinic
    }

    // NOTE(review): the mode is not reset between calls of Detect --
    // confirm that an instance is not reused for several documents.
    private Mode _mode;

    #endregion
}
}