263 lines
9.8 KiB
C#
Executable File
263 lines
9.8 KiB
C#
Executable File
using Microsoft.Office.Interop.Word;
|
|
/* AutoDetect.cs
 * part of zaaReloaded2
 *
 * Copyright 2015-2017 Daniel Kraus
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using zaaReloaded2.Importer.ZaaImporter;
|
|
using zaaReloaded2.Importer.ClinicImporter;
|
|
|
|
namespace zaaReloaded2.Importer
{
    /// <summary>
    /// Scans a Word document (or a selection in it) for a block of
    /// laboratory data, selects that block in Word, and records which
    /// import format the block appears to use.
    /// </summary>
    class AutoDetector
    {
        #region Properties

        /// <summary>
        /// Gets the import mode determined during detection.
        /// </summary>
        /// <remarks>
        /// The mode is only assigned by <see cref="IsLabParagraph"/>, i.e.
        /// while examining the paragraphs that follow the time stamp
        /// paragraph. Once it has been set to ZAA or Clinic it is never
        /// reset by this class.
        /// </remarks>
        public ImportMode ImportMode { get; private set; }

        #endregion

        #region Public methods

        /// <summary>
        /// Attempts to automatically detect laboratory data in the Word
        /// document.
        /// </summary>
        /// <remarks>
        /// If the document contains a bookmark named "Labor", the search
        /// starts at the paragraph index computed for that bookmark;
        /// otherwise it starts at the first paragraph. The search always
        /// extends to the last paragraph of the document.
        /// </remarks>
        /// <param name="document">Document which to parse for laboratory
        /// data.</param>
        /// <returns>True if laboratory data was detected, false if not.</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="document"/>
        /// is null.</exception>
        public bool Detect(Document document)
        {
            if (document == null)
            {
                // The two-argument overload is required here: the
                // single-string constructor interprets its argument as the
                // parameter name, not as the message.
                throw new ArgumentNullException(
                    "document",
                    "Automatic laboratory detection requires a document.");
            }

            int startParagraph = 1;
            if (document.Bookmarks.Exists("Labor"))
            {
                Logger.Info("Detect: Found lab bookmark");
                startParagraph = GetParagraphIndex(
                    document,
                    document.Bookmarks["Labor"].Range.Paragraphs[1]);
            }

            return DetectRange(document, startParagraph, document.Paragraphs.Count);
        }

        /// <summary>
        /// Attempts to automatically detect laboratory data within the
        /// paragraphs covered by a selection.
        /// </summary>
        /// <param name="selection">Selection whose paragraphs to parse for
        /// laboratory data.</param>
        /// <returns>True if laboratory data was detected, false if not.</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="selection"/>
        /// is null.</exception>
        public bool Detect(Selection selection)
        {
            if (selection == null)
            {
                throw new ArgumentNullException(
                    "selection",
                    "Automatic laboratory detection requires a selection.");
            }

            Document document = selection.Document;
            int firstIndex = GetParagraphIndex(document, selection.Paragraphs.First);

            // NOTE(review): the "+ 1" presumably compensates for
            // GetParagraphIndex measuring up to the *start* of the
            // paragraph -- confirm against Word's behavior.
            int lastIndex = GetParagraphIndex(document, selection.Paragraphs.Last) + 1;

            // DetectRange rejects an end index beyond the document's last
            // paragraph, so never let the "+ 1" push us past it.
            lastIndex = Math.Min(lastIndex, document.Paragraphs.Count);

            return DetectRange(document, firstIndex, lastIndex);
        }

        /// <summary>
        /// Creates an importer that matches the current
        /// <see cref="ImportMode"/>.
        /// </summary>
        /// <returns>A new ZAA or clinic importer.</returns>
        /// <exception cref="InvalidOperationException">if the import mode is
        /// neither ZAA nor Clinic.</exception>
        public IImporter CreateImporter()
        {
            switch (ImportMode)
            {
                case ImportMode.Zaa:
                    return new ZaaImporter.ZaaImporter();
                case ImportMode.Clinic:
                    return new ClinicImporter.ClinicImporter();
                default:
                    throw new InvalidOperationException("Cannot create Importer for undefined import mode!");
            }
        }

        #endregion

        #region Private methods

        /// <summary>
        /// Searches the paragraphs <paramref name="startParagraph"/> through
        /// <paramref name="endParagraph"/> (1-based, inclusive) for a block
        /// of laboratory data and selects it in the document if found.
        /// </summary>
        /// <remarks>
        /// The block starts at the first time stamp paragraph in the range.
        /// It ends before the first following paragraph that is neither a
        /// lab paragraph nor empty, or at
        /// <paramref name="endParagraph"/> if no such paragraph exists.
        /// Note that this method can return true while
        /// <see cref="ImportMode"/> has not been assigned, namely when no
        /// paragraph after the time stamp is recognized as a lab paragraph.
        /// </remarks>
        /// <param name="document">Document to search.</param>
        /// <param name="startParagraph">Index of the first paragraph to
        /// examine.</param>
        /// <param name="endParagraph">Index of the last paragraph to
        /// examine.</param>
        /// <returns>True if a lab block was found and selected, false if
        /// not.</returns>
        /// <exception cref="ArgumentNullException">if
        /// <paramref name="document"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">if either index
        /// lies outside the document or the end precedes the start.</exception>
        private bool DetectRange(Document document, int startParagraph, int endParagraph)
        {
            if (document == null)
            {
                throw new ArgumentNullException(
                    "document",
                    "Automatic laboratory detection requires a document.");
            }

            // Fetch the collection and its size once instead of on every
            // check and loop iteration.
            var paragraphs = document.Paragraphs;
            int paragraphCount = paragraphs.Count;

            if (startParagraph < 1 || startParagraph > paragraphCount)
            {
                Logger.Fatal("Start paragraph index must be between {0} and {1}, was {2}!",
                    1, paragraphCount, startParagraph);
                throw new ArgumentOutOfRangeException("startParagraph");
            }

            if (endParagraph < startParagraph || endParagraph > paragraphCount)
            {
                Logger.Fatal("End paragraph index must be between {0} and {1}, was {2}!",
                    startParagraph, paragraphCount, endParagraph);
                throw new ArgumentOutOfRangeException("endParagraph");
            }

            Logger.Info("DetectRange: Start paragraph is #{0}, end is #{1}, document has #{2} paragraphs",
                startParagraph, endParagraph, paragraphCount);

            // TODO: Try to make this algorithm more elegant.
            Paragraph start = null;
            int i = startParagraph;

            while (i <= endParagraph)
            {
                // Expect the first paragraph of a Lauris block to be
                // a time stamp. This prevents erroneous detection of
                // lines such as "Tel. (09 31) 201-39432; -39126", which
                // happen to structurally resemble a paragraph with
                // laboratory items.
                Paragraph candidate = paragraphs[i];
                if (IsTimeStampParagraph(candidate))
                {
                    start = candidate;
                    Logger.Info("DetectRange: Found time stamp line in paragraph #{0}", i);
                    break;
                }
                i++;
            }

            if (start == null)
            {
                Logger.Warn("DetectRange: Did not find lab block!");
                return false;
            }

            Logger.Info("DetectRange: Determining lab block");
            Paragraph end = null;

            // Look one paragraph ahead: the block ends at paragraph i as
            // soon as paragraph i + 1 is neither lab data nor empty.
            while (i <= endParagraph - 1)
            {
                Paragraph next = paragraphs[i + 1];
                if (!IsLabParagraph(next) && !IsEmptyParagraph(next))
                {
                    Logger.Info("Detect: Last lab paragraph is #{0}", i);
                    end = paragraphs[i];
                    break;
                }
                i++;
            }

            if (end == null)
            {
                // Everything up to the end of the range belongs to the block.
                end = paragraphs[endParagraph];
            }

            document.Range(start.Range.Start, end.Range.End).Select();
            return true;
        }

        /// <summary>
        /// Returns true if a paragraph is a time stamp line in either the
        /// clinic or the ZAA format.
        /// </summary>
        /// <param name="paragraph">Paragraph to examine.</param>
        /// <returns>True if the paragraph text is recognized as a time
        /// stamp line by <c>ClinicTimePoint</c> or
        /// <c>LaurisTimePoint</c>.</returns>
        private bool IsTimeStampParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            bool isClinicTimePoint = ClinicTimePoint.IsTimeStampLine(text);
            bool isZaaTimePoint = LaurisTimePoint.IsTimeStampLine(text);

            // This method deliberately does not assign ImportMode; the mode
            // is decided in IsLabParagraph only.
            return isClinicTimePoint || isZaaTimePoint;
        }

        /// <summary>
        /// Returns true if a paragraph is either a time stamp line
        /// or a paragraph with laboratory items.
        /// </summary>
        /// <remarks>
        /// This method determines the mode: either ZAA-generated output or
        /// clinic system-generated output. ZAA is given priority over
        /// clinic. Once a mode is detected, it will stick to that mode, and
        /// only paragraphs matching that mode are considered lab paragraphs.
        /// </remarks>
        /// <param name="paragraph">Paragraph to examine.</param>
        /// <returns>True if the paragraph text matches the current mode (or,
        /// while the mode is undefined, either mode).</returns>
        private bool IsLabParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;

            switch (ImportMode)
            {
                case ImportMode.Undefined:
                    // ZAA is tested first so that it wins if a text would
                    // match both formats.
                    if (ResemblesZaaText(text))
                    {
                        ImportMode = ImportMode.Zaa;
                        Logger.Info("IsLabParagraph: Setting mode to ZAA");
                        return true;
                    }
                    if (ResemblesClinicText(text))
                    {
                        ImportMode = ImportMode.Clinic;
                        Logger.Info("IsLabParagraph: Setting mode to Clinic");
                        return true;
                    }
                    return false;

                case ImportMode.Zaa:
                    return ResemblesZaaText(text);

                case ImportMode.Clinic:
                    return ResemblesClinicText(text);

                default:
                    return false;
            }
        }

        /// <summary>
        /// Returns true if the text is a ZAA (Lauris) lab paragraph or a
        /// ZAA time stamp line.
        /// </summary>
        private static bool ResemblesZaaText(string text)
        {
            return LaurisParagraph.ResemblesLaurisParagraph(text)
                || LaurisTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns true if the text is a clinic lab line or a clinic time
        /// stamp line.
        /// </summary>
        private static bool ResemblesClinicText(string text)
        {
            return ClinicLine.ResemblesClinicLine(text)
                || ClinicTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns the index of a paragraph.
        /// </summary>
        /// <remarks>
        /// The index is computed as the number of paragraphs in the range
        /// from the beginning of the document to the start of
        /// <paramref name="paragraph"/>.
        /// See http://word.mvps.org/faqs/macrosvba/GetIndexNoOfPara.htm
        /// NOTE(review): because the range ends at the paragraph's start
        /// rather than its end, the result may be one less than the
        /// paragraph's position for paragraphs after the first -- confirm
        /// against Word before relying on exact indexes.
        /// </remarks>
        /// <param name="document">Document that contains the paragraph.</param>
        /// <param name="paragraph">Paragraph whose index to return.</param>
        /// <returns>Index of the paragraph.</returns>
        private int GetParagraphIndex(Document document, Paragraph paragraph)
        {
            return document.Range(0, paragraph.Range.Start).Paragraphs.Count;
        }

        /// <summary>
        /// Returns true if the paragraph's text is null, empty, or consists
        /// of white space only.
        /// </summary>
        /// <param name="paragraph">Paragraph to examine.</param>
        private bool IsEmptyParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            return String.IsNullOrWhiteSpace(text);
        }

        #endregion

        #region Class logger

        /// <summary>
        /// Gets the lazily created NLog logger for this class.
        /// </summary>
        private static NLog.Logger Logger { get { return _logger.Value; } }

        private static readonly Lazy<NLog.Logger> _logger = new Lazy<NLog.Logger>(() => NLog.LogManager.GetCurrentClassLogger());

        #endregion
    }
}
|