using Microsoft.Office.Interop.Word;
/* AutoDetect.cs
 * part of zaaReloaded2
 *
 * Copyright 2015-2017 Daniel Kraus
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using zaaReloaded2.Importer.ZaaImporter;
using zaaReloaded2.Importer.ClinicImporter;

namespace zaaReloaded2.Importer
{
    /// <summary>
    /// Locates a block of laboratory data in a Word document or selection,
    /// selects it, and records which import format (ZAA or clinic) the
    /// block appears to be written in.
    /// </summary>
    class AutoDetector
    {
        #region Properties

        /// <summary>
        /// Gets the import mode that was determined while scanning
        /// paragraphs. It is only assigned by <see cref="IsLabParagraph"/>,
        /// and once it has left the undefined state it is not changed again.
        /// </summary>
        public ImportMode ImportMode { get; private set; }

        #endregion

        #region Public methods

        /// <summary>
        /// Attempts to automatically detect laboratory data in the Word
        /// document.
        /// </summary>
        /// <remarks>
        /// If the document contains a bookmark named "Labor", scanning
        /// starts at the paragraph index computed for the bookmark's first
        /// paragraph; otherwise it starts at paragraph 1. On success the
        /// detected block is selected in the document.
        /// </remarks>
        /// <param name="document">Document which to parse for laboratory
        /// data.</param>
        /// <returns>True if laboratory data was detected, false if not.</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="document"/>
        /// is null.</exception>
        public bool Detect(Document document)
        {
            if (document == null)
            {
                // Two-argument overload: the single-string overload would
                // interpret the message as the parameter name.
                throw new ArgumentNullException(
                    "document",
                    "Automatic laboratory detection requires a document.");
            }

            int startParagraph = 1;
            if (document.Bookmarks.Exists("Labor"))
            {
                Logger.Info("Detect: Found lab bookmark");
                startParagraph = GetParagraphIndex(
                    document,
                    document.Bookmarks["Labor"].Range.Paragraphs[1]);
            }

            return DetectRange(document, startParagraph, document.Paragraphs.Count);
        }

        /// <summary>
        /// Attempts to automatically detect laboratory data within the
        /// paragraphs touched by a selection.
        /// </summary>
        /// <remarks>
        /// The scanned range runs from the index computed for the
        /// selection's first paragraph to the index computed for its last
        /// paragraph plus one. On success the detected block is selected in
        /// the selection's document.
        /// </remarks>
        /// <param name="selection">Selection whose paragraphs to parse for
        /// laboratory data.</param>
        /// <returns>True if laboratory data was detected, false if not.</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="selection"/>
        /// is null.</exception>
        public bool Detect(Selection selection)
        {
            if (selection == null)
            {
                throw new ArgumentNullException(
                    "selection",
                    "Automatic laboratory detection requires a selection.");
            }

            Document document = selection.Document;
            int firstIndex = GetParagraphIndex(document, selection.Paragraphs.First);
            // NOTE(review): the "+ 1" presumably compensates for how
            // GetParagraphIndex counts paragraphs; confirm that it cannot
            // exceed the document's paragraph count when the selection ends
            // in the last paragraph.
            int lastIndex = GetParagraphIndex(document, selection.Paragraphs.Last) + 1;
            return DetectRange(document, firstIndex, lastIndex);
        }

        /// <summary>
        /// Creates an importer that matches the current
        /// <see cref="ImportMode"/>.
        /// </summary>
        /// <returns>A ZAA importer or a clinic importer.</returns>
        /// <exception cref="InvalidOperationException">if the import mode
        /// is neither ZAA nor clinic.</exception>
        public IImporter CreateImporter()
        {
            switch (ImportMode)
            {
                case ImportMode.Zaa:
                    return new ZaaImporter.ZaaImporter();
                case ImportMode.Clinic:
                    return new ClinicImporter.ClinicImporter();
                default:
                    throw new InvalidOperationException("Cannot create Importer for undefined import mode!");
            }
        }

        #endregion

        #region Private methods

        /// <summary>
        /// Searches the paragraphs from <paramref name="startParagraph"/>
        /// to <paramref name="endParagraph"/> (1-based, inclusive) for a
        /// block that begins with a time stamp line, and selects it.
        /// </summary>
        /// <remarks>
        /// The block ends at the last paragraph before the first following
        /// paragraph that is neither a lab paragraph nor empty, or at
        /// <paramref name="endParagraph"/> if no such paragraph is found.
        /// </remarks>
        /// <param name="document">Document to search.</param>
        /// <param name="startParagraph">Index of the first paragraph to
        /// examine.</param>
        /// <param name="endParagraph">Index of the last paragraph to
        /// examine.</param>
        /// <returns>True if a block was found and selected, false if
        /// not.</returns>
        /// <exception cref="ArgumentNullException">if <paramref name="document"/>
        /// is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">if either index
        /// lies outside the document or the end precedes the start.</exception>
        private bool DetectRange(Document document, int startParagraph, int endParagraph)
        {
            if (document == null)
            {
                throw new ArgumentNullException(
                    "document",
                    "Automatic laboratory detection requires a document.");
            }

            // Read the count once; every access is a call into Word.
            int paragraphCount = document.Paragraphs.Count;

            if (startParagraph < 1 || startParagraph > paragraphCount)
            {
                Logger.Fatal("Start paragraph index must be between {0} and {1}, was {2}!",
                    1, paragraphCount, startParagraph);
                throw new ArgumentOutOfRangeException("startParagraph");
            }
            if (endParagraph < startParagraph || endParagraph > paragraphCount)
            {
                Logger.Fatal("End paragraph index must be between {0} and {1}, was {2}!",
                    startParagraph, paragraphCount, endParagraph);
                throw new ArgumentOutOfRangeException("endParagraph");
            }

            Logger.Info("DetectRange: Start paragraph is #{0}, end is #{1}, document has #{2} paragraphs",
                startParagraph, endParagraph, paragraphCount);

            // TODO: Try to make this algorithm more elegant.
            Paragraph start = null;
            Paragraph end = null;
            int i = startParagraph;

            while (i <= endParagraph)
            {
                // Expect the first paragraph of a Lauris block to be
                // a time stamp. This prevents erroneous detection of
                // lines such as "Tel. (09 31) 201-39432; -39126", which
                // happen to structurally resemble a paragraph with
                // laboratory items.
                Paragraph candidate = document.Paragraphs[i];
                if (IsTimeStampParagraph(candidate))
                {
                    start = candidate;
                    Logger.Info("DetectRange: Found time stamp line in paragraph #{0}", i);
                    break;
                }
                i++;
            }

            if (start == null)
            {
                Logger.Warn("DetectRange: Did not find lab block!");
                return false;
            }

            Logger.Info("DetectRange: Determining lab block");

            // Look one paragraph ahead: the block ends at paragraph i as
            // soon as paragraph i + 1 is neither lab data nor empty.
            while (i <= endParagraph - 1)
            {
                Paragraph next = document.Paragraphs[i + 1];
                if (!IsLabParagraph(next) && !IsEmptyParagraph(next))
                {
                    Logger.Info("Detect: Last lab paragraph is #{0}", i);
                    end = document.Paragraphs[i];
                    break;
                }
                i++;
            }

            if (end == null)
            {
                // Ran to the end of the range without hitting foreign text.
                end = document.Paragraphs[endParagraph];
            }

            document.Range(start.Range.Start, end.Range.End).Select();
            return true;
        }

        /// <summary>
        /// Returns true if a paragraph is a time stamp line in either the
        /// clinic or the ZAA format. Does not change the import mode.
        /// </summary>
        /// <param name="paragraph">Paragraph to examine.</param>
        private bool IsTimeStampParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            bool isClinicTimePoint = ClinicTimePoint.IsTimeStampLine(text);
            bool isZaaTimePoint = LaurisTimePoint.IsTimeStampLine(text);

            // Disabled mode detection, kept for reference:
            // // If the line is a ZAA time point, but not a clinic timepoint, we can deduce that
            // // the lab mode *must* be ZAA, because it will be a line in the form
            // // "(17.09.2015-201710:44:00) Cyclosporin-A vor Gabe: 130 µg/l;" which does not
            // // occur in the clinic format.
            // if ((ImportMode == ImportMode.Undefined) && isZaaTimePoint && !isClinicTimePoint)
            // {
            //     Logger.Info("IsTimeStampParagraph: Found ZAA time stamp, setting mode to ZAA");
            //     ImportMode = ImportMode.Zaa;
            // }

            return isClinicTimePoint || isZaaTimePoint;
        }

        /// <summary>
        /// Returns true if a paragraph is either a time stamp line
        /// or a paragraph with laboratory items.
        /// </summary>
        /// <remarks>
        /// This method determines the mode: either ZAA-generated output or clinic system-generated
        /// output. ZAA is given priority over clinic. Once a mode is detected, it will stick to
        /// that mode.
        /// </remarks>
        /// <param name="paragraph">Paragraph to examine.</param>
        /// <returns>True if the paragraph matches the current mode, or,
        /// while the mode is still undefined, either mode.</returns>
        private bool IsLabParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            bool isLabParagraph = false;

            switch (ImportMode)
            {
                case ImportMode.Undefined:
                    // ZAA is tested first so that it wins over clinic.
                    if (IsZaaText(text))
                    {
                        ImportMode = ImportMode.Zaa;
                        Logger.Info("IsLabParagraph: Setting mode to ZAA");
                        isLabParagraph = true;
                    }
                    else if (IsClinicText(text))
                    {
                        ImportMode = ImportMode.Clinic;
                        Logger.Info("IsLabParagraph: Setting mode to Clinic");
                        isLabParagraph = true;
                    }
                    break;
                case ImportMode.Zaa:
                    isLabParagraph = IsZaaText(text);
                    break;
                case ImportMode.Clinic:
                    isLabParagraph = IsClinicText(text);
                    break;
                default:
                    break;
            }

            return isLabParagraph;
        }

        /// <summary>
        /// Returns true if the text resembles a Lauris paragraph or is a
        /// Lauris time stamp line.
        /// </summary>
        private static bool IsZaaText(string text)
        {
            return LaurisParagraph.ResemblesLaurisParagraph(text)
                || LaurisTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns true if the text resembles a clinic line or is a clinic
        /// time stamp line.
        /// </summary>
        private static bool IsClinicText(string text)
        {
            return ClinicLine.ResemblesClinicLine(text)
                || ClinicTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns the index of a paragraph.
        /// </summary>
        /// <remarks>
        /// Counts the paragraphs in the range from the start of the
        /// document to the start of the given paragraph. See
        /// http://word.mvps.org/faqs/macrosvba/GetIndexNoOfPara.htm
        /// </remarks>
        /// <param name="document">Document that contains the
        /// paragraph.</param>
        /// <param name="paragraph">Paragraph whose index to return.</param>
        /// <returns>Index of the paragraph.</returns>
        private int GetParagraphIndex(Document document, Paragraph paragraph)
        {
            // NOTE(review): the range ends at the paragraph's start, not its
            // end; verify against Word that this yields the intended index.
            return document.Range(0, paragraph.Range.Start).Paragraphs.Count;
        }

        /// <summary>
        /// Returns true if the paragraph's text is null, empty, or consists
        /// only of white space.
        /// </summary>
        /// <param name="paragraph">Paragraph to examine.</param>
        private bool IsEmptyParagraph(Paragraph paragraph)
        {
            return String.IsNullOrWhiteSpace(paragraph.Range.Text);
        }

        #endregion

        #region Class logger

        /// <summary>
        /// Gets the class logger, which is created on first access.
        /// </summary>
        private static NLog.Logger Logger { get { return _logger.Value; } }

        private static readonly Lazy<NLog.Logger> _logger =
            new Lazy<NLog.Logger>(() => NLog.LogManager.GetCurrentClassLogger());

        #endregion
    }
}