using Microsoft.Office.Interop.Word;
/* AutoDetect.cs
 * part of zaaReloaded2
 * 
 * Copyright 2015-2017 Daniel Kraus
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using zaaReloaded2.Importer.ZaaImporter;
using zaaReloaded2.Importer.ClinicImporter;

namespace zaaReloaded2.Importer
{
    /// <summary>
    /// Locates a block of laboratory data in a Word document and selects it.
    /// </summary>
    /// <remarks>
    /// Two text formats are recognized: output generated by ZAA (Lauris) and
    /// output generated by the clinic system. The format found first is
    /// remembered in <see cref="_mode"/> for the remainder of one detection run.
    /// </remarks>
    class AutoDetector
    {
        #region Public methods

        /// <summary>
        /// Attempts to automatically detect laboratory data in the Word
        /// document.
        /// </summary>
        /// <remarks>
        /// The search starts at the paragraph containing the "Labor" bookmark
        /// if the document has one, otherwise at the first paragraph. The
        /// block begins at the first time stamp paragraph found and extends
        /// over all directly following laboratory paragraphs. When a block is
        /// found, it is selected in the document.
        /// </remarks>
        /// <param name="document">Document which to parse for laboratory
        /// data.</param>
        /// <returns>True if laboratory data was detected, false if not.</returns>
        /// <exception cref="ArgumentNullException">if
        /// <paramref name="document"/> is null.</exception>
        public bool Detect(Document document)
        {
            if (document == null)
            {
                // Use the (paramName, message) overload; the single-string
                // overload would interpret the message as the parameter name.
                throw new ArgumentNullException(
                    "document",
                    "Automatic laboratory detection requires a document.");
            }

            // Every detection run starts without a preferred format, so that
            // a mode found in a previously examined document cannot leak into
            // this one.
            _mode = Mode.Undefined;

            // The document is not modified while scanning, so the paragraph
            // count can be fetched once.
            int paragraphCount = document.Paragraphs.Count;

            int i = 1;
            if (document.Bookmarks.Exists("Labor"))
            {
                i = GetParagraphIndex(
                    document,
                    document.Bookmarks["Labor"].Range.Paragraphs[1]);
            }

            // Expect the first paragraph of a laboratory block to be
            // a time stamp. This prevents erroneous detection of
            // lines such as "Tel. (09 31) 201-39432; -39126", which
            // happen to structurally resemble a paragraph with
            // laboratory items.
            Paragraph start = null;
            for (; i <= paragraphCount; i++)
            {
                Paragraph candidate = document.Paragraphs[i];
                if (IsTimeStampParagraph(candidate))
                {
                    start = candidate;
                    break;
                }
            }

            if (start == null)
            {
                return false;
            }

            // Advance as long as the following paragraph still belongs to the
            // block. If the block runs up to the very last paragraph of the
            // document, i ends up on that last paragraph, so the entire block
            // is selected rather than just the time stamp line.
            while (i < paragraphCount && IsLabParagraph(document.Paragraphs[i + 1]))
            {
                i++;
            }
            Paragraph end = document.Paragraphs[i];

            document.Range(start.Range.Start, end.Range.End).Select();
            return true;
        }

        #endregion

        #region Private methods

        /// <summary>
        /// Returns true if a paragraph is a time stamp line in either the
        /// clinic format or the ZAA format.
        /// </summary>
        /// <remarks>
        /// As a side effect, sets the mode to ZAA if no mode has been
        /// determined yet and the line is a ZAA time stamp but not a clinic
        /// time stamp.
        /// </remarks>
        /// <param name="paragraph">Paragraph whose text to examine.</param>
        /// <returns>True if the paragraph text is a time stamp line.</returns>
        private bool IsTimeStampParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            bool isClinicTimePoint = ClinicTimePoint.IsTimeStampLine(text);
            bool isZaaTimePoint = LaurisTimePoint.IsTimeStampLine(text);

            // If the line is a ZAA time point, but not a clinic time point, we can deduce that
            // the lab mode *must* be ZAA, because it will be a line in the form
            // "(17.09.2015-201710:44:00) Cyclosporin-A vor Gabe: 130 µg/l;" which does not
            // occur in the clinic format.
            if ((_mode == Mode.Undefined) && isZaaTimePoint && !isClinicTimePoint)
            {
                _mode = Mode.Zaa;
            }

            return isClinicTimePoint || isZaaTimePoint;
        }

        /// <summary>
        /// Returns true if a paragraph is either a time stamp line
        /// or a paragraph with laboratory items.
        /// </summary>
        /// <remarks>
        /// This method determines the mode: either ZAA-generated output or clinic system-generated
        /// output. ZAA is given priority over clinic. Once a mode is detected, it will stick to
        /// that mode.
        /// </remarks>
        /// <param name="paragraph">Paragraph whose text to examine.</param>
        /// <returns>True if the paragraph text matches the current mode (or,
        /// if no mode has been determined yet, either mode).</returns>
        private bool IsLabParagraph(Paragraph paragraph)
        {
            string text = paragraph.Range.Text;
            switch (_mode)
            {
                case Mode.Zaa:
                    return IsZaaText(text);

                case Mode.Clinic:
                    return IsClinicText(text);

                case Mode.Undefined:
                    // ZAA is tested first and therefore wins if a text
                    // matches both formats.
                    if (IsZaaText(text))
                    {
                        _mode = Mode.Zaa;
                        return true;
                    }
                    if (IsClinicText(text))
                    {
                        _mode = Mode.Clinic;
                        return true;
                    }
                    return false;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Returns true if a text resembles a Lauris paragraph or is a Lauris
        /// time stamp line (ZAA-generated output).
        /// </summary>
        private static bool IsZaaText(string text)
        {
            return LaurisParagraph.ResemblesLaurisParagraph(text)
                || LaurisTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns true if a text resembles a clinic line or is a clinic
        /// time stamp line (clinic system-generated output).
        /// </summary>
        private static bool IsClinicText(string text)
        {
            return ClinicLine.ResemblesClinicLine(text)
                || ClinicTimePoint.IsTimeStampLine(text);
        }

        /// <summary>
        /// Returns the index of a paragraph.
        /// </summary>
        /// <remarks>
        /// http://word.mvps.org/faqs/macrosvba/GetIndexNoOfPara.htm
        /// </remarks>
        /// <param name="document">Document that contains the paragraph.</param>
        /// <param name="paragraph">Paragraph whose index to return.</param>
        /// <returns>Index of the paragraph.</returns>
        private int GetParagraphIndex(Document document, Paragraph paragraph)
        {
            // As in the referenced article, the range must extend to the END
            // of the paragraph: it then contains the paragraph itself plus all
            // paragraphs before it, so the paragraph count of that range is
            // the paragraph's 1-based index. Ending the range at the start of
            // the paragraph would leave the paragraph itself out of the count.
            return document.Range(0, paragraph.Range.End).Paragraphs.Count;
        }

        #endregion

        #region Fields

        /// <summary>
        /// Format of the laboratory output that is being detected.
        /// </summary>
        private enum Mode
        {
            Undefined,
            Zaa,
            Clinic
        }

        /// <summary>
        /// Format determined during the current detection run;
        /// <see cref="Mode.Undefined"/> until a format has been recognized.
        /// </summary>
        private Mode _mode;

        #endregion
    }
}