using Microsoft.Office.Interop.Word;
/* Importer.cs
 * part of zaaReloaded2
 * 
 * Copyright 2015-2017 Daniel Kraus
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace zaaReloaded2.Medication
{
    /// <summary>
    /// Imports prescriptions from a physician's letter.
    /// </summary>
    public class Importer
    {
        #region Static methods

        /// <summary>
        /// Attempts to automatically detect a block of prescriptions
        /// in a document. The document is screened from end to start.
        /// The detected block is selected.
        /// </summary>
        /// <param name="document">Word document to screen.</param>
        /// <returns>True if a block was detected (and selected), false if not.</returns>
        /// <remarks>
        /// <para>
        /// Autodetection works by examining the document paragraph by
        /// paragraph, starting at the end of the document. The first
        /// block of consecutive lines that are identified as prescription
        /// lines is selected.
        /// NOTE(review): earlier documentation stated that a block needs
        /// "at least two lines"; the code has never enforced a minimum, so
        /// a single unequivocal prescription line is selected as well.
        /// </para>
        /// <para>
        /// It should be noted that every paragraph (a.k.a. line) may
        /// be regarded as one of three things:
        /// </para>
        /// <list type="bullet">
        /// <item><description>A typical prescription line (in the form "Ramipril 5 mg 1-0-0")</description></item>
        /// <item><description>A typical non-prescription text line</description></item>
        /// <item><description>Something in between, e.g. a line with tab stops as in
        /// "Ramipril 5 mg \t alle zwei Tage" or in "Prof. B. Oss \t Dr. A. Sistent"</description></item>
        /// </list>
        /// <para>
        /// It is the third type of line that may cause confusion. If such a line
        /// is encountered at the start of a putative block of prescriptions, we
        /// therefore enter a "fuzzy" state in the detection algorithm and take
        /// it from there, i.e. disregard the block if there are no lines that
        /// are clearly prescription lines, or accept the block if we do detect
        /// adjacent lines with unequivocal prescriptions.
        /// </para>
        /// </remarks>
        public static bool AutoDetect(Document document)
        {
            // Fetch the paragraphs collection once instead of on every access.
            Paragraphs paragraphs = document.Paragraphs;
            Paragraph start = null;
            Paragraph end = null;
            bool insideBlock = false;
            bool fuzzy = false;

            // Word collections are 1-based; walk from the last paragraph up to
            // and including the very first one.
            for (int i = paragraphs.Count; i >= 1; i--)
            {
                Paragraph paragraph = paragraphs[i];
                string line = paragraph.Range.Text;

                if (Prescription.IsCanonicalPrescriptionLine(line))
                {
                    // The current line is unequivocally a prescription line:
                    // If we're not inside a block already, mark the bottom
                    // of the block.
                    // If we are inside a block already, make sure to leave
                    // the 'fuzzy' state because this clearly now is a
                    // prescription block.
                    if (insideBlock)
                    {
                        fuzzy = false;
                    }
                    else
                    {
                        end = paragraph;
                        insideBlock = true;
                    }
                }
                else if (Prescription.IsPotentialPrescriptionLine(line))
                {
                    // The current line is a putative prescription line:
                    // If we're not inside a block already, enter the
                    // "fuzzy" state.
                    // If we are inside a block, no special action is
                    // needed, we can continue with the next paragraph.
                    if (!insideBlock)
                    {
                        fuzzy = true;
                        insideBlock = true;
                        end = paragraph;
                    }
                }
                else if (insideBlock)
                {
                    // The current line is not a prescription line.
                    if (!fuzzy)
                    {
                        // We are in a definitive block of prescriptions:
                        // the line below the current one is the top of
                        // the block, and the search is over.
                        start = paragraphs[i + 1];
                        break;
                    }

                    // We were in a merely putative block: discard the
                    // information about its bottom end and reset all flags.
                    fuzzy = false;
                    insideBlock = false;
                    end = null;
                }
            }

            // No block found, or the only candidate that reached the top of
            // the document never contained an unequivocal prescription line.
            if (end == null || fuzzy)
            {
                return false;
            }

            // If we have an end paragraph but no start paragraph, the block
            // extends to the first paragraph of the document.
            if (start == null)
            {
                start = paragraphs[1];
            }

            document.Range(start.Range.Start, end.Range.End).Select();
            return true;
        }

        #endregion

        #region Properties

        /// <summary>
        /// Gets the prescriptions that were imported from the text,
        /// ordered column by column.
        /// </summary>
        public List<Prescription> Prescriptions { get; protected set; }

        #endregion

        #region Constructor

        /// <summary>
        /// Creates an importer without importing anything;
        /// <see cref="Prescriptions"/> is left unset.
        /// </summary>
        public Importer() { }

        /// <summary>
        /// Creates an importer and immediately imports the prescriptions
        /// contained in <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text to import prescriptions from.</param>
        /// <remarks>
        /// This calls the virtual <see cref="Import"/> method; an override in
        /// a derived class runs before that class's own constructor body.
        /// </remarks>
        public Importer(string text)
            : this()
        {
            Import(text);
        }

        #endregion

        #region Protected methods

        /// <summary>
        /// Splits <paramref name="text"/> into paragraphs, converts every
        /// potential prescription line into one or more prescriptions, and
        /// stores the result in <see cref="Prescriptions"/>.
        /// </summary>
        /// <param name="text">Text to import prescriptions from.</param>
        /// <remarks>
        /// If lines yield several prescriptions each (i.e. the input was laid
        /// out in columns), the prescriptions are ordered column by column:
        /// first the first entry of every line, then the second entry of every
        /// line, and so on. Lines with fewer entries than the widest line
        /// simply do not contribute to the later columns.
        /// </remarks>
        protected virtual void Import(string text)
        {
            // Keep the prescriptions grouped by line so that the column of
            // each prescription is known even if lines differ in width.
            List<IList<Prescription>> rows = new List<IList<Prescription>>();
            int columns = 1;

            foreach (string line in Helpers.SplitParagraphs(text))
            {
                if (Prescription.IsPotentialPrescriptionLine(line))
                {
                    IList<Prescription> row = Prescription.ManyFromLine(line);
                    columns = System.Math.Max(columns, row.Count);
                    rows.Add(row);
                }
            }

            // Read the grid column by column. For single-column input this
            // reproduces the line order unchanged.
            List<Prescription> sorted = new List<Prescription>();
            for (int column = 0; column < columns; column++)
            {
                foreach (IList<Prescription> row in rows)
                {
                    if (column < row.Count)
                    {
                        sorted.Add(row[column]);
                    }
                }
            }

            Prescriptions = sorted;
        }

        #endregion
    }
}