Refactor and harden ZaaImporter.

This commit is contained in:
Daniel Kraus 2015-08-04 23:21:38 +02:00
parent ae9cf7fa2e
commit 75c9c984c6
5 changed files with 99 additions and 73 deletions

View File

@ -41,5 +41,15 @@ namespace Tests.Importer.ZaaImporter
// Only 6 distinct time points (see method documentation above). // Only 6 distinct time points (see method documentation above).
Assert.AreEqual(6, importer.Laboratory.TimePoints.Count); Assert.AreEqual(6, importer.Laboratory.TimePoints.Count);
} }
/// <summary>
/// Verifies that <c>Success</c> is false after importing text that does
/// not represent a lab, and true after the same importer instance then
/// imports a time stamp line followed by a laboratory items line.
/// </summary>
[Test]
public void ParseInvalidInput()
{
    const string invalidText = "some arbitrary text\r\nthat does not represent\r\na valid lab";
    const string validText = "(03.03.1930 13:30:00)\r\nKlinische Chemie: Natrium 135 [135 - 145] mmol/l;";
    zaa.ZaaImporter sut = new zaa.ZaaImporter();

    sut.Import(invalidText);
    Assert.IsFalse(sut.Success);

    // The same instance is reused for the second import.
    sut.Import(validText);
    Assert.IsTrue(sut.Success);
}
} }
} }

View File

@ -30,6 +30,11 @@ namespace zaaReloaded2.Importer
/// </summary> /// </summary>
Laboratory Laboratory { get; set; } Laboratory Laboratory { get; set; }
/// <summary>
/// Gets a value that indicates whether the import was successful.
/// </summary>
/// <remarks>
/// Read-only: the interface declares a getter only.
/// </remarks>
bool Success { get; }
/// <summary> /// <summary>
/// Imports laboratory data contained in a string. /// Imports laboratory data contained in a string.
/// </summary> /// </summary>

View File

@ -32,6 +32,19 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// </summary> /// </summary>
public class LaurisParagraph public class LaurisParagraph
{ {
#region Static methods
/// <summary>
/// Investigates a paragraph and determines whether it looks
/// like a Lauris laboratory items paragraph.
/// </summary>
/// <param name="paragraph">Text of the paragraph to test; may be null.</param>
/// <returns>True if the expected-format pattern matches
/// <paramref name="paragraph"/>; false if it does not match or if
/// <paramref name="paragraph"/> is null.</returns>
public static bool ResemblesLaurisParagraph(string paragraph)
{
    // Regex.IsMatch throws ArgumentNullException for null input; a missing
    // paragraph cannot resemble anything, so report false instead.
    if (paragraph == null)
    {
        return false;
    }
    return _expectedFormat.IsMatch(paragraph);
}
#endregion
#region Public properties #region Public properties
/// <summary> /// <summary>
@ -122,7 +135,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
#region Fields #region Fields
static readonly Regex _expectedFormat = new Regex(@"(?<caption>[^:]+:\s*)?(?<items>[^:]+:\s*[^;]+;)*"); static readonly Regex _expectedFormat = new Regex(@"(?<caption>[^:]+:\s*)?(?<items>[^:]+:\s*[^;]+;)+");
Thesaurus.Parameters _parameterDictionary; Thesaurus.Parameters _parameterDictionary;
Thesaurus.Units _unitDictionary; Thesaurus.Units _unitDictionary;

View File

@ -65,11 +65,15 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// <summary> /// <summary>
/// Gets an array of paragraphs in this LaurisText. /// Gets an array of paragraphs in this LaurisText.
/// </summary> /// </summary>
public string[] Paragraphs public IList<String> Paragraphs
{ {
[DebuggerStepThrough] [DebuggerStepThrough]
get get
{ {
if (_paragraphs == null)
{
_paragraphs = new List<String>();
}
return _paragraphs; return _paragraphs;
} }
set set
@ -101,7 +105,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
{ {
Paragraphs = value.Split( Paragraphs = value.Split(
new string[] { Environment.NewLine }, new string[] { Environment.NewLine },
StringSplitOptions.None); StringSplitOptions.None).ToList();
} }
} }
} }
@ -128,7 +132,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
{ } { }
public LaurisTimePoint( public LaurisTimePoint(
string[] paragraphs, IList<String> paragraphs,
Parameters parameterDictionary, Parameters parameterDictionary,
Units unitDictionary) Units unitDictionary)
: this() : this()
@ -138,72 +142,71 @@ namespace zaaReloaded2.Importer.ZaaImporter
Paragraphs = paragraphs; Paragraphs = paragraphs;
} }
public LaurisTimePoint(string[] paragraphs) public LaurisTimePoint(IList<String> paragraphs)
: this(paragraphs, null, null) : this(paragraphs, null, null)
{ {
} }
#endregion #endregion
#region Public methods
/// <summary>
/// Adds a new paragraph to this time point by parsing
/// the paragraph for laboratory items.
/// </summary>
/// <param name="paragraph">Paragraph to add.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="paragraph"/> is null.
/// </exception>
public void AddParagraph(string paragraph)
{
    // Validate before mutating: parsing a null paragraph throws when the
    // time stamp regex is applied, and by then the null entry would
    // already have been appended to Paragraphs.
    if (paragraph == null)
    {
        throw new ArgumentNullException("paragraph");
    }
    Paragraphs.Add(paragraph);
    ParseParagraph(paragraph);
}
#endregion
#region Private methods #region Private methods
/// <summary> /// <summary>
/// Analyzes each Lauris paragraph in this time point, sets the date /// Analyzes each Lauris paragraph in this time point, sets the date
/// and time, and collects LabItem data. /// and time, and collects LabItem data.
/// </summary> /// </summary>
/// <returns>True if the LaurisText has time stamp in the first paragraphs void ParseParagraphs()
/// and contains <see cref="LabItem"/>s in the others.</returns>
bool ParseParagraphs()
{ {
if (Paragraphs.Length > 0) if (Paragraphs != null)
{ {
if (!ParseTimeStamp()) return false; foreach (string paragraph in Paragraphs)
LaurisParagraph lp;
if (IsValidTimePoint)
{ {
for (int i = 1; i < Paragraphs.Length; i++) ParseParagraph(paragraph);
{
lp = new LaurisParagraph(
Paragraphs[i],
_parameterDictionary,
_unitDictionary);
if (lp.IsLaurisParagraph)
{
Items.Merge(lp.Items);
}
}
} }
IsValidTimePoint = Items.Count > 0;
} }
return true;
} }
/// <summary> void ParseParagraph(string paragraph)
/// Analyzes the date and time information that is expected to be
/// in the first paragraph.
/// </summary>
/// <returns>True if the LaurisText contains a time stamp in the
/// first paragraph.</returns>
bool ParseTimeStamp()
{ {
if (Paragraphs.Length == 0) if (_timeStampRegex.IsMatch(paragraph))
throw new InvalidOperationException("The time point has no paragraphs.");
Match m = _timeStampRegex.Match(Paragraphs[0]);
bool success = false;
if (m.Success)
{ {
DateTime dt; DateTime dt;
success = DateTime.TryParseExact( if (DateTime.TryParseExact(
m.Groups["datetime"].Value, _timeStampRegex.Match(paragraph).Groups["datetime"].Value,
"dd.MM.yyyy HH:mm", "dd.MM.yyyy HH:mm",
CultureInfo.InvariantCulture, CultureInfo.InvariantCulture,
DateTimeStyles.AllowWhiteSpaces, DateTimeStyles.AllowWhiteSpaces,
out dt); out dt))
TimeStamp = dt; {
TimeStamp = dt;
}
}
else
{
LaurisParagraph lp = new LaurisParagraph(
paragraph,
_parameterDictionary,
_unitDictionary);
if (lp.IsLaurisParagraph)
{
Items.Merge(lp.Items);
}
} }
IsValidTimePoint = success;
return success;
} }
void AddItems(IItemDictionary items) void AddItems(IItemDictionary items)
@ -221,7 +224,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// </summary> /// </summary>
static readonly Regex _timeStampRegex = new Regex( static readonly Regex _timeStampRegex = new Regex(
@"^\s*\(?\s*(?<datetime>\d\d\.\d\d\.\d\d\d\d\s+\d\d:\d\d)"); @"^\s*\(?\s*(?<datetime>\d\d\.\d\d\.\d\d\d\d\s+\d\d:\d\d)");
string[] _paragraphs; IList<String> _paragraphs;
Parameters _parameterDictionary; Parameters _parameterDictionary;
Units _unitDictionary; Units _unitDictionary;

View File

@ -52,6 +52,14 @@ namespace zaaReloaded2.Importer.ZaaImporter
} }
} }
/// <summary>
/// Indicates whether the import was successful, i.e. whether
/// the laboratory contains at least one time point.
/// </summary>
public bool Success
{
    get
    {
        bool hasTimePoints = Laboratory.TimePoints.Count > 0;
        return hasTimePoints;
    }
}
/// <summary> /// <summary>
/// Splits the <paramref name="text"/> into individual time points /// Splits the <paramref name="text"/> into individual time points
/// and creates <see cref="LaurisTimePoint"/> objects from them. /// and creates <see cref="LaurisTimePoint"/> objects from them.
@ -62,42 +70,29 @@ namespace zaaReloaded2.Importer.ZaaImporter
string[] paragraphs = text.Split( string[] paragraphs = text.Split(
new string[] { Environment.NewLine }, new string[] { Environment.NewLine },
StringSplitOptions.RemoveEmptyEntries); StringSplitOptions.RemoveEmptyEntries);
int i = 0; LaurisTimePoint timePoint = null;
int start = 0;
int numParagraphs = paragraphs.Length;
while (i < numParagraphs) foreach (string paragraph in paragraphs)
{ {
// Search for the next occurrence of a time stamp line // If the current paragraph looks like a Lauris time stamp,
while (i < numParagraphs // create a new time point.
&& !LaurisTimePoint.IsTimeStampLine(paragraphs[i])) if (LaurisTimePoint.IsTimeStampLine(paragraph))
{ {
i++; timePoint = new LaurisTimePoint(paragraph);
Laboratory.AddTimePoint(timePoint);
} }
// If the current paragraph looks like a paragraph with
// TODO: Find an alternative to returning in the middle of the method. // laboratory items, add it to the current time point;
if (i >= numParagraphs) return; // if no time point exists yet, create one.
else if (LaurisParagraph.ResemblesLaurisParagraph(paragraph))
if (LaurisTimePoint.IsTimeStampLine(paragraphs[i]))
{ {
// Remember the time stamp line's index if (timePoint == null)
start = i;
// Seek the next time stamp line
while (i + 1 < numParagraphs
&& !LaurisTimePoint.IsTimeStampLine(paragraphs[i + 1]))
{ {
i++; timePoint = new LaurisTimePoint();
Laboratory.AddTimePoint(timePoint);
} }
timePoint.AddParagraph(paragraph);
} }
Laboratory.AddTimePoint(
new LaurisTimePoint(
paragraphs.Slice(start, i - start + 1),
_parameters,
_units
)
);
} }
} }