Refactor and harden ZaaImporter.

This commit is contained in:
Daniel Kraus 2015-08-04 23:21:38 +02:00
parent ae9cf7fa2e
commit 75c9c984c6
5 changed files with 99 additions and 73 deletions

View File

@ -41,5 +41,15 @@ namespace Tests.Importer.ZaaImporter
// Only 6 distinct time points (see method documentation above).
Assert.AreEqual(6, importer.Laboratory.TimePoints.Count);
}
[Test]
public void ParseInvalidInput()
{
    // Plain text without a time stamp line or "name: value;" items.
    const string invalidText = "some arbitrary text\r\nthat does not represent\r\na valid lab";
    // A time stamp line followed by a single laboratory items paragraph.
    const string validText = "(03.03.1930 13:30:00)\r\nKlinische Chemie: Natrium 135 [135 - 145] mmol/l;";

    // The same importer instance is deliberately used for both imports.
    zaa.ZaaImporter sut = new zaa.ZaaImporter();

    // Importing unrecognizable text must not be reported as a success.
    sut.Import(invalidText);
    Assert.IsFalse(sut.Success);

    // Importing well-formed text afterwards must be reported as a success.
    sut.Import(validText);
    Assert.IsTrue(sut.Success);
}
}
}

View File

@ -30,6 +30,11 @@ namespace zaaReloaded2.Importer
/// </summary>
Laboratory Laboratory { get; set; }
/// <summary>
/// Indicates whether the import was successful.
/// </summary>
bool Success { get; }
/// <summary>
/// Imports laboratory data contained in a string.
/// </summary>

View File

@ -32,6 +32,19 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// </summary>
public class LaurisParagraph
{
#region Static methods
/// <summary>
/// Investigates a paragraph and determines whether it looks
/// like a Lauris laboratory items paragraph.
/// </summary>
/// <param name="paragraph">Text of a single paragraph; may be null.</param>
/// <returns>True if the expected-format regular expression matches
/// <paramref name="paragraph"/>; false otherwise, including when
/// <paramref name="paragraph"/> is null.</returns>
public static bool ResemblesLaurisParagraph(string paragraph)
{
    // Regex.IsMatch throws an ArgumentNullException for null input;
    // a missing paragraph simply does not resemble a Lauris paragraph,
    // so answer the yes/no question instead of throwing.
    if (paragraph == null)
    {
        return false;
    }
    return _expectedFormat.IsMatch(paragraph);
}
#endregion
#region Public properties
/// <summary>
@ -122,7 +135,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
#region Fields
static readonly Regex _expectedFormat = new Regex(@"(?<caption>[^:]+:\s*)?(?<items>[^:]+:\s*[^;]+;)*");
static readonly Regex _expectedFormat = new Regex(@"(?<caption>[^:]+:\s*)?(?<items>[^:]+:\s*[^;]+;)+");
Thesaurus.Parameters _parameterDictionary;
Thesaurus.Units _unitDictionary;

View File

@ -65,11 +65,15 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// <summary>
/// Gets the list of paragraphs in this LaurisText.
/// </summary>
public string[] Paragraphs
public IList<String> Paragraphs
{
[DebuggerStepThrough]
get
{
if (_paragraphs == null)
{
_paragraphs = new List<String>();
}
return _paragraphs;
}
set
@ -101,7 +105,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
{
Paragraphs = value.Split(
new string[] { Environment.NewLine },
StringSplitOptions.None);
StringSplitOptions.None).ToList();
}
}
}
@ -128,7 +132,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
{ }
public LaurisTimePoint(
string[] paragraphs,
IList<String> paragraphs,
Parameters parameterDictionary,
Units unitDictionary)
: this()
@ -138,72 +142,71 @@ namespace zaaReloaded2.Importer.ZaaImporter
Paragraphs = paragraphs;
}
public LaurisTimePoint(string[] paragraphs)
public LaurisTimePoint(IList<String> paragraphs)
: this(paragraphs, null, null)
{
}
#endregion
#region Public methods
/// <summary>
/// Adds a new paragraph to this time point by parsing
/// the paragraph for laboratory items.
/// </summary>
/// <param name="paragraph">Paragraph to add.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="paragraph"/> is null.</exception>
public void AddParagraph(string paragraph)
{
    // Validate before touching any state: parsing a null paragraph
    // would throw anyway, but only after the null entry had already
    // been appended to the Paragraphs list.
    if (paragraph == null)
    {
        throw new ArgumentNullException("paragraph");
    }
    Paragraphs.Add(paragraph);
    ParseParagraph(paragraph);
}
#endregion
#region Private methods
/// <summary>
/// Analyzes each Lauris paragraph in this time point, sets the date
/// and time, and collects LabItem data.
/// </summary>
/// <returns>True if the LaurisText has time stamp in the first paragraphs
/// and contains <see cref="LabItem"/>s in the others.</returns>
bool ParseParagraphs()
void ParseParagraphs()
{
if (Paragraphs.Length > 0)
if (Paragraphs != null)
{
if (!ParseTimeStamp()) return false;
LaurisParagraph lp;
if (IsValidTimePoint)
foreach (string paragraph in Paragraphs)
{
for (int i = 1; i < Paragraphs.Length; i++)
{
lp = new LaurisParagraph(
Paragraphs[i],
_parameterDictionary,
_unitDictionary);
if (lp.IsLaurisParagraph)
{
Items.Merge(lp.Items);
}
}
ParseParagraph(paragraph);
}
IsValidTimePoint = Items.Count > 0;
}
return true;
}
/// <summary>
/// Analyzes the date and time information that is expected to be
/// in the first paragraph.
/// </summary>
/// <returns>True if the LaurisText contains a time stamp in the
/// first paragraph.</returns>
bool ParseTimeStamp()
void ParseParagraph(string paragraph)
{
if (Paragraphs.Length == 0)
throw new InvalidOperationException("The time point has no paragraphs.");
Match m = _timeStampRegex.Match(Paragraphs[0]);
bool success = false;
if (m.Success)
if (_timeStampRegex.IsMatch(paragraph))
{
DateTime dt;
success = DateTime.TryParseExact(
m.Groups["datetime"].Value,
if (DateTime.TryParseExact(
_timeStampRegex.Match(paragraph).Groups["datetime"].Value,
"dd.MM.yyyy HH:mm",
CultureInfo.InvariantCulture,
DateTimeStyles.AllowWhiteSpaces,
out dt);
TimeStamp = dt;
out dt))
{
TimeStamp = dt;
}
}
else
{
LaurisParagraph lp = new LaurisParagraph(
paragraph,
_parameterDictionary,
_unitDictionary);
if (lp.IsLaurisParagraph)
{
Items.Merge(lp.Items);
}
}
IsValidTimePoint = success;
return success;
}
void AddItems(IItemDictionary items)
@ -221,7 +224,7 @@ namespace zaaReloaded2.Importer.ZaaImporter
/// </summary>
static readonly Regex _timeStampRegex = new Regex(
@"^\s*\(?\s*(?<datetime>\d\d\.\d\d\.\d\d\d\d\s+\d\d:\d\d)");
string[] _paragraphs;
IList<String> _paragraphs;
Parameters _parameterDictionary;
Units _unitDictionary;

View File

@ -52,6 +52,14 @@ namespace zaaReloaded2.Importer.ZaaImporter
}
}
/// <summary>
/// Indicates whether the import was successful.
/// </summary>
/// <remarks>
/// The import counts as successful if the laboratory contains at
/// least one time point. If no laboratory is available, the import
/// is reported as not successful.
/// </remarks>
public bool Success
{
    get
    {
        // Laboratory has a public setter, so guard against a missing
        // instance instead of failing with a NullReferenceException.
        var laboratory = Laboratory;
        return laboratory != null && laboratory.TimePoints.Count > 0;
    }
}
/// <summary>
/// Splits the <paramref name="text"/> into individual time points
/// and creates <see cref="LaurisTimePoint"/> objects from them.
@ -62,42 +70,29 @@ namespace zaaReloaded2.Importer.ZaaImporter
string[] paragraphs = text.Split(
new string[] { Environment.NewLine },
StringSplitOptions.RemoveEmptyEntries);
int i = 0;
int start = 0;
int numParagraphs = paragraphs.Length;
LaurisTimePoint timePoint = null;
while (i < numParagraphs)
foreach (string paragraph in paragraphs)
{
// Search for the next occurrence of a time stamp line
while (i < numParagraphs
&& !LaurisTimePoint.IsTimeStampLine(paragraphs[i]))
// If the current paragraph looks like a Lauris time stamp,
// create a new time point.
if (LaurisTimePoint.IsTimeStampLine(paragraph))
{
i++;
timePoint = new LaurisTimePoint(paragraph);
Laboratory.AddTimePoint(timePoint);
}
// TODO: Find an alternative to returning in the middle of the method.
if (i >= numParagraphs) return;
if (LaurisTimePoint.IsTimeStampLine(paragraphs[i]))
// If the current paragraph looks like a paragraph with
// laboratory items, add it to the current time point;
// if no time point exists yet, create one.
else if (LaurisParagraph.ResemblesLaurisParagraph(paragraph))
{
// Remember the time stamp line's index
start = i;
// Seek the next time stamp line
while (i + 1 < numParagraphs
&& !LaurisTimePoint.IsTimeStampLine(paragraphs[i + 1]))
if (timePoint == null)
{
i++;
timePoint = new LaurisTimePoint();
Laboratory.AddTimePoint(timePoint);
}
timePoint.AddParagraph(paragraph);
}
Laboratory.AddTimePoint(
new LaurisTimePoint(
paragraphs.Slice(start, i - start + 1),
_parameters,
_units
)
);
}
}