diff --git a/data/microserviceConfigs/default.yaml b/data/microserviceConfigs/default.yaml
index de516cb53..cba437e8a 100644
--- a/data/microserviceConfigs/default.yaml
+++ b/data/microserviceConfigs/default.yaml
@@ -217,4 +217,52 @@ IsIdentifiableOptions:
MaxConfirmAttempts: 1
ClassifierType: 'Microservices.IsIdentifiable.Service.TesseractStanfordDicomFileClassifier'
DataDirectory: ''
+
+ #Optional. Full connection string to the database storing the whitelist of valid entries
+ WhitelistConnectionString:
+ #Optional. The DBMS provider of the whitelist table e.g. MySql
+ WhitelistDatabaseType:
+ #Optional. The unqualified name of the whitelist table
+ WhitelistTableName:
+ #Optional. The column in WhitelistTableName which contains the whitelist elements
+ WhitelistColumn:
+
+ #Optional. Path to a CSV file containing a single untitled column of whitelist values
+ WhitelistCsv:
+
+ #Optional. Generate a report on the proportion of values failing validation (for each column)
+ #ColumnReport: true
+
+ #Optional. Generate a report listing every unique value failing validation (and the column the value failed in)
+ #ValuesReport: true
+
+ #Optional. Generate a full failure storage report that persists Failure objects in a manner that they can be retrieved.
+ #StoreReport: true
+
+ #Optional - If specified reports will be generated in the given folder. If not specified, current directory is used (unless an alternate destination option is picked)
+ DestinationCsvFolder:
+ #Optional - If specified, the given separator will be used instead of a comma (,). Supports the escape sequences \t (tab) and \r\n
+ DestinationCsvSeparator:
+ #Optional - If specified all tabs, newlines (\r and \n) and 2+ spaces will be stripped from the values written as output (applies to all output formats)
+ DestinationNoWhitespace:
+
+ #Optional. Full connection string to the database in which to store the report results
+ DestinationConnectionString:
+ #Optional. The DBMS provider of DestinationConnectionString e.g. MySql
+ DestinationDatabaseType:
+
+ #Optional. If specified postcodes will not be reported as failures
+ IgnorePostcodes: false
+ #Optional. Comma separated list of columns/tags which should be ignored and not processed
+ SkipColumns:
+ #Optional. If set and using a 7 class NER model then DATE and TIME objects will not be considered failures.
+ IgnoreDatesInText:
+ #Optional. Set to control the max size of the in-memory store of processed items before they get written out to any destinations. Only makes sense for reports that don't perform any aggregation across the data
+ MaxCacheSize:
+ #Optional. Filename of additional rules in yaml format.
+ RulesFile:
+ #Optional. Directory of additional rules in yaml format.
+ RulesDirectory:
+ #Optional. Maximum number of answers to cache per column.
+ MaxValidationCacheSize:
diff --git a/news/795-feature.md b/news/795-feature.md
new file mode 100644
index 000000000..f8f5ed686
--- /dev/null
+++ b/news/795-feature.md
@@ -0,0 +1 @@
+Added support for specifying IsIdentifiable CLI options in the yaml config files instead of on the command line (the command line always takes precedence when an option is specified in both places)
\ No newline at end of file
diff --git a/src/common/Smi.Common/Options/GlobalOptions.cs b/src/common/Smi.Common/Options/GlobalOptions.cs
index b699a58e8..1179caf44 100644
--- a/src/common/Smi.Common/Options/GlobalOptions.cs
+++ b/src/common/Smi.Common/Options/GlobalOptions.cs
@@ -112,6 +112,107 @@ public class IsIdentifiableOptions : ConsumerOptions
///
public string DataDirectory { get; set; }
+ /// <summary>
+ /// Optional. Full connection string to the database storing the whitelist of valid entries
+ /// </summary>
+ public string WhitelistConnectionString { get; set; }
+
+ /// <summary>
+ /// Optional. The DBMS provider of the whitelist table e.g. MySql
+ /// </summary>
+ public DatabaseType? WhitelistDatabaseType { get; set; }
+
+ /// <summary>
+ /// Optional. The unqualified name of the whitelist table
+ /// </summary>
+ public string WhitelistTableName { get; set; }
+
+ /// <summary>
+ /// Optional. The column in <see cref="WhitelistTableName"/> which contains the whitelist elements
+ /// </summary>
+ public string WhitelistColumn { get; set; }
+
+ /// <summary>
+ /// Optional. Path to a CSV file containing a single untitled column of whitelist values
+ /// </summary>
+ public string WhitelistCsv { get; set; }
+
+ /// <summary>
+ /// Optional. Generate a report on the proportion of values failing validation (for each column)
+ /// </summary>
+ public bool? ColumnReport { get; set; }
+
+ /// <summary>
+ /// Optional. Generate a report listing every unique value failing validation (and the column the value failed in)
+ /// </summary>
+ public bool? ValuesReport { get; set; }
+
+ /// <summary>
+ /// Optional. Generate a full failure storage report that persists Failure objects in a manner that they can be retrieved.
+ /// </summary>
+ public bool? StoreReport { get; set; }
+
+ /// <summary>
+ /// Optional - If specified reports will be generated in the given folder. If not specified, current directory is used (unless an alternate destination option is picked)
+ /// </summary>
+ public string DestinationCsvFolder { get; set; }
+
+ /// <summary>
+ /// Optional - If specified, the given separator will be used instead of a comma. Includes support for \t for tab and \r\n.
+ /// </summary>
+ public string DestinationCsvSeparator { get; set; }
+
+ /// <summary>
+ /// Optional - If specified all tabs, newlines (\r and \n) and 2+ spaces will be stripped from the values written as output (applies to all output formats)
+ /// </summary>
+ public bool? DestinationNoWhitespace { get; set; }
+
+ /// <summary>
+ /// Optional. Full connection string to the database in which to store the report results
+ /// </summary>
+ public string DestinationConnectionString { get; set; }
+
+ /// <summary>
+ /// Optional. The DBMS provider of <see cref="DestinationConnectionString"/> e.g. MySql
+ /// </summary>
+ public DatabaseType? DestinationDatabaseType { get; set; }
+
+ /// <summary>
+ /// Optional. If specified postcodes will not be reported as failures
+ /// </summary>
+ public bool? IgnorePostcodes { get; set; }
+
+ /// <summary>
+ /// Optional. Comma separated list of columns/tags which should be ignored and not processed
+ /// </summary>
+ public string SkipColumns { get; set; }
+
+ /// <summary>
+ /// Optional. If set and using a 7 class NER model then DATE and TIME objects will not be considered failures.
+ /// </summary>
+ public bool? IgnoreDatesInText { get; set; }
+
+ /// <summary>
+ /// Optional. Set to control the max size of the in-memory store of processed items before they get written out to any destinations. Only makes sense for reports that don't perform any aggregation across the data
+ /// </summary>
+ public int? MaxCacheSize { get; set; }
+
+ /// <summary>
+ /// Optional. Filename of additional rules in yaml format.
+ /// </summary>
+ public string RulesFile { get; set; }
+
+ /// <summary>
+ /// Optional. Directory of additional rules in yaml format.
+ /// </summary>
+ public string RulesDirectory { get; set; }
+
+ /// <summary>
+ /// Optional. Maximum number of answers to cache per column.
+ /// </summary>
+ public int? MaxValidationCacheSize { get; set; }
+
+
public ProducerOptions IsIdentifiableProducerOptions {get; set;}
}
diff --git a/src/microservices/Microservices.IsIdentifiable/Options/IsIdentifiableAbstractOptions.cs b/src/microservices/Microservices.IsIdentifiable/Options/IsIdentifiableAbstractOptions.cs
index e51fca051..36e8aeef6 100644
--- a/src/microservices/Microservices.IsIdentifiable/Options/IsIdentifiableAbstractOptions.cs
+++ b/src/microservices/Microservices.IsIdentifiable/Options/IsIdentifiableAbstractOptions.cs
@@ -55,8 +55,10 @@ public abstract class IsIdentifiableAbstractOptions : CliOptions
[Option(HelpText = "Optional. If set and using a 7 class NER model then DATE and TIME objects will not be considered failures.")]
public bool IgnoreDatesInText { get; set; }
- [Option(HelpText = "Optional. Set to control the max size of the in-memory store of processed before the get written out to any destinations. Only makes sense for reports that don't perform any aggregation across the data", Default = 10000)]
- public int MaxCacheSize { get; set; }
+ [Option(HelpText = "Optional. Set to control the max size of the in-memory store of processed before the get written out to any destinations. Only makes sense for reports that don't perform any aggregation across the data", Default = MaxCacheSizeDefault)]
+ public int MaxCacheSize { get; set; } = MaxCacheSizeDefault;
+ /// <summary>Initial value of <see cref="MaxCacheSize"/>. <see cref="FillMissingWithValuesUsing"/> only replaces <see cref="MaxCacheSize"/> with the yaml value while it still equals this constant.</summary>
+ public const int MaxCacheSizeDefault = 10000;
[Option(HelpText = "Optional. Filename of additional rules in yaml format.")]
public string RulesFile { get; set; }
@@ -64,8 +66,10 @@ public abstract class IsIdentifiableAbstractOptions : CliOptions
[Option(HelpText = "Optional. Directory of additional rules in yaml format.")]
public string RulesDirectory { get; set; }
- [Option(HelpText = "Optional. Maximum number of answers to cache per column.", Default = 1_000_000)]
- public int MaxValidationCacheSize { get; set; } = 1_000_000;
+ [Option(HelpText = "Optional. Maximum number of answers to cache per column.", Default = MaxValidationCacheSizeDefault)]
+ public int MaxValidationCacheSize { get; set; } = MaxValidationCacheSizeDefault;
+ /// <summary>Initial value of <see cref="MaxValidationCacheSize"/>. <see cref="FillMissingWithValuesUsing"/> only replaces <see cref="MaxValidationCacheSize"/> with the yaml value while it still equals this constant.</summary>
+ public const int MaxValidationCacheSizeDefault = 1_000_000;
///
/// Returns a short string with no spaces or punctuation that describes the target. This will be used
@@ -81,5 +85,74 @@ public virtual void ValidateOptions()
{
}
+
+
+        /// <summary>
+        /// Populates options that were not specified on the command line with the values (if any) found in the
+        /// IsIdentifiableOptions section of the default yaml file for smi services. Options that already hold a
+        /// non default value are left untouched.
+        /// </summary>
+        /// <remarks>
+        /// An option counts as unspecified while it still holds its default: null/whitespace for strings, false
+        /// for flags, default(DatabaseType) for database types and the *Default constants for the cache sizes.
+        /// A default that was passed explicitly on the command line cannot be told apart from an omitted option.
+        /// </remarks>
+        /// <param name="globalOpts">Yaml options to fall back on. When null there is nothing to fill from.</param>
+        public virtual void FillMissingWithValuesUsing(IsIdentifiableOptions globalOpts)
+        {
+            // No IsIdentifiableOptions available (e.g. section absent from the yaml): keep the command line values
+            if (globalOpts == null)
+                return;
+
+            // Whitelist source
+            if (string.IsNullOrWhiteSpace(WhitelistConnectionString))
+                WhitelistConnectionString = globalOpts.WhitelistConnectionString;
+            if (WhitelistDatabaseType == default(DatabaseType) && globalOpts.WhitelistDatabaseType.HasValue)
+                WhitelistDatabaseType = globalOpts.WhitelistDatabaseType.Value;
+            if (string.IsNullOrWhiteSpace(WhitelistTableName))
+                WhitelistTableName = globalOpts.WhitelistTableName;
+            if (string.IsNullOrWhiteSpace(WhitelistColumn))
+                WhitelistColumn = globalOpts.WhitelistColumn;
+            if (string.IsNullOrWhiteSpace(WhitelistCsv))
+                WhitelistCsv = globalOpts.WhitelistCsv;
+
+            // Which reports to generate
+            if (ColumnReport == default(bool) && globalOpts.ColumnReport.HasValue)
+                ColumnReport = globalOpts.ColumnReport.Value;
+            if (ValuesReport == default(bool) && globalOpts.ValuesReport.HasValue)
+                ValuesReport = globalOpts.ValuesReport.Value;
+            if (StoreReport == default(bool) && globalOpts.StoreReport.HasValue)
+                StoreReport = globalOpts.StoreReport.Value;
+
+            // Where and how reports are written
+            if (string.IsNullOrWhiteSpace(DestinationCsvFolder))
+                DestinationCsvFolder = globalOpts.DestinationCsvFolder;
+            if (string.IsNullOrWhiteSpace(DestinationCsvSeparator))
+                DestinationCsvSeparator = globalOpts.DestinationCsvSeparator;
+            if (DestinationNoWhitespace == default(bool) && globalOpts.DestinationNoWhitespace.HasValue)
+                DestinationNoWhitespace = globalOpts.DestinationNoWhitespace.Value;
+            if (string.IsNullOrWhiteSpace(DestinationConnectionString))
+                DestinationConnectionString = globalOpts.DestinationConnectionString;
+            if (DestinationDatabaseType == default(DatabaseType) && globalOpts.DestinationDatabaseType.HasValue)
+                DestinationDatabaseType = globalOpts.DestinationDatabaseType.Value;
+
+            // What to ignore during validation
+            if (IgnorePostcodes == default(bool) && globalOpts.IgnorePostcodes.HasValue)
+                IgnorePostcodes = globalOpts.IgnorePostcodes.Value;
+            if (string.IsNullOrWhiteSpace(SkipColumns))
+                SkipColumns = globalOpts.SkipColumns;
+            if (IgnoreDatesInText == default(bool) && globalOpts.IgnoreDatesInText.HasValue)
+                IgnoreDatesInText = globalOpts.IgnoreDatesInText.Value;
+
+            // Rules and caching
+            if (MaxCacheSize == MaxCacheSizeDefault && globalOpts.MaxCacheSize.HasValue)
+                MaxCacheSize = globalOpts.MaxCacheSize.Value;
+            if (string.IsNullOrWhiteSpace(RulesFile))
+                RulesFile = globalOpts.RulesFile;
+            if (string.IsNullOrWhiteSpace(RulesDirectory))
+                RulesDirectory = globalOpts.RulesDirectory;
+            if (MaxValidationCacheSize == MaxValidationCacheSizeDefault && globalOpts.MaxValidationCacheSize.HasValue)
+                MaxValidationCacheSize = globalOpts.MaxValidationCacheSize.Value;
+        }
}
}
diff --git a/src/microservices/Microservices.IsIdentifiable/Program.cs b/src/microservices/Microservices.IsIdentifiable/Program.cs
index 035f9116a..5bbcdf5a6 100644
--- a/src/microservices/Microservices.IsIdentifiable/Program.cs
+++ b/src/microservices/Microservices.IsIdentifiable/Program.cs
@@ -34,6 +34,9 @@ private static int OnParse(GlobalOptions globals, object parsedOpts)
{
var opts = SmiCliInit.Verify(parsedOpts);
+ // For any values not specified on the command line - use the yaml values
+ opts.FillMissingWithValuesUsing(globals.IsIdentifiableOptions);
+
return opts switch
{
IsIdentifiableRelationalDatabaseOptions o => Run(o),
diff --git a/tests/microservices/Microservices.IsIdentifiable.Tests/ServiceTests/IsIdentifiableAbstractOptionsTests.cs b/tests/microservices/Microservices.IsIdentifiable.Tests/ServiceTests/IsIdentifiableAbstractOptionsTests.cs
new file mode 100644
index 000000000..77a7aaffb
--- /dev/null
+++ b/tests/microservices/Microservices.IsIdentifiable.Tests/ServiceTests/IsIdentifiableAbstractOptionsTests.cs
@@ -0,0 +1,156 @@
+using FAnsi;
+using Microservices.IsIdentifiable.Options;
+using NUnit.Framework;
+using Smi.Common.Options;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Microservices.IsIdentifiable.Tests.ServiceTests
+{
+    /// <summary>
+    /// Tests for IsIdentifiableAbstractOptions.FillMissingWithValuesUsing, which copies settings from the yaml
+    /// IsIdentifiableOptions onto command line options that were left unspecified.
+    /// </summary>
+    /// <remarks>
+    /// Yaml and command line properties are paired up by name using reflection. A paired property whose type has
+    /// no test value defined makes the test throw, flagging that the harness needs extending.
+    /// </remarks>
+    class IsIdentifiableAbstractOptionsTests
+    {
+        /// <summary>
+        /// With nothing set on the command line or in the yaml the fill should complete without throwing
+        /// </summary>
+        [Test]
+        public void FillMissingWithValuesUsing_NoOptionsAnywhere()
+        {
+            var opts = new IsIdentifiableDicomFileOptions();
+            var globalOpts = new IsIdentifiableOptions();
+
+            Assert.DoesNotThrow(() => opts.FillMissingWithValuesUsing(globalOpts));
+        }
+
+        /// <summary>
+        /// A value present only in the yaml must end up on the command line options
+        /// </summary>
+        [Test]
+        public void FillMissingWithValuesUsing_Override()
+        {
+            int propsCounted = 0;
+
+            foreach (var (gProp, cliProp) in GetSharedProperties())
+            {
+                var opts = new IsIdentifiableDicomFileOptions();
+                var globalOpts = new IsIdentifiableOptions();
+
+                var testVal = GetTestValue(cliProp);
+                gProp.SetValue(globalOpts, testVal);
+
+                // a fresh options object must not already hold the test value or the check below proves nothing
+                Assert.AreNotEqual(testVal, cliProp.GetValue(opts), $"{cliProp.Name} already had the test value before filling");
+
+                opts.FillMissingWithValuesUsing(globalOpts);
+
+                Assert.AreEqual(testVal, cliProp.GetValue(opts), $"{cliProp.Name} was not filled in from the yaml value");
+
+                propsCounted++;
+            }
+
+            // we did test some properties right!
+            Assert.Greater(propsCounted, 0, "No property names are shared between the yaml and command line options");
+        }
+
+        /// <summary>
+        /// A value set on the command line must survive even when the yaml specifies a different one
+        /// </summary>
+        [Test]
+        public void FillMissingWithValuesUsing_NoOverride()
+        {
+            int propsCounted = 0;
+
+            foreach (var (gProp, cliProp) in GetSharedProperties())
+            {
+                var opts = new IsIdentifiableDicomFileOptions();
+                var globalOpts = new IsIdentifiableOptions();
+
+                var yamlVal = GetTestValue(cliProp);
+                var cliVal = GetTestValue2(cliProp);
+
+                if (yamlVal is bool)
+                {
+                    // boolean cli false is the default so missing and false are the same
+                    // so instead lets make sure that false in yaml config doesn't override
+                    // true in cli
+                    yamlVal = false;
+                    cliVal = true;
+                }
+
+                // yaml says one value
+                gProp.SetValue(globalOpts, yamlVal);
+                // cli says a different value
+                cliProp.SetValue(opts, cliVal);
+
+                // we should not have the yaml file entry
+                Assert.AreNotEqual(yamlVal, cliProp.GetValue(opts), $"{cliProp.Name} held the yaml value before filling");
+
+                // we ask to fill in missing values using the yaml entries
+                opts.FillMissingWithValuesUsing(globalOpts);
+
+                // but we had an entry on CLI already so that should take precedence
+                Assert.AreNotEqual(yamlVal, cliProp.GetValue(opts), $"{cliProp.Name} was overwritten by the yaml value");
+                Assert.AreEqual(cliVal, cliProp.GetValue(opts), $"{cliProp.Name} lost its command line value");
+
+                propsCounted++;
+            }
+
+            // we did test some properties right!
+            Assert.Greater(propsCounted, 0, "No property names are shared between the yaml and command line options");
+        }
+
+        /// <summary>
+        /// Pairs each public property of IsIdentifiableOptions with the IsIdentifiableDicomFileOptions property
+        /// of the same name, skipping yaml properties that have no command line counterpart
+        /// </summary>
+        private static IEnumerable<(System.Reflection.PropertyInfo Global, System.Reflection.PropertyInfo Cli)> GetSharedProperties()
+        {
+            foreach (var gProp in typeof(IsIdentifiableOptions).GetProperties())
+            {
+                var cliProp = typeof(IsIdentifiableDicomFileOptions).GetProperty(gProp.Name);
+
+                if (cliProp != null)
+                {
+                    yield return (gProp, cliProp);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns the first test value for the type of <paramref name="gProp"/>
+        /// </summary>
+        private static object GetTestValue(System.Reflection.PropertyInfo gProp)
+            => PickByType(gProp, 5123, "troll doll!", true, DatabaseType.MySql);
+
+        /// <summary>
+        /// Returns a second test value for the type of <paramref name="gProp"/>, different from <see cref="GetTestValue"/>
+        /// </summary>
+        private static object GetTestValue2(System.Reflection.PropertyInfo gProp)
+            => PickByType(gProp, 66456, "rylyly?", false, DatabaseType.Oracle);
+
+        /// <summary>
+        /// Selects whichever candidate matches the declared type of <paramref name="gProp"/>
+        /// </summary>
+        /// <exception cref="ArgumentException">The property type has no candidate (a gap in the test harness)</exception>
+        private static object PickByType(System.Reflection.PropertyInfo gProp, int intVal, string stringVal, bool boolVal, DatabaseType dbVal)
+        {
+            var type = gProp.PropertyType;
+            if (type == typeof(int)) return intVal;
+            if (type == typeof(string)) return stringVal;
+            if (type == typeof(bool)) return boolVal;
+            if (type == typeof(DatabaseType)) return dbVal;
+
+            throw new ArgumentException($"Not sure what value to use in test for PropertyType {gProp.PropertyType}. This is an error in the test harness coverage not the underlying code.");
+        }
+    }
+}