From 1ea093ef97bf18720c2ad73a3a683cd8c49b4db9 Mon Sep 17 00:00:00 2001 From: sowu880 <57981365+sowu880@users.noreply.github.com> Date: Fri, 27 Aug 2021 16:46:39 +0800 Subject: [PATCH 1/3] Update readme for new features (#137) * update readme * Update FHIR-anonymization.md * Update FHIR-anonymization.md --- docs/FHIR-anonymization.md | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/docs/FHIR-anonymization.md b/docs/FHIR-anonymization.md index 768ec764..5de62bc6 100644 --- a/docs/FHIR-anonymization.md +++ b/docs/FHIR-anonymization.md @@ -207,13 +207,15 @@ The safe harbor configuration files can be accessed via [R4](src/Microsoft.Healt ### Configuration file format -The configuration is specified in JSON format. It has three high-level sections. One of these sections, namely _fhirVersion_ specify the configuration file's version for anonymizer. The second section named _fhirPathRules_ is meant to specify anonymization methods for data elements. The third section named _parameters_ affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file. +The configuration is specified in JSON format. It has four high-level sections. +One of these sections, namely _fhirVersion_, specifies the configuration file's version for the anonymizer. The second section, _processingErrors_, specifies the behavior for processing errors. The third section, named _fhirPathRules_, is meant to specify anonymization methods for data elements. The last section, named _parameters_, affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file. 
Here is a sample configuration for R4: ```json { "fhirVersion": "R4", + "processingError":"raise", "fhirPathRules": [ {"path": "nodesByType('Extension')", "method": "redact"}, {"path": "Organization.identifier", "method": "keep"}, @@ -238,6 +240,36 @@ Here is a sample configuration for R4: |Empty or Null| The configuration file targets the same FHIR version as the executable. |Other values| Other values will raise an exception. +### Processing Errors Specification + +The anonymization engine throws three main kinds of exceptions: _AnonymizerConfigurationException_, _AnonymizerProcessingException_ and _InvalidInputException_. +|Exception|Description| +|-----|-----| +|AnonymizerConfigurationException or AnonymizerRuleNotApplicableException|Raised when the configuration file has an invalid format or value.| +|AnonymizerProcessingException|Raised during the process of anonymizing a FHIR node.| +|InvalidInputException|Raised when an input FHIR resource has an invalid format.| + +Since an _AnonymizerProcessingException_ may be caused by a specific FHIR resource, customers can set the behavior for this kind of exception in the _processingErrors_ section of the configuration file. The setting affects the output, especially for batch work. + +|processingErrors|Description| +|----|----| +|raise|Raise _AnonymizerProcessingException_; the program fails and stops.| +|skip| Skip _AnonymizerProcessingException_ and return an empty FHIR resource; the program continues. | + +Here is the structure of the empty FHIR resource for a patient: +``` +{ + "resourceType": "Patient", + "meta": { + "security": [ + { + "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationValue", + "code": "REDACTED", + "display": "redacted" + } + ] +} +``` ### FHIR Path Rules FHIR path rules can be used to specify the anonymization methods for individual elements as well as elements of specific data types. 
Ex: From 31667220eeb3cf691e21766b16208cf3d5666595 Mon Sep 17 00:00:00 2001 From: ginalee-dotcom <68250213+ginalee-dotcom@users.noreply.github.com> Date: Mon, 6 Sep 2021 00:45:23 -0700 Subject: [PATCH 2/3] Update FHIR-anonymization.md (#141) --- docs/FHIR-anonymization.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/FHIR-anonymization.md b/docs/FHIR-anonymization.md index 5de62bc6..6f8a378f 100644 --- a/docs/FHIR-anonymization.md +++ b/docs/FHIR-anonymization.md @@ -32,9 +32,9 @@ You can also export FHIR resource from your FHIR server using [Bulk Export](http ## Anonymize FHIR data: using the command line tool Once you have built the command line tool, you will find two executable files for R4 and STU3 respectively: -1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder. +1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder. -2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder. +2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder. You can use these executables to anonymize FHIR resource files in a folder. ``` @@ -104,7 +104,7 @@ You can also export FHIR resources from a FHIR server using [Bulk Export](https: ### Create Data Factory pipeline -1. Enter the project folder $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.\.AzureDataFactoryPipeline. Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below. +1. Enter the project folder $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.\.AzureDataFactoryPipeline. 
Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below. > **[NOTE]** > dataFactoryName can contain only lowercase characters or numbers, and must be 3-19 characters in length. @@ -203,7 +203,7 @@ Out of the 18 identifier types mentioned in HIPAA Safe Harbor method (2)(i), thi This configuration file is provided in a best-effort manner. We **strongly** recommend that you review the HIPAA guidelines as well as the implementation of this project before using it for you anonymization requirements. -The safe harbor configuration files can be accessed via [R4](src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links. +The safe harbor configuration files can be accessed via [R4](FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](FHIR/src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links. ### Configuration file format @@ -288,7 +288,7 @@ The elements can be specified using [FHIRPath](http://hl7.org/fhirpath/) syntax. |cryptoHash|All elements| Transforms the value using [Crypto-hash method](#crypto-hash). | |encrypt|All elements| Transforms the value using [Encrypt method](#encrypt). | |substitute|All elements| [Substitutes](#substitute) the value to a predefined value. | -|generalize|Elements of primitive types|[Generalizes](#generalize) the value into a more general, less distinguishing value. +|generalize|Elements of [primitive](https://www.hl7.org/fhir/datatypes.html#primitive) types|[Generalizes](#generalize) the value into a more general, less distinguishing value. Two extension methods can be used in FHIR path rule to simplify the FHIR path: - nodesByType('_typename_'): return descendants of type '_typename_'. Nodes in bundle resource and contained list will be excluded. 
@@ -388,19 +388,19 @@ To substitute Address data types with a fixed JSON fragment To generalize valueQuantity fields of Observation resource using expression to define the range mapping ```json { - "path": "nodesByType('Observation').value.value", + "path": "nodesByType('Observation').ofType(Quantity).value", "method": "generalize", "cases":{ - "$this.value>=0 and $this.value<20": "20", - "$this.value>=20 and $this.value<40": "40", - "$this.value>=40 and $this.value<60": "60", - "$this.value>=60 and $this.value<80": "80" + "$this>=0 and $this<20": "20", + "$this>=20 and $this<40": "40", + "$this>=40 and $this<60": "60", + "$this>=60 and $this<80": "80" }, "otherValues":"redact" } ``` > **[NOTE]** -> Take care of the expression for field has choices of types. e.g. Observation.value[x]. The expression for the path should be Observation.value. +> Take care with the expression for fields that have a choice of types, e.g. Observation.value[x]. The expression for the path should be Observation.ofType(x).value. 
To generalize string data type using expression to define the value set mapping @@ -507,7 +507,7 @@ Generalization uses FHIRPath predicate expression to define a set of cases that |string| _"$this in ('es-AR' \| 'es-ES' \| 'es-UY')": "'es'"_|Data fall in the value set will be mapped to "es".|'es-UY' -> 'es'| |string| _"$this.startsWith(\'123\')": "$this.subString(0,2)+\'*\*\*\*\' "_ |Mask sensitive string code.|'1230005' -> '123****'| |date, dateTime, time|_"$this >= @2010-1-1": "@2010"_|Data fall in a date/time/dateTime range will be mapped to one date/time/dateTime value.| 2016-03-10 -> 2010| -|date, dateTime, time|_"$this.replaceMatches('(?<year>\\\d{2,4})-(?<month>\\\d{1,2})-(?<day>\\\d{1,2})\\\b', '${year}-${month}'"_|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01| +|date, dateTime, time|"$this.replaceMatches('(?<year>\\\d{2,4})-(?<month>\\\d{1,2})-(?<day>\\\d{1,2})\\\b', '${year}-${month}'"|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01| For each generalization rule, there are several additional settings to specify in configuration files: - [required] **cases** An object defining key-value pairs to specify case condition and replacement value using FHIRPath predicate expression. _key_ represents case condition and _value_ represents target value. 
From c8a8f7e83b8780e9d16da4340a49beaba5807d67 Mon Sep 17 00:00:00 2001 From: chgl Date: Mon, 27 Sep 2021 11:29:54 +0200 Subject: [PATCH 3/3] Use the "masked" code for encrypted resource parts (#140) Fixes #95 --- .../Visitors/AnonymizationVisitorTests.cs | 6 +++--- .../Models/SecurityLabels.cs | 7 ++++--- .../Processors/ResourceProcessor.cs | 12 ++++++------ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Visitors/AnonymizationVisitorTests.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Visitors/AnonymizationVisitorTests.cs index 65edb0a7..fff7b4f1 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Visitors/AnonymizationVisitorTests.cs +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Visitors/AnonymizationVisitorTests.cs @@ -89,7 +89,7 @@ public void GivenAnEncryptRule_WhenProcess_NodeShouldBeEncrypted() patient = patientNode.ToPoco(); Assert.Single(patient.Meta.Security); - Assert.Contains(SecurityLabels.ENCRYPT.Code, patient.Meta.Security.Select(s => s.Code)); + Assert.Contains(SecurityLabels.MASKED.Code, patient.Meta.Security.Select(s => s.Code)); } [Fact] @@ -98,7 +98,7 @@ public void GivenAPrimitiveSubstituteRule_WhenProcess_NodeShouldBeSubstituted() AnonymizationFhirPathRule[] rules = new AnonymizationFhirPathRule[] { new AnonymizationFhirPathRule("Patient.address.city", "address.city", "Patient", "substitute", AnonymizerRuleType.FhirPathRule, "Patient.address.city", - new Dictionary { {"replaceWith", "ExampleCity2020" } }) + new Dictionary { {"replaceWith", "ExampleCity2020" } }) }; AnonymizationVisitor visitor = new AnonymizationVisitor(rules, CreateTestProcessors()); @@ -275,7 +275,7 @@ public void Given2ConflictRules_WhenProcess_SecondRuleShouldBeIgnored() var patient = CreateTestPatient(); var patientNode = ElementNode.FromElement(patient.ToTypedElement()); - patientNode.Accept(visitor); + 
patientNode.Accept(visitor); string patientCity = patientNode.Select("Patient.address[0].city").First().Value.ToString(); string patientCountry = patientNode.Select("Patient.address[0].country").First().Value.ToString(); diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Models/SecurityLabels.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Models/SecurityLabels.cs index 6fa07cb1..c0a63d1d 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Models/SecurityLabels.cs +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Models/SecurityLabels.cs @@ -25,10 +25,11 @@ public static class SecurityLabels Display = "cryptographic hash function" }; - public static readonly Coding ENCRYPT = new Coding() + public static readonly Coding MASKED = new Coding() { - Code = "ENCRYPT", - Display = "exact value is transformed into ciphertext" + System = "http://terminology.hl7.org/CodeSystem/v3-ObservationValue", + Code = "MASKED", + Display = "masked" }; public static readonly Coding PERTURBED = new Coding() diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Processors/ResourceProcessor.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Processors/ResourceProcessor.cs index 7f411090..7c65a238 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Processors/ResourceProcessor.cs +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Processors/ResourceProcessor.cs @@ -21,7 +21,7 @@ public class ResourceProcessor : IAnonymizerProcessor private readonly HashSet _visitedNodes = new HashSet(); private readonly Dictionary> _typeToNodeLookUp = new Dictionary>(); private readonly Dictionary> _nameToNodeLookUp = new Dictionary>(); - + private static readonly PocoStructureDefinitionSummaryProvider s_provider = new PocoStructureDefinitionSummaryProvider(); private const string _metaNodeName = "meta"; @@ -54,7 +54,7 @@ public ProcessResult Process(ElementNode node, ProcessContext context = null, Di } var 
matchNodes = GetMatchNodes(rule, node); - + foreach (var matchNode in matchNodes) { ruleResult.Update(ProcessNodeRecursive((ElementNode) matchNode.ToElement(), _processors[method], ruleContext, rule.RuleSettings)); @@ -97,9 +97,9 @@ public void AddSecurityTag(ElementNode node, ProcessResult result) } if (result.IsEncrypted && !meta.Security.Any(x => - string.Equals(x.Code, SecurityLabels.ENCRYPT.Code, StringComparison.InvariantCultureIgnoreCase))) + string.Equals(x.Code, SecurityLabels.MASKED.Code, StringComparison.InvariantCultureIgnoreCase))) { - meta.Security.Add(SecurityLabels.ENCRYPT); + meta.Security.Add(SecurityLabels.MASKED); } if (result.IsPerturbed && !meta.Security.Any(x => @@ -196,9 +196,9 @@ private IEnumerable GetMatchNodes(AnonymizationFhirPathRule rule, /* * Special case handling: - * Senario: FHIR path only contains resourceType: Patient, Resource. + * Senario: FHIR path only contains resourceType: Patient, Resource. * Sample AnonymizationFhirPathRule: { "path": "Patient", "method": "keep" } - * + * * Current FHIR path lib do not support navigate such ResourceType FHIR path from resource in bundle. * Example: navigate with FHIR path "Patient" from "Bundle.entry[0].resource[0]" is not support */