Skip to content

Commit

Permalink
Merge pull request #145 from microsoft/master
Browse files Browse the repository at this point in the history
Merge master
  • Loading branch information
sowu880 authored Sep 28, 2021
2 parents 881c798 + c8a8f7e commit 364a6de
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public void GivenAnEncryptRule_WhenProcess_NodeShouldBeEncrypted()

patient = patientNode.ToPoco<Patient>();
Assert.Single(patient.Meta.Security);
Assert.Contains(SecurityLabels.ENCRYPT.Code, patient.Meta.Security.Select(s => s.Code));
Assert.Contains(SecurityLabels.MASKED.Code, patient.Meta.Security.Select(s => s.Code));
}

[Fact]
Expand All @@ -98,7 +98,7 @@ public void GivenAPrimitiveSubstituteRule_WhenProcess_NodeShouldBeSubstituted()
AnonymizationFhirPathRule[] rules = new AnonymizationFhirPathRule[]
{
new AnonymizationFhirPathRule("Patient.address.city", "address.city", "Patient", "substitute", AnonymizerRuleType.FhirPathRule, "Patient.address.city",
new Dictionary<string, object> { {"replaceWith", "ExampleCity2020" } })
new Dictionary<string, object> { {"replaceWith", "ExampleCity2020" } })
};

AnonymizationVisitor visitor = new AnonymizationVisitor(rules, CreateTestProcessors());
Expand Down Expand Up @@ -275,7 +275,7 @@ public void Given2ConflictRules_WhenProcess_SecondRuleShouldBeIgnored()

var patient = CreateTestPatient();
var patientNode = ElementNode.FromElement(patient.ToTypedElement());
patientNode.Accept(visitor);
patientNode.Accept(visitor);
string patientCity = patientNode.Select("Patient.address[0].city").First().Value.ToString();
string patientCountry = patientNode.Select("Patient.address[0].country").First().Value.ToString();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ public static class SecurityLabels
Display = "cryptographic hash function"
};

public static readonly Coding ENCRYPT = new Coding()
public static readonly Coding MASKED = new Coding()
{
Code = "ENCRYPT",
Display = "exact value is transformed into ciphertext"
System = "http://terminology.hl7.org/CodeSystem/v3-ObservationValue",
Code = "MASKED",
Display = "masked"
};

public static readonly Coding PERTURBED = new Coding()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class ResourceProcessor : IAnonymizerProcessor
private readonly HashSet<ElementNode> _visitedNodes = new HashSet<ElementNode>();
private readonly Dictionary<string, List<ITypedElement>> _typeToNodeLookUp = new Dictionary<string, List<ITypedElement>>();
private readonly Dictionary<string, List<ITypedElement>> _nameToNodeLookUp = new Dictionary<string, List<ITypedElement>>();

private static readonly PocoStructureDefinitionSummaryProvider s_provider = new PocoStructureDefinitionSummaryProvider();
private const string _metaNodeName = "meta";

Expand Down Expand Up @@ -54,7 +54,7 @@ public ProcessResult Process(ElementNode node, ProcessContext context = null, Di
}

var matchNodes = GetMatchNodes(rule, node);

foreach (var matchNode in matchNodes)
{
ruleResult.Update(ProcessNodeRecursive((ElementNode) matchNode.ToElement(), _processors[method], ruleContext, rule.RuleSettings));
Expand Down Expand Up @@ -97,9 +97,9 @@ public void AddSecurityTag(ElementNode node, ProcessResult result)
}

if (result.IsEncrypted && !meta.Security.Any(x =>
string.Equals(x.Code, SecurityLabels.ENCRYPT.Code, StringComparison.InvariantCultureIgnoreCase)))
string.Equals(x.Code, SecurityLabels.MASKED.Code, StringComparison.InvariantCultureIgnoreCase)))
{
meta.Security.Add(SecurityLabels.ENCRYPT);
meta.Security.Add(SecurityLabels.MASKED);
}

if (result.IsPerturbed && !meta.Security.Any(x =>
Expand Down Expand Up @@ -196,9 +196,9 @@ private IEnumerable<ITypedElement> GetMatchNodes(AnonymizationFhirPathRule rule,

/*
* Special case handling:
* Scenario: FHIR path only contains resourceType: Patient, Resource.
* Scenario: FHIR path only contains resourceType: Patient, Resource.
* Sample AnonymizationFhirPathRule: { "path": "Patient", "method": "keep" }
*
*
* The current FHIR path library does not support navigating such a ResourceType FHIR path from a resource in a bundle.
* Example: navigating with FHIR path "Patient" from "Bundle.entry[0].resource[0]" is not supported.
*/
Expand Down
58 changes: 45 additions & 13 deletions docs/FHIR-anonymization.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ You can also export FHIR resource from your FHIR server using [Bulk Export](http
## Anonymize FHIR data: using the command line tool
Once you have built the command line tool, you will find two executable files for R4 and STU3 respectively:

1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.
1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.

2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.
2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.

You can use these executables to anonymize FHIR resource files in a folder.
```
Expand Down Expand Up @@ -104,7 +104,7 @@ You can also export FHIR resources from a FHIR server using [Bulk Export](https:
### Create Data Factory pipeline
1. Enter the project folder $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.\<version>.AzureDataFactoryPipeline. Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below.
1. Enter the project folder $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.\<version>.AzureDataFactoryPipeline. Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below.
> **[NOTE]**
> dataFactoryName can contain only lowercase characters or numbers, and must be 3-19 characters in length.
Expand Down Expand Up @@ -203,17 +203,19 @@ Out of the 18 identifier types mentioned in HIPAA Safe Harbor method (2)(i), thi
This configuration file is provided in a best-effort manner. We **strongly** recommend that you review the HIPAA guidelines as well as the implementation of this project before using it for your anonymization requirements.


The safe harbor configuration files can be accessed via [R4](src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links.
The safe harbor configuration files can be accessed via [R4](FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](FHIR/src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links.

### Configuration file format

The configuration is specified in JSON format. It has three high-level sections. One of these sections, namely _fhirVersion_ specify the configuration file's version for anonymizer. The second section named _fhirPathRules_ is meant to specify anonymization methods for data elements. The third section named _parameters_ affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file.
The configuration is specified in JSON format. It has four high-level sections.
The first section, _fhirVersion_, specifies the configuration file's version for the anonymizer. The second section, _processingErrors_, specifies the behavior for processing errors. The third section, _fhirPathRules_, specifies anonymization methods for data elements. The last section, _parameters_, affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file.

Here is a sample configuration for R4:

```json
{
"fhirVersion": "R4",
"processingError":"raise",
"fhirPathRules": [
{"path": "nodesByType('Extension')", "method": "redact"},
{"path": "Organization.identifier", "method": "keep"},
Expand All @@ -238,6 +240,36 @@ Here is a sample configuration for R4:
|Empty or Null| The configuration file targets the same FHIR version as the executable.
|Other values| Other values will raise an exception.

### Processing Errors Specification

The anonymization engine throws three main kinds of exceptions: _AnonymizerConfigurationException_, _AnonymizerProcessingException_ and _InvalidInputException_.
|Exception|Description|
|-----|-----|
|AnonymizerConfigurationException or AnonymizerRuleNotApplicableException|Raised when configuration file has invalid format or value.|
|AnonymizerProcessingException|Raised during the process of anonymizing a FHIR node.|
|InvalidInputException|Raised by invalid format of input FHIR resources.|

Since an _AnonymizerProcessingException_ may be caused by a specific FHIR resource, customers can set the behavior for this kind of exception in the _processingErrors_ section of the configuration file. The setting affects the output, especially for batch work.

|processingErrors|Description|
|----|----|
|raise|Raise _AnonymizerProcessingException_; the program fails and stops.|
|skip|Skip _AnonymizerProcessingException_ and return an empty FHIR resource; the program continues.|

Here is the structure of an empty FHIR resource for a patient:
```json
{
  "resourceType": "Patient",
  "meta": {
    "security": [
      {
        "system": "http://terminology.hl7.org/CodeSystem/v3-ObservationValue",
        "code": "REDACTED",
        "display": "redacted"
      }
    ]
  }
}
```

### FHIR Path Rules
FHIR path rules can be used to specify the anonymization methods for individual elements as well as elements of specific data types. Ex:
Expand All @@ -256,7 +288,7 @@ The elements can be specified using [FHIRPath](http://hl7.org/fhirpath/) syntax.
|cryptoHash|All elements| Transforms the value using [Crypto-hash method](#crypto-hash). |
|encrypt|All elements| Transforms the value using [Encrypt method](#encrypt). |
|substitute|All elements| [Substitutes](#substitute) the value to a predefined value. |
|generalize|Elements of primitive types|[Generalizes](#generalize) the value into a more general, less distinguishing value.
|generalize|Elements of [primitive](https://www.hl7.org/fhir/datatypes.html#primitive) types|[Generalizes](#generalize) the value into a more general, less distinguishing value. |

Two extension methods can be used in FHIR path rule to simplify the FHIR path:
- nodesByType('_typename_'): return descendants of type '_typename_'. Nodes in bundle resource and contained list will be excluded.
Expand Down Expand Up @@ -356,19 +388,19 @@ To substitute Address data types with a fixed JSON fragment
To generalize valueQuantity fields of Observation resource using expression to define the range mapping
```json
{
"path": "nodesByType('Observation').value.value",
"path": "nodesByType('Observation').ofType(Quantity).value",
"method": "generalize",
"cases":{
"$this.value>=0 and $this.value<20": "20",
"$this.value>=20 and $this.value<40": "40",
"$this.value>=40 and $this.value<60": "60",
"$this.value>=60 and $this.value<80": "80"
"$this>=0 and $this<20": "20",
"$this>=20 and $this<40": "40",
"$this>=40 and $this<60": "60",
"$this>=60 and $this<80": "80"
},
"otherValues":"redact"
}
```
> **[NOTE]**
> Take care of the expression for field has choices of types. e.g. Observation.value[x]. The expression for the path should be Observation.value.
> Take care with the expression for a field that has a choice of types, e.g. Observation.value[x]. The expression for the path should be Observation.ofType(x).value.
To generalize string data type using expression to define the value set mapping

Expand Down Expand Up @@ -475,7 +507,7 @@ Generalization uses FHIRPath predicate expression to define a set of cases that
|string| _"$this in ('es-AR' \| 'es-ES' \| 'es-UY')": "'es'"_|Data that fall in the value set will be mapped to "es".|'es-UY' -> 'es'|
|string| _"$this.startsWith(\'123\')": "$this.substring(0,3)+\'*\*\*\*\' "_ |Mask sensitive string code.|'1230005' -> '123****'|
|date, dateTime, time|_"$this >= @2010-1-1": "@2010"_|Data that fall in a date/time/dateTime range will be mapped to one date/time/dateTime value.| 2016-03-10 -> 2010|
|date, dateTime, time|_"$this.replaceMatches('(?&lt;year&gt;\\\d{2,4})-(?&lt;month&gt;\\\d{1,2})-(?&lt;day&gt;\\\d{1,2})\\\b', '${year}-${month}'"_|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01|
|date, dateTime, time|"$this.replaceMatches('(?&lt;year&gt;\\\d{2,4})-(?&lt;month&gt;\\\d{1,2})-(?&lt;day&gt;\\\d{1,2})\\\b', '${year}-${month}')"|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01|

For each generalization rule, there are several additional settings to specify in configuration files:
- [required] **cases** An object defining key-value pairs to specify case condition and replacement value using FHIRPath predicate expression. _key_ represents case condition and _value_ represents target value.
Expand Down

0 comments on commit 364a6de

Please sign in to comment.