Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge master #145

Merged
merged 4 commits into from
Sep 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public void GivenAnEncryptRule_WhenProcess_NodeShouldBeEncrypted()

patient = patientNode.ToPoco<Patient>();
Assert.Single(patient.Meta.Security);
Assert.Contains(SecurityLabels.ENCRYPT.Code, patient.Meta.Security.Select(s => s.Code));
Assert.Contains(SecurityLabels.MASKED.Code, patient.Meta.Security.Select(s => s.Code));
}

[Fact]
Expand All @@ -98,7 +98,7 @@ public void GivenAPrimitiveSubstituteRule_WhenProcess_NodeShouldBeSubstituted()
AnonymizationFhirPathRule[] rules = new AnonymizationFhirPathRule[]
{
new AnonymizationFhirPathRule("Patient.address.city", "address.city", "Patient", "substitute", AnonymizerRuleType.FhirPathRule, "Patient.address.city",
new Dictionary<string, object> { {"replaceWith", "ExampleCity2020" } })
new Dictionary<string, object> { {"replaceWith", "ExampleCity2020" } })
};

AnonymizationVisitor visitor = new AnonymizationVisitor(rules, CreateTestProcessors());
Expand Down Expand Up @@ -275,7 +275,7 @@ public void Given2ConflictRules_WhenProcess_SecondRuleShouldBeIgnored()

var patient = CreateTestPatient();
var patientNode = ElementNode.FromElement(patient.ToTypedElement());
patientNode.Accept(visitor);
patientNode.Accept(visitor);
string patientCity = patientNode.Select("Patient.address[0].city").First().Value.ToString();
string patientCountry = patientNode.Select("Patient.address[0].country").First().Value.ToString();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ public static class SecurityLabels
Display = "cryptographic hash function"
};

public static readonly Coding ENCRYPT = new Coding()
public static readonly Coding MASKED = new Coding()
{
Code = "ENCRYPT",
Display = "exact value is transformed into ciphertext"
System = "http://terminology.hl7.org/CodeSystem/v3-ObservationValue",
Code = "MASKED",
Display = "masked"
};

public static readonly Coding PERTURBED = new Coding()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class ResourceProcessor : IAnonymizerProcessor
private readonly HashSet<ElementNode> _visitedNodes = new HashSet<ElementNode>();
private readonly Dictionary<string, List<ITypedElement>> _typeToNodeLookUp = new Dictionary<string, List<ITypedElement>>();
private readonly Dictionary<string, List<ITypedElement>> _nameToNodeLookUp = new Dictionary<string, List<ITypedElement>>();

private static readonly PocoStructureDefinitionSummaryProvider s_provider = new PocoStructureDefinitionSummaryProvider();
private const string _metaNodeName = "meta";

Expand Down Expand Up @@ -54,7 +54,7 @@ public ProcessResult Process(ElementNode node, ProcessContext context = null, Di
}

var matchNodes = GetMatchNodes(rule, node);

foreach (var matchNode in matchNodes)
{
ruleResult.Update(ProcessNodeRecursive((ElementNode) matchNode.ToElement(), _processors[method], ruleContext, rule.RuleSettings));
Expand Down Expand Up @@ -97,9 +97,9 @@ public void AddSecurityTag(ElementNode node, ProcessResult result)
}

if (result.IsEncrypted && !meta.Security.Any(x =>
string.Equals(x.Code, SecurityLabels.ENCRYPT.Code, StringComparison.InvariantCultureIgnoreCase)))
string.Equals(x.Code, SecurityLabels.MASKED.Code, StringComparison.InvariantCultureIgnoreCase)))
{
meta.Security.Add(SecurityLabels.ENCRYPT);
meta.Security.Add(SecurityLabels.MASKED);
}

if (result.IsPerturbed && !meta.Security.Any(x =>
Expand Down Expand Up @@ -196,9 +196,9 @@ private IEnumerable<ITypedElement> GetMatchNodes(AnonymizationFhirPathRule rule,

/*
* Special case handling:
* Senario: FHIR path only contains resourceType: Patient, Resource.
* Senario: FHIR path only contains resourceType: Patient, Resource.
* Sample AnonymizationFhirPathRule: { "path": "Patient", "method": "keep" }
*
*
* Current FHIR path lib do not support navigate such ResourceType FHIR path from resource in bundle.
* Example: navigate with FHIR path "Patient" from "Bundle.entry[0].resource[0]" is not support
*/
Expand Down
58 changes: 45 additions & 13 deletions docs/FHIR-anonymization.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ You can also export FHIR resource from your FHIR server using [Bulk Export](http
## Anonymize FHIR data: using the command line tool
Once you have built the command line tool, you will find two executable files for R4 and STU3 respectively:

1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.
1. Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.

2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.
2. Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool.exe in the $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool\bin\Debug|Release\netcoreapp3.1 folder.

You can use these executables to anonymize FHIR resource files in a folder.
```
Expand Down Expand Up @@ -104,7 +104,7 @@ You can also export FHIR resources from a FHIR server using [Bulk Export](https:

### Create Data Factory pipeline

1. Enter the project folder $SOURCE\src\Microsoft.Health.Fhir.Anonymizer.\<version>.AzureDataFactoryPipeline. Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below.
1. Enter the project folder $SOURCE\FHIR\src\Microsoft.Health.Fhir.Anonymizer.\<version>.AzureDataFactoryPipeline. Locate _AzureDataFactorySettings.json_ in the project and replace the values as described below.

> **[NOTE]**
> dataFactoryName can contain only lowercase characters or numbers, and must be 3-19 characters in length.
Expand Down Expand Up @@ -203,17 +203,19 @@ Out of the 18 identifier types mentioned in HIPAA Safe Harbor method (2)(i), thi
This configuration file is provided in a best-effort manner. We **strongly** recommend that you review the HIPAA guidelines as well as the implementation of this project before using it for you anonymization requirements.


The safe harbor configuration files can be accessed via [R4](src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links.
The safe harbor configuration files can be accessed via [R4](FHIR/src/Microsoft.Health.Fhir.Anonymizer.R4.CommandLineTool/configuration-sample.json) and [STU3](FHIR/src/Microsoft.Health.Fhir.Anonymizer.Stu3.CommandLineTool/configuration-sample.json) links.

### Configuration file format

The configuration is specified in JSON format. It has three high-level sections. One of these sections, namely _fhirVersion_ specify the configuration file's version for anonymizer. The second section named _fhirPathRules_ is meant to specify anonymization methods for data elements. The third section named _parameters_ affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file.
The configuration is specified in JSON format. It has four high-level sections.
One of these sections, namely _fhirVersion_ specify the configuration file's version for anonymizer. The second sections is _processingErrors_ to specify the behaviors for processing errors. The third section named _fhirPathRules_ is meant to specify anonymization methods for data elements. The last section named _parameters_ affects global behavior. _fhirPathRules_ are executed in the order of appearance in the configuration file.

Here is a sample configuration for R4:

```json
{
"fhirVersion": "R4",
"processingError":"raise",
"fhirPathRules": [
{"path": "nodesByType('Extension')", "method": "redact"},
{"path": "Organization.identifier", "method": "keep"},
Expand All @@ -238,6 +240,36 @@ Here is a sample configuration for R4:
|Empty or Null| The configuration file targets the same FHIR version as the executable.
|Other values| Other values will raise an exception.

### Processing Errors Specification

Anonymization engine will throw three main exceptions in the program: _AnonymizationConfigurationException_, _AnonymizationProcessingException_ and _InvalidInputException_.
|Exception|Description|
|-----|-----|
|AnonymizerConfigurationException or AnonymizerRuleNotApplicableException|Raised when configuration file has invalid format or value.|
|AnonymizerProcessingException|Raised during the process of anonymizing a FHIR node.|
|InvalidInputException|Raised by invalid format of input FHIR resources.|

Since _AnonymizationProcessingException_ may caused by a specific FHIR resource, customers can set the behavior when meeting this kind of exceptions in the section _processingErrors_ in configuration file. The setting will affect the output especially for the batch work.

|processingErrors|Description|
|----|----|
|raise|Raise _AnonymizationProcessingException_ with program failed and stopped.|
|skip| Skip _AnonymizationProcessingException_ and return an empty FHIR resource with program continued. |

Here is the structure of empty FHIR resource for patient:
```
{
"resourceType": "Patient",
"meta": {
"security": [
{
"system": "http://terminology.hl7.org/CodeSystem/v3-ObservationValue",
"code": "REDACTED",
"display": "redacted"
}
]
}
```

### FHIR Path Rules
FHIR path rules can be used to specify the anonymization methods for individual elements as well as elements of specific data types. Ex:
Expand All @@ -256,7 +288,7 @@ The elements can be specified using [FHIRPath](http://hl7.org/fhirpath/) syntax.
|cryptoHash|All elements| Transforms the value using [Crypto-hash method](#crypto-hash). |
|encrypt|All elements| Transforms the value using [Encrypt method](#encrypt). |
|substitute|All elements| [Substitutes](#substitute) the value to a predefined value. |
|generalize|Elements of primitive types|[Generalizes](#generalize) the value into a more general, less distinguishing value.
|generalize|Elements of [primitive](https://www.hl7.org/fhir/datatypes.html#primitive) types|[Generalizes](#generalize) the value into a more general, less distinguishing value.

Two extension methods can be used in FHIR path rule to simplify the FHIR path:
- nodesByType('_typename_'): return descendants of type '_typename_'. Nodes in bundle resource and contained list will be excluded.
Expand Down Expand Up @@ -356,19 +388,19 @@ To substitute Address data types with a fixed JSON fragment
To generalize valueQuantity fields of Observation resource using expression to define the range mapping
```json
{
"path": "nodesByType('Observation').value.value",
"path": "nodesByType('Observation').ofType(Quantity).value",
"method": "generalize",
"cases":{
"$this.value>=0 and $this.value<20": "20",
"$this.value>=20 and $this.value<40": "40",
"$this.value>=40 and $this.value<60": "60",
"$this.value>=60 and $this.value<80": "80"
"$this>=0 and $this<20": "20",
"$this>=20 and $this<40": "40",
"$this>=40 and $this<60": "60",
"$this>=60 and $this<80": "80"
},
"otherValues":"redact"
}
```
> **[NOTE]**
> Take care of the expression for field has choices of types. e.g. Observation.value[x]. The expression for the path should be Observation.value.
> Take care of the expression for field has choices of types. e.g. Observation.value[x]. The expression for the path should be Observation.ofType(x).value.

To generalize string data type using expression to define the value set mapping

Expand Down Expand Up @@ -475,7 +507,7 @@ Generalization uses FHIRPath predicate expression to define a set of cases that
|string| _"$this in ('es-AR' \| 'es-ES' \| 'es-UY')": "'es'"_|Data fall in the value set will be mapped to "es".|'es-UY' -> 'es'|
|string| _"$this.startsWith(\'123\')": "$this.subString(0,2)+\'*\*\*\*\' "_ |Mask sensitive string code.|'1230005' -> '123****'|
|date, dateTime, time|_"$this >= @2010-1-1": "@2010"_|Data fall in a date/time/dateTime range will be mapped to one date/time/dateTime value.| 2016-03-10 -> 2010|
|date, dateTime, time|_"$this.replaceMatches('(?&lt;year&gt;\\\d{2,4})-(?&lt;month&gt;\\\d{1,2})-(?&lt;day&gt;\\\d{1,2})\\\b', '${year}-${month}'"_|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01|
|date, dateTime, time|"$this.replaceMatches('(?&lt;year&gt;\\\d{2,4})-(?&lt;month&gt;\\\d{1,2})-(?&lt;day&gt;\\\d{1,2})\\\b', '${year}-${month}'"|Omit "day" to generalize specific date.|2016-01-01 -> 2016-01|

For each generalization rule, there are several additional settings to specify in configuration files:
- [required] **cases** An object defining key-value pairs to specify case condition and replacement value using FHIRPath predicate expression. _key_ represents case condition and _value_ represents target value.
Expand Down