diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 64e84701..f0255b8b 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,4 +1,3 @@ -Cerner Corporation [Contributor Names Follow] Cerner Corporation @@ -7,4 +6,4 @@ Cerner Corporation - Aleksander Eskilson [@bdrillard] [@rbrush]: https://github.com/rbrush -[@bdrillard] https://github.com/bdrillard \ No newline at end of file +[@bdrillard]: https://github.com/bdrillard diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/ValueSetUdfs.java b/bunsen-core/src/main/java/com/cerner/bunsen/ValueSetUdfs.java index 49b91377..cd20d384 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/ValueSetUdfs.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/ValueSetUdfs.java @@ -1,6 +1,5 @@ package com.cerner.bunsen; -import com.cerner.bunsen.mappings.broadcast.BroadcastableValueSets; import com.cerner.bunsen.mappings.broadcast.BroadcastableValueSets; import java.util.ArrayDeque; import java.util.Deque; diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Ancestor.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Ancestor.java index d432c8d3..9faefcd9 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Ancestor.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Ancestor.java @@ -9,9 +9,9 @@ */ public class Ancestor implements Serializable { - private String conceptMapUri; + private String uri; - private String conceptMapVersion; + private String version; private String descendantSystem; @@ -22,30 +22,29 @@ public class Ancestor implements Serializable { private String ancestorValue; /** - * Nullary constructor so Spark can encode this as a bean. + * Nullary constructor so Spark can encode this class as a bean. */ public Ancestor() { } /** - * Constructs an ancestor bean. + * Constructs a {@link Ancestor} instance. * - * @param conceptMapUri the URI of the concept map defining this relationship - * @param conceptMapVersion the version of the concept map defining this relationship - * @param descendantSystem the code system of the descendant - * @param descendantValue the code value of the descendant - * @param ancestorSystem the code system of the ancestor - * @param ancestorValue the code value of the ancestor + * @param uri the hierarchy uri that owns this value + * @param version the hierarchy version that owns this value + * @param descendantSystem the code system of the descendant value + * @param descendantValue the descendant value + * @param ancestorSystem the code system of the ancestor value + * @param ancestorValue the ancestor value */ - public Ancestor(String conceptMapUri, - String conceptMapVersion, + public Ancestor(String uri, + String version, String descendantSystem, String descendantValue, String ancestorSystem, String ancestorValue) { - - this.conceptMapUri = conceptMapUri; - this.conceptMapVersion = conceptMapVersion; + this.uri = uri; + this.version = version; this.descendantSystem = descendantSystem; this.descendantValue = descendantValue; this.ancestorSystem = ancestorSystem; @@ -53,108 +52,108 @@ public Ancestor(String conceptMapUri, } /** - * Returns the URI of the concept map that defines this relationship. + * Returns the hierarchy URI that owns this value. * - * @return the URI of the concept map + * @return the hierarchy URI that owns this value. */ - public String getConceptMapUri() { - return conceptMapUri; + public String getUri() { + return uri; } /** - * Sets the URI of the concept map that defines this relationship. 
+ * Sets the hierarchy URI that owns this value. * - * @param conceptMapUri the URI of the concept map + * @param uri the hierarchy URI that owns this value */ - public void setConceptMapUri(String conceptMapUri) { - this.conceptMapUri = conceptMapUri; + public void setUri(String uri) { + this.uri = uri; } /** - * Returns the version of the concept map that defines this relationship. + * Returns the hierarchy version that owns this value. * - * @return the version of the concept map. + * @return the hierarchy version that owns this value. */ - public String getConceptMapVersion() { - return conceptMapVersion; + public String getVersion() { + return version; } /** - * Sets the version of the concept map that defines this relationship. + * Sets the hierarchy version that owns this value. * - * @param conceptMapVersion the version of the concept map. + * @param version the hierarchy version that owns this value */ - public void setConceptMapVersion(String conceptMapVersion) { - this.conceptMapVersion = conceptMapVersion; + public void setVersion(String version) { + this.version = version; } /** - * Returns the system of the descendant code. + * Returns the code system that owns the descendant value. * - * @return the system of the descendant code + * @return the code system that owns the descendant value. */ public String getDescendantSystem() { return descendantSystem; } /** - * Sets the system of the descendant code. + * Sets the code system that owns the descendant value. * - * @param descendantSystem the system of the descendant code. + * @param descendantSystem the code system that owns the descendant value */ public void setDescendantSystem(String descendantSystem) { this.descendantSystem = descendantSystem; } /** - * Returns the value of the descendant code. + * Returns the descendant value. * - * @return the value of the descendant code. + * @return the descendant value. */ public String getDescendantValue() { return descendantValue; } /** - * Sets the value of the descendant code. + * Sets the descendant value. * - * @param descendantValue the value of the descendant code. + * @param descendantValue the descendant value */ public void setDescendantValue(String descendantValue) { this.descendantValue = descendantValue; } /** - * Returns the system of the ancestor code. + * Returns the code system that owns the ancestor value. * - * @return the system of the ancestor code + * @return the code system that owns the ancestor value. */ public String getAncestorSystem() { return ancestorSystem; } /** - * Sets the system of the ancestor code. + * Sets the code system that owns the ancestor value. * - * @param ancestorSystem the system of the ancestor code. + * @param ancestorSystem the code system that owns the ancestor value */ public void setAncestorSystem(String ancestorSystem) { this.ancestorSystem = ancestorSystem; } /** - * Returns the value of the ancestor code. + * Returns the ancestor value. * - * @return the value of the ancestor code. + * @return the ancestor value. */ public String getAncestorValue() { return ancestorValue; } /** - * Sets the value of the ancestor code. + * Sets the ancestor value. * - * @param ancestorValue the value of the ancestor code. 
+ * @param ancestorValue the ancestor value */ public void setAncestorValue(String ancestorValue) { this.ancestorValue = ancestorValue; @@ -169,8 +168,8 @@ public boolean equals(Object obj) { Ancestor that = (Ancestor) obj; - return Objects.equals(this.conceptMapUri, that.conceptMapUri) - && Objects.equals(this.conceptMapVersion, that.conceptMapVersion) + return Objects.equals(this.uri, that.uri) + && Objects.equals(this.version, that.version) && Objects.equals(this.descendantSystem, that.descendantSystem) && Objects.equals(this.descendantValue, that.descendantValue) && Objects.equals(this.ancestorSystem, that.ancestorSystem) @@ -180,8 +179,8 @@ public boolean equals(Object obj) { @Override public int hashCode() { return 37 - * Objects.hashCode(this.conceptMapUri) - * Objects.hashCode(this.conceptMapVersion) + * Objects.hashCode(this.uri) + * Objects.hashCode(this.version) * Objects.hashCode(this.descendantSystem) * Objects.hashCode(this.descendantValue) * Objects.hashCode(this.ancestorSystem) diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ConceptMaps.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ConceptMaps.java index 7d3354c4..a674f827 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ConceptMaps.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ConceptMaps.java @@ -1,27 +1,24 @@ package com.cerner.bunsen.mappings; import static org.apache.spark.sql.functions.col; +import static org.apache.spark.sql.functions.lit; import ca.uhn.fhir.context.FhirContext; import ca.uhn.fhir.parser.IParser; import com.cerner.bunsen.FhirEncoders; -import com.cerner.bunsen.FhirEncoders; -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import java.io.Serializable; +import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.BiFunction; import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.broadcast.Broadcast; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoder; @@ -31,9 +28,6 @@ import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.functions; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; import org.hl7.fhir.dstu3.model.ConceptMap; import org.hl7.fhir.dstu3.model.ConceptMap.ConceptMapGroupComponent; import org.hl7.fhir.dstu3.model.ConceptMap.SourceElementComponent; @@ -50,6 +44,8 @@ public class ConceptMaps { private static final FhirContext FHIR_CONTEXT = FhirContext.forDstu3(); + private static final IParser PARSER = FHIR_CONTEXT.newXmlParser(); + /** * An encoder for serializing mappings. */ @@ -59,18 +55,9 @@ public class ConceptMaps { .getOrCreate() .of(ConceptMap.class); - private static final Encoder ANCESTOR_ENCODER = Encoders.bean(Ancestor.class); - private static final Encoder URL_AND_VERSION_ENCODER = Encoders.bean(UrlAndVersion.class); - /** - * The number of records to put in a slice of expanded ancestors. 
- * This just needs to be small enough to fit in a reasonable amount of memory - * when converting to a Dataset. - */ - private static final long ANCESTOR_SLICE_SIZE = 100000; - /** * Returns the encoder for mappings. * @@ -91,16 +78,6 @@ public static Encoder getConceptMapEncoder() { return CONCEPT_MAP_ENCODER; } - /** - * Returns the encoder for ancestors. - * - * @return an encoder for ancestors. - */ - public static Encoder getAncestorEncoder() { - - return ANCESTOR_ENCODER; - } - /** * Returns the encoder for UrlAndVersion tuples. * @@ -121,53 +98,37 @@ public static Encoder getUrlAndVersionEncoder() { public static final String MAPPING_TABLE = "mappings"; /** - * Default table name where ancestor information is stored. - */ - public static final String ANCESTOR_TABLE = "ancestors"; - - /** - * Defualt table name where concept maps are stored. + * Defalt table name where concept maps are stored. */ public static final String CONCEPT_MAP_TABLE = "conceptmaps"; private static final Pattern TABLE_NAME_PATTERN = Pattern.compile("[A-Za-z][A-Za-z0-9_]*\\.?[A-Za-z0-9_]*"); - private static final StructType MAP_AND_VERSION_SCHEMA = - DataTypes.createStructType(new StructField[]{ - DataTypes.createStructField("conceptmapuri", DataTypes.StringType, false), - DataTypes.createStructField("conceptmapversion", DataTypes.StringType, false)}); - private final SparkSession spark; private final Dataset conceptMaps; private final Dataset mappings; - private final Dataset ancestors; - - /** - * Concept maps that have been changed from the original source. - */ - private final Dataset changes; + private final Dataset members; private ConceptMaps(SparkSession spark, - Dataset changes, + Dataset members, Dataset conceptMaps, - Dataset mappings, - Dataset ancestors) { + Dataset mappings) { + this.spark = spark; - this.changes = changes; + this.members = members; this.conceptMaps = conceptMaps; this.mappings = mappings; - this.ancestors = ancestors; } /** - * Returns the collection of concept maps from the default table. + * Returns the collection of concept maps from the default database and tables. * * @param spark the spark session - * @return a ConceptMaps instance + * @return a ConceptMaps instance. */ public static ConceptMaps getDefault(SparkSession spark) { @@ -175,97 +136,43 @@ public static ConceptMaps getDefault(SparkSession spark) { } /** - * Returns the collection of concept maps from the tables in the given database + * Returns the collection of concept maps from the tables in the given database. * * @param spark the spark session - * @param databaseName name of the datase containing the conceptmaps and mappings tables. - * @return a ConceptMaps instance + * @param databaseName name of the database containing the conceptmaps and mappings tables. + * @return a ConceptMaps instance. */ public static ConceptMaps getFromDatabase(SparkSession spark, String databaseName) { - Dataset mappings = asMappings(spark.sql( - "select * from " + databaseName + "." + MAPPING_TABLE)); + Dataset mappings = spark.sql( + "SELECT * FROM " + databaseName + "." + MAPPING_TABLE).as(MAPPING_ENCODER); - Dataset ancestors = asAncestors(spark.sql( - "select * from " + databaseName + "." + ANCESTOR_TABLE)); + Dataset conceptMaps = spark + .sql("SELECT * FROM " + databaseName + "." + CONCEPT_MAP_TABLE) + .as(CONCEPT_MAP_ENCODER); return new ConceptMaps(spark, spark.emptyDataset(URL_AND_VERSION_ENCODER), - spark.sql("select * from " + databaseName + "." 
+ CONCEPT_MAP_TABLE) - .as(CONCEPT_MAP_ENCODER), - mappings, - ancestors); + conceptMaps, + mappings); } /** * Returns an empty ConceptMaps instance. * * @param spark the spark session - * @return an empty ConceptMaps instance + * @return an empty ConceptMaps instance. */ public static ConceptMaps getEmpty(SparkSession spark) { + Dataset emptyConceptMaps = spark.emptyDataset(CONCEPT_MAP_ENCODER) + .withColumn("timestamp", lit(null).cast("timestamp")) + .as(CONCEPT_MAP_ENCODER); + return new ConceptMaps(spark, spark.emptyDataset(URL_AND_VERSION_ENCODER), - spark.emptyDataset(CONCEPT_MAP_ENCODER), - spark.emptyDataset(MAPPING_ENCODER), - spark.emptyDataset(ANCESTOR_ENCODER)); - } - - /** - * URL and version tuple used to uniquely identify a concept map. - */ - public static class UrlAndVersion { - - String url; - - String version; - - public UrlAndVersion(String url, String version) { - this.url = url; - this.version = version; - } - - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - - public String getVersion() { - return version; - } - - public void setVersion(String version) { - this.version = version; - } - - /** - * Nullary constructor for use in Spark data sets. - */ - public UrlAndVersion() { - } - - @Override - public int hashCode() { - - return 17 * url.hashCode() * version.hashCode(); - } - - @Override - public boolean equals(Object obj) { - if (!(obj instanceof UrlAndVersion)) { - return false; - } - - UrlAndVersion that = (UrlAndVersion) obj; - - return this.url.equals(that.url) - && this.version.equals(that.version); - } + emptyConceptMaps, + spark.emptyDataset(MAPPING_ENCODER)); } /** @@ -308,7 +215,7 @@ private static void addToConceptMap(ConceptMap map, Dataset mappings) { currentGroup = candidate; - // Workaround for the decoder producing an immutable array by + // Workaround for the decoder producing an immutable array by // replacing it with a mutable one. currentGroup.setElement(new ArrayList<>(currentGroup.getElement())); break; @@ -341,7 +248,7 @@ private static void addToConceptMap(ConceptMap map, Dataset mappings) { } /** - * Given a concept map, returns the list of mapping records it contains. + * Given a concept map, returns a list of mapping records it contains. * * @param map a concept map * @return a list of Mapping records. @@ -350,6 +257,15 @@ public static List expandMappings(ConceptMap map) { List mappings = new ArrayList<>(); + expandMappingsIterator(map).forEachRemaining(mappings::add); + + return mappings; + } + + private static Iterator expandMappingsIterator(ConceptMap map) { + + List mappings = new ArrayList<>(); + for (ConceptMapGroupComponent group: map.getGroup()) { for (SourceElementComponent element: group.getElement()) { @@ -397,29 +313,7 @@ public static List expandMappings(ConceptMap map) { } } - return mappings; - } - - /** - * Returns the mapping entries from a given concept maps. - * - * @param spark the spark session - * @param maps the concept maps - * @return a map from the concept map url and version to its mapping content. 
- */ - private static Map> fromConceptMaps(SparkSession spark, - List maps) { - - Map> datasets = new HashMap<>(); - - for (ConceptMap map: maps) { - - datasets.put(new UrlAndVersion(map.getUrl(), map.getVersion()), - asMappings(spark.createDataset(expandMappings(map), - MAPPING_ENCODER))); - } - - return datasets; + return mappings.iterator(); } /** @@ -427,218 +321,31 @@ private static Map> fromConceptMaps(SparkSession */ private Dataset getUrlAndVersions(Dataset conceptMaps) { - return conceptMaps.select( - functions.col("url"), - functions.col("version")) + return conceptMaps.select(functions.col("url"), functions.col("version")) + .distinct() .as(URL_AND_VERSION_ENCODER); } /** - * Convert a dataset into mappings with a consistent order, as Spark operations seem - * to have some surprising behavior if this isn't the case. - */ - private static Dataset asMappings(Dataset ds) { - - return ds.select( - "sourceValueSet", - "targetValueSet", - "sourceSystem", - "sourceValue", - "targetSystem", - "targetValue", - "equivalence", - "conceptmapuri", - "conceptmapversion") - .as(MAPPING_ENCODER); - } - - /** - * Convert a dataset into ancestors with a consistent order, as Spark operations seem - * to have some surprising behavior if this isn't the case. - */ - private static Dataset asAncestors(Dataset ds) { - - return ds.select( - "descendantValue", - "descendantSystem", - "ancestorSystem", - "ancestorValue", - "conceptmapuri", - "conceptmapversion") - .as(ANCESTOR_ENCODER); - } - - - /** - * A single system,value tuple and its parents. Additional connection - * types beyond parents may be added as necessary. - */ - private static class ConceptNode implements Serializable { - - String system; - String value; - - /** - * The set of parents. This purposefully relies on the Java - * default equality semantics, since we only use it internally - * and it is an efficient way to check for the direct parent - * of a record. - */ - Set parents; - - ConceptNode(String system, String value) { - - this.system = system; - this.value = value; - this.parents = new HashSet<>(); - } - - /** - * Returns the node's ancestors. - */ - Set getAncestors() { - - Set output = new HashSet<>(); - - getAncestors(output); - - // The current node is included so we can check for cycles, - // but it should not produce an ancestor record, so remove it. - output.remove(this); - - return output; - } - - private void getAncestors(Set visited) { - - // Some input data can contain cycles, so we must explicitly check for that. - if (!visited.contains(this)) { - - visited.add(this); - - for (ConceptNode parent: parents) { - - parent.getAncestors(visited); - } - } - } - } - - /** - * Expands a mapping dataset into its ancestors. - */ - private Dataset expandAncestors(Map> newMappings) { - - return newMappings.entrySet().stream().map(entry -> - expandAncestors(entry.getKey().getUrl(), - entry.getKey().getVersion(), - entry.getValue())) - .reduce(Dataset::union) - .get(); - } - - /** - * Expands the mappings into a dataset of ancestors. + * Returns a new ConceptMaps instance that includes the given maps. + * + * @param conceptMaps concept maps to add to the returned collection. + * @return a new ConceptMaps instance with the values added. */ - private Dataset expandAncestors(String conceptMapUri, - String conceptMapVersion, - Dataset mappings) { - - // Map used to find previously created concept nodes so we can - // use them to build a graph. - final Map> conceptNodes = new HashMap<>(); - - // List of all nodes for simpler iteration. 
- final List allNodes = new ArrayList<>(); - - // Helper function to get or add a node to our colleciton of nodes. - BiFunction getOrAddNode = (system, value) -> { - - Map systemMap = conceptNodes.get(system); - - if (systemMap == null) { + public ConceptMaps withConceptMaps(Dataset conceptMaps) { - systemMap = new HashMap<>(); + Dataset newMembers = getUrlAndVersions(conceptMaps); - conceptNodes.put(system, systemMap); - } - - ConceptNode node = systemMap.get(value); - - if (node == null) { - - node = new ConceptNode(system, value); - systemMap.put(value, node); - allNodes.add(node); - - } - - return node; - }; - - List subsumesMappings = mappings.where(functions.col("equivalence") - .equalTo(functions.lit("subsumes"))) - .collectAsList(); - - // Build our graph of nodes. - for (Mapping mapping: subsumesMappings) { + if (hasDuplicateUrlAndVersions(newMembers) || conceptMaps.count() != newMembers.count()) { - ConceptNode node = getOrAddNode.apply(mapping.getSourceSystem(), - mapping.getSourceValue()); - - ConceptNode parent = getOrAddNode.apply(mapping.getTargetSystem(), - mapping.getTargetValue()); - - node.parents.add(parent); + throw new IllegalArgumentException( + "Cannot add concept maps having duplicate conceptMapUri and conceptMapVersion"); } - // The graph is built, now translate it into ancestors. - List ancestors = allNodes.stream() - .flatMap(node -> - node.getAncestors() - .stream() - .map(ancestorNode -> - new Ancestor(conceptMapUri, - conceptMapVersion, - node.system, - node.value, - ancestorNode.system, - ancestorNode.value))) - .collect(Collectors.toList()); - - // We convert into a sliced RDD, then to a dataset, - // so we can specify a slice size and prevent Spark from - // attempting to copy everything at once for very large - // expansions. - int slices = (int) (ancestors.size() / ANCESTOR_SLICE_SIZE); - - if (slices > 1) { - - JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); - - JavaRDD rdd = jsc.parallelize(ancestors, slices); - - return spark.createDataset(rdd.rdd(), ANCESTOR_ENCODER); - - } else { - - return spark.createDataset(ancestors, ANCESTOR_ENCODER); - } - } - - /** - * Returns a new ConceptMaps instance that includes the given maps. - * - * @param conceptMaps concept maps to add to the returned collection. - * @return a new ConceptMaps instance with the values added - */ - public ConceptMaps withConceptMaps(List conceptMaps) { - - // Remove the concept contents for persistence. - // This is most easily done in the ConcpeptMap object by setting - // the group to an empty list. - List withoutConcepts = conceptMaps.stream() - .map(conceptMap -> { + // Remove the concept contents for persistence. This is most easily done in the ConceptMap + // object by setting the group to an empty list. + Dataset withoutConcepts = conceptMaps + .map((MapFunction) conceptMap -> { // Remove the elements rather than the groups to preserved the // "unmapped" structure in a group that can refer to other @@ -656,114 +363,110 @@ public ConceptMaps withConceptMaps(List conceptMaps) { withoutElements.setGroup(updatedGroups); return withoutElements; - }) - .collect(Collectors.toList()); + }, CONCEPT_MAP_ENCODER); - // Convert to datasets. 
- Dataset newMaps = spark.createDataset(withoutConcepts, CONCEPT_MAP_ENCODER); - Map> newMappings = fromConceptMaps(spark, conceptMaps); + Dataset newMappings = conceptMaps.flatMap(ConceptMaps::expandMappingsIterator, + MAPPING_ENCODER); - return withConceptMaps(newMaps, newMappings); + return withConceptMaps(withoutConcepts, newMappings); } /** * Returns a new ConceptMaps instance that includes the given map. * * @param conceptMap concept maps to add - * @return a new ConceptMaps instance with the values added + * @return a new ConceptMaps instance with the values added. */ public ConceptMaps withConceptMaps(ConceptMap... conceptMap) { return withConceptMaps(Arrays.asList(conceptMap)); } - private ConceptMaps withConceptMaps(Dataset newMaps, - Map> newMappings) { + public ConceptMaps withConceptMaps(List conceptMaps) { - Dataset newAncestors = expandAncestors(newMappings); + return withConceptMaps(this.spark.createDataset(conceptMaps, CONCEPT_MAP_ENCODER)); + } - // Get the changed changedVersion and column so we can filter - // existing items that have been changed. - Dataset changes = getUrlAndVersions(newMaps); + private ConceptMaps withConceptMaps(Dataset newMaps, Dataset newMappings) { - Dataset unchangedMaps = this.conceptMaps.alias("maps") - .join(changes.alias("changes"), - functions.col("maps.url").equalTo(functions.col("changes.url")).and( - functions.col("maps.version").equalTo(functions.col("changes.version"))), - "leftanti") - .as(CONCEPT_MAP_ENCODER); + Dataset newMembers = getUrlAndVersions(newMaps); - Dataset unchangedMappings = - asMappings(this.mappings.join(changes.alias("changes"), - functions.col("conceptmapuri") - .equalTo(functions.col("changes.url")).and( - functions.col("conceptmapversion") - .equalTo(functions.col("changes.version"))), - "leftanti")); - - Dataset unchangedAncestors = - asAncestors(this.ancestors.join(changes.alias("changes"), - functions.col("conceptmapuri") - .equalTo(functions.col("changes.url")).and( - functions.col("conceptmapversion") - .equalTo(functions.col("changes.version"))), - "leftanti")); - - // Reduce the new mappings into values - Dataset allNewMappings = newMappings.values() - .stream() - .reduce(Dataset::union) - .get(); + // Instantiating a new composite ConceptMaps requires a new timestamp + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + Dataset newMapsWithTimestamp = newMaps + .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp")) + .as(CONCEPT_MAP_ENCODER); - // Return a new instance with new or updated values unioned with previous, unchanged values. return new ConceptMaps(spark, - this.changes.unionAll(changes).distinct(), - unchangedMaps.unionAll(newMaps), - unchangedMappings.unionAll(asMappings(allNewMappings)), - unchangedAncestors.unionAll(asAncestors(newAncestors))); + this.members.union(newMembers), + this.conceptMaps.union(newMapsWithTimestamp), + this.mappings.union(newMappings)); } /** - * Returns a new ConceptMaps instance that includes the expanded content from the map. + * Reads all concept maps from a given directory and adds them to + * our collection. The directory may be anything readable from a Spark path, + * including local filesystems, HDFS, S3, or others. 
* - * @param map a concept map that contains only metadata, without concept groups - * @param mappings the mappings associated with the given concept map - * @return a new ConceptMaps instance with the values added + * @param path a path from which concept maps will be loaded + * @return a instance of ConceptMaps that includes the contents from that directory. */ - public ConceptMaps withExpandedMap(ConceptMap map, Dataset mappings) { + public ConceptMaps withMapsFromDirectory(String path) { - if (map.getGroup().size() != 0) { - throw new IllegalArgumentException("The concept concepts themselves should be in the" - + " provided mappings parameter."); - } + return withConceptMaps(conceptMapsDatasetFromDirectory(path)); + } + + private Dataset conceptMapsDatasetFromDirectory(String path) { + + JavaRDD> fileNamesAndContents = this.spark.sparkContext() + .wholeTextFiles(path, 1) + .toJavaRDD(); - return withConceptMaps(spark.createDataset(ImmutableList.of(map), - CONCEPT_MAP_ENCODER), - ImmutableMap.of(new UrlAndVersion(map.getUrl(), map.getVersion()), mappings)); + return this.spark.createDataset(fileNamesAndContents + .map(tuple -> (ConceptMap) PARSER.parseResource(tuple._2)) + .rdd(), CONCEPT_MAP_ENCODER); } /** - * Reads all concept maps from a given directory and adds them to - * our collection. The directory may be anything readable from a Spark path, + * Returns all concept maps that are disjoint with concept maps stored in the default database and + * adds them to our collection. The directory may be anything readable from a Spark path, * including local filesystems, HDFS, S3, or others. * - * @param path a path from which concept maps will be loaded - * @return a instance of ConceptMaps that includes the contents from that directory. + * @param path a path from which disjoint concept maps will be loaded + * @return an instance of ConceptMaps that includes content from that directory that is disjoint + * with content already contained in the default database. */ - public ConceptMaps withMapsFromDirectory(String path) { + public ConceptMaps withDisjointMapsFromDirectory(String path) { - final IParser parser = FHIR_CONTEXT.newXmlParser(); + return withDisjointMapsFromDirectory(path, MAPPING_DATABASE); + } - List> fileNamesAndContents = - spark.sparkContext() - .wholeTextFiles(path, 1) - .toJavaRDD().collect(); + /** + * Returns all concept maps that are disjoint with concept maps stored in the default database and + * adds them to our collection. The directory may be anything readable from a Spark path, + * including local filesystems, HDFS, S3, or others. + * + * @param path a path from which disjoint concept maps will be loaded + * @param database the database to check concept maps against + * @return an instance of ConceptMaps that includes content from that directory that is disjoint + * with content already contained in the default database. + */ + public ConceptMaps withDisjointMapsFromDirectory(String path, String database) { - List mapList = fileNamesAndContents.stream() - .map(tuple -> (ConceptMap) parser.parseResource(tuple._2)) - .collect(Collectors.toList()); + Dataset currentMembers = this.spark + .sql("SELECT url, version FROM " + database + "." 
+ CONCEPT_MAP_TABLE) + .as(URL_AND_VERSION_ENCODER) + .alias("current"); - return withConceptMaps(mapList); + Dataset maps = conceptMapsDatasetFromDirectory(path) + .alias("new") + .join(currentMembers, col("new.url").equalTo(col("current.url")) + .and(col("new.version").equalTo(col("current.version"))), + "leftanti") + .as(CONCEPT_MAP_ENCODER); + + return withConceptMaps(maps); } /** @@ -771,7 +474,7 @@ public ConceptMaps withMapsFromDirectory(String path) { * * @param uri the uri of the map to return * @param version the version of the map to return - * @return the specified concept map + * @return the specified concept map. */ public ConceptMap getConceptMap(String uri, String version) { @@ -779,9 +482,9 @@ public ConceptMap getConceptMap(String uri, String version) { // if the map does not exist. // Typecast necessary to placate the Java compiler calling this Scala function. - ConceptMap[] maps = (ConceptMap[]) conceptMaps.filter( - functions.col("url").equalTo(functions.lit(uri)) - .and(functions.col("version").equalTo(functions.lit(version)))) + ConceptMap[] maps = (ConceptMap[]) this.conceptMaps.filter( + functions.col("url").equalTo(lit(uri)) + .and(functions.col("version").equalTo(lit(version)))) .head(1); if (maps.length == 0) { @@ -806,10 +509,10 @@ public ConceptMap getConceptMap(String uri, String version) { * Instead, users should use the {@link #getMappings()} method to query mappings * in depth. * - * @return a dataset of concept maps that do not containmappings. + * @return a dataset of concept maps that do not contain mappings. */ public Dataset getMaps() { - return conceptMaps; + return this.conceptMaps; } /** @@ -819,7 +522,7 @@ public Dataset getMaps() { * @return a dataset of all mappings. */ public Dataset getMappings() { - return mappings; + return this.mappings; } /** @@ -827,27 +530,27 @@ public Dataset getMappings() { * * @param uri the uri of the concept map for which we get mappings * @param version the version of the concept map for which we get mappings - * @return a dataset of mappings for the given URI and version + * @return a dataset of mappings for the given URI and version. */ public Dataset getMappings(String uri, String version) { - return mappings.where(functions.col("conceptmapuri").equalTo(functions.lit(uri)) - .and(functions.col("conceptmapversion").equalTo(functions.lit(version)))); + return this.mappings.where(functions.col("conceptmapuri").equalTo(lit(uri)) + .and(functions.col("conceptmapversion").equalTo(lit(version)))); } /** * Returns a dataset with the mappings for each uri and version. * * @param uriToVersion a map of concept map URI to the version to load - * @return a datset of mapppings for the given URIs and versions + * @return a dataset of mappings for the given URIs and versions. */ public Dataset getMappings(Map uriToVersion) { - JavaSparkContext context = new JavaSparkContext(spark.sparkContext()); + JavaSparkContext context = new JavaSparkContext(this.spark.sparkContext()); Broadcast> broadcastMaps = context.broadcast(uriToVersion); - return mappings.filter((FilterFunction) mapping -> { + return this.mappings.filter((FilterFunction) mapping -> { String latestVersion = broadcastMaps.getValue().get(mapping.getConceptMapUri()); @@ -872,23 +575,12 @@ public Dataset getLatestMappings(Set uris, boolean includeExper return getMappings(latestMaps); } - - /** - * Returns a dataset of all mappings in this collection. This is generally used - * for inspection and debugging of these relationships. 
- * - * @return a dataset of all transitive ancestors. - */ - public Dataset getAncestors() { - return ancestors; - } - /** * Returns the latest versions of all concept maps. * * @param includeExperimental flag to include concept maps marked as experimental * - * @return a map of concept map URLs to the latest version for them + * @return a map of concept map URLs to the latest version for them. */ public Map getLatestVersions(boolean includeExperimental) { @@ -898,10 +590,10 @@ public Map getLatestVersions(boolean includeExperimental) { /** * Returns the latest versions of a given set of concept maps. * - * @param urls a set of URLs to retreieve the latest version for, or null to load them all. + * @param urls a set of URLs to retrieve the latest version for, or null to load them all. * @param includeExperimental flag to include concept maps marked as experimental * - * @return a map of concept map URLs to the latest version for them + * @return a map of concept map URLs to the latest version for them. */ public Map getLatestVersions(final Set urls, boolean includeExperimental) { @@ -910,26 +602,33 @@ public Map getLatestVersions(final Set urls, // per concept map. Spark's provided max aggregation function // only works on numeric types, so we jump into RDDs and perform // the reduce by hand. - JavaRDD changes = conceptMaps.select(col("url"), + JavaRDD changes = this.conceptMaps.select(col("url"), col("version"), col("experimental")) .toJavaRDD() - .filter(row -> { - return (urls == null || urls.contains(row.getString(0))) - && (includeExperimental || row.isNullAt(2) || !row.getBoolean(2)); - }) + .filter(row -> (urls == null || urls.contains(row.getString(0))) + && (includeExperimental || row.isNullAt(2) || !row.getBoolean(2))) .mapToPair(row -> new Tuple2<>(row.getString(0), row.getString(1))) .reduceByKey((leftVersion, rightVersion) -> leftVersion.compareTo(rightVersion) > 0 ? leftVersion : rightVersion) .map(tuple -> new UrlAndVersion(tuple._1, tuple._2)); - return spark.createDataset(changes.rdd(), URL_AND_VERSION_ENCODER) + return this.spark.createDataset(changes.rdd(), URL_AND_VERSION_ENCODER) .collectAsList() .stream() .collect(Collectors.toMap(UrlAndVersion::getUrl, UrlAndVersion::getVersion)); } + /** + * Returns true if the UrlAndVersions of new value sets contains duplicates with the current + * ValueSets. + */ + private boolean hasDuplicateUrlAndVersions(Dataset membersToCheck) { + + return this.members.intersect(membersToCheck).count() > 0; + } + /** * Creates a table of mapping records partitioned by conceptmapuri and * conceptmapversion. @@ -961,6 +660,7 @@ private static void createMappingTable(SparkSession spark, builder.append("CREATE EXTERNAL TABLE IF NOT EXISTS "); } else { + builder.append("CREATE TABLE IF NOT EXISTS "); } @@ -989,63 +689,6 @@ private static void createMappingTable(SparkSession spark, spark.sql(builder.toString()); } - /** - * Creates a table of ancestor records partitioned by conceptMapUri and - * conceptMapVersion. - * - * @param spark the spark session - * @param tableName the name of the ancestors table - * @param location the location to store the table, or null to create a Hive-managed table. - * @throws IllegalArgumentException if the table name or location are malformed. 
- */ - private static void createAncestorsTable(SparkSession spark, - String tableName, - String location) { - - if (!TABLE_NAME_PATTERN.matcher(tableName).matches()) { - throw new IllegalArgumentException("Invalid table name: " + tableName); - } - - // Hive will check for well-formed paths, so we just ensure - // a user isn't attempting to inject additional SQL into the statement. - if (location != null && location.contains(";")) { - throw new IllegalArgumentException("Invalid path for mapping table: " - + location); - } - - StringBuilder builder = new StringBuilder(); - - if (location != null) { - - builder.append("CREATE EXTERNAL TABLE IF NOT EXISTS "); - - } else { - builder.append("CREATE TABLE IF NOT EXISTS "); - } - - builder.append(tableName); - - // Note the partitioned by columns are deliberately lower case here, - // since Spark does not appear to match columns to - // Hive partitions if they are not. - builder.append("(descendantSystem STRING, " - + "descendantValue STRING, " - + "ancestorSystem STRING, " - + "ancestorValue STRING)\n" - + "PARTITIONED BY (conceptmapuri STRING, conceptmapversion STRING)\n"); - - builder.append("STORED AS PARQUET\n"); - - if (location != null) { - builder.append("LOCATION '") - .append(location) - .append("'"); - } - - spark.sql(builder.toString()); - } - - /** * Writes the updated concept maps to a database using the default "mappings" and "conceptmaps" * table names. @@ -1055,31 +698,9 @@ private static void createAncestorsTable(SparkSession spark, public void writeToDatabase(String database) { writeToTables(database + "." + MAPPING_TABLE, - database + "." + CONCEPT_MAP_TABLE, - database + "." + ANCESTOR_TABLE); - } - - /** - * Returns the concept maps that in our local concept maps, but not - * in the given table. - */ - private Dataset getMissingConceptMaps(String conceptMapTable) { - - Dataset mapsInDatabase = spark.sql("select url, version from " + conceptMapTable) - .alias("in_db"); - - Dataset localConcepts = getUrlAndVersions(conceptMaps).alias("local"); - - return localConcepts.join(mapsInDatabase, - functions.col("in_db.url") - .equalTo(functions.col("local.url")).and( - functions.col("in_db.version") - .equalTo(functions.col("local.version"))), - "leftanti") - .as(URL_AND_VERSION_ENCODER); + database + "." + CONCEPT_MAP_TABLE); } - /** * Writes mapping records to a table. This class ensures the columns and partitions are mapped * properly, and is a workaround similar to the problem described mappings, .insertInto(tableName); } - - /** - * Writes ancestor records to a table. This class ensures the columns and partitions are mapped - * properly, and is a workaround similar to the problem described here. - * - * @param ancestors a dataset of ancestor records - * @param tableName the table to write them to - */ - private static void writeAncestorsToTable(Dataset ancestors, - String tableName) { - - // Note the last two columns here must be the partitioned-by columns - // in order and in lower case for Spark to properly match - // them to the partitions. - Dataset orderedColumnDataset = - ancestors.select("descendantSystem", - "descendantValue", - "ancestorSystem", - "ancestorValue", - "conceptmapuri", - "conceptmapversion"); - - orderedColumnDataset - .write() - .insertInto(tableName); - } - - /** - * Write a dataset to a temporary location and reloads it into the return value. - * - *

This is to workaround Spark's laziness, which can lead to reading and writing - * from the same place causing issues. - */ - private Dataset writeAndReload(Dataset dataset, String tempName) { - - dataset.write().saveAsTable(tempName); - - return spark.sql("select * from " + tempName).as(dataset.exprEnc()); - } - - /** - * Update or insert ancestors by partition. - */ - private void upsertAncestorsByPartition(String ancestorsTable, - Dataset mapsToWrite, - String partitionDefs) { - - // Remove the ancestors table partitions we are replacing. - spark.sql("alter table " + ancestorsTable + " drop if exists partition " + partitionDefs); - - // Get only the ancestors to write and save them. - Dataset ancestorsToWrite = this.ancestors.join(mapsToWrite, - functions.col("conceptmapuri") - .equalTo(functions.col("url")).and( - functions.col("conceptmapversion") - .equalTo(functions.col("version"))), - "leftsemi") - .as(ANCESTOR_ENCODER); - - String tempAncestorsTable = "TEMP_ANCESTORS_TABLE_REMOVEME"; - Dataset tempAncestors = writeAndReload(ancestorsToWrite, tempAncestorsTable); - - // Write the mappings, appending so we don't affect others. - writeAncestorsToTable(tempAncestors, ancestorsTable); - - // Clean up our temporary table since the mappings write operation has finished. - spark.sql("drop table " + tempAncestorsTable); - } - - /** - * Update or insert mappings by partition. - */ - private void upsertMappingsByPartition(String mappingsTable, - Dataset mapsToWrite, - String partitionDefs) { - - // Remove the mappings table partitions we are replacing. - spark.sql("alter table " + mappingsTable + " drop if exists partition " + partitionDefs); - - // Get only mappings to write and save them. - Dataset mappingsToWrite = this.mappings.join(mapsToWrite, - functions.col("conceptmapuri") - .equalTo(functions.col("url")).and( - functions.col("conceptmapversion") - .equalTo(functions.col("version"))), - "leftsemi") - .as(MAPPING_ENCODER); - - // Create a temporary table of mappings to write. This must be done before we - // remove the partitions, since Spark's lazy execution will remove the data we - // are reading and trying to update as well. - String tempMappingsTable = "TEMP_MAPPINGS_TABLE_REMOVEME"; - Dataset tempMappings = writeAndReload(mappingsToWrite, tempMappingsTable); - - // Write the mappings, appending so we don't affect others. - writeMappingsToTable(tempMappings, mappingsTable); - - // Clean up our temporary table since the mappings write operation has finished. - spark.sql("drop table " + tempMappingsTable); - } - - /** - * Update or insert concept maps. - */ - private void upsertConceptMaps(String conceptMapTable, - Dataset mapsToWrite) { - - // Get existing maps that didn't change... - Dataset existingUnchangedMaps = spark.sql( - "select * from " + conceptMapTable) - .alias("maps") - .join(mapsToWrite.alias("to_write"), - functions.col("maps.url") - .equalTo(functions.col("to_write.url")) - .and(functions.col("maps.version") - .equalTo(functions.col("to_write.version"))), - "leftanti") - .as(CONCEPT_MAP_ENCODER); - - // ... and our local maps that did change... 
- Dataset changedMaps = conceptMaps - .alias("maps") - .join(mapsToWrite.alias("to_write"), - functions.col("maps.url") - .equalTo(functions.col("to_write.url")) - .and(functions.col("maps.version") - .equalTo(functions.col("to_write.version"))), - "leftsemi") - .as(CONCEPT_MAP_ENCODER); - - String tempMapsTableName = "TEMP_MAPS_TABLE_REMOVEME"; - - // Union the items we need to write with existing, unchanged content, and write it. - Dataset unioned = writeAndReload(changedMaps.unionAll(existingUnchangedMaps), - tempMapsTableName); - - unioned.write() - .mode(SaveMode.Overwrite) - .saveAsTable(conceptMapTable); - - spark.sql("drop table " + tempMapsTableName); - } - /** * Writes mappings to the given tables. * @@ -1264,17 +741,14 @@ private void upsertConceptMaps(String conceptMapTable, * * @param mappingsTable name of the table containing the mapping records * @param conceptMapTable name of the table containing the concept map metadata - * @param ancestorsTable name of the table containing transitive ancestors */ - public void writeToTables(String mappingsTable, - String conceptMapTable, - String ancestorsTable) { + public void writeToTables(String mappingsTable, String conceptMapTable) { boolean hasExistingMaps; try { - spark.sql("describe table " + conceptMapTable); + this.spark.sql("describe table " + conceptMapTable); hasExistingMaps = true; @@ -1293,59 +767,37 @@ public void writeToTables(String mappingsTable, } } - if (hasExistingMaps) { - - // Build the collection of concept maps we need to write, which can be: - // 1. All experimental maps in the local session - // 2. Maps that exist locally but not in the target database - - // Concept maps not in the target - Dataset existsLocally = getMissingConceptMaps(conceptMapTable); - - // Concept maps marked as experimental - Dataset experimental = getUrlAndVersions( - conceptMaps.filter(functions.col("experimental") - .equalTo(functions.lit(true)))); - - // Create a union to determine what to write. - Dataset mapsToWrite = existsLocally - .union(experimental) - .distinct(); - - // Get the mappings and ancestors partitions to drop - // as we are replacing them with new content. - String partitionDefs = mapsToWrite.collectAsList().stream() - .map(changedMap -> - new StringBuilder() - .append("(conceptmapuri=\"") - .append(changedMap.url) - .append("\", conceptmapversion=\"") - .append(changedMap.version) - .append("\")").toString()) - .collect(Collectors.joining(", ")); - - // Write mappings that have been changed. - upsertMappingsByPartition(mappingsTable, mapsToWrite, partitionDefs); - - // Write ancestors that have been changed. - upsertAncestorsByPartition(ancestorsTable, mapsToWrite, partitionDefs); - - // Write the FHIR ConceptMaps themselves. - upsertConceptMaps(conceptMapTable, mapsToWrite); - - } else { + if (!hasExistingMaps) { // No target tables exist, so create and write them. The mappings // and ancestors tables are created explicitly to meet our // partitioning system. 
- createMappingTable(spark, mappingsTable, null); - writeMappingsToTable(mappings, mappingsTable); + createMappingTable(this.spark, mappingsTable, null); + + // Create a concept map table by writing empty data having the proper schema and properties + this.spark.emptyDataset(CONCEPT_MAP_ENCODER) + .withColumn("timestamp", lit(null).cast("timestamp")) + .write() + .format("parquet") + .partitionBy("timestamp") + .saveAsTable(conceptMapTable); + } - createAncestorsTable(spark, ancestorsTable, null); - writeAncestorsToTable(ancestors, ancestorsTable); + Dataset currentMembers = this.spark + .sql("SELECT url, version FROM " + conceptMapTable) + .distinct() + .as(URL_AND_VERSION_ENCODER); + + if (hasDuplicateUrlAndVersions(currentMembers)) { - // The concept maps table itself is not partitioned, so simply save it. - conceptMaps.write().saveAsTable(conceptMapTable); + throw new IllegalArgumentException("The given concept maps contains duplicates url and " + + "versions against concept maps already stored in the table, " + conceptMapTable); } + + writeMappingsToTable(this.mappings, mappingsTable); + + this.conceptMaps.write() + .mode(SaveMode.ErrorIfExists) + .insertInto(conceptMapTable); } } diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Hierarchies.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Hierarchies.java new file mode 100644 index 00000000..59bfcd24 --- /dev/null +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Hierarchies.java @@ -0,0 +1,676 @@ +package com.cerner.bunsen.mappings; + +import static org.apache.spark.sql.functions.col; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.BiFunction; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import scala.Tuple2; + +/** + * An immutable collection of hierarchical systems. This class is used to import ancestor content + * from hierarchical systems, explore it, and persist it to a database. + */ +public class Hierarchies { + + /** + * An encoder for serializing ancestors. + */ + private static final Encoder ANCESTOR_ENCODER = Encoders.bean(Ancestor.class); + + private static final Encoder URI_AND_VERSION_ENCODER = + Encoders.bean(UrlAndVersion.class); + + private static final Encoder HIERARCHICAL_ELEMENT_ENCODER = + Encoders.bean(HierarchicalElement.class); + + /** + * The number of records to put in a slice of expanded ancestors. This just needs to be small + * enough to fit in a reasonable amount of memory when converting to a Dataset. + */ + private static final Long ANCESTORS_SLICE_SIZE = 100000L; + + private static final Pattern TABLE_NAME_PATTERN = + Pattern.compile("[A-Za-z][A-Za-z0-9_]*\\.?[A-Za-z0-9_]*"); + + /** + * Returns the encoder for UrlAndVersion tuples. + * + * @return the encoder for UrlAndVersion tuples. 
+ */ + public static Encoder getUriAndVersionEncoder() { + return URI_AND_VERSION_ENCODER; + } + + /** + * Returns the encoder for hierarchical elements. + * + * @return the encoder for hierarchical elements. + */ + public static Encoder getHierarchicalElementEncoder() { + return HIERARCHICAL_ELEMENT_ENCODER; + } + + /** + * Default database name where the ancestor information is stored. + */ + public static final String HIERARCHIES_DATABASE = "ontologies"; + + /** + * Default table name where expanded ancestor information is stored. + */ + public static final String ANCESTORS_TABLE = "ancestors"; + + /** + * A URI prefix for hierarchical systems. A Hierarchy URI is "complete" when an identifier + * suffix for the hierarchy is appended to this value. + */ + public static final String HIERARCHY_URI_PREFIX = "urn:com:cerner:bunsen:hierarchy:"; + + private final SparkSession spark; + + private final Dataset members; + + private final Dataset ancestors; + + private Hierarchies(SparkSession spark, + Dataset members, + Dataset ancestors) { + + this.spark = spark; + this.members = members; + this.ancestors = ancestors; + } + + /** + * Returns the collection of ancestors from the default database and table. + * + * @param spark the spark session + * @return hierarchies instance. + */ + public static Hierarchies getDefault(SparkSession spark) { + + return getFromDatabase(spark, HIERARCHIES_DATABASE); + } + + /** + * Returns the collection of ancestors from the table in the given database. + * + * @param spark the spark session + * @param database name of the database containing the ancestors table + * @return a Hierarchies instance. + */ + public static Hierarchies getFromDatabase(SparkSession spark, String database) { + + Dataset ancestors = spark.sql("SELECT * FROM " + database + "." + ANCESTORS_TABLE) + .as(ANCESTOR_ENCODER); + + Dataset members = ancestors.filter((FilterFunction) ancestor -> + ancestor.getUri().startsWith(HIERARCHY_URI_PREFIX)) + .select(col("uri").alias("url"), col("version")) + .distinct() + .as(URI_AND_VERSION_ENCODER); + + return new Hierarchies(spark, + members, + ancestors); + } + + /** + * Returns an empty Hierarchies instance. + * + * @param spark the spark session + * @return an empty Hierarchies instance. + */ + public static Hierarchies getEmpty(SparkSession spark) { + + return new Hierarchies(spark, + spark.emptyDataset(URI_AND_VERSION_ENCODER), + spark.emptyDataset(ANCESTOR_ENCODER)); + } + + /** + * Returns a dataset of all ancestors in this collection. This is generally used for inspection + * and debugging of ancestors. + * + * @return a dataset of all ancestors. + */ + public Dataset getAncestors() { + return this.ancestors; + } + + /** + * Returns a dataset of UrlAndVersion members of this collection. + * + * @return a dataset of UrlAndVersion members in this collection. + */ + public Dataset getMembers() { + return this.members; + } + + /** + * Returns the latest version of all hierarchies. + * + * @return a map of hierarchy URI to the latest version for that hierarchy. + */ + public Map getLatestVersions() { + + return getLatestVersions(null); + } + + /** + * Returns latest versions of the given hierarchies. + * + * @param uris a set of URIs for which to retrieve the latest versions, or null to load them all + * @return a map of value set URIs to the latest versions for them. 
+ */ + public Map getLatestVersions(final Set uris) { + + JavaRDD members = this.members.toJavaRDD() + .filter(uriAndVersion -> (uris == null || uris.contains(uriAndVersion.getUrl()))) + .mapToPair(uriAndVersion -> + new Tuple2<>(uriAndVersion.getUrl(), uriAndVersion.getVersion())) + .reduceByKey((leftVersion, rightVersion) -> + leftVersion.compareTo(rightVersion) > 0 ? leftVersion : rightVersion) + .map(tuple -> new UrlAndVersion(tuple._1, tuple._2)); + + return spark.createDataset(members.rdd(), URI_AND_VERSION_ENCODER) + .collectAsList() + .stream() + .collect(Collectors.toMap(UrlAndVersion::getUrl, + UrlAndVersion::getVersion)); + } + + /** + * Returns a new hierarchies instance with the transitive ancestors computed from the given + * dataset of {@link HierarchicalElement}. + * + * @param hierarchyUri the URI of the hierarchical system to add + * @param hierarchyVersion the version of the hierarchical system to add + * @param elements the elements from which to calculate the ancestors + * @return an instance of Hierarchies with the ancestors computed from the given elements + */ + public Hierarchies withHierarchyElements(String hierarchyUri, + String hierarchyVersion, + Dataset elements) { + + Dataset newAncestors = expandElements(hierarchyUri, hierarchyVersion, elements); + + Dataset newMembers = newAncestors.select(col("uri").alias("url"), col("version")) + .distinct() + .as(URI_AND_VERSION_ENCODER); + + if (hasDuplicateUriAndVersions(newMembers)) { + + throw new IllegalArgumentException( + "Cannot add elements having duplicate hierarchyUri and hierarchyVersion"); + } + + return new Hierarchies(this.spark, + this.members.union(newMembers), + this.ancestors.union(newAncestors)); + } + + /** + * Returns a new hierarchies instance with the given hierarchies. + * + * @param hierarchies the hierarchies to add to this instance + * @return a new instance of Hierarchies. + */ + public Hierarchies withHierarchies(Hierarchies hierarchies) { + + Dataset newAncestors = hierarchies.getAncestors(); + + Dataset newMembers = hierarchies.getMembers(); + + if (hasDuplicateUriAndVersions(newMembers)) { + + throw new IllegalArgumentException( + "Cannot add hierarchies having duplicate uri and version"); + } + + return new Hierarchies(this.spark, + this.members.union(newMembers), + this.ancestors.union(newAncestors)); + } + + /** + * A single system,value tuple and its parents. Additional connection + * types beyond parents may be added as necessary. + */ + private static class ConceptNode implements Serializable { + + String system; + String value; + + /** + * The set of parents. This purposefully relies on the Java + * default equality semantics, since we only use it internally + * and it is an efficient way to check for the direct parent + * of a record. + */ + Set parents; + + ConceptNode(String system, String value) { + + this.system = system; + this.value = value; + this.parents = new HashSet<>(); + } + + /** + * Returns the node's ancestors. + */ + Set getAncestors() { + + Set output = new HashSet<>(); + + getAncestors(output); + + // The current node is included so we can check for cycles, + // but it should not produce an ancestor record, so remove it. + output.remove(this); + + return output; + } + + private void getAncestors(Set visited) { + + // Some input data can contain cycles, so we must explicitly check for that. 
+ if (!visited.contains(this)) { + + visited.add(this); + + for (ConceptNode parent: parents) { + + parent.getAncestors(visited); + } + } + } + } + + /** + * Calculates the transitive closure of ancestor values given the dataset of hierarchical + * elements. + */ + private Dataset expandElements(String hierarchyUri, + String hierarchyVersion, + Dataset elements) { + + // Map used to find previously created concept nodes so we can use them to build a graph + final Map> conceptNodes = new HashMap<>(); + + // List of all nodes for simpler iteration + final List allNodes = new ArrayList<>(); + + // Helper function to get or add a node to our collection of nodes + BiFunction getOrAddNode = (system, value) -> { + + Map systemMap = conceptNodes.get(system); + + if (systemMap == null) { + + systemMap = new HashMap<>(); + + conceptNodes.put(system, systemMap); + } + + ConceptNode node = systemMap.get(value); + + if (node == null) { + + node = new ConceptNode(system, value); + systemMap.put(value, node); + allNodes.add(node); + + } + + return node; + }; + + // Build our graph of nodes + for (HierarchicalElement element: elements.collectAsList()) { + + ConceptNode node = getOrAddNode.apply(element.getDescendantSystem(), + element.getDescendantValue()); + + ConceptNode parent = getOrAddNode.apply(element.getAncestorSystem(), + element.getAncestorValue()); + + node.parents.add(parent); + } + + // The graph is built, now translate it into ancestors + List ancestors = allNodes.stream() + .flatMap(node -> + node.getAncestors() + .stream() + .map(ancestorNode -> + new Ancestor(hierarchyUri, + hierarchyVersion, + node.system, + node.value, + ancestorNode.system, + ancestorNode.value))) + .collect(Collectors.toList()); + + // We convert into a sliced RDD, then to a dataset, so we can specify a slice size and prevent + // Spark from attempting to copy everything at once for very large expansions. + int slices = (int) (ancestors.size() / ANCESTORS_SLICE_SIZE); + + if (slices > 1) { + + JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD rdd = jsc.parallelize(ancestors, slices); + + return spark.createDataset(rdd.rdd(), ANCESTOR_ENCODER); + + } else { + + return spark.createDataset(ancestors, ANCESTOR_ENCODER); + } + } + + private boolean hasDuplicateUriAndVersions(Dataset membersToCheck) { + + return this.members.intersect(membersToCheck).count() > 0; + } + + /** + * Writes the ancestors to the default database "ontologies" using the default table "ancestors". + */ + public void writeToDatabase() { + + writeToDatabase(HIERARCHIES_DATABASE); + } + + /** + * Writes the ancestors to the given database using the default table name "ancestors". + * + * @param database the name of the database to which the ancestors are saved + */ + public void writeToDatabase(String database) { + + writeToTables(database + "." + ANCESTORS_TABLE); + } + + /** + * Writes the ancestors to the given table. + * + *

Warning: these updates are likely not atomic due to the lack of transactional + * semantics in the underlying data store. Concurrent users may see previous items + * removed before new ones are added, or items may appear separately from others. This is intended + * for use in a user-specific sandbox or staging environment. + * + * @param ancestorsTable the name of the table to which the ancestors are saved + */ + public void writeToTables(String ancestorsTable) { + + boolean hasExistingAncestors; + + try { + + spark.sql("DESCRIBE TABLE " + ancestorsTable); + + hasExistingAncestors = true; + + } catch (Exception describeException) { + + if (describeException instanceof NoSuchTableException) { + + hasExistingAncestors = false; + + } else { + + throw new RuntimeException(describeException); + } + } + + if (!hasExistingAncestors) { + + createAncestorsTable(spark, ancestorsTable, null); + } + + Dataset currentMembers = this.spark.table(ancestorsTable) + .select(col("uri").alias("url"), col("version")) + .distinct() + .as(URI_AND_VERSION_ENCODER); + + if (hasDuplicateUriAndVersions(currentMembers)) { + + throw new IllegalArgumentException("The given hierarchies contain duplicate uri and " + + "versions against ancestors already stored in the table, " + ancestorsTable); + } + + writeAncestorsToTable(this.ancestors, ancestorsTable); + } + + /** + * Creates a table of ancestor records partitioned by uri and version. + * + * @param spark the spark session + * @param tableName the name of the ancestors table + * @param location the location to store the table, or null to create a Hive-managed table + * @throws IllegalArgumentException if the table name or location are malformed + */ + private static void createAncestorsTable(SparkSession spark, String tableName, String location) { + + if (!TABLE_NAME_PATTERN.matcher(tableName).matches()) { + throw new IllegalArgumentException("Invalid table name: " + tableName); + } + + // Hive will check for well-formed paths, so we just ensure a user isn't attempting to inject + // additional SQL into the statement + if (location != null && location.contains(";")) { + throw new IllegalArgumentException("Invalid path for ancestors table: " + location); + } + + StringBuilder builder = new StringBuilder(); + + if (location != null) { + + builder.append("CREATE EXTERNAL TABLE IF NOT EXISTS "); + + } else { + + builder.append("CREATE TABLE IF NOT EXISTS "); + } + + builder.append(tableName); + + // Note the partitioned by columns are deliberately lower case here since Spark does not appear + // to match columns to Hive partitions if they are not + builder.append("(descendantSystem STRING, " + "descendantValue STRING, " + "ancestorSystem STRING," + "ancestorValue STRING)\n" + "PARTITIONED BY (uri STRING, version STRING)\n"); + + builder.append("STORED AS PARQUET\n"); + + if (location != null) { + builder.append("LOCATION '") + .append(location) + .append("'"); + } + + spark.sql(builder.toString()); + } + + /** + * Writes ancestor records to a table. This class ensures the columns and partitions are mapped + * properly, and is a workaround similar to the problem described here.
+ * + * @param ancestors a dataset of ancestor records + * @param tableName the table to write them to + */ + private static void writeAncestorsToTable(Dataset ancestors, String tableName) { + + Dataset orderedColumnDataset = ancestors.select("descendantSystem", + "descendantValue", + "ancestorSystem", + "ancestorValue", + "uri", + "version"); + + orderedColumnDataset.write() + .mode(SaveMode.ErrorIfExists) + .insertInto(tableName); + } + + /** + * A JavaBean to represent an element in a hierarchical system from which transitive ancestors + * can be computed. This class is mutable for easy use as a Spark + * {@code Dataset} instance. + */ + public static class HierarchicalElement implements Serializable { + + private String ancestorSystem; + + private String ancestorValue; + + private String descendantSystem; + + private String descendantValue; + + /** + * Nullary constructor so Spark can encode this class as a bean. + */ + public HierarchicalElement() { + } + + /** + * Constructs a {@link HierarchicalElement} instance. + * + * @param ancestorSystem the ancestor system + * @param ancestorValue the ancestor value + * @param descendantSystem the descendant system + * @param descendantValue the descendant value + */ + public HierarchicalElement(String ancestorSystem, + String ancestorValue, + String descendantSystem, + String descendantValue) { + this.ancestorSystem = ancestorSystem; + this.ancestorValue = ancestorValue; + this.descendantSystem = descendantSystem; + this.descendantValue = descendantValue; + } + + /** + * Returns the ancestor system. + * + * @return the ancestor system. + */ + public String getAncestorSystem() { + return ancestorSystem; + } + + /** + * Sets the ancestor system. + * + * @param ancestorSystem the ancestor system + */ + public void setAncestorSystem(String ancestorSystem) { + this.ancestorSystem = ancestorSystem; + } + + /** + * Returns the ancestor value. + * + * @return the ancestor value. + */ + public String getAncestorValue() { + return ancestorValue; + } + + /** + * Sets the ancestor value. + * + * @param ancestorValue the ancestor value + */ + public void setAncestorValue(String ancestorValue) { + this.ancestorValue = ancestorValue; + } + + /** + * Returns the descendant system. + * + * @return the descendant system. + */ + public String getDescendantSystem() { + return descendantSystem; + } + + /** + * Sets the descendant system. + * + * @param descendantSystem the descendant system + */ + public void setDescendantSystem(String descendantSystem) { + this.descendantSystem = descendantSystem; + } + + /** + * Returns the descendant value. + * + * @return the descendant value. + */ + public String getDescendantValue() { + return descendantValue; + } + + /** + * Sets the descendant value. 
+ * + * @param descendantValue the descendant value + */ + public void setDescendantValue(String descendantValue) { + this.descendantValue = descendantValue; + } + + @Override + public boolean equals(Object obj) { + + if (!(obj instanceof HierarchicalElement)) { + return false; + } + + HierarchicalElement that = (HierarchicalElement) obj; + + return Objects.equals(this.ancestorSystem, that.ancestorSystem) + && Objects.equals(this.ancestorValue, that.ancestorValue) + && Objects.equals(this.descendantSystem, that.descendantSystem) + && Objects.equals(this.descendantValue, that.descendantValue); + } + + @Override + public int hashCode() { + return 37 + * Objects.hashCode(this.ancestorSystem) + * Objects.hashCode(this.ancestorValue) + * Objects.hashCode(this.descendantSystem) + * Objects.hashCode(this.descendantValue); + } + } +} diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Mapping.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Mapping.java index 8e072a4b..017c39c2 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Mapping.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Mapping.java @@ -46,10 +46,49 @@ public class Mapping { private String equivalence = EQUIVALENT; + /** + * Nullary constructor so Spark can encode this class as a bean. + */ + public Mapping() { + } + + /** + * Constructs a {@link Mapping} instance. + * + * @param conceptMapUri the URI for the FHIR concept map that owns this mapping + * @param conceptMapVersion the version of the FHIR concept map that owns this mapping + * @param sourceValueSet the valueset for all source values in this mapping + * @param targetValueSet the valueset for all target values in this mapping + * @param sourceSystem the code system for the source code + * @param sourceValue the code value for the source code + * @param targetSystem the code system for the target code + * @param targetValue the code value for the target code + * @param equivalence the FHIR equivalence type + */ + public Mapping(String conceptMapUri, + String conceptMapVersion, + String sourceValueSet, + String targetValueSet, + String sourceSystem, + String sourceValue, + String targetSystem, + String targetValue, + String equivalence) { + this.conceptMapUri = conceptMapUri; + this.conceptMapVersion = conceptMapVersion; + this.sourceValueSet = sourceValueSet; + this.targetValueSet = targetValueSet; + this.sourceSystem = sourceSystem; + this.sourceValue = sourceValue; + this.targetSystem = targetSystem; + this.targetValue = targetValue; + this.equivalence = equivalence; + } + /** * Returns the URI for the FHIR concept map that owns this mapping. * - * @return the URI for the FHIR concept map that owns this mapping + * @return the URI for the FHIR concept map that owns this mapping. */ public String getConceptMapUri() { return conceptMapUri; @@ -67,7 +106,7 @@ public void setConceptMapUri(String conceptMapUri) { /** * Returns the version of the FHIR concept map that owns this mapping. * - * @return the version of the FHIR concept map that owns this mapping + * @return the version of the FHIR concept map that owns this mapping. */ public String getConceptMapVersion() { return conceptMapVersion; @@ -85,7 +124,7 @@ public void setConceptMapVersion(String conceptMapVersion) { /** * Returns the valueset for all source values in this mapping. * - * @return the valueset for all source values in this mapping + * @return the valueset for all source values in this mapping. 
*/ public String getSourceValueSet() { return sourceValueSet; @@ -103,7 +142,7 @@ public void setSourceValueSet(String sourceValueSet) { /** * Returns the valueset for all target values in this mapping. * - * @return the valueset for all target values in this mapping + * @return the valueset for all target values in this mapping. */ public String getTargetValueSet() { return targetValueSet; @@ -121,7 +160,7 @@ public void setTargetValueSet(String targetValueSet) { /** * Returns the code system for the source code. * - * @return the code system for the source code + * @return the code system for the source code. */ public String getSourceSystem() { return sourceSystem; @@ -157,7 +196,7 @@ public void setSourceValue(String sourceValue) { /** * Returns the code system for the target code. * - * @return the code system for the target code + * @return the code system for the target code. */ public String getTargetSystem() { return targetSystem; @@ -175,7 +214,7 @@ public void setTargetSystem(String targetSystem) { /** * Returns the code value for the target code. * - * @return the code value for the target code + * @return the code value for the target code. */ public String getTargetValue() { return targetValue; @@ -195,7 +234,7 @@ public void setTargetValue(String targetValue) { * Returns the equivalence for the mapping. Defaults to "equivalent" if not * otherwise set. * - * @return the FHIR equivalence type + * @return the FHIR equivalence type. * @see FHIR valueset * concept map equivalence */ diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/UrlAndVersion.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/UrlAndVersion.java new file mode 100644 index 00000000..cfcc7af0 --- /dev/null +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/UrlAndVersion.java @@ -0,0 +1,56 @@ +package com.cerner.bunsen.mappings; + +/** + * URI and version tuple used to uniquely identify a concept map, value set, or hierarchy. + */ +public class UrlAndVersion { + + private String url; + + private String version; + + public UrlAndVersion(String url, String version) { + this.url = url; + this.version = version; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + + /** + * Nullary constructor for use in Spark data sets. + */ + public UrlAndVersion() { + } + + @Override + public int hashCode() { + + return 17 * url.hashCode() * version.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof UrlAndVersion)) { + return false; + } + + UrlAndVersion that = (UrlAndVersion) obj; + + return this.url.equals(that.url) + && this.version.equals(that.version); + } +} diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Value.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Value.java new file mode 100644 index 00000000..68c3644f --- /dev/null +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/Value.java @@ -0,0 +1,164 @@ +package com.cerner.bunsen.mappings; + +import java.io.Serializable; +import java.util.Objects; + +/** + * A JavaBean to represent a value row of a value set. This class is mutable for easy use as a + * Spark {@code Dataset} instance. 
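+ *
+ * <p>For illustration only (the URIs, versions and code below are hypothetical, not defined by
+ * this library): {@code new Value("urn:example:valueset:bp", "0.1", "http://loinc.org", "2.56", "8480-6")}
+ * would represent a single LOINC code belonging to a blood-pressure value set.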
+ */ +public class Value implements Serializable { + + private String valueSetUri; + + private String valueSetVersion; + + private String system; + + private String version; + + private String value; + + /** + * Nullary constructor so Spark can encode this class as a bean. + */ + public Value() { + } + + /** + * Constructs a {@link Value} instance. + * + * @param valueSetUri the value set uri that owns this value + * @param valueSetVersion the value set version that owns this value + * @param system the code system that owns this value + * @param version the version of the code system that owns this value + * @param value the value + */ + public Value(String valueSetUri, + String valueSetVersion, + String system, + String version, + String value) { + this.valueSetUri = valueSetUri; + this.valueSetVersion = valueSetVersion; + this.system = system; + this.version = version; + this.value = value; + } + + /** + * Returns the URI for this FHIR value set that owns this value. + * + * @return the URI for this FHIR value set that owns this value. + */ + public String getValueSetUri() { + return valueSetUri; + } + + /** + * Sets the URI for this FHIR value set that owns this value. + * + * @param valueSetUri the URI for this FHIR value set that owns this value + */ + public void setValueSetUri(String valueSetUri) { + this.valueSetUri = valueSetUri; + } + + /** + * Returns the version for this FHIR value set that owns this value. + * + * @return the version for this FHIR value set that owns this value. + */ + public String getValueSetVersion() { + return valueSetVersion; + } + + /** + * Sets the version for this FHIR value set that owns this value. + * + * @param valueSetVersion the version for this FHIR value set that owns this value + */ + public void setValueSetVersion(String valueSetVersion) { + this.valueSetVersion = valueSetVersion; + } + + /** + * Returns the code system that owns this value. + * + * @return the code system that owns this value. + */ + public String getSystem() { + return system; + } + + /** + * Sets the code system that owns this value. + * + * @param system the code system that owns this value + */ + public void setSystem(String system) { + this.system = system; + } + + /** + * Returns the version of the code system that owns this value. + * + * @return the version of the code system that owns this value. + */ + public String getVersion() { + return version; + } + + /** + * Sets the version of the code system that owns this value. + * + * @param version the version of the code system that owns this value + */ + public void setVersion(String version) { + this.version = version; + } + + /** + * Returns the value. + * + * @return the value. + */ + public String getValue() { + return value; + } + + /** + * Sets the value. 
+ * + * @param value the value + */ + public void setValue(String value) { + this.value = value; + } + + @Override + public boolean equals(Object obj) { + + if (!(obj instanceof Value)) { + return false; + } + + Value that = (Value) obj; + + return Objects.equals(this.valueSetUri, that.valueSetUri) + && Objects.equals(this.valueSetVersion, that.valueSetVersion) + && Objects.equals(this.system, that.system) + && Objects.equals(this.version, that.version) + && Objects.equals(this.value, that.value); + } + + @Override + public int hashCode() { + return 37 + * Objects.hashCode(this.valueSetUri) + * Objects.hashCode(this.valueSetVersion) + * Objects.hashCode(this.system) + * Objects.hashCode(this.version) + * Objects.hashCode(this.value); + } +} diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ValueSets.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ValueSets.java new file mode 100644 index 00000000..361f029a --- /dev/null +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/ValueSets.java @@ -0,0 +1,743 @@ +package com.cerner.bunsen.mappings; + +import static org.apache.spark.sql.functions.col; +import static org.apache.spark.sql.functions.collect_list; +import static org.apache.spark.sql.functions.lit; +import static org.apache.spark.sql.functions.struct; + +import ca.uhn.fhir.context.FhirContext; +import ca.uhn.fhir.parser.IParser; +import com.cerner.bunsen.FhirEncoders; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.broadcast.Broadcast; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Encoder; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.hl7.fhir.dstu3.model.ValueSet; +import org.hl7.fhir.dstu3.model.ValueSet.ConceptReferenceComponent; +import org.hl7.fhir.dstu3.model.ValueSet.ConceptSetComponent; +import org.hl7.fhir.dstu3.model.ValueSet.ValueSetComposeComponent; +import scala.Tuple2; + +/** + * An immutable collection of FHIR ValueSets. This class is used to import value set content, + * explore it, and persist it to a database. + */ +public class ValueSets { + + private static final FhirContext FHIR_CONTEXT = FhirContext.forDstu3(); + + private static final IParser PARSER = FHIR_CONTEXT.newXmlParser(); + + /** + * An encoder for serializing values. + */ + private static final Encoder VALUE_ENCODER = Encoders.bean(Value.class); + + private static final Encoder VALUE_SET_ENCODER = FhirEncoders.forStu3() + .getOrCreate() + .of(ValueSet.class); + + private static final Encoder URL_AND_VERSION_ENCODER = + Encoders.bean(UrlAndVersion.class); + + private static final Pattern TABLE_NAME_PATTERN = + Pattern.compile("[A-Za-z][A-Za-z0-9_]*\\.?[A-Za-z0-9_]*"); + + /** + * Returns the encoder for values. + * + * @return the encoder for values. + */ + public static Encoder getValueEncoder() { + return VALUE_ENCODER; + } + + /** + * Returns the encoder for value sets. 
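+ *
+ * <p>As one possible use, an in-memory list of value sets can be turned into a dataset with
+ * {@code spark.createDataset(valueSetList, ValueSets.getValueSetEncoder())}, where {@code spark}
+ * and {@code valueSetList} are an existing session and list (illustrative names only).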
+ * + * @return the encoder for value sets. + */ + public static Encoder getValueSetEncoder() { + return VALUE_SET_ENCODER; + } + + /** + * Returns the encoder for UrlAndVersion tuples. + * + * @return the encoder for UrlAndVersion tuples. + */ + public static Encoder getUrlAndVersionEncoder() { + return URL_AND_VERSION_ENCODER; + } + + /** + * Default database name where the value sets information is stored. + */ + public static final String VALUE_SETS_DATABASE = "ontologies"; + + /** + * Default table name where the expanded values information is stored. + */ + public static final String VALUES_TABLE = "values"; + + /** + * Default table name where value sets metadata is stored. + */ + public static final String VALUE_SETS_TABLE = "valuesets"; + + private final SparkSession spark; + + /** + * URI and Version metadata used to preserve uniqueness among value sets. + */ + private final Dataset members; + + private final Dataset valueSets; + + private final Dataset values; + + private ValueSets(SparkSession spark, + Dataset members, + Dataset valueSets, + Dataset values) { + + this.spark = spark; + this.members = members; + this.valueSets = valueSets; + this.values = values; + } + + /** + * Returns the collection of value sets from the default database and tables. + * + * @param spark the spark session + * @return a ValueSets instance. + */ + public static ValueSets getDefault(SparkSession spark) { + + return getFromDatabase(spark, VALUE_SETS_DATABASE); + } + + /** + * Returns the collection of value sets from the tables in the given database. + * + * @param spark the spark session + * @param databaseName name of the database containing the value sets and values tables + * @return a ValueSets instance. + */ + public static ValueSets getFromDatabase(SparkSession spark, String databaseName) { + + Dataset values = spark.table(databaseName + "." + VALUES_TABLE).as(VALUE_ENCODER); + + Dataset valueSets = spark.table(databaseName + "." + VALUE_SETS_TABLE) + .as(VALUE_SET_ENCODER); + + Dataset members = valueSets.select("url", "version").as(URL_AND_VERSION_ENCODER); + + return new ValueSets(spark, + members, + valueSets, + values); + } + + /** + * Returns an empty ValueSets instance. + * + * @param spark the spark session + * @return an empty ValueSets instance. + */ + public static ValueSets getEmpty(SparkSession spark) { + + Dataset emptyValueSets = spark.emptyDataset(VALUE_SET_ENCODER) + .withColumn("timestamp", lit(null).cast("timestamp")) + .as(VALUE_SET_ENCODER); + + return new ValueSets(spark, + spark.emptyDataset(URL_AND_VERSION_ENCODER), + emptyValueSets, + spark.emptyDataset(VALUE_ENCODER)); + } + + /** + * Returns a dataset of all values in this collection. This is generally used for inspection and + * debugging of values. + * + * @return a dataset of all values. + */ + public Dataset getValues() { + return this.values; + } + + /** + * Returns the values for the given URI and version. + * + * @param uri the uri of the value set for which we get values + * @param version the version of the value set for which we get values + * @return a dataset of values for the given URI and version. + */ + public Dataset getValues(String uri, String version) { + + return this.values.where(col("valueseturi").equalTo(lit(uri)) + .and(col("valuesetversion").equalTo(lit(version)))); + } + + /** + * Returns a dataset with the values for each element in the map of uri to version. 
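+ *
+ * <p>For example, passing the result of {@code getLatestVersions(uris, false)} to this method
+ * loads the values for the newest version of each value set uri in {@code uris}, which is how
+ * the {@code getLatestValues} convenience method is implemented.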
+ * + * @param uriToVersion a map of value set URI to the version to load + * @return a dataset of values for the given URIs and versions. + */ + public Dataset getValues(Map uriToVersion) { + + JavaSparkContext context = new JavaSparkContext(this.spark.sparkContext()); + + Broadcast> broadcastUrisToVersion = context.broadcast(uriToVersion); + + return this.values.filter((FilterFunction) value -> { + + String latestVersion = broadcastUrisToVersion.getValue().get(value.getValueSetUri()); + + return latestVersion != null && latestVersion.equals(value.getValueSetVersion()); + }); + } + + /** + * Returns the latest version of all value sets. + * + * @param includeExperimental whether to include value sets marked as experimental + * @return a map of value set URIs to the latest version for them. + */ + public Map getLatestVersions(boolean includeExperimental) { + + return getLatestVersions(null, includeExperimental); + } + + /** + * Returns the latest versions of a given set of value sets. + * + * @param uris a set of URIs for which to retrieve the latest versions, or null to load them all + * @param includeExperimental whether to include value sets marked as experimental + * @return a map of value set URIs to the latest versions for them. + */ + public Map getLatestVersions(final Set uris, boolean includeExperimental) { + + // Reduce by the concept map URI to return only the latest version + // per concept map. Spark's provided max aggregation function + // only works on numeric types, so we jump into RDDs and perform + // the reduce by hand. + JavaRDD members = this.valueSets.select("url", "version", "experimental") + .toJavaRDD() + .filter(row -> (uris == null || uris.contains(row.getString(0))) + && (includeExperimental || row.isNullAt(2) || !row.getBoolean(2))) + .mapToPair(row -> new Tuple2<>(row.getString(0), row.getString(1))) + .reduceByKey((leftVersion, rightVersion) -> + leftVersion.compareTo(rightVersion) > 0 ? leftVersion : rightVersion) + .map(tuple -> new UrlAndVersion(tuple._1, tuple._2)); + + return spark.createDataset(members.rdd(), URL_AND_VERSION_ENCODER) + .collectAsList() + .stream() + .collect(Collectors.toMap(UrlAndVersion::getUrl, + UrlAndVersion::getVersion)); + } + + /** + * Returns a dataset with the latest values for each valueset of the given uris. + * + * @param uris URIs for the value sets + * @param includeExperimental whether to include value sets marked as experimental + * @return a dataset of the latest mappings for them. + */ + public Dataset getLatestValues(Set uris, boolean includeExperimental) { + + // Since mappings are partitioned by URL and version, in most cases it is more efficient to load + // separately for each partition and union the results. + Map latestVersions = getLatestVersions(uris, includeExperimental); + + return getValues(latestVersions); + } + + /** + * Returns the value set with the given uri and version, or null if there is no such value set. + * + * @param uri the uri of the value set to return + * @param version the version of the value set to return + * @return the specified value set. 
+ */ + public ValueSet getValueSet(String uri, String version) { + + // Load the value sets, which may contain zero items if the value set does not exist + + // Typecast necessary to placate the Java compiler calling this Scala function + ValueSet[] valueSets = (ValueSet[]) this.valueSets.filter( + col("url").equalTo(lit(uri)) + .and(col("version").equalTo(lit(version)))) + .head(1); + + if (valueSets.length == 0) { + + return null; + + } else { + + ValueSet valueSet = valueSets[0]; + + Dataset filteredValues = getValues(uri, version); + + addToValueSet(valueSet, filteredValues); + + return valueSet; + } + } + + /** + * Returns a dataset of value sets to inspect metadata. Since the value sets themselves can be + * quite large, the values in this dataset do not contain them. Instead, users should use the + * {@link #getValues()} method to query values in depth. + * + * @return a dataset of value sets that do not contain concept values. + */ + public Dataset getValueSets() { + return this.valueSets; + } + + /** + * Reads all value sets from a given directory and adds them to our collection. The directory may + * be anything readable from a Spark path, including local filesystems, HDFS, S3, or others. + * + * @param path a path from which value sets will be loaded + * @return an instance of ValueSets that includes the contents from that directory. + */ + public ValueSets withValueSetsFromDirectory(String path) { + + return withValueSets(valueSetDatasetFromDirectory(path)); + } + + /** + * Returns a dataset of ValueSet from the content stored at the given directory. + */ + private Dataset valueSetDatasetFromDirectory(String path) { + + + JavaRDD> fileNamesAndContents = this.spark.sparkContext() + .wholeTextFiles(path, 1) + .toJavaRDD(); + + return this.spark.createDataset(fileNamesAndContents + .map(tuple -> (ValueSet) PARSER.parseResource(tuple._2)) + .rdd(), VALUE_SET_ENCODER); + } + + /** + * Returns all value sets that are disjoint with value sets stored in the default database and + * adds them to our collection. The directory may be anything readable from a Spark path, + * including local filesystems, HDFS, S3, or others. + * + * @param path a path from which disjoint value sets will be loaded + * @return an instance of ValueSets that includes content from that directory that is disjoint + * with content already contained in the default database. + */ + public ValueSets withDisjointValueSetsFromDirectory(String path) { + + return withDisjointValueSetsFromDirectory(path, VALUE_SETS_DATABASE); + } + + /** + * Returns all value sets that are disjoint with value sets stored in the given database and + * adds them to our collection. The directory may be anything readable from a Spark path, + * including local filesystems, HDFS, S3, or others. + * + * @param path a path from which disjoint value sets will be loaded + * @param database the database to check value sets against + * @return an instance of ValueSets that includes content from that directory that is disjoint + * with content already contained in the given database. + */ + public ValueSets withDisjointValueSetsFromDirectory(String path, String database) { + + Dataset currentMembers = this.spark.table(database + "." 
+ VALUE_SETS_TABLE) + .select("url", "version") + .distinct() + .as(URL_AND_VERSION_ENCODER) + .alias("current"); + + Dataset valueSets = valueSetDatasetFromDirectory(path) + .alias("new") + .join(currentMembers, col("new.url").equalTo(col("current.url")) + .and(col("new.version").equalTo(col("current.version"))), + "leftanti") + .as(VALUE_SET_ENCODER); + + return withValueSets(valueSets); + } + + /** + * Returns a new ValueSets instance that includes the given value sets. + * + * @param valueSets the value sets to add to the returned collection. + * @return a new ValueSets instance with the added value sets. + */ + public ValueSets withValueSets(ValueSet... valueSets) { + + return withValueSets(Arrays.asList(valueSets)); + } + + public ValueSets withValueSets(List valueSets) { + + return withValueSets(this.spark.createDataset(valueSets, VALUE_SET_ENCODER)); + } + + /** + * Returns a new ValueSets instance that includes the given value sets. + * + * @param valueSets the value sets to add to the returned collection. + * @return a new ValueSets instance with the added value sets. + */ + public ValueSets withValueSets(Dataset valueSets) { + + Dataset newMembers = getUrlAndVersions(valueSets); + + // Ensure that there are no duplicates among the value sets + if (hasDuplicateUrlAndVersions(newMembers) || valueSets.count() != newMembers.count()) { + + throw new IllegalArgumentException( + "Cannot add value sets having duplicate valueSetUri and valueSetVersion"); + } + + // The value set concepts will be stored in the values table for persistence, so we remove + // them from the individual value sets. This can be done most easily by setting concepts to an + // empty list. + Dataset withoutConcepts = valueSets.map((MapFunction) valueSet -> { + ValueSet valueSetWithoutConcepts = valueSet.copy(); + + List updatedInclusions = new ArrayList<>(); + + for (ConceptSetComponent inclusion: valueSet.getCompose().getInclude()) { + + ConceptSetComponent inclusionWithoutConcepts = inclusion.copy(); + + inclusionWithoutConcepts.setConcept(new ArrayList<>()); + updatedInclusions.add(inclusionWithoutConcepts); + } + + valueSetWithoutConcepts.getCompose().setInclude(updatedInclusions); + + return valueSetWithoutConcepts; + }, VALUE_SET_ENCODER); + + Dataset newValues = valueSets.flatMap(ValueSets::expandValuesIterator, VALUE_ENCODER); + + return withValueSets(withoutConcepts, newValues); + } + + private ValueSets withValueSets(Dataset newValueSets, Dataset newValues) { + + Dataset newMembers = getUrlAndVersions(newValueSets); + + // Instantiating a new composite ConceptMaps requires a new timestamp + Timestamp timestamp = new Timestamp(System.currentTimeMillis()); + + Dataset newValueSetsWithTimestamp = newValueSets + .withColumn("timestamp", lit(timestamp.toString()).cast("timestamp")) + .as(VALUE_SET_ENCODER); + + return new ValueSets(spark, + this.members.union(newMembers), + this.valueSets.union(newValueSetsWithTimestamp), + this.values.union(newValues)); + } + + /** + * Given a value set, returns a list of value records it contains. + * + * @param valueSet a value set + * @return a list of Value records. 
+ */ + public static List expandValues(ValueSet valueSet) { + + List values = new ArrayList<>(); + + expandValuesIterator(valueSet).forEachRemaining(values::add); + + return values; + } + + private static Iterator expandValuesIterator(ValueSet valueSet) { + + List values = new ArrayList<>(); + + ValueSetComposeComponent compose = valueSet.getCompose(); + + for (ConceptSetComponent inclusion: compose.getInclude()) { + + for (ConceptReferenceComponent concept: inclusion.getConcept()) { + + Value value = new Value(); + + value.setValueSetUri(valueSet.getUrl()); + value.setValueSetVersion(valueSet.getVersion()); + + value.setSystem(inclusion.getSystem()); + value.setVersion(inclusion.getVersion()); + + value.setValue(concept.getCode()); + + values.add(value); + } + } + + return values.iterator(); + } + + /** + * Adds the given values to the given value set instance. + */ + private void addToValueSet(ValueSet valueSet, Dataset values) { + + ValueSetComposeComponent composeComponent = valueSet.getCompose(); + ConceptSetComponent currentInclusion = null; + ConceptReferenceComponent concept = null; + + List sortedValues = values.sort("system", "version", "value").collectAsList(); + + // Workaround for the decoder producing an immutable array by replacing it with a mutable one + composeComponent.setInclude(new ArrayList<>(composeComponent.getInclude())); + for (Value value: sortedValues) { + + if (currentInclusion == null + || !value.getSystem().equals(currentInclusion.getSystem()) + || !value.getVersion().equals(currentInclusion.getVersion())) { + + // Find a matching inclusion + for (ConceptSetComponent candidate: composeComponent.getInclude()) { + + if (value.getSystem().equals(candidate.getSystem()) + && value.getVersion().equals(candidate.getVersion())) { + + currentInclusion = candidate; + + // Workaround for the decoder producing an immutable array by replacing it with a + // mutable one + currentInclusion.setConcept(new ArrayList<>(currentInclusion.getConcept())); + } + } + + // No matching inclusion found, so add one + if (currentInclusion == null) { + + currentInclusion = composeComponent.addInclude(); + + currentInclusion.setSystem(value.getSystem()); + currentInclusion.setVersion(value.getVersion()); + + concept = null; + } + } + + // Create concept if not exists + if (concept == null || !value.getValue().equals(concept.getCode())) { + + concept = currentInclusion.addConcept(); + concept.setCode(value.getValue()); + } + } + } + + private static Dataset getUrlAndVersions(Dataset valueSets) { + + return valueSets.select("url", "version") + .distinct() + .as(URL_AND_VERSION_ENCODER); + } + + /** + * Returns true if the given membersToCheck has any url and version duplicates with the members + * of this value sets instance. + */ + private boolean hasDuplicateUrlAndVersions(Dataset membersToCheck) { + + return this.members.intersect(membersToCheck).count() > 0; + } + + /** + * Writes the value sets to the default database "ontologies" using default table names: + * "values" and "valuesets". + */ + public void writeToDatabase() { + + writeToDatabase(VALUE_SETS_DATABASE); + } + + /** + * Writes the value sets to the given database using default table names: "values" + * and "valuesets". + * + * @param database the name of the database to which the value sets are saved + */ + public void writeToDatabase(String database) { + + writeToTables(database + "." + VALUES_TABLE, + database + "." + VALUE_SETS_TABLE); + } + + /** + * Writes value sets to the given tables. + * + *

Warning: these updates are likely not atomic due to the lack of transactional + * semantics in the underlying data store. Concurrent users may see previous items + * removed before new ones are added, or items may appear separately from others. This is intended + * for use in a user-specific sandbox or staging environment. + * + * @param valuesTable name of the table to which the value records are saved + * @param valueSetTable name of the table to which the value set metadata is saved + */ + public void writeToTables(String valuesTable, String valueSetTable) { + + boolean hasExistingValueSets; + + try { + + spark.sql("DESCRIBE TABLE " + valueSetTable); + + hasExistingValueSets = true; + + } catch (Exception describeException) { + + // Checked exceptions when calling into Scala upset the Java compiler, + // hence the need for this workaround and re-throw to propagate unexpected + // failures. + if (describeException instanceof NoSuchTableException) { + + hasExistingValueSets = false; + + } else { + + throw new RuntimeException(describeException); + } + } + + // If the target tables do not exist, we create them. The values and ancestors tables are + // created explicitly to meet our partitioning system + if (!hasExistingValueSets) { + + createValuesTable(spark, valuesTable, null); + + // Create a value set table by writing empty data having the proper schema and properties + spark.emptyDataset(VALUE_SET_ENCODER) + .withColumn("timestamp", lit(null).cast("timestamp")) + .write() + .format("parquet") + .partitionBy("timestamp") + .saveAsTable(valueSetTable); + } + + // Check existing value set URIs and Versions for duplicates among the new members + Dataset currentMembers = this.spark.table(valueSetTable) + .select("url", "version") + .distinct() + .as(URL_AND_VERSION_ENCODER); + + if (hasDuplicateUrlAndVersions(currentMembers)) { + + throw new IllegalArgumentException("The given value sets contain duplicate url and versions " + + "against value sets already stored in the table, " + valueSetTable); + } + + writeValuesToTable(this.values, valuesTable); + + this.valueSets.write() + .mode(SaveMode.ErrorIfExists) + .insertInto(valueSetTable); + } + + /** + * Creates a table of value records partitioned by valueseturi and valuesetversion.
+ * + * @param spark the spark session + * @param tableName the name of the values table + * @param location the location to store the table, or null to create a Hive-managed table + * @throws IllegalArgumentException if the table name or location are malformed + */ + private static void createValuesTable(SparkSession spark, String tableName, String location) { + + if (!TABLE_NAME_PATTERN.matcher(tableName).matches()) { + throw new IllegalArgumentException("Invalid table name: " + tableName); + } + + // Hive will check for well-formed paths, so we just ensure a user isn't attempting to inject + // additional SQL into the statement + if (location != null && location.contains(";")) { + throw new IllegalArgumentException("Invalid path for values table: " + location); + } + + StringBuilder builder = new StringBuilder(); + + if (location != null) { + + builder.append("CREATE EXTERNAL TABLE IF NOT EXISTS "); + + } else { + + builder.append("CREATE TABLE IF NOT EXISTS "); + } + + builder.append(tableName); + + // Note the partitioned by columns are deliberately lower case here since Spark does not appear + // to match columns to Hive partitions if they are not + builder.append("(system STRING, " + + "version STRING, " + + "value STRING)\n" + + "PARTITIONED BY (valueseturi STRING, valuesetversion STRING)\n"); + + builder.append("STORED AS PARQUET\n"); + + if (location != null) { + builder.append("LOCATION '") + .append(location) + .append("'"); + } + + spark.sql(builder.toString()); + } + + /** + * Writes value records to a table. This class ensures the columns and partitions are mapped + * properly, and is a workaround similar to the problem described here. + * + * @param values a dataset of value records + * @param tableName the table to write them to + */ + private static void writeValuesToTable(Dataset values, String tableName) { + + // Note the last two columns here must be the partitioned-by columns in order and in lower case + // for Spark to properly match them to the partitions + Dataset orderColumnDataset = values.select("system", + "version", + "value", + "valueseturi", + "valuesetversion"); + + orderColumnDataset.write() + .mode(SaveMode.ErrorIfExists) + .insertInto(tableName); + } +} diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableConceptMap.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableConceptMap.java index 07b38acb..92515163 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableConceptMap.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableConceptMap.java @@ -56,7 +56,7 @@ public class BroadcastableConceptMap implements Serializable { } /** - * Cosntructs the broadcastable concept map. + * Constructs the broadcastable concept map. * * @param conceptMapUri the URI of the map * @param mappings the mappings to include in the broadcast @@ -82,7 +82,7 @@ private static Map> sourceToTargetValueMap(List /** * Returns the URI for the concept map. * - * @return the URI for the concept map + * @return the URI for the concept map. */ public String getConceptMapUri() { @@ -142,7 +142,7 @@ public static class CodeValue implements Serializable { /** * Returns the code system. * - * @return the code system + * @return the code system. */ public String getSystem() { return system; @@ -151,7 +151,7 @@ public String getSystem() { /** * Returns the code value. * - * @return the code value + * @return the code value. 
*/ public String getValue() { return value; diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSets.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSets.java index 4099f177..0fdc3166 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSets.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSets.java @@ -3,7 +3,8 @@ import static org.apache.spark.sql.functions.col; import com.cerner.bunsen.mappings.Ancestor; -import com.cerner.bunsen.mappings.ConceptMaps; +import com.cerner.bunsen.mappings.Hierarchies; +import com.cerner.bunsen.mappings.ValueSets; import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; @@ -21,226 +22,75 @@ import org.apache.spark.sql.SparkSession; /** - * An immutable collection of value sets that can be broadcast for use in Spark - * transformations or user-defined functions. + * An immutable collection of value sets that can be broadcast for use in Spark transformations or + * user-defined functions. */ public class BroadcastableValueSets implements Serializable { /** - * Spark encoder for ancestor values. + * Spark encoder for value set references. */ - private static Encoder ANCESTOR_VALUE_ENCODER = - Encoders.bean(AncestorValue.class); + private static Encoder REFERENCE_ENCODER = Encoders.bean(Reference.class); /** - * Map from concept reference to code system to a set of values that - * are contained in that code system. + * Spark encoder for hierarchical ancestor values. */ - private final Map>> values; + private static Encoder ANCESTOR_VALUE_ENCODER = Encoders.bean(AncestorValue.class); /** - * Private constructor for use by the builder. + * A map from value set reference to code system to a set of values that are contained in that + * code system. */ - private BroadcastableValueSets(Map>> values) { - - this.values = values; - } + private Map>> values = new HashMap<>(); /** - * Bean-style class to represent ancestor values locally and as Spark datasets. + * Private constructor for use in the builder. */ - public static class AncestorValue { - - private String referenceName; - - private String conceptMapUri; - - private String conceptMapVersion; - - private String ancestorSystem; - - private String ancestorValue; - - /** - * Nullary constructor so Spark can encode this as a bean. - */ - public AncestorValue() { - } - - /** - * Constructs an ancestor value bean. - * - * @param referenceName the reference name to be used in user code. - * @param conceptMapUri the URI of the concept map defining this relationship - * @param conceptMapVersion the version of the concept map defining this relationship - * @param ancestorSystem the code system of the ancestor - * @param ancestorValue the code value of the ancestor - */ - public AncestorValue(String referenceName, - String conceptMapUri, - String conceptMapVersion, - String ancestorSystem, - String ancestorValue) { - - this.referenceName = referenceName; - this.conceptMapUri = conceptMapUri; - this.conceptMapVersion = conceptMapVersion; - this.ancestorSystem = ancestorSystem; - this.ancestorValue = ancestorValue; - } - - /** - * Returns the value set reference name. - * - * @return the value set reference name - */ - public String getReferenceName() { - return referenceName; - } - - /** - * Sets the value set reference name. 
- * - * @param referenceName the value set reference name - */ - public void setReferenceName(String referenceName) { - this.referenceName = referenceName; - } - - /** - * Returns the URI of the concept map that defines this ancestor value. - * - * @return the URI of the concept map - */ - public String getConceptMapUri() { - return conceptMapUri; - } - - /** - * Sets the URI of the concept map that defines this ancestor value. - * - * @param conceptMapUri the URI of the concept map - */ - public void setConceptMapUri(String conceptMapUri) { - this.conceptMapUri = conceptMapUri; - } - - /** - * Returns the version of the concept map that defines this ancestor value. - * - * @return the version of the concept map. - */ - public String getConceptMapVersion() { - return conceptMapVersion; - } - - /** - * Sets the version of the concept map that defines this ancestor value. - * - * @param conceptMapVersion the version of the concept map. - */ - public void setConceptMapVersion(String conceptMapVersion) { - this.conceptMapVersion = conceptMapVersion; - } - - /** - * Returns the system of the ancestor code. - * - * @return the system of the ancestor code - */ - public String getAncestorSystem() { - return ancestorSystem; - } - - /** - * Sets the system of the ancestor code. - * - * @param ancestorSystem the system of the ancestor code. - */ - public void setAncestorSystem(String ancestorSystem) { - this.ancestorSystem = ancestorSystem; - } - - /** - * Returns the value of the ancestor code. - * - * @return the value of the ancestor code. - */ - public String getAncestorValue() { - return ancestorValue; - } - - /** - * Sets the value of the ancestor code. - * - * @param ancestorValue the value of the ancestor code. - */ - public void setAncestorValue(String ancestorValue) { - this.ancestorValue = ancestorValue; - } - - @Override - public boolean equals(Object obj) { - - if (!(obj instanceof AncestorValue)) { - return false; - } - - AncestorValue that = (AncestorValue) obj; - - return Objects.equals(this.referenceName, that.referenceName) - && Objects.equals(this.conceptMapUri, that.conceptMapUri) - && Objects.equals(this.conceptMapVersion, that.conceptMapVersion) - && Objects.equals(this.ancestorSystem, that.ancestorSystem) - && Objects.equals(this.ancestorValue, that.ancestorValue); - } + private BroadcastableValueSets(Map>> values) { - @Override - public int hashCode() { - return 37 - * Objects.hashCode(this.referenceName) - * Objects.hashCode(this.conceptMapUri) - * Objects.hashCode(this.conceptMapVersion) - * Objects.hashCode(this.ancestorSystem) - * Objects.hashCode(this.ancestorValue); - } + this.values = values; } public static class Builder { /** - * Map from concept reference to code system to a set of values that - * are contained in that code system. + * Map from value set reference to code system to a set of values that are contained in that + * code system. */ private Map>> values = new HashMap<>(); /** - * List of ancestor values to be used when creating a broadcastable value set. + * List of reference values to be used when creating a broadcastable value set. + */ + private List references = new ArrayList<>(); + + /** + * List of hierarchical ancestor values to be used when creating the broadcastable value set. */ private List ancestorValues = new ArrayList<>(); /** * Adds a code under the given reference name to the value set. 
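 *
 * <p>For example, {@code builder.addCode("diabetes", "http://hl7.org/fhir/sid/icd-10", "E11")}
 * would register that single code under the "diabetes" reference name (the reference name and
 * code shown here are illustrative only).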
* - * @param referenceName the referece name of the value set + * @param referenceName the reference name of the value set * @param system the code system to add * @param code the code value to add - * @return this builder + * @return this builder. */ public Builder addCode(String referenceName, String system, String code) { - if (values == null) { + if (this.values == null) { throw new IllegalStateException("The builder cannot be used after " + "the concept map has been built."); } - Map> systemToCodes = values.get(referenceName); + Map> systemToCodes = this.values.get(referenceName); if (systemToCodes == null) { systemToCodes = new HashMap<>(); - values.put(referenceName, systemToCodes); + this.values.put(referenceName, systemToCodes); } Set codeSet = systemToCodes.get(system); @@ -257,9 +107,42 @@ public Builder addCode(String referenceName, String system, String code) { return this; } + /** + * Add a "reference" to a value set, that is, create a user-defined reference name for a value + * set of codes. This method adds a reference using the latest version for the given value set + * uri. + * + * @param referenceName the value set reference name to be used in the user code + * @param valueSetUri the value set uri that contains codes to reference + * @return this builder. + */ + public Builder addReference(String referenceName, String valueSetUri) { + + return addReference(referenceName, valueSetUri, null); + } + + /** + * Add a "reference" to a value set, that is, create a user-defined reference name for a value + * set of codes. + * + * @param referenceName the value set reference name to be used in user code + * @param valueSetUri the value set uri that contains codes to reference + * @param valueSetVersion the value set version that contains codes to reference + * @return this builder. + */ + public Builder addReference(String referenceName, String valueSetUri, String valueSetVersion) { + + this.references.add(new Reference(referenceName, + valueSetUri, + valueSetVersion)); + + return this; + } + /** * Add "descendants" of a given code value, that is code values - * that are transitively subsumed by the given value in the given concept map. + * that are transitively subsumed by the given value in the given hierarchy. This + * uses the latest version of the provided hierarchy. * *

This function creates a collection of ancestors to query, and the descendants * are actually retrieved when the broadcastable value set is built. @@ -267,29 +150,20 @@ public Builder addCode(String referenceName, String system, String code) { * @param referenceName the valueset reference name to be used in user code. * @param ancestorSystem the ancestor system of descendants to include * @param ancestorValue the ancestor value of descendants to include - * @param conceptMapUri the concept map URI that defines the descendants - * @param conceptMapVersion the concept map version that defines the descendants + * @param hierarchyUri the hierarchy URI that defines the descendants * @return this builder */ public Builder addDescendantsOf(String referenceName, String ancestorSystem, String ancestorValue, - String conceptMapUri, - String conceptMapVersion) { - - ancestorValues.add(new AncestorValue(referenceName, - conceptMapUri, - conceptMapVersion, - ancestorSystem, - ancestorValue)); + String hierarchyUri) { - return this; + return addDescendantsOf(referenceName, ancestorSystem, ancestorValue, hierarchyUri, null); } /** * Add "descendants" of a given code value, that is code values - * that are transitively subsumed by the given value in the given concept map. This - * uses the latest version of the provided concept map. + * that are transitively subsumed by the given value in the given hierarchy. * *

This function creates a collection of ancestors to query, and the descendants * are actually retrieved when the broadcastable value set is built. @@ -297,91 +171,154 @@ public Builder addDescendantsOf(String referenceName, * @param referenceName the valueset reference name to be used in user code. * @param ancestorSystem the ancestor system of descendants to include * @param ancestorValue the ancestor value of descendants to include - * @param conceptMapUri the concept map URI that defines the descendants + * @param hierarchyUri the hierarchy URI that defines the descendants + * @param hierarchyVersion the hierarchy version that defines the descendants * @return this builder */ public Builder addDescendantsOf(String referenceName, String ancestorSystem, String ancestorValue, - String conceptMapUri) { + String hierarchyUri, + String hierarchyVersion) { - return addDescendantsOf(referenceName, + this.ancestorValues.add(new AncestorValue(referenceName, + hierarchyUri, + hierarchyVersion, ancestorSystem, - ancestorValue, - conceptMapUri, - null); + ancestorValue)); + + return this; + } + + /** + * Adds the version information to any unversioned references. + * + * @param valueSets the value sets to use + */ + private void addReferenceVersions(ValueSets valueSets) { + + // Identify references without versions and load the latest for the value set uri + Set latestValueSets = this.references.stream() + .filter(reference -> reference.getValueSetVersion() == null) + .map(Reference::getValueSetUri) + .collect(Collectors.toSet()); + + final Map versions = valueSets.getLatestVersions(latestValueSets, false); + + // Sets the version in references that were not specified + for (Reference reference: this.references) { + + if (reference.getValueSetVersion() == null) { + + String valueSetVersion = versions.get(reference.getValueSetUri()); + + reference.setValueSetVersion(valueSetVersion); + } + } } /** * Adds the version information to any unversioned ancestors. * - * @param maps concept maps to use. + * @param hierarchies hierarchies to use */ - private void addAncestorVersions(ConceptMaps maps) { + private void addAncestorVersions(Hierarchies hierarchies) { - // Identify the maps without versions and load the latest version. - Set latestMaps = ancestorValues.stream() - .filter(ancestor -> ancestor.getConceptMapVersion() == null) - .map(AncestorValue::getConceptMapUri) + // Identify hierarchies without versions and load the latest versions + Set latestHierarchies = this.ancestorValues.stream() + .filter(ancestor -> ancestor.version == null) + .map(AncestorValue::getUri) .collect(Collectors.toSet()); - final Map versions = maps.getLatestVersions(latestMaps, false); + final Map versions = hierarchies.getLatestVersions(latestHierarchies); - // Sets the version in ancestors that were not specified. - for (AncestorValue ancestor: ancestorValues) { + for (AncestorValue ancestorValue: this.ancestorValues) { - if (ancestor.getConceptMapVersion() == null) { + String version = versions.get(ancestorValue.getUri()); no-op
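+ *
+ * <p>A minimal usage sketch; the reference names, URIs and codes below are illustrative only
+ * and not part of this API:
+ *
+ * <pre>{@code
+ * BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder()
+ *     .addCode("married", "http://hl7.org/fhir/v3/MaritalStatus", "M")
+ *     .addReference("bloodpressure", "urn:example:valueset:bloodpressure")
+ *     .addDescendantsOf("leukocytes", "http://loinc.org", "LP14419-3", "urn:example:hierarchy")
+ *     .build(spark);
+ * }</pre>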
* - * @param spark a spark session used to load reference data - * @return broadcastable value sets + * @param spark the Spark session used to load reference data + * @return a {@link BroadcastableValueSets} instance. */ public BroadcastableValueSets build(SparkSession spark) { - return build(spark, ConceptMaps.getDefault(spark)); + return build(spark, ValueSets.getDefault(spark), Hierarchies.getDefault(spark)); } /** - * Returns broadcastable value sets using the content added to this builder, using - * the given concept maps to load the needed reference data. + * Returns broadcastable value sets by loading reference data that was added to this builder and + * the given value sets. * - * @param spark a spark session used to load reference data - * @param maps a {@link ConceptMaps} instance defining the reference data to load - * @return broadcastable value sets + * @param spark the Spark session used to load reference data + * @param valueSets a {@link ValueSets} instance defining the value set reference data to load + * @param hierarchies a {@link Hierarchies} instance defining hierarchical reference data to + * load + * @return a {@link BroadcastableValueSets} instance. */ - public BroadcastableValueSets build(SparkSession spark, ConceptMaps maps) { + public BroadcastableValueSets build(SparkSession spark, + ValueSets valueSets, + Hierarchies hierarchies) { - // Add the pending descendants to the values. - if (ancestorValues.size() > 0) { + // Add pending references and the codes contained in those references' value sets + if (this.references.size() > 0) { - // Ensure all ancestors have a version, using the latest for those that don't. - addAncestorVersions(maps); + // Ensure all references have a version, using latest for any that do not + addReferenceVersions(valueSets); - Dataset ancestorsToLoad = spark.createDataset(ancestorValues, - ANCESTOR_VALUE_ENCODER) + Dataset referencesToLoad = spark.createDataset(this.references, + REFERENCE_ENCODER) .as("toload"); - Dataset ancestors = maps.getAncestors().as("a"); + List codeReferences = valueSets.getValues() + .as("present") + .join( + referencesToLoad, + col("present.valueSetUri").equalTo(col("toload.valueSetUri")) + .and(col("present.valueSetVersion").equalTo(col("toload.valueSetVersion")))) + .select("referenceName", "system", "value") + .collectAsList(); + + // Add a code for each code held in the reference's value set + for (Row codeReference: codeReferences) { + + addCode(codeReference.getString(0), + codeReference.getString(1), + codeReference.getString(2)); + } + } + + // Add pending hierarchical descendants to the values + if (this.ancestorValues.size() > 0) { + + // Ensure all descendants have a version, using the latest for any that do not + addAncestorVersions(hierarchies); + + Dataset ancestorsToLoad = spark.createDataset(this.ancestorValues, + ANCESTOR_VALUE_ENCODER) + .as("toload"); - List descendants = ancestors.join(ancestorsToLoad, - col("a.conceptMapUri").equalTo(col("toload.conceptMapUri")) - .and(col("a.conceptMapVersion").equalTo(col("toload.conceptMapVersion"))) - .and(col("a.ancestorSystem").equalTo(col("toload.ancestorSystem"))) - .and(col("a.ancestorValue").equalTo(col("toload.ancestorValue")))) + List descendants = hierarchies.getAncestors() + .as("present") + .join( + ancestorsToLoad, + col("present.uri").equalTo(col("toload.uri")) + .and(col("present.version").equalTo(col("toload.version"))) + .and(col("present.ancestorSystem").equalTo(col("toload.ancestorSystem"))) + 
.and(col("present.ancestorValue").equalTo(col("toload.ancestorValue")))) .select("referenceName", "descendantSystem", "descendantValue") .collectAsList(); - // Add a code for each descendant. + // Add a code for each descendant for (Row descendant: descendants) { addCode(descendant.getString(0), @@ -389,19 +326,20 @@ public BroadcastableValueSets build(SparkSession spark, ConceptMaps maps) { descendant.getString(2)); } - // The parent value itself is also included in the value set, so add it as well. - for (AncestorValue value: ancestorValues) { + // Add a code for the parent as well, since a value is contained in its own value set + for (AncestorValue ancestorValue: this.ancestorValues) { - addCode(value.getReferenceName(), - value.getAncestorSystem(), - value.getAncestorValue()); + addCode(ancestorValue.getReferenceName(), + ancestorValue.getAncestorSystem(), + ancestorValue.getAncestorValue()); } } - // Nullify the builder values so they cannot be further mutated. + // Nullify the builder values so they cannot be further mutated Map>> values = this.values; this.values = null; + this.references = null; this.ancestorValues = null; return new BroadcastableValueSets(values); @@ -411,7 +349,7 @@ public BroadcastableValueSets build(SparkSession spark, ConceptMaps maps) { /** * Returns a {@link BroadcastableValueSets} builder. * - * @return a {@link BroadcastableValueSets} builder + * @return a {@link BroadcastableValueSets} builder. */ public static Builder newBuilder() { @@ -419,18 +357,18 @@ public static Builder newBuilder() { } /** - * Returns true if the valueset with the given reference name includes + * Returns true if the value set with the given reference name includes * the given code value. * * @param referenceName the reference name registered for the value set * @param system the code system to check if it is contained * @param code the code value to check if it is contained - * @return true if contained, false otherwise + * @return true if contained, false otherwise. * @throws IllegalArgumentException if the referenceName is not known to this object */ public boolean hasCode(String referenceName, String system, String code) { - Map> codeSystemAndValues = values.get(referenceName); + Map> codeSystemAndValues = this.values.get(referenceName); if (codeSystemAndValues == null) { throw new IllegalArgumentException("Unknown value set reference name: " + referenceName); @@ -443,7 +381,6 @@ public boolean hasCode(String referenceName, String system, String code) { : values.contains(code); } - /** * Returns the reference names in the value set. * @@ -451,7 +388,7 @@ public boolean hasCode(String referenceName, String system, String code) { */ public Set getReferenceNames() { - return values.keySet(); + return this.values.keySet(); } /** @@ -459,18 +396,284 @@ public Set getReferenceNames() { * those systems used by the given valueset reference. * * @param referenceName the reference name for which to retrieve values - * @return a map of the code systems to the code values used by the reference + * @return a map of the code systems to the code values used by the reference. */ public Map> getValues(String referenceName) { - // Defensively wrap the contained sets in unmodifiable collections. - // This is done on read since this is an infrequently called operation - // so we avoid the cost of creating an immutable set for the primary flow - // where this is not called. - return values.get(referenceName) + // Defensively wrap the contained sets in unmodifiable collections. 
This is done on read since + // this is an infrequently called operation so we avoid the cost of creating an immutable set + // for the primary flow where this is not called. + return this.values.get(referenceName) .entrySet() .stream() .collect(Collectors.toMap(Map.Entry::getKey, entry -> Collections.unmodifiableSet(entry.getValue()))); } + + /** + * Bean-style class to represent a reference to a value set. + */ + public static class Reference { + + private String referenceName; + + private String valueSetUri; + + private String valueSetVersion; + + /** + * Nullary constructor so Spark can encode this class as a bean. + */ + public Reference() { + } + + /** + * Constructs a {@link Reference} value. + * + * @param referenceName the reference name to be used in user code + * @param valueSetUri the value set uri defining this relationship + * @param valueSetVersion the value set version defining this relationship + */ + public Reference(String referenceName, String valueSetUri, String valueSetVersion) { + this.referenceName = referenceName; + this.valueSetUri = valueSetUri; + this.valueSetVersion = valueSetVersion; + } + + /** + * Returns the value set reference name. + * + * @return the value set reference name. + */ + public String getReferenceName() { + return referenceName; + } + + /** + * Sets the value set reference name. + * + * @param referenceName the value set reference name + */ + public void setReferenceName(String referenceName) { + this.referenceName = referenceName; + } + + /** + * Returns the value set uri that defines this reference relationship. + * + * @return the value set uri that defines this reference relationship. + */ + public String getValueSetUri() { + return valueSetUri; + } + + /** + * Sets the value set uri that defines this reference relationship. + * + * @param valueSetUri the value set uri that defines this reference relationship + */ + public void setValueSetUri(String valueSetUri) { + this.valueSetUri = valueSetUri; + } + + /** + * Returns the value set version that defines this reference relationship. + * + * @return the value set version that defines this reference relationship. + */ + public String getValueSetVersion() { + return valueSetVersion; + } + + /** + * Sets the value set version that defines this reference relationship. + * + * @param valueSetVersion the value set version that defines this reference relationship + */ + public void setValueSetVersion(String valueSetVersion) { + this.valueSetVersion = valueSetVersion; + } + + @Override + public boolean equals(Object obj) { + + if (!(obj instanceof Reference)) { + return false; + } + + Reference that = (Reference) obj; + + return Objects.equals(this.referenceName, that.referenceName) + && Objects.equals(this.valueSetUri, that.valueSetUri) + && Objects.equals(this.valueSetVersion, that.valueSetVersion); + } + + @Override + public int hashCode() { + return 37 + * Objects.hashCode(this.referenceName) + * Objects.hashCode(this.valueSetUri) + * Objects.hashCode(this.valueSetVersion); + } + } + + /** + * Bean-style class to represent ancestor values locally and as Spark datasets. + */ + public static class AncestorValue { + private String referenceName; + + private String uri; + + private String version; + + private String ancestorSystem; + + private String ancestorValue; + + /** + * Nullary constructor so Spark can encode this as a bean. + */ + public AncestorValue() { + } + + /** + * Constructs an ancestor value bean. + * + * @param referenceName the reference name to be used in user code. 
+ * @param uri the URI of the hierarchy defining this relationship + * @param version the version of the hierarchy defining this relationship + * @param ancestorSystem the code system of the ancestor + * @param ancestorValue the code value of the ancestor + */ + public AncestorValue(String referenceName, + String uri, + String version, + String ancestorSystem, + String ancestorValue) { + + this.referenceName = referenceName; + this.uri = uri; + this.version = version; + this.ancestorSystem = ancestorSystem; + this.ancestorValue = ancestorValue; + } + + /** + * Returns the value set reference name. + * + * @return the value set reference name. + */ + public String getReferenceName() { + return referenceName; + } + + /** + * Sets the value set reference name. + * + * @param referenceName the value set reference name + */ + public void setReferenceName(String referenceName) { + this.referenceName = referenceName; + } + + /** + * Returns the URI of the hierarchy that owns this ancestor value. + * + * @return the URI of the hierarchy. + */ + public String getUri() { + return uri; + } + + /** + * Sets the URI of the concept map that owns this ancestor value. + * + * @param uri the URI of the hierarchy + */ + public void setUri(String uri) { + this.uri = uri; + } + + /** + * Returns the version of the hierarchy that owns this ancestor value. + * + * @return the version of the hierarchy. + */ + public String getVersion() { + return version; + } + + /** + * Sets the version of the hierarchy that owns this ancestor value. + * + * @param version the version of the hierarchy + */ + public void setVersion(String version) { + this.version = version; + } + + /** + * Returns the system of the ancestor code. + * + * @return the system of the ancestor code. + */ + public String getAncestorSystem() { + return ancestorSystem; + } + + /** + * Sets the system of the ancestor code. + * + * @param ancestorSystem the system of the ancestor code + */ + public void setAncestorSystem(String ancestorSystem) { + this.ancestorSystem = ancestorSystem; + } + + /** + * Returns the value of the ancestor code. + * + * @return the value of the ancestor code. + */ + public String getAncestorValue() { + return ancestorValue; + } + + /** + * Sets the value of the ancestor code. 
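Both bean classes here exist so Spark can serialize builder state into datasets. The encoders themselves (REFERENCE_ENCODER and ANCESTOR_VALUE_ENCODER, used in build above) are not shown in this hunk; presumably they are ordinary Spark bean encoders along the lines of the sketch below, which is an assumption rather than the actual field definitions.

import com.cerner.bunsen.mappings.broadcast.BroadcastableValueSets.AncestorValue;
import com.cerner.bunsen.mappings.broadcast.BroadcastableValueSets.Reference;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;

class EncoderSketch {

  // Bean encoders rely on the nullary constructors and getters/setters defined above.
  static final Encoder<Reference> REFERENCE_ENCODER =
      Encoders.bean(Reference.class);

  static final Encoder<AncestorValue> ANCESTOR_VALUE_ENCODER =
      Encoders.bean(AncestorValue.class);
}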
+ * + * @param ancestorValue the value of the ancestor code + */ + public void setAncestorValue(String ancestorValue) { + this.ancestorValue = ancestorValue; + } + + @Override + public boolean equals(Object obj) { + + if (!(obj instanceof AncestorValue)) { + return false; + } + + AncestorValue that = (AncestorValue) obj; + + return Objects.equals(this.referenceName, that.referenceName) + && Objects.equals(this.uri, that.uri) + && Objects.equals(this.version, that.version) + && Objects.equals(this.ancestorSystem, that.ancestorSystem) + && Objects.equals(this.ancestorValue, that.ancestorValue); + } + + @Override + public int hashCode() { + return 37 + * Objects.hashCode(this.referenceName) + * Objects.hashCode(this.uri) + * Objects.hashCode(this.version) + * Objects.hashCode(this.ancestorSystem) + * Objects.hashCode(this.ancestorValue); + } + } + } diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Loinc.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Loinc.java index 70b37cfc..9b31c756 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Loinc.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Loinc.java @@ -3,13 +3,12 @@ import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.lit; -import com.cerner.bunsen.mappings.ConceptMaps; -import com.cerner.bunsen.mappings.Mapping; +import com.cerner.bunsen.mappings.Hierarchies; +import com.cerner.bunsen.mappings.Hierarchies.HierarchicalElement; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.hl7.fhir.dstu3.model.ConceptMap; /** * Support for LOINC. @@ -17,16 +16,9 @@ public class Loinc { /** - * Concept map URI used for the LOINC hierarchy. + * Hierarchy URI used for LOINC is-a relationships. */ - public static final String LOINC_HIERARCHY_MAPPING_URI = - "uri:cerner:bunsen:mapping:loinc-hierarchy"; - - /** - * Valueset URI used for LOINC codes. See the - * FHIR LOINC documentation. - */ - public static final String LOINC_VALUESET_URI = "http://loinc.org/vs"; + public static final String LOINC_HIERARCHY_URI = Hierarchies.HIERARCHY_URI_PREFIX + "loinc"; /** * LOINC code system URI. @@ -34,68 +26,56 @@ public class Loinc { public static final String LOINC_CODE_SYSTEM_URI = "http://loinc.org"; /** - * Reads the LOINC multiaxial hierarchy file and convert it to a mapping dataset. + * Reads the LOINC mutliaxial hierarchy file and converts it to a {@link HierarchicalElement} + * dataset. * * @param spark the Spark session * @param loincHierarchyPath path to the multiaxial hierarchy CSV - * @param loincVersion the version of the LOINC hierarchy being read. - * @return a dataset of mappings representing the hierarchical relationship. + * @return a dataset of {@link HierarchicalElement} representing the hierarchical relationship. 
*/ - public static Dataset readMultiaxialHierarchyFile(SparkSession spark, - String loincHierarchyPath, - String loincVersion) { + public static Dataset readMultiaxialHierarchyFile(SparkSession spark, + String loincHierarchyPath) { return spark.read() .option("header", true) .csv(loincHierarchyPath) - .select(col("CODE"), col("IMMEDIATE_PARENT")) - .where(col("CODE").isNotNull() - .and(col("CODE").notEqual(lit("")))) + .select(col("IMMEDIATE_PARENT"), col("CODE")) .where(col("IMMEDIATE_PARENT").isNotNull() .and(col("IMMEDIATE_PARENT").notEqual(lit("")))) - .map((MapFunction) row -> { + .where(col("CODE").isNotNull() + .and(col("CODE").notEqual(lit("")))) + .map((MapFunction) row -> { - Mapping mapping = new Mapping(); + HierarchicalElement element = new HierarchicalElement(); - mapping.setConceptMapUri(LOINC_HIERARCHY_MAPPING_URI); - mapping.setConceptMapVersion(loincVersion); - mapping.setSourceValueSet(LOINC_VALUESET_URI); - mapping.setTargetValueSet(LOINC_VALUESET_URI); - mapping.setSourceSystem(LOINC_CODE_SYSTEM_URI); - mapping.setSourceValue(row.getString(0)); - mapping.setTargetSystem(LOINC_CODE_SYSTEM_URI); - mapping.setTargetValue(row.getString(1)); - mapping.setEquivalence(Mapping.SUBSUMES); + element.setAncestorSystem(LOINC_CODE_SYSTEM_URI); + element.setAncestorValue(row.getString(0)); - return mapping; - }, ConceptMaps.getMappingEncoder()); + element.setDescendantSystem(LOINC_CODE_SYSTEM_URI); + element.setDescendantValue(row.getString(1)); + + return element; + }, Hierarchies.getHierarchicalElementEncoder()); } /** - * Returns a ConceptMaps instance with the specified multiaxial hierarchy. This method - * reads the LOINC multiaxial hierarchy file and converts it to a mapping dataset, and - * adds it to the given concept maps. + * Returns a {@link Hierarchies} instance with the specified multiaxial hierarchy. This method + * reads the LOINC multiaxial hierarchy file and converts it to a {@link HierarchicalElement} + * dataset, and adds it to the given hierarchies. * * @param spark the Spark session - * @param maps a ConceptMaps instance to which the hierarchy will be added + * @param hierarchies a {@link Hierarchies} instance to which the hierarchy will be added * @param loincHierarchyPath path to the multiaxial hierarchy CSV - * @param loincVersion the version of the LOINC hierarchy being read. - * @return a ConceptMaps instance that includes the read hierarchy. + * @param loincVersion the version of the LOINC hierarchy being read + * @return a {@link Hierarchies} instance that includes the read hierarchy. 
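As a usage sketch of the method documented above: the file path and version below mirror the test resources in this change; a real deployment would point at the full LOINC multiaxial hierarchy CSV.

import com.cerner.bunsen.mappings.Hierarchies;
import com.cerner.bunsen.mappings.systems.Loinc;
import org.apache.spark.sql.SparkSession;

class LoincHierarchySketch {

  static Hierarchies loadLoinc(SparkSession spark) {

    // Builds a Hierarchies instance containing the LOINC multiaxial hierarchy.
    return Loinc.withLoincHierarchy(spark,
        Hierarchies.getEmpty(spark),
        "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV",
        "2.56");
  }
}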
*/ - public static ConceptMaps withLoincHierarchy(SparkSession spark, - ConceptMaps maps, + public static Hierarchies withLoincHierarchy(SparkSession spark, + Hierarchies hierarchies, String loincHierarchyPath, String loincVersion) { - ConceptMap conceptMap = new ConceptMap(); - - conceptMap.setUrl(LOINC_HIERARCHY_MAPPING_URI); - conceptMap.setVersion(loincVersion); - conceptMap.setExperimental(false); - - Dataset mappings = readMultiaxialHierarchyFile(spark, - loincHierarchyPath, loincVersion); + Dataset elements = readMultiaxialHierarchyFile(spark, loincHierarchyPath); - return maps.withExpandedMap(conceptMap, mappings); + return hierarchies.withHierarchyElements(LOINC_HIERARCHY_URI, loincVersion, elements); } } diff --git a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Snomed.java b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Snomed.java index f71d8857..7ae2c853 100644 --- a/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Snomed.java +++ b/bunsen-core/src/main/java/com/cerner/bunsen/mappings/systems/Snomed.java @@ -3,13 +3,12 @@ import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.lit; -import com.cerner.bunsen.mappings.ConceptMaps; -import com.cerner.bunsen.mappings.Mapping; +import com.cerner.bunsen.mappings.Hierarchies; +import com.cerner.bunsen.mappings.Hierarchies.HierarchicalElement; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; -import org.hl7.fhir.dstu3.model.ConceptMap; /** * Support for SNOMED CT. @@ -17,16 +16,9 @@ public class Snomed { /** - * Concept map URI used for SNOMED is-a relationships. + * Hierarchy URI used for SNOMED is-a relationships. */ - public static final String SNOMED_HIERARCHY_MAPPING_URI = - "uri:cerner:bunsen:mapping:snomed-hierarchy"; - - /** - * Value set URI for all of SNOMED. See the - * FHIR SNOMED documentation. - */ - public static final String SNOMED_CODES_VALUESET_URI = "http://snomed.info/sct?fhir_vs"; + public static final String SNOMED_HIERARCHY_URI = Hierarchies.HIERARCHY_URI_PREFIX + "snomed"; /** * SNOMED code system URI. @@ -39,16 +31,14 @@ public class Snomed { private static final String SNOMED_ISA_RELATIONSHIP_ID = "116680003"; /** - * Reads a Snomed relationship file + * Reads a Snomed relationship file and converts it to a {@link HierarchicalElement} dataset. * * @param spark the Spark session * @param snomedRelationshipPath path to the SNOMED relationship file - * @param snomedVersion the version of the SNOMED CT being read. - * @return a dataset of mappings representing the hierarchical relationship. + * @return a dataset of{@link HierarchicalElement} representing the hierarchical relationship. 
*/ - public static Dataset readRelationshipFile(SparkSession spark, - String snomedRelationshipPath, - String snomedVersion) { + public static Dataset readRelationshipFile(SparkSession spark, + String snomedRelationshipPath) { return spark.read() .option("header", true) @@ -56,54 +46,43 @@ public static Dataset readRelationshipFile(SparkSession spark, .csv(snomedRelationshipPath) .where(col("typeId").equalTo(lit(SNOMED_ISA_RELATIONSHIP_ID))) .where(col("active").equalTo(lit("1"))) - .select(col("sourceId"), col("destinationId")) - .where(col("sourceId").isNotNull() - .and(col("sourceId").notEqual(lit("")))) + .select(col("destinationId"), col("sourceId")) .where(col("destinationId").isNotNull() .and(col("destinationId").notEqual(lit("")))) - .map((MapFunction) row -> { + .where(col("sourceId").isNotNull() + .and(col("sourceId").notEqual(lit("")))) + .map((MapFunction) row -> { - Mapping mapping = new Mapping(); + HierarchicalElement element = new HierarchicalElement(); - mapping.setConceptMapUri(SNOMED_HIERARCHY_MAPPING_URI); - mapping.setConceptMapVersion(snomedVersion); - mapping.setSourceValueSet(SNOMED_CODES_VALUESET_URI); - mapping.setTargetValueSet(SNOMED_CODES_VALUESET_URI); - mapping.setSourceSystem(SNOMED_CODE_SYSTEM_URI); - mapping.setSourceValue(row.getString(0)); - mapping.setTargetSystem(SNOMED_CODE_SYSTEM_URI); - mapping.setTargetValue(row.getString(1)); - mapping.setEquivalence(Mapping.SUBSUMES); + element.setAncestorSystem(SNOMED_CODE_SYSTEM_URI); + element.setAncestorValue(row.getString(0)); - return mapping; - }, ConceptMaps.getMappingEncoder()); + element.setDescendantSystem(SNOMED_CODE_SYSTEM_URI); + element.setDescendantValue(row.getString(1)); + + return element; + }, Hierarchies.getHierarchicalElementEncoder()); } /** * Returns a ConceptMaps instance with the specified multiaxial hierarchy. This method - * reads the LOINC multiaxial hierarchy file and convert it to a mapping dataset, and - * adds it to the given concept maps. + * reads the LOINC multiaxial hierarchy file and convert it to a {@link HierarchicalElement} + * dataset, and adds it to the given hierarchies. * * @param spark the Spark session - * @param maps a ConceptMaps instance to which the hierarchy will be added + * @param hierarchies a {@link Hierarchies} instance to which the hierarchy will be added * @param snomedRelationshipPath path to the relationship CSV * @param snomedVersion the version of SNOMED being read. - * @return a ConceptMaps instance that includes the read hierarchy. + * @return a {@link Hierarchies} instance that includes the read hierarchy. 
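A corresponding sketch for SNOMED, chaining onto an existing Hierarchies instance as the tests in this change do; the relationship file path below is the test sample.

import com.cerner.bunsen.mappings.Hierarchies;
import com.cerner.bunsen.mappings.systems.Snomed;
import org.apache.spark.sql.SparkSession;

class SnomedHierarchySketch {

  static Hierarchies addSnomed(SparkSession spark, Hierarchies existing) {

    // Appends the SNOMED is-a hierarchy, derived from the relationship file,
    // to whatever hierarchies have already been loaded.
    return Snomed.withRelationships(spark,
        existing,
        "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT",
        "20160901");
  }
}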
*/ - public static ConceptMaps withRelationships(SparkSession spark, - ConceptMaps maps, + public static Hierarchies withRelationships(SparkSession spark, + Hierarchies hierarchies, String snomedRelationshipPath, String snomedVersion) { - ConceptMap conceptMap = new ConceptMap(); - - conceptMap.setUrl(SNOMED_HIERARCHY_MAPPING_URI); - conceptMap.setVersion(snomedVersion); - conceptMap.setExperimental(false); - - Dataset mappings = readRelationshipFile(spark, - snomedRelationshipPath, snomedVersion); + Dataset elements = readRelationshipFile(spark, snomedRelationshipPath); - return maps.withExpandedMap(conceptMap, mappings); + return hierarchies.withHierarchyElements(SNOMED_HIERARCHY_URI, snomedVersion, elements); } } diff --git a/bunsen-core/src/main/scala/com/cerner/bunsen/EncoderBuilder.scala b/bunsen-core/src/main/scala/com/cerner/bunsen/EncoderBuilder.scala index 39cc4217..96c8600b 100644 --- a/bunsen-core/src/main/scala/com/cerner/bunsen/EncoderBuilder.scala +++ b/bunsen-core/src/main/scala/com/cerner/bunsen/EncoderBuilder.scala @@ -14,7 +14,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.{InternalRow, expressions} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String -import org.hl7.fhir.dstu3.model.{Reference, TemporalPrecisionEnum} +import org.hl7.fhir.dstu3.model._ import org.hl7.fhir.instance.model.api.{IBase, IBaseDatatype, IIdType} import org.hl7.fhir.utilities.xhtml.XhtmlNode @@ -495,6 +495,43 @@ private[bunsen] class EncoderBuilder(fhirContext: FhirContext, schemaConverter: List(Literal("reference"), reference, Literal("display"), display) + } else if (definition.getImplementingClass == classOf[ValueSet.ValueSetExpansionContainsComponent]) { + + val system = dataTypeToUtf8Expr( + Invoke(inputObject, + "getSystemElement", + ObjectType(classOf[UriType]))) + + val abstract_ = Invoke(inputObject, + "getAbstract", + DataTypes.BooleanType) + + val inactive = Invoke(inputObject, + "getInactive", + DataTypes.BooleanType) + + val version = dataTypeToUtf8Expr( + Invoke(inputObject, + "getVersionElement", + ObjectType(classOf[org.hl7.fhir.dstu3.model.StringType]))) + + val code = dataTypeToUtf8Expr( + Invoke(inputObject, + "getCodeElement", + ObjectType(classOf[org.hl7.fhir.dstu3.model.CodeType]))) + + val display = dataTypeToUtf8Expr( + Invoke(inputObject, + "getDisplayElement", + ObjectType(classOf[org.hl7.fhir.dstu3.model.StringType]))) + + List(Literal("system"), system, + Literal("abstract"), abstract_, + Literal("inactive"), inactive, + Literal("version"), version, + Literal("code"), code, + Literal("display"), display) + } else { // Map to (name, value, name, value) expressions for child elements. definition.getChildren @@ -602,7 +639,7 @@ private[bunsen] class EncoderBuilder(fhirContext: FhirContext, schemaConverter: val child = EncoderBuilder.ObjectCast(deserializer, ObjectType(classOf[org.hl7.fhir.dstu3.model.Type])) - // If this item is not null, deseralize it. Otherwise attempt other choices. + // If this item is not null, deserialize it. Otherwise attempt other choices. 
expressions.If(IsNotNull(childPath), child, choiceToDeserializer(fhirChildTypes.tail, choiceChildDefinition, path)) @@ -646,7 +683,7 @@ private[bunsen] class EncoderBuilder(fhirContext: FhirContext, schemaConverter: val childPath = Some(addToPath(childDefinition.getElementName)) - // These must match on the RuntimeChild* structures rather than the defintions, + // These must match on the RuntimeChild* structures rather than the definitions, // since only the RuntimeChild* structures include default values to be passed // to constructors when deserializing some bound objects. val result = childDefinition match { @@ -831,6 +868,8 @@ private[bunsen] class EncoderBuilder(fhirContext: FhirContext, schemaConverter: .filter(child => definition.getImplementingClass != classOf[Reference] || child.getElementName == "reference" || child.getElementName == "display") + .filter(child => definition.getImplementingClass != classOf[ValueSet.ValueSetExpansionContainsComponent] || + child.getElementName != "contains") .flatMap(child => childToDeserializer(child, path)).toMap val compositeInstance = NewInstance(definition.getImplementingClass, @@ -840,7 +879,7 @@ private[bunsen] class EncoderBuilder(fhirContext: FhirContext, schemaConverter: val setters = childExpressions.map { case (name, expression) => // Option types are not visible in the getChildByName, so we fall back - // to looking for htem in the child list. + // to looking for them in the child list. val childDefinition = if (definition.getChildByName(name) != null) definition.getChildByName(name) else diff --git a/bunsen-core/src/main/scala/com/cerner/bunsen/SchemaConverter.scala b/bunsen-core/src/main/scala/com/cerner/bunsen/SchemaConverter.scala index 45e34078..53e52b99 100644 --- a/bunsen-core/src/main/scala/com/cerner/bunsen/SchemaConverter.scala +++ b/bunsen-core/src/main/scala/com/cerner/bunsen/SchemaConverter.scala @@ -2,6 +2,7 @@ package com.cerner.bunsen import ca.uhn.fhir.context._ import org.apache.spark.sql.types.{BooleanType => _, DateType => _, IntegerType => _, StringType => _, _} +import org.hl7.fhir.dstu3.model.ValueSet import org.hl7.fhir.dstu3.model._ import org.hl7.fhir.instance.model.api.{IBase, IBaseResource} @@ -43,6 +44,13 @@ object SchemaConverter { StructField("reference", DataTypes.StringType), StructField("display", DataTypes.StringType))) + private[bunsen] val containsSchema = StructType(List( + StructField("system", DataTypes.StringType), + StructField("abstract", DataTypes.BooleanType), + StructField("inactive", DataTypes.BooleanType), + StructField("version", DataTypes.StringType), + StructField("code", DataTypes.StringType), + StructField("display", DataTypes.StringType))) } /** @@ -112,6 +120,17 @@ class SchemaConverter(fhirContext: FhirContext) { List(StructField(childDefinition.getElementName, SchemaConverter.referenceSchema)) } + } else if (childDefinition.getChildByName(childDefinition.getElementName) + .getImplementingClass == classOf[ValueSet.ValueSetExpansionContainsComponent]) { + + if (childDefinition.getMax != 1) { + + List(StructField(childDefinition.getElementName, ArrayType(SchemaConverter.containsSchema))) + + } else { + List(StructField(childDefinition.getElementName, SchemaConverter.containsSchema)) + } + } else { val definition = childDefinition.getChildByName(childDefinition.getElementName) @@ -135,7 +154,7 @@ class SchemaConverter(fhirContext: FhirContext) { } /** - * Returns the Spark DataType used to encode the given FHIR primitve. 
+ * Returns the Spark DataType used to encode the given FHIR primitive. */ private[bunsen] def primitiveToDataType(definition: RuntimePrimitiveDatatypeDefinition): DataType = { diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/ValueSetUdfsTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/ValueSetUdfsTest.java index 996bac45..933845db 100644 --- a/bunsen-core/src/test/java/com/cerner/bunsen/ValueSetUdfsTest.java +++ b/bunsen-core/src/test/java/com/cerner/bunsen/ValueSetUdfsTest.java @@ -1,6 +1,8 @@ package com.cerner.bunsen; import com.cerner.bunsen.mappings.ConceptMaps; +import com.cerner.bunsen.mappings.Hierarchies; +import com.cerner.bunsen.mappings.ValueSets; import com.cerner.bunsen.mappings.broadcast.BroadcastableValueSets; import com.cerner.bunsen.mappings.systems.Loinc; import com.cerner.bunsen.mappings.systems.Snomed; @@ -12,7 +14,9 @@ import org.apache.spark.sql.SparkSession; import org.hl7.fhir.dstu3.model.CodeableConcept; import org.hl7.fhir.dstu3.model.Condition; +import org.hl7.fhir.dstu3.model.Enumerations.AdministrativeGender; import org.hl7.fhir.dstu3.model.Observation; +import org.hl7.fhir.dstu3.model.Patient; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -59,8 +63,18 @@ private static Condition condition(String id, String code) { return condition; } + private static Patient patient(String id, String marritalStatus) { + Patient patient = new Patient(); + + patient.setId(id); + + patient.setMaritalStatus(codeable("http://hl7.org/fhir/v3/MaritalStatus", marritalStatus)); + + return patient; + } + /** - * Sets up Spark and loads test mappings. + * Sets up Spark and loads test value sets. */ @BeforeClass public static void setUp() throws IOException { @@ -82,50 +96,45 @@ public static void setUp() throws IOException { spark.sql("create database " + ConceptMaps.MAPPING_DATABASE); - ConceptMaps empty = ConceptMaps.getEmpty(spark); - - ConceptMaps withLoinc = Loinc.withLoincHierarchy(spark, - empty, + Hierarchies withLoinc = Loinc.withLoincHierarchy(spark, + Hierarchies.getEmpty(spark), "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV", "2.56"); - ConceptMaps withLoincAndSnomed = Snomed.withRelationships(spark, + Hierarchies withLoincAndSnomed = Snomed.withRelationships(spark, withLoinc, "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT", "20160901"); + ValueSets withGender = ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets"); + BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() - .addDescendantsOf("bp", + .addCode("bp", + Loinc.LOINC_CODE_SYSTEM_URI, + "8462-4") + .addCode("albumin", Loinc.LOINC_CODE_SYSTEM_URI, - "8462-4", - Loinc.LOINC_HIERARCHY_MAPPING_URI, - "2.56") + "14959-1") + .addReference("married", + "urn:cerner:bunsen:valueset:married_maritalstatus") .addDescendantsOf("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "LP14419-3", - Loinc.LOINC_HIERARCHY_MAPPING_URI, - "2.56") - .addDescendantsOf("albumin", - Loinc.LOINC_CODE_SYSTEM_URI, - "14959-1", - Loinc.LOINC_HIERARCHY_MAPPING_URI, - "2.56") + Loinc.LOINC_HIERARCHY_URI) .addDescendantsOf("diabetes", Snomed.SNOMED_CODE_SYSTEM_URI, "73211009", - Snomed.SNOMED_HIERARCHY_MAPPING_URI, - "20160901") + Snomed.SNOMED_HIERARCHY_URI) .addDescendantsOf("blood_disorder", Snomed.SNOMED_CODE_SYSTEM_URI, "266992002", - Snomed.SNOMED_HIERARCHY_MAPPING_URI, - "20160901") + Snomed.SNOMED_HIERARCHY_URI) .addDescendantsOf("disorder_history", Snomed.SNOMED_CODE_SYSTEM_URI, "312850006", - Snomed.SNOMED_HIERARCHY_MAPPING_URI, - 
"20160901") - .build(spark, withLoincAndSnomed); + Snomed.SNOMED_HIERARCHY_URI) + .build(spark, withGender, withLoincAndSnomed); ValueSetUdfs.pushUdf(spark, valueSets); @@ -149,6 +158,14 @@ public static void setUp() throws IOException { encoders.of(Condition.class)); conditions.createOrReplaceTempView("test_snomed_cond"); + + Dataset patients = spark.createDataset( + ImmutableList.of( + patient("married", "M"), + patient("unmarried", "U")), + encoders.of(Patient.class)); + + patients.createOrReplaceTempView("test_valueset_patient"); } /** @@ -201,6 +218,7 @@ public void testSnomedHasAncestor() { @Test public void testHasCyclicAncestor() { + Dataset results = spark.sql("select id from test_snomed_cond " + "where in_valueset(code, 'blood_disorder')"); @@ -213,4 +231,14 @@ public void testHasCyclicAncestor() { Assert.assertEquals(1, ancestorResults.count()); Assert.assertEquals("history_of_anemia", ancestorResults.head().get(0)); } + + @Test + public void testHasValueSetCode() { + + Dataset results = spark.sql("select id from test_valueset_patient " + + "where in_valueset(maritalStatus, 'married')"); + + Assert.assertEquals(1, results.count()); + Assert.assertEquals("married", results.head().get(0)); + } } diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ConceptMapsTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ConceptMapsTest.java index d20af673..62216828 100644 --- a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ConceptMapsTest.java +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ConceptMapsTest.java @@ -1,19 +1,12 @@ package com.cerner.bunsen.mappings; -import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableSet; import java.io.IOException; import java.nio.file.Files; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; import org.hl7.fhir.dstu3.model.ConceptMap; -import org.hl7.fhir.dstu3.model.ConceptMap.ConceptMapGroupComponent; -import org.hl7.fhir.dstu3.model.ConceptMap.SourceElementComponent; -import org.hl7.fhir.dstu3.model.ConceptMap.TargetElementComponent; -import org.hl7.fhir.dstu3.model.Enumerations.ConceptMapEquivalence; import org.hl7.fhir.dstu3.model.UriType; import org.hl7.fhir.exceptions.FHIRException; import org.junit.AfterClass; @@ -22,7 +15,7 @@ import org.junit.Test; /** - * Unit test for storing, loading, and manipulationg ConceptMaps. + * Unit test for storing, loading, and manipulating ConceptMaps. */ public class ConceptMapsTest { @@ -80,42 +73,6 @@ private static final ConceptMap conceptMap(String url, String version) { return conceptMap; } - /** - * Returns a concept map that includes ancesters where - * A is B is C in this test map, and D is also C. 
- */ - private static final ConceptMap ancestorMap(String url, String version) { - ConceptMap conceptMap = new ConceptMap(); - - // - conceptMap.setUrl(url); - conceptMap.setVersion(version); - conceptMap.setExperimental(true); - conceptMap.setSource(new UriType("urn:test:valueset")); - conceptMap.setTarget(new UriType("urn:test:valueset")); - - ConceptMapGroupComponent group = conceptMap.addGroup() - .setSource("urn:test:system") - .setTarget("urn:test:system"); - - group.addElement() - .setCode("urn:test:code:a") - .addTarget().setCode("urn:test:code:b") - .setEquivalence(ConceptMapEquivalence.SUBSUMES); - - group.addElement() - .setCode("urn:test:code:b") - .addTarget().setCode("urn:test:code:c") - .setEquivalence(ConceptMapEquivalence.SUBSUMES); - - group.addElement() - .setCode("urn:test:code:d") - .addTarget().setCode("urn:test:code:c") - .setEquivalence(ConceptMapEquivalence.SUBSUMES); - - return conceptMap; - } - private static void checkMap(ConceptMap map, String url, String version) throws FHIRException { Assert.assertNotNull("Could not find concept map + " + url, map); @@ -147,80 +104,6 @@ public void testCreateSimpleMappings() throws FHIRException { checkMap(secondMap, "urn:cerner:map:othermap", "1"); } - @Test - public void testMappingsAreSorted() { - - String database = "test_mappings_are_sorted"; - - spark.sql("create database " + database); - - // Ensure that mappings we create are sorted - ConceptMap testMap = new ConceptMap(); - - testMap.setUrl("urn:test"); - testMap.setVersion("0.1"); - testMap.setExperimental(true); - testMap.setSource(new UriType("urn:source:valueset")); - testMap.setTarget(new UriType("urn:target:valueset")); - - SourceElementComponent element = testMap.addGroup() - .setSource("urn:test:x") - .setTarget("urn:test:y") - .addElement(); - - element.setCode("urn:test:code:4") - .addTarget() - .setCode("urn:test:code:5"); - - - ConceptMaps.getEmpty(spark) - .withConceptMaps(testMap) - .writeToDatabase(database); - - ConceptMap reloaded = ConceptMaps - .getFromDatabase(spark, database) - .getConceptMap("urn:test", "0.1"); - - assertContentIsSorted(reloaded); - } - - /** - * Helper function to assert the concepts are sorted within the given map. - */ - private void assertContentIsSorted(ConceptMap map) { - - ConceptMapGroupComponent previousGroup = null; - - for (ConceptMapGroupComponent group: map.getGroup()) { - - Assert.assertTrue(previousGroup == null - || ComparisonChain.start() - .compare(group.getSource(), previousGroup.getSource()) - .compare(group.getTarget(), previousGroup.getTarget()) - .result() >= 0); - - previousGroup = group; - - SourceElementComponent previousElement = null; - - for (SourceElementComponent element : group.getElement()) { - - Assert.assertTrue(previousElement == null - || element.getCode().compareTo(previousElement.getCode()) >= 0); - - previousElement = element; - - TargetElementComponent previousTarget = null; - - for (TargetElementComponent target: element.getTarget()) { - - Assert.assertTrue(previousTarget == null - || target.getCode().compareTo(previousTarget.getCode()) >= 0); - } - } - } - } - @Test public void testAppendMappings() throws FHIRException { @@ -245,37 +128,25 @@ public void testAppendMappings() throws FHIRException { checkMap(newMap, "urn:cerner:map:newmap", "1"); } - @Test - public void testModifyMapping() throws FHIRException { - - ConceptMaps original = ConceptMaps.getEmpty(spark) - .withConceptMaps(conceptMap("urn:cerner:map:testmap", "1")); - - // Modify the map to ensure the change is reflected. 
- ConceptMap modifiedMap = original.getConceptMap("urn:cerner:map:testmap", "1"); - - modifiedMap.getGroup() - .get(0) - .addElement() - .setCode("urn:new:source:code") - .addTarget() - .setCode("urn:new:target:code"); + @Test (expected = IllegalArgumentException.class) + public void testIncludingDuplicateConceptMapThrowsException() { - ConceptMaps modified = original.withConceptMaps(modifiedMap); + ConceptMaps.getEmpty(spark) + .withConceptMaps(conceptMap("urn:cerner:map:testmap", "1"), + conceptMap("urn:cerner:map:testmap", "1")); + } - // The new mapping should be visible in the modified but not the original. - Assert.assertEquals(1, original.getMappings().count()); - Assert.assertEquals(2, modified.getMappings().count()); + @Test (expected = IllegalArgumentException.class) + public void testAddingDuplicateConceptMapsThrowsException() throws FHIRException { - ConceptMap reloadedOriginal = original.getConceptMap("urn:cerner:map:testmap", "1"); - ConceptMap reloadedModified = modified.getConceptMap("urn:cerner:map:testmap", "1"); + ConceptMaps maps = ConceptMaps.getEmpty(spark) + .withConceptMaps(conceptMap("urn:cerner:map:testmap", "1")); - Assert.assertEquals(1, reloadedOriginal.getGroup().get(0).getElement().size()); - Assert.assertEquals(2, reloadedModified.getGroup().get(0).getElement().size()); + maps.withConceptMaps(conceptMap("urn:cerner:map:testmap", "1")); } @Test - public void testFromDirectory() { + public void testWithMapsFromDirectory() { ConceptMaps maps = ConceptMaps.getEmpty(spark) .withMapsFromDirectory("src/test/resources/conceptmaps"); @@ -293,99 +164,56 @@ public void testFromDirectory() { } @Test - public void testWriteToNewTables() { + public void testWithDisjointMapsFromDirectory() { - spark.sql("create database test_mapping_write"); + String database = "test_conceptmaps_disjoint"; + spark.sql("CREATE DATABASE " + database); - ConceptMap ancestorMap = ancestorMap("urn:cerner:test:write:ancestormap", "0"); - - ConceptMaps maps = ConceptMaps.getEmpty(spark) + ConceptMaps.getEmpty(spark) .withMapsFromDirectory("src/test/resources/conceptmaps") - .withConceptMaps(ancestorMap); - - maps.writeToDatabase("test_mapping_write"); + .writeToDatabase(database); - ConceptMaps reloadedMaps = ConceptMaps.getFromDatabase(spark, "test_mapping_write"); + ConceptMaps maps = ConceptMaps.getFromDatabase(spark, database) + .withDisjointMapsFromDirectory("src/test/resources/conceptmaps", database); - ConceptMap genderMap = reloadedMaps.getConceptMap( + ConceptMap genderMap = maps.getConceptMap( "urn:cerner:poprec:fhir:conceptmap:demographics:gender", "0.0.1"); + Assert.assertEquals(1, maps.getMaps().count()); + Assert.assertNotNull(genderMap); Assert.assertEquals("urn:cerner:poprec:fhir:conceptmap:demographics:gender", genderMap.getUrl()); Assert.assertEquals("0.0.1", genderMap.getVersion()); - - Assert.assertEquals(3, genderMap.getGroup().size()); - - Assert.assertEquals(4, reloadedMaps.getAncestors().count()); } @Test - public void testUpdateMap() { - - spark.sql("create database test_mapping_update"); - - ConceptMaps.getEmpty(spark) - .withConceptMaps(conceptMap("urn:cerner:map:testmap", "1")) - .writeToDatabase("test_mapping_update"); - - ConceptMaps original = ConceptMaps.getFromDatabase(spark, "test_mapping_update"); - - Assert.assertEquals(1, original.getMappings().count()); - - // Modify the map to ensure the change is reflected. 
- ConceptMap modifiedMap = original.getConceptMap("urn:cerner:map:testmap", "1"); - - // The test adds codes lexigraphically later than the original so - // it is deeply equal to the reloaded version, which sorts elements. - modifiedMap.getGroup() - .get(0) - .addElement() - .setCode("urn:source:code:new") - .addTarget() - .setCode("urn:target:code:new"); - - original.withConceptMaps(modifiedMap) - .writeToDatabase("test_mapping_update"); - - ConceptMaps reloaded = ConceptMaps.getFromDatabase(spark, "test_mapping_update"); - - // Ensure the new mapping is visible in the modified map. - Assert.assertEquals(2, reloaded.getMappings().count()); - - ConceptMap reloadedMap = reloaded.getConceptMap("urn:cerner:map:testmap", "1"); - - Assert.assertTrue(reloadedMap.equalsDeep(modifiedMap)); - } - - @Test - public void testPreserveUnchangedPartitions() { + public void testWriteToNewTables() { - String database = "test_preserve_unchanged"; + spark.sql("create database test_mapping_write"); - spark.sql("create database " + database); + ConceptMaps maps = ConceptMaps.getEmpty(spark) + .withMapsFromDirectory("src/test/resources/conceptmaps"); - ConceptMap original = conceptMap("urn:cerner:map:testmap", "1"); + maps.writeToDatabase("test_mapping_write"); - ConceptMaps.getEmpty(spark) - .withConceptMaps(original, - conceptMap("urn:cerner:map:othermap", "1")) - .writeToDatabase(database); + ConceptMaps reloadedMaps = ConceptMaps.getFromDatabase(spark, "test_mapping_write"); - ConceptMaps.getEmpty(spark) - .withConceptMaps(conceptMap("urn:cerner:map:newmap", "1")) - .writeToDatabase(database); + ConceptMap genderMap = reloadedMaps.getConceptMap( + "urn:cerner:poprec:fhir:conceptmap:demographics:gender", + "0.0.1"); - ConceptMaps loaded = ConceptMaps.getFromDatabase(spark, database); + Assert.assertNotNull(genderMap); - Assert.assertEquals(3, loaded.getMappings().count()); + Assert.assertEquals("urn:cerner:poprec:fhir:conceptmap:demographics:gender", + genderMap.getUrl()); - ConceptMap reloaded = loaded.getConceptMap("urn:cerner:map:testmap", "1"); + Assert.assertEquals("0.0.1", genderMap.getVersion()); - Assert.assertTrue(original.equalsDeep(reloaded)); + Assert.assertEquals(3, genderMap.getGroup().size()); } @Test @@ -440,18 +268,18 @@ public void testGetLatestExperimental() { ConceptMaps conceptMaps = ConceptMaps.getFromDatabase(spark, database); Dataset latestWithExperimental = conceptMaps.getLatestMappings( - ImmutableSet.of("urn:cerner:map:expmap"), - true); + ImmutableSet.of("urn:cerner:map:expmap"), + true); - // We include experimental versions, so we shoudl see that. + // We include experimental versions, so we should see that. Assert.assertEquals(1, latestWithExperimental .where("conceptMapUri == 'urn:cerner:map:expmap' and conceptMapVersion == '2'") .count()); Dataset latestWithoutExperimental = conceptMaps.getLatestMappings( - ImmutableSet.of("urn:cerner:map:expmap"), - false); + ImmutableSet.of("urn:cerner:map:expmap"), + false); // Version 1 is not experimental, so we should see it. 
Assert.assertEquals(1, @@ -468,43 +296,24 @@ public void testGetLatestExperimental() { } @Test - public void testAncestors() { - - ConceptMap conceptMap = ancestorMap("urn:cerner:ancestormap", "0"); - - ConceptMaps withHierarchy = ConceptMaps.getEmpty(spark) - .withConceptMaps(conceptMap); - - Set expected = ImmutableSet.of( - new Ancestor("urn:cerner:ancestormap", - "0", - "urn:test:system", - "urn:test:code:a", - "urn:test:system", - "urn:test:code:b"), - new Ancestor("urn:cerner:ancestormap", - "0", - "urn:test:system", - "urn:test:code:a", - "urn:test:system", - "urn:test:code:c"), - new Ancestor("urn:cerner:ancestormap", - "0", - "urn:test:system", - "urn:test:code:b", - "urn:test:system", - "urn:test:code:c"), - new Ancestor("urn:cerner:ancestormap", - "0", - "urn:test:system", - "urn:test:code:d", - "urn:test:system", - "urn:test:code:c")); - - // Ensure all of the expected values are present. - List actual = withHierarchy.getAncestors().collectAsList(); - - Assert.assertEquals(expected.size(), actual.size()); - Assert.assertTrue(expected.containsAll(actual)); + public void testExpandMappings() { + ConceptMap conceptMap = ConceptMaps.getEmpty(spark) + .withConceptMaps(conceptMap("urn:cerner:conceptmap:map", "1")) + .getConceptMap("urn:cerner:conceptmap:map", "1"); + + List mappings = ConceptMaps.expandMappings(conceptMap); + + Mapping expectedValue = new Mapping("urn:cerner:conceptmap:map", + "1", + "urn:source:valueset", + "urn:target:valueset", + "urn:source:system", + "urn:source:code:a", + "urn:target:system", + "urn:target:code:1", + Mapping.EQUIVALENT); + + Assert.assertEquals(1, mappings.size()); + Assert.assertEquals(expectedValue, mappings.get(0)); } } diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/HierarchiesTests.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/HierarchiesTests.java new file mode 100644 index 00000000..532c5110 --- /dev/null +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/HierarchiesTests.java @@ -0,0 +1,193 @@ +package com.cerner.bunsen.mappings; + +import com.cerner.bunsen.mappings.Hierarchies.HierarchicalElement; +import com.cerner.bunsen.mappings.systems.Loinc; +import com.cerner.bunsen.mappings.systems.Snomed; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import java.io.IOException; +import java.nio.file.Files; +import java.util.List; +import java.util.Set; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SparkSession; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for storing, loading, and manipulating hierarchies. + */ +public class HierarchiesTests { + + private static SparkSession spark; + + private static final String HIERARCHY_URI = Hierarchies.HIERARCHY_URI_PREFIX + "testhierarchy"; + + /** + * Sets up Spark. + */ + @BeforeClass + public static void setUp() throws IOException { + + // Create a local spark session using an in-memory metastore. + // We must also use Hive and set the partition mode to non-strict to + // support dynamic partitions. + spark = SparkSession.builder() + .master("local[2]") + .appName("ConceptMapsTest") + .enableHiveSupport() + .config("javax.jdo.option.ConnectionURL", + "jdbc:derby:memory:metastore_db;create=true") + .config("hive.exec.dynamic.partition.mode", + "nonstrict") + .config("spark.sql.warehouse.dir", + Files.createTempDirectory("spark_warehouse").toString()) + .getOrCreate(); + } + + /** + * Tears down Spark. 
+ */ + @AfterClass + public static void tearDown() { + spark.stop(); + spark = null; + } + + private static HierarchicalElement element(String ancestor, String descendant) { + return new HierarchicalElement("urn:cerner:system", + ancestor, + "urn:cerner:system", + descendant); + } + + private static Ancestor ancestor(String ancestor, String descendant) { + return new Ancestor(HIERARCHY_URI, + "1", + "urn:cerner:system", + descendant, + "urn:cerner:system", + ancestor); + } + + private static Dataset getElements() { + return spark.createDataset(ImmutableList.of( + element("a", "b"), + element("a", "d"), + element("b", "c"), + element("e", "f")), + Hierarchies.getHierarchicalElementEncoder()); + } + + private static final Set ANCESTOR_CLOSURE = ImmutableSet.of( + ancestor("a", "b"), + ancestor("a", "c"), + ancestor("a", "d"), + ancestor("b", "c"), + ancestor("e", "f")); + + @Test + public void testCreateHierarchy() { + + Hierarchies hierarchies = Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()); + + List ancestors = hierarchies.getAncestors().collectAsList(); + List members = hierarchies.getMembers().collectAsList(); + + Assert.assertEquals(5, ancestors.size()); + Assert.assertTrue(ANCESTOR_CLOSURE.containsAll(ancestors)); + + Assert.assertEquals(1, members.size()); + Assert.assertEquals(new UrlAndVersion(HIERARCHY_URI, "1"), members.get(0)); + } + + @Test + public void testAppendHierarchies() { + + Hierarchies withLoinc = Loinc.withLoincHierarchy(spark, + Hierarchies.getEmpty(spark), + "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV", + "2.56"); + + Hierarchies withSnomed = Snomed.withRelationships(spark, + withLoinc, + "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT", + "20160901"); + + Hierarchies hierarchies = Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()) + .withHierarchies(withSnomed); + + List ancestors = hierarchies.getAncestors().collectAsList(); + List members = hierarchies.getMembers().collectAsList(); + + Set expected = ImmutableSet.of( + new UrlAndVersion(Loinc.LOINC_HIERARCHY_URI, "2.56"), + new UrlAndVersion(Snomed.SNOMED_HIERARCHY_URI, "20160901"), + new UrlAndVersion(HIERARCHY_URI, "1")); + + Assert.assertEquals(35, ancestors.size()); + + Assert.assertEquals(3, members.size()); + Assert.assertTrue(expected.containsAll(members)); + } + + @Test (expected = IllegalArgumentException.class) + public void testAppendDuplicateHierarchyThrowsException() { + + Hierarchies hierarchies = Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()); + + hierarchies.withHierarchyElements(HIERARCHY_URI, "1", getElements()); + } + + @Test (expected = IllegalArgumentException.class) + public void testAppendDuplicateHierarchiesThrowsException() { + + Hierarchies hierarchies = Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()); + + hierarchies.withHierarchies(hierarchies); + } + + @Test + public void testWriteToNewTables() { + + String database = "test_hierarchies_write"; + spark.sql("CREATE DATABASE " + database); + + Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()) + .writeToDatabase(database); + + Hierarchies hierarchies = Hierarchies.getFromDatabase(spark, database); + + List ancestors = hierarchies.getAncestors().collectAsList(); + List members = hierarchies.getMembers().collectAsList(); + + Assert.assertEquals(5, ancestors.size()); + Assert.assertTrue(ANCESTOR_CLOSURE.containsAll(ancestors)); + + 
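// Five ancestors are expected from the four elements in getElements() because the
// hierarchy is closed transitively: a->b, a->d, b->c, and e->f also imply the
// indirect pair a->c, which is why ANCESTOR_CLOSURE above contains ancestor("a", "c").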
Assert.assertEquals(1, members.size()); + Assert.assertEquals(new UrlAndVersion(HIERARCHY_URI, "1"), members.get(0)); + } + + @Test (expected = IllegalArgumentException.class) + public void testWritingDuplicateHierarchiesThrowsException() { + + String database = "duplicate_hierarchies_write"; + spark.sql("CREATE DATABASE " + database); + + Hierarchies hierarchies = Hierarchies.getEmpty(spark) + .withHierarchyElements(HIERARCHY_URI, "1", getElements()); + + hierarchies.writeToDatabase(database); + + Hierarchies reloaded = Hierarchies.getFromDatabase(spark, database); + + reloaded.writeToDatabase(database); + } +} diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ValueSetsTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ValueSetsTest.java new file mode 100644 index 00000000..c2915f9c --- /dev/null +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/ValueSetsTest.java @@ -0,0 +1,346 @@ +package com.cerner.bunsen.mappings; + +import com.google.common.collect.ImmutableSet; +import java.io.IOException; +import java.nio.file.Files; +import java.text.MessageFormat; +import java.util.List; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SparkSession; +import org.hl7.fhir.dstu3.model.ValueSet; +import org.hl7.fhir.dstu3.model.ValueSet.ConceptSetComponent; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit test for storing, loading, and manipulating ValueSets. + */ +public class ValueSetsTest { + + private static SparkSession spark; + + /** + * Sets up Spark. + */ + @BeforeClass + public static void setUp() throws IOException { + + // Create a local spark session using an in-memory metastore. + // We must also use Hive and set the partition mode to non-strict to + // support dynamic partitions. + spark = SparkSession.builder() + .master("local[2]") + .appName("ConceptMapsTest") + .enableHiveSupport() + .config("javax.jdo.option.ConnectionURL", + "jdbc:derby:memory:metastore_db;create=true") + .config("hive.exec.dynamic.partition.mode", + "nonstrict") + .config("spark.sql.warehouse.dir", + Files.createTempDirectory("spark_warehouse").toString()) + .getOrCreate(); + + spark.sql("create database mappingtestdb"); + } + + /** + * Tears down Spark. + */ + @AfterClass + public static void tearDown() { + spark.stop(); + spark = null; + } + + private static ValueSet valueSet(String valueSetUrl, String valueSetVersion) { + + return valueSet(valueSetUrl, valueSetVersion, "a"); + } + + private static ValueSet valueSet(String valueSetUrl, String valueSetVersion, String... 
codes) { + + ValueSet valueSet = new ValueSet(); + + valueSet.setUrl(valueSetUrl); + valueSet.setVersion(valueSetVersion); + valueSet.setExperimental(true); + + ConceptSetComponent inclusion = valueSet.getCompose().addInclude(); + + inclusion.setSystem("urn:cerner:system").setVersion("1"); + + for (String code: codes) { + + inclusion.addConcept().setCode(code); + } + + return valueSet; + } + + private static void checkValueSet(ValueSet valueSet, String url, String version) { + + Assert.assertNotNull( + MessageFormat.format("Could not find value set for url {0} and version {1}", url, version), + valueSet); + + Assert.assertEquals(url, valueSet.getUrl()); + Assert.assertEquals(version, valueSet.getVersion()); + + ConceptSetComponent inclusion = valueSet.getCompose().getIncludeFirstRep(); + + Assert.assertEquals("urn:cerner:system", inclusion.getSystem()); + Assert.assertEquals("1", inclusion.getVersion()); + Assert.assertEquals("a", inclusion.getConceptFirstRep().getCode()); + + Assert.assertEquals(1, valueSet.getCompose().getInclude().size()); + } + + @Test + public void testCreateSimpleValueSets() { + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset1", "1"), + valueSet("urn:cerner:valueset:valueset2", "1")); + + Dataset values = valueSets.getValues(); + + Assert.assertEquals(2, values.count()); + + ValueSet firstValueSet = valueSets.getValueSet("urn:cerner:valueset:valueset1", "1"); + checkValueSet(firstValueSet, "urn:cerner:valueset:valueset1", "1"); + + ValueSet secondValueSet = valueSets.getValueSet("urn:cerner:valueset:valueset2", "1"); + checkValueSet(secondValueSet, "urn:cerner:valueset:valueset2", "1"); + } + + @Test + public void testAppendValueSets() { + ValueSets original = ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset1", "1"), + valueSet("urn:cerner:valueset:valueset2", "1")); + + ValueSets valueSets = original.withValueSets(valueSet("urn:cerner:valueset:valueset3", "1")); + + Assert.assertEquals(2, original.getValues().count()); + Assert.assertEquals(3, valueSets.getValues().count()); + + ValueSet firstValueSet = valueSets.getValueSet("urn:cerner:valueset:valueset1", "1"); + checkValueSet(firstValueSet, "urn:cerner:valueset:valueset1", "1"); + + ValueSet secondValueSet = valueSets.getValueSet("urn:cerner:valueset:valueset2", "1"); + checkValueSet(secondValueSet, "urn:cerner:valueset:valueset2", "1"); + + ValueSet newValueSet = valueSets.getValueSet("urn:cerner:valueset:valueset3", "1"); + checkValueSet(newValueSet, "urn:cerner:valueset:valueset3", "1"); + } + + @Test (expected = IllegalArgumentException.class) + public void testIncludingDuplicateValueSetsThrowsException() { + + ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset", "1"), + valueSet("urn:cerner:valueset:valueset", "1")); + } + + @Test (expected = IllegalArgumentException.class) + public void testAddingDuplicateValueSetsThrowsException() { + + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset", "1")); + + valueSets.withValueSets(valueSet("urn:cerner:valueset:valueset", "1")); + } + + @Test + public void testWithValueSetsFromDirectory() { + + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets"); + + ValueSet marriedValueSet = valueSets.getValueSet( + "urn:cerner:bunsen:valueset:married_maritalstatus", + "0.0.1"); + + Assert.assertNotNull(marriedValueSet); + 
Assert.assertEquals("urn:cerner:bunsen:valueset:married_maritalstatus", + marriedValueSet.getUrl()); + Assert.assertEquals("0.0.1", marriedValueSet.getVersion()); + } + + @Test + public void testWithDisjointValueSetsFromDirectory() { + + String database = "test_valuesets_disjoint"; + spark.sql("CREATE DATABASE " + database); + + ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets") + .writeToDatabase(database); + + ValueSets valueSets = ValueSets.getFromDatabase(spark, database) + .withDisjointValueSetsFromDirectory("src/test/resources/valuesets", database); + + ValueSet marriedValueSet = valueSets.getValueSet( + "urn:cerner:bunsen:valueset:married_maritalstatus", + "0.0.1"); + + Assert.assertEquals(1, valueSets.getValueSets().count()); + + Assert.assertNotNull(marriedValueSet); + Assert.assertEquals("urn:cerner:bunsen:valueset:married_maritalstatus", + marriedValueSet.getUrl()); + Assert.assertEquals("0.0.1", marriedValueSet.getVersion()); + + } + + @Test + public void testWriteToNewTables() { + + String database = "test_valuesets_write"; + spark.sql("CREATE DATABASE " + database); + + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets"); + + valueSets.writeToDatabase(database); + + ValueSets reloadedValueSets = ValueSets.getFromDatabase(spark, database); + + ValueSet marriedValueSet = reloadedValueSets + .getValueSet("urn:cerner:bunsen:valueset:married_maritalstatus", "0.0.1"); + + Assert.assertNotNull(marriedValueSet); + Assert.assertEquals("urn:cerner:bunsen:valueset:married_maritalstatus", + marriedValueSet.getUrl()); + Assert.assertEquals("0.0.1", marriedValueSet.getVersion()); + Assert.assertEquals(1, marriedValueSet.getCompose().getInclude().size()); + } + + @Test + public void testValueSetsIncludesNoConcepts() { + + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset1", "1"), + valueSet("urn:cerner:valueset:valueset2", "1")); + + valueSets.getValueSets() + .collectAsList() + .forEach(valueSet -> valueSet.getCompose() + .getInclude() + .forEach(inclusion -> Assert.assertTrue(inclusion.getConcept().isEmpty()))); + } + + @Test (expected = IllegalArgumentException.class) + public void testWritingDuplicateValueSetsThrowsException() { + + String database = "duplicate_valuesets_write"; + spark.sql("CREATE DATABASE " + database); + + ValueSets valueSets = ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets"); + + valueSets.writeToDatabase(database); + + ValueSets reloadedValueSets = ValueSets.getFromDatabase(spark, database); + + reloadedValueSets.writeToDatabase(database); + } + + @Test + public void testGetLatest() { + + String database = "test_get_latest"; + spark.sql("CREATE DATABASE " + database); + + ValueSets.getEmpty(spark) + .withValueSets( + valueSet("urn:cerner:valueset:newvalueset", "1"), + valueSet("urn:cerner:valueset:newvalueset", "2"), + valueSet("urn:cerner:valueset:othervalueset", "1")) + .writeToDatabase(database); + + Dataset latest = ValueSets.getFromDatabase(spark, database) + .getLatestValues(ImmutableSet.of("urn:cerner:valueset:newvalueset", + "urn:cerner:valueset:othervalueset"), + true); + + latest.cache(); + + Assert.assertEquals(2, latest.count()); + + Assert.assertEquals(0, latest.where( + "valueSetUri == 'urn:cerner:valueset:newvalueset' AND valueSetVersion == '1'") + .count()); + + Assert.assertEquals(1, latest.where( + "valueSetUri == 'urn:cerner:valueset:newvalueset' AND 
valueSetVersion == '2'") + .count()); + + Assert.assertEquals(1, latest.where( + "valueSetUri == 'urn:cerner:valueset:othervalueset' AND valueSetVersion == '1'") + .count()); + } + + @Test + public void testGetLatestExperimental() { + + String database = "test_get_latest_experimental"; + spark.sql("CREATE DATABASE " + database); + + ValueSets.getEmpty(spark) + .withValueSets( + valueSet("urn:cerner:valueset:expvalueset", "1").setExperimental(false), + valueSet("urn:cerner:valueset:expvalueset", "2"), + valueSet("urn:cerner:valueset:otherexpvalueset", "1")) + .writeToDatabase(database); + + ValueSets valueSets = ValueSets.getFromDatabase(spark, database); + + Dataset latestWithExperimental = valueSets.getLatestValues( + ImmutableSet.of("urn:cerner:valueset:expvalueset"), + true); + + // We include experimental versions, so we should see that. + Assert.assertEquals(1, + latestWithExperimental + .where("valueSetUri == 'urn:cerner:valueset:expvalueset' and valueSetVersion == '2'") + .count()); + + Dataset latestWithoutExperimental = valueSets.getLatestValues( + ImmutableSet.of("urn:cerner:valueset:expvalueset"), + false); + + // Version 1 is not experimental, so we should see it. + Assert.assertEquals(1, + latestWithoutExperimental + .where("valueSetUri == 'urn:cerner:valueset:expvalueset' and valueSetVersion == '1'") + .count()); + + // Loading a map with only experimental versions should find nothing. + Dataset onlyExperimentalValueSets = valueSets.getLatestValues( + ImmutableSet.of("urn:cerner:valueset:otherexpvalueset"), + false); + + Assert.assertEquals(0, onlyExperimentalValueSets.count()); + } + + @Test + public void testExpandValues() { + + ValueSet valueSet = ValueSets.getEmpty(spark) + .withValueSets(valueSet("urn:cerner:valueset:valueset", "1")) + .getValueSet("urn:cerner:valueset:valueset", "1"); + + List values = ValueSets.expandValues(valueSet); + + Value expectedValue = new Value("urn:cerner:valueset:valueset", + "1", + "urn:cerner:system", + "1", + "a"); + + Assert.assertEquals(1, values.size()); + Assert.assertEquals(expectedValue, values.get(0)); + } +} diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSetsTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSetsTest.java index 15de704b..b3ba1541 100644 --- a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSetsTest.java +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/broadcast/BroadcastableValueSetsTest.java @@ -1,6 +1,7 @@ package com.cerner.bunsen.mappings.broadcast; -import com.cerner.bunsen.mappings.ConceptMaps; +import com.cerner.bunsen.mappings.Hierarchies; +import com.cerner.bunsen.mappings.ValueSets; import com.cerner.bunsen.mappings.systems.Loinc; import com.cerner.bunsen.mappings.systems.Snomed; import com.google.common.collect.ImmutableSet; @@ -22,7 +23,7 @@ public class BroadcastableValueSetsTest { private static SparkSession spark; /** - * Sets up Spark and loads test mappings. + * Sets up Spark and loads test value sets. 
*/ @BeforeClass public static void setUp() throws IOException { @@ -42,21 +43,23 @@ public static void setUp() throws IOException { Files.createTempDirectory("spark_warehouse").toString()) .getOrCreate(); - spark.sql("create database " + ConceptMaps.MAPPING_DATABASE); + spark.sql("CREATE DATABASE ontologies"); - ConceptMaps empty = ConceptMaps.getEmpty(spark); - - ConceptMaps withLoinc = Loinc.withLoincHierarchy(spark, - empty, + Hierarchies withLoinc = Loinc.withLoincHierarchy(spark, + Hierarchies.getEmpty(spark), "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV", "2.56"); - ConceptMaps withLoincAndSnomed = Snomed.withRelationships(spark, + Hierarchies withLoincAndSnomed = Snomed.withRelationships(spark, withLoinc, "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT", "20160901"); - withLoincAndSnomed.writeToDatabase(ConceptMaps.MAPPING_DATABASE); + withLoincAndSnomed.writeToDatabase(Hierarchies.HIERARCHIES_DATABASE); + + ValueSets.getEmpty(spark) + .withValueSetsFromDirectory("src/test/resources/valuesets") + .writeToDatabase(ValueSets.VALUE_SETS_DATABASE); } /** @@ -72,26 +75,26 @@ public static void tearDown() { public void testCustom() { BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() - .addCode("testparent", Loinc.LOINC_CODE_SYSTEM_URI, "123") - .addCode("testparent", Loinc.LOINC_CODE_SYSTEM_URI, "456") - .addCode("testother", Loinc.LOINC_CODE_SYSTEM_URI, "789") - .build(spark, ConceptMaps.getDefault(spark)); + .addCode("testparent", "urn:cerner:system", "123") + .addCode("testparent", "urn:cerner:system", "456") + .addCode("testother", "urn:cerner:system", "789") + .build(spark, ValueSets.getEmpty(spark), Hierarchies.getEmpty(spark)); Assert.assertTrue(valueSets.hasCode("testparent", - Loinc.LOINC_CODE_SYSTEM_URI, + "urn:cerner:system", "123")); Assert.assertTrue(valueSets.hasCode("testparent", - Loinc.LOINC_CODE_SYSTEM_URI, + "urn:cerner:system", "456")); // This value should be in the other valueset, so check for false. 
Assert.assertFalse(valueSets.hasCode("testparent", - Loinc.LOINC_CODE_SYSTEM_URI, + "urn:cerner:system", "789")); Assert.assertTrue(valueSets.hasCode("testother", - Loinc.LOINC_CODE_SYSTEM_URI, + "urn:cerner:system", "789")); } @@ -99,23 +102,25 @@ public void testCustom() { public void testLoadLoinc() { BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() - .addDescendantsOf("bp", + .addCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, - "8462-4", - Loinc.LOINC_HIERARCHY_MAPPING_URI, - "2.56") + "8462-4") .addDescendantsOf("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "LP14419-3", - Loinc.LOINC_HIERARCHY_MAPPING_URI, + Loinc.LOINC_HIERARCHY_URI, "2.56") - .build(spark, ConceptMaps.getDefault(spark)); + .build(spark, ValueSets.getEmpty(spark), Hierarchies.getDefault(spark)); Assert.assertTrue(valueSets.hasCode("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "5821-4")); // "is a" LP14419-3 - Assert.assertFalse(valueSets.hasCode("leukocytes", + Assert.assertTrue(valueSets.hasCode("leukocytes", + Loinc.LOINC_CODE_SYSTEM_URI, + "LP14419-3")); // value set includes parent code + + Assert.assertFalse(valueSets.hasCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, "1234-5")); // not "is a" LP14419-3 } @@ -124,47 +129,90 @@ public void testLoadLoinc() { public void testLoadLatestLoinc() { BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() - .addDescendantsOf("bp", + .addCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, - "8462-4", - Loinc.LOINC_HIERARCHY_MAPPING_URI) + "8462-4") .addDescendantsOf("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "LP14419-3", - Loinc.LOINC_HIERARCHY_MAPPING_URI) - .build(spark, ConceptMaps.getDefault(spark)); + Loinc.LOINC_HIERARCHY_URI) + .build(spark, ValueSets.getEmpty(spark), Hierarchies.getDefault(spark)); Assert.assertTrue(valueSets.hasCode("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "5821-4")); // "is a" LP14419-3 - Assert.assertFalse(valueSets.hasCode("leukocytes", + Assert.assertTrue(valueSets.hasCode("leukocytes", + Loinc.LOINC_CODE_SYSTEM_URI, + "LP14419-3")); // value set includes parent code + + Assert.assertFalse(valueSets.hasCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, "1234-5")); // not "is a" LP14419-3 } @Test - public void testGetValueset() { + public void testLoadReference() { BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() - .addDescendantsOf("bp", + .addReference("married", + "urn:cerner:bunsen:valueset:married_maritalstatus", + "0.0.1") + .build(spark, ValueSets.getDefault(spark), Hierarchies.getEmpty(spark)); + + Assert.assertTrue(valueSets.hasCode("married", + "http://hl7.org/fhir/v3/MaritalStatus", + "M")); + + Assert.assertFalse(valueSets.hasCode("married", + "http://hl7.org/fhir/v3/MaritalStatus", + "U")); + } + + @Test + public void testLoadLatestReference() { + + BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() + .addReference("married", + "urn:cerner:bunsen:valueset:married_maritalstatus") + .build(spark, ValueSets.getDefault(spark), Hierarchies.getEmpty(spark)); + + Assert.assertTrue(valueSets.hasCode("married", + "http://hl7.org/fhir/v3/MaritalStatus", + "M")); + + Assert.assertFalse(valueSets.hasCode("married", + "http://hl7.org/fhir/v3/MaritalStatus", + "U")); + } + + @Test + public void testGetValueSet() { + + BroadcastableValueSets valueSets = BroadcastableValueSets.newBuilder() + .addCode("bp", Loinc.LOINC_CODE_SYSTEM_URI, - "8462-4", - Loinc.LOINC_HIERARCHY_MAPPING_URI) + "8462-4") .addDescendantsOf("leukocytes", Loinc.LOINC_CODE_SYSTEM_URI, "LP14419-3", - Loinc.LOINC_HIERARCHY_MAPPING_URI) - 
.build(spark, ConceptMaps.getDefault(spark)); + Loinc.LOINC_HIERARCHY_URI) + .addReference("married", + "urn:cerner:bunsen:valueset:married_maritalstatus") + .build(spark, ValueSets.getDefault(spark), Hierarchies.getDefault(spark)); - Assert.assertTrue(ImmutableSet.of("bp", "leukocytes") + Assert.assertTrue(ImmutableSet.of("bp", "leukocytes", "married") .containsAll(valueSets.getReferenceNames())); Map> leukocyteValues = valueSets.getValues("leukocytes"); + Map> genderValues = valueSets.getValues("married"); Assert.assertTrue(leukocyteValues.containsKey("http://loinc.org")); - Assert.assertTrue(ImmutableSet.of("LP14419-3", "5821-4") .containsAll(leukocyteValues.get("http://loinc.org"))); + + Assert.assertTrue(genderValues.containsKey("http://hl7.org/fhir/v3/MaritalStatus")); + Assert.assertTrue(ImmutableSet.of("M") + .containsAll(genderValues.get("http://hl7.org/fhir/v3/MaritalStatus"))); } } diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/LoincTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/LoincTest.java index f3426d59..1af8dec7 100644 --- a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/LoincTest.java +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/LoincTest.java @@ -3,7 +3,7 @@ import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.lit; -import com.cerner.bunsen.mappings.Mapping; +import com.cerner.bunsen.mappings.Hierarchies.HierarchicalElement; import java.util.List; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -19,24 +19,23 @@ public class LoincTest { private static SparkSession spark; - private static Dataset loincMappings; + private static Dataset loincValues; /** * Sets up Spark. */ @BeforeClass - public static void setUp() { + public static void setUp() throws Exception { spark = SparkSession.builder() .master("local[2]") - .appName("LoincTest") + .appName("SnomedTest") .getOrCreate(); - loincMappings = Loinc.readMultiaxialHierarchyFile(spark, - "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV", - "2.56"); + loincValues = Loinc.readMultiaxialHierarchyFile(spark, + "src/test/resources/LOINC_HIERARCHY_SAMPLE.CSV"); - loincMappings.cache(); + loincValues.cache(); } /** @@ -51,47 +50,24 @@ public static void tearDown() { @Test public void testHasParent() { - List mappings = loincMappings - .where(col("sourceValue") + List values = loincValues + .where(col("descendantValue") .equalTo(lit("LP14559-6"))) .collectAsList(); - Assert.assertEquals(1, mappings.size()); + Assert.assertEquals(1, values.size()); Assert.assertEquals("LP31755-9", - mappings.get(0).getTargetValue()); - } - - @Test - public void checkConceptMapUri() { - - // All imported rows should have the expected concept map URI. - Assert.assertEquals(loincMappings.count(), - loincMappings - .where(col("conceptMapUri") - .equalTo(lit(Loinc.LOINC_HIERARCHY_MAPPING_URI))) - .count()); - } - - @Test - public void checkVersion() { - - // All imported rows should have the expected concept map version. 
- Assert.assertEquals(loincMappings.count(), - loincMappings - .where(col("conceptMapVersion") - .equalTo(lit("2.56"))) - .count()); + values.get(0).getAncestorValue()); } @Test public void checkSystems() { - Assert.assertEquals(loincMappings.count(), - loincMappings - .where(col("sourceSystem") - .equalTo(lit(Loinc.LOINC_CODE_SYSTEM_URI))) - .where(col("targetSystem") - .equalTo(lit(Loinc.LOINC_CODE_SYSTEM_URI))) + // All imported rows should have the expected system + Assert.assertEquals(loincValues.count(), + loincValues + .where(col("ancestorSystem").equalTo(lit(Loinc.LOINC_CODE_SYSTEM_URI)) + .and(col("descendantSystem").equalTo(lit(Loinc.LOINC_CODE_SYSTEM_URI)))) .count()); } } diff --git a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/SnomedTest.java b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/SnomedTest.java index f87279e7..798ef681 100644 --- a/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/SnomedTest.java +++ b/bunsen-core/src/test/java/com/cerner/bunsen/mappings/systems/SnomedTest.java @@ -3,7 +3,7 @@ import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.functions.lit; -import com.cerner.bunsen.mappings.Mapping; +import com.cerner.bunsen.mappings.Hierarchies.HierarchicalElement; import java.util.List; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.SparkSession; @@ -19,7 +19,7 @@ public class SnomedTest { private static SparkSession spark; - private static Dataset snomedMappings; + private static Dataset snomedValues; /** * Sets up Spark and loads the SNOMED mappings for testing. @@ -32,11 +32,10 @@ public static void setUp() { .appName("SnomedTest") .getOrCreate(); - snomedMappings = Snomed.readRelationshipFile(spark, - "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT", - "20160901"); + snomedValues = Snomed.readRelationshipFile(spark, + "src/test/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT"); - snomedMappings.cache(); + snomedValues.cache(); } /** @@ -51,47 +50,24 @@ public static void tearDown() { @Test public void testHasParent() { - List mappings = snomedMappings - .where(col("sourceValue") + List values = snomedValues + .where(col("descendantValue") .equalTo(lit("44054006"))) .collectAsList(); - Assert.assertEquals(1, mappings.size()); + Assert.assertEquals(1, values.size()); Assert.assertEquals("73211009", - mappings.get(0).getTargetValue()); - } - - @Test - public void checkConceptMapUri() { - - // All imported rows should have the expected concept map URI. - Assert.assertEquals(snomedMappings.count(), - snomedMappings - .where(col("conceptMapUri") - .equalTo(lit(Snomed.SNOMED_HIERARCHY_MAPPING_URI))) - .count()); - } - - @Test - public void checkVersion() { - - // All imported rows should have the expected concept map version. 
- Assert.assertEquals(snomedMappings.count(), - snomedMappings - .where(col("conceptMapVersion") - .equalTo(lit("20160901"))) - .count()); + values.get(0).getAncestorValue()); } @Test public void checkSystems() { - Assert.assertEquals(snomedMappings.count(), - snomedMappings - .where(col("sourceSystem") - .equalTo(lit(Snomed.SNOMED_CODE_SYSTEM_URI))) - .where(col("targetSystem") - .equalTo(lit(Snomed.SNOMED_CODE_SYSTEM_URI))) + // All imported rows should have the expected system + Assert.assertEquals(snomedValues.count(), + snomedValues + .where(col("ancestorSystem").equalTo(lit(Snomed.SNOMED_CODE_SYSTEM_URI)) + .and(col("descendantSystem").equalTo(lit(Snomed.SNOMED_CODE_SYSTEM_URI)))) .count()); } } diff --git a/bunsen-core/src/test/resources/valuesets/married_maritalstatus.xml b/bunsen-core/src/test/resources/valuesets/married_maritalstatus.xml new file mode 100644 index 00000000..d8ccbc2d --- /dev/null +++ b/bunsen-core/src/test/resources/valuesets/married_maritalstatus.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/source/snomed_and_loinc.rst b/docs/source/snomed_and_loinc.rst index ec0c132b..15e54f14 100644 --- a/docs/source/snomed_and_loinc.rst +++ b/docs/source/snomed_and_loinc.rst @@ -33,30 +33,30 @@ Once the content is downloaded, users can import it with the following commands. :py:func:`~bunsen.mapping.loinc.with_loinc_hierarchy` and :py:func:`~bunsen.snomed.loinc.with_relationships` functions for details. ->>> from bunsen.mapping import get_empty +>>> from bunsen.mapping import get_empty_value_sets >>> from bunsen.mapping.loinc import with_loinc_hierarchy >>> from bunsen.mapping.snomed import with_relationships >>> ->>> # Add SNOMED ot the concept maps ->>> concept_maps = with_relationships( +>>> # Add SNOMED to the value sets +>>> value_sets = with_relationships( >>> spark, ->>> get_empty(spark), +>>> get_empty_value_sets(spark), >>> '/path/to/mappings/snomedct_rf2/20160901/Snapshot/Terminology/sct2_Relationship_Snapshot_US1000124_20160901.txt', >>> '20160901') >>> ->>> # Add LOINC to the concept maps. ->>> concept_maps = with_loinc_hierarchy( +>>> # Add LOINC to the value sets +>>> value_sets = with_loinc_hierarchy( >>> spark, ->>> concept_maps, +>>> value_sets, >>> '/path/to/mappings/loinc_hierarchy/2.56/LOINC_2.56_MULTI-AXIAL_HIERARCHY.CSV', >>> '2.56') >>> >>> # Write the SNOMED and LOINC data to the ontologies database, where it is visible >>> # in Bunsen's valueset functions. ->>> concept_maps.write_to_database('ontologies') +>>> value_sets.write_to_database('ontologies') -FHIR ConceptMap APIs +FHIR ValueSet and ConceptMap APIs -------------------- .. automodule:: bunsen.mapping @@ -69,4 +69,4 @@ FHIR ConceptMap APIs .. automodule:: bunsen.mapping.snomed :members: - :undoc-members: \ No newline at end of file + :undoc-members: diff --git a/python/bunsen/mapping/__init__.py b/python/bunsen/mapping/__init__.py index acb83822..fcdf4521 100644 --- a/python/bunsen/mapping/__init__.py +++ b/python/bunsen/mapping/__init__.py @@ -1,23 +1,51 @@ """ Core library for working with `Concept Maps `_ -in Bunsen. This class See the :py:class:`~bunsen.mapping.ConceptMaps` class for details. +and `Value Sets `_, and hierarchical code systems +in Bunsen. See the :py:class:`~bunsen.mapping.ConceptMaps` class, +:py:class `~bunsen.mapping.ValueSets` class, and :py:class `~bunsen.mapping.Hierarchies` +class for details. 
""" from pyspark.sql import functions, DataFrame import collections +import datetime -def get_default(spark_session): +def get_default_concept_maps(spark_session): jconcept_maps = spark_session._jvm.com.cerner.bunsen.mappings \ .ConceptMaps.getDefault(spark_session._jsparkSession) return ConceptMaps(spark_session, jconcept_maps) -def get_empty(spark_session): +def get_empty_concept_maps(spark_session): jconcept_maps = spark_session._jvm.com.cerner.bunsen.mappings \ .ConceptMaps.getEmpty(spark_session._jsparkSession) return ConceptMaps(spark_session, jconcept_maps) +def get_default_value_sets(spark_session): + jvalue_sets = spark_session._jvm.com.cerner.bunsen.mappings \ + .ValueSets.getDefault(spark_session._jsparkSession) + + return ValueSets(spark_session, jvalue_sets) + +def get_empty_value_sets(spark_session): + jvalue_sets = spark_session._jvm.com.cerner.bunsen.mappings \ + .ValueSets.getEmpty(spark_session._jsparkSession) + + return ValueSets(spark_session, jvalue_sets) + +def get_default_hierarchies(spark_session): + jhierarchies = spark_session._jvm.com.cerner.bunsen.mappings \ + .Hierarchies.getDefault(spark_session._jsparkSession) + + return Hierarchies(spark_session, jhierarchies) + +def get_empty_hierarchies(spark_session): + jhierarchies = spark_session._jvm.com.cerner.bunsen.mappings \ + .Hierarchies.getEmpty(spark_session._jsparkSession) + + return Hierarchies(spark_session, jhierarchies) + def _add_mappings_to_map(jvm, concept_map, mappings): """ Helper function to add a collection of mappings in the form of a list of @@ -49,11 +77,35 @@ def _add_mappings_to_map(jvm, concept_map, mappings): target.setEquivalence(equivEnum) +def _add_values_to_value_set(jvm, value_set, values): + """ + Helper function to add a collection of values in the form of a list of + [(source, value)] tuples to the given value set. + """ + inclusions = collections.defaultdict(list) + + for (s, v) in values: + inclusions[s].append(v) + + for system, values in inclusions.items(): + inclusion = value_set.getCompose().addInclude() + + inclusion.setSystem(system) + + # FHIR expects a non-empty version, so we use the current datetime for + # ad-hoc value sets + version = datetime.datetime \ + .now() \ + .replace(microsecond=0) \ + .isoformat(sep=' ') + inclusion.setVersion(version) + + for value in values: + inclusion.addConcept().setCode(value) class ConceptMaps(object): """ - An immutable collection of FHIR Concept Maps to be used to - map value sets and for ontologically-based queries. + An immutable collection of FHIR Concept Maps to be used to map value sets. """ def __init__(self, spark_session, jconcept_maps): @@ -61,11 +113,11 @@ def __init__(self, spark_session, jconcept_maps): self._jvm = spark_session._jvm self._jconcept_maps = jconcept_maps - def latest_version(self, uri): + def latest_version(self, url): """ Returns the latest version of a map, or None if there is none." """ - df = get_maps().where(df.uri == functions.lit(uri)) + df = get_maps().where(df.url == functions.lit(url)) results = df.agg({"version": "max"}).collect() return results[0].min if resuls.count() > 0 else None @@ -75,36 +127,22 @@ def get_maps(self): allowing users to explore mapping metadata. The mappings themselves are excluded because they can become quite large, - so users should use the get_mappings method above to explore a table of - them. + so users should use the get_mappings method to explore a table of them. 
""" return DataFrame(self._jconcept_maps.getMaps(), self._spark_session) - def get_mappings(self, uri=None, version=None): + def get_mappings(self, url=None, version=None): """ - Returns a dataset of all mappings. + Returns a dataset of all mappings which may be filtered by an optional + concept map url and concept map version. """ df = DataFrame(self._jconcept_maps.getMappings(), self._spark_session) - if uri is not None: - df = df.where(df.conceptMapUri == functions.lit(uri)) - - if version is not None: - df = df.where(df.conceptMapVersion == functions.lit(version)) - - return df - - def get_ancestors(self, uri=None, version=None): - """ - Returns a dataset of all ancestors. - """ - df = DataFrame(self._jconcept_maps.getAncestors(), self._spark_session) - - if uri is not None: - df = df.where(df.conceptMapUri == functions.lit(uri)) + if url is not None: + df = df.where(df.url == functions.lit(url)) if version is not None: - df = df.where(df.conceptMapVersion == functions.lit(version)) + df = df.where(df.version == functions.lit(version)) return df @@ -143,10 +181,7 @@ def with_new_map(self, return ConceptMaps(self._spark_session, self._jconcept_maps.withConceptMaps(map_as_list)) - def add_mappings(self, - url, - version, - mappings): + def add_mappings(self, url, version, mappings): """ Returns a new ConceptMaps instance with the given mappings added to an existing map. The mappings parameter must be a list of tuples of the form @@ -167,3 +202,141 @@ def write_to_database(self, database): and conceptmaps table if they don't exist. """ self._jconcept_maps.writeToDatabase(database) + +class ValueSets(object): + """ + An immutable collection of FHIR Value Sets to be used to for + ontologically-based queries. + """ + + def __init__(self, spark_session, jvalue_sets): + self._spark_session = spark_session + self._jvm = spark_session._jvm + self._jvalue_sets = jvalue_sets + + def latest_version(self, url): + """ + Returns the latest version of a value set, or None if there is none. + """ + df = get_value_sets().where(df.url == functions.lit(url)) + results = df.agg({"valueSetVersion": "max"}).collect() + return results[0].min if results.count() > 0 else None + + def get_value_sets(self): + """ + Returns a dataset of FHIR ValueSets without the nested value content, + allowing users to explore value set metadata. + + The values themselves are excluded because they can be become quite + large, so users should use the get_values method to explore them. + """ + return DataFrame(self._jvalue_sets.getValueSets(), self._spark_session) + + def get_values(self, url=None, version=None): + """ + Returns a dataset of all values which may be filtered by an optional + value set url and value set version. + """ + df = DataFrame(self._jvalue_sets.getValues(), self._spark_session) + + if url is not None: + df = df.where(df.valueSetUri == functions.lit(url)) + + if version is not None: + df = df.where(df.valueSetVersion == functions.lit(url)) + + return df + + def get_value_set_as_xml(self, url, version): + """ + Returns an XML string containing the specified value set. + """ + value_set = self._jvalue_sets.getValueSet(url, version) + return self._jvm.com.cerner.bunsen.python.Functions.resourceToXml(value_set) + + def with_new_value_set(self, + url, + version, + experimental=True, + values=[]): + """ + Returns a new ValueSets instance with the given value set added. Callers + may include a list of value tuples in the form of [(system, value)]. 
+ """ + value_set = self._jvm.org.hl7.fhir.dstu3.model.ValueSet() + value_set.setUrl(url) + value_set.setVersion(version) + + if (experimental): + value_set.setExperimental(True) + + _add_values_to_value_set(self._jvm, value_set, values) + + value_set_as_list = self._jvm.java.util.Collections.singletonList(value_set) + + return ValueSets(self._spark_session, + self._jvalue_sets.withValueSets(value_set_as_list)) + + def add_values(self, url, version, values): + """ + Returns a new ValueSets instance with the given values added to an + existing value set. The values parameter must be a list of the form + [(sytem, value)]. + """ + value_set = self._jvalue_sets.getValueSet(url, version) + + _add_values_to_value_set(self._jvm, value_set, values) + + value_set_as_list = self._jvm.java.util.Collections.singletonList(value_set) + + return ValueSets(self._spark_session, + self._jvalue_sets.withValueSets(value_set_as_list)) + + def write_to_database(self, database): + """ + Writes the value set content to the given database, creating a values + and valuesets table if they don't exist. + """ + self._jvalue_sets.writeToDatabase(database) + +class Hierarchies(object): + """ + An immutable collection of values from hierarchical code systems to be used + for ontologically-based queries. + """ + + def __init__(self, spark_session, jhierarchies): + self._spark_session = spark_session + self._jvm = spark_session._jvm + self._jhierarchies = jhierarchies + + def latest_version(self, uri): + """ + Returns the latest version of a hierarchy, or None if there is none. + """ + df = get_ancestors().where(df.uri == functions.lit(uri)) + results = df.agg({"version": "max"}).collect() + return results[0].min if results.count() > 0 else None + + def get_ancestors(self, url=None, version=None): + """ + Returns a dataset of ancestor values representing the transitive + closure of codes in this Hierarchies instance filtered by an optional + hierarchy uri and version.. + """ + df = DataFrame(self._jhierarchies.getAncestors(), self._spark_session) + + if url is not None: + df = df.where(df.uri == functions.lit(uri)) + + if version is not None: + df = df.where(df.version == functions.lit(veresion)) + + return df + + def write_to_database(self, database): + """ + Write the ancestor content to the given database, create an ancestors + table if they don't exist. + """ + self._jhierarchies.writeToDatabase(database) diff --git a/python/bunsen/mapping/loinc.py b/python/bunsen/mapping/loinc.py index 205724a0..c5fab864 100644 --- a/python/bunsen/mapping/loinc.py +++ b/python/bunsen/mapping/loinc.py @@ -2,18 +2,18 @@ Support for importing the LOINC Hierarchy into Bunsen. """ -from bunsen.mapping import ConceptMaps +from bunsen.mapping import Hierarchies -def with_loinc_hierarchy(sparkSession, concept_maps, loinc_hierarchy_path, loinc_version): +def with_loinc_hierarchy(sparkSession, hierarchies, loinc_hierarchy_path, loinc_version): """ - Returns a concept maps instance that includes the LOINC hierarchy read + Returns a hierarchies instance that includes the LOINC hierarchy read from the given location. 
""" loinc = sparkSession._jvm.com.cerner.bunsen.mappings.systems.Loinc - jconcept_maps = loinc.withLoincHierarchy(sparkSession._jsparkSession, - concept_maps._jconcept_maps, - loinc_hierarchy_path, - loinc_version) + jhierarchies = loinc.withLoincHierarchy(sparkSession._jsparkSession, + hierarchies._jhierarchies, + loinc_hierarchy_path, + loinc_version) - return ConceptMaps(sparkSession, jconcept_maps) \ No newline at end of file + return Hierarchies(sparkSession, jhierarchies) diff --git a/python/bunsen/mapping/snomed.py b/python/bunsen/mapping/snomed.py index 40dcff6b..4c9c736b 100644 --- a/python/bunsen/mapping/snomed.py +++ b/python/bunsen/mapping/snomed.py @@ -2,18 +2,18 @@ Support for importing SNOMED relationship files into Bunsen. """ -from bunsen.mapping import ConceptMaps +from bunsen.mapping import Hierarchies -def with_relationships(sparkSession, concept_maps, snomed_relationship_path, snomed_version): +def with_relationships(sparkSession, hierarchies, snomed_relationship_path, snomed_version): """ - Returns a concept maps instance that includes the SNOMED relationships read + Returns a hierarchies instance that includes the SNOMED relationships read from the given location. """ snomed = sparkSession._jvm.com.cerner.bunsen.mappings.systems.Snomed - jconcept_maps = snomed.withRelationships(sparkSession._jsparkSession, - concept_maps._jconcept_maps, - snomed_relationship_path, - snomed_version) + jhierarchies = snomed.withRelationships(sparkSession._jsparkSession, + hierarchies._jhierarchies, + snomed_relationship_path, + snomed_version) - return ConceptMaps(sparkSession, jconcept_maps) + return Hierarchies(sparkSession, jhierarchies) diff --git a/python/bunsen/valuesets.py b/python/bunsen/valuesets.py index 562bbddd..ff10a6d7 100644 --- a/python/bunsen/valuesets.py +++ b/python/bunsen/valuesets.py @@ -5,34 +5,37 @@ from collections import namedtuple -from bunsen.mapping import get_default +from bunsen.mapping import get_default_value_sets, get_default_hierarchies -# Placeholder record to load descendents of a given ancestor -AncestorPlaceholder = namedtuple("AncestorPlaceholder", - "codeSystem codeValue conceptMapUri conceptMapVersion") +# Placeholder record to load a particular value set +ValueSetPlaceholder = namedtuple("ValueSetPlaceholder", + "valueSetUri valueSetVersion") + +# Placeholder record to load a particular hierarchical system +HierarchyPlaceholder = namedtuple("HierarchyPlaceholder", + "codeSystem codeValue hierarchyUri hierarchyVersion") def isa_loinc(code_value, loinc_version=None): """ - Returns a valueset placeholder that will load all values that are descendents + Returns a hierarchy placeholder that will load all values that are descendents of a given LOINC code. """ - return AncestorPlaceholder('http://loinc.org', - code_value, - 'uri:cerner:foresight:mapping:loinc-hierarchy', - loinc_version) + return HierarchyPlaceholder('http://loinc.org', + code_value, + 'urn:com:cerner:bunsen:hierarchy:loinc', + loinc_version) def isa_snomed(code_value, snomed_version=None): """ - Returns a valueset placeholder that will load all values that are descendents + Returns a hierarchy placeholder that will load all values that are descendents of a given SNOMED code. 
""" - return AncestorPlaceholder('http://snomed.info/sct', - code_value, - 'uri:cerner:foresight:mapping:snomed-hierarchy', - snomed_version) - + return HierarchyPlaceholder('http://snomed.info/sct', + code_value, + 'urn:com:cerner:bunsen:hierarchy:snomed', + snomed_version) -def push_valuesets(spark_session, valueset_map, concept_maps=None): +def push_valuesets(spark_session, valueset_map, value_sets=None, hierarchies=None): """ Pushes valuesets onto a stack and registers an in_valueset user-defined function that uses this content. @@ -40,12 +43,17 @@ def push_valuesets(spark_session, valueset_map, concept_maps=None): The valueset_map takes the form of {referenceName: [(codeset, codevalue), (codeset, codevalue)]} to specify which codesets/values are used for the given valueset reference name. - Rather than explicitly passing a list of (codeset, codevalue) tuples, users may instead provide - an AncestorPlaceholder that instructs the the system to load all descendents of a given - code value. See the isa_loinc and isa_snomed functions above for details. + Rather than explicitly passing a list of (codeset, codevalue) tuples, users may instead + load particular value sets or particular hierarchies by providing a ValueSetPlaceholder + or HierarchyPlaceholder that instructs the system to load codes belonging to a particular + value set or hierarchical system, respectively. See the isa_loinc and isa_snomed functions + above for details. """ - if concept_maps is None: - concept_maps = get_default(spark_session) + if value_sets is None: + value_sets = get_default_value_sets(spark_session) + + if hierarchies is None: + hierarchies = get_default_hierarchies(spark_session) jvm = spark_session._jvm @@ -53,12 +61,26 @@ def push_valuesets(spark_session, valueset_map, concept_maps=None): for (name, content) in valueset_map.items(): - if type(content) is AncestorPlaceholder: + print(name) + print(content) + + if type(content) is HierarchyPlaceholder: + + # Add codes belonging to the specified hierarchy + (codeSystem, codeValue, hierarchyUri, hierarchyVersion) = content + + builder.addDescendantsOf(name, + codeSystem, + codeValue, + hierarchyUri, + hierarchyVersion) + + elif type(content) is ValueSetPlaceholder: - # Add descendents of the specified item - (codeSystem, codeValue, conceptMapUri, conceptMapVersion) = content + # Add codes belonging to the specified value set + (valueSetUri, valueSetVersion) = content - builder.addDescendantsOf(name, codeSystem, codeValue, conceptMapUri, conceptMapVersion) + builder.addReference(name, valueSetUri, valueSetVersion) else: @@ -66,7 +88,9 @@ def push_valuesets(spark_session, valueset_map, concept_maps=None): for (codeSystem, codeValue) in content: builder.addCode(name, codeSystem, codeValue) - broadcastable = builder.build(spark_session._jsparkSession, concept_maps._jconcept_maps) + broadcastable = builder.build(spark_session._jsparkSession, + value_sets._jvalue_sets, + hierarchies._jhierarchies) jvm.com.cerner.bunsen.ValueSetUdfs.pushUdf(spark_session._jsparkSession, broadcastable) @@ -97,4 +121,4 @@ def pop_valuesets(spark_session): """ jvm = spark_session._jvm - return jvm.com.cerner.bunsen.ValueSetUdfs.popUdf(spark_session._jsparkSession) \ No newline at end of file + return jvm.com.cerner.bunsen.ValueSetUdfs.popUdf(spark_session._jsparkSession) diff --git a/python/tests/resources/valuesets/married_maritalstatus.xml b/python/tests/resources/valuesets/married_maritalstatus.xml new file mode 100644 index 00000000..d8ccbc2d --- /dev/null +++ 
b/python/tests/resources/valuesets/married_maritalstatus.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/python/tests/test_bunsen.py b/python/tests/test_bunsen.py index 335f808a..3c761487 100644 --- a/python/tests/test_bunsen.py +++ b/python/tests/test_bunsen.py @@ -8,14 +8,18 @@ from bunsen.mapping.loinc import with_loinc_hierarchy from bunsen.mapping.snomed import with_relationships -from bunsen.mapping import get_empty, get_default +from bunsen.mapping import get_empty_concept_maps, get_default_concept_maps, get_empty_value_sets, get_default_value_sets, get_empty_hierarchies, get_default_hierarchies from bunsen.bundles import load_from_directory, extract_entry, save_as_database, to_bundle +from bunsen.valuesets import push_valuesets, isa_loinc, isa_snomed, get_current_valuesets import xml.etree.ElementTree as ET -EXPECTED_COLUMNS = {'sourceValueSet', 'targetValueSet', 'sourceSystem', - 'sourceValue', 'targetSystem', 'targetValue', 'equivalence', - 'conceptmapuri', 'conceptmapversion'} +EXPECTED_COLUMNS = {'uri', + 'version', + 'descendantSystem', + 'descendantValue', + 'ancestorSystem', + 'ancestorValue'} @fixture(scope="session") def spark_session(request): @@ -44,81 +48,51 @@ def spark_session(request): # Concept Maps Tests - - def test_add_map(spark_session): - concept_maps = get_empty(spark_session) + concept_maps = get_empty_concept_maps(spark_session) snomed_to_loinc = [('http://snomed.info/sct', '75367002', 'http://loinc.org', '55417-0', 'equivalent'), # Blood pressure ('http://snomed.info/sct', '271649006', 'http://loinc.org', '8480-6', 'equivalent'), # Systolic BP ('http://snomed.info/sct', '271650006', 'http://loinc.org', '8462-4', 'equivalent')] # Diastolic BP - updated = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', + appended = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', version='0.1', source='urn:cerner:test:valueset', target='http://hl7.org/fhir/ValueSet/observation-code', mappings=snomed_to_loinc) - assert updated.get_maps().count() == 1 - assert updated.get_mappings().where(col('conceptmapuri') == 'urn:cerner:test:snomed-to-loinc').count() == 3 - -def test_add_to_existing(spark_session): - - concept_maps = get_empty(spark_session) - - # Create an existing map - with_existing = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', - version='0.1', - source='urn:cerner:test:valueset', - target='http://hl7.org/fhir/ValueSet/observation-code', - mappings=[('http://snomed.info/sct', '75367002', 'http://loinc.org', '55417-0', 'equivalent')]) - - - updates = [('http://snomed.info/sct', '271649006', 'http://loinc.org', '8480-6', 'equivalent'), # Systolic BP - ('http://snomed.info/sct', '271650006', 'http://loinc.org', '8462-4', 'equivalent')] # Diastolic BP - - updated = with_existing.add_mappings(url='urn:cerner:test:snomed-to-loinc', - version='0.1', - mappings=updates) - - # Original concept map should be unchanged. - assert with_existing.get_maps().count() == 1 - assert with_existing.get_mappings().where(col('conceptmapuri') == 'urn:cerner:test:snomed-to-loinc').count() == 1 - - # Updated concept map should have the new mappings. 
- assert updated.get_maps().count() == 1 - assert updated.get_mappings().where(col('conceptmapuri') == 'urn:cerner:test:snomed-to-loinc').count() == 3 - assert updated.get_ancestors().count() == 0 + assert appended.get_maps().count() == 1 + assert appended.get_mappings().where(col('conceptmapuri') == 'urn:cerner:test:snomed-to-loinc').count() == 3 def test_get_map_as_xml(spark_session): - concept_maps = get_empty(spark_session) + concept_maps = get_empty_concept_maps(spark_session) snomed_to_loinc = [('http://snomed.info/sct', '75367002', 'http://loinc.org', '55417-0', 'equivalent'), # Blood pressure ('http://snomed.info/sct', '271649006', 'http://loinc.org', '8480-6', 'equivalent'), # Systolic BP ('http://snomed.info/sct', '271650006', 'http://loinc.org', '8462-4', 'equivalent')] # Diastolic BP - updated = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', + appended = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', version='0.1', source='urn:cerner:test:valueset', target='http://hl7.org/fhir/ValueSet/observation-code', mappings=snomed_to_loinc) - xml_str = updated.get_map_as_xml('urn:cerner:test:snomed-to-loinc', '0.1') + xml_str = appended.get_map_as_xml('urn:cerner:test:snomed-to-loinc', '0.1') root = ET.fromstring(xml_str) assert root.tag == '{http://hl7.org/fhir}ConceptMap' def test_write_maps(spark_session): - concept_maps = get_empty(spark_session) + concept_maps = get_empty_concept_maps(spark_session) snomed_to_loinc = [('http://snomed.info/sct', '75367002', 'http://loinc.org', '55417-0', 'equivalent'), # Blood pressure ('http://snomed.info/sct', '271649006', 'http://loinc.org', '8480-6', 'equivalent'), # Systolic BP ('http://snomed.info/sct', '271650006', 'http://loinc.org', '8462-4', 'equivalent')] # Diastolic BP - updated = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', + appended = concept_maps.with_new_map(url='urn:cerner:test:snomed-to-loinc', version='0.1', source='urn:cerner:test:valueset', target='http://hl7.org/fhir/ValueSet/observation-code', @@ -129,45 +103,75 @@ def test_write_maps(spark_session): spark_session.sql('drop table if exists ontologies.ancestors') spark_session.sql('drop table if exists ontologies.conceptmaps') - updated.write_to_database('ontologies') + appended.write_to_database('ontologies') # Check that the maps were written by reloading and inspecting them. 
- reloaded = get_default(spark_session) + reloaded = get_default_concept_maps(spark_session) assert reloaded.get_maps().count() == 1 assert reloaded.get_mappings().where(col('conceptmapuri') == 'urn:cerner:test:snomed-to-loinc').count() == 3 +# Value Sets Tests +def test_add_valueset(spark_session): + + value_sets = get_empty_value_sets(spark_session) + + values = [('urn:cerner:system1', 'urn:code:a'), + ('urn:cerner:system1', 'urn:code:b'), + ('urn:cerner:system2', 'urn:code:1')] + + appended = value_sets.with_new_value_set(url='urn:cerner:test:valuesets:testvalueset', + version='0.1', + values=values) + + assert appended.get_value_sets().count() == 1 + assert appended.get_values().count() == 3 + +def test_get_value_set_as_xml(spark_session): + + value_sets = get_empty_value_sets(spark_session) + + values = [('urn:cerner:system1', 'urn:code:a'), + ('urn:cerner:system1', 'urn:code:b'), + ('urn:cerner:system2', 'urn:code:1')] + + appended = value_sets.with_new_value_set(url='urn:cerner:test:valuesets:testvalueset', + version='0.1', + values=values) + # this test fails because version is null on line 778 of ValueSets.java + xml_str = appended.get_value_set_as_xml('urn:cerner:test:valuesets:testvalueset', '0.1') + + root = ET.fromstring(xml_str) + assert root.tag == '{http://hl7.org/fhir}ValueSet' + # LOINC Tests def test_read_hierarchy_file(spark_session): - mappings = with_loinc_hierarchy( + ancestors = with_loinc_hierarchy( spark_session, - get_empty(spark_session), + get_empty_hierarchies(spark_session), 'tests/resources/LOINC_HIERARCHY_SAMPLE.CSV', - '2.56').get_mappings() + '2.56').get_ancestors() - assert set(mappings.columns) == EXPECTED_COLUMNS + assert set(ancestors.columns) == EXPECTED_COLUMNS # SNOMED Tests - def test_read_relationship_file(spark_session): - mappings = with_relationships( + ancestors = with_relationships( spark_session, - get_empty(spark_session), + get_empty_hierarchies(spark_session), 'tests/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT', - '20160901').get_mappings() + '20160901').get_ancestors() - assert set(mappings.columns) == EXPECTED_COLUMNS + assert set(ancestors.columns) == EXPECTED_COLUMNS # Bundles Tests @fixture(scope="session") def bundles(spark_session): return load_from_directory(spark_session, 'tests/resources/bundles', 1) - def test_load_from_directory(bundles): assert len(bundles.collect()) == 3 - def test_extract_entry(spark_session, bundles): assert extract_entry(spark_session, bundles, 'Condition').count() == 5 @@ -188,3 +192,58 @@ def test_to_bundle(spark_session, bundles): conditions = extract_entry(spark_session, bundles, 'Condition') assert to_bundle(spark_session, conditions) != None + +# ValueSetsUdfs Tests +def test_isa_loinc(spark_session): + with_loinc = with_loinc_hierarchy( + spark_session, + get_empty_hierarchies(spark_session), + 'tests/resources/LOINC_HIERARCHY_SAMPLE.CSV', + '2.56') + + push_valuesets(spark_session, + {'leukocytes' : isa_loinc('LP14738-6')}, + value_sets=get_empty_value_sets(spark_session), + hierarchies=with_loinc) + + expected = {'leukocytes' : [('http://loinc.org', '5821-4'), + ('http://loinc.org', 'LP14738-6'), + ('http://loinc.org', 'LP14419-3')]} + assert get_current_valuesets(spark_session) == expected + +def test_isa_snomed(spark_session): + with_snomed = with_relationships( + spark_session, + get_empty_hierarchies(spark_session), + 'tests/resources/SNOMED_RELATIONSHIP_SAMPLE.TXT', + '20160901') + + push_valuesets(spark_session, + {'diabetes' : isa_snomed('73211009')}, + 
value_sets=get_empty_value_sets(spark_session), + hierarchies=with_snomed) + + expected = {'diabetes' : [('http://snomed.info/sct', '73211009'), + ('http://snomed.info/sct', '44054006')]} + + assert get_current_valuesets(spark_session) == expected + +def test_isa_custom(spark_session, bundles): + observations = extract_entry(spark_session, bundles, 'observation') + observations.registerTempTable('observations') + + blood_pressure = {'blood_pressure' : [('http://loinc.org', '8462-4')]} + + value_sets = get_empty_value_sets(spark_session) + hierarchies = get_empty_hierarchies(spark_session) + + push_valuesets(spark_session, blood_pressure, value_sets, hierarchies) + + results = spark_session.sql("SELECT subject.reference, " + + "effectiveDateTime, " + + "valueQuantity.value " + + "FROM observations " + + "WHERE in_valueset(code, 'blood_pressure')") + + assert get_current_valuesets(spark_session) == blood_pressure + assert results.count() == 14
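Taken together, these tests exercise the new Python API end to end: hierarchy content and value sets are imported, broadcast with push_valuesets, and queried through the in_valueset UDF. A condensed sketch of that flow outside the test suite follows; the observations table, the reference names, and the assumption of an active SparkSession named spark are illustrative rather than part of this change.

    from bunsen.mapping import get_default_value_sets, get_default_hierarchies
    from bunsen.valuesets import push_valuesets, isa_loinc, ValueSetPlaceholder

    # Broadcast explicit codes, a LOINC hierarchy subtree, and a FHIR value set
    # previously written to the ontologies database.
    push_valuesets(spark,
                   {'blood_pressure': [('http://loinc.org', '8462-4')],
                    'leukocytes': isa_loinc('LP14738-6'),
                    'married': ValueSetPlaceholder(
                        'urn:cerner:bunsen:valueset:married_maritalstatus', '0.0.1')},
                   value_sets=get_default_value_sets(spark),
                   hierarchies=get_default_hierarchies(spark))

    # The in_valueset UDF registered by push_valuesets is now available in SQL.
    spark.sql("SELECT subject.reference, valueQuantity.value "
              "FROM observations "
              "WHERE in_valueset(code, 'blood_pressure')").show()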