From 7a2b7f894226095581b4824852d44e814131aded Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Thu, 14 Nov 2024 10:11:32 -0500 Subject: [PATCH 01/15] ALS-7810: Add drs-uri table to pfb output --- .../hpds/processing/io/PfbWriter.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index b10fc08c..eb58da8d 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -21,6 +21,7 @@ public class PfbWriter implements ResultWriter { public static final String PATIENT_TABLE_PREFIX = "pic-sure-"; + public static final String DRS_URL_TABLE_PREFIX = "drs-url-"; private Logger log = LoggerFactory.getLogger(PfbWriter.class); private final Schema metadataSchema; @@ -29,6 +30,7 @@ public class PfbWriter implements ResultWriter { private final String queryId; private final String patientTableName; + private final String drsUrlTableName; private SchemaBuilder.FieldAssembler entityFieldAssembler; private List fields; @@ -36,6 +38,7 @@ public class PfbWriter implements ResultWriter { private File file; private Schema entitySchema; private Schema patientDataSchema; + private Schema drsUriSchema; private Schema relationSchema; private static final Set SINGULAR_FIELDS = Set.of("patient_id"); @@ -44,6 +47,7 @@ public PfbWriter(File tempFile, String queryId) { this.file = tempFile; this.queryId = queryId; this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId); + this.drsUrlTableName = formatFieldName(DRS_URL_TABLE_PREFIX + queryId); entityFieldAssembler = SchemaBuilder.record("entity") .namespace("edu.harvard.dbmi") .fields(); @@ -72,9 +76,15 @@ public PfbWriter(File tempFile, String queryId) { @Override public void writeHeader(String[] data) { fields = Arrays.stream(data.clone()).map(this::formatFieldName).collect(Collectors.toList()); + + drsUriSchema = SchemaBuilder.record(drsUrlTableName) + .fields() + .requiredString("concept_path") + .name("drs_uri").type(SchemaBuilder.array().items(SchemaBuilder.nullable().stringType())).noDefault() + .endRecord(); + SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record(patientTableName) .fields(); - fields.forEach(field -> { if (isSingularField(field)) { patientRecords.nullableString(field, "null"); @@ -85,7 +95,7 @@ public void writeHeader(String[] data) { }); patientDataSchema = patientRecords.endRecord(); - Schema objectSchema = Schema.createUnion(metadataSchema, patientDataSchema); + Schema objectSchema = Schema.createUnion(metadataSchema, patientDataSchema, drsUriSchema); entityFieldAssembler = entityFieldAssembler.name("object").type(objectSchema).noDefault(); entityFieldAssembler.nullableString("id", "null"); @@ -104,6 +114,30 @@ public void writeHeader(String[] data) { } writeMetadata(); + writeDrsUris(); + } + + private void writeDrsUris() { + GenericRecord entityRecord = new GenericData.Record(entitySchema); + + for (String field : fields) { + GenericRecord drsUriData = new GenericData.Record(drsUriSchema); + drsUriData.put("concept_path", field); + // todo: lookup DRS URIs + drsUriData.put("drs_uri", List.of("https://a-drs-uri.com/")); + + entityRecord.put("object", drsUriData); + entityRecord.put("name", drsUrlTableName); + entityRecord.put("relations", List.of()); + + try { + dataFileWriter.append(entityRecord); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + } private boolean isSingularField(String field) { From 34b71d502bd0d5fe08a7bd3a3a783de192e85fbc Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Mon, 18 Nov 2024 14:02:07 -0500 Subject: [PATCH 02/15] Initial commit for dictionary service --- .../hpds/processing/dictionary/Concept.java | 6 ++++ .../dictionary/DictionaryService.java | 30 +++++++++++++++++++ .../hpds/processing/io/PfbWriter.java | 22 ++++++++++++-- .../hpds/processing/io/PfbWriterTest.java | 14 ++++++--- .../avillach/hpds/service/QueryService.java | 7 ++++- .../application-bdc-auth-dev.properties | 4 ++- .../application-bdc-auth-prod.properties | 4 ++- 7 files changed, 77 insertions(+), 10 deletions(-) create mode 100644 processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java create mode 100644 processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java new file mode 100644 index 00000000..59fe903d --- /dev/null +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java @@ -0,0 +1,6 @@ +package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary; + +import java.util.Map; + +public record Concept(String conceptPath, String name, Map meta) { +} diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java new file mode 100644 index 00000000..585c3deb --- /dev/null +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java @@ -0,0 +1,30 @@ +package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary; + +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.core.ParameterizedTypeReference; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpMethod; +import org.springframework.stereotype.Component; +import org.springframework.web.client.RestTemplate; + +import java.util.List; + +@Component +@ConditionalOnProperty("dictionary.host") +public class DictionaryService { + + public static final ParameterizedTypeReference> CONCEPT_LIST_TYPE_REFERENCE = new ParameterizedTypeReference<>() { + }; + private final String dictionaryHost; + private final RestTemplate restTemplate; + + public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTemplate, @Value("${TARGET_STACK}") String targetStack) { + this.dictionaryHost = dictionaryHostTemplate.replace("___TARGET_STACK___", targetStack); + this.restTemplate = new RestTemplate(); + } + + public List getConcepts(List conceptPaths) { + return restTemplate.exchange(dictionaryHost, HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE).getBody(); + } +} diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index eb58da8d..f0f4056a 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -1,5 +1,7 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.io; +import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept; +import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; import org.apache.avro.file.CodecFactory; @@ -16,6 +18,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.*; +import java.util.function.Function; import java.util.stream.Collectors; public class PfbWriter implements ResultWriter { @@ -24,6 +27,8 @@ public class PfbWriter implements ResultWriter { public static final String DRS_URL_TABLE_PREFIX = "drs-url-"; private Logger log = LoggerFactory.getLogger(PfbWriter.class); + private final DictionaryService dictionaryService; + private final Schema metadataSchema; private final Schema nodeSchema; @@ -43,9 +48,10 @@ public class PfbWriter implements ResultWriter { private static final Set SINGULAR_FIELDS = Set.of("patient_id"); - public PfbWriter(File tempFile, String queryId) { + public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryService) { this.file = tempFile; this.queryId = queryId; + this.dictionaryService = dictionaryService; this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId); this.drsUrlTableName = formatFieldName(DRS_URL_TABLE_PREFIX + queryId); entityFieldAssembler = SchemaBuilder.record("entity") @@ -118,13 +124,23 @@ public void writeHeader(String[] data) { } private void writeDrsUris() { + Map conceptMap = dictionaryService.getConcepts(fields).stream() + .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); GenericRecord entityRecord = new GenericData.Record(entitySchema); for (String field : fields) { GenericRecord drsUriData = new GenericData.Record(drsUriSchema); drsUriData.put("concept_path", field); - // todo: lookup DRS URIs - drsUriData.put("drs_uri", List.of("https://a-drs-uri.com/")); + + Concept concept = conceptMap.get(field); + List drsUris = List.of(); + if (concept != null) { + Map meta = concept.meta(); + if (meta != null) { + drsUris = meta.values().stream().toList(); + } + } + drsUriData.put("drs_uri", drsUris); entityRecord.put("object", drsUriData); entityRecord.put("name", drsUrlTableName); diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index d1819741..039f93bb 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -1,7 +1,9 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.io; +import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import java.io.File; @@ -12,11 +14,15 @@ import static org.junit.jupiter.api.Assertions.*; +@ExtendWith(MockitoExtension.class) public class PfbWriterTest { + @Mock + private DictionaryService dictionaryService; + @Test public void writeValidPFB() { - PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString()); + PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); List> nullableList = new ArrayList<>(); @@ -39,21 +45,21 @@ public void writeValidPFB() { @Test public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() { - PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString()); + PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); String formattedName = pfbWriter.formatFieldName("\\Topmed Study Accession with Subject ID\\\\"); assertEquals("_Topmed_Study_Accession_with_Subject_ID__", formattedName); } @Test public void formatFieldName_startsWithDigit_prependUnderscore() { - PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString()); + PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); String formattedName = pfbWriter.formatFieldName("123Topmed Study Accession with Subject ID\\\\"); assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", formattedName); } @Test public void formatFieldName_randomGarbage_replaceWithUnderscore() { - PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString()); + PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); String formattedName = pfbWriter.formatFieldName("$$$my garbage @vro var!able nam#"); assertEquals("___my_garbage__vro_var_able_nam_", formattedName); } diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java index a00a8ad0..31952b49 100644 --- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java +++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java @@ -9,6 +9,7 @@ import java.util.stream.Collectors; import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType; +import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService; import edu.harvard.hms.dbmi.avillach.hpds.processing.io.CsvWriter; import edu.harvard.hms.dbmi.avillach.hpds.processing.io.PfbWriter; import edu.harvard.hms.dbmi.avillach.hpds.processing.io.ResultWriter; @@ -48,6 +49,8 @@ public class QueryService { private final CountProcessor countProcessor; private final MultiValueQueryProcessor multiValueQueryProcessor; + private final DictionaryService dictionaryService; + HashMap results = new HashMap<>(); @@ -57,6 +60,7 @@ public QueryService (AbstractProcessor abstractProcessor, TimeseriesProcessor timeseriesProcessor, CountProcessor countProcessor, MultiValueQueryProcessor multiValueQueryProcessor, + @Autowired(required = false) DictionaryService dictionaryService, @Value("${SMALL_JOB_LIMIT}") Integer smallJobLimit, @Value("${SMALL_TASK_THREADS}") Integer smallTaskThreads, @Value("${LARGE_TASK_THREADS}") Integer largeTaskThreads) { @@ -65,6 +69,7 @@ public QueryService (AbstractProcessor abstractProcessor, this.timeseriesProcessor = timeseriesProcessor; this.countProcessor = countProcessor; this.multiValueQueryProcessor = multiValueQueryProcessor; + this.dictionaryService = dictionaryService; SMALL_JOB_LIMIT = smallJobLimit; SMALL_TASK_THREADS = smallTaskThreads; @@ -136,7 +141,7 @@ private AsyncResult initializeResult(Query query) throws IOException { String queryId = UUIDv5.UUIDFromString(query.toString()).toString(); ResultWriter writer; if (ResultType.DATAFRAME_PFB.equals(query.getExpectedResultType())) { - writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId); + writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId, dictionaryService); } else { writer = new CsvWriter(File.createTempFile("result-" + System.nanoTime(), ".sstmp")); } diff --git a/service/src/main/resources/application-bdc-auth-dev.properties b/service/src/main/resources/application-bdc-auth-dev.properties index 7b9dce89..cce65412 100644 --- a/service/src/main/resources/application-bdc-auth-dev.properties +++ b/service/src/main/resources/application-bdc-auth-dev.properties @@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/ data-export.s3.bucket-name=pic-sure-auth-dev-data-export data-export.s3.region=us-east-1 -data-export.s3.signedUrl-expiry-minutes=60 \ No newline at end of file +data-export.s3.signedUrl-expiry-minutes=60 + +dictionary.host = https://wildfly.___TARGET_STACK___/ \ No newline at end of file diff --git a/service/src/main/resources/application-bdc-auth-prod.properties b/service/src/main/resources/application-bdc-auth-prod.properties index a63bc6e3..625b0c72 100644 --- a/service/src/main/resources/application-bdc-auth-prod.properties +++ b/service/src/main/resources/application-bdc-auth-prod.properties @@ -7,4 +7,6 @@ HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/ data-export.s3.bucket-name=pic-sure-auth-prod-data-export data-export.s3.region=us-east-1 -data-export.s3.signedUrl-expiry-minutes=60 \ No newline at end of file +data-export.s3.signedUrl-expiry-minutes=60 + +dictionary.host = https://wildfly.___TARGET_STACK___/ \ No newline at end of file From 24519b83e454c23bcf44ca2068a45c41fc9ad9ad Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Mon, 18 Nov 2024 15:14:56 -0500 Subject: [PATCH 03/15] Fix path --- .../avillach/hpds/processing/dictionary/DictionaryService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java index 585c3deb..f5f5bfaf 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java @@ -25,6 +25,6 @@ public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTempl } public List getConcepts(List conceptPaths) { - return restTemplate.exchange(dictionaryHost, HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE).getBody(); + return restTemplate.exchange(dictionaryHost + "/picsure/proxy/dictionary-api/concepts/detail/", HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE).getBody(); } } From 4826851c2a3cff226c50b81df95f204bab5fa7f8 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Tue, 19 Nov 2024 08:36:15 -0500 Subject: [PATCH 04/15] ALS-7810: Handle errors better --- .../dbmi/avillach/hpds/processing/io/PfbWriter.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index f0f4056a..ef00a89f 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -124,9 +124,14 @@ public void writeHeader(String[] data) { } private void writeDrsUris() { - Map conceptMap = dictionaryService.getConcepts(fields).stream() - .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); - GenericRecord entityRecord = new GenericData.Record(entitySchema); + GenericRecord entityRecord = new GenericData.Record(entitySchema);; + Map conceptMap = Map.of(); + try { + conceptMap = dictionaryService.getConcepts(fields).stream() + .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); + } catch (RuntimeException e) { + log.error("Error fetching DRS URIs from dictionary service"); + } for (String field : fields) { GenericRecord drsUriData = new GenericData.Record(drsUriSchema); From 87383a46192b31df6e8725af2298319b96e6fb4f Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Tue, 19 Nov 2024 10:47:01 -0500 Subject: [PATCH 05/15] ALS-7810: Update dictionary port --- service/src/main/resources/application-bdc-auth-dev.properties | 2 +- service/src/main/resources/application-bdc-auth-prod.properties | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/service/src/main/resources/application-bdc-auth-dev.properties b/service/src/main/resources/application-bdc-auth-dev.properties index cce65412..81aa5ca3 100644 --- a/service/src/main/resources/application-bdc-auth-dev.properties +++ b/service/src/main/resources/application-bdc-auth-dev.properties @@ -9,4 +9,4 @@ data-export.s3.bucket-name=pic-sure-auth-dev-data-export data-export.s3.region=us-east-1 data-export.s3.signedUrl-expiry-minutes=60 -dictionary.host = https://wildfly.___TARGET_STACK___/ \ No newline at end of file +dictionary.host = http://wildfly.___TARGET_STACK___:8080/ \ No newline at end of file diff --git a/service/src/main/resources/application-bdc-auth-prod.properties b/service/src/main/resources/application-bdc-auth-prod.properties index 625b0c72..a2269d45 100644 --- a/service/src/main/resources/application-bdc-auth-prod.properties +++ b/service/src/main/resources/application-bdc-auth-prod.properties @@ -9,4 +9,4 @@ data-export.s3.bucket-name=pic-sure-auth-prod-data-export data-export.s3.region=us-east-1 data-export.s3.signedUrl-expiry-minutes=60 -dictionary.host = https://wildfly.___TARGET_STACK___/ \ No newline at end of file +dictionary.host = http://wildfly.___TARGET_STACK___:8080/ \ No newline at end of file From 25710e523fb0afe12d62ad0e899ee90e405affb3 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Tue, 19 Nov 2024 13:40:43 -0500 Subject: [PATCH 06/15] Fix issue with formatted field names, missing non nullable field --- .../hpds/processing/io/PfbWriter.java | 33 ++++++++++--------- .../hpds/processing/io/PfbWriterTest.java | 7 +++- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index ef00a89f..cf1e496e 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -38,7 +38,8 @@ public class PfbWriter implements ResultWriter { private final String drsUrlTableName; private SchemaBuilder.FieldAssembler entityFieldAssembler; - private List fields; + private List originalFields; + private List formattedFields; private DataFileWriter dataFileWriter; private File file; private Schema entitySchema; @@ -81,7 +82,8 @@ public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryServ @Override public void writeHeader(String[] data) { - fields = Arrays.stream(data.clone()).map(this::formatFieldName).collect(Collectors.toList()); + originalFields = List.of(data); + formattedFields = originalFields.stream().map(this::formatFieldName).collect(Collectors.toList()); drsUriSchema = SchemaBuilder.record(drsUrlTableName) .fields() @@ -91,7 +93,7 @@ public void writeHeader(String[] data) { SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record(patientTableName) .fields(); - fields.forEach(field -> { + formattedFields.forEach(field -> { if (isSingularField(field)) { patientRecords.nullableString(field, "null"); } else { @@ -127,17 +129,17 @@ private void writeDrsUris() { GenericRecord entityRecord = new GenericData.Record(entitySchema);; Map conceptMap = Map.of(); try { - conceptMap = dictionaryService.getConcepts(fields).stream() + conceptMap = dictionaryService.getConcepts(originalFields).stream() .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); } catch (RuntimeException e) { log.error("Error fetching DRS URIs from dictionary service"); } - for (String field : fields) { + for (int i = 0; i < formattedFields.size(); i++) { GenericRecord drsUriData = new GenericData.Record(drsUriSchema); - drsUriData.put("concept_path", field); + drsUriData.put("concept_path", formattedFields.get(i)); - Concept concept = conceptMap.get(field); + Concept concept = conceptMap.get(originalFields.get(i)); List drsUris = List.of(); if (concept != null) { Map meta = concept.meta(); @@ -149,6 +151,7 @@ private void writeDrsUris() { entityRecord.put("object", drsUriData); entityRecord.put("name", drsUrlTableName); + entityRecord.put("id", "null"); entityRecord.put("relations", List.of()); try { @@ -157,8 +160,6 @@ private void writeDrsUris() { throw new UncheckedIOException(e); } } - - } private boolean isSingularField(String field) { @@ -181,7 +182,7 @@ private void writeMetadata() { GenericRecord entityRecord = new GenericData.Record(entitySchema); List nodeList = new ArrayList<>(); - for (String field : fields) { + for (String field : formattedFields) { GenericRecord nodeData = new GenericData.Record(nodeSchema); nodeData.put("name", field); nodeData.put("ontology_reference", ""); @@ -213,21 +214,21 @@ public void writeEntity(Collection entities) { @Override public void writeMultiValueEntity(Collection>> entities) { entities.forEach(entity -> { - if (entity.size() != fields.size()) { + if (entity.size() != formattedFields.size()) { throw new IllegalArgumentException("Entity length much match the number of fields in this document"); } GenericRecord patientData = new GenericData.Record(patientDataSchema); String patientId = ""; - for(int i = 0; i < fields.size(); i++) { - if ("patient_id".equals(fields.get(i))) { + for(int i = 0; i < formattedFields.size(); i++) { + if ("patient_id".equals(formattedFields.get(i))) { patientId = (entity.get(i) != null && !entity.get(i).isEmpty()) ? entity.get(i).get(0) : ""; } - if (isSingularField(fields.get(i))) { + if (isSingularField(formattedFields.get(i))) { String entityValue = (entity.get(i) != null && !entity.get(i).isEmpty()) ? entity.get(i).get(0) : ""; - patientData.put(fields.get(i), entityValue); + patientData.put(formattedFields.get(i), entityValue); } else { List fieldValue = entity.get(i) != null ? entity.get(i) : List.of(); - patientData.put(fields.get(i), fieldValue); + patientData.put(formattedFields.get(i), fieldValue); } } diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index 039f93bb..aaf37986 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -1,14 +1,17 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.io; +import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept; import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.UUID; import static org.junit.jupiter.api.Assertions.*; @@ -24,6 +27,9 @@ public class PfbWriterTest { public void writeValidPFB() { PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); + Mockito.when(dictionaryService.getConcepts(List.of("patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"))) + .thenReturn(List.of(new Concept("\\demographics\\age\\", "age", Map.of("drs_uri", "a-drs.uri")))); + pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); List> nullableList = new ArrayList<>(); nullableList.add(List.of("123")); @@ -40,7 +46,6 @@ public void writeValidPFB() { List.of(List.of(), List.of("75"), List.of()) )); pfbWriter.close(); - // todo: validate this programatically } @Test From da8df759bdd7af5de834930db638f213798c1ad2 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Tue, 19 Nov 2024 15:41:47 -0500 Subject: [PATCH 07/15] ALS-7810: Fix dictionary config --- .../processing/dictionary/DictionaryService.java | 13 ++++++++++--- .../resources/application-development.properties | 3 +-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java index f5f5bfaf..653f1907 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java @@ -1,16 +1,18 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.core.ParameterizedTypeReference; import org.springframework.http.HttpEntity; import org.springframework.http.HttpMethod; import org.springframework.stereotype.Component; +import org.springframework.stereotype.Service; import org.springframework.web.client.RestTemplate; import java.util.List; -@Component +@Service @ConditionalOnProperty("dictionary.host") public class DictionaryService { @@ -19,8 +21,13 @@ public class DictionaryService { private final String dictionaryHost; private final RestTemplate restTemplate; - public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTemplate, @Value("${TARGET_STACK}") String targetStack) { - this.dictionaryHost = dictionaryHostTemplate.replace("___TARGET_STACK___", targetStack); + @Autowired + public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTemplate, @Value("${TARGET_STACK:}") String targetStack) { + if (targetStack != null && !targetStack.isEmpty()) { + this.dictionaryHost = dictionaryHostTemplate.replace("___TARGET_STACK___", targetStack); + } else { + this.dictionaryHost = dictionaryHostTemplate; + } this.restTemplate = new RestTemplate(); } diff --git a/service/src/main/resources/application-development.properties b/service/src/main/resources/application-development.properties index 16c335d2..5768ef1b 100644 --- a/service/src/main/resources/application-development.properties +++ b/service/src/main/resources/application-development.properties @@ -2,5 +2,4 @@ SMALL_JOB_LIMIT = 100 SMALL_TASK_THREADS = 1 LARGE_TASK_THREADS = 1 -hpds.genomicProcessor.impl=localDistributed -HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/ \ No newline at end of file +dictionary.host = http://wildfly.___TARGET_STACK___:8080/ \ No newline at end of file From dc0ebe371c7669f5f17b4f6b7ecd32cc64db270f Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Tue, 19 Nov 2024 16:38:00 -0500 Subject: [PATCH 08/15] Fix dictionary service path --- .../hpds/processing/dictionary/DictionaryService.java | 2 +- .../hms/dbmi/avillach/hpds/processing/io/PfbWriter.java | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java index 653f1907..c6c10bef 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/DictionaryService.java @@ -32,6 +32,6 @@ public DictionaryService(@Value("${dictionary.host}") String dictionaryHostTempl } public List getConcepts(List conceptPaths) { - return restTemplate.exchange(dictionaryHost + "/picsure/proxy/dictionary-api/concepts/detail/", HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE).getBody(); + return restTemplate.exchange(dictionaryHost + "/pic-sure-api-2/PICSURE/proxy/dictionary-api/concepts/detail", HttpMethod.POST, new HttpEntity<>(conceptPaths), CONCEPT_LIST_TYPE_REFERENCE).getBody(); } } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index cf1e496e..d29b91f7 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -132,7 +132,7 @@ private void writeDrsUris() { conceptMap = dictionaryService.getConcepts(originalFields).stream() .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); } catch (RuntimeException e) { - log.error("Error fetching DRS URIs from dictionary service"); + log.error("Error fetching DRS URIs from dictionary service", e); } for (int i = 0; i < formattedFields.size(); i++) { @@ -144,7 +144,8 @@ private void writeDrsUris() { if (concept != null) { Map meta = concept.meta(); if (meta != null) { - drsUris = meta.values().stream().toList(); + drsUris = new ArrayList<>(meta.keySet().stream().toList()); + drsUris.addAll(meta.values().stream().toList()); } } drsUriData.put("drs_uri", drsUris); From 842fc235f71e2484a96697d5f3fa569118d89444 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 08:08:45 -0500 Subject: [PATCH 09/15] ALS-7810: Finalize data dictionary pfb output --- .../hpds/processing/dictionary/Concept.java | 2 +- .../hpds/processing/io/PfbWriter.java | 54 +++++++++++++------ .../hpds/processing/io/PfbWriterTest.java | 2 +- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java index 59fe903d..6fe4f706 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java @@ -2,5 +2,5 @@ import java.util.Map; -public record Concept(String conceptPath, String name, Map meta) { +public record Concept(String type, String conceptPath, String name, String display, String dataset, String description, Map meta) { } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index d29b91f7..4e1c4df9 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -1,5 +1,7 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.io; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.Concept; import edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary.DictionaryService; import org.apache.avro.Schema; @@ -24,7 +26,7 @@ public class PfbWriter implements ResultWriter { public static final String PATIENT_TABLE_PREFIX = "pic-sure-"; - public static final String DRS_URL_TABLE_PREFIX = "drs-url-"; + public static final String DATA_DICTIONARY_TABLE_PREFIX = "data-dictionary-"; private Logger log = LoggerFactory.getLogger(PfbWriter.class); private final DictionaryService dictionaryService; @@ -35,7 +37,7 @@ public class PfbWriter implements ResultWriter { private final String queryId; private final String patientTableName; - private final String drsUrlTableName; + private final String dataDictionaryTableName; private SchemaBuilder.FieldAssembler entityFieldAssembler; private List originalFields; @@ -44,7 +46,7 @@ public class PfbWriter implements ResultWriter { private File file; private Schema entitySchema; private Schema patientDataSchema; - private Schema drsUriSchema; + private Schema dataDictionarySchema; private Schema relationSchema; private static final Set SINGULAR_FIELDS = Set.of("patient_id"); @@ -54,7 +56,7 @@ public PfbWriter(File tempFile, String queryId, DictionaryService dictionaryServ this.queryId = queryId; this.dictionaryService = dictionaryService; this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId); - this.drsUrlTableName = formatFieldName(DRS_URL_TABLE_PREFIX + queryId); + this.dataDictionaryTableName = formatFieldName(DATA_DICTIONARY_TABLE_PREFIX + queryId); entityFieldAssembler = SchemaBuilder.record("entity") .namespace("edu.harvard.dbmi") .fields(); @@ -85,10 +87,14 @@ public void writeHeader(String[] data) { originalFields = List.of(data); formattedFields = originalFields.stream().map(this::formatFieldName).collect(Collectors.toList()); - drsUriSchema = SchemaBuilder.record(drsUrlTableName) + dataDictionarySchema = SchemaBuilder.record(dataDictionaryTableName) .fields() .requiredString("concept_path") .name("drs_uri").type(SchemaBuilder.array().items(SchemaBuilder.nullable().stringType())).noDefault() + .nullableString("type", "null") + .nullableString("display", "null") + .nullableString("dataset", "null") + .nullableString("description", "null") .endRecord(); SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record(patientTableName) @@ -103,7 +109,7 @@ public void writeHeader(String[] data) { }); patientDataSchema = patientRecords.endRecord(); - Schema objectSchema = Schema.createUnion(metadataSchema, patientDataSchema, drsUriSchema); + Schema objectSchema = Schema.createUnion(metadataSchema, patientDataSchema, dataDictionarySchema); entityFieldAssembler = entityFieldAssembler.name("object").type(objectSchema).noDefault(); entityFieldAssembler.nullableString("id", "null"); @@ -122,36 +128,52 @@ public void writeHeader(String[] data) { } writeMetadata(); - writeDrsUris(); + writeDataDictionary(); } - private void writeDrsUris() { + private void writeDataDictionary() { GenericRecord entityRecord = new GenericData.Record(entitySchema);; Map conceptMap = Map.of(); try { conceptMap = dictionaryService.getConcepts(originalFields).stream() .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); } catch (RuntimeException e) { - log.error("Error fetching DRS URIs from dictionary service", e); + log.error("Error fetching concepts from dictionary service", e); + return; } for (int i = 0; i < formattedFields.size(); i++) { - GenericRecord drsUriData = new GenericData.Record(drsUriSchema); - drsUriData.put("concept_path", formattedFields.get(i)); + String formattedField = formattedFields.get(i); + if ("patient_id".equals(formattedField)) { + continue; + } + GenericRecord dataDictionaryData = new GenericData.Record(dataDictionarySchema); + dataDictionaryData.put("concept_path", formattedField); Concept concept = conceptMap.get(originalFields.get(i)); List drsUris = List.of(); if (concept != null) { Map meta = concept.meta(); if (meta != null) { - drsUris = new ArrayList<>(meta.keySet().stream().toList()); - drsUris.addAll(meta.values().stream().toList()); + String drsUriJson = meta.get("drs_uri"); + if (drsUriJson != null) { + try { + String[] drsUriArray = new ObjectMapper().readValue(drsUriJson, String[].class); + drsUris = List.of(drsUriArray); + } catch (JsonProcessingException e) { + log.error("Error parsing drs_uri as json: " + drsUriJson); + } + } } + dataDictionaryData.put("type", concept.type()); + dataDictionaryData.put("display", concept.display()); + dataDictionaryData.put("dataset", concept.dataset()); + dataDictionaryData.put("description", concept.description()); } - drsUriData.put("drs_uri", drsUris); + dataDictionaryData.put("drs_uri", drsUris); - entityRecord.put("object", drsUriData); - entityRecord.put("name", drsUrlTableName); + entityRecord.put("object", dataDictionaryData); + entityRecord.put("name", dataDictionaryTableName); entityRecord.put("id", "null"); entityRecord.put("relations", List.of()); diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index aaf37986..1167a090 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -28,7 +28,7 @@ public void writeValidPFB() { PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); Mockito.when(dictionaryService.getConcepts(List.of("patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"))) - .thenReturn(List.of(new Concept("\\demographics\\age\\", "age", Map.of("drs_uri", "a-drs.uri")))); + .thenReturn(List.of(new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", "demographics", "patient age", Map.of("drs_uri", "a-drs.uri")))); pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); List> nullableList = new ArrayList<>(); From d21f58879e7a60bdb786cb938289e598d6e3de20 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 08:47:10 -0500 Subject: [PATCH 10/15] ALS-7810: Update PFB table names --- .../hms/dbmi/avillach/hpds/processing/io/PfbWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index 4e1c4df9..8a6fdd23 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -25,8 +25,8 @@ public class PfbWriter implements ResultWriter { - public static final String PATIENT_TABLE_PREFIX = "pic-sure-"; - public static final String DATA_DICTIONARY_TABLE_PREFIX = "data-dictionary-"; + public static final String PATIENT_TABLE_PREFIX = "pic-sure-patients-"; + public static final String DATA_DICTIONARY_TABLE_PREFIX = "pic-sure-data-dictionary-"; private Logger log = LoggerFactory.getLogger(PfbWriter.class); private final DictionaryService dictionaryService; From c5c3d77a4802cee6dd3e48f7886f8ceca361d2d6 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 09:53:15 -0500 Subject: [PATCH 11/15] ALS-7810: Attempt to fix parsing error --- .../hms/dbmi/avillach/hpds/processing/io/PfbWriter.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index 8a6fdd23..655823e0 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -172,9 +172,10 @@ private void writeDataDictionary() { } dataDictionaryData.put("drs_uri", drsUris); + log.info("Writing " + formattedField + " to data dictonary table with drs_uris: " + drsUris); entityRecord.put("object", dataDictionaryData); entityRecord.put("name", dataDictionaryTableName); - entityRecord.put("id", "null"); + entityRecord.put("id", formattedField); entityRecord.put("relations", List.of()); try { From 67b1b776bbaba098b8e2e05eedfff71a63ee2471 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 10:03:11 -0500 Subject: [PATCH 12/15] ALS-7810: Attempt to fix parsing error --- .../hpds/processing/upload/SignUrlService.java | 10 ++++++---- .../avillach/hpds/processing/io/PfbWriterTest.java | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java index f80ddce3..6c1c66c4 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java @@ -26,6 +26,8 @@ public class SignUrlService { private final int signedUrlExpiryMinutes; private final Region region; + private final S3Client s3; + private static Logger log = LoggerFactory.getLogger(SignUrlService.class); @Autowired @@ -37,13 +39,13 @@ public SignUrlService( this.bucketName = bucketName; this.signedUrlExpiryMinutes = signedUrlExpiryMinutes; this.region = Region.of(region); - } - public void uploadFile(File file, String objectKey) { - S3Client s3 = S3Client.builder() - .region(region) + s3 = S3Client.builder() + .region(this.region) .build(); + } + public void uploadFile(File file, String objectKey) { putS3Object(s3, bucketName, objectKey, file); s3.close(); } diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index 1167a090..2d150d27 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -28,7 +28,7 @@ public void writeValidPFB() { PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); Mockito.when(dictionaryService.getConcepts(List.of("patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"))) - .thenReturn(List.of(new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", "demographics", "patient age", Map.of("drs_uri", "a-drs.uri")))); + .thenReturn(List.of(new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]")))); pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); List> nullableList = new ArrayList<>(); From c2203d19a1f9f6a2786e57aadc85f57e17c11548 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 11:23:36 -0500 Subject: [PATCH 13/15] Add Concept json tests --- .../hpds/processing/dictionary/Concept.java | 3 +++ .../processing/dictionary/ConceptTest.java | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java index 6fe4f706..cef0fcf8 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java @@ -1,6 +1,9 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + import java.util.Map; +@JsonIgnoreProperties(ignoreUnknown = true) public record Concept(String type, String conceptPath, String name, String display, String dataset, String description, Map meta) { } diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java new file mode 100644 index 00000000..733f46fa --- /dev/null +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java @@ -0,0 +1,25 @@ +package edu.harvard.hms.dbmi.avillach.hpds.processing.dictionary; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +public class ConceptTest { + + @Test + public void jsonSerialization() throws JsonProcessingException { + Concept[] concepts = new Concept[]{new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]"))}; + ObjectMapper objectMapper = new ObjectMapper(); + + String serialized = objectMapper.writeValueAsString(concepts); + + Concept[] deserialized = objectMapper.readValue(serialized, Concept[].class); + + assertEquals(List.of(concepts), List.of(deserialized)); + } +} \ No newline at end of file From 052e7c48426eb85ab62ab9e46aca7119d51fc3c9 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 11:33:07 -0500 Subject: [PATCH 14/15] Add Concept json tests --- .../hms/dbmi/avillach/hpds/processing/dictionary/Concept.java | 2 +- .../hms/dbmi/avillach/hpds/processing/io/PfbWriter.java | 2 -- .../dbmi/avillach/hpds/processing/dictionary/ConceptTest.java | 3 ++- .../hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java index cef0fcf8..0e9da98c 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/Concept.java @@ -5,5 +5,5 @@ import java.util.Map; @JsonIgnoreProperties(ignoreUnknown = true) -public record Concept(String type, String conceptPath, String name, String display, String dataset, String description, Map meta) { +public record Concept(String conceptPath, String name, String display, String dataset, String description, Map meta) { } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index 655823e0..20868cbb 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -91,7 +91,6 @@ public void writeHeader(String[] data) { .fields() .requiredString("concept_path") .name("drs_uri").type(SchemaBuilder.array().items(SchemaBuilder.nullable().stringType())).noDefault() - .nullableString("type", "null") .nullableString("display", "null") .nullableString("dataset", "null") .nullableString("description", "null") @@ -165,7 +164,6 @@ private void writeDataDictionary() { } } } - dataDictionaryData.put("type", concept.type()); dataDictionaryData.put("display", concept.display()); dataDictionaryData.put("dataset", concept.dataset()); dataDictionaryData.put("description", concept.description()); diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java index 733f46fa..e391e126 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java @@ -13,10 +13,11 @@ public class ConceptTest { @Test public void jsonSerialization() throws JsonProcessingException { - Concept[] concepts = new Concept[]{new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]"))}; + Concept[] concepts = new Concept[]{new Concept("\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]"))}; ObjectMapper objectMapper = new ObjectMapper(); String serialized = objectMapper.writeValueAsString(concepts); + //String serialized = "[{\"type\":\"Categorical\",\"conceptPath\":\"\\\\phs003578\\\\AGE_CAT\\\\\",\"name\":\"AGE_CAT\",\"display\":\"Participant age, 4 categories\",\"dataset\":\"phs003578\",\"description\":\"Participant age, 4 categories\",\"values\":[\"16-29 years\",\"30-49 years\",\"50-64 years\",\"65+ years\"],\"allowFiltering\":true,\"studyAcronym\":\"RESPONSE\",\"children\":null,\"meta\":{\"values\":\"[\\\"16-29 years\\\",\\\"30-49 years\\\",\\\"50-64 years\\\",\\\"65+ years\\\"]\",\"description\":\"Participant age, 4 categories\",\"drs_uri\":\"[\\\"drs://dg.4503:dg.4503%2F0aa2fcab-5a40-40d9-86b5-ba42f3b71ab0\\\"]\",\"stigmatized\":\"false\"},\"table\":null,\"study\":{\"ref\":\"phs003578\",\"fullName\":\"REDS-IV-P Epidemiology, Surveillance and Preparedness of the Novel SARS-CoV-2 Epidemic\",\"abbreviation\":\"RESPONSE\",\"description\":\"Leveraging access to the blood supply and blood donors, the REDS-IV-P program began conducting the RESPONSE study (REDS-IV-P Epidemiology, Surveillance and Preparedness of the Novel SARS-CoV-2 Epidemic) in early 2020 in order to 1) evaluate if SARS-CoV-2 RNA was found in blood donations in the U.S. using an assay that could potentially be used to screen the blood supply if evidence of SARS-CoV-2 transfusion-transmission became apparent 2) conduct serosurveys using optimized assays/algorithms to monitor antibody reactivity in blood donor populations over time, 3) enroll SARS-CoV-2 positive donors and others into a longitudinal cohort study to answer fundamental questions about the evolution of viremia and immune responses, and 4) establish a sharable biorepository that includes specimens collected early on in the infection and potentially large volumes of plasma from infected/convalescent donors.Screening for SARS-CoV-2 RNAemia was completed using a SARS-CoV-2 nucleic acid test (NAT) performed on retained blood donor minipool samples from six geographic regions in the US. The study also included serosurveillance (i.e. testing for antibody directed against the SARS-CoV2 spike protein) of donations from the same six regions to document accruing seroincidence in blood donor populations and to project these rates in the general population. To enrich for donors with acute SARS-CoV-2 infection, another part of the study focused on donors reporting post-donation information (PDI) consistent with COVID-19 by testing plasma from all available PDI donations for SARS-CoV-2 RNA by NAT. Subjects who were diagnosed with COVID-19 based on PDI reports or who tested positive by SARS-CoV-2 NAT on index donation plasma were enrolled into a longitudinal follow-up study which collected multiple samples for up to one-year post-infection. The longitudinal follow-up study also enrolled community members who reported a new positive SARS-CoV-2 NAT test in the prior 7-14 days.The specific aims of the RESPONSE study were to:1. Establish the incidence of SARS-CoV-2 RNAemia in blood donations from the American Red Cross (ARC) regions in Los Angeles, Boston, and Minneapolis metropolitan areas, Bloodworks Northwest (BWNW), New York Blood Center (NYBC), and Vitalant San Francisco Bay Area, monthly between March and September of 20202. Conduct serosurveys to study antibody reactivity in same six areas as above monthly for March to August 20203. Document rates of Post Donation Information (PDI) reports to determine PDI rates relevant to SARS-CoV-2 clinical disease and test index donation plasma from PDI donors for SARS-CoV-2 RNA4. Enroll SARS-CoV-2 infected subjects into a longitudinal cohort study to answer fundamental questions on the evolution of viremia, early immune responses and waning of immunity over 3-12 months of follow-up5. Establish a sharable biorepository of samples from all of the above Aims for future researchThe data from Aims 3 and 4 above is being made available in BioData Catalyst.\",\"meta\":{\"study_link\":\"https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs003578.v1.p1\",\"phase\":\"p1\",\"sponsor\":\"National Heart, Lung, and Blood Institute\",\"study_accession\":\"phs003578.v1.p1\",\"study_design\":\"Prospective Longitudinal Cohort\",\"data_type\":\"P\",\"study_focus\":\"SARS-CoV-2\",\"version\":\"v1\"}},\"type\":\"Categorical\"}]"; Concept[] deserialized = objectMapper.readValue(serialized, Concept[].class); diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java index 2d150d27..36aa67b3 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java @@ -28,7 +28,7 @@ public void writeValidPFB() { PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString(), dictionaryService); Mockito.when(dictionaryService.getConcepts(List.of("patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"))) - .thenReturn(List.of(new Concept("Categorical", "\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]")))); + .thenReturn(List.of(new Concept("\\demographics\\age\\", "age", "AGE", null, "patient age", Map.of("drs_uri", "[\"a-drs.uri\", \"another-drs.uri\"]")))); pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"}); List> nullableList = new ArrayList<>(); From 50b8f85376c4c5bf98a351aa8fc870bfd1711d79 Mon Sep 17 00:00:00 2001 From: Ryan Amari Date: Wed, 20 Nov 2024 12:31:39 -0500 Subject: [PATCH 15/15] Changes from code review --- .../hms/dbmi/avillach/hpds/processing/io/PfbWriter.java | 1 - .../avillach/hpds/processing/upload/SignUrlService.java | 9 +++------ .../avillach/hpds/processing/dictionary/ConceptTest.java | 2 -- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java index 20868cbb..55e8e3c7 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java @@ -138,7 +138,6 @@ private void writeDataDictionary() { .collect(Collectors.toMap(Concept::conceptPath, Function.identity())); } catch (RuntimeException e) { log.error("Error fetching concepts from dictionary service", e); - return; } for (int i = 0; i < formattedFields.size(); i++) { diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java index 6c1c66c4..d6d950f3 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java @@ -26,8 +26,6 @@ public class SignUrlService { private final int signedUrlExpiryMinutes; private final Region region; - private final S3Client s3; - private static Logger log = LoggerFactory.getLogger(SignUrlService.class); @Autowired @@ -39,13 +37,12 @@ public SignUrlService( this.bucketName = bucketName; this.signedUrlExpiryMinutes = signedUrlExpiryMinutes; this.region = Region.of(region); - - s3 = S3Client.builder() - .region(this.region) - .build(); } public void uploadFile(File file, String objectKey) { + S3Client s3 = S3Client.builder() + .region(this.region) + .build(); putS3Object(s3, bucketName, objectKey, file); s3.close(); } diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java index e391e126..a9dd4b4e 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/dictionary/ConceptTest.java @@ -17,8 +17,6 @@ public void jsonSerialization() throws JsonProcessingException { ObjectMapper objectMapper = new ObjectMapper(); String serialized = objectMapper.writeValueAsString(concepts); - //String serialized = "[{\"type\":\"Categorical\",\"conceptPath\":\"\\\\phs003578\\\\AGE_CAT\\\\\",\"name\":\"AGE_CAT\",\"display\":\"Participant age, 4 categories\",\"dataset\":\"phs003578\",\"description\":\"Participant age, 4 categories\",\"values\":[\"16-29 years\",\"30-49 years\",\"50-64 years\",\"65+ years\"],\"allowFiltering\":true,\"studyAcronym\":\"RESPONSE\",\"children\":null,\"meta\":{\"values\":\"[\\\"16-29 years\\\",\\\"30-49 years\\\",\\\"50-64 years\\\",\\\"65+ years\\\"]\",\"description\":\"Participant age, 4 categories\",\"drs_uri\":\"[\\\"drs://dg.4503:dg.4503%2F0aa2fcab-5a40-40d9-86b5-ba42f3b71ab0\\\"]\",\"stigmatized\":\"false\"},\"table\":null,\"study\":{\"ref\":\"phs003578\",\"fullName\":\"REDS-IV-P Epidemiology, Surveillance and Preparedness of the Novel SARS-CoV-2 Epidemic\",\"abbreviation\":\"RESPONSE\",\"description\":\"Leveraging access to the blood supply and blood donors, the REDS-IV-P program began conducting the RESPONSE study (REDS-IV-P Epidemiology, Surveillance and Preparedness of the Novel SARS-CoV-2 Epidemic) in early 2020 in order to 1) evaluate if SARS-CoV-2 RNA was found in blood donations in the U.S. using an assay that could potentially be used to screen the blood supply if evidence of SARS-CoV-2 transfusion-transmission became apparent 2) conduct serosurveys using optimized assays/algorithms to monitor antibody reactivity in blood donor populations over time, 3) enroll SARS-CoV-2 positive donors and others into a longitudinal cohort study to answer fundamental questions about the evolution of viremia and immune responses, and 4) establish a sharable biorepository that includes specimens collected early on in the infection and potentially large volumes of plasma from infected/convalescent donors.Screening for SARS-CoV-2 RNAemia was completed using a SARS-CoV-2 nucleic acid test (NAT) performed on retained blood donor minipool samples from six geographic regions in the US. The study also included serosurveillance (i.e. testing for antibody directed against the SARS-CoV2 spike protein) of donations from the same six regions to document accruing seroincidence in blood donor populations and to project these rates in the general population. To enrich for donors with acute SARS-CoV-2 infection, another part of the study focused on donors reporting post-donation information (PDI) consistent with COVID-19 by testing plasma from all available PDI donations for SARS-CoV-2 RNA by NAT. Subjects who were diagnosed with COVID-19 based on PDI reports or who tested positive by SARS-CoV-2 NAT on index donation plasma were enrolled into a longitudinal follow-up study which collected multiple samples for up to one-year post-infection. The longitudinal follow-up study also enrolled community members who reported a new positive SARS-CoV-2 NAT test in the prior 7-14 days.The specific aims of the RESPONSE study were to:1. Establish the incidence of SARS-CoV-2 RNAemia in blood donations from the American Red Cross (ARC) regions in Los Angeles, Boston, and Minneapolis metropolitan areas, Bloodworks Northwest (BWNW), New York Blood Center (NYBC), and Vitalant San Francisco Bay Area, monthly between March and September of 20202. Conduct serosurveys to study antibody reactivity in same six areas as above monthly for March to August 20203. Document rates of Post Donation Information (PDI) reports to determine PDI rates relevant to SARS-CoV-2 clinical disease and test index donation plasma from PDI donors for SARS-CoV-2 RNA4. Enroll SARS-CoV-2 infected subjects into a longitudinal cohort study to answer fundamental questions on the evolution of viremia, early immune responses and waning of immunity over 3-12 months of follow-up5. Establish a sharable biorepository of samples from all of the above Aims for future researchThe data from Aims 3 and 4 above is being made available in BioData Catalyst.\",\"meta\":{\"study_link\":\"https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id=phs003578.v1.p1\",\"phase\":\"p1\",\"sponsor\":\"National Heart, Lung, and Blood Institute\",\"study_accession\":\"phs003578.v1.p1\",\"study_design\":\"Prospective Longitudinal Cohort\",\"data_type\":\"P\",\"study_focus\":\"SARS-CoV-2\",\"version\":\"v1\"}},\"type\":\"Categorical\"}]"; - Concept[] deserialized = objectMapper.readValue(serialized, Concept[].class); assertEquals(List.of(concepts), List.of(deserialized));