diff --git a/data/pom.xml b/data/pom.xml
index 20dee146..85c96dad 100644
--- a/data/pom.xml
+++ b/data/pom.xml
@@ -49,6 +49,17 @@
org.projectlombok
lombok
+
+ software.amazon.awssdk
+ s3
+
+
+
+ commons-logging
+ commons-logging
+
+
+
diff --git a/pom.xml b/pom.xml
index 9754c284..6b0e331e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,6 +28,7 @@
UTF-8
1.4.10
+ 2.20.153
@@ -199,7 +200,7 @@
edu.harvard.hms.dbmi.avillach
pic-sure-resource-api
- 2.1.0-SNAPSHOT
+ 2.2.0-SNAPSHOT
org.apache.logging.log4j
@@ -320,7 +321,18 @@
snappy-java
1.1.10.5
-
+
+ software.amazon.awssdk
+ s3
+ ${aws.version}
+
+
+
+ commons-logging
+ commons-logging
+
+
+
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java
index 7312b6f9..70e9d38a 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AsyncResult.java
@@ -43,6 +43,10 @@ public MediaType getResponseType() {
return responseType;
}
+ public File getFile() {
+ return stream.getFile();
+ }
+
public static enum Status{
SUCCESS {
@Override
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java
index d0ffc2f7..bc826817 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/ResultStoreStream.java
@@ -117,4 +117,8 @@ public long estimatedSize() {
public void closeWriter() {
writer.close();
}
+
+ public File getFile() {
+ return writer.getFile();
+ }
}
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
index 9f09bbd8..b10fc08c 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriter.java
@@ -2,7 +2,6 @@
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
-import org.apache.avro.file.Codec;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
@@ -21,10 +20,15 @@
public class PfbWriter implements ResultWriter {
+ public static final String PATIENT_TABLE_PREFIX = "pic-sure-";
private Logger log = LoggerFactory.getLogger(PfbWriter.class);
private final Schema metadataSchema;
private final Schema nodeSchema;
+
+ private final String queryId;
+
+ private final String patientTableName;
private SchemaBuilder.FieldAssembler entityFieldAssembler;
private List fields;
@@ -32,11 +36,14 @@ public class PfbWriter implements ResultWriter {
private File file;
private Schema entitySchema;
private Schema patientDataSchema;
+ private Schema relationSchema;
private static final Set SINGULAR_FIELDS = Set.of("patient_id");
- public PfbWriter(File tempFile) {
- file = tempFile;
+ public PfbWriter(File tempFile, String queryId) {
+ this.file = tempFile;
+ this.queryId = queryId;
+ this.patientTableName = formatFieldName(PATIENT_TABLE_PREFIX + queryId);
entityFieldAssembler = SchemaBuilder.record("entity")
.namespace("edu.harvard.dbmi")
.fields();
@@ -53,12 +60,19 @@ public PfbWriter(File tempFile) {
metadataRecord.requiredString("misc");
metadataRecord = metadataRecord.name("nodes").type(SchemaBuilder.array().items(nodeSchema)).noDefault();
metadataSchema = metadataRecord.endRecord();
+
+
+ SchemaBuilder.FieldAssembler relationRecord = SchemaBuilder.record("Relation")
+ .fields()
+ .requiredString("dst_name")
+ .requiredString("dst_id");
+ relationSchema = relationRecord.endRecord();
}
@Override
public void writeHeader(String[] data) {
fields = Arrays.stream(data.clone()).map(this::formatFieldName).collect(Collectors.toList());
- SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record("patientData")
+ SchemaBuilder.FieldAssembler patientRecords = SchemaBuilder.record(patientTableName)
.fields();
fields.forEach(field -> {
@@ -76,6 +90,7 @@ public void writeHeader(String[] data) {
entityFieldAssembler = entityFieldAssembler.name("object").type(objectSchema).noDefault();
entityFieldAssembler.nullableString("id", "null");
entityFieldAssembler.requiredString("name");
+ entityFieldAssembler = entityFieldAssembler.name("relations").type(SchemaBuilder.array().items(relationSchema)).noDefault();
entitySchema = entityFieldAssembler.endRecord();
DatumWriter datumWriter = new GenericDatumWriter(entitySchema);
@@ -126,6 +141,7 @@ private void writeMetadata() {
entityRecord.put("object", metadata);
entityRecord.put("name", "metadata");
entityRecord.put("id", "null");
+ entityRecord.put("relations", List.of());
try {
dataFileWriter.append(entityRecord);
@@ -163,8 +179,9 @@ public void writeMultiValueEntity(Collection>> entities) {
GenericRecord entityRecord = new GenericData.Record(entitySchema);
entityRecord.put("object", patientData);
- entityRecord.put("name", "patientData");
+ entityRecord.put("name", patientTableName);
entityRecord.put("id", patientId);
+ entityRecord.put("relations", List.of());
try {
dataFileWriter.append(entityRecord);
diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java
new file mode 100644
index 00000000..f80ddce3
--- /dev/null
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/upload/SignUrlService.java
@@ -0,0 +1,84 @@
+package edu.harvard.hms.dbmi.avillach.hpds.processing.upload;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Component;
+import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.regions.Region;
+import software.amazon.awssdk.services.s3.S3Client;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
+import software.amazon.awssdk.services.s3.model.PutObjectRequest;
+import software.amazon.awssdk.services.s3.presigner.S3Presigner;
+import software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest;
+import software.amazon.awssdk.services.s3.presigner.model.PresignedGetObjectRequest;
+
+import java.io.File;
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.Map;
+
+@Component
+public class SignUrlService {
+
+ private final String bucketName;
+ private final int signedUrlExpiryMinutes;
+ private final Region region;
+
+ private static Logger log = LoggerFactory.getLogger(SignUrlService.class);
+
+ @Autowired
+ public SignUrlService(
+ @Value("${data-export.s3.bucket-name:}") String bucketName,
+ @Value("${data-export.s3.region:us-east-1}") String region,
+ @Value("${data-export.s3.signedUrl-expiry-minutes:60}") int signedUrlExpiryMinutes
+ ) {
+ this.bucketName = bucketName;
+ this.signedUrlExpiryMinutes = signedUrlExpiryMinutes;
+ this.region = Region.of(region);
+ }
+
+ public void uploadFile(File file, String objectKey) {
+ S3Client s3 = S3Client.builder()
+ .region(region)
+ .build();
+
+ putS3Object(s3, bucketName, objectKey, file);
+ s3.close();
+ }
+
+ // This example uses RequestBody.fromFile to avoid loading the whole file into
+ // memory.
+ public void putS3Object(S3Client s3, String bucketName, String objectKey, File file) {
+ Map metadata = new HashMap<>();
+ PutObjectRequest putOb = PutObjectRequest.builder()
+ .bucket(bucketName)
+ .key(objectKey)
+ .metadata(metadata)
+ .build();
+
+ s3.putObject(putOb, RequestBody.fromFile(file));
+ log.info("Successfully placed " + objectKey + " into bucket " + bucketName);
+ }
+
+ public String createPresignedGetUrl(String keyName) {
+ PresignedGetObjectRequest presignedRequest;
+ try (S3Presigner presigner = S3Presigner.builder().region(region).build()) {
+ GetObjectRequest objectRequest = GetObjectRequest.builder()
+ .bucket(bucketName)
+ .key(keyName)
+ .build();
+
+ GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder()
+ .signatureDuration(Duration.ofMinutes(signedUrlExpiryMinutes)) // The URL will expire in 10 minutes.
+ .getObjectRequest(objectRequest)
+ .build();
+
+ presignedRequest = presigner.presignGetObject(presignRequest);
+ }
+ log.info("Presigned URL: [{}]", presignedRequest.url().toString());
+
+ return presignedRequest.url().toExternalForm();
+ }
+}
diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
index 7e18d1a7..d1819741 100644
--- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
+++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/io/PfbWriterTest.java
@@ -7,6 +7,7 @@
import java.io.File;
import java.util.ArrayList;
import java.util.List;
+import java.util.UUID;
import static org.junit.jupiter.api.Assertions.*;
@@ -15,7 +16,7 @@ public class PfbWriterTest {
@Test
public void writeValidPFB() {
- PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
+ PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
pfbWriter.writeHeader(new String[] {"patient_id", "\\demographics\\age\\", "\\phs123\\stroke\\"});
List> nullableList = new ArrayList<>();
@@ -38,21 +39,21 @@ public void writeValidPFB() {
@Test
public void formatFieldName_spacesAndBackslashes_replacedWithUnderscore() {
- PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
+ PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("\\Topmed Study Accession with Subject ID\\\\");
assertEquals("_Topmed_Study_Accession_with_Subject_ID__", formattedName);
}
@Test
public void formatFieldName_startsWithDigit_prependUnderscore() {
- PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
+ PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("123Topmed Study Accession with Subject ID\\\\");
assertEquals("_123Topmed_Study_Accession_with_Subject_ID__", formattedName);
}
@Test
public void formatFieldName_randomGarbage_replaceWithUnderscore() {
- PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"));
+ PfbWriter pfbWriter = new PfbWriter(new File("target/test-result.avro"), UUID.randomUUID().toString());
String formattedName = pfbWriter.formatFieldName("$$$my garbage @vro var!able nam#");
assertEquals("___my_garbage__vro_var_able_nam_", formattedName);
}
diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java
index a32839ad..1d0f8c2e 100644
--- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java
+++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/PicSureService.java
@@ -1,14 +1,14 @@
package edu.harvard.hms.dbmi.avillach.hpds.service;
-import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.InfoColumnMeta;
+import edu.harvard.hms.dbmi.avillach.hpds.processing.upload.SignUrlService;
import edu.harvard.hms.dbmi.avillach.hpds.service.util.Paginator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -28,7 +28,6 @@
import edu.harvard.dbmi.avillach.domain.*;
import edu.harvard.dbmi.avillach.util.UUIDv5;
import edu.harvard.hms.dbmi.avillach.hpds.crypto.Crypto;
-import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.FileBackedByteIndexedInfoStore;
import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.ColumnMeta;
import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.processing.*;
@@ -41,13 +40,15 @@ public class PicSureService {
@Autowired
public PicSureService(QueryService queryService, TimelineProcessor timelineProcessor, CountProcessor countProcessor,
- VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator) {
+ VariantListProcessor variantListProcessor, AbstractProcessor abstractProcessor, Paginator paginator,
+ SignUrlService signUrlService) {
this.queryService = queryService;
this.timelineProcessor = timelineProcessor;
this.countProcessor = countProcessor;
this.variantListProcessor = variantListProcessor;
this.abstractProcessor = abstractProcessor;
this.paginator = paginator;
+ this.signUrlService = signUrlService;
Crypto.loadDefaultKey();
}
@@ -67,6 +68,8 @@ public PicSureService(QueryService queryService, TimelineProcessor timelineProce
private final Paginator paginator;
+ private final SignUrlService signUrlService;
+
private static final String QUERY_METADATA_FIELD = "queryMetadata";
private static final int RESPONSE_CACHE_SIZE = 50;
@@ -213,19 +216,7 @@ private QueryStatus convertToQueryStatus(AsyncResult entity) {
public ResponseEntity queryResult(@PathVariable("resourceQueryId") UUID queryId, @RequestBody QueryRequest resultRequest) throws IOException {
AsyncResult result = queryService.getResultFor(queryId.toString());
if (result == null) {
- // This happens sometimes when users immediately request the status for a query
- // before it can be initialized. We wait a bit and try again before throwing an
- // error.
- try {
- Thread.sleep(100);
- } catch (InterruptedException e) {
- return ResponseEntity.status(500).build();
- }
-
- result = queryService.getResultFor(queryId.toString());
- if (result == null) {
- return ResponseEntity.status(404).build();
- }
+ return ResponseEntity.status(404).build();
}
if (result.getStatus() == AsyncResult.Status.SUCCESS) {
result.open();
@@ -237,6 +228,25 @@ public ResponseEntity queryResult(@PathVariable("resourceQueryId") UUID queryId,
}
}
+ @PostMapping(value = "/query/{resourceQueryId}/signed-url")
+ public ResponseEntity querySignedURL(@PathVariable("resourceQueryId") UUID queryId, @RequestBody QueryRequest resultRequest) throws IOException {
+ AsyncResult result = queryService.getResultFor(queryId.toString());
+ if (result == null) {
+ return ResponseEntity.status(404).build();
+ }
+ if (result.getStatus() == AsyncResult.Status.SUCCESS) {
+ File file = result.getFile();
+ signUrlService.uploadFile(file, file.getName());
+ String presignedGetUrl = signUrlService.createPresignedGetUrl(file.getName());
+ log.info("Presigned url: " + presignedGetUrl);
+ return ResponseEntity.ok()
+ .contentType(MediaType.APPLICATION_JSON)
+ .body(new SignedUrlResponse(presignedGetUrl));
+ } else {
+ return ResponseEntity.status(400).body("Status : " + result.getStatus().name());
+ }
+ }
+
@PostMapping("/query/{resourceQueryId}/status")
public QueryStatus queryStatus(@PathVariable("resourceQueryId") UUID queryId, @RequestBody QueryRequest request) {
return convertToQueryStatus(queryService.getStatusFor(queryId.toString()));
diff --git a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
index 5fe1a0a9..a41a94d2 100644
--- a/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
+++ b/service/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/service/QueryService.java
@@ -135,9 +135,10 @@ private AsyncResult initializeResult(Query query) throws ClassNotFoundException,
throw new RuntimeException("UNSUPPORTED RESULT TYPE");
}
+ String queryId = UUIDv5.UUIDFromString(query.toString()).toString();
ResultWriter writer;
if (ResultType.DATAFRAME_PFB.equals(query.getExpectedResultType())) {
- writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"));
+ writer = new PfbWriter(File.createTempFile("result-" + System.nanoTime(), ".avro"), queryId);
} else {
writer = new CsvWriter(File.createTempFile("result-" + System.nanoTime(), ".sstmp"));
}
@@ -145,7 +146,7 @@ private AsyncResult initializeResult(Query query) throws ClassNotFoundException,
AsyncResult result = new AsyncResult(query, p, writer)
.setStatus(AsyncResult.Status.PENDING)
.setQueuedTime(System.currentTimeMillis())
- .setId(UUIDv5.UUIDFromString(query.toString()).toString());
+ .setId(queryId);
query.setId(result.getId());
results.put(result.getId(), result);
return result;
diff --git a/service/src/main/resources/application-bdc-auth.properties b/service/src/main/resources/application-bdc-auth.properties
index 860f5d64..272b84ec 100644
--- a/service/src/main/resources/application-bdc-auth.properties
+++ b/service/src/main/resources/application-bdc-auth.properties
@@ -3,4 +3,8 @@ SMALL_TASK_THREADS = 1
LARGE_TASK_THREADS = 1
hpds.genomicProcessor.impl=localPatientDistributed
-HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/
\ No newline at end of file
+HPDS_GENOMIC_DATA_DIRECTORY=/opt/local/hpds/all/
+
+data-export.s3.bucket-name=pic-sure-auth-dev-data-export
+data-export.s3.region=us-east-1
+data-export.s3.signedUrl-expiry-minutes=30
\ No newline at end of file