ALS-7581: Add support for multi value CSV export
ramari16 committed Oct 21, 2024
1 parent 89357dc commit e588d65
Showing 6 changed files with 83 additions and 8 deletions.
Changed file 1 of 6 (ResultType):
@@ -94,5 +94,6 @@ public enum ResultType {
* Exports data as PFB, using avro
* <a href="https://uc-cdis.github.io/pypfb/">https://uc-cdis.github.io/pypfb/</a>
*/
-DATAFRAME_PFB
+DATAFRAME_PFB,
+DATAFRAME_MULTI
}
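For context, a minimal sketch (not part of this commit) of how a caller requests the new export type; it mirrors the body of the integration test added at the end of this commit and assumes an injected QueryService named queryService:

// Fragment assuming an autowired QueryService, as in the integration test below.
Query query = new Query();
query.setFields(List.of("\\open_access-1000Genomes\\data\\SYNTHETIC_AGE\\")); // concept path taken from the test
query.setExpectedResultType(ResultType.DATAFRAME_MULTI); // the new multi-value CSV export
AsyncResult asyncResult = queryService.runQuery(query);  // routed to pfbProcessor (see the QueryService change below)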
Changed file 2 of 6 (PfbProcessor):
@@ -28,7 +28,7 @@ public class PfbProcessor implements HpdsProcessor {
@Autowired
public PfbProcessor(AbstractProcessor abstractProcessor) {
this.abstractProcessor = abstractProcessor;
-ID_BATCH_SIZE = Integer.parseInt(System.getProperty("ID_BATCH_SIZE", "0"));
+ID_BATCH_SIZE = Integer.parseInt(System.getProperty("ID_BATCH_SIZE", "1000"));
}

@Override
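The default ID batch size changes from 0 to 1000. As an illustration (not part of the commit), System.getProperty only falls back to the default when the key is absent, so an explicit JVM flag still takes precedence:

// Hypothetical startup: java -DID_BATCH_SIZE=500 -jar <hpds jar>
// getProperty returns "500" in that case; without the flag it returns the default "1000".
int idBatchSize = Integer.parseInt(System.getProperty("ID_BATCH_SIZE", "1000"));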
Changed file 3 of 6 (CsvWriter):
@@ -1,5 +1,6 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing.io;

+import com.google.common.base.Joiner;
import org.springframework.http.MediaType;

import java.io.File;
@@ -9,6 +10,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.stream.Collectors;

public class CsvWriter implements ResultWriter {

@@ -43,13 +45,27 @@ public void writeEntity(Collection<String[]> data) {
try {
csvWriter.write(fileWriter, data);
} catch (IOException e) {
throw new RuntimeException("IOException while appending to CSV file", e);
throw new UncheckedIOException("IOException while appending to CSV file", e);
}
}

@Override
public void writeMultiValueEntity(Collection<List<List<String>>> data) {
throw new RuntimeException("Method not implemented");
List<String[]> collect = data.stream().map(line -> {
return line.stream()
.map(cell -> {
if (cell == null) {
return "";
}
return Joiner.on('\t').join(cell);
})
.toArray(String[]::new);
}).toList();
try {
csvWriter.write(fileWriter, collect);
} catch (IOException e) {
throw new UncheckedIOException("IOException while appending to CSV file", e);
}
}

@Override
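A sketch (not part of the commit) of the flattening behavior added above: each multi-valued cell is joined with tab characters into a single CSV field, and null cells become empty strings. The gene names are borrowed from the integration test below; the patient ID is purely illustrative.

import com.google.common.base.Joiner;

import java.util.Arrays;
import java.util.List;

public class MultiValueCellSketch {
    public static void main(String[] args) {
        // One logical row: a single-valued cell, a multi-valued cell, and a missing cell.
        List<List<String>> row = Arrays.asList(
                List.of("patient-1"),
                List.of("LOC102723996", "LOC101928576"),
                null);

        // Same mapping as CsvWriter.writeMultiValueEntity: null -> "", otherwise tab-join.
        String[] csvRow = row.stream()
                .map(cell -> cell == null ? "" : Joiner.on('\t').join(cell))
                .toArray(String[]::new);

        // Prints: [patient-1, LOC102723996<tab>LOC101928576, ]
        System.out.println(Arrays.toString(csvRow));
    }
}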
Changed file 4 of 6 (query REST endpoint):
@@ -180,8 +180,6 @@ public ResponseEntity<QueryStatus> query(@RequestBody QueryRequest queryJson) {
} catch (IOException e) {
log.error("IOException caught in query processing:", e);
return ResponseEntity.status(500).build();
-} catch (ClassNotFoundException e) {
-return ResponseEntity.status(500).build();
}
} else {
QueryStatus status = new QueryStatus();
Changed file 5 of 6 (QueryService):
@@ -83,7 +83,7 @@ public QueryService (AbstractProcessor abstractProcessor,
smallTaskExecutor = createExecutor(smallTaskExecutionQueue, SMALL_TASK_THREADS);
}

-public AsyncResult runQuery(Query query) throws ClassNotFoundException, IOException {
+public AsyncResult runQuery(Query query) throws IOException {
// Merging fields from filters into selected fields for user validation of results
mergeFilterFieldsIntoSelectedFields(query);

@@ -112,7 +112,7 @@ public int runCount(Query query) throws InterruptedException, ExecutionException
return countProcessor.runCounts(query);
}

-private AsyncResult initializeResult(Query query) throws ClassNotFoundException, FileNotFoundException, IOException {
+private AsyncResult initializeResult(Query query) throws IOException {

HpdsProcessor p;
switch(query.getExpectedResultType()) {
@@ -129,6 +129,7 @@ private AsyncResult initializeResult(Query query) throws ClassNotFoundException,
p = countProcessor;
break;
case DATAFRAME_PFB:
+case DATAFRAME_MULTI:
p = pfbProcessor;
break;
default :
Changed file 6 of 6 (new integration test, QueryServiceTest; all 59 lines added):
@@ -0,0 +1,59 @@
package edu.harvard.hms.dbmi.avillach.hpds.service;

import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.data.query.ResultType;
import edu.harvard.hms.dbmi.avillach.hpds.processing.AsyncResult;
import edu.harvard.hms.dbmi.avillach.hpds.test.util.BuildIntegrationTestEnvironment;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit.jupiter.SpringExtension;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.*;
@ExtendWith(SpringExtension.class)
@EnableAutoConfiguration
@SpringBootTest(classes = edu.harvard.hms.dbmi.avillach.hpds.service.HpdsApplication.class)
@ActiveProfiles("integration-test")
class QueryServiceTest {

@Autowired
private QueryService queryService;

@BeforeAll
public static void beforeAll() {
BuildIntegrationTestEnvironment instance = BuildIntegrationTestEnvironment.INSTANCE;
}

@Test
public void dataframeMulti() throws IOException, InterruptedException {
Query query = new Query();
List<Query.VariantInfoFilter> variantInfoFilters = new ArrayList<>();
Query.VariantInfoFilter variantInfoFilter = new Query.VariantInfoFilter();
variantInfoFilter.categoryVariantInfoFilters = Map.of("Gene_with_variant", new String[]{"LOC102723996", "LOC101928576"});
variantInfoFilters.add(variantInfoFilter);
query.setVariantInfoFilters(variantInfoFilters);
query.setFields(List.of("\\open_access-1000Genomes\\data\\SYNTHETIC_AGE\\"));
query.setExpectedResultType(ResultType.DATAFRAME_MULTI);

AsyncResult asyncResult = queryService.runQuery(query);

Thread.sleep(1000);

System.out.println(asyncResult.getStatus());
System.out.println(IOUtils.toString(new FileInputStream(asyncResult.getFile()), StandardCharsets.UTF_8));
;
}

}
