Skip to content

Commit

Permalink
fix: test
Browse files Browse the repository at this point in the history
  • Loading branch information
ClemDoum committed Nov 7, 2024
1 parent 60758ee commit 820b524
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void test_get_queue_names_for_batch_nlp_pipeline() {
/**
 * Builds a {@code PipelineHelper} from a properties map whose "stages" entry ends with
 * BATCHNLP and asserts that {@code getQueueNameFor(Stage.BATCHNLP)} returns
 * "extract:queue:batchnlp".
 */
@Test
public void test_get_queue_names_for_batch_nlp_pipeline_from_index() {
PipelineHelper pipelineHelper = new PipelineHelper(new PropertiesProvider(new HashMap<>() {{
// NOTE(review): "stages" is put twice below; the second put replaces the first, so only
// the CREATENLPBATCHESFROMINDEX value reaches PropertiesProvider. Presumably the first
// line is the pre-change version left over from the diff view - confirm before relying on it.
put("stages", "BATCHENQUEUEIDX,BATCHNLP");
put("stages", "CREATENLPBATCHESFROMINDEX,BATCHNLP");
}}));
// The queue name expected for the BATCHNLP stage with the configuration above.
assertThat(pipelineHelper.getQueueNameFor(Stage.BATCHNLP)).isEqualTo("extract:queue:batchnlp");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import org.icij.datashare.asynctasks.CancellableTask;
import org.icij.datashare.asynctasks.Task;
Expand Down Expand Up @@ -59,14 +60,14 @@ public Long call() throws Exception {
return 0L;
}
int batchSize = this.docs.size();
int updateRate = batchSize / 10;
int updateRate = Integer.max(batchSize / 10, 1);
Language language = this.docs.get(0).language();
pipeline.initialize(language);
logger.info("performing NER on {} docs in {}...", batchSize, language);
// TODO: for now, none of the Java NER implementations seem to support batch processing, so we just iterate over docs one by one
// TODO: we could improve performance by fetching docs and processing them concurrently...
int nProcessed = 0;
this.progress.apply(0.0);
Optional.ofNullable(this.progress).ifPresent(p -> p.apply(0.0));
for (CreateNlpBatchesFromIndex.BatchDocument doc : this.docs) {
String project = doc.project();
Document indexDoc = indexer.get(doc.id(), doc.rootDocument(), EXCLUDED_SOURCES);
Expand All @@ -87,11 +88,12 @@ public Long call() throws Exception {
}
nProcessed += 1;
if (nProcessed % updateRate == 0) {
this.progress.apply((double) nProcessed / (double) batchSize);
Double prog = (double) nProcessed / (double) batchSize;
Optional.ofNullable(this.progress).ifPresent(p -> p.apply(prog));
}
}
pipeline.terminate(language);
this.progress.apply(1.0);
Optional.ofNullable(this.progress).ifPresent(p -> p.apply(1.0));
return (long) batchSize;
}

Expand Down

0 comments on commit 820b524

Please sign in to comment.