Skip to content

Commit

Permalink
MIRACL qrels and topic for surprise lang dev (castorini#2042)
Browse files Browse the repository at this point in the history
* add dev topics and qrels for miracl surprise language

* set analyzer to whitespace when the language is Yoruba (yo)
  • Loading branch information
crystina-z authored and Thong Nguyen committed Mar 3, 2023
1 parent 42ae782 commit ecde5a9
Show file tree
Hide file tree
Showing 8 changed files with 5,146 additions and 4 deletions.
4 changes: 3 additions & 1 deletion src/main/java/io/anserini/eval/Qrels.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ public enum Qrels {
MIRACL_V10_SW_DEV("topics-and-qrels/qrels.miracl-v1.0-sw-dev.tsv"),
MIRACL_V10_TE_DEV("topics-and-qrels/qrels.miracl-v1.0-te-dev.tsv"),
MIRACL_V10_TH_DEV("topics-and-qrels/qrels.miracl-v1.0-th-dev.tsv"),
MIRACL_V10_ZH_DEV("topics-and-qrels/qrels.miracl-v1.0-zh-dev.tsv");
MIRACL_V10_ZH_DEV("topics-and-qrels/qrels.miracl-v1.0-zh-dev.tsv"),
MIRACL_V10_DE_DEV("topics-and-qrels/qrels.miracl-v1.0-de-dev.tsv"),
MIRACL_V10_YO_DEV("topics-and-qrels/qrels.miracl-v1.0-yo-dev.tsv");

public final String path;

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/io/anserini/index/IndexCollection.java
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ public Counters run() throws IOException {
config = new IndexWriterConfig(ukrainianAnalyzer);
} else if (args.language.equals("zh") || args.language.equals("ko")) {
config = new IndexWriterConfig(chineseAnalyzer);
} else if (args.language.equals("sw") || args.language.equals("te")) {
} else if (args.language.equals("sw") || args.language.equals("yo")) {
// For Mr.TyDi and MIRACL: sw and yo are indexed with the whitespace analyzer rather than a language-specific Lucene analyzer.
config = new IndexWriterConfig(whitespaceAnalyzer);
} else if (args.pretokenized) {
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/io/anserini/search/topicreader/Topics.java
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,9 @@ public enum Topics {
MIRACL_V10_SW_DEV(TsvIntTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-sw-dev.tsv"),
MIRACL_V10_TE_DEV(TsvIntTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-te-dev.tsv"),
MIRACL_V10_TH_DEV(TsvIntTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-th-dev.tsv"),
MIRACL_V10_ZH_DEV(TsvStringTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-zh-dev.tsv");
MIRACL_V10_ZH_DEV(TsvStringTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-zh-dev.tsv"),
MIRACL_V10_DE_DEV(TsvStringTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-de-dev.tsv"),
MIRACL_V10_YO_DEV(TsvStringTopicReader.class, "topics-and-qrels/topics.miracl-v1.0-yo-dev.tsv");

public final String path;
public final Class<? extends TopicReader> readerClass;
Expand Down
Loading

0 comments on commit ecde5a9

Please sign in to comment.