Skip to content

Commit

Permalink
Merge branch 'master' into feature-anserini-bm25
Browse files Browse the repository at this point in the history
  • Loading branch information
Linsen-gao-457 authored Dec 25, 2024
2 parents 9aef4fc + 6a9cacf commit 71677ee
Show file tree
Hide file tree
Showing 11 changed files with 530 additions and 421 deletions.
399 changes: 201 additions & 198 deletions docs/experiments-msmarco-passage.md

Large diffs are not rendered by default.

224 changes: 113 additions & 111 deletions docs/start-here.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/main/frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"@emotion/styled": "^11.11.5",
"@mui/material": "^5.10.10",
"framer-motion": "^11.2.11",
"next": "^14.2.10",
"next": "^14.2.15",
"postcss": "^8.4.31",
"react": "18.2.0",
"react-dom": "18.2.0",
Expand Down
130 changes: 65 additions & 65 deletions src/main/frontend/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1137,10 +1137,10 @@
prop-types "^15.8.1"
react-is "^18.2.0"

"@next/env@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/env/-/env-14.2.10.tgz#1d3178340028ced2d679f84140877db4f420333c"
integrity sha512-dZIu93Bf5LUtluBXIv4woQw2cZVZ2DJTjax5/5DOs3lzEOeKLy7GxRSr4caK9/SCPdaW6bCgpye6+n4Dh9oJPw==
"@next/env@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/env/-/env-14.2.15.tgz#06d984e37e670d93ddd6790af1844aeb935f332f"
integrity sha512-S1qaj25Wru2dUpcIZMjxeMVSwkt8BK4dmWHHiBuRstcIyOsMapqT4A4jSB6onvqeygkSSmOkyny9VVx8JIGamQ==

"@next/eslint-plugin-next@12.3.1":
version "12.3.1"
Expand All @@ -1149,50 +1149,50 @@
dependencies:
glob "7.1.7"

"@next/swc-darwin-arm64@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.10.tgz#49d10ca4086fbd59ee68e204f75d7136eda2aa80"
integrity sha512-V3z10NV+cvMAfxQUMhKgfQnPbjw+Ew3cnr64b0lr8MDiBJs3eLnM6RpGC46nhfMZsiXgQngCJKWGTC/yDcgrDQ==

"@next/swc-darwin-x64@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.10.tgz#0ebeae3afb8eac433882b79543295ab83624a1a8"
integrity sha512-Y0TC+FXbFUQ2MQgimJ/7Ina2mXIKhE7F+GUe1SgnzRmwFY3hX2z8nyVCxE82I2RicspdkZnSWMn4oTjIKz4uzA==

"@next/swc-linux-arm64-gnu@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.10.tgz#7e602916d2fb55a3c532f74bed926a0137c16f20"
integrity sha512-ZfQ7yOy5zyskSj9rFpa0Yd7gkrBnJTkYVSya95hX3zeBG9E55Z6OTNPn1j2BTFWvOVVj65C3T+qsjOyVI9DQpA==

"@next/swc-linux-arm64-musl@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.10.tgz#6b143f628ccee490b527562e934f8de578d4be47"
integrity sha512-n2i5o3y2jpBfXFRxDREr342BGIQCJbdAUi/K4q6Env3aSx8erM9VuKXHw5KNROK9ejFSPf0LhoSkU/ZiNdacpQ==

"@next/swc-linux-x64-gnu@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.10.tgz#086f2f16a0678890a1eb46518c4dda381b046082"
integrity sha512-GXvajAWh2woTT0GKEDlkVhFNxhJS/XdDmrVHrPOA83pLzlGPQnixqxD8u3bBB9oATBKB//5e4vpACnx5Vaxdqg==

"@next/swc-linux-x64-musl@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.10.tgz#1befef10ed8dbcc5047b5d637a25ae3c30a0bfc3"
integrity sha512-opFFN5B0SnO+HTz4Wq4HaylXGFV+iHrVxd3YvREUX9K+xfc4ePbRrxqOuPOFjtSuiVouwe6uLeDtabjEIbkmDA==

"@next/swc-win32-arm64-msvc@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.10.tgz#731f52c3ae3c56a26cf21d474b11ae1529531209"
integrity sha512-9NUzZuR8WiXTvv+EiU/MXdcQ1XUvFixbLIMNQiVHuzs7ZIFrJDLJDaOF1KaqttoTujpcxljM/RNAOmw1GhPPQQ==

"@next/swc-win32-ia32-msvc@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.10.tgz#32723ef7f04e25be12af357cc72ddfdd42fd1041"
integrity sha512-fr3aEbSd1GeW3YUMBkWAu4hcdjZ6g4NBl1uku4gAn661tcxd1bHs1THWYzdsbTRLcCKLjrDZlNp6j2HTfrw+Bg==

"@next/swc-win32-x64-msvc@14.2.10":
version "14.2.10"
resolved "https://registry.yarnpkg.com/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.10.tgz#ee1d036cb5ec871816f96baee7991035bb242455"
integrity sha512-UjeVoRGKNL2zfbcQ6fscmgjBAS/inHBh63mjIlfPg/NG8Yn2ztqylXt5qilYb6hoHIwaU2ogHknHWWmahJjgZQ==
"@next/swc-darwin-arm64@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.15.tgz#6386d585f39a1c490c60b72b1f76612ba4434347"
integrity sha512-Rvh7KU9hOUBnZ9TJ28n2Oa7dD9cvDBKua9IKx7cfQQ0GoYUwg9ig31O2oMwH3wm+pE3IkAQ67ZobPfEgurPZIA==

"@next/swc-darwin-x64@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.15.tgz#b7baeedc6a28f7545ad2bc55adbab25f7b45cb89"
integrity sha512-5TGyjFcf8ampZP3e+FyCax5zFVHi+Oe7sZyaKOngsqyaNEpOgkKB3sqmymkZfowy3ufGA/tUgDPPxpQx931lHg==

"@next/swc-linux-arm64-gnu@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.15.tgz#fa13c59d3222f70fb4cb3544ac750db2c6e34d02"
integrity sha512-3Bwv4oc08ONiQ3FiOLKT72Q+ndEMyLNsc/D3qnLMbtUYTQAmkx9E/JRu0DBpHxNddBmNT5hxz1mYBphJ3mfrrw==

"@next/swc-linux-arm64-musl@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.15.tgz#30e45b71831d9a6d6d18d7ac7d611a8d646a17f9"
integrity sha512-k5xf/tg1FBv/M4CMd8S+JL3uV9BnnRmoe7F+GWC3DxkTCD9aewFRH1s5rJ1zkzDa+Do4zyN8qD0N8c84Hu96FQ==

"@next/swc-linux-x64-gnu@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.15.tgz#5065db17fc86f935ad117483f21f812dc1b39254"
integrity sha512-kE6q38hbrRbKEkkVn62reLXhThLRh6/TvgSP56GkFNhU22TbIrQDEMrO7j0IcQHcew2wfykq8lZyHFabz0oBrA==

"@next/swc-linux-x64-musl@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.15.tgz#3c4a4568d8be7373a820f7576cf33388b5dab47e"
integrity sha512-PZ5YE9ouy/IdO7QVJeIcyLn/Rc4ml9M2G4y3kCM9MNf1YKvFY4heg3pVa/jQbMro+tP6yc4G2o9LjAz1zxD7tQ==

"@next/swc-win32-arm64-msvc@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.15.tgz#fb812cc4ca0042868e32a6a021da91943bb08b98"
integrity sha512-2raR16703kBvYEQD9HNLyb0/394yfqzmIeyp2nDzcPV4yPjqNUG3ohX6jX00WryXz6s1FXpVhsCo3i+g4RUX+g==

"@next/swc-win32-ia32-msvc@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.15.tgz#ec26e6169354f8ced240c1427be7fd485c5df898"
integrity sha512-fyTE8cklgkyR1p03kJa5zXEaZ9El+kDNM5A+66+8evQS5e/6v0Gk28LqA0Jet8gKSOyP+OTm/tJHzMlGdQerdQ==

"@next/swc-win32-x64-msvc@14.2.15":
version "14.2.15"
resolved "https://registry.yarnpkg.com/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.15.tgz#18d68697002b282006771f8d92d79ade9efd35c4"
integrity sha512-SzqGbsLsP9OwKNUG9nekShTwhj6JSB9ZLMWQ8g1gG6hdE5gQLncbnbymrwy2yVmH9nikSLYRYxYMFu78Ggp7/g==

"@nodelib/fs.scandir@2.1.5":
version "2.1.5"
Expand Down Expand Up @@ -2564,37 +2564,37 @@ ms@^2.1.1:
integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==

nanoid@^3.3.6, nanoid@^3.3.7:
version "3.3.7"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.7.tgz#d0c301a691bc8d54efa0a2226ccf3fe2fd656bd8"
integrity sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==
version "3.3.8"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.3.8.tgz#b1be3030bee36aaff18bacb375e5cce521684baf"
integrity sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==

natural-compare@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
integrity sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==

next@^14.2.10:
version "14.2.10"
resolved "https://registry.yarnpkg.com/next/-/next-14.2.10.tgz#331981a4fecb1ae8af1817d4db98fc9687ee1cb6"
integrity sha512-sDDExXnh33cY3RkS9JuFEKaS4HmlWmDKP1VJioucCG6z5KuA008DPsDZOzi8UfqEk3Ii+2NCQSJrfbEWtZZfww==
next@^14.2.15:
version "14.2.15"
resolved "https://registry.yarnpkg.com/next/-/next-14.2.15.tgz#348e5603e22649775d19c785c09a89c9acb5189a"
integrity sha512-h9ctmOokpoDphRvMGnwOJAedT6zKhwqyZML9mDtspgf4Rh3Pn7UTYKqePNoDvhsWBAO5GoPNYshnAUGIazVGmw==
dependencies:
"@next/env" "14.2.10"
"@next/env" "14.2.15"
"@swc/helpers" "0.5.5"
busboy "1.6.0"
caniuse-lite "^1.0.30001579"
graceful-fs "^4.2.11"
postcss "8.4.31"
styled-jsx "5.1.1"
optionalDependencies:
"@next/swc-darwin-arm64" "14.2.10"
"@next/swc-darwin-x64" "14.2.10"
"@next/swc-linux-arm64-gnu" "14.2.10"
"@next/swc-linux-arm64-musl" "14.2.10"
"@next/swc-linux-x64-gnu" "14.2.10"
"@next/swc-linux-x64-musl" "14.2.10"
"@next/swc-win32-arm64-msvc" "14.2.10"
"@next/swc-win32-ia32-msvc" "14.2.10"
"@next/swc-win32-x64-msvc" "14.2.10"
"@next/swc-darwin-arm64" "14.2.15"
"@next/swc-darwin-x64" "14.2.15"
"@next/swc-linux-arm64-gnu" "14.2.15"
"@next/swc-linux-arm64-musl" "14.2.15"
"@next/swc-linux-x64-gnu" "14.2.15"
"@next/swc-linux-x64-musl" "14.2.15"
"@next/swc-win32-arm64-msvc" "14.2.15"
"@next/swc-win32-ia32-msvc" "14.2.15"
"@next/swc-win32-x64-msvc" "14.2.15"

object-assign@^4.1.1:
version "4.1.1"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,20 @@

package io.anserini.collection;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

import java.util.ArrayList;

/**
* Collection class for managing Parquet dense vectors
* Extends the DocumentCollection class for handling documents.
Expand Down Expand Up @@ -87,8 +85,8 @@ public FileSegment<ParquetDenseVectorCollection.Document> createFileSegment(Buff
public static class Segment extends FileSegment<ParquetDenseVectorCollection.Document> {
private List<double[]> vectors; // List to store vectors from the Parquet file
private List<String> ids; // List to store document IDs
private List<String> contents; // List to store contents of the documents
private int currentIndex; // Current index for iteration
private ParquetReader<Group> reader;
private boolean readerInitialized;

/**
* Constructor for the Segment class using a file path.
Expand Down Expand Up @@ -126,33 +124,12 @@ private void initializeParquetReader(java.nio.file.Path path) throws IOException
// name
org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(path.toString());

// Create a ParquetReader with GroupReadSupport to read Group objects
ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), hadoopPath).build();
reader = ParquetReader.builder(new GroupReadSupport(), hadoopPath).build();

// Initialize lists to store data read from the Parquet file
vectors = new ArrayList<>();
ids = new ArrayList<>();

Group record;
// Read each record from the Parquet file
while ((record = reader.read()) != null) {
// Extract the docid (String) from the record
String docid = record.getString("docid", 0);
ids.add(docid);

// Extract the vector (double[]) from the record
Group vectorGroup = record.getGroup("vector", 0); // Access the 'vector' field
int vectorSize = vectorGroup.getFieldRepetitionCount(0); // Get the number of elements in the vector
double[] vector = new double[vectorSize];
for (int i = 0; i < vectorSize; i++) {
Group listGroup = vectorGroup.getGroup(0, i); // Access the 'list' group
vector[i] = listGroup.getDouble("element", 0); // Get the double value from the 'element' field
}
vectors.add(vector);
}

reader.close();
currentIndex = 0;
readerInitialized = true;
}

/**
Expand All @@ -164,19 +141,34 @@ private void initializeParquetReader(java.nio.file.Path path) throws IOException
@Override
protected synchronized void readNext() throws IOException, NoSuchElementException {
// Check if we have reached the end of the list
if (currentIndex >= ids.size()) {
if(atEOF || !readerInitialized){
throw new NoSuchElementException("End of file reached");
}
Group record = reader.read();
if (record == null) {
atEOF = true;
reader.close();
readerInitialized = false;
throw new NoSuchElementException("End of file reached");
}

// Get the current document's ID, contents, and vector
String id = ids.get(currentIndex);
double[] vector = vectors.get(currentIndex);

// Read each record from the Parquet file
// Extract the docid (String) from the record
String docid = record.getString("docid", 0);
ids.add(docid);

// Extract the vector (double[]) from the record
Group vectorGroup = record.getGroup("vector", 0); // Access the 'vector' field
int vectorSize = vectorGroup.getFieldRepetitionCount(0); // Get the number of elements in the vector
double[] vector = new double[vectorSize];
for (int i = 0; i < vectorSize; i++) {
Group listGroup = vectorGroup.getGroup(0, i); // Access the 'list' group
vector[i] = listGroup.getDouble("element", 0); // Get the double value from the 'element' field
}
vectors.add(vector);

// Create a new Document object with the retrieved data
bufferedRecord = new ParquetDenseVectorCollection.Document(id, vector, "");

currentIndex++;
bufferedRecord = new ParquetDenseVectorCollection.Document(docid, vector, "");
}
}

Expand Down
8 changes: 8 additions & 0 deletions src/main/java/io/anserini/search/topicreader/Topics.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,26 @@ public enum Topics {
TREC2021_DL_UNICOIL_NOEXP(TsvIntTopicReader.class,"topics.dl21.unicoil-noexp.0shot.tsv.gz"),
TREC2021_DL_SPLADE_PP_ED(TsvIntTopicReader.class,"topics.dl21.splade-pp-ed.tsv.gz"),
TREC2021_DL_SPLADE_PP_SD(TsvIntTopicReader.class,"topics.dl21.splade-pp-sd.tsv.gz"),
TREC2021_DL_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.dl21.snowflake-arctic-embed-l.jsonl.gz"),
TREC2022_DL(TsvIntTopicReader.class,"topics.dl22.txt"),
TREC2022_DL_UNICOIL(TsvIntTopicReader.class,"topics.dl22.unicoil.0shot.tsv.gz"),
TREC2022_DL_UNICOIL_NOEXP(TsvIntTopicReader.class,"topics.dl22.unicoil-noexp.0shot.tsv.gz"),
TREC2022_DL_SPLADE_PP_ED(TsvIntTopicReader.class,"topics.dl22.splade-pp-ed.tsv.gz"),
TREC2022_DL_SPLADE_PP_SD(TsvIntTopicReader.class,"topics.dl22.splade-pp-sd.tsv.gz"),
TREC2022_DL_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.dl22.snowflake-arctic-embed-l.jsonl.gz"),
TREC2023_DL(TsvIntTopicReader.class, "topics.dl23.txt"),
TREC2023_DL_UNICOIL(TsvIntTopicReader.class,"topics.dl23.unicoil.0shot.tsv.gz"),
TREC2023_DL_UNICOIL_NOEXP(TsvIntTopicReader.class,"topics.dl23.unicoil-noexp.0shot.tsv.gz"),
TREC2023_DL_SPLADE_PP_ED(TsvIntTopicReader.class,"topics.dl23.splade-pp-ed.tsv.gz"),
TREC2023_DL_SPLADE_PP_SD(TsvIntTopicReader.class,"topics.dl23.splade-pp-sd.tsv.gz"),
TREC2023_DL_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.dl23.snowflake-arctic-embed-l.jsonl.gz"),

TREC2024_RAG_RAGGY_DEV(TsvIntTopicReader.class, "topics.rag24.raggy-dev.txt"),
TREC2024_RAG_RAGGY_DEV_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.rag24.raggy-dev.snowflake-arctic-embed-l.jsonl.gz"),
TREC2024_RAG_RESEARCHY_DEV(TsvIntTopicReader.class, "topics.rag24.researchy-dev.txt"),
TREC2024_RAG_RESEARCHY_DEV_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.rag24.researchy-dev.snowflake-arctic-embed-l.jsonl.gz"),
TREC2024_RAG_TEST(TsvStringTopicReader.class, "topics.rag24.test.txt"),
TREC2024_RAG_TEST_SNOWFLAKE_ARCTIC_EMBED_L(JsonStringVectorTopicReader.class, "topics.rag24.test.snowflake-arctic-embed-l.jsonl.gz"),

// MS MARCO V1 topics
MSMARCO_DOC_DEV(TsvIntTopicReader.class,"topics.msmarco-doc.dev.txt"),
Expand All @@ -120,9 +126,11 @@ public enum Topics {
MSMARCO_V2_DOC_DEV(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev.txt"),
MSMARCO_V2_DOC_DEV_UNICOIL(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev.unicoil.0shot.tsv.gz"),
MSMARCO_V2_DOC_DEV_UNICOIL_NOEXP(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz"),
MSMARCO_V2_DOC_DEV_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.msmarco-v2-doc.dev.snowflake-arctic-embed-l.jsonl.gz"),
MSMARCO_V2_DOC_DEV2(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev2.txt"),
MSMARCO_V2_DOC_DEV2_UNICOIL(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev2.unicoil.0shot.tsv.gz"),
MSMARCO_V2_DOC_DEV2_UNICOIL_NOEXP(TsvIntTopicReader.class,"topics.msmarco-v2-doc.dev2.unicoil-noexp.0shot.tsv.gz"),
MSMARCO_V2_DOC_DEV2_SNOWFLAKE_ARCTIC_EMBED_L(JsonIntVectorTopicReader.class, "topics.msmarco-v2-doc.dev2.snowflake-arctic-embed-l.jsonl.gz"),
MSMARCO_V2_PASSAGE_DEV(TsvIntTopicReader.class, "topics.msmarco-v2-passage.dev.txt"),
MSMARCO_V2_PASSAGE_DEV_UNICOIL(TsvIntTopicReader.class, "topics.msmarco-v2-passage.dev.unicoil.0shot.tsv.gz"),
MSMARCO_V2_PASSAGE_DEV_UNICOIL_NOEXP(TsvIntTopicReader.class, "topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz"),
Expand Down
21 changes: 21 additions & 0 deletions src/test/java/io/anserini/index/IndexFlatDenseVectorsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,28 @@ public void test1() throws Exception {
assertNotNull(results);
assertEquals(100, results.get("documents"));
}

@Test
public void testParquet() throws Exception {
String indexPath = "target/lucene-test-index.flat." + System.currentTimeMillis();
String[] indexArgs = new String[] {
"-collection", "ParquetDenseVectorCollection",
"-input", "src/test/resources/sample_docs/parquet/msmarco-passage-bge-base-en-v1.5.parquet/",
"-index", indexPath,
"-generator", "ParquetDenseVectorDocumentGenerator",
"-threads", "1"
};

IndexFlatDenseVectors.main(indexArgs);

IndexReader reader = IndexReaderUtils.getReader(indexPath);
assertNotNull(reader);

Map<String, Object> results = IndexReaderUtils.getIndexStats(reader, Constants.VECTOR);
assertNotNull(results);
assertEquals(10, results.get("documents"));
}

@Test
public void testQuantizedInt8() throws Exception {
String indexPath = "target/lucene-test-index.flat." + System.currentTimeMillis();
Expand Down
Loading

0 comments on commit 71677ee

Please sign in to comment.