Skip to content

Commit

Permalink
Adding integ tests for scenario of hybrid query with aggregations (#632)
Browse files Browse the repository at this point in the history
* Adding tests and params to ignore tests if needed

Signed-off-by: Martin Gaievski <[email protected]>
(cherry picked from commit dd3b30c)
Signed-off-by: Martin Gaievski <[email protected]>
  • Loading branch information
martin-gaievski committed Apr 19, 2024
1 parent 5d79f90 commit a169391
Show file tree
Hide file tree
Showing 8 changed files with 2,047 additions and 0 deletions.
71 changes: 71 additions & 0 deletions .github/workflows/test_aggregations.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Workflow that runs the opt-in aggregation integration tests (hybrid query + aggregations).
# These tests are excluded from the regular integTest task unless -Dtest_aggs=true is passed
# (see the filter in build.gradle), so this workflow is where they get exercised.
name: Run Additional Tests for Neural Search
# Triggers: nightly schedule, plus pushes and pull requests for the listed branch patterns.
on:
schedule:
- cron: '0 0 * * *' # every night
push:
branches:
- "*"
- "feature/**"
pull_request:
branches:
- "*"
- "feature/**"

jobs:
# Reusable workflow from opensearch-build; its output provides the CI image used by the Linux job below.
Get-CI-Image-Tag:
uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
with:
product: opensearch

# Linux job: runs inside the CI container image, once per Java version in the matrix.
Check-neural-search-linux:
needs: Get-CI-Image-Tag
strategy:
matrix:
java: [11, 17, 21]
os: [ubuntu-latest]

name: Integ Tests Linux
runs-on: ${{ matrix.os }}
container:
# using the same image which is used by opensearch-build team to build the OpenSearch Distribution
# this image tag is subject to change as more dependencies and updates will arrive over time
image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
# need to switch to root so that github actions can install runner binary on container without permission issues.
options: --user root


steps:
- name: Checkout neural-search
uses: actions/checkout@v1

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

# The container starts as root (see options above); the workspace is handed over to uid 1000 and
# gradle is executed as that user instead of root.
# NOTE(review): presumably because the test cluster must not run as root - confirm.
# Only the aggregation IT classes are selected via --tests, with the opt-in flag enabled.
- name: Run tests
run: |
chown -R 1000:1000 `pwd`
su `id -un 1000` -c "./gradlew ':integTest' -Dtest_aggs=true --tests \"org.opensearch.neuralsearch.query.aggregation.*IT\""
# Windows job: same test selection and Java matrix, executed directly on the hosted runner
# (no container and no dependency on Get-CI-Image-Tag).
Check-neural-search-windows:
strategy:
matrix:
java: [11, 17, 21]
os: [windows-latest]

name: Integ Tests Windows
runs-on: ${{ matrix.os }}

steps:
- name: Checkout neural-search
uses: actions/checkout@v1

- name: Setup Java ${{ matrix.java }}
uses: actions/setup-java@v1
with:
java-version: ${{ matrix.java }}

- name: Run tests
run: |
./gradlew ':integTest' -Dtest_aggs=true --tests "org.opensearch.neuralsearch.query.aggregation.*IT"
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Enhancements
### Bug Fixes
### Infrastructure
- Adding integration tests for scenario of hybrid query with aggregations ([#632](https://github.com/opensearch-project/neural-search/pull/632))
### Documentation
### Maintenance
### Refactoring
Expand Down
5 changes: 5 additions & 0 deletions DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ Additionally, to run integration tests on multi nodes with security enabled, run
./gradlew :integTest -Dsecurity.enabled=true -PnumNodes=3
```

Some integration tests are skipped by default, mainly to save time and resources. A special parameter is required to include those tests in the executed test suite. For example, the following command enables additional tests for aggregations when they are bundled with hybrid queries:
```
./gradlew :integTest -PnumNodes=3 -Dtest_aggs=true
```

Integration tests can be run with remote cluster. For that run the following command and replace host/port/cluster name values with ones for the target cluster:

```
Expand Down
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,12 @@ task integTest(type: RestIntegTestTask) {
description = "Run tests against a cluster"
testClassesDirs = sourceSets.test.output.classesDirs
classpath = sourceSets.test.runtimeClasspath
// Aggregation integration tests for hybrid query are opt-in: they run only when the JVM system
// property test_aggs parses as true (e.g. -Dtest_aggs=true). When the property is absent the
// default "false" is used.
boolean runCompleteAggsTestSuite = Boolean.parseBoolean(System.getProperty('test_aggs', "false"))
if (!runCompleteAggsTestSuite) {
filter {
// drop the aggregation IT classes from this task when the opt-in flag is not set
excludeTestsMatching "org.opensearch.neuralsearch.query.aggregation.*IT"
}
}
}
tasks.named("check").configure { dependsOn(integTest) }

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.query.aggregation;

import lombok.SneakyThrows;
import org.junit.BeforeClass;
import org.opensearch.neuralsearch.BaseNeuralSearchIT;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.IntStream;

import static org.opensearch.neuralsearch.TestUtils.RELATION_EQUAL_TO;
import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getNestedHits;
import static org.opensearch.neuralsearch.util.AggregationsTestUtils.getTotalHits;

/**
 * Shared base for integration tests that combine aggregations with hybrid query.
 *
 * <p>Provides the constants used by those tests (field names, field values, aggregation names),
 * a one-time cluster setup, a helper that creates the test index with six documents if it does
 * not exist yet, and a common assertion over the hits section of a search response.
 */
public class BaseAggregationsWithHybridQueryIT extends BaseNeuralSearchIT {
    // ---- text field: document contents and query terms ----
    protected static final String TEST_DOC_TEXT1 = "Hello world";
    protected static final String TEST_DOC_TEXT2 = "Hi to this place";
    protected static final String TEST_DOC_TEXT3 = "We would like to welcome everyone";
    protected static final String TEST_DOC_TEXT4 = "Hello, I'm glad to you see you pal";
    protected static final String TEST_DOC_TEXT5 = "People keep telling me orange but I still prefer pink";
    protected static final String TEST_DOC_TEXT6 = "She traveled because it cost the same as therapy and was a lot more enjoyable";
    protected static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field-1";
    protected static final String TEST_QUERY_TEXT3 = "hello";
    protected static final String TEST_QUERY_TEXT4 = "cost";
    protected static final String TEST_QUERY_TEXT5 = "welcome";

    // ---- nested "user" field: sub-field names and their values ----
    protected static final String NESTED_TYPE_FIELD_USER = "user";
    protected static final String NESTED_FIELD_FIRSTNAME = "firstname";
    protected static final String NESTED_FIELD_LASTNAME = "lastname";
    protected static final String NESTED_FIELD_FIRSTNAME_JOHN = "john";
    protected static final String NESTED_FIELD_LASTNAME_BLACK = "black";
    protected static final String NESTED_FIELD_FIRSTNAME_FRODO = "frodo";
    protected static final String NESTED_FIELD_LASTNAME_BAGGINS = "baggins";
    protected static final String NESTED_FIELD_FIRSTNAME_MOHAMMED = "mohammed";
    protected static final String NESTED_FIELD_LASTNAME_EZAB = "ezab";
    protected static final String NESTED_FIELD_FIRSTNAME_SUN = "sun";
    protected static final String NESTED_FIELD_LASTNAME_WUKONG = "wukong";
    protected static final String NESTED_FIELD_FIRSTNAME_VASILISA = "vasilisa";
    protected static final String NESTED_FIELD_LASTNAME_WISE = "the wise";

    // ---- integer "doc_index" field ----
    protected static final String INTEGER_FIELD_DOCINDEX = "doc_index";
    protected static final int INTEGER_FIELD_DOCINDEX_1234 = 1234;
    protected static final int INTEGER_FIELD_DOCINDEX_2345 = 2345;
    protected static final int INTEGER_FIELD_DOCINDEX_3456 = 3456;
    protected static final int INTEGER_FIELD_DOCINDEX_4567 = 4567;

    // ---- keyword "doc_keyword" field ----
    protected static final String KEYWORD_FIELD_DOCKEYWORD = "doc_keyword";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_WORKABLE = "workable";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_ANGRY = "angry";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_LIKABLE = "likeable";
    protected static final String KEYWORD_FIELD_DOCKEYWORD_ENTIRE = "entire";

    // ---- date "doc_date" field ----
    protected static final String DATE_FIELD = "doc_date";
    protected static final String DATE_FIELD_01031995 = "01/03/1995";
    protected static final String DATE_FIELD_05022015 = "05/02/2015";
    protected static final String DATE_FIELD_07232007 = "07/23/2007";
    protected static final String DATE_FIELD_08212012 = "08/21/2012";

    // ---- integer "doc_price" field ----
    protected static final String INTEGER_FIELD_PRICE = "doc_price";
    protected static final int INTEGER_FIELD_PRICE_130 = 130;
    protected static final int INTEGER_FIELD_PRICE_100 = 100;
    protected static final int INTEGER_FIELD_PRICE_200 = 200;
    protected static final int INTEGER_FIELD_PRICE_25 = 25;
    protected static final int INTEGER_FIELD_PRICE_30 = 30;
    protected static final int INTEGER_FIELD_PRICE_350 = 350;

    // ---- aggregation names and response keys ----
    protected static final String BUCKET_AGG_DOC_COUNT_FIELD = "doc_count";
    protected static final String BUCKETS_AGGREGATION_NAME_1 = "date_buckets_1";
    protected static final String BUCKETS_AGGREGATION_NAME_2 = "date_buckets_2";
    protected static final String BUCKETS_AGGREGATION_NAME_3 = "date_buckets_3";
    protected static final String BUCKETS_AGGREGATION_NAME_4 = "date_buckets_4";
    protected static final String KEY = "key";
    protected static final String BUCKET_AGG_KEY_AS_STRING = "key_as_string";
    protected static final String SUM_AGGREGATION_NAME = "sum_aggs";
    protected static final String SUM_AGGREGATION_NAME_2 = "sum_aggs_2";
    protected static final String AVG_AGGREGATION_NAME = "avg_field";
    protected static final String GENERIC_AGGREGATION_NAME = "my_aggregation";
    protected static final String DATE_AGGREGATION_NAME = "date_aggregation";

    // ---- cluster setting key ----
    protected static final String CLUSTER_SETTING_CONCURRENT_SEGMENT_SEARCH = "search.concurrent_segment_search.enabled";

    /**
     * One-time setup executed before the tests of the class: initializes the client and
     * updates cluster settings.
     */
    @BeforeClass
    @SneakyThrows
    public static void setUpCluster() {
        // This hook is static while initClient/updateClusterSettings are instance methods, so a
        // throwaway instance is created only to invoke them. Running the initialization once
        // instead of per test keeps the number of network calls down.
        BaseAggregationsWithHybridQueryIT bootstrap = new BaseAggregationsWithHybridQueryIT();
        bootstrap.initClient();
        bootstrap.updateClusterSettings();
    }

    /**
     * Always returns {@code false}.
     *
     * <p>NOTE(review): presumably this tells the base class to skip its own cluster settings
     * update because {@link #setUpCluster()} already performs it once - confirm against
     * BaseNeuralSearchIT.
     */
    @Override
    public boolean isUpdateClusterSettings() {
        return false;
    }

    /**
     * Always returns {@code true}.
     *
     * <p>NOTE(review): presumably keeps the cluster state in place after the tests complete so
     * resources created once can be reused - confirm against the test framework.
     */
    @Override
    protected boolean preserveClusterUponCompletion() {
        return true;
    }

    /**
     * Makes sure the resources a test needs are in place: the test index (created and populated
     * only if missing) and the search pipeline with results post-processor.
     *
     * @param indexName name of the test index
     * @param pipelineName name of the search pipeline to create
     */
    protected void prepareResources(String indexName, String pipelineName) {
        initializeIndexIfNotExist(indexName);
        createSearchPipelineWithResultsPostProcessor(pipelineName);
    }

    /**
     * Creates the index and ingests documents with ids "1" to "6" when the index does not exist;
     * does nothing otherwise.
     *
     * <p>Every document carries a text, a price and a date. Some documents intentionally omit one
     * of the other fields (doc "2" has no keyword, doc "3" has no doc_index, doc "5" has no nested
     * user) as laid out in the calls below.
     *
     * @param indexName name of the index to create and fill
     */
    @SneakyThrows
    protected void initializeIndexIfNotExist(String indexName) {
        if (indexExists(indexName)) {
            // index is already there, nothing to set up
            return;
        }

        createIndexWithConfiguration(
            indexName,
            buildIndexConfiguration(
                List.of(),
                List.of(NESTED_TYPE_FIELD_USER, NESTED_FIELD_FIRSTNAME, NESTED_FIELD_LASTNAME),
                List.of(INTEGER_FIELD_DOCINDEX),
                List.of(KEYWORD_FIELD_DOCKEYWORD),
                List.of(DATE_FIELD),
                3
            ),
            ""
        );

        // doc 1: all fields present
        addKnnDoc(
            indexName,
            "1",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT1),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_JOHN, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_BLACK)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_1234, INTEGER_FIELD_PRICE_130),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_WORKABLE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_01031995)
        );
        // doc 2: no keyword field
        addKnnDoc(
            indexName,
            "2",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT3),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_FRODO, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_BAGGINS)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_2345, INTEGER_FIELD_PRICE_100),
            List.of(),
            List.of(),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_05022015)
        );
        // doc 3: no doc_index field
        addKnnDoc(
            indexName,
            "3",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT2),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_MOHAMMED, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_EZAB)),
            List.of(INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_PRICE_200),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ANGRY),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_07232007)
        );
        // doc 4: all fields present, same date as doc 2
        addKnnDoc(
            indexName,
            "4",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT4),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_SUN, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_WUKONG)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_3456, INTEGER_FIELD_PRICE_25),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_LIKABLE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_05022015)
        );
        // doc 5: no nested user field, same doc_index value as doc 4
        addKnnDoc(
            indexName,
            "5",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT5),
            List.of(),
            List.of(),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_3456, INTEGER_FIELD_PRICE_30),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ENTIRE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_08212012)
        );
        // doc 6: all fields present, same keyword and date as doc 5
        addKnnDoc(
            indexName,
            "6",
            List.of(),
            List.of(),
            Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
            Collections.singletonList(TEST_DOC_TEXT6),
            List.of(NESTED_TYPE_FIELD_USER),
            List.of(Map.of(NESTED_FIELD_FIRSTNAME, NESTED_FIELD_FIRSTNAME_VASILISA, NESTED_FIELD_LASTNAME, NESTED_FIELD_LASTNAME_WISE)),
            List.of(INTEGER_FIELD_DOCINDEX, INTEGER_FIELD_PRICE),
            List.of(INTEGER_FIELD_DOCINDEX_4567, INTEGER_FIELD_PRICE_350),
            List.of(KEYWORD_FIELD_DOCKEYWORD),
            List.of(KEYWORD_FIELD_DOCKEYWORD_ENTIRE),
            List.of(DATE_FIELD),
            List.of(DATE_FIELD_08212012)
        );
    }

    /**
     * Asserts the hits section of a search response: the hit count and the reported total both
     * equal {@code expected}, scores never increase from one hit to the next, document ids are
     * unique, and the total relation equals {@code RELATION_EQUAL_TO}.
     *
     * @param expected expected number of hits
     * @param searchResponseAsMap search response parsed into a map
     */
    protected void assertHitResultsFromQuery(int expected, Map<String, Object> searchResponseAsMap) {
        assertEquals(expected, getHitCount(searchResponseAsMap));

        List<String> docIds = new ArrayList<>();
        List<Double> docScores = new ArrayList<>();
        for (Map<String, Object> hit : getNestedHits(searchResponseAsMap)) {
            docIds.add((String) hit.get("_id"));
            docScores.add((Double) hit.get("_score"));
        }

        // scores must be in descending order: no hit may score lower than the one following it
        boolean sortedByScoreDesc = IntStream.range(1, docScores.size())
            .allMatch(pos -> !(docScores.get(pos - 1) < docScores.get(pos)));
        assertTrue(sortedByScoreDesc);
        // de-duplicating the ids must not change their number
        assertEquals(Set.copyOf(docIds).size(), docIds.size());

        Map<String, Object> totalHits = getTotalHits(searchResponseAsMap);
        Object totalValue = totalHits.get("value");
        assertNotNull(totalValue);
        assertEquals(expected, totalValue);
        Object totalRelation = totalHits.get("relation");
        assertNotNull(totalRelation);
        assertEquals(RELATION_EQUAL_TO, totalRelation);
    }
}
Loading

0 comments on commit a169391

Please sign in to comment.