Skip to content

Commit

Permalink
move milvus tests to their own module (#3596)
Browse files Browse the repository at this point in the history
  • Loading branch information
masci authored Nov 17, 2022
1 parent 6cd0e33 commit 1399681
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 84 deletions.
117 changes: 36 additions & 81 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,42 @@ jobs:
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

# Runs the Milvus document store integration tests against a standalone
# Milvus instance started with docker-compose.
integration-tests-milvus:
  name: "Integration / Milvus / ${{ matrix.os }}"
  needs: [unit-tests]
  runs-on: ${{ matrix.os }}
  strategy:
    # Let the remaining matrix entries finish even if one of them fails.
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest

  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # Downloads the Milvus v2.0.0 standalone compose file and starts it in
    # the background; `ps` prints the container status to the job log.
    - name: Setup Milvus
      run: |
        cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
        wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
        sudo docker-compose up -d
        sudo docker-compose ps

    - name: Install Haystack
      run: pip install .[milvus]

    # Only tests carrying both the `document_store` and `integration`
    # markers are selected; the run stops after five failures.
    - name: Run tests
      run: |
        pytest --maxfail=5 -m "document_store and integration" test/document_stores/test_milvus.py

    # Slack notification, sent only for failures on main of the deepset-ai
    # repository.
    - if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
      uses: act10ns/slack@v1
      with:
        channel: "#haystack"
        status: ${{ job.status }}


#
# TODO: the following steps need to be revisited
Expand Down Expand Up @@ -491,87 +527,6 @@ jobs:
channel: '#haystack'
if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'

# Runs the non-integration Milvus document store tests on Linux.
milvus-tests-linux:
  # Runs for non-draft PRs, and for draft PRs labelled 'topic:milvus'.
  if: contains(github.event.pull_request.labels.*.name, 'topic:milvus') || !github.event.pull_request.draft
  needs:
    - mypy
    - pylint
    - black
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v3

    - name: Setup Python
      uses: ./.github/actions/python_cache/

    # Downloads the Milvus v2.0.0 standalone compose file and starts it in
    # the background; `ps` prints the container status to the job log.
    - name: Setup Milvus
      run: |
        cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
        wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
        sudo docker-compose up -d
        sudo docker-compose ps

    # TODO Let's try to remove this one from the unit tests
    # NOTE(review): --no-check-certificate turns off TLS certificate
    # verification for a binary that is then copied into /usr/local/bin.
    - name: Install pdftotext
      run: wget --no-check-certificate https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

    - name: Install Haystack
      run: pip install .[milvus]

    # Selects tests marked `milvus` that are not marked `integration`.
    - name: Run tests
      env:
        TOKENIZERS_PARALLELISM: "false"
      run: |
        pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" test/document_stores/ --document_store_type=milvus

    - name: Dump docker logs on failure
      if: failure()
      uses: jwalton/gh-docker-logs@v1

    # Slack notification, sent only for failures on main of the deepset-ai
    # repository.
    - if: failure() && github.repository_owner == 'deepset-ai' && github.ref == 'refs/heads/main'
      uses: act10ns/slack@v1
      with:
        channel: "#haystack"
        status: ${{ job.status }}

# FIXME: seems like we can't run containers on Windows
# milvus-tests-windows:
# needs:
# - mypy
# - pylint
# runs-on: windows-latest
# if: contains(github.event.pull_request.labels.*.name, 'topic:milvus') && contains(github.event.pull_request.labels.*.name, 'topic:windows') || !github.event.pull_request.draft

# steps:
# - uses: actions/checkout@v3

# - name: Setup Python
# uses: ./.github/actions/python_cache/
# with:
# prefix: windows

# - name: Setup Milvus
# run: |
# cd ../../ # Avoid causing permission issues on hashFiles later by creating unreadable folders like "volumes"
# wget https://github.com/milvus-io/milvus/releases/download/v2.0.0/milvus-standalone-docker-compose.yml -O docker-compose.yml
# sudo docker-compose up -d
# sudo docker-compose ps

# - name: Install pdftotext
# run: |
# choco install xpdf-utils
# choco install openjdk11
# refreshenv

# - name: Install Haystack
# run: pip install .[milvus]

# - name: Run tests
# env:
# TOKENIZERS_PARALLELISM: 'false'
# run: |
# pytest ${{ env.PYTEST_PARAMS }} -m "milvus and not integration" ${{ env.SUITES_EXCLUDED_FROM_WINDOWS }} test/document_stores/ --document_store_type=milvus




rest-and-ui:
needs: [mypy, pylint, black]

Expand Down
4 changes: 1 addition & 3 deletions test/document_stores/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,9 +1082,7 @@ def test_multilabel_meta_aggregations(document_store: BaseDocumentStore):
assert multi_label.filters == l.filters


@pytest.mark.parametrize(
"document_store", ["elasticsearch", "faiss", "milvus", "weaviate", "pinecone", "memory"], indirect=True
)
@pytest.mark.parametrize("document_store", ["memory"], indirect=True)
def test_update_meta(document_store: BaseDocumentStore):
documents = [
Document(content="Doc1", meta={"meta_key_1": "1", "meta_key_2": "1"}),
Expand Down
97 changes: 97 additions & 0 deletions test/document_stores/test_milvus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import pytest
import numpy as np

from haystack.document_stores.milvus import MilvusDocumentStore
from haystack.schema import Document

from .test_base import DocumentStoreBaseTestAbstract


class TestMilvusDocumentStore(DocumentStoreBaseTestAbstract):
    """
    Tests for MilvusDocumentStore.

    The class derives from DocumentStoreBaseTestAbstract and supplies the
    Milvus-specific ``ds`` and ``documents`` fixtures. The skipped methods
    below presumably override tests of the same name defined in the base
    class (the base class is not visible from here).
    """

    # Reasons attached to the skip markers so that test reports explain why
    # the tests are disabled.
    _FILTERS_SKIP_REASON = (
        "MilvusDocumentStore derives from the SQL document store and behaves "
        "differently to the other stores when filters are applied"
    )
    _LABELS_SKIP_REASON = (
        "Labels metadata are not supported, a limitation inherited from SQLDocumentStore"
    )

    @pytest.fixture
    def ds(self, tmp_path):
        """
        Build a MilvusDocumentStore whose SQL URL points at a SQLite file
        created under pytest's per-test ``tmp_path``.
        """
        db_url = f"sqlite:///{tmp_path}/haystack_test_milvus.db"
        return MilvusDocumentStore(sql_url=db_url, return_embedding=True)

    @pytest.fixture
    def documents(self):
        """
        write_documents will raise an exception if it receives a document
        without embeddings, so we customize the documents fixture and always
        provide embeddings.

        Returns nine documents: for each ``i`` in 0..2 one "Foo", one "Bar"
        and one plain document, each with a random 768-dimensional float32
        embedding.
        """
        documents = []
        for i in range(3):
            documents.append(
                Document(
                    content=f"A Foo Document {i}",
                    meta={"name": f"name_{i}", "year": "2020", "month": "01", "numbers": [2, 4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            documents.append(
                Document(
                    content=f"A Bar Document {i}",
                    meta={"name": f"name_{i}", "year": "2021", "month": "02", "numbers": [-2, -4]},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

            # This group deliberately carries neither "year" nor "numbers".
            documents.append(
                Document(
                    content=f"Document {i}",
                    meta={"name": f"name_{i}", "month": "03"},
                    embedding=np.random.rand(768).astype(np.float32),
                )
            )

        return documents

    @pytest.mark.integration
    def test_delete_index(self, ds, documents):
        """Contrary to other Document Stores, MilvusDocumentStore doesn't raise if the index is empty"""
        ds.write_documents(documents, index="custom_index")
        assert ds.get_document_count(index="custom_index") == len(documents)
        ds.delete_index(index="custom_index")
        # Counting on the deleted index is expected to return 0 rather than raise.
        assert ds.get_document_count(index="custom_index") == 0

    # NOTE: MilvusDocumentStore derives from the SQL one and behaves differently to the others when filters are applied.
    # While this should be considered a bug, the related tests are skipped in the meantime

    @pytest.mark.skip(reason=_FILTERS_SKIP_REASON)
    @pytest.mark.integration
    def test_ne_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason=_FILTERS_SKIP_REASON)
    @pytest.mark.integration
    def test_nin_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason=_FILTERS_SKIP_REASON)
    @pytest.mark.integration
    def test_comparison_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason=_FILTERS_SKIP_REASON)
    @pytest.mark.integration
    def test_nested_condition_filters(self, ds, documents):
        pass

    @pytest.mark.skip(reason=_FILTERS_SKIP_REASON)
    @pytest.mark.integration
    def test_nested_condition_not_filters(self, ds, documents):
        pass

    # NOTE: again inherited from the SQLDocumentStore, labels metadata are not supported

    @pytest.mark.skip(reason=_LABELS_SKIP_REASON)
    @pytest.mark.integration
    def test_delete_labels_by_filter(self, ds, labels):
        pass

    @pytest.mark.skip(reason=_LABELS_SKIP_REASON)
    @pytest.mark.integration
    def test_delete_labels_by_filter_id(self, ds, labels):
        pass

0 comments on commit 1399681

Please sign in to comment.