Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Speed up integration tests (nodes) #3408

Merged
merged 3 commits into the base branch on
Oct 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -687,7 +687,7 @@ jobs:
- name: Download models
if: steps.cache-hf-models.outputs.cache-hit != 'true'
run: |
python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/bart_lfqa','yjernite/bart_eli5', 'google/pegasus-xsum', 'vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'elastic/distilbert-base-cased-finetuned-conll03-english']]"
python -c "from transformers import AutoModel;[AutoModel.from_pretrained(model_name) for model_name in ['vblagoje/bart_lfqa','yjernite/bart_eli5', 'vblagoje/dpr-ctx_encoder-single-lfqa-wiki', 'vblagoje/dpr-question_encoder-single-lfqa-wiki', 'facebook/dpr-question_encoder-single-nq-base', 'facebook/dpr-ctx_encoder-single-nq-base', 'elastic/distilbert-base-cased-finetuned-conll03-english']]"


- name: Run Elasticsearch
Expand Down
2 changes: 1 addition & 1 deletion test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def lfqa_generator(request):

@pytest.fixture
def summarizer():
return TransformersSummarizer(model_name_or_path="google/pegasus-xsum", use_gpu=-1)
return TransformersSummarizer(model_name_or_path="sshleifer/distilbart-xsum-12-6", use_gpu=False)


@pytest.fixture
Expand Down
18 changes: 9 additions & 9 deletions test/nodes/test_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
]

EXPECTED_SUMMARIES = [
"California's largest electricity provider has turned off power to hundreds of thousands of customers.",
"The Eiffel Tower is a landmark in Paris, France.",
"California's largest electricity provider, PG&E, has shut down power supplies to thousands of customers.",
" The Eiffel Tower in Paris has officially opened its doors to the public.",
]

SPLIT_DOCS = [
Expand All @@ -32,8 +32,8 @@
# Documents order is very important to produce summary.
# Different order of same documents produce different summary.
EXPECTED_ONE_SUMMARIES = [
"The Eiffel Tower is a landmark in Paris, France.",
"The Eiffel Tower, built in 1889 in Paris, France, is the world's tallest free-standing structure.",
" The Eiffel Tower in Paris has officially opened its doors to the public.",
" The Eiffel Tower in Paris has become the tallest man-made structure in the world.",
]


Expand Down Expand Up @@ -89,7 +89,7 @@ def test_summarization_pipeline(document_store, retriever, summarizer):
output = pipeline.run(query=query, params={"Retriever": {"top_k": 1}})
answers = output["answers"]
assert len(answers) == 1
assert "The Eiffel Tower is a landmark in Paris, France." == answers[0]["answer"]
assert " The Eiffel Tower in Paris has officially opened its doors to the public." == answers[0]["answer"]


@pytest.mark.integration
Expand All @@ -114,7 +114,7 @@ def test_summarization_pipeline_one_summary(document_store, retriever, summarize


@pytest.mark.summarizer
def add_metadata_summerizer():
def test_metadata_summarizer(summarizer):
docs = [
Document(
content="""PG&E stated it scheduled the blackouts in response to forecasts for high winds amid dry conditions. The aim is to reduce the risk of wildfires. Nearly 800 thousand customers were scheduled to be affected by the shutoffs which were expected to last through at least midday tomorrow.""",
Expand All @@ -129,10 +129,9 @@ def add_metadata_summerizer():
meta={"sub_content": "Paris best tour best tour", "topic": "Eiffel tower"},
),
]
# Original input is overwrote after the "predict". So adding the same input as check_output to assess the output
# Original input is overwritten after the "predict". So adding the same input as check_output to assess the output
check_output = deepcopy(docs)

summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
summary = summarizer.predict(documents=docs)

assert len(summary[0].meta) == len(check_output[0].meta)
Expand All @@ -145,4 +144,5 @@ def add_metadata_summerizer():
summary = summarizer.predict(documents=docs, generate_single_summary=True)

assert len(summary) == 1
assert not summary[0].meta # Metadata is not returned in case of a single summary
summary[0].meta.pop("context")
assert not summary[0].meta # Remaining metadata is not returned in case of a single summary
4 changes: 2 additions & 2 deletions test/nodes/test_summarizer_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ def test_summarization_pipeline_with_translator(
documents = output["documents"]
assert len(documents) == 1
assert documents[0].content in [
"Der Eiffelturm ist ein Wahrzeichen in Paris, Frankreich.",
"Der Eiffelturm, der 1889 in Paris, Frankreich, erbaut wurde, ist das höchste freistehende Bauwerk der Welt.",
"Der Eiffelturm in Paris ist die höchste von Menschen geschaffene Struktur der Welt geworden.",
"Der Eiffelturm in Paris hat offiziell seine Türen für die Öffentlichkeit geöffnet.",
]