From 594101d5a9bde15fc5f4a53645c15fa02a683249 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Fri, 25 Feb 2022 17:46:39 +0100 Subject: [PATCH 01/14] Add SplitDocumentList and JoinAnswer nodes --- haystack/nodes/other/join_answers.py | 60 ++++++++++++++++++++++ haystack/nodes/other/split_documents.py | 66 +++++++++++++++++++++++++ haystack/pipelines/base.py | 52 +++++++++++-------- 3 files changed, 157 insertions(+), 21 deletions(-) create mode 100644 haystack/nodes/other/join_answers.py create mode 100644 haystack/nodes/other/split_documents.py diff --git a/haystack/nodes/other/join_answers.py b/haystack/nodes/other/join_answers.py new file mode 100644 index 0000000000..643d38f61f --- /dev/null +++ b/haystack/nodes/other/join_answers.py @@ -0,0 +1,60 @@ +from typing import Optional, List, Dict, Tuple + +from haystack import Answer +from haystack.nodes import BaseComponent + + +class JoinAnswers(BaseComponent): + """ + A node to join `Answer`s produced by multiple `Reader` nodes. + """ + + def __init__(self, join_mode: str = "concatenate", weights: Optional[List[float]] = None, + top_k_join: Optional[int] = None): + """ + :param join_mode: `"concatenate"` to combine documents from multiple `Reader`s. `"merge"` to aggregate scores + of individual `Answer`s. + :param weights: A node-wise list (length of list must be equal to the number of input nodes) of weights for + adjusting `Answer` scores when using the `"merge"` join_mode. By default, equal weight is assigned to each + `Reader` score. This parameter is not compatible with the `"concatenate"` join_mode. + :param top_k_join: Limit `Answer`s to top_k based on the resulting scores of the join. + """ + + assert join_mode in ["concatenate", "merge"], f"JoinAnswers node does not support '{join_mode}' join_mode." 
+ assert not (weights is not None and join_mode == "concatenate"), \ + "Weights are not compatible with 'concatenate' join_mode" + + # Save init parameters to enable export of component config as YAML + self.set_config(join_mode=join_mode, weights=weights, top_k_join=top_k_join) + + self.join_mode = join_mode + self.weights = [float(i) / sum(weights) for i in weights] if weights else None + self.top_k_join = top_k_join + + def run(self, inputs: List[Dict], top_k_join: Optional[int] = None) -> Tuple[Dict, str]: + reader_results = [inp["answers"] for inp in inputs] + + if self.join_mode == "concatenate": + concatenated_answers = [answer for cur_reader_result in reader_results for answer in cur_reader_result] + concatenated_answers = sorted(concatenated_answers, reverse=True) + return {"answers": concatenated_answers, "labels": inputs[0].get("labels", None)}, "output_1" + + elif self.join_mode == "merge": + merged_answers = self._merge_answers(reader_results) + + if not top_k_join: + top_k_join = self.top_k_join if self.top_k_join is not None else len(merged_answers) + merged_answers = merged_answers[:top_k_join] + return {"answers": merged_answers, "labels": inputs[0].get("labels", None)}, "output_1" + + else: + raise ValueError(f"Invalid join_mode: {self.join_mode}") + + def _merge_answers(self, reader_results: List[List[Answer]]) -> List[Answer]: + weights = self.weights if self.weights else [1 / len(reader_results)] * len(reader_results) + + for result, weight in zip(reader_results, weights): + for answer in result: + answer.score *= weight + + return sorted([answer for cur_reader_result in reader_results for answer in cur_reader_result], reverse=True) diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/split_documents.py new file mode 100644 index 0000000000..0431039561 --- /dev/null +++ b/haystack/nodes/other/split_documents.py @@ -0,0 +1,66 @@ +from typing import List, Tuple, Dict, Optional + +from haystack.nodes.base import 
BaseComponent +from haystack import Document + + +class SplitDocumentList(BaseComponent): + """ + A node to split a list of `Document`s by `content_type` or by the values of a metadata field. + """ + # By default (split_by == "content_type"), the node has two outgoing edges. + outgoing_edges = 2 + + def __init__(self, split_by: str = "content_type", metadata_values: Optional[List[str]] = None): + """ + :param split_by: Field to split the documents by. Either `"content_type"` or a metadata field name. + If this parameter is set to `"content_type"`, the list of `Document`s will be split into a list containing + only `Document`s of type `"text"` (will be routed to `"output_1"`) and a list containing only `Document`s of + type `"table"` (will be routed to `"output_2"`). + If this parameter is set to a metadata field name, you need to specify the parameter `metadata_values` as + well. + :param metadata_values: If the parameter `split_by` is set to a metadata field name, you need to provide a list + of values to group the `Document`s to. `Document`s whose metadata field is equal to the first value of the + provided list will be routed to `"output_1"`, `Document`s whose metadata field is equal to the second + value of the provided list will be routed to `"output_2"`, etc. + """ + + assert split_by == "content_type" or metadata_values is not None, \ + "If split_by is set to the name of a metadata field, you must provide metadata_values " \ + "to group the documents to." 
+ + # Save init parameters to enable export of component config as YAML + self.set_config(split_by=split_by, metadata_values=metadata_values) + + self.split_by = split_by + self.metadata_values = metadata_values + + # If we split list of Documents by a metadata field, number of outgoing edges might change + if split_by != "content_type" and metadata_values is not None: + self.outgoing_edges = len(metadata_values) + + def run(self, documents: List[Document]) -> Tuple[Dict, str]: + if self.split_by == "content_type": + split_documents = {"output_1": [], "output_2": []} + + for doc in documents: + if doc.content_type == "text": + split_documents["output_1"].append(doc) + elif doc.content_type == "table": + split_documents["output_2"].append(doc) + + else: + split_documents = {f"output_{i+1}": [] for i in range(len(self.metadata_values))} + for doc in documents: + current_metadata_value = doc.meta.get(self.split_by, None) + # Disregard current document if it does not contain the provided metadata field + if current_metadata_value is not None: + try: + index = self.metadata_values.index(current_metadata_value) + except ValueError: + # Disregard current document if current_metadata_value is not in the provided metadata_values + continue + + split_documents[f"output_{index+1}"].append(doc) + + return split_documents, "split_documents" diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index b733d19493..4027aa5468 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -578,28 +578,38 @@ def run( # type: ignore f"Exception while running node `{node_id}` with input `{node_input}`: {e}, full stack trace: {tb}" ) queue.pop(node_id) - next_nodes = self.get_next_nodes(node_id, stream_id) - for n in next_nodes: # add successor nodes with corresponding inputs to the queue - if queue.get(n): # concatenate inputs if it's a join node - existing_input = queue[n] - if "inputs" not in existing_input.keys(): - updated_input: dict = {"inputs": 
[existing_input, node_output], "params": params} - if query: - updated_input["query"] = query - if file_paths: - updated_input["file_paths"] = file_paths - if labels: - updated_input["labels"] = labels - if documents: - updated_input["documents"] = documents - if meta: - updated_input["meta"] = meta + # + if stream_id == "split_documents": + for stream_id in [key for key in node_output.keys() if key.startswith("output_")]: + current_node_output = {k: v for k, v in node_output.items() if not k.startswith("output_")} + current_docs = node_output.pop(stream_id) + current_node_output["documents"] = current_docs + next_nodes = self.get_next_nodes(node_id, stream_id) + for n in next_nodes: + queue[n] = current_node_output + else: + next_nodes = self.get_next_nodes(node_id, stream_id) + for n in next_nodes: # add successor nodes with corresponding inputs to the queue + if queue.get(n): # concatenate inputs if it's a join node + existing_input = queue[n] + if "inputs" not in existing_input.keys(): + updated_input: dict = {"inputs": [existing_input, node_output], "params": params} + if query: + updated_input["query"] = query + if file_paths: + updated_input["file_paths"] = file_paths + if labels: + updated_input["labels"] = labels + if documents: + updated_input["documents"] = documents + if meta: + updated_input["meta"] = meta + else: + existing_input["inputs"].append(node_output) + updated_input = existing_input + queue[n] = updated_input else: - existing_input["inputs"].append(node_output) - updated_input = existing_input - queue[n] = updated_input - else: - queue[n] = node_output + queue[n] = node_output i = 0 else: i += 1 # attempt executing next node in the queue as current `node_id` has unprocessed predecessors From 840fedee593a4eb9c6d0fc236252801aa02243de Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 27 Feb 2022 22:12:23 +0000 Subject: [PATCH 02/14] Update Documentation & Code Style --- 
haystack/nodes/other/join_answers.py | 10 ++++++---- haystack/nodes/other/split_documents.py | 6 ++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/haystack/nodes/other/join_answers.py b/haystack/nodes/other/join_answers.py index 643d38f61f..a3f1cc75b2 100644 --- a/haystack/nodes/other/join_answers.py +++ b/haystack/nodes/other/join_answers.py @@ -9,8 +9,9 @@ class JoinAnswers(BaseComponent): A node to join `Answer`s produced by multiple `Reader` nodes. """ - def __init__(self, join_mode: str = "concatenate", weights: Optional[List[float]] = None, - top_k_join: Optional[int] = None): + def __init__( + self, join_mode: str = "concatenate", weights: Optional[List[float]] = None, top_k_join: Optional[int] = None + ): """ :param join_mode: `"concatenate"` to combine documents from multiple `Reader`s. `"merge"` to aggregate scores of individual `Answer`s. @@ -21,8 +22,9 @@ def __init__(self, join_mode: str = "concatenate", weights: Optional[List[float] """ assert join_mode in ["concatenate", "merge"], f"JoinAnswers node does not support '{join_mode}' join_mode." - assert not (weights is not None and join_mode == "concatenate"), \ - "Weights are not compatible with 'concatenate' join_mode" + assert not ( + weights is not None and join_mode == "concatenate" + ), "Weights are not compatible with 'concatenate' join_mode" # Save init parameters to enable export of component config as YAML self.set_config(join_mode=join_mode, weights=weights, top_k_join=top_k_join) diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/split_documents.py index 0431039561..4842182a48 100644 --- a/haystack/nodes/other/split_documents.py +++ b/haystack/nodes/other/split_documents.py @@ -8,6 +8,7 @@ class SplitDocumentList(BaseComponent): """ A node to split a list of `Document`s by `content_type` or by the values of a metadata field. """ + # By default (split_by == "content_type"), the node has two outgoing edges. 
outgoing_edges = 2 @@ -25,9 +26,10 @@ def __init__(self, split_by: str = "content_type", metadata_values: Optional[Lis value of the provided list will be routed to `"output_2"`, etc. """ - assert split_by == "content_type" or metadata_values is not None, \ - "If split_by is set to the name of a metadata field, you must provide metadata_values " \ + assert split_by == "content_type" or metadata_values is not None, ( + "If split_by is set to the name of a metadata field, you must provide metadata_values " "to group the documents to." + ) # Save init parameters to enable export of component config as YAML self.set_config(split_by=split_by, metadata_values=metadata_values) From 598af8874b55a1fb2551752ef672d24e9569519f Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Mon, 28 Feb 2022 17:15:09 +0100 Subject: [PATCH 03/14] Add tests + adapt tutorial --- haystack/__init__.py | 2 +- haystack/nodes/__init__.py | 2 +- haystack/nodes/other/__init__.py | 2 + haystack/nodes/other/join_answers.py | 2 +- haystack/nodes/other/split_documents.py | 2 +- test/test_pipeline.py | 31 +- tutorials/Tutorial15_TableQA.ipynb | 2110 +++++++++++++++-------- tutorials/Tutorial15_TableQA.py | 34 +- 8 files changed, 1422 insertions(+), 763 deletions(-) diff --git a/haystack/__init__.py b/haystack/__init__.py index ae67727cf1..203f692897 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -102,7 +102,7 @@ def __getattr__(self, attr): from haystack.modeling.evaluation import eval from haystack.modeling.logger import MLFlowLogger, StdoutLogger, TensorBoardLogger -from haystack.nodes.other import JoinDocuments, Docs2Answers +from haystack.nodes.other import JoinDocuments, Docs2Answers, JoinAnswers, SplitDocumentList from haystack.nodes.query_classifier import SklearnQueryClassifier, TransformersQueryClassifier from haystack.nodes.file_classifier import FileTypeClassifier from haystack.utils import preprocessing diff --git a/haystack/nodes/__init__.py b/haystack/nodes/__init__.py index 
52f2ae0020..d4410995c0 100644 --- a/haystack/nodes/__init__.py +++ b/haystack/nodes/__init__.py @@ -21,7 +21,7 @@ AzureConverter, ParsrConverter, ) -from haystack.nodes.other import Docs2Answers, JoinDocuments +from haystack.nodes.other import Docs2Answers, JoinDocuments, SplitDocumentList, JoinAnswers from haystack.nodes.preprocessor import BasePreProcessor, PreProcessor from haystack.nodes.query_classifier import SklearnQueryClassifier, TransformersQueryClassifier from haystack.nodes.question_generator import QuestionGenerator diff --git a/haystack/nodes/other/__init__.py b/haystack/nodes/other/__init__.py index 8341a135be..5552d7821e 100644 --- a/haystack/nodes/other/__init__.py +++ b/haystack/nodes/other/__init__.py @@ -1,2 +1,4 @@ from haystack.nodes.other.docs2answers import Docs2Answers from haystack.nodes.other.join_docs import JoinDocuments +from haystack.nodes.other.split_documents import SplitDocumentList +from haystack.nodes.other.join_answers import JoinAnswers diff --git a/haystack/nodes/other/join_answers.py b/haystack/nodes/other/join_answers.py index 643d38f61f..3e5ccc4134 100644 --- a/haystack/nodes/other/join_answers.py +++ b/haystack/nodes/other/join_answers.py @@ -1,6 +1,6 @@ from typing import Optional, List, Dict, Tuple -from haystack import Answer +from haystack.schema import Answer from haystack.nodes import BaseComponent diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/split_documents.py index 0431039561..39d4258bc5 100644 --- a/haystack/nodes/other/split_documents.py +++ b/haystack/nodes/other/split_documents.py @@ -1,7 +1,7 @@ from typing import List, Tuple, Dict, Optional from haystack.nodes.base import BaseComponent -from haystack import Document +from haystack.schema import Document class SplitDocumentList(BaseComponent): diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 6956319771..9835be2fdb 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -3,10 +3,12 @@ import os import 
json from unittest.mock import Mock + +import pandas as pd import pytest import responses -from haystack import __version__ +from haystack import __version__, Document from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore from haystack.nodes.retriever.sparse import ElasticsearchRetriever @@ -16,7 +18,7 @@ RootNode, ) from haystack.pipelines import ExtractiveQAPipeline -from haystack.nodes import DensePassageRetriever, EmbeddingRetriever +from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, SplitDocumentList from conftest import MOCK_DC, DC_API_ENDPOINT, DC_API_KEY, DC_TEST_INDEX, SAMPLES_PATH, deepset_cloud_fixture @@ -631,6 +633,31 @@ def test_documentsearch_document_store_authentication(retriever_with_docs, docum assert kwargs["headers"] == auth_headers +def test_split_document_list_content_type(test_docs_xs): + # Test splitting by content_type + docs = [Document(content="text document", content_type="text"), + Document(content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]), + content_type="table")] + + split_documents = SplitDocumentList() + result, _ = split_documents.run(documents=docs) + assert len(result["output_1"]) == 1 + assert len(result["output_2"]) == 1 + assert result["output_1"][0].content_type == "text" + assert result["output_2"][0].content_type == "table" + + # Test splitting by metadata field + docs = [Document.from_dict(doc) if isinstance(doc, dict) else doc for doc in test_docs_xs] + split_documents = SplitDocumentList(split_by="meta_field", metadata_values=["test1", "test3", "test5"]) + result, _ = split_documents.run(docs) + assert len(result["output_1"]) == 1 + assert len(result["output_2"]) == 1 + assert len(result["output_3"]) == 1 + assert result["output_1"][0].meta["meta_field"] == "test1" + assert result["output_2"][0].meta["meta_field"] == "test3" + assert 
result["output_3"][0].meta["meta_field"] == "test5" + + def clean_faiss_document_store(): if Path("existing_faiss_document_store").exists(): os.remove("existing_faiss_document_store") diff --git a/tutorials/Tutorial15_TableQA.ipynb b/tutorials/Tutorial15_TableQA.ipynb index b29637ea64..a292658e12 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -1,761 +1,1359 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "DeAkZwDhufYA" - }, - "source": [ - "# Open-Domain QA on Tables\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", - "\n", - "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbR3bETlvi-3" - }, - "source": [ - "### Prepare environment\n", - "\n", - "#### Colab: Enable the GPU runtime\n", - "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", - "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HW66x0rfujyO" - }, - "outputs": [], - "source": [ - "# Make sure you have a GPU running\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_ZXoyhOAvn7M" - }, - "outputs": [], - "source": [ - "# Install the latest release of Haystack in your own environment\n", - "#! 
pip install farm-haystack\n", - "\n", - "# Install the latest master of Haystack\n", - "!pip install --upgrade pip\n", - "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", - "\n", - "# The TaPAs-based TableReader requires the torch-scatter library\n", - "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", - "\n", - "# If you run this notebook on Google Colab, you might need to\n", - "# restart the runtime after installing haystack." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K_XJhluXwF5_" - }, - "source": [ - "### Start an Elasticsearch server\n", - "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "frDqgzK7v2i1" - }, - "outputs": [], - "source": [ - "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", - "from haystack.utils import launch_es\n", - "\n", - "launch_es()" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "S4PGj1A6wKWu" - }, - "outputs": [], - "source": [ - "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.9.2\n", - "\n", - "import os\n", - "from subprocess import Popen, PIPE, STDOUT\n", - "\n", - "es_server = Popen(\n", - " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", - ")\n", - "# wait until ES has started\n", - "! 
sleep 30" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RmxepXZtwQ0E" - }, - "outputs": [], - "source": [ - "# Connect to Elasticsearch\n", - "from haystack.document_stores import ElasticsearchDocumentStore\n", - "\n", - "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", - "document_index = \"document\"\n", - "document_store = ElasticsearchDocumentStore(\n", - " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFh26LIlxldw" - }, - "source": [ - "## Add Tables to DocumentStore\n", - "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", - "\n", - "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM63uwbd8zd6" - }, - "outputs": [], - "source": [ - "# Let's first fetch some tables that we want to query\n", - "# Here: 1000 tables from OTT-QA\n", - "from haystack.utils import fetch_archive_from_http\n", - "\n", - "doc_dir = \"data\"\n", - "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_tables_sample.json.zip\"\n", - "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SKjw2LuXxlGh", - "outputId": "c24f8ca0-1a58-44ea-f01d-414db4c8f1f4" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Result ... Score\n", - "0 Winner ... 6-1 , 6-1\n", - "1 Winner ... 
6-2 , 4-6 , 6-3\n", - "2 Winner ... 6-2 , 6-2\n", - "3 Runner-up ... 3-6 , 2-6\n", - "4 Winner ... 6-7 , 6-3 , 6-0\n", - "5 Winner ... 6-1 , 6-0\n", - "6 Winner ... 6-2 , 2-6 , 6-2\n", - "7 Winner ... 6-0 , 6-4\n", - "\n", - "[8 rows x 8 columns]\n", - "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" - ] - } - ], - "source": [ - "# Add the tables to the DocumentStore\n", - "\n", - "import json\n", - "from haystack import Document\n", - "import pandas as pd\n", - "\n", - "\n", - "def read_ottqa_tables(filename):\n", - " processed_tables = []\n", - " with open(filename) as tables:\n", - " tables = json.load(tables)\n", - " for key, table in tables.items():\n", - " current_columns = table[\"header\"]\n", - " current_rows = table[\"data\"]\n", - " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", - " current_doc_title = table[\"title\"]\n", - " current_section_title = table[\"section_title\"]\n", - " document = Document(\n", - " content=current_df,\n", - " content_type=\"table\",\n", - " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", - " id=key,\n", - " )\n", - " processed_tables.append(document)\n", - "\n", - " return processed_tables\n", - "\n", - "\n", - "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", - "document_store.write_documents(tables, index=document_index)\n", - "\n", - "# Showing content field and meta field of one of the Documents of content_type 'table'\n", - "print(tables[0].content)\n", - "print(tables[0].meta)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hmQC1sDmw3d7" - }, - "source": [ - "## Initalize Retriever, Reader, & Pipeline\n", - "\n", - "### Retriever\n", - "\n", - "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", - "They use some simple but fast algorithm.\n", - "\n", - "**Here:** We use the `TableTextRetriever` capable of 
retrieving relevant content among a database\n", - "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", - "\n", - "**Alternatives:**\n", - "\n", - "- `ElasticsearchRetriever` that uses BM25 algorithm\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EY_qvdV6wyK5" - }, - "outputs": [], - "source": [ - "from haystack.nodes.retriever import TableTextRetriever\n", - "\n", - "retriever = TableTextRetriever(\n", - " document_store=document_store,\n", - " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", - " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", - " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", - " embed_meta_fields=[\"title\", \"section_title\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jasi1RM2zIJ7" - }, - "outputs": [], - "source": [ - "# Add table embeddings to the tables in DocumentStore\n", - "document_store.update_embeddings(retriever=retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XM-ijy6Zz11L" - }, - "outputs": [], - "source": [ - "## Alternative: ElasticsearchRetriever\n", - "# from haystack.nodes.retriever import ElasticsearchRetriever\n", - "# retriever = ElasticsearchRetriever(document_store=document_store)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YHfQWxVI0N2e", - "outputId": "05976ac9-bee3-4eb8-b36d-01f1db5250db" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the Retriever\n", - "from haystack.utils import print_documents\n", - "\n", - "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", - "# Get highest scored table\n", - "print(retrieved_tables[0].content)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zbwkXScm2-gy" - }, - "source": [ - "### Reader\n", - "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", - "\n", - "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 
1 or close to 1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4APcRoio2RxG" - }, - "outputs": [], - "source": [ - "from haystack.nodes import TableReader\n", - "\n", - "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ILuAXkyN4F7x", - "outputId": "7bdb7190-fcf8-4296-c237-cffc78dac4aa" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... 
Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", - "\n", - "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", - "print(table_doc.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ilbsecgA4vfN", - "outputId": "5f4e8f0b-bc9e-485b-c933-546fcad2b411" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ 'answers': [ Answer(answer='12', type='extractive', score=1.0, context= Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... 
Under construction\n", - "\n", - "[20 rows x 6 columns], offsets_in_document=[Span(start=12, end=13), Span(start=18, end=19), Span(start=24, end=25), Span(start=42, end=43), Span(start=48, end=49), Span(start=54, end=55), Span(start=78, end=79), Span(start=84, end=85), Span(start=90, end=91), Span(start=102, end=103), Span(start=108, end=109), Span(start=114, end=115)], offsets_in_context=[Span(start=12, end=13), Span(start=18, end=19), Span(start=24, end=25), Span(start=42, end=43), Span(start=48, end=49), Span(start=54, end=55), Span(start=78, end=79), Span(start=84, end=85), Span(start=90, end=91), Span(start=102, end=103), Span(start=108, end=109), Span(start=114, end=115)], document_id='List_of_tallest_twin_buildings_and_structures_in_the_world_1', meta={'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']})],\n", - " 'query': 'How many twin buildings are under construction?'}\n" - ] - } - ], - "source": [ - "from haystack.utils import print_answers\n", - "\n", - "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", - "print_answers(prediction, details=\"all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jkAYNMb7R9qu" - }, - "source": [ - "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", - "\n", - "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "It8XYT2ZTVJs", - "outputId": "5bd712a0-9f22-4fc0-a4f1-b01b15cb9916" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Predicted answer: 12\n", - "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "DeAkZwDhufYA" + }, + "source": [ + "# Open-Domain QA on Tables\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", + "\n", + "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbR3bETlvi-3" + }, + "source": [ + "### Prepare environment\n", + "\n", + "#### Colab: Enable the GPU runtime\n", + "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", + "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HW66x0rfujyO" + }, + "outputs": [], + "source": [ + "# Make sure you have a GPU running\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ZXoyhOAvn7M" + }, + "outputs": [], + "source": [ + "# Install the latest release of Haystack in your own environment\n", + "#! 
pip install farm-haystack\n", + "\n", + "# Install the latest master of Haystack\n", + "!pip install --upgrade pip\n", + "!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab]\n", + "\n", + "# The TaPAs-based TableReader requires the torch-scatter library\n", + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", + "\n", + "# Install pygraphviz for visualization of Pipelines\n", + "!apt install libgraphviz-dev\n", + "!pip install pygraphviz" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K_XJhluXwF5_" + }, + "source": [ + "### Start an Elasticsearch server\n", + "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "frDqgzK7v2i1" + }, + "outputs": [], + "source": [ + "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", + "from haystack.utils import launch_es\n", + "\n", + "launch_es()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "S4PGj1A6wKWu" + }, + "outputs": [], + "source": [ + "# In Colab / No Docker environments: Start Elasticsearch from source\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", + "\n", + "import os\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "\n", + "es_server = Popen(\n", + " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", + ")\n", + "# wait until ES has started\n", + "! 
sleep 30" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmxepXZtwQ0E" + }, + "outputs": [], + "source": [ + "# Connect to Elasticsearch\n", + "from haystack.document_stores import ElasticsearchDocumentStore\n", + "\n", + "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", + "document_index = \"document\"\n", + "document_store = ElasticsearchDocumentStore(\n", + " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fFh26LIlxldw" + }, + "source": [ + "## Add Tables to DocumentStore\n", + "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", + "\n", + "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM63uwbd8zd6" + }, + "outputs": [], + "source": [ + "# Let's first fetch some tables that we want to query\n", + "# Here: 1000 tables from OTT-QA\n", + "from haystack.utils import fetch_archive_from_http\n", + "\n", + "doc_dir = \"data\"\n", + "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", + "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SKjw2LuXxlGh", + "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Result ... Score\n", + "0 Winner ... 6-1 , 6-1\n", + "1 Winner ... 
6-2 , 4-6 , 6-3\n", + "2 Winner ... 6-2 , 6-2\n", + "3 Runner-up ... 3-6 , 2-6\n", + "4 Winner ... 6-7 , 6-3 , 6-0\n", + "5 Winner ... 6-1 , 6-0\n", + "6 Winner ... 6-2 , 2-6 , 6-2\n", + "7 Winner ... 6-0 , 6-4\n", + "\n", + "[8 rows x 8 columns]\n", + "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" + ] + } + ], + "source": [ + "# Add the tables to the DocumentStore\n", + "\n", + "import json\n", + "from haystack import Document\n", + "import pandas as pd\n", + "\n", + "\n", + "def read_ottqa_tables(filename):\n", + " processed_tables = []\n", + " with open(filename) as tables:\n", + " tables = json.load(tables)\n", + " for key, table in tables.items():\n", + " current_columns = table[\"header\"]\n", + " current_rows = table[\"data\"]\n", + " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", + " current_doc_title = table[\"title\"]\n", + " current_section_title = table[\"section_title\"]\n", + " document = Document(\n", + " content=current_df,\n", + " content_type=\"table\",\n", + " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", + " id=key,\n", + " )\n", + " processed_tables.append(document)\n", + "\n", + " return processed_tables\n", + "\n", + "\n", + "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", + "document_store.write_documents(tables, index=document_index)\n", + "\n", + "# Showing content field and meta field of one of the Documents of content_type 'table'\n", + "print(tables[0].content)\n", + "print(tables[0].meta)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hmQC1sDmw3d7" + }, + "source": [ + "## Initalize Retriever, Reader, & Pipeline\n", + "\n", + "### Retriever\n", + "\n", + "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", + "They use some simple but fast algorithm.\n", + "\n", + "**Here:** We use the `TableTextRetriever` capable of 
retrieving relevant content among a database\n", + "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", + "\n", + "**Alternatives:**\n", + "\n", + "- `ElasticsearchRetriever` that uses BM25 algorithm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EY_qvdV6wyK5" + }, + "outputs": [], + "source": [ + "from haystack.nodes.retriever import TableTextRetriever\n", + "\n", + "retriever = TableTextRetriever(\n", + " document_store=document_store,\n", + " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", + " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", + " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", + " embed_meta_fields=[\"title\", \"section_title\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jasi1RM2zIJ7" + }, + "outputs": [], + "source": [ + "# Add table embeddings to the tables in DocumentStore\n", + "document_store.update_embeddings(retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XM-ijy6Zz11L" + }, + "outputs": [], + "source": [ + "## Alternative: ElasticsearchRetriever\n", + "# from haystack.nodes.retriever import ElasticsearchRetriever\n", + "# retriever = ElasticsearchRetriever(document_store=document_store)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YHfQWxVI0N2e", + "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": 
[ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the Retriever\n", + "from haystack.utils import print_documents\n", + "\n", + "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", + "# Get highest scored table\n", + "print(retrieved_tables[0].content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbwkXScm2-gy" + }, + "source": [ + "### Reader\n", + "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", + "\n", + "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 
1 or close to 1)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4APcRoio2RxG" + }, + "outputs": [], + "source": [ + "from haystack.nodes import TableReader\n", + "\n", + "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ILuAXkyN4F7x", + "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... 
Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", + "\n", + "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", + "print(table_doc.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ilbsecgA4vfN", + "outputId": "e0095547-fb82-4b76-f826-284bcff61257" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ ]\n" + ] + } + ], + "source": [ + "from haystack.utils import print_answers\n", + "\n", + "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", + "print_answers(prediction, details=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jkAYNMb7R9qu" + }, + "source": [ + "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", + "\n", + "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "It8XYT2ZTVJs", + "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted answer: 12\n", + "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" + ] + } + ], + "source": [ + "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", + "print(f\"Meta field: {prediction['answers'][0].meta}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgmG7pzL5ceh" + }, + "source": [ + "### Pipeline\n", + "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", + "\n", + "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "G-aZZvyv4-Mf" + }, + "outputs": [], + "source": [ + "# Initialize pipeline\n", + "from haystack import Pipeline\n", + "\n", + "table_qa_pipeline = Pipeline()\n", + "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m8evexnW6dev", + "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ { 'answer': '12',\n", + " 'context': Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '7',\n", + " 'context': Building or structure ... 
Listing\n", + "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", + "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", + "2 Jensen Block ... Seattle landmark\n", + "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", + "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", + "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", + "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", + "\n", + "[7 rows x 3 columns]},\n", + " { 'answer': '8',\n", + " 'context': Years Venue Location\n", + "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", + "1 1987-88 Navesink Country Club Middletown , New Jersey\n", + "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", + "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", + "4 1982 Wykagyl Country Club New Rochelle , New York\n", + "5 1981 Ridgewood Country Club Paramus , New Jersey\n", + "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", + "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", + " { 'answer': '8',\n", + " 'context': Model Specification ... Prime mover Power output\n", + "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", + "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", + "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", + "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", + "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", + "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", + "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", + "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", + "\n", + "[8 rows x 7 columns]},\n", + " { 'answer': '10',\n", + " 'context': Name or designation ... Notes\n", + "0 Aluminum Overcast ... One of only ten flyable B-17s\n", + "1 Avro Lancaster PA474 ... 
One of only two Lancasters in flying condition in the world\n", + "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", + "3 Douglas DC-7B N836D ... \n", + "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", + "5 FIFI ... One of only two B-29s flying\n", + "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", + "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", + "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", + "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", + "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", + "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", + "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", + "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", + "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", + "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", + "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", + "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", + "18 Yankee Lady ... Flyable\n", + "\n", + "[19 rows x 6 columns]},\n", + " { 'answer': '13',\n", + " 'context': N Year Country ... Link Remark K\n", + "0 003+ 2013 INDIA ... LK RK K\n", + "1 005 2006 USA ... LK RK K\n", + "2 010 2014 ZAF ... LK RK K\n", + "3 020 2010 USA ... LK RK K\n", + "4 030 201 ? USA ... LK RK K\n", + "5 040 2007 USA ... 
LK RK K\n", + "6 042 2004 USA ... LK Only G-S With Large Battery K\n", + "7 050 201 ? USA ... LK RK K\n", + "8 100 20 ? ? USA ... LK RK K\n", + "9 200 20 ? ? USA ... LK RK K\n", + "10 300 2013 EUR ... LK RK K\n", + "11 400 20 ? ? USA ... LK RK K\n", + "12 995 20 ? ? USA ... LK RK K\n", + "\n", + "[13 rows x 12 columns]},\n", + " { 'answer': '5',\n", + " 'context': Team ... Capacity\n", + "0 Barnsley ... 23,009\n", + "1 Blackpool ... 16,750\n", + "2 Bradford City ... 25,136\n", + "3 Burton Albion ... 6,912\n", + "4 Bury ... 11,840\n", + "5 Chesterfield ... 10,400\n", + "6 Colchester United ... 10,105\n", + "7 Coventry City ... 32,500\n", + "8 Crewe Alexandra ... 10,066\n", + "9 Doncaster Rovers ... 15,231\n", + "10 Fleetwood Town ... 5,311\n", + "11 Gillingham ... 11,582\n", + "12 Millwall ... 20,146\n", + "13 Oldham Athletic ... 13,512\n", + "14 Peterborough United ... 14,319\n", + "15 Port Vale ... 18,947\n", + "16 Rochdale ... 10,249\n", + "17 Scunthorpe United ... 9,183\n", + "18 Sheffield United ... 32,702\n", + "19 Shrewsbury Town ... 9,875\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': '7',\n", + " 'context': Resource Name ... Added\n", + "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", + "1 John M. Beasley House ... March 5 , 1996\n", + "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", + "3 Austin House ... February 5 , 1998\n", + "4 Reid-Woods House ... August 31 , 2000\n", + "5 Villa Serena Apartments ... September 29 , 2000\n", + "6 Paul M. Souder House ... November 2 , 2000\n", + "7 Stevens-Gilchrist House ... August 17 , 2001\n", + "\n", + "[8 rows x 3 columns]},\n", + " { 'answer': '19',\n", + " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", + "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", + "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", + "2 Cowen Park Bridge ... 15th Avenue NE\n", + "3 First Avenue South Bridge ... 
State Route 99\n", + "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", + "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", + "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", + "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", + "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", + "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", + "10 Magnolia Bridge ... W Garfield Street\n", + "11 Montlake Bridge ... State Route 513\n", + "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", + "13 Salmon Bay Bridge ... BNSF Railway\n", + "14 Ship Canal Bridge ... Interstate 5\n", + "15 Schmitz Park Bridge ... SW Admiral Way\n", + "16 Spokane Street Bridge ... SW Spokane Street\n", + "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", + "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", + "19 University Bridge ... Eastlake Avenue NE\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '8',\n", + " 'context': Location ... Comments\n", + "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", + "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", + "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", + "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", + "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", + "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", + "6 Minehead ... Known as Somerwest World 1986-1998 . 
Still open as Butlins Minehead 30 April...\n", + "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", + "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", + "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", + "\n", + "[10 rows x 4 columns]}]\n" + ] + } + ], + "source": [ + "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", + "print_answers(prediction, details=\"minimum\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Open-Domain QA on Text and Tables\n", + "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", + "\n", + "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." + ], + "metadata": { + "id": "8uMzl9Ml_D1B" + } + }, + { + "cell_type": "code", + "source": [ + "# Add 1,000 text passages from OTT-QA to our document store.\n", + "\n", + "def read_ottqa_texts(filename):\n", + " processed_passages = []\n", + " with open(filename) as passages:\n", + " passages = json.load(passages)\n", + " for title, content in passages.items():\n", + " title = title[6:]\n", + " title = title.replace(\"_\", \" \")\n", + " document = Document(\n", + " content=content,\n", + " content_type=\"text\",\n", + " meta={\"title\": title}\n", + " )\n", + " processed_passages.append(document)\n", + " \n", + " return processed_passages\n", + "\n", + "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", + "document_store.write_documents(passages, index=document_index)" + ], + "metadata": { + "id": "4CBcIjIq_uFx" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" + ], + "metadata": { + "id": "j1TaNF7SiKgH" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Pipeline for QA on Combination of Text and Tables\n", + "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", + "\n", + "To achieve this, we make use of two additional nodes:\n", + "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", + "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." + ], + "metadata": { + "id": "c2sk_uNHj0DY" + } + }, + { + "cell_type": "code", + "source": [ + "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", + "\n", + "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", + "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", + "# \"deepset/tapas-large-nq-reader\" as TableReader models. 
The disadvantage of these models is, however,\n", + "# that they are not capable of doing aggregations over multiple table cells.\n", + "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", + "split_documents = SplitDocumentList()\n", + "join_answers = JoinAnswers()" + ], + "metadata": { + "id": "Ej_j8Q3wlxXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text_table_qa_pipeline = Pipeline()\n", + "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", + "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", + "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", + "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" + ], + "metadata": { + "id": "Zdq6JnF5m3aP" + }, + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", + "from IPython import display\n", + "\n", + "text_table_qa_pipeline.draw()\n", + "display.Image(\"pipeline.png\")" + ], + "metadata": { + "id": "K4vH1ZEnniut", + "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540 + } + }, + "execution_count": 55, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a text passage\n", + "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" + ], + "metadata": 
{ + "id": "strPNduPoBLe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "9YiK75tSoOGA", + "outputId": "65362f83-4313-4ac1-f6ce-9b92522598c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 57, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: Who is Aleksandar Trifunovic?\n", + "Answers:\n", + "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", + " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", + " 'professional basketball coach and former player .'},\n", + " { 'answer': 'Johnny Höglin',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Ivar Eriksen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 
2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Magne Thomassen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': '5',\n", + " 'context': Position # Player Moving from\n", + "0 F 12 Nikola Kalinić Radnički Kragujevac\n", + "1 SF 6 Nemanja Dangubić Mega Vizura\n", + "2 C 33 Maik Zirbes Brose Baskets\n", + "3 PG 3 Marcus Williams Lokomotiv Kuban\n", + "4 PG 24 Stefan Jović Radnički Kragujevac\n", + "5 C 14 Đorđe Kaplanović 
FMP\n", + "6 SF 5 Nikola Čvorović FMP\n", + "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", + "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", + " { 'answer': 'Vasile Sărucan',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Belgium',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Poland',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu 
Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Hafþór Júlíus Björnsson',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Estonia',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Iceland',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman',\n", + " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman . 
He is currently '\n", + " 'playing with Piráti Chomutov of the Czech Extral'},\n", + " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player',\n", + " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player . In 2010 , he '\n", + " 'played for FC Angusht Nazran in the'}]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a table\n", + "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" + ], + "metadata": { + "id": "QYOHDSmLpzEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "4kw53uWep3zj", + "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 67, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: What is Cuba's national tree?\n", + "Answers:\n", + "[ { 'answer': 'Cuban royal palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... 
Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", + " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", + " 'evergreen tree native to western Cuba in the Cuban pine '\n", + " 'forests ecoregion .'},\n", + " { 'answer': \"Glenn O'Brien\",\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... 
The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... 
In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Belize',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Palmyra palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... 
Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Guadeloupe',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Basse-Terre',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'East Caribbean dollar',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... 
English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Jenkins',\n", + " 'context': NRHP reference number ... County\n", + "0 72000402 ... Wilkes\n", + "1 ... Meriwether\n", + "2 ... Bartow\n", + "3 71000280 ... Jenkins\n", + "4 ... Chatham\n", + "5 89002015 ... Thomas\n", + "6 ... Glynn\n", + "7 75000615 ... Walton\n", + "8 84001156 ... Sumter\n", + "9 79000713 ... Cobb\n", + "10 82002491 ... Twiggs\n", + "11 74000703 ... Taliaferro\n", + "12 80001039 ... Floyd\n", + "13 90000805 ... Gwinnett\n", + "14 73000620 ... Decatur\n", + "15 79000731 ... Houston\n", + "16 95000741 ... Grady\n", + "17 97000559 ... Greene\n", + "18 74000662 ... Brooks\n", + "19 75000616 ... Washington\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", + " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", + " 'perennial plant in the family Primulaceae , native to '\n", + " 'Northern Europe and northern Asia , and '},\n", + " { 'answer': 'Poospiza',\n", + " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", + " 'the South American lowlands and the Andes mountains . '\n", + " 'Generally they are arboreal feeders in '},\n", + " { 'answer': 'golden-crowned sparrow',\n", + " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", + " 'a large American sparrow found in the western part of '\n", + " 'North America .'},\n", + " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush',\n", + " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", + " 'flower heads . 
It is a rare variety '},\n", + " { 'answer': 'rain',\n", + " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", + " 'the southeastern rain forest of the Amazon in Puerto '\n", + " 'Maldonado , Tambopata , the Sacred Valley'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RyeK3s28_X1C" + }, + "source": [ + "## About us\n", + "\n", + "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", + "\n", + "We bring NLP to the industry via open source! \n", + "Our focus: Industry specific language models & large scale QA systems. \n", + " \n", + "Some of our other work: \n", + "- [German BERT](https://deepset.ai/german-bert)\n", + "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", + "- [FARM](https://github.com/deepset-ai/FARM)\n", + "\n", + "Get in touch:\n", + "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", + "\n", + "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" + ] } - ], - "source": [ - "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", - "print(f\"Meta field: {prediction['answers'][0].meta}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgmG7pzL5ceh" - }, - "source": [ - "### Pipeline\n", - "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", - "\n", - "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "G-aZZvyv4-Mf" - }, - "outputs": [], - "source": [ - "# Initialize pipeline\n", - "from haystack import Pipeline\n", - "\n", - "table_qa_pipeline = Pipeline()\n", - "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { + ], + "metadata": { + "accelerator": "GPU", "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "m8evexnW6dev", - "outputId": "290168b1-294e-42ed-c970-e5ddfefb3396" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ { 'answer': '12',\n", - " 'context': Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '7',\n", - " 'context': Building or structure ... Listing\n", - "0 Ford Assembly Plant Building Now Public Storage ... 
Seattle landmark\n", - "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", - "2 Jensen Block ... Seattle landmark\n", - "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", - "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", - "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", - "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", - "\n", - "[7 rows x 3 columns]},\n", - " { 'answer': '8',\n", - " 'context': Years Venue Location\n", - "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", - "1 1987-88 Navesink Country Club Middletown , New Jersey\n", - "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", - "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", - "4 1982 Wykagyl Country Club New Rochelle , New York\n", - "5 1981 Ridgewood Country Club Paramus , New Jersey\n", - "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", - "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", - " { 'answer': '8',\n", - " 'context': Model Specification ... Prime mover Power output\n", - "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", - "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", - "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", - "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", - "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", - "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", - "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", - "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", - "\n", - "[8 rows x 7 columns]},\n", - " { 'answer': '10',\n", - " 'context': Name or designation ... Notes\n", - "0 Aluminum Overcast ... One of only ten flyable B-17s\n", - "1 Avro Lancaster PA474 ... One of only two Lancasters in flying condition in the world\n", - "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... 
The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", - "3 Douglas DC-7B N836D ... \n", - "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", - "5 FIFI ... One of only two B-29s flying\n", - "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", - "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", - "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", - "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", - "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", - "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", - "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", - "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", - "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", - "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", - "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", - "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", - "18 Yankee Lady ... Flyable\n", - "\n", - "[19 rows x 6 columns]},\n", - " { 'answer': '13',\n", - " 'context': N Year Country ... Link Remark K\n", - "0 003+ 2013 INDIA ... LK RK K\n", - "1 005 2006 USA ... LK RK K\n", - "2 010 2014 ZAF ... LK RK K\n", - "3 020 2010 USA ... LK RK K\n", - "4 030 201 ? USA ... LK RK K\n", - "5 040 2007 USA ... LK RK K\n", - "6 042 2004 USA ... LK Only G-S With Large Battery K\n", - "7 050 201 ? USA ... LK RK K\n", - "8 100 20 ? ? USA ... 
LK RK K\n", - "9 200 20 ? ? USA ... LK RK K\n", - "10 300 2013 EUR ... LK RK K\n", - "11 400 20 ? ? USA ... LK RK K\n", - "12 995 20 ? ? USA ... LK RK K\n", - "\n", - "[13 rows x 12 columns]},\n", - " { 'answer': '5',\n", - " 'context': Team ... Capacity\n", - "0 Barnsley ... 23,009\n", - "1 Blackpool ... 16,750\n", - "2 Bradford City ... 25,136\n", - "3 Burton Albion ... 6,912\n", - "4 Bury ... 11,840\n", - "5 Chesterfield ... 10,400\n", - "6 Colchester United ... 10,105\n", - "7 Coventry City ... 32,500\n", - "8 Crewe Alexandra ... 10,066\n", - "9 Doncaster Rovers ... 15,231\n", - "10 Fleetwood Town ... 5,311\n", - "11 Gillingham ... 11,582\n", - "12 Millwall ... 20,146\n", - "13 Oldham Athletic ... 13,512\n", - "14 Peterborough United ... 14,319\n", - "15 Port Vale ... 18,947\n", - "16 Rochdale ... 10,249\n", - "17 Scunthorpe United ... 9,183\n", - "18 Sheffield United ... 32,702\n", - "19 Shrewsbury Town ... 9,875\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': '7',\n", - " 'context': Resource Name ... Added\n", - "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", - "1 John M. Beasley House ... March 5 , 1996\n", - "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", - "3 Austin House ... February 5 , 1998\n", - "4 Reid-Woods House ... August 31 , 2000\n", - "5 Villa Serena Apartments ... September 29 , 2000\n", - "6 Paul M. Souder House ... November 2 , 2000\n", - "7 Stevens-Gilchrist House ... August 17 , 2001\n", - "\n", - "[8 rows x 3 columns]},\n", - " { 'answer': '19',\n", - " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", - "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", - "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", - "2 Cowen Park Bridge ... 15th Avenue NE\n", - "3 First Avenue South Bridge ... State Route 99\n", - "4 Fremont Bridge ( Fremont Avenue Bridge ) ... 
Road connecting Fremont Avenue N and 4th Avenue N\n", - "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", - "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", - "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", - "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", - "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", - "10 Magnolia Bridge ... W Garfield Street\n", - "11 Montlake Bridge ... State Route 513\n", - "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", - "13 Salmon Bay Bridge ... BNSF Railway\n", - "14 Ship Canal Bridge ... Interstate 5\n", - "15 Schmitz Park Bridge ... SW Admiral Way\n", - "16 Spokane Street Bridge ... SW Spokane Street\n", - "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", - "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", - "19 University Bridge ... Eastlake Avenue NE\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '8',\n", - " 'context': Location ... Comments\n", - "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", - "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", - "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", - "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", - "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", - "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", - "6 Minehead ... Known as Somerwest World 1986-1998 . Still open as Butlins Minehead 30 April...\n", - "7 Mosney ... 
Operated independently until closure and conversion into an Irish Government...\n", - "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", - "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", - "\n", - "[10 rows x 4 columns]}]\n" - ] + "name": "Tutorial15_TableQA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" } - ], - "source": [ - "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", - "print_answers(prediction, details=\"minimum\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RyeK3s28_X1C" - }, - "source": [ - "## About us\n", - "\n", - "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", - "\n", - "We bring NLP to the industry via open source! \n", - "Our focus: Industry specific language models & large scale QA systems. 
\n", - " \n", - "Some of our other work: \n", - "- [German BERT](https://deepset.ai/german-bert)\n", - "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", - "- [FARM](https://github.com/deepset-ai/FARM)\n", - "\n", - "Get in touch:\n", - "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", - "\n", - "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "Tutorial15_TableQA.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/tutorials/Tutorial15_TableQA.py b/tutorials/Tutorial15_TableQA.py index 5c282e21c8..fb1ac2d68a 100644 --- a/tutorials/Tutorial15_TableQA.py +++ b/tutorials/Tutorial15_TableQA.py @@ -6,7 +6,7 @@ from haystack.document_stores import ElasticsearchDocumentStore from haystack import Document, Pipeline from haystack.nodes.retriever import TableTextRetriever -from haystack.nodes import TableReader +from haystack.nodes import TableReader, FARMReader, SplitDocumentList, JoinAnswers def tutorial15_tableqa(): @@ -116,6 +116,38 @@ def read_ottqa_tables(filename): print_answers(prediction, details="minimum") + ### Pipeline for QA on Combination of Text and Tables + # We are using one node for retrieving both texts and tables, the TableTextRetriever. + # In order to do question-answering on the Documents coming from the TableTextRetriever, we need to route + # Documents of type "text" to a FARMReader ( or alternatively TransformersReader) and Documents of type + # "table" to a TableReader. 
+ + text_reader = FARMReader("deepset/roberta-base-squad2") + # In order to get meaningful scores from the TableReader, use "deepset/tapas-large-nq-hn-reader" or + # "deepset/tapas-large-nq-reader" as TableReader models. The disadvantage of these models is, however, + # that they are not capable of doing aggregations over multiple table cells. + table_reader = TableReader("deepset/tapas-large-nq-hn-reader") + split_documents = SplitDocumentList() + join_answers = JoinAnswers() + + text_table_qa_pipeline = Pipeline() + text_table_qa_pipeline.add_node(component=retriever, name="TableTextRetriever", inputs=["Query"]) + text_table_qa_pipeline.add_node(component=split_documents, name="SplitDocumentList", inputs=["TableTextRetriever"]) + text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["SplitDocumentList.output_1"]) + text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["SplitDocumentList.output_2"]) + text_table_qa_pipeline.add_node(component=join_answers, name="JoinAnswers", inputs=["TextReader", "TableReader"]) + + # Example query whose answer resides in a text passage + predictions = text_table_qa_pipeline.run(query="Who is Aleksandar Trifunovic?") + # We can see both text passages and tables as contexts of the predicted answers. + print_answers(predictions, details="minimum") + + # Example query whose answer resides in a table + predictions = text_table_qa_pipeline.run(query="What is Cuba's national tree?") + # We can see both text passages and tables as contexts of the predicted answers. 
+ print_answers(predictions, details="minimum") + + if __name__ == "__main__": tutorial15_tableqa() From e19954693929371195aa7f40e684ce37b206cf6f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 28 Feb 2022 16:17:51 +0000 Subject: [PATCH 04/14] Update Documentation & Code Style --- docs/_src/tutorials/tutorials/15.md | 104 +- .../haystack-pipeline-1.2.0rc0.schema.json | 92 + test/test_pipeline.py | 10 +- tutorials/Tutorial15_TableQA.ipynb | 2674 ++++++++--------- tutorials/Tutorial15_TableQA.py | 1 - 5 files changed, 1535 insertions(+), 1346 deletions(-) diff --git a/docs/_src/tutorials/tutorials/15.md b/docs/_src/tutorials/tutorials/15.md index c6bdb6325d..6c66d47a81 100644 --- a/docs/_src/tutorials/tutorials/15.md +++ b/docs/_src/tutorials/tutorials/15.md @@ -33,13 +33,14 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial # Install the latest master of Haystack !pip install --upgrade pip -!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab] +!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab] # The TaPAs-based TableReader requires the torch-scatter library !pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html -# If you run this notebook on Google Colab, you might need to -# restart the runtime after installing haystack. +# Install pygraphviz for visualization of Pipelines +!apt install libgraphviz-dev +!pip install pygraphviz ``` ### Start an Elasticsearch server @@ -94,7 +95,7 @@ Just as text passages, tables are represented as `Document` objects in Haystack. 
from haystack.utils import fetch_archive_from_http doc_dir = "data" -s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_tables_sample.json.zip" +s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip" fetch_archive_from_http(url=s3_url, output_dir=doc_dir) ``` @@ -246,6 +247,101 @@ prediction = table_qa_pipeline.run("How many twin buildings are under constructi print_answers(prediction, details="minimum") ``` +# Open-Domain QA on Text and Tables +With Haystack, you can not only do QA on texts or tables alone, but also use both texts and tables together as your source of information. + +To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset. + + +```python +# Add 1,000 text passages from OTT-QA to our document store. + + +def read_ottqa_texts(filename): + processed_passages = [] + with open(filename) as passages: + passages = json.load(passages) + for title, content in passages.items(): + title = title[6:] + title = title.replace("_", " ") + document = Document(content=content, content_type="text", meta={"title": title}) + processed_passages.append(document) + + return processed_passages + + +passages = read_ottqa_texts(f"{doc_dir}/ottqa_texts_sample.json") +document_store.write_documents(passages, index=document_index) +``` + + +```python +document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False) +``` + +## Pipeline for QA on Combination of Text and Tables +We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `"text"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `"table"` to a `TableReader`. 
+ +To achieve this, we make use of two additional nodes: +- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `"text"` or `"table"`, respectively. +- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them into a single list of Answers. + + +```python +from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers + +text_reader = FARMReader("deepset/roberta-base-squad2") +# In order to get meaningful scores from the TableReader, use "deepset/tapas-large-nq-hn-reader" or +# "deepset/tapas-large-nq-reader" as TableReader models. The disadvantage of these models is, however, +# that they are not capable of doing aggregations over multiple table cells. +table_reader = TableReader("deepset/tapas-large-nq-hn-reader") +split_documents = SplitDocumentList() +join_answers = JoinAnswers() +``` + + +```python +text_table_qa_pipeline = Pipeline() +text_table_qa_pipeline.add_node(component=retriever, name="TableTextRetriever", inputs=["Query"]) +text_table_qa_pipeline.add_node(component=split_documents, name="SplitDocumentList", inputs=["TableTextRetriever"]) +text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["SplitDocumentList.output_1"]) +text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["SplitDocumentList.output_2"]) +text_table_qa_pipeline.add_node(component=join_answers, name="JoinAnswers", inputs=["TextReader", "TableReader"]) +``` + + +```python +# Let's have a look at the structure of the combined Table and Text QA pipeline. 
+from IPython import display + +text_table_qa_pipeline.draw() +display.Image("pipeline.png") +``` + + +```python +# Example query whose answer resides in a text passage +predictions = text_table_qa_pipeline.run(query="Who is Aleksandar Trifunovic?") +``` + + +```python +# We can see both text passages and tables as contexts of the predicted answers. +print_answers(predictions, details="minimum") +``` + + +```python +# Example query whose answer resides in a table +predictions = text_table_qa_pipeline.run(query="What is Cuba's national tree?") +``` + + +```python +# We can see both text passages and tables as contexts of the predicted answers. +print_answers(predictions, details="minimum") +``` + ## About us This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany diff --git a/json-schemas/haystack-pipeline-1.2.0rc0.schema.json b/json-schemas/haystack-pipeline-1.2.0rc0.schema.json index 520d8ee0cc..4387b996b1 100644 --- a/json-schemas/haystack-pipeline-1.2.0rc0.schema.json +++ b/json-schemas/haystack-pipeline-1.2.0rc0.schema.json @@ -59,6 +59,9 @@ { "$ref": "#/definitions/ImageToTextConverterComponent" }, + { + "$ref": "#/definitions/JoinAnswersComponent" + }, { "$ref": "#/definitions/JoinDocumentsComponent" }, @@ -95,6 +98,9 @@ { "$ref": "#/definitions/SklearnQueryClassifierComponent" }, + { + "$ref": "#/definitions/SplitDocumentListComponent" + }, { "$ref": "#/definitions/TableReaderComponent" }, @@ -1093,6 +1099,51 @@ ], "additionalProperties": false }, + "JoinAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, "JoinDocumentsComponent": { "type": "object", "properties": { @@ -1836,6 +1887,47 @@ ], "additionalProperties": false }, + "SplitDocumentListComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SplitDocumentList" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, "TableReaderComponent": { "type": "object", "properties": { diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 9835be2fdb..ca709f8507 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -635,9 +635,13 @@ def test_documentsearch_document_store_authentication(retriever_with_docs, docum def test_split_document_list_content_type(test_docs_xs): # Test splitting by content_type - docs = [Document(content="text document", content_type="text"), - Document(content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]), - content_type="table")] + docs = [ + Document(content="text document", content_type="text"), + Document( + content=pd.DataFrame(columns=["col 1", "col 2"], data=[["row 1", "row 1"], ["row 2", "row 2"]]), + content_type="table", + ), + ] split_documents = SplitDocumentList() result, _ = split_documents.run(documents=docs) diff --git a/tutorials/Tutorial15_TableQA.ipynb b/tutorials/Tutorial15_TableQA.ipynb index a292658e12..6866526c0d 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -1,1359 +1,1357 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "DeAkZwDhufYA" - }, - "source": [ - "# Open-Domain QA on Tables\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", - "\n", - "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbR3bETlvi-3" - }, - "source": [ - "### Prepare environment\n", - "\n", - "#### Colab: Enable the GPU runtime\n", - "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", - "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HW66x0rfujyO" - }, - "outputs": [], - "source": [ - "# Make sure you have a GPU running\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_ZXoyhOAvn7M" - }, - "outputs": [], - "source": [ - "# Install the latest release of Haystack in your own environment\n", - "#! pip install farm-haystack\n", - "\n", - "# Install the latest master of Haystack\n", - "!pip install --upgrade pip\n", - "!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab]\n", - "\n", - "# The TaPAs-based TableReader requires the torch-scatter library\n", - "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", - "\n", - "# Install pygraphviz for visualization of Pipelines\n", - "!apt install libgraphviz-dev\n", - "!pip install pygraphviz" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K_XJhluXwF5_" - }, - "source": [ - "### Start an Elasticsearch server\n", - "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "frDqgzK7v2i1" - }, - "outputs": [], - "source": [ - "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", - "from haystack.utils import launch_es\n", - "\n", - "launch_es()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "S4PGj1A6wKWu" - }, - "outputs": [], - "source": [ - "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.9.2\n", - "\n", - "import os\n", - "from subprocess import Popen, PIPE, STDOUT\n", - "\n", - "es_server = Popen(\n", - " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", - ")\n", - "# wait until ES has started\n", - "! 
sleep 30" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RmxepXZtwQ0E" - }, - "outputs": [], - "source": [ - "# Connect to Elasticsearch\n", - "from haystack.document_stores import ElasticsearchDocumentStore\n", - "\n", - "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", - "document_index = \"document\"\n", - "document_store = ElasticsearchDocumentStore(\n", - " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFh26LIlxldw" - }, - "source": [ - "## Add Tables to DocumentStore\n", - "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", - "\n", - "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM63uwbd8zd6" - }, - "outputs": [], - "source": [ - "# Let's first fetch some tables that we want to query\n", - "# Here: 1000 tables from OTT-QA\n", - "from haystack.utils import fetch_archive_from_http\n", - "\n", - "doc_dir = \"data\"\n", - "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", - "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SKjw2LuXxlGh", - "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Result ... Score\n", - "0 Winner ... 6-1 , 6-1\n", - "1 Winner ... 
6-2 , 4-6 , 6-3\n", - "2 Winner ... 6-2 , 6-2\n", - "3 Runner-up ... 3-6 , 2-6\n", - "4 Winner ... 6-7 , 6-3 , 6-0\n", - "5 Winner ... 6-1 , 6-0\n", - "6 Winner ... 6-2 , 2-6 , 6-2\n", - "7 Winner ... 6-0 , 6-4\n", - "\n", - "[8 rows x 8 columns]\n", - "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" - ] - } - ], - "source": [ - "# Add the tables to the DocumentStore\n", - "\n", - "import json\n", - "from haystack import Document\n", - "import pandas as pd\n", - "\n", - "\n", - "def read_ottqa_tables(filename):\n", - " processed_tables = []\n", - " with open(filename) as tables:\n", - " tables = json.load(tables)\n", - " for key, table in tables.items():\n", - " current_columns = table[\"header\"]\n", - " current_rows = table[\"data\"]\n", - " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", - " current_doc_title = table[\"title\"]\n", - " current_section_title = table[\"section_title\"]\n", - " document = Document(\n", - " content=current_df,\n", - " content_type=\"table\",\n", - " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", - " id=key,\n", - " )\n", - " processed_tables.append(document)\n", - "\n", - " return processed_tables\n", - "\n", - "\n", - "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", - "document_store.write_documents(tables, index=document_index)\n", - "\n", - "# Showing content field and meta field of one of the Documents of content_type 'table'\n", - "print(tables[0].content)\n", - "print(tables[0].meta)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hmQC1sDmw3d7" - }, - "source": [ - "## Initalize Retriever, Reader, & Pipeline\n", - "\n", - "### Retriever\n", - "\n", - "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", - "They use some simple but fast algorithm.\n", - "\n", - "**Here:** We use the `TableTextRetriever` capable of 
retrieving relevant content among a database\n", - "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", - "\n", - "**Alternatives:**\n", - "\n", - "- `ElasticsearchRetriever` that uses BM25 algorithm\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EY_qvdV6wyK5" - }, - "outputs": [], - "source": [ - "from haystack.nodes.retriever import TableTextRetriever\n", - "\n", - "retriever = TableTextRetriever(\n", - " document_store=document_store,\n", - " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", - " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", - " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", - " embed_meta_fields=[\"title\", \"section_title\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jasi1RM2zIJ7" - }, - "outputs": [], - "source": [ - "# Add table embeddings to the tables in DocumentStore\n", - "document_store.update_embeddings(retriever=retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XM-ijy6Zz11L" - }, - "outputs": [], - "source": [ - "## Alternative: ElasticsearchRetriever\n", - "# from haystack.nodes.retriever import ElasticsearchRetriever\n", - "# retriever = ElasticsearchRetriever(document_store=document_store)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YHfQWxVI0N2e", - "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": 
[ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the Retriever\n", - "from haystack.utils import print_documents\n", - "\n", - "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", - "# Get highest scored table\n", - "print(retrieved_tables[0].content)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zbwkXScm2-gy" - }, - "source": [ - "### Reader\n", - "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", - "\n", - "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 
1 or close to 1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4APcRoio2RxG" - }, - "outputs": [], - "source": [ - "from haystack.nodes import TableReader\n", - "\n", - "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ILuAXkyN4F7x", - "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... 
Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", - "\n", - "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", - "print(table_doc.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ilbsecgA4vfN", - "outputId": "e0095547-fb82-4b76-f826-284bcff61257" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ ]\n" - ] - } - ], - "source": [ - "from haystack.utils import print_answers\n", - "\n", - "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", - "print_answers(prediction, details=\"all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jkAYNMb7R9qu" - }, - "source": [ - "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", - "\n", - "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "It8XYT2ZTVJs", - "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Predicted answer: 12\n", - "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" - ] - } - ], - "source": [ - "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", - "print(f\"Meta field: {prediction['answers'][0].meta}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgmG7pzL5ceh" - }, - "source": [ - "### Pipeline\n", - "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", - "\n", - "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "G-aZZvyv4-Mf" - }, - "outputs": [], - "source": [ - "# Initialize pipeline\n", - "from haystack import Pipeline\n", - "\n", - "table_qa_pipeline = Pipeline()\n", - "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "m8evexnW6dev", - "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ { 'answer': '12',\n", - " 'context': Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '7',\n", - " 'context': Building or structure ... 
Listing\n", - "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", - "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", - "2 Jensen Block ... Seattle landmark\n", - "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", - "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", - "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", - "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", - "\n", - "[7 rows x 3 columns]},\n", - " { 'answer': '8',\n", - " 'context': Years Venue Location\n", - "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", - "1 1987-88 Navesink Country Club Middletown , New Jersey\n", - "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", - "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", - "4 1982 Wykagyl Country Club New Rochelle , New York\n", - "5 1981 Ridgewood Country Club Paramus , New Jersey\n", - "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", - "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", - " { 'answer': '8',\n", - " 'context': Model Specification ... Prime mover Power output\n", - "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", - "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", - "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", - "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", - "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", - "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", - "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", - "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", - "\n", - "[8 rows x 7 columns]},\n", - " { 'answer': '10',\n", - " 'context': Name or designation ... Notes\n", - "0 Aluminum Overcast ... One of only ten flyable B-17s\n", - "1 Avro Lancaster PA474 ... 
One of only two Lancasters in flying condition in the world\n", - "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", - "3 Douglas DC-7B N836D ... \n", - "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", - "5 FIFI ... One of only two B-29s flying\n", - "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", - "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", - "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", - "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", - "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", - "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", - "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", - "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", - "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", - "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", - "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", - "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", - "18 Yankee Lady ... Flyable\n", - "\n", - "[19 rows x 6 columns]},\n", - " { 'answer': '13',\n", - " 'context': N Year Country ... Link Remark K\n", - "0 003+ 2013 INDIA ... LK RK K\n", - "1 005 2006 USA ... LK RK K\n", - "2 010 2014 ZAF ... LK RK K\n", - "3 020 2010 USA ... LK RK K\n", - "4 030 201 ? USA ... LK RK K\n", - "5 040 2007 USA ... 
LK RK K\n", - "6 042 2004 USA ... LK Only G-S With Large Battery K\n", - "7 050 201 ? USA ... LK RK K\n", - "8 100 20 ? ? USA ... LK RK K\n", - "9 200 20 ? ? USA ... LK RK K\n", - "10 300 2013 EUR ... LK RK K\n", - "11 400 20 ? ? USA ... LK RK K\n", - "12 995 20 ? ? USA ... LK RK K\n", - "\n", - "[13 rows x 12 columns]},\n", - " { 'answer': '5',\n", - " 'context': Team ... Capacity\n", - "0 Barnsley ... 23,009\n", - "1 Blackpool ... 16,750\n", - "2 Bradford City ... 25,136\n", - "3 Burton Albion ... 6,912\n", - "4 Bury ... 11,840\n", - "5 Chesterfield ... 10,400\n", - "6 Colchester United ... 10,105\n", - "7 Coventry City ... 32,500\n", - "8 Crewe Alexandra ... 10,066\n", - "9 Doncaster Rovers ... 15,231\n", - "10 Fleetwood Town ... 5,311\n", - "11 Gillingham ... 11,582\n", - "12 Millwall ... 20,146\n", - "13 Oldham Athletic ... 13,512\n", - "14 Peterborough United ... 14,319\n", - "15 Port Vale ... 18,947\n", - "16 Rochdale ... 10,249\n", - "17 Scunthorpe United ... 9,183\n", - "18 Sheffield United ... 32,702\n", - "19 Shrewsbury Town ... 9,875\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': '7',\n", - " 'context': Resource Name ... Added\n", - "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", - "1 John M. Beasley House ... March 5 , 1996\n", - "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", - "3 Austin House ... February 5 , 1998\n", - "4 Reid-Woods House ... August 31 , 2000\n", - "5 Villa Serena Apartments ... September 29 , 2000\n", - "6 Paul M. Souder House ... November 2 , 2000\n", - "7 Stevens-Gilchrist House ... August 17 , 2001\n", - "\n", - "[8 rows x 3 columns]},\n", - " { 'answer': '19',\n", - " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", - "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", - "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", - "2 Cowen Park Bridge ... 15th Avenue NE\n", - "3 First Avenue South Bridge ... 
State Route 99\n", - "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", - "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", - "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", - "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", - "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", - "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", - "10 Magnolia Bridge ... W Garfield Street\n", - "11 Montlake Bridge ... State Route 513\n", - "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", - "13 Salmon Bay Bridge ... BNSF Railway\n", - "14 Ship Canal Bridge ... Interstate 5\n", - "15 Schmitz Park Bridge ... SW Admiral Way\n", - "16 Spokane Street Bridge ... SW Spokane Street\n", - "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", - "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", - "19 University Bridge ... Eastlake Avenue NE\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '8',\n", - " 'context': Location ... Comments\n", - "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", - "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", - "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", - "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", - "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", - "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", - "6 Minehead ... Known as Somerwest World 1986-1998 . 
Still open as Butlins Minehead 30 April...\n", - "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", - "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", - "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", - "\n", - "[10 rows x 4 columns]}]\n" - ] - } - ], - "source": [ - "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", - "print_answers(prediction, details=\"minimum\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Open-Domain QA on Text and Tables\n", - "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", - "\n", - "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." - ], - "metadata": { - "id": "8uMzl9Ml_D1B" - } - }, - { - "cell_type": "code", - "source": [ - "# Add 1,000 text passages from OTT-QA to our document store.\n", - "\n", - "def read_ottqa_texts(filename):\n", - " processed_passages = []\n", - " with open(filename) as passages:\n", - " passages = json.load(passages)\n", - " for title, content in passages.items():\n", - " title = title[6:]\n", - " title = title.replace(\"_\", \" \")\n", - " document = Document(\n", - " content=content,\n", - " content_type=\"text\",\n", - " meta={\"title\": title}\n", - " )\n", - " processed_passages.append(document)\n", - " \n", - " return processed_passages\n", - "\n", - "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", - "document_store.write_documents(passages, index=document_index)" - ], - "metadata": { - "id": "4CBcIjIq_uFx" - }, - "execution_count": 16, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" - ], - "metadata": { - "id": "j1TaNF7SiKgH" - }, - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Pipeline for QA on Combination of Text and Tables\n", - "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", - "\n", - "To achieve this, we make use of two additional nodes:\n", - "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", - "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." - ], - "metadata": { - "id": "c2sk_uNHj0DY" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "DeAkZwDhufYA" + }, + "source": [ + "# Open-Domain QA on Tables\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", + "\n", + "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbR3bETlvi-3" + }, + "source": [ + "### Prepare environment\n", + "\n", + "#### Colab: Enable the GPU runtime\n", + "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", + "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HW66x0rfujyO" + }, + "outputs": [], + "source": [ + "# Make sure you have a GPU running\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ZXoyhOAvn7M" + }, + "outputs": [], + "source": [ + "# Install the latest release of Haystack in your own environment\n", + "#! pip install farm-haystack\n", + "\n", + "# Install the latest master of Haystack\n", + "!pip install --upgrade pip\n", + "!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab]\n", + "\n", + "# The TaPAs-based TableReader requires the torch-scatter library\n", + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", + "\n", + "# Install pygraphviz for visualization of Pipelines\n", + "!apt install libgraphviz-dev\n", + "!pip install pygraphviz" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K_XJhluXwF5_" + }, + "source": [ + "### Start an Elasticsearch server\n", + "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "frDqgzK7v2i1" + }, + "outputs": [], + "source": [ + "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", + "from haystack.utils import launch_es\n", + "\n", + "launch_es()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "S4PGj1A6wKWu" + }, + "outputs": [], + "source": [ + "# In Colab / No Docker environments: Start Elasticsearch from source\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", + "\n", + "import os\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "\n", + "es_server = Popen(\n", + " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", + ")\n", + "# wait until ES has started\n", + "! 
sleep 30" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmxepXZtwQ0E" + }, + "outputs": [], + "source": [ + "# Connect to Elasticsearch\n", + "from haystack.document_stores import ElasticsearchDocumentStore\n", + "\n", + "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", + "document_index = \"document\"\n", + "document_store = ElasticsearchDocumentStore(\n", + " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fFh26LIlxldw" + }, + "source": [ + "## Add Tables to DocumentStore\n", + "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", + "\n", + "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM63uwbd8zd6" + }, + "outputs": [], + "source": [ + "# Let's first fetch some tables that we want to query\n", + "# Here: 1000 tables from OTT-QA\n", + "from haystack.utils import fetch_archive_from_http\n", + "\n", + "doc_dir = \"data\"\n", + "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", + "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "SKjw2LuXxlGh", + "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", - "\n", - "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", - "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", - "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", - "# that they are not capable of doing aggregations over multiple table cells.\n", - "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", - "split_documents = SplitDocumentList()\n", - "join_answers = JoinAnswers()" - ], - "metadata": { - "id": "Ej_j8Q3wlxXE" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Result ... Score\n", + "0 Winner ... 6-1 , 6-1\n", + "1 Winner ... 6-2 , 4-6 , 6-3\n", + "2 Winner ... 6-2 , 6-2\n", + "3 Runner-up ... 3-6 , 2-6\n", + "4 Winner ... 6-7 , 6-3 , 6-0\n", + "5 Winner ... 6-1 , 6-0\n", + "6 Winner ... 6-2 , 2-6 , 6-2\n", + "7 Winner ... 
6-0 , 6-4\n", + "\n", + "[8 rows x 8 columns]\n", + "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" + ] + } + ], + "source": [ + "# Add the tables to the DocumentStore\n", + "\n", + "import json\n", + "from haystack import Document\n", + "import pandas as pd\n", + "\n", + "\n", + "def read_ottqa_tables(filename):\n", + " processed_tables = []\n", + " with open(filename) as tables:\n", + " tables = json.load(tables)\n", + " for key, table in tables.items():\n", + " current_columns = table[\"header\"]\n", + " current_rows = table[\"data\"]\n", + " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", + " current_doc_title = table[\"title\"]\n", + " current_section_title = table[\"section_title\"]\n", + " document = Document(\n", + " content=current_df,\n", + " content_type=\"table\",\n", + " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", + " id=key,\n", + " )\n", + " processed_tables.append(document)\n", + "\n", + " return processed_tables\n", + "\n", + "\n", + "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", + "document_store.write_documents(tables, index=document_index)\n", + "\n", + "# Showing content field and meta field of one of the Documents of content_type 'table'\n", + "print(tables[0].content)\n", + "print(tables[0].meta)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hmQC1sDmw3d7" + }, + "source": [ + "## Initialize Retriever, Reader, & Pipeline\n", + "\n", + "### Retriever\n", + "\n", + "Retrievers help narrow down the scope for the Reader to a subset of tables where a given question could be answered.\n", + "They use simple but fast algorithms.\n", + "\n", + "**Here:** We use the `TableTextRetriever` capable of retrieving relevant content among a database\n", + "of texts and tables using dense embeddings.
It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", + "\n", + "**Alternatives:**\n", + "\n", + "- `ElasticsearchRetriever` that uses BM25 algorithm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EY_qvdV6wyK5" + }, + "outputs": [], + "source": [ + "from haystack.nodes.retriever import TableTextRetriever\n", + "\n", + "retriever = TableTextRetriever(\n", + " document_store=document_store,\n", + " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", + " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", + " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", + " embed_meta_fields=[\"title\", \"section_title\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jasi1RM2zIJ7" + }, + "outputs": [], + "source": [ + "# Add table embeddings to the tables in DocumentStore\n", + "document_store.update_embeddings(retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XM-ijy6Zz11L" + }, + "outputs": [], + "source": [ + "## Alternative: ElasticsearchRetriever\n", + "# from haystack.nodes.retriever import ElasticsearchRetriever\n", + "# retriever = ElasticsearchRetriever(document_store=document_store)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "YHfQWxVI0N2e", + "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "text_table_qa_pipeline = Pipeline()\n", - "text_table_qa_pipeline.add_node(component=retriever, 
name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", - "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", - "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", - "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" - ], - "metadata": { - "id": "Zdq6JnF5m3aP" - }, - "execution_count": 54, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... 
Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the Retriever\n", + "from haystack.utils import print_documents\n", + "\n", + "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", + "# Get highest scored table\n", + "print(retrieved_tables[0].content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbwkXScm2-gy" + }, + "source": [ + "### Reader\n", + "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the answer. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", + "\n", + "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 1 or close to 1)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4APcRoio2RxG" + }, + "outputs": [], + "source": [ + "from haystack.nodes import TableReader\n", + "\n", + "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ILuAXkyN4F7x", + "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", - "from IPython import display\n", - "\n", - "text_table_qa_pipeline.draw()\n", - "display.Image(\"pipeline.png\")" - ], - "metadata": { - "id": "K4vH1ZEnniut", - "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 540 - } - }, - "execution_count": 55, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 55 - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... 
On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", + "\n", + "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", + "print(table_doc.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ilbsecgA4vfN", + "outputId": "e0095547-fb82-4b76-f826-284bcff61257" + }, + "outputs": [ { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ ]\n" + ] + } + ], + "source": [ + "from haystack.utils import print_answers\n", + "\n", + "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", + "print_answers(prediction, details=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jkAYNMb7R9qu" + }, + "source": [ + "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", + "\n", + "In the `Answer`'s meta field, you can find the aggregation operator used to construct the answer (in this case `COUNT`) and the answer cells as strings."
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "It8XYT2ZTVJs", + "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "9YiK75tSoOGA", - "outputId": "65362f83-4313-4ac1-f6ce-9b92522598c1", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 57, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: Who is Aleksandar Trifunovic?\n", - "Answers:\n", - "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", - " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", - " 'professional basketball coach and former player .'},\n", - " { 'answer': 'Johnny Höglin',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Ivar Eriksen',\n", - " 'context': Rank Athlete Country Time\n", - "0 
1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Magne Thomassen',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': '5',\n", - " 'context': Position # Player Moving from\n", - "0 F 12 Nikola Kalinić Radnički Kragujevac\n", - "1 SF 6 Nemanja Dangubić Mega Vizura\n", - "2 C 33 Maik 
Zirbes Brose Baskets\n", - "3 PG 3 Marcus Williams Lokomotiv Kuban\n", - "4 PG 24 Stefan Jović Radnički Kragujevac\n", - "5 C 14 Đorđe Kaplanović FMP\n", - "6 SF 5 Nikola Čvorović FMP\n", - "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", - "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", - " { 'answer': 'Vasile Sărucan',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Belgium',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Poland',\n", - " 'context': Rank Name Nationality Result\n", - "0 
1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Hafþór Júlíus Björnsson',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Estonia',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Iceland',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman',\n", - " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman . 
He is currently '\n", - " 'playing with Piráti Chomutov of the Czech Extral'},\n", - " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player',\n", - " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player . In 2010 , he '\n", - " 'played for FC Angusht Nazran in the'}]\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted answer: 12\n", + "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" + ] + } + ], + "source": [ + "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", + "print(f\"Meta field: {prediction['answers'][0].meta}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgmG7pzL5ceh" + }, + "source": [ + "### Pipeline\n", + "The Retriever and the Reader can be combined into a pipeline in order to first retrieve relevant tables and then extract the answer.\n", + "\n", + "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might not be helpful."
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "G-aZZvyv4-Mf" + }, + "outputs": [], + "source": [ + "# Initialize pipeline\n", + "from haystack import Pipeline\n", + "\n", + "table_qa_pipeline = Pipeline()\n", + "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "m8evexnW6dev", + "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a table\n", - "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" - ], - "metadata": { - "id": "QYOHDSmLpzEg" - }, - "execution_count": null, - "outputs": [] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ { 'answer': '12',\n", + " 'context': Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... 
Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '7',\n", + " 'context': Building or structure ... Listing\n", + "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", + "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", + "2 Jensen Block ... Seattle landmark\n", + "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", + "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", + "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", + "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", + "\n", + "[7 rows x 3 columns]},\n", + " { 'answer': '8',\n", + " 'context': Years Venue Location\n", + "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", + "1 1987-88 Navesink Country Club Middletown , New Jersey\n", + "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", + "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", + "4 1982 Wykagyl Country Club New Rochelle , New York\n", + "5 1981 Ridgewood Country Club Paramus , New Jersey\n", + "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", + "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", + " { 'answer': '8',\n", + " 'context': Model Specification ... Prime mover Power output\n", + "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", + "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", + "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", + "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", + "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", + "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", + "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", + "7 RS-36 DL-701XAP ... 
12-251 1,800 hp ( 1.34 MW )\n", + "\n", + "[8 rows x 7 columns]},\n", + " { 'answer': '10',\n", + " 'context': Name or designation ... Notes\n", + "0 Aluminum Overcast ... One of only ten flyable B-17s\n", + "1 Avro Lancaster PA474 ... One of only two Lancasters in flying condition in the world\n", + "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", + "3 Douglas DC-7B N836D ... \n", + "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", + "5 FIFI ... One of only two B-29s flying\n", + "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", + "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", + "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", + "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", + "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", + "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", + "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", + "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", + "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", + "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", + "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", + "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", + "18 Yankee Lady ... 
Flyable\n", + "\n", + "[19 rows x 6 columns]},\n", + " { 'answer': '13',\n", + " 'context': N Year Country ... Link Remark K\n", + "0 003+ 2013 INDIA ... LK RK K\n", + "1 005 2006 USA ... LK RK K\n", + "2 010 2014 ZAF ... LK RK K\n", + "3 020 2010 USA ... LK RK K\n", + "4 030 201 ? USA ... LK RK K\n", + "5 040 2007 USA ... LK RK K\n", + "6 042 2004 USA ... LK Only G-S With Large Battery K\n", + "7 050 201 ? USA ... LK RK K\n", + "8 100 20 ? ? USA ... LK RK K\n", + "9 200 20 ? ? USA ... LK RK K\n", + "10 300 2013 EUR ... LK RK K\n", + "11 400 20 ? ? USA ... LK RK K\n", + "12 995 20 ? ? USA ... LK RK K\n", + "\n", + "[13 rows x 12 columns]},\n", + " { 'answer': '5',\n", + " 'context': Team ... Capacity\n", + "0 Barnsley ... 23,009\n", + "1 Blackpool ... 16,750\n", + "2 Bradford City ... 25,136\n", + "3 Burton Albion ... 6,912\n", + "4 Bury ... 11,840\n", + "5 Chesterfield ... 10,400\n", + "6 Colchester United ... 10,105\n", + "7 Coventry City ... 32,500\n", + "8 Crewe Alexandra ... 10,066\n", + "9 Doncaster Rovers ... 15,231\n", + "10 Fleetwood Town ... 5,311\n", + "11 Gillingham ... 11,582\n", + "12 Millwall ... 20,146\n", + "13 Oldham Athletic ... 13,512\n", + "14 Peterborough United ... 14,319\n", + "15 Port Vale ... 18,947\n", + "16 Rochdale ... 10,249\n", + "17 Scunthorpe United ... 9,183\n", + "18 Sheffield United ... 32,702\n", + "19 Shrewsbury Town ... 9,875\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': '7',\n", + " 'context': Resource Name ... Added\n", + "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", + "1 John M. Beasley House ... March 5 , 1996\n", + "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", + "3 Austin House ... February 5 , 1998\n", + "4 Reid-Woods House ... August 31 , 2000\n", + "5 Villa Serena Apartments ... September 29 , 2000\n", + "6 Paul M. Souder House ... November 2 , 2000\n", + "7 Stevens-Gilchrist House ... 
August 17 , 2001\n", + "\n", + "[8 rows x 3 columns]},\n", + " { 'answer': '19',\n", + " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", + "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", + "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", + "2 Cowen Park Bridge ... 15th Avenue NE\n", + "3 First Avenue South Bridge ... State Route 99\n", + "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", + "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", + "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", + "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", + "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", + "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", + "10 Magnolia Bridge ... W Garfield Street\n", + "11 Montlake Bridge ... State Route 513\n", + "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", + "13 Salmon Bay Bridge ... BNSF Railway\n", + "14 Ship Canal Bridge ... Interstate 5\n", + "15 Schmitz Park Bridge ... SW Admiral Way\n", + "16 Spokane Street Bridge ... SW Spokane Street\n", + "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", + "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", + "19 University Bridge ... Eastlake Avenue NE\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '8',\n", + " 'context': Location ... Comments\n", + "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", + "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", + "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", + "3 Bognor Regis ... 
Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", + "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", + "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", + "6 Minehead ... Known as Somerwest World 1986-1998 . Still open as Butlins Minehead 30 April...\n", + "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", + "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", + "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", + "\n", + "[10 rows x 4 columns]}]\n" + ] + } + ], + "source": [ + "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", + "print_answers(prediction, details=\"minimum\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Open-Domain QA on Text and Tables\n", + "With Haystack, you are not limited to doing QA on texts alone or on tables alone: you can also use both texts and tables as your source of information.\n", + "\n", + "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." 
+ ], + "metadata": { + "id": "8uMzl9Ml_D1B" + } + }, + { + "cell_type": "code", + "source": [ + "# Add 1,000 text passages from OTT-QA to our document store.\n", + "\n", + "\n", + "def read_ottqa_texts(filename):\n", + " processed_passages = []\n", + " with open(filename) as passages:\n", + " passages = json.load(passages)\n", + " for title, content in passages.items():\n", + " title = title[6:]\n", + " title = title.replace(\"_\", \" \")\n", + " document = Document(content=content, content_type=\"text\", meta={\"title\": title})\n", + " processed_passages.append(document)\n", + "\n", + " return processed_passages\n", + "\n", + "\n", + "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", + "document_store.write_documents(passages, index=document_index)" + ], + "metadata": { + "id": "4CBcIjIq_uFx" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" + ], + "metadata": { + "id": "j1TaNF7SiKgH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Pipeline for QA on Combination of Text and Tables\n", + "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", + "\n", + "To achieve this, we make use of two additional nodes:\n", + "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", + "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." 
+ ], + "metadata": { + "id": "c2sk_uNHj0DY" + } + }, + { + "cell_type": "code", + "source": [ + "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", + "\n", + "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", + "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", + "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", + "# that they are not capable of doing aggregations over multiple table cells.\n", + "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", + "split_documents = SplitDocumentList()\n", + "join_answers = JoinAnswers()" + ], + "metadata": { + "id": "Ej_j8Q3wlxXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text_table_qa_pipeline = Pipeline()\n", + "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", + "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", + "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", + "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" + ], + "metadata": { + "id": "Zdq6JnF5m3aP" + }, + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's have a look at the structure of the combined Table and Text QA pipeline.\n", + "from IPython import display\n", + "\n", + "text_table_qa_pipeline.draw()\n", + "display.Image(\"pipeline.png\")" + ], + "metadata": { + "id": "K4vH1ZEnniut", + "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540 + 
} + }, + "execution_count": 55, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "4kw53uWep3zj", - "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 67, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: What is Cuba's national tree?\n", - "Answers:\n", - "[ { 'answer': 'Cuban royal palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", - " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", - " 'evergreen tree native to western Cuba in the Cuban pine '\n", - " 'forests ecoregion .'},\n", - " { 'answer': \"Glenn O'Brien\",\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... 
The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . 
Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... 
Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Belize',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Palmyra palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... 
Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Guadeloupe',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Basse-Terre',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'East Caribbean dollar',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Jenkins',\n", - " 'context': NRHP reference number ... County\n", - "0 72000402 ... Wilkes\n", - "1 ... Meriwether\n", - "2 ... Bartow\n", - "3 71000280 ... Jenkins\n", - "4 ... Chatham\n", - "5 89002015 ... Thomas\n", - "6 ... Glynn\n", - "7 75000615 ... Walton\n", - "8 84001156 ... Sumter\n", - "9 79000713 ... 
Cobb\n", - "10 82002491 ... Twiggs\n", - "11 74000703 ... Taliaferro\n", - "12 80001039 ... Floyd\n", - "13 90000805 ... Gwinnett\n", - "14 73000620 ... Decatur\n", - "15 79000731 ... Houston\n", - "16 95000741 ... Grady\n", - "17 97000559 ... Greene\n", - "18 74000662 ... Brooks\n", - "19 75000616 ... Washington\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", - " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", - " 'perennial plant in the family Primulaceae , native to '\n", - " 'Northern Europe and northern Asia , and '},\n", - " { 'answer': 'Poospiza',\n", - " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", - " 'the South American lowlands and the Andes mountains . '\n", - " 'Generally they are arboreal feeders in '},\n", - " { 'answer': 'golden-crowned sparrow',\n", - " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", - " 'a large American sparrow found in the western part of '\n", - " 'North America .'},\n", - " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush',\n", - " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", - " 'flower heads . 
It is a rare variety '},\n", - " { 'answer': 'rain',\n", - " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", - " 'the southeastern rain forest of the Amazon in Puerto '\n", - " 'Maldonado , Tambopata , the Sacred Valley'}]\n" - ] - } + "output_type": "execute_result", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUkAAAILCAYAAABl8m5SAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdeViU9f4//ucMywzDMoCsAoOyKIviAqYCanS8ykJPKOUSSFqaUselqGNZp2+nU51jVpqWmmVmYppb5tLix3IXPYoLyiYHjgIiDMM2wyYD8/r94Y/7iOCIBtw4vB7XdV+jN+95v18zMM+591tCRATGGGNtqZCKXQFjjHVnHJKMMWYEhyRjjBlhLnYBrGcgIpSVlUGj0aCmpgYVFRUAgBs3bqC2thYAoFAoIJPJAAAODg6wtraGk5MTevXqBYlEIlrtrGfjkGQdJj8/H5cuXUJubi7y8vKQl5eH3NxclJaWQqPRwGAw3Fe/UqkUTk5OcHZ2hq+vL3x8fODj4wNfX18MGDAAKpWqg18JY/8j4b3b7H5oNBocOXIEJ0+exLlz53Du3DmUlZUBAFxcXNC3b18hzNzc3ODk5ARXV1c4OTnB1tYWSqUSAGBpaQlra2sAQE1NDRoaGgAAVVVV0Ol00Gg0KCkpgUajQXFxsRC+//3vf6FWqwEAvXr1wpAhQzBkyBCMGDECo0ePhpOTkwjvCjNBFRySrF3q6+vx22+/4ddff8WhQ4dw6dIlSKVSBAcHY+jQoUJIDRo0CHZ2dl1Sk1arxYULF4SQPnv2LNLT02EwGDBgwAA8/PDDeOyxx/CnP/0Jcrm8S2piJodDkt1ZTU0N9u7di507d+Lnn39GdXU1hg4dijFjxuDhhx/GqFGjYG9vL3aZLVRVVeHIkSM4dOgQDh06hHPnzsHGxgaPP/44Jk2ahPHjxwtLroy1A4ckay01NRXffvstkpOTUVVVhREjRuDpp5/GU089BQ8PD7HLuyelpaX4+eefsW3bNuzfvx9yuRxPPvkkEhISMHbsWLHLY90fhyS7qampCdu3b8eSJUtw7tw5BAUFYdasWYiPj4ezs7PY5XWI0tJSbNy4EevWrUNGRgaGDBmCv/71r3j66adhZmYmdnmse6oAsR5Nr9fTunXryN/fn8zMzGjq1Kl04sQJscvqdMePH6cpU6aQmZkZ+fn50VdffUV6vV7sslj3U84Hk/dg+/fvx+DBg5GYmIjRo0cjMzMTmzdvxsiRI8UurdOFh4djy5YtyMrKwsMPP4wXX3wRgwYNwq+//ip2aayb4ZDsgQoKCjBhwgQ89thj8Pf3R3p6Or766iv4+/uLXVqX8/Pzw5dffomMjAwEBARg3LhxmDBhAgoKCsQujXUTHJI9zObNmxESEoLc3Fz89ttv+OGHH+Dn5yd2WaLz9fXFjh078PvvvyM3NxchISH47rvvxC6LdQMckj1EfX09EhISEBcXh/j4eKSmpuKRRx4Ru6xuJyoqCqmpqZg+fTri4+ORkJCA+vp6sctiIuK92z2ARqNBTEwMMjIy8N1332HcuHFil/RA+OWXX/DMM88gMDAQP/74I5/F0zPxIUCmrrCwEA8//DCICHv37kVgYKDYJT1QsrKyEB0dDYlEgkOHDsHT01PskljX4pA0ZWVlZRg9ejSkUil+++03uLi4iF3SA6m0tBR/+tOf0NjYiCNHjvASZc/CIWmqbty4gTFjxkCtVuPYsWPo3bu32CU90IqKihAZGQlnZ2ccPnyYzwXvOfj2DaZq8eLFy
MzMxK+//soB2QF69+6N/fv3Izs7G4sXLxa7HNaFOCRN0G+//Ybly5dj5cqVHX7s48GDBzFlyhSoVCrI5XLY2tpi4MCBePXVV1FYWNihY3U3fn5+WLlyJZYvX44DBw6IXQ7rKqKd7MM6hcFgoJCQEIqJienwvt98800CQDNnzqRz585RXV0dVVVV0S+//EKhoaGkVCrp0KFDHT5udzNx4kQaOHAgGQwGsUthna+cQ9LE7Ny5kyQSCaWlpXVov7t37yYA9Pzzz7f5c61WS35+fuTk5ERlZWUdOnZ3k56eTlKplHbs2CF2Kazz8bnbpubTTz9FTEwMBg4c2KH9Ll26FADw9ttvt/lzW1tbJCUlQaPRYN26dR06dncTFBSEiRMnYvny5WKXwroAh6QJqaiowPHjxzFt2rQO7be2thYpKSnw8vIyej+Z8PBwADcvnAEA8+fPh6WlJdzc3IQ2L730EqytrSGRSKDRaIT5TU1NePvtt6FSqWBlZYWQkBB8//33AIAPP/wQCoUCtra2UKvVSEpKgoeHB0aNGgWJRAKJRAJfX1+cO3cOADBz5kwoFAoolUrs3r27Q9+LZtOmTcOJEyeEW1YwEyb2sizrON9//z1ZWFhQRUVFh/abmZlJACg0NNRou+LiYgJAffv2FebFxcWRq6tri3ZLly4lAFRaWirMe/XVV0kmk9H27dupoqKCFi9eTFKplE6fPk1E/9seumDBAlq5ciVNmjSJMjMzKTY2lszMzOjatWstxnjmmWdo9+7df/Sl31FVVRVZWFjQli1bOm0M1i3w6rYpyc7Oho+PT4ffUqG6uhoAhJt33YmDg0OL9u1VX1+PVatWYeLEiYiNjYW9vT3eeustWFhYYP369S3a/utf/8Jf/vIX7NixAwEBAUhMTERTU1OLdlqtFqdPn8YTTzxxT3XcCzs7O/j5+SE7O7vTxmDdA4ekCSkuLm6xattRbG1tAdy8f4wxzffSvtcrmWdnZ6O2thYDBgwQ5llZWcHNzQ1ZWVlGn/vII4+gX79++Prrr0H//3kRW7ZswdSpUzv9auPu7u64fv16p47BxMchaUJ0Op0QaB1JpVLBwsICJSUlRtsVFxcL7e9FTU0NAOCtt94StjFKJBJcvXoVtbW1Rp8rkUgwd+5c5OXl4bfffgMAfPvtt3j++efvqYb7YWdnB61W2+njMHFxSJoQFxcX4V7UHcnKygoREREoLCzElStX7tju2LFjAICYmJh76r95yXPZsmUgohZTSkrKXZ8/Y8YMyOVyfPXVV8jOzoadnR28vb3vqYb7UVxcDFdX104fh4mLQ9KEuLu749q1a53S9xtvvAEA+Pvf/97mz7VaLT755BN4eXlh6tSpwnxzc3Po9XqjfXt5eUEul+P8+fP3VZuDgwOmTJmCXbt24aOPPsLs2bPvq597de3atU7ZvMG6Fw5JEzJs2DBcu3YNOTk5Hd73o48+ig8++AAbNmzAjBkzcOHCBdTX10Or1WL//v2IiopCdXU1du3a1WIHj5+fH8rLy7Fr1y7o9XqUlpbi6tWrLfqWy+WYOXMmNm/ejFWrVkGr1aKpqQmFhYXt3uaXmJiIGzduYO/evZgwYUKHvva25OXloaCgAMOHD+/0sZjIRN25zjqUXq8nBwcHWr58eaeNkZKSQs888wypVCqytLQkiURCAMjT05PKy8tbtS8rK6OoqCiSy+XUt29fmjdvHr322msEgPz8/Cg/P5+IiG7cuEGLFi0ilUpF5ubm5OzsTLGxsZSenk5LliwhKysrAkBeXl60cePGNmsbMmQIvfHGG5322m/16aefkr29PTU0NHTJeEw05XypNBPz3HPP4dSpU7h48SKk0s5fUSgrK0NoaCiuXr2KDz74QFgtF0N0dDQ+++wz9O3bt1PHMRgMCAkJQWhoKDZs2NCpYzHR8aXSTM0bb7yB7OxsbN++vUvG69WrF3bt2gU7Ozu8+eabeP/991FbW4uu+O69dVtnWloa5HJ5p
wckAGzfvh2ZmZl47bXXOn0s1g2IvCjLOsEzzzxDvr6+pNPpumzMs2fPUnh4OMlkMlKpVLR///5OH/Pll1+my5cvU3Z2Ng0dOpTS09M7fUydTke+vr40bdq0Th+LdQvl5mKHNOt4y5YtQ0hICBYuXIivvvqqS8YcMmQIjh8/3iVjNVMoFAgICICHhwc+//xzBAUFdfqYL7/8MqqqqvDxxx93+lise+BtkiZq165dmDRpEr7++mvMmDFD7HJMwrfffosZM2Zgx44dmDhxotjlsK7B2yRNVUxMDBYvXozZs2d32pVwepLdu3fj+eefx+uvv84B2cPwkqSJS0xMxDfffIMtW7bgySefFLucB9KPP/6IqVOn4tlnn8Xq1ashkUjELol1HV6SNHWff/45ZsyYgdjYWL5I7H349NNPERsbi2effRaff/45B2QPxDtuTJxUKsXq1avh6+uLpKQkXLhwAStWrOiUC2GYEp1OhwULFmDDhg1YsmQJXn31VbFLYiLh1e0eZO/evXj++edhbW2NjRs3IiIiQuySuqXjx48jISEBOp0OX3/9NcaPHy92SUw8vLrdk4wfPx4XL15ESEgIRo8ejYSEhLte/qwnKSsrw4IFCzBmzBj4+/vj/PnzHJCML3DR07i4uGDXrl3YuHEjDh48iP79+2Pp0qV3vW6jKaurq8PHH38MX19f7Ny5Exs2bMAvv/yC3r17i10a6wZ4dbsHq62txZIlS/DRRx/BxsYGCxYswEsvvXTX2zSYiqqqKqxatQrLly9HdXU1kpKSsGjRIlhbW4tdGus+KjgkGdRqNT799FN8/vnnICI8++yzmDVrFkJCQsQurVOkpaVh3bp1wsUpXnzxRSxcuBAuLi4iV8a6IQ5J9j9VVVX44osv8OWXX+I///kPhg0bhpkzZ2LSpEkP/BW4S0pK8MMPP2D9+vX497//DX9/f8yaNQtz5szpMUvO7L5wSLLWiAiHDx/GunXrsHPnTtTX12PkyJGYOHEixo8fj/79+4tdYrtkZ2dj7969+OGHH5CSkgK5XI5Jkybh+eefx5gxY/iYR9YeHJLMuNraWuzfvx8//PAD9u7di/LycvTu3RtRUVEYM2YMIiMj0a9fv06/M+HdGAwGXL58GceOHcOhQ4dw8OBBFBUVwdHREePHj8fEiRPx6KOPQqFQiFone+BwSLL2a2xsxOnTp3Hw4EEcOnQIx48fR21tLaytrRESEoIhQ4Zg0KBB8PX1hY+PD7y8vGBu3rHnKzQ2NqKgoAB5eXnIzc3FhQsXcP78eVy4cAE1NTVQKBSIiIjAww8/jKioKAwbNqzDa2A9Cocku38NDQ24ePEizp07h/Pnz+P8+fO4ePGicJtVCwsLqFQquLu7w8nJCU5OTnBxcYGdnR1sbW1hbm4OMzMz2NnZAYBwb5vGxkbodDpotVqo1WpoNBpoNBoUFxfj6tWrwsV27ezsMHDgQAwePBiDBw/GkCFDMHDgQFhaWor2njCTwyHJOp5Go0Fubq6wtHdr0KnVauh0OtTU1KChoUEIRABCcFpaWsLa2hq2trZwcXFpEbDNS6m+vr5wcnIS+ZWyHoBDkolv6tSpaGxs7LJbTjB2D/i0RMYYM4ZDkjHGjOCQZIwxIzgkGWPMCA5JxhgzgkOSMcaM4JBkjDEjOCQZY8wIDknGGDOCQ5IxxozgkGSMMSM4JBljzAgOScYYM4JDkjHGjOCQZIwxIzgkGWPMCA5JxhgzgkOSMcaM4JBkjDEjOCQZY8wIDknGGDOCQ5IxxozgkGSMMSM4JBljzAgOScZEZjAYsGzZMoSHh4tdCmsDhyRjIsrJycHo0aPxyiuvoLa2VuxyWBs4JJnJqqur6/Slsz8yxoULF/D6668jMTERgwcP7uDKWEfhkGQma926dVCr1d12jEGDBmHHjh2Ii4uDTCbr4MpYR+GQZN0GEeGTTz5BYGAgZDIZHBwcEBMTg6ysLKHN/PnzYWlpCTc3N2HeSy+9BGtra
0gkEmg0GgDAwoULkZSUhNzcXEgkEvj5+WHFihWQy+VwcXHB3Llz4e7uDrlcjvDwcJw6dapDxmAmiBgT2ZQpUyg2NpbefvttsrS0pI0bN1JlZSWlpaXR0KFDycnJiYqLi4X2cXFx5Orq2qKPpUuXEgAqLS0V5sXGxpKvr2+LdnPmzCFra2vKyMig+vp6Sk9Pp2HDhpGtrS3l5+d3yBj3Y/jw4TRo0KA/3A/rcOW8JMm6haamJnzyySeYNGkS4uPjoVQqMXDgQKxZswYajQZr167tsLHMzc2FpdWgoCCsWrUKOp0O69ev77AxmOngkGTdglarRXV1NcLCwlrMHzZsGCwtLVusDne0sLAwKBSKFqv1jDXjkGTdgl6vBwDY2Ni0+pm9vT10Ol2nji+TyVBaWtqpY7AHE4ck6xYsLCwAoM0wrKyshKenZ6eNrdfrO30M9uDikGTdgp2dHWxsbHDmzJkW80+dOoWGhgaEhoYK88zNzYUlz45w6NAhEBFGjBjRaWOwBxeHJOsWzMzMkJSUhJ07dyI5ORlarRYXL15EYmIi3N3dMWfOHKGtn58fysvLsWvXLuj1epSWluLq1aut+nR0dERRURGuXLkCnU4nhJ7BYEBFRQUaGxuRlpaGhQsXQqVSYcaMGR02BjMhYu9fZ6z5ECCDwUBLly4lf39/srCwIAcHB5o4cSJlZ2e3aF9WVkZRUVEkl8upb9++NG/ePHrttdcIAPn5+QmH8pw9e5a8vb3JysqKIiMjqbi4mObMmUMWFhbk4eFB5ubmZGdnRzExMZSbm9thY7RXSkoKRUREkLu7OwEgAOTm5kbh4eF0+PDhP/iusg5SLiEiEjemWU83depUNDY2Yvv27Z0+1ty5c7Ft2zaUlZV1+ljMJFTw6jbrcZqamsQugT1AOCQZ6yBZWVmQSCR3naZOnSp2qewecEiyHmPx4sVYv349qqqq0Ldv3w5fvQ8ICAAR3XXasmVLh47LOhdvk2Si68ptkozdI94myRhjxnBIMsaYERySjDFmBIckY4wZwSHJGGNGcEgyxpgRHJKMMWYEhyRjjBnBIckYY0ZwSDLGmBF8WiLrUjt37mx17vLFixdBRAgJCWkxf/LkyXjqqae6sjzGblfBIcm61Pnz5zFkyJB2tT1z5kyL2zYwJgIOSdb1+vXrh5ycHKNtvL29ceXKla4piLE74wtcsK43ffp04e6IbbG0tMTMmTO7sCLG7oyXJFmXy83Nhb+/P4z96WVlZaF///5dWBVjbeIlSdb1fH19MXjwYEgkklY/k0gkCAkJ4YBk3QaHJBNFQkICzMzMWs03NzdHQkKCCBUx1jZe3WaiuH79Ojw9PWEwGFrMl0gkyM/Ph6enp0iVMdYCr24zcbi7u2PUqFEtlialUinCw8M5IFm3wiHJRDN9+vQW/5dIJLyqzbodXt1moqmsrISLiwv0ej0AwMzMDCUlJejVq5fIlTEm4NVtJh57e3uMGzcO5ubmMDMzw2OPPcYBybodDkkmqri4ODQ1NYGIEBcXJ3Y5jLViLnYB7MFXW1uLGzduCI81NTVoaGgAAOh0OjQ2NrZ6TnP7pqYmWFpawmAwoKmpCdu2bYNCoYBMJmv1HHNzc9ja2gK4eVaOtbW18CiTyaBQKDr3hbIeibdJ9lBNTU3QaDTCVFFRAa1WC51OB61Wi6qqKlRWVkKr1baYdDod6urqUF9fLzx2J3K5HFZWVsKjra0t7OzsWkz29vZQKpWws7MTfu7g4AAnJydhausYTtYj8QUuTIler0dJSQny8/Nx/fp1FBYWQqPRQK1WQ61WtwhFjUbT6vlWVlYtwsTBwaFVwNjY2AhLend7BP4XWrczMzODnZ0dAOCnn35CU1MTJkyYAADQarVoampq9ZxbQ/nWJVdjj9XV1a2CvvkLoXmqq6trNdatgenk5AQXFxe4uLjAyckJnp6ecHd3h0qlgqurq9Hz0NkDj0PyQaJWq5Gbm4u8vDxcuXIFRUVFKCwsR
FFREa5du4aSkhLh4GyJRAJXV1c4OzvD2dkZrq6urT70zs7Owv8dHR1F+7Dr9XoQESwtLUUbv7y8XPjyKC0tbfWlUlJSgtLSUpSWlqKkpEQ471wqlcLV1RUeHh7o3bs3vLy84O7ujj59+sDX1xc+Pj5wcXER5XWxDsEh2Z0QEa5cuYLs7Gzk5eUJU3MwVldXA7i5PU6lUqF3795QqVRwd3eHh4dHiw+qm5sbL+F0Er1ej+LiYhQUFAhfUNeuXUNRUZEwLz8/X9gua2NjAx8fHyE0m//dv39/eHt7t3kOO+s2OCTFUlRUhIyMDKSnpwuPFy5cEILQwcFB+EDdPnl7e/M2swdARUVFiy+72yfg5heen58fgoODERQUJDwGBATw77h74JDsbHq9HpcuXUJqairOnDmD1NRUpKenC9vBVCoVAgMDERwcLDwGBATAwcFB5MpZZ6qoqEBmZiYyMjKQmZmJS5cuISsrC/n5+QBubh8ODg5GaGgowsLCEBoaigEDBvDaQdfjkOxIRITMzEykpKQgNTUVqampuHDhAm7cuAFra2sMGTIEoaGhCAkJwYABAxAQECDsvGAMuLnTKisrC5cuXUJaWhpSU1Nx7tw51NTUQCaTYdCgQQgNDUVoaChGjhyJwMBAXl3vXBySf0RTUxOysrJw/PhxHDt2DAcPHkRhYSEsLCzg7+8v/DGHhobioYceEm3HBHuwNf+dNX/xpqam4vz586ipqYGdnR0eeughjB07FhERERg+fDgvbXYsDsl7lZOTg7179+LAgQM4duwYtFotHB0dERkZidGjR2PUqFEYOnQozM35OH3WeRobG3H27FkcPXoUR44cwbFjx1BeXg6lUonIyEiMHTsW0dHR8Pf3F7vUBx2H5N3o9XocPXoUP/30E/bs2YPLly/DwcEBY8eOxejRozFmzBgEBwdDKuUzPJl4DAYD0tPTcfjwYRw5cgQHDhxARUUF+vXrhwkTJiA6OhqRkZG8lHnvOCTb0tjYiP3792PTpk3Yt28fqqqqEBQUhOjoaERHRyMiIoKXFFm31tjYiOPHj2Pfvn3Yu3cvMjMzoVQqER0djbi4ODz66KP8N9w+HJK3Onv2LDZu3IjNmzdDrVYjIiICkydPRnR0NHx8fMQuj7H7lpeXh3379mHr1q04fvw4XFxcMG3aNEyfPh1Dhw4Vu7zujEOyvr4e3377LVasWIH09HT4+/sjPj4e8fHxHIzMJOXl5SE5ORnJycnIyclBcHAw5s2bh2effRZyuVzs8rqbnhuS5eXlWL16NVauXImqqipMnz4dzz33HEaMGCF2aYx1mZMnT+Lrr7/Gxo0boVQqMW/ePCQmJsLR0VHs0rqLnheSOp0O7733HlatWgULCwskJiZi3rx5cHNzE7s0xkRTUlKCFStWYPXq1dDr9UhMTMRbb73Fx/H2tJDcsmULkpKScOPGDbz55puYPXs2bGxsxC6LsW6juroaX331Fd577z3IZDJ89NFHmDZtmthliakC1APk5ORQVFQUSaVSmj17Nmk0mi4df8qUKQSgXdOePXuM9hUWFkZSqZQGDRpktN3SpUvJ2dmZANDq1au7vM4/Yvv27dS3b99WY8pkMurTpw/NnDmT8vLyOm18IqJ9+/aRnZ0d7d69u1PH6a40Gg298MILJJVKKSoqinJycsQuSSzlJn9w348//oiwsDBUVFQgJSUFa9euFeU+Kvv370dlZSX0ej2uX78OAPjzn/+MhoYG1NTUQK1WY/bs2Xft5/Tp04iKirpru1dffRUnTpwQrc4/IjY2Fnl5efD19YVSqQQRoampCfn5+Xj33Xfx/fffY8SIESgrK+u0GqjnrGC1qVevXvjiiy9w8uRJVFZWIiwsDLt37xa7LFGYdEiuX78esbGxePrpp5GSkoKHHnpIlDokEgkiIiKgVCpbHJsmkUhgYWEBhUIBZ2dnhIaG3lOfD0Kd7VFXV4fw8HCjbaRSKVxcXDB9+nT85S9/gVqtx
oEDBzp8nGbR0dGoqqoSLgTcUw0bNgwpKSmYPHkyJk2ahK+//lrskrqcyR5N+uOPP2LWrFlYvHgx/vGPf4hay+bNm9vVbs6cOe3uszPOnOiMOttj3bp1UKvV7W7v5+cHACguLu7UcdhNMpkMa9euhZubG2bNmgVHR0fExMSIXVaXMcklyWvXriEhIQGzZs0SPSDvx9GjRxEUFASlUgm5XI6BAwfi119/bdHmP//5DwICAmBtbQ0rKyuMGjUKx44du2vfTU1NePvtt6FSqWBlZYWQkBB8//3391Wnsb6++eYb2NjYQCKRwMHBAbt27cKZM2eEa2E+88wzAICFCxciKSkJubm5kEgkQgAak5OTAwAYNGhQu+tpa5wPP/wQCoUCtra2UKvVSEpKgoeHB9atWweVSgWJRILPPvusXf03X41HKpUiNDQUtbW1AIC//vWvwu/xm2++uWs/d6opOzv7vn5HHendd9/FCy+8gISEBBQWFopdTtcRe6toZ3juuefIx8eH6uvrxS6lTdevXycA9OSTT7b5823bttE777xD5eXlVFZWRiNGjKBevXoJP//Tn/5EPj4+9N///pf0ej1dunSJhg8fTnK5nC5fviy0y8nJabXj5tVXXyWZTEbbt2+niooKWrx4MUmlUjp9+vQ913m3vjIyMkihUNCzzz4rPOeNN96gr776qkU/sbGx5Ovr26p/X19fUiqVwv8rKirom2++IYVCQdHR0fdcT1vjvPnmmwSAFixYQCtXrqRJkyZRZmYmFRQUEABauXJlu/pvbGykPn36kEqlosbGxhZjvPzyy7Rs2bJ213mnmrqD+vp68vX1pRkzZohdSlcpN7mQrK6uJmtra1qzZo3YpdzR3cLndh988AEBILVaTUQ3Q/L2vdtpaWkEgF599VVh3u0hWVdXRwqFgqZOnSq0qa2tJZlMRi+++OI91dnevr744gsCQMnJyfTdd9/RK6+80qovYyGJ2/ZwSyQSeu+996ihoeGe6zEWknV1dS3m3x6S7el/2bJlBIC2bt0qtKmpqSGVSkVVVVXt7udONXUXa9euJYVCQTqdTuxSuoLp7d2+dOkSampqMG7cOLFL6TDN2x/buoNgs4EDB0KpVCItLe2ObbKzs1FbW4sBAwYI86ysrODm5oasrKx7qqm9fb3wwgt46qmnMHfuXGzduhUffvjhPY3TvHebiPDaa6+BiKBUKlttk+3I19aW9vQ/a9YsKJVKLF++XGiTnJyMmJgY4aDszq6zK4wbNw61tbVG/9ZMicmFZGVlJQDA3t5e5Eru3759+/Dwww/D2dkZMpkMf/3rX9v1PLOVwNQAACAASURBVAsLC+j1+jv+vKamBgDw1ltvQSKRCNPVq1eFbWjtdS99vf/++6iurv7DO03+9re/wc3NDYsXL0ZBQcF913M/2tO/jY0NXnjhBZw4cQL//ve/AQCrV6/G/Pnzu6zOrtB8ymLzZ83UmVxIenp6AgByc3NFruT+5OfnY+LEiXBzc8OpU6dQVVWFJUuW3PV5jY2NKC8vh0qlumMbZ2dnAMCyZcuEpbPmKSUl5Z7qbG9fer0eCxYswCeffIKUlBS899579zTOrWxtbfGvf/0LOp0OL774Yqe9tra0t//58+fDwsICy5Ytw5EjR+Dl5QVfX98uq7MrNO848/LyErmSrmFyIRkUFARvb29s2rRJ7FLuy8WLF6HX6/Hiiy/Cx8cHcrm8XcdEHjx4EAaDwehlr7y8vCCXy3H+/Pk/XGd7+5o3bx5mz56Nl19+Ga+88gr+8Y9//KEwSEhIwPDhw7F3715s3br1nuu5X+3t39PTE5MnT8b27dvxt7/9DQsXLryvfrqzTZs2QaVStdhkYMpMLiQlEglefvllrFq1CpcvXxa7nHvWvCR44MAB1NfXIycnB6dOnWrVrqGhAVVVVcJl/OfPnw9vb2/MmDHjjn3L5XLMnDkTmzdvxqpVq6DVatHU1ITCwkLh7Jr2ak9fn3/+OTw8P
DBp0iQAwAcffICgoCDExcVBq9UKfTk6OqKoqAhXrlyBTqczuslAIpFgxYoVkEgkmD9/PioqKtpdz72M80feu6SkJDQ2NqKiogKPPPLIfffTHeXk5OCzzz7DwoULe84NyLpwL1GXaWhooLCwMAoODqbKykqxyxFotVoaPXo0OTo6EgCSSqXk5+dH77//fot2ixYtIkdHR7K3t6enn36aPvvsMwJAvr6+lJ+fT+vXr6eoqChycXEhc3Nz6tWrF02bNo2uXr0q9PHxxx+Tq6srASBra2uaNGkSERHduHGDFi1aRCqViszNzcnZ2ZliY2MpPT39nus01teECRNIIpGQo6MjnThxgohuHgojlUoJACmVSjpz5gwREZ09e5a8vb3JysqKIiMjaefOndSvXz9hj3bv3r1p7ty5LcaeMWMGASB7e3v65z//2a7Xdvs4r7zyCllZWREA8vLyoo0bNxIR0cqVK8nNzY0AkEKhoD//+c/tfu+aRUVFtTrUqT3v25IlS9qsqTuorKykAQMGUFhYWKujC0xYucleBaiwsBAjR46Em5sbfv75Zzg5OYldEmMPLI1Gg8cffxzXr19HSkpKj9keCaDC5Fa3m3l6euLw4cMoKytDWFgYzpw5I3ZJjD2Qzpw5g7CwMJSXlws7o3oSkw1JAPDx8cHp06cRGBiIkSNHYsGCBS22hTHG7qy2thbvvPMOIiIi4OPjg5SUlB55SxOTDkng5iWffvrpJ6xbtw6bN29GQEAAvv322x5/KSzGjNmzZw8CAwOxYsUKfPjhh/i///s/uLi4iF2WKEw+JIGbe0QTEhKQmZmJ8ePHY+bMmRg5ciR27NgBg8EgdnmMdQsGgwE7d+7EyJEjERMTg0cffRSXL1/GggULYGZmJnZ5oukRIdmsV69eWLt2LU6dOgUPDw9MnjwZ/fv3x5o1a1BXVyd2eYyJoq6uDmvWrEFAQACefvppuLu74+TJk/jyyy95hycAk9273R45OTn4+OOPsWHDBtja2uKZZ55BfHw8wsLCxC6NsU535swZJCcn47vvvoNOp0NCQgKSkpLQr18/sUvrTnrWjcDuRK1WY+3atUhOTkZ2djaCgoIQHx+PuLg4o6f5Mfagyc/Px6ZNm5CcnIyMjAz0798f8fHxmD17NlxdXcUurzvikLzdqVOnkJycjC1btqC8vBwjRozA+PHj8cQTT7S6yCtjD4ILFy5g37592LdvH06ePAlHR0dMnToV8fHxGD58uNjldXccknei1+vxyy+/YNeuXfjpp59QXFwMLy8vREdHIzo6Go888ggUCoXYZTLWSm1tLX7//XchGAsKCuDm5oYnnngCMTExGDduXKfc/sNEcUi2V3p6Ovbu3Ys9e/YgJSUFUqkUgwYNQkREBCIjIzF27Fg4ODiIXSbrgWpqapCSkoJjx47h+PHjOHbsGOrr6xEUFIQJEyZg/PjxCA8Ph1Tao/bTdhQOyftx/fp1HDhwAEeOHMHRo0eRnZ0Nc3NzDB48GKNHj0ZkZCTCwsJ63JkJrGsUFBQgNTUVR48exdGjR3Hu3Dk0Njaif//+GDVqFEaPHo2xY8fC3d1d7FJNAYdkRyguLhYC8/Dhw0hPT4fBYICLiwtCQ0OFKSwsTLjeJWPt0RyIt05qtRpSqRRBQUEYM2YMRo8ejdGjR8PNzU3sck0Rh2Rn0Gq1OHfuXIs/7JycHBgMBri6uiIkJATBwcEICgoSJl5V79kqKiqQkZGB9PR0ZGZmIj09HWlpaSgpKYFUKoW/v3+LL9whQ4YIt4RgnYpDsqtotVqcPXsWqampuHTpkvBhqK6uBgC4u7sjODgYgYGBCAwMhK+vL3x8fODt7c0b2U2EXq/H1atXkZeXh9zcXGRmZiIjIwMZGRnCtSRtbW0REBCA4OBgDBw4kANRfBySYrt69SoyMzNx6dIlZGVl4dKlS8jOzhbuH2JmZibcAsDHx0eY+vTpA09PT7i5ufEG+W7CYDCguLgYhYWFuHLlC
vLy8oRAzMvLQ0FBgXAzN3t7e/Tv3x8DBgxAQEAABgwYgMDAQHh7e4v8KthtOCS7q/LycuFDdvtUUFCAxsZGAIC5uTlcXV2hUqng7u4OT09PeHp6wt3dHSqVCs7OznBycoKTk1PPuZJ0ByMiaDQaaDQalJaWIj8/H9evX0dhYaFwRfH8/HyUlJS0+L14eXm1+GK7dWq+mRbr9jgkH0R6vR6FhYW4du0aCgsLUVRUhIKCAhQVFeHatWsoKCjA9evXW9yeQCqVCmHZPLm4uMDZ2Rm9evWCvb09lEol7OzsoFQqYW9vDzs7O9jZ2UEmk4n4ajvOjRs3oNVqodVqUVlZiaqqKmi1WlRVVaGyshJlZWUoLS2FWq0WQrF5uvVCKBYWFnB3d4eXlxc8PDzQu3dveHl5oXfv3vD09ISHhwc8PT15M4lp4JA0VUSEkpISlJaWQqPRQK1WC/9uKwyqqqrueEtTmUwmhKdSqYS5uTlsbW3v+GhhYQEbGxvh+dbW1rC0tGzVr42NTasg0ev1wnbaWzU0NAi3YwWA6upqoe2tjzqdDo2NjcJjVVWVEIY3btxo8/UpFAoolco2vzycnJzg7OwMFxcX4edubm68VN5zcEiy/7k1VCorK4WlrlunyspKNDU1QavVCmF0e1jdHmhVVVVtXpKusrKyzet6trWnXyqVQqlUCv9vDt7moG1+bA5rOzs7mJmZtVgivnVqXnJuDn3G7oBDkolv6tSpaGxsxPbt28UuhbHbme49bhhjrCNwSDLGmBEckowxZgSHJGOMGcEhyRhjRnBIMsaYERySjDFmBIckY4wZwSHJGGNGcEgyxpgRHJKMMWYEhyRjjBnBIckYY0ZwSDLGmBEckowxZgSHJGOMGcEhyRhjRnBIMsaYERySjDFmBIckY4wZwSHJGGNGcEgyxpgRHJKMMWYEhyRjjBnBIcmYyAwGA5YtW4bw8HCxS2Ft4JBkTEQ5OTkYPXo0XnnlFdTW1opdDmsDhyQzWXV1dZ2+dPZHxrhw4QJef/11JCYmYvDgwR1cGesoHJLMZK1btw5qtbrbjjFo0CDs2LEDcXFxkMlkHVwZ6ygckqzbICJ88sknCAwMhEwmg4ODA2JiYpCVlSW0mT9/PiwtLeHm5ibMe+mll2BtbQ2JRAKNRgMAWLhwIZKSkpCbmwuJRAI/Pz+sWLECcrkcLi4umDt3Ltzd3SGXyxEeHo5Tp051yBjMBBFjIpsyZQrFxsbS22+/TZaWlrRx40aqrKyktLQ0Gjp0KDk5OVFxcbHQPi4ujlxdXVv0sXTpUgJApaWlwrzY2Fjy9fVt0W7OnDlkbW1NGRkZVF9fT+np6TRs2DCytbWl/Pz8DhnjfgwfPpwGDRr0h/thHa6clyRZt9DU1IRPPvkEkyZNQnx8PJRKJQYOHIg1a9ZAo9Fg7dq1HTaWubm5sLQaFBSEVatWQafTYf369R02BjMdHJKsW9BqtaiurkZYWFiL+cOGDYOlpWWL1eGOFhYWBoVC0WK1nrFmHJKsW9Dr9QAAGxubVj+zt7eHTqfr1PFlMhlKS0s7dQz2YOKQZN2ChYUFALQZhpWVlfD09Oy0sfV6faePwR5cHJKsW7Czs4ONjQ3OnDnTYv6pU6fQ0NCA0NBQYZ65ubmw5NkRDh06BCLCiBEjOm0M9uDikGTdgpmZGZKSkrBz504kJydDq9Xi4sWLSExMhLu7O+bMmSO09fPzQ3l5OXbt2gW9Xo/S0lJcvXq1VZ+Ojo4oKirClStXoNPphNAzGAyoqKhAY2Mj0tLSsHDhQqhUKsyYMaPDxmAmROz964w1HwJkMBho6dKl5O/vTxYWFuTg4EATJ06k7OzsFu3LysooKiqK5HI59e3bl+bNm0evvfYaASA/Pz/hUJ6zZ8+St7c3WVlZUWRkJBUXF9OcOXPIwsKCPDw8yNzcnOzs7CgmJoZyc3M7bIz2SklJoYiICHJ3dycABIDc3NwoPDycDh8+/AffVdZByiVEROLGN
Ovppk6disbGRmzfvr3Tx5o7dy62bduGsrKyTh+LmYQKXt1mPU5TU5PYJbAHCIckYx0kKysLEonkrtPUqVPFLpXdAw5J1mMsXrwY69evR1VVFfr27dvhq/cBAQEgortOW7Zs6dBxWefibZJMdF25TZKxe8TbJBljzBgOScYYM4JDkjHGjOCQZIwxIzgkGWPMCA5JxhgzgkOSMcaM4JBkjDEjOCQZY8wIDknGGDOCT0tkXWrnzp2tzl2+ePEiiAghISEt5k+ePBlPPfVUV5bH2O0qOCRZlzp//jyGDBnSrrZnzpxpcdsGxkTAIcm6Xr9+/ZCTk2O0jbe3N65cudI1BTF2Z3yBC9b1pk+fLtwdsS2WlpaYOXNmF1bE2J3xkiTrcrm5ufD394exP72srCz079+/C6tirE28JMm6nq+vLwYPHgyJRNLqZxKJBCEhIRyQrNvgkGSiSEhIgJmZWav55ubmSEhIEKEixtrGq9tMFNevX4enpycMBkOL+RKJBPn5+fD09BSpMsZa4NVtJg53d3eMGjWqxdKkVCpFeHg4ByTrVjgkmWimT5/e4v8SiYRXtVm3w6vbTDSVlZVwcXGBXq8HAJiZmaGkpAS9evUSuTLGBLy6zcRjb2+PcePGwdzcHGZmZnjsscc4IFm3wyHJRBUXF4empiYQEeLi4sQuh7FWzMUugD34amtrcePGDeGxpqYGDQ0NAACdTofGxsZWz2lu39TUBEtLSxgMBjQ1NWHbtm1QKBSQyWStnmNubg5bW1sAN8/Ksba2Fh5lMhkUCkXnvlDWI/E2yR6qqakJGo1GmCoqKqDVaqHT6aDValFVVYXKykpotdoWk06nQ11dHerr64XH7kQul8PKykp4tLW1hZ2dXYvJ3t4eSqUSdnZ2ws8dHBzg5OQkTG0dw8l6JL7AhSnR6/UoKSlBfn4+rl+/jsLCQmg0GqjVaqjV6hahqNFoWj3fysqqRZg4ODi0ChgbGxthSe9uj8D/Qut2ZmZmsLOzAwD89NNPaGpqwoQJEwAAWq0WTU1NrZ5zayjfuuRq7LG6urpV0Dd/ITRPdXV1rca6NTCdnJzg4uICFxcXODk5wdPTE+7u7lCpVHB1dTV6Hjp74HFIPkjUajVyc3ORl5eHK1euoKioCIWFhSgqKsK1a9dQUlIiHJwtkUjg6uoKZ2dnODs7w9XVtdWH3tnZWfi/o6OjaB92vV4PIoKlpaVo45eXlwtfHqWlpa2+VEpKSlBaWorS0lKUlJQI551LpVK4urrCw8MDvXv3hpeXF9zd3dGnTx/4+vrCx8cHLi4uorwu1iE4JLsTIsKVK1eQnZ2NvLw8YWoOxurqagA3t8epVCr07t0bKpUK7u7u8PDwaPFBdXNz4yWcTqLX61FcXIyCggLhC+ratWsoKioS5uXn5wvbZW1sbODj4yOEZvO/+/fvD29v7zbPYWfdBoekWIqKipCRkYH09HTh8cKFC0IQOjg4CB+o2ydvb2/eZvYAqKioaPFld/sE3PzC8/PzQ3BwMIKCgoTHgIAA/h13DxySnU2v1+PSpUtITU3FmTNnkJqaivT0dGE7mEqlQmBgIIKDg4XHgIAAODg4iFw560wVFRXIzMxERkYGMjMzcenSJWRlZSE/Px/Aze3DwcHBCA0NRVhYGEJDQzFgwABeO+h6HJIdiYiQmZmJlJQUpKamIjU1FRcuXMCNGzdgbW2NIUOGIDQ0FCEhIRgwYAACAgKEnReMATd3WmVlZeHSpUtIS0tDamoqzp07h5qaGshkMgwaNAihoaEIDQ3FyJEjERgYyKvrnYtD8o9oampCVlYWjh8/jmPHjuHgwYMoLCyEhYUF/P39hT/m0NBQPPTQQ6LtmGAPtua/s+Yv3tTUVJw/fx41NTWws7PDQw89hLFjxyIiIgLDhw/npc2OxSF5r3JycrB3714cOHAAx44dg1arhaOjIyIjIzF69GiMGjUKQ4cOhbk5H6fPOk9jYyPOnj2Lo0eP4siRIzh27
BjKy8uhVCoRGRmJsWPHIjo6Gv7+/mKX+qDjkLwbvV6Po0eP4qeffsKePXtw+fJlODg4YOzYsRg9ejTGjBmD4OBgSKV8hicTj8FgQHp6Og4fPowjR47gwIEDqKioQL9+/TBhwgRER0cjMjKSlzLvHYdkWxobG7F//35s2rQJ+/btQ1VVFYKCghAdHY3o6GhERETwkiLr1hobG3H8+HHs27cPe/fuRWZmJpRKJaKjoxEXF4dHH32U/4bbh0PyVmfPnsXGjRuxefNmqNVqREREYPLkyYiOjoaPj4/Y5TF23/Ly8rBv3z5s3boVx48fh4uLC6ZNm4bp06dj6NChYpfXnXFI1tfX49tvv8WKFSuQnp4Of39/xMfHIz4+noORmaS8vDwkJycjOTkZOTk5CA4Oxrx58/Dss89CLpeLXV5303NDsry8HKtXr8bKlStRVVWF6dOn47nnnsOIESPELo2xLnPy5El8/fXX2LhxI5RKJebNm4fExEQ4OjqKXVp30fNCUqfT4b333sOqVatgYWGBxMREzJs3D25ubmKXxphoSkpKsGLFCqxevRp6vR6JiYl46623+DjenhaSW7ZsQVJSEm7cuIE333wTs2fPho2NjdhlMdZtVFdX46uvvsJ7770HmUyGjz76CNOmTRO7LDFVgHqAnJwcioqKIqlUSrNnzyaNRiNKHefPn6cpU6ZQnz59yNLSknr16kUhISH0j3/84577Wrp0KTk7OxMAWr16tTB/3759ZGdnR7t3777jc7dv3059+/YlAC0mCwsLcnZ2pjFjxtCHH35I5eXl9/U6Td3t75+rqyvFxcUZfU57fi/diUajoRdeeIGkUilFRUVRTk6O2CWJpdzkD+778ccfERYWhoqKCqSkpGDt2rWi3Efl4sWLCA8Ph5ubGw4ePIiqqiqcOHEC48aNw6FDh+65v1dffRUnTpxoNZ/asWIQGxuLvLw8+Pr6QqlUgohgMBigVquxdetW9O3bF4sWLUJwcDDOnDlzz7WZutvfv+LiYiQnJxt9Tnt+L91Jr1698MUXX+DkyZOorKxEWFgYdu/eLXZZ4hA5pTvV119/TWZmZjRr1iyqq6sTtZaEhATq3bt3q/k3btyg8ePH31efOTk5rZYk21JbW0sjR45sNd/X15eUSmWbz9m2bRtJpVJycXGhysrK+6rPVNzP+9dZY4qhvr6eZs+eTWZmZrRu3Tqxy+lqprsk+eOPP2LWrFl444038OWXX4p+aENZWRmqqqpQXl7eYr6lpSX27NnTqWOvW7cOarX6np7z1FNPYcaMGVCr1VizZk0nVfZguJ/370Ec805kMhnWrl2LxYsXY9asWdi1a5fYJXUtsWO6MxQWFpKdnR298MILYpcieOeddwgADRo0iI4dO3bHdp9++inJZDJydnamOXPmkJubG8lkMho5ciSdPHmyRdvblySPHj1KXl5eBIBWrlxJREQLFiwgS0tLYfuZr6+v8Py7LQkdOXKEANCYMWOEeQaDgT7++GMKCAggS0tLsre3pyeffJIyMzNbPf/bb7+l0NBQkslkpFAoyNvbm959912aN28eWVhYkKurq9D2xRdfJIVCQQCotLSUiIiWLVtGCoWCJBIJDR06lFxcXMjc3JwUCgUNGTKEIiMjydPTk2QyGSmVSnrttddajN/Y2Eh/+9vfyMvLi+RyOQ0cOJC2bNlCRESff/45KRQKsrKyol27dtG4cePI1taWPDw86LvvvhP6+CPvX7O2fi9ERIcOHaJhw4aRlZUV2dra0oABA6iqqsromGKbM2cO2draUkFBgdildJVykwzJ5557jnx8fKi+vl7sUgS1tbUUFhYm/OEHBQXRkiVLqKysrFXbOXPmkLW1NWVkZFB9fT2lp6fTsGHDyNbWlvLz84V2ba1uFxQUtPowxsbGtvlBu9uHXKvVEgDy8vIS5r399ttkaWlJGzdupMrKSkpLS6OhQ4eSk5MTFRcXC+2WLVtGAOif//wnlZWVUXl5O
X3xxRfCDo64uLgWIUl0c2fUrSFJRPT//t//IwB06tQpqqmpIY1GQ+PGjSMAtG/fPiotLaWamhqaP38+AaDz588Lz3311VdJJpPR9u3bqaKighYvXkxSqZROnz5NRERvvvkmAaDffvuNqqqqSK1W06hRo8ja2poaGhr+8Pt3q9t/L9XV1WRnZ0dLliyhuro6Ki4upkmTJgmv/U5jiq2+vp58fX1pxowZYpfSVUxvdbumpgbff/89/vrXv7Z5W1KxWFlZ4cSJE/j0008REBCAjIwMLFq0CIGBgTh8+HCr9ubm5ggMDIRMJkNQUBBWrVoFnU6H9evXd1nNtra2kEgk0Ol0AG7eiOuTTz7BpEmTEB8fD6VSiYEDB2LNmjXQaDRYu3YtgJsXBfn73/+OqKgovP7663B0dISDgwOef/55DBs27L5qCQoKgkKhQK9evYRDUlQqFZycnKBQKBAfHw8AyMrKAnDzTKpVq1Zh4sSJiI2Nhb29Pd566y1YWFi0eg/Dw8NhZ2cHZ2dnTJ06FTU1NcLFbzvLlStXoNVqERwcDLlcDldXV+zYsQNOTk6dOu4fJZPJsGjRImzdulW4ir6pM7mQvHTpEmpqajBu3DixS2nFwsIC8+fPR2ZmJk6ePImYmBio1Wo8/fTTqKioMPrcsLAwKBQKIQS6Qk1NDYhIOKA4PT0d1dXVCAsLa9Fu2LBhsLS0xKlTpwAAaWlpqKysxGOPPdainZmZGRYsWPCH62q+Luet9/NuvrqNXq8HAGRnZ6O2thYDBgwQ2lhZWcHNzc3oe9jcd3M/naX5BmHx8fF45513cOXKlU4dryONGzcOtbW1SEtLE7uULmFyIVlZWQkAsLe3F7kS44YPH44ffvgBiYmJKC0txcGDB+/6HJlMhtLS0i6o7qbLly8DAAICAgD8771t6wB8e3t7YYlTq9UK88RSU1MDAHjrrbcgkUiE6erVq6itrRWtrmZWVlb4/fffERkZiffffx8+Pj6YOnVqm7e37W6aT1ls/nswdSYXkp6engCA3NxckStpKTY2tsWST7Pp06cDwF0/uHq9HpWVlcLr6wq//PILAODxxx8H8L/Qaw7DW91aW+/evQGgzXt7dxVnZ2cAwLJly0BELaaUlJROH//IkSNYtmyZ0TbBwcHYs2cPioqKsGjRInz//ff46KOPOr22PyonJwcA4OXlJXIlXcPkQjIoKAje3t7YtGmT2KW0cOPGDWRkZLSan52dDQAICQkx+vxDhw6BiLrsAhzFxcVYtmwZPD098dxzzwEABgwYABsbm1YHmJ86dQoNDQ0IDQ0FAPTp0weOjo7Yv3//Hfs3Nzfv1FVaLy8vyOVynD9/vtPGMCY1NRXW1tZ3/Hnz3TKBm4H+z3/+E0OHDm3zb6S72bRpE1QqVYtNGabM5EJSIpHg5ZdfxqpVq4TVxe5i4sSJ2Lp1KyorK1FVVYUff/wRr7/+Op588slWIWkwGFBRUYHGxkakpaVh4cKFUKlUmDFjxj2P6+joiKKiIly5cgU6na5FOBERqqurYTAYQEQoLS3F999/j4iICJiZmWHXrl3CNkm5XI6kpCTs3LkTycnJ0Gq1uHjxIhITE+Hu7o45c+YAuLlZYPHixThy5Ajmz5+Pa9euwWAwQKfTCSHg5+eH8vJy7Nq1C3q9HqWlpbh69ep9vrOtyeVyzJw5E5s3b8aqVaug1WrR1NSEwsJCXL9+/Z76Mvb+3U6v16OkpASHDh26a0jOnTsXWVlZaGhowLlz53D16lXhS/BexuxKOTk5+Oyzz7Bw4cKecwMy8fasd56GhgYKCwuj4ODgbnO2yP79+2nKlCnk6+tLMpmMLC0tqX///vTOO++0OlRpzpw5ZGFhQR4eHmRubk52dnYUExNDubm5QpuPP/6YXF1dCQBZW1vTpEmTaOXKleTm5kYASKFQ0J///GciIjp79ix5e3uTlZUVRUZG0pdffkkhISGkUCjI0tKSpFIpASCJREL29vb00EMP0
bvvvtvm4UkGg4GWLl1K/v7+ZGFhQQ4ODjRx4kTKzs5u1fazzz6jgQMHklwuJ7lcTkOGDKHPP/+ciIjKysooKiqK5HI59e3bl+bNm0evvfYaASA/Pz/Kz8+n5cuXC8dO9unTh44ePUr/+te/SKlUCudMb9q0ibZs2SK8Fw4ODrR582Yiunk206JFi0ilUpG5uTk5OztTbGwspaenC8dJAiB/f3/Kzc2ltWvXkp2dHQEgb29vunz5cpvv3+rVq8nX17fVue+3rOFD9wAAIABJREFUTzt37iQiavP3cuXKFQoPDycHBwcyMzOj3r1705tvvkmNjY1tjnnr4VViqayspAEDBlBYWFiLQ6RMXLnJXgWosLAQI0eOhJubG37++eduf2jFrebOnYtt27ahrKxM7FIYA3Bz+/Ljjz+O69evIyUlpcdsjwRQYXKr2808PT1x+PBhlJWVISws7IG7UENTU5PYJTAGADhz5gzCwsJQXl6OI0eO9KSABGCC2yRv5ePjg9OnTyMwMBAjR47EggULhMNTGGPG1dbW4p133kFERAR8fHyQkpLSM29pIvYKf1cwGAy0YcMGcnZ2Jnd3d9qwYQMZDAaxy2rTG2+8IZy326dPH9q2bZvYJbEeaPfu3aRSqcjBwYGWL18ubCvtgUzz3O070Wg0NHv2bJJKpTR8+HDavn07NTU1iV0WY91CU1MT7dixg0aMGEFSqZRmzZrV4jz6Hsr0zt02plevXli7di1OnToFDw8PTJ48Gf3798eaNWseiDMdGOsMdXV1WLNmDQICAvD000/D3d0dJ0+exJdffvlA7fDsLCa7d7s9cnJy8PHHH2PDhg2wtbXFM888g/j4+FbnJjNmis6cOYPk5GR899130Ol0SEhIQFJSEvr16yd2ad1Jz7oR2J2o1WqsXbsWycnJyM7ORlBQEOLj4xEXFweVSiV2eYx1mPz8fGzatAnJycnIyMhA//79ER8fj9mzZ8PV1VXs8rojDsnbnTp1CsnJydiyZQvKy8sxYsQIjB8/Hk888QQGDRokdnmM3bMLFy5g37592LdvH06ePAlHR0dMnToV8fHxGD58uNjldXccknei1+vxyy+/YNeuXfjpp59QXFwMLy8vREdHIzo6Go888ggUCoXYZTLWSm1tLX7//XchGAsKCuDm5oYnnngCMTExGDdunHBpOXZXHJLtlZ6ejr1792LPnj1ISUn5/9i777AozvVv4N9ddllYyoJSlWoFBBGBnwXsMbHGGuMRscSg0RNLiklM1ROjRqMmGmuMvWKJUVQUj1HBIIYmKMUOKG3pHXbZ+/3Dw7yuYEFZZoHnc11zKbvDzHdnh3uemd15HgiFQri7u8PHxwe+vr544403YGpqyndMpgUqLS1FeHg4wsLCcOXKFYSFhaGiogIuLi4YOXIkRowYgd69e0MobFGf0zYUViRfRUZGBs6fP4/Lly8jNDQUycnJEIlE6NatG/r27QtfX194eXm1uDsTmMaRlpaGqKgohIaGIjQ0FDExMVAqlejcuTP69OmDvn374o033oC1tTXfUZsDViQbQmZmJlcwL126hJs3b0KlUsHCwgKenp7c5OXl1aj9QTJNX01BfHLKzs6GUCiEi4sL+vXrh759+6Jv376wsrLiO25zxIqkJhQVFSEmJkZtx759+zZUKhUsLS3RtWtXdOnSBS4uLtzETtVbtvz8fCQkJODmzZtITEzEzZs3ERcXh6ysLAiFQnTs2FHtgOvh4cF1YcdoFCuSjaWoqAjR0dGIiorCjRs3uD+GmsGUrK2t0aVLFzg7O8PZ2Rnt27dHu3btYG9vzy6yNxMKhQIpKSm4d+8e7t69i8TERCQkJCAhIYHr49LIyAhOTk7o0qUL3NzcWEHkHyuSfEtJSUFiYiJu3LiBpKQk3LhxA8nJydz4ITo6OrC1teWKZs3k4OAAGxsbWFlZsQvyWkKlUiEzMxMPHz7EgwcPcO/ePa4g3rt3D2lpaVzvTiYmJujcu
TNcXV3h5OQEV1dXODs7w97enudXwTyFFUltlZeXx/2RPT2lpaVx4+WIRCJYWlrCzs4O1tbWsLGxgY2NDaytrWFnZwdzc3OYmZnBzMys5fQk3cCICDk5OcjJyYFcLkdqaioyMjLw8OFDrqfz1NRUZGVlqb0vtra2age2J6eawbQYrceKZFOkUCjw8OFDPHr0CA8fPkR6ejrS0tKQnp6OR48eIS0tDRkZGWpd/guFQq5Y1kwWFhYwNzdH69atYWJiAplMBmNjY8hkMpiYmMDY2BjGxsZaNX7566isrERRURGKioq4ITSKiopQWFiIgoIC5ObmQi6XIzs7myuKNZNKpeKWIxaLYW1tDVtbW7Rt2xZt2rSBra0t2rRpAxsbG7Rt2xY2NjbsMknzwIpkc0VEyMrKglwuR05ODrKzs7n/11UMCgsLnzlio0Qi4YqnTCaDSCSCkZHRM/8Vi8Vqw84aGBhw41k/ydDQsFYhUSgUdQ56X1VVxQ0TCwAlJSXcvE/+W1xcjIqKCuTn50MkEqGwsJArhpWVlXW+PqlUCplMVufBw8zMDObm5rCwsOCet7KyYq3yloMVSeb/UyqVXFEpKCjgWl1PTgUFBaiurkZRUREuX74MiUQCR0dHtWL1dEErLCxUa4nVKCgoQF27X12f9AuFQshkMu7nmsJbU2hr/jUyMkJYWBjkcjmmTp0KS0tLrkX85FTTcq4p+gzzDKxIMq/m8OHDePfdd3Hs2DGMHj2a7zhqsrKy4OXlBRcXF5w+fRo6Ojp8R2KaruY7xg2jOffu3UNAQAA+/PBDrSuQAGBpaYkjR47g0qVLWLJkCd9xmCaOtSSZelEoFOjXrx9KSkoQEREBfX19viM90+7duzFt2jQEBgZi/PjxfMdhmqZ8djGGqZdFixYhLi4OkZGRWl0gAWDKlCn4+++/MX36dDg7O6NLly58R2KaINaSZF5acHAwhg0bhp07d2LKlCl8x3kpCoUCgwYNQlZWFq5du6b24Q/DvAT2wQ3zcrKystCtWze89dZb2LlzJ99x6iUzMxNeXl7w9PTEH3/8we5QYuqDfXDDvJhKpcLkyZNhaGiI9evX8x2n3qysrHD48GEEBwdj6dKlfMdhmhhWJJkXWrp0KUJDQxEYGAgjIyO+47ySXr16Ye3atViyZAmCgoL4jsM0Iex0m3mu0NBQDBw4EGvXrsWHH37Id5zXFhAQgEOHDiEiIgLOzs58x2G0H7smyTxbfn4+PDw84OrqipMnTzaLW/EqKyvRt29fFBUVISIignVBxrwIuybJ1I2IMH36dFRXV2PXrl3NokACj+9DP3LkCPLy8jBjxow6b4tkmCexIsnUad26dTh16hQOHjyI1q1b8x2nQdna2iIwMBDHjx/Hzz//zHccRsuxIsnUEhcXhy+++AKLFy+Gj48P33E0ol+/fli6dCk+++wzhIaG8h2H0WLsmiSjpqSkBF5eXrC2tsb58+ebdecQRIRx48YhPDwcMTExbCAtpi7sgxtG3ZQpUxAcHIzY2Fi0adOG7zgaV1BQAE9PT9jb2yMkJKRZHxSYV8I+uGH+v507d2Lv3r3Yvn17iyiQwOOxZo4dO4arV6+yHoOYOrGWJAMAuH37Njw9PTF79mz8+OOPfMdpdFu2bMGcOXMQFBSEoUOH8h2H0R7sdJt5/N3BXr16QSQSISwsrM6hFlqCadOm4cSJE4iKioKjoyPfcRjtwIokA/z73//Gvn37EBMT06KLQ3l5OXr37g1dXV1uaAqmxWPXJFu6oKAgbNq0CZs2bWrRBRIA9PX1ERgYiKSkJCxcuJDvOIyWYC3JFiwtLQ0eHh4YP348Nm/ezHccrREYGIh3330Xu3fvhr+/P99xGH6x0+2WSqlUon///igoKMC1a9cglUr5jqRV5s+fj+3btyMiIgIuLi58x2H4w4pkS/Xll19i7dq1iIiIQNeuXfmOo3WqqqrQr18/FBcXIyIiAgYGBnxHYvjBrkm2RBcvXsTKl
Svx66+/sgL5DLq6ujh06BAyMzMxb948vuMwPGItyRYmOzsb3bp1g6+vLwIDA/mOo/VOnz6NESNGYN++ffjXv/7Fdxym8bGWZEuiUqng7+8PfX19/Pbbb3zH4YVKpcLatWvRu3fvl5p/2LBhmD17NmbPno0HDx5oNlwT9p///AcuLi4wNjaGRCJBhw4d8Nlnn6GkpITvaK+PmBZj+fLlJBaLKTw8nO8ovLh16xb5+PgQAHJ3d3/p3ysvLyd3d3f6v//7P6qqqtJgwqarX79+tGHDBsrNzaWioiI6dOgQicViGjJkCN/RXlceK5ItREREBOnq6tLq1as1svyysjLq1auXRpbdEOuIjY2lsWPH0t69e6lbt271KpJERDdu3CB9fX369ttvX2n9r0vbt+/w4cNJqVSqPTZhwgQCQKmpqQ0Rjy957HS7BSgoKMDEiRMxaNAgfPTRRxpZx++//47s7GyNLLsh1uHu7o6jR4/Cz8/vle6k6dKlC3766ScsXboUFy5ceKUMr0Pbt29QUFCtHpTMzMwAAGVlZa+djVd8l2lG8yZOnEiWlpaUmZnJPaZSqWj16tXk5OREurq6ZGJiQqNGjaLExERunrlz55JYLCZLS0vusTlz5pBUKiUAJJfLiYho/vz5pKurSwAIALVv355++eUXkkgkZG5uTrNmzSIrKyuSSCTUq1cvunr1aoOs41X16NGj3i3JGqNHjyYbGxvKycl57nwtefvWGDVqFOnr61NlZeVrL4tH7HS7udu4cSMJhUI6f/682uPffvst6erq0p49e6igoIDi4uKoe/fuZGZmplZM/fz81P7AiIhWrVql9gdGRDRu3Lhaf1izZs0iAwMDSkhIoIqKCrp58yZ5e3uTkZGR2inY66zjVbxOkczLyyM7OzsaM2bMc+dryduXiKi0tJSMjIxo3rx5DbI8HrHT7ebsxo0b+OSTT/DNN99g0KBB3OPl5eVYs2YNxo4di8mTJ0Mmk8HNzQ2bN29GTk4Otm7d2mAZRCIRnJ2dIZFI4OLigo0bN6K4uBg7duxosHU0JlNTU+zZswcnTpx45nZi2xdYtmwZrK2tsXTpUl5zNARWJJup0tJSTJgwAV5eXvjmm2/Unrt58yY3TMOTvL29oauri4iICI3l8vLyglQqRVJSksbWoWl9+/bFF198gfnz5yM+Pr7W8y19+x47dgyBgYE4e/YsjIyMeMvRUFiRbKbmzp2LzMxM7N27t9YF9YKCAgCAoaFhrd8zMTFBcXGxRrNJJBLI5XKNrkPTFi9ejO7du2PSpEkoLy9Xe64lb9+DBw9ixYoVuHjxIhwcHHjJ0NBYkWyGAgMDsXPnTuzYsQN2dna1njcxMQGAOv9YCwoKYGNjo7FsCoVC4+toDCKRCAcPHsSjR4/wxRdfqD3XUrfv+vXrsXfvXly4cKFZDf/BimQzc/fuXQQEBGD+/PkYNWpUnfO4urrC0NAQkZGRao9HRESgqqoKnp6e3GMikQgKhaLB8l28eBFEhJ49e2psHY3F1tYWW7Zswfr163HixAnu8Za2fYkIn3/+OeLj43H8+PE6W9BNGSuSzYhCoYCfnx86duyIFStWPHM+PT09fPLJJzh27Bj27t2LoqIixMfHY/bs2bC2tsasWbO4eTt06IC8vDwcP34cCoUCcrkcKSkptZbZqlUrpKen48GDByguLub+KFUqFfLz86FUKhEXF4cFCxbAzs4O06ZNa7B18Omdd97B1KlTMWPGDKSnpwNoeds3ISEBK1euxG+//QaxWAyBQKA2/fTTT/XZpNqH54/XmQa0YMECMjQ0pKSkpBfOq1KpaNWqVdSxY0cSi8VkampKY8aMoeTkZLX5cnNzacCAAaSnp0eOjo40d+5cWrhwIQGgDh06cF81iY6OJnt7e9LX1ydfX1/KzMykWbNmkVgsprZt25JIJCJjY2MaPXo03b17t8HW8bLCw8PJx8eHrK2tue8CWllZUe/evenSpUsvvZy6lJSUU
OfOnalfv37cXSctafvGx8dz27SuadWqVa+1fXnGvifZXJw+fZoEAgHt2bOH7yicWbNmUatWrfiO0SiioqJIV1eXfvzxx0ZbZ0vavjxi35NsDh49eoQpU6Zg+vTpmDx5Mt9x1FRXV/MdoVF0794dy5Ytw9dff13rWqQmtZTtyydWJJs4lUqFKVOmwNTUFD///DPfcRpNUlJSrWtfdU0TJ05stEwff/wx+vfvD39//yZ/v7I2bl/e8N2WZV7Pd999R3p6ehQTE8N3FDWLFi3i7gV2cHCgw4cP8x2pUTx69IhatWpFH374oUbX01K3Lw/yWM/kTdjly5cxcOBArF+/HrNnz+Y7DvM/R48exTvvvINTp05h6NChfMdhXg8bCKypysvLg4eHB7y8vHD06FG+4zBPmTRpEi5duoS4uDi0bt2a7zjMq2NFsikiIowePRoxMTGIjY1Fq1at+I7EPKWgoADu7u7sINb0sTFumqK1a9fi9OnTOHjwICuQWsrExAS///47/vjjD+zfv5/vOMxrYC3JJiYqKgq9e/fGkiVLat0zzGif+fPnY9euXYiLi6vzPnpG67HT7aakpKQEnp6eaNu2LUJCQmr17sNon4qKCnh7e8PS0hLnzp2DUMhO3poYdrrdlHzwwQcoKCjAvn37WIFsIvT09LBr1y6EhoZi/fr1fMdhXgErkk3Etm3bcODAAezduxfW1tZ8x2HqoXv37vj666+5nnKYpoWdbjcBCQkJ8Pb2xvz587Fs2TK+4zCvQKlUok+fPqisrMTVq1ehq6vLdyTm5bBrktquoqICPXv2hEQiQVhYGMRiMd+RmFd07949dOvWDfPmzWsWY7+0EOyapLZbsGABUlJScOjQIVYgm7h27dph5cqVWL58OS5fvsx3HOYlsSKpBc6fP49du3bVevzo0aPYsmULNm3a1GzGC2npZs2ahaFDh2L69Om1hnc4d+4cPvjgA56SMc/Eww3jzFPef/99AkB+fn5UVFREREQpKSnUqlUr+ve//81zOqahZWVlkYWFBc2cOZOIiIqKimjmzJkkEAhIX1+fKisreU7IPIF1cME3lUoFCwsL5ObmQiQSwdbWFoGBgZg3bx6Ki4tx7do16Ovr8x2TaWDHjx/H2LFj8cMPP2Djxo3Iysrihku4fPky+vTpw3NC5n/YBzd8Cw8PR+/evbmfRSIRAEAqleLq1atwdnbmKxqjQeXl5fD19UVMTAyEQiHXea6uri4WLVqExYsX8xuQqcE+uOHbiRMn1L4OolQqoVQqUVJSgoULFyI3N5fHdIwmXL16Fa6uroiLiwMRqfUuXlVVheDgYB7TMU9jLUmedezYEXfu3KnzObFYjFatWiEwMBB9+/Zt5GRMQ6uoqMB3332Hn376CQKB4JlDL+jo6CA/Px9GRkaNnJCpA2tJ8unOnTvPLJAAuOE/Bw4ciA0bNjRiMkYTgoKCsHbtWgDPH5umurqafUVIi7AiyaMTJ05w1yCfhYgwfPhwvPPOO42UitGU8ePHIzw8HLa2ts/9zquuri7++9//NmIy5nlYkeTR0aNHn9miEIvFkEql2Lx5M/78809YWFg0cjpGEzw9PREXF4cxY8Y8c56qqiqcPn26EVMxz8OuSfIkNzcXlpaWdRZJoVCIPn36YM+ePbC1teUhHdMYdu/ejZkzZ6K6uhpKpVLtOYFAgPT0dFhZWfGUjvkfdk2SL0FBQXj6+CQSiaCrq4tly5bhwoULrEA2c1OmTEF0dDTatWtX6/RbIBDgwoULPCVjnsSKJE+OHz+u1gGrjo4O3N3dERcXh88//5x1ztpCuLi4ICoqChMmTADwuDgCj88mzp8/z2c05n/Y6TYPKisrYWpqivLycohEIhARvv76a3zzzTesM90WbPfu3Zg1axb3XVkrKytkZGTwHaulY3fcAI+/wF1cXIyCggKUlJSgqqoKxcXFateJysrKUFlZyf0sFAohk8nUlmNiYgKBQABTU1MYGhrC0NAQU
qm01vrOnDmDYcOGAXjckjhw4AC6du2qoVfHNCUJCQkYM2YMbt26BQC4desWOnbsCODxBzqlpaXIz8/n9sfy8nJUVFRwv09EKCgoUFumSCSq9Z1LY2Nj6OjowMjICFKpFAYGBjA1NdXwq2uS8p///ZMmqqKiAikpKUhLS0NmZibkcjmys7ORlZUFuVwOuVyOnJwcFBYWoqSkRG0na2g1xVQmk8HY2BiWlpZ48OABBAIB+vbtCz8/P6SmpqK6uhoODg5sR20hKioqkJaWhkePHiEzMxM5OTnc1LVrVxQXFyMjIwO9evWCSqWqddDWlJqCaWRkhNatW8PMzExtsrS0hJmZGSwsLGBvbw8rK6tmf2moybYkc3NzkZiYiMTERNy9excpKSl48OABUlJS1E5RdHV1uTfVysoK5ubm3CSTyWBoaAgDAwMYGxtzP0skEujr60NPT49bjkQiUWsVKhQKlJSUcD+rVCoUFhZy/xYXF6OkpAQlJSUoKipCYWEhCgsLkZmZiePHj8PKygrl5eXIyspCaWkptxxjY2PY29vDwcEBjo6OcHBwgJOTE5ycnODg4MBds2K0m0KhwP3795GcnIzbt28jNTUVqampSEtLQ1paGrKysrh5hUJhncVILpcjNTUVM2bM4PZTAwMDmJiYQCqVQk9PD7q6ujAwMFBbt0wmUytcFRUVKC8vV5snPz8fAFBUVISysjKUlZVxLdSysjIUFRWpFe6cnByusfFkF29isRht27aFra0t7O3tYWtrCwcHB3Tq1AmdOnVCmzZtNLF5G5P2n26Xl5cjNjYWUVFRuHnzJpKSknDz5k3I5XIAgJGREdq3b88Vlpqp5g0zMzPj+RWoq6ysRHV1tVrBLSsrQ1paGlfka/5NSUnB3bt3kZmZCeDxUb6mYHbp0gXdunWDl5cX+w4ljyorK3Hjxg3ExMQgKSkJycnJuHXrFu7fv8/16mNtbQ1HR0fY2tpyk729PWxsbGBjYwMLC4tnHvyysrJgaWnZmC/phSorK5GZmYm0tDSkpKTg4cOHSEtL4w4E9+/fR1FREYDHf581BbNTp05wc3ODh4cHHB0dm8oBX7uKpEqlQnx8PK5evYrIyEhERkbixo0bUCqVMDU1hZubG5ydnbnJycmpRYxlXFBQgMTERCQkJCApKQkJCQlITEzE/fv3AQD29vbw8vKCt7c3vL290aNHj1qtC+b1lZeXIzIyEtHR0YiNjUVMTAwSEhKgUChgYGAAZ2dndOrUCZ07d1YrDIaGhnxHb3SZmZncAaNmSkpKwt27d1FdXQ2ZTIZu3brBw8MDHh4e8PT0hIuLizYWTv6L5L1793D+/HmcP38eFy5cQG5uLgwNDeHu7g5PT09u0tINyKvCwkLEx8cjKiqKmxISEiASieDu7o433ngDPj4+6NevH4yNjfmO2+QUFRXh2rVrCAsLw5UrVxAWFoaKigrIZDK4urqq7Z9OTk7smwkvoaqqCrdv31bbZ2NiYlBWVgYjIyP06NEDPj4+8PX1ha+vr9olL540fpEsLCxEcHAw/vzzT5w9exZ5eXlo3bo1+vTpgwEDBqB///5wdXVt9heDNSU9PR0XL17kptu3b0NXVxc+Pj4YOXIk3n77bbRv357vmFqpsrISly9fRnBwMIKDg5GYmAiBQAAXFxf06dMHPj4+8PHxYUNpNDClUonr168jLCyMOyBlZGRAT08PvXr1wpAhQzB06FC4ubnxEa9ximRGRgaOHj2KEydO4NKlS1CpVOjbty9GjBiBgQMHws3NjRVFDUlPT8dff/2FM2fO4PTp08jPz0eXLl3w9ttvY9y4cfD09OQ7Iq/S09Px559/4syZM7hw4QJKS0vh6uqKIUOGoH///vDx8YGJiQnfMVucu3fv4sqVKwgJCcHZs2chl8tha2uLIUOGYNiwYXjrrbcaq8d+zRXJyspKnDt3Dnv27MHx48chFosxcOBAjBw5EqNHj2YfNvCguroa4eHhCAoKwvHjx
5GcnAwnJye8++67mDZtWotpIeXl5SEoKAiHDx9GcHAwdHV10bt3b4wYMQKjR4+Gvb093xGZJ6hUKsTExHCX5S5evAixWIwRI0bA398fb731libHMc9v8IHA4uPjKSAggGQyGYlEIho+fDgdOnSIysvLG3pVzGu6evUqzZkzh1q1akVCoZAGDx5Mx44do+rqar6jNbiqqioKDAykt956i0QiEUmlUnr33Xfpjz/+YPtmE5ORkUHr1q2j3r17k0AgoFatWtGsWbMoOjpaE6vLa7AiGRISQkOGDCGBQEDOzs60Zs0ayszMbKjFMxpUUVFBR48epeHDh5NQKKSOHTvSxo0bqbS0lO9ory0jI4OWLFlCbdq0IR0dHRo5ciTt37+fSkpK+I7GNICUlBRauXIldenShQBQ7969ad++fQ054uTrF8lTp05Rt27dCAANGDCAgoKCSKVSNUQ4hgdJSUk0c+ZM0tfXJzMzM1q+fDmVlZXxHavebt26Rf7+/qSrq0tmZmb0+eef0/379/mOxWjQX3/9RePHjyeRSESWlpa0fPnyhjgYvnqRjI6OpkGDBhEAGjt2LEVFRb1uGEaLZGdn09dff00GBgZkZ2dHu3fvbhKn4Q8ePKAZM2aQSCQiJycn2rFjBzudbmHS0tJo0aJFZGRkRJaWlvTzzz+/zj5Q/yJZVFRE77//PgmFQurZsyeFhYW96sqZJiA9PZ0CAgJIR0eHPD09KT4+nu9IdSotLaVPP/2UdHV1qV27drRz505SKpV8x2J4JJfL6dNPPyWpVEo2NjZ0+PDhV1lM/YrklStXqF27dmRhYUEHDx5kp9UtSHx8PPn4+JCenh6tXr1aq1qVFy9epA4dOpCJiQlt3LiRqqqq+I7EaJH09HSaMWMGCQQCGjt2LGVkZNTn11++SH7//feko6NDI0aMYB/ItFBKpZJ++OEHEovFNGjQIMrLy+M1T1VVFc2dO5cEAgG9/fbb9OjRI17zMNrt/Pnz1K5dOzI1NaWjR4++7K+9uEgqlUr64IMPSCQS0YYNG1jrkaHIyEiys7MjV1dX3gpTfn4+vfHGG2RoaEj79u3jJQPT9JSWltIHH3xAQqGQfvzxx5f5lecXSYVCQePHjyd9fX06ceJEw6Ssw7vvvksAXmo6efKkxnIcOXKEHB0da61TIpGQg4MDTZ8+ne7du6ex9deYMWMGGRpB/WMtAAAgAElEQVQaEgCKiYnR+PpeRVpaGrm4uJCDgwPdvXu3UdedkpJCLi4u1LZtW019N46IGna/9PLyIqFQSO7u7s+db9WqVWRubk4AaNOmTS+Vk+239bdu3TrS0dGhgICAF106en6R/Oyzz8jAwIAuX77csAmf8u6779K5c+eooKCAFAoFZWRkEAB6++23qaqqikpLSyk7O5sCAgI0WiRrtG/fnmQyGRERVVdXU1ZWFu3evZukUilZWFhQTk6OxjMcOHBA63e2nJwc6t69O7m5uTXadyqzs7OpU6dO1LVrV3r48KFG19XQ++WgQYNeWCSJiG7fvl2vIlmD7bf1ExQURBKJhL788svnzZb3zBumg4KCsGrVKmzYsAF9+vRpkPt7nkUgEMDHxwcymQwikUjt8Zrxp83NzRv8PuPy8nL07t37ufMIhUJYWFjA398fH374IbKzs9kATf/TunVrHD9+HBkZGQgICND4+lQqFSZPngyFQoGzZ8+ibdu2Gl2fJvbLxurJiu23LzZ8+HBs2bIFy5cvx8GDB585X53DNxQWFmL69OmYMWMGpk6dqrGQNQ4cOPBS882aNatB1/v7778jOzv7pefv0KEDAHCd4GpSU+kWztbWFrt27cKIESMwceJEjBw5UmPr2rZtG/766y9cuXKlUcaj1sR++fTQsY2B7bfPNnXqVMTFxeH999+Hr68vbGxsas1TZ0ty3bp1UKlUWLVqlcZDvorq6mp8++23sLOzg76+Prp27YpDhw4BAHbu3AlDQ0NuQK7jx48jMjIS9vb20
NHRwaRJkwAACxYswCeffIK7d+9CIBBwO9Lz3L59GwDg7u7+0nkAIDQ0FC4uLpDJZNDT04ObmxvOnj3LPU9EWLVqFTp37gyJRAKZTIaFCxfW63WvXLkSUqkURkZGyM7OxieffIK2bdsiOTm5nlu3/oYNG4Zx48bhu+++09g6Kioq8M033+Df//43vL29Nbae1/Gi9xkA7ty5AycnJxgYGEBfXx99+vRBWFjYC5f9on3sedh++3zLli2DlZUVlixZUvcMdZ2Et2vXjj799FNNXAZ4KTXXfkaNGlXn859++ilJJBI6cuQI5efn05dffklCoZD++ecfIiJKSEggqVRKU6dO5X5n0aJFtG3bNrXljBs3jtq3b19r+U9e2yF6/Enqzp07SSqV0vDhw+ud5/Dhw7R48WLKy8uj3Nxc6tmzJ7Vu3Zr7/a+++ooEAgGtXr2a8vPzqaysjDZs2FDr2s6L1vPVV18RAJo/fz6tX7+exo4dS4mJiS/a3A3i77//1ui1qP3795NYLKb09HSNLP9lvGi/fNH7PGjQIGrXrh3dv3+fFAoF3bhxg3r06EF6enp069Ytbr66rkm+6L0nYvvt69i4cSNJpVIqLCx8+qnaH9zcu3ePAFBoaGjjpKvD83bG8vJykkqlNHHiRO6xsrIykkgkNGfOHO6xLVu2EADau3cv7d+/nz7++ONay3pekcRTnxQKBAJaunRprS8qv2yeJy1btowAUHZ2NpWVlZFUKqXBgwerzfP0BfCXWU/NzsbXbXiWlpa0atUqjSx72rRpNGDAAI0s+2W9qEg+7cn3majuD27i4uIIgFqj5Oki+bL7GNtvX51cLieBQFDXB3C1P7ipGe+Xp16AXyg5ORllZWVwdXXlHtPX14eVlRWSkpK4x2bOnInx48fjgw8+QGBgIFauXFmv9chkMhARiAgLFy4EEUEmk9W6pvSyeZ5Us4zq6mrcuXMHZWVlGDRoUIO8bj65ublx+09DS0pKgoeHh0aWrSlPvs/P4ubmBplMhri4uGfOU5/3nu23r8bMzAy2trZ1ZqpVJMvKygBAbTQ/bVIz/OrXX38NgUDATSkpKVz2Gj/88ANKSkrq9eFMXb755htYWVnhyy+/RFpaWr3znDp1Cv3794e5uTkkEgk+++wz7vcfPnwIADA3N2+w180XAwMDteFxG1JpaanWD272vPf5ecRiMTeyYl1e9b1n+239GBkZqQ0TXaNWkWzdujUAcEO2apuaN2Xt2rXcEbNmCg8P5+ZTKBSYP38+1qxZg/DwcCxduvSV12lkZIQVK1aguLgYc+bMqVee1NRUjBkzBlZWVoiIiEBhYSF+/PFH7vdrBjqqrKxskNfNp+zsbI0N4WtmZtYon86+qhe9z8+iVCqRl5f33FE/X/W9Z/vtyyMiZGZm1jliQq0i6e7uDqFQqFUv4Em2trbQ09NDbGzsc+ebO3cuAgIC8NFHH+Hjjz/G999//1qvacqUKejRoweCgoIQGBj40nni4+OhUCgwZ84ctGvXDnp6empfk6gZ9OzSpUvPXf/Lvm6+1IyP3r17d40s38vLC6GhoRpZdkN40fv8LH/99RdUKtVzt9vrvPdsv305CQkJyM3NhZeXV63nahVJmUyGfv36Yd++fY0Srr709PQwffp0HDhwABs3bkRRURGqq6vx8OFDZGRkAAA2bNiAtm3bYuzYsQAef8Tv4uICPz8/btB0AGjVqhXS09Px4MEDFBcXP/eURyAQYN26dRAIBJg3bx7y8/NfKk9NC+H8+fOoqKjA7du3ERERwS3X3Nwc48aNw5EjR/D777+jqKgIcXFx2Lp1a71fN5+OHDkCpVKJoUOHamT548aNQ1JSEv7++2+NLP91veh9rlFVVYXCwkIolUpER0dj3rx5sLe3x7Rp05657Nd579l++3K2bdsGBweHOotknV8BOnz4MAmFwka/taioqIj69u1LrVq1IgAkFAqpQ4cO9MMPP6jNV1lZSZ9//jnZ2dmRS
CQic3NzGjduHN28eZNGjhzJjXvx999/ExHRRx99REKhkACQTCajyMhIInrccbC9vT3p6+uTr68vHTt2jDp16sR9MtimTRv64IMP1NY9bdo0AkAmJia0fPnyF+YhIvr888+pVatWZGJiQu+88w79+uuvBIDat29PqampVFxcTO+//z61bt2aDA0NydfXl7799lsCQDY2NnT9+vUXrufHH38kfX19AkC2tra0Z88ejb5XT1IoFOTk5ESTJk3S6Hp8fHyob9++jd7Jysvuly96n3fs2EEDBgwgCwsLEolE1Lp1a/rXv/5FKSkp3DJWr15NlpaWBIAMDAxo7NixRPT89/7KlStsv30N9+/fJz09Pfr555/rejqvztESiQh9+vRBYWEhrl69qvUXzBl+ffrpp9i8eTOuX7+u0TG9IyMj0atXL6xatQoLFizQ2HqYlkOpVGLgwIHIy8tDdHR0XaMuPnu0xLS0NDI3N6d//etfGqrfTHPw559/kkAgoF27djXK+pYvX04ikahROjphmjeVSkXvvfceSaVSiouLe9Zsz+8FKDg4mIRCIX388cesH0mmluDgYDIwMKh1aqdJKpWK3n//fZJIJKwfSeaVVVZW0pQpU0gsFlNQUNDzZn1xp7vHjh0jPT09mjx5MusWn+EcO3aMJBIJL/uFSqWi7777jgQCAX333XfsAM7US15eHg0YMIAMDQ1fVCCJXnb4hrNnz5KhoSENHDiQUlNTXz8l02RVVVVx997yfYaxadMmEolENGrUKDZ0A/NSLly4QO3btyc7O7vnnWI/6eXHuImOjiZnZ2cyMTGh/fv3v3pKpslKSEig7t27k4GBAW3evJnvOET0eBCw9u3bk4mJCf3++++sVcnUqaCggAICAkggENDo0aPrM05X/UZLLCsr4wZeGjVqFCUnJ9c/LdPkFBcX07fffkv6+vrUo0cPtR5rtEFpaSl9/PHHpKOjQ3379uW1cxZGu1RWVtKmTZuoTZs2ZGlpSYGBgfVdRP3H3SZ6POqYq6sricVi+vDDD0kul7/KYhgtp1QqaevWrWRlZUUmJia0cuVKUigUfMd6pmvXrlH//v0JAA0ZMkStGzGmZVEqlbRz505ydHQkiURCc+fOfdXhK16tSNaE+O2338ja2pqMjY1p4cKFlJaW9qqLY7RIeXk5bd26lZycnEgsFtPcuXOb1IEwJCSEevbsSQKBgN566y06efKkVo0TzmhOfn4+rV27ljp06EAikYjef/99tS/rv4JXL5I1SkpKaPny5dSmTRsSi8U0efJkjY5gx2hOdnY2LVmyhCwsLEgikdB7772ndafW9REUFESDBw8mgUBA7dq1o5UrV1Jubi7fsRgNuH79Os2cOZMMDAzI0NCQZs+eTbdv326IRb9+kaxRWVlJgYGB5O3tTQDIxcWFVqxYwXU4ymgnpVJJISEh5O/vT1KplGQyGc2bN69ZnRXcvn2bu8VOIpHQiBEjaNeuXVRUVMR3NOY1pKWl0c8//0w+Pj4EgDp27EgrVqxo6ANhwxXJGiqVii5cuEBTpkwhAwMD0tXVpbFjx9KxY8cabdhR5vlUKhVFRETQRx99RJaWliQQCKhfv360fft2Kikp4TuexhQXF9POnTtpyJAhJBKJuB6zjx49ygpmE3H//n1av349+fj4cH00BAQE0MWLFzX1zYa6791uKCUlJTh69Ch27dqFS5cuQSKRYNCgQXj77bcxcuTIRhnxjnmsvLwcFy5cwIkTJ3Dy5ElkZGSgQ4cO8Pf3h7+/PxwdHfmO2KhycnJw5MgRHDx4EKGhodDR0YGvry+GDBmCoUOHam3P/C1NZWUlLl++jODgYJw5cwaJiYkwMjLCqFGjMHHiRLz55puaHoEyX6NF8klZWVkICgrCiRMnuO6XvLy8MGDAAPTv3x++vr4wNDRsjCgtQnV1NWJjY3Hx4kVcvHgRf/31F8rKyuDl5cUdpJ4ePa+lysnJwblz53DmzBmcPXsWcrkcN
jY23H7p6+sLFxeXJjdcalNUXl6Oa9euITQ0FFeuXEFoaChKS0vRpUsXDB06FEOGDIGvry8kEkljRWq8Ivmk8vJynD9/HmfOnMHFixeRmJgIkUgEb29v9O/fH7169YKXlxesra0bO1qTVVpaipiYGFy7dg0XL15EaGgoCgoKYG5ujv79++ONN97AiBEj0KZNG76jajWVSoWoqCgEBwcjNDQUV69eRXFxMVq1aoXevXvDx8cHXl5e8PDw4HrxZ14NEeHu3bvcfnvlyhVERkZCoVDA1tYWffr0Qb9+/TB06FDY2tryFZOfIvm0zMxMrsVz6dIlJCcng4jQtm1beHl5cZOrq2udg4e3NIWFhUhISEBUVBQiIyMRGRmJpKQkVFdXw8LCAn369EH//v0xYMAA1gJ6TTUt8rCwMISFheHvv/9Geno6gMe9bnfr1g0eHh7o1q0bXF1d4ejoCJFIxHNq7VNSUoLk5GTExcUhNjYWMTExuH79OoqKiqCjowMXFxf4+vrCx8cHffr0ee5wFo1MO4rk0woLC7k//prpwYMHAABjY2M4OzvDxcWF+7dDhw6wt7fnxt1oDqqrq5GRkYG7d+8iKSkJCQkJSEhIQGJiIh49egQAMDExUTuIeHl5wd7enufkzV9WVhZiY2O5P/aYmBjcuXMHKpUKYrEY7dq1g7OzMzp16sRNDg4OsLa2btYFtLy8HCkpKXjw4AGSkpKQnJyMW7du4datW9zAYfr6+ujatavawcXNzU1rBx6EthbJuuTm5uLmzZtITEzkikViYiK38QHAysoK9vb23GRnZwdra2tYWFjA3NwclpaWMDU15fFVPFZWVga5XI6MjAzI5XLI5XKkpqYiJSWFm9LS0rjhJGoODF26dIGTkxN3gHB0dGStRC1R01K6detWrQJRM2Kgjo4OrK2tYW9vDxsbG9ja2sLW1hbm5ubcPmpmZgYzM7O6On/lTWlpKeRyObKyspCTk4OcnBxkZGTg4cOHSE1NRWpqKh4+fIjc3FzudywtLeHk5MQdJDp37ozOnTujffv20NHR4fHV1FvTKZLPUlhYiHv37nHF5cGDB3jw4AFSUlLw8OHDWqM+6urqwtzcHObm5jAyMoKhoSEMDQ1hamoKQ0NDGBgYcD2xP1lQhUIhZDIZ93N5eTkqKiq4n0tLS1FVVQWFQoGSkhLk5+ejpKQEpaWlKCkpQUFBAQoKCpCZmVlr2FUDAwPY2tqqFXh7e3s4ODjA0dGRXUds4h49esQd+J4uLDX76NNjc8tkMhgbG8PCwgImJiaQSqWQSqWQyWQwNDSEVCrlPug0MjJSa6FKpVK1DzaKi4uhVCq5n8vKylBZWQkiQkFBAUpLS1FWVobi4mIUFRWhrKwMJSUlXEEsLy9Xy6avrw9LS0tun7WxsYGNjQ3s7OxgZ2cHe3t7mJiYaGJT8qHpF8kXUSqVXGstMzMT2dnZyMnJgVwuR3FxMYqLi7kiVvP/8vJyVFdXqw0aVlP8akgkErVTBD09Pejr60MkEsHIyAgmJiZcATY0NISJiQlkMhmsrKyQnJyMVatWYefOnRg3bpw2n2owjaSmINVMhw8fxqFDhzBz5kwQEVfICgsLUVxcjLKyMu5gW1BQgCf/jJ8uik8XzSf3XRMTExgYGEAqlcLY2BjGxsZcAa5p1dZMNa3dFjacS/MvktrK398f586dQ2xsLPsUn1Fz7949uLu7Y8GCBfj+++/5jtPSsSLJl5KSEu5rTufPn29q12kYDVGpVBgwYAAKCwtx7do1rbo22ULl1xp3m2kchoaG2LdvH8LDw/Hjjz/yHYfREqtWrUJERAR2797NCqSWYC1Jnv38889YuHAhLl68CB8fH77jMDxKSEiAp6cnFi9ejM8//5zvOMxj7HSbb0SEMWPGIDo6GjExMewujhZKqVSiV69eEIvF3L3kjFZgp9t8EwgE2LFjB4RCIaZNmwZ2zGqZFi9ejISEBOzcuZMVSC3DiqQWMDU1xZ49e
3DmzBls3LiR7zhMI4uKisLKlSuxatUqdOrUie84zFPY6bYWWbJkCZYvX47w8HB4eHjwHYdpBDW9YVlZWSEkJITdQaV92DVJbaJSqfDmm28iJSUF0dHRMDIy4jsSo2Hz58/Hrl27EBcXp02dOjD/H7smqU2EQiH27duH4uJizJs3j+84jIaFhobi119/xfr161mB1GKsJamFzpw5g+HDh2PXrl3w9/fnOw6jAUVFRVxvOMePH+c7DvNsrCWpjYYOHYqPPvoIs2fPRlJSEt9xGA346KOPUFpaii1btvAdhXkB1pLUUgqFAn379kVpaSmuXbvWrPrKbOmCgoIwcuRIHD58GOPHj+c7DvN87IMbbXbv3j10794dU6dOxS+//MJ3HKYB5ObmwtXVFYMHD8bu3bv5jsO8GDvd1mbt2rXDb7/9hvXr17PrVs3EnDlzoKOjww56TQhrSTYB77//Po4dO4aYmBg2PEMTduDAAfj5+eHUqVMYOnQo33GYl8NOt5uCiooK9OjRAzKZDBcuXGjW46Q0V+np6XBzc8PEiROxYcMGvuMwL4+dbjcFenp62L9/P6KiovCf//yH7zjMKwgICICJiQnrFq8JYkWyiejSpQvWrFmDH374AefPn+c7DlMPW7duRXBwMHbs2MGNS8M0Hex0u4mZNGkSLly4gNjYWFhZWfEdh3mB+/fvw93dHXPmzMGKFSv4jsPUH7sm2dQUFhbCw8MDnTt3xunTp1mHCFqsuroaAwYMQH5+PiIjI9UG42KaDHZNsqmRyWQ4dOgQLly4gNWrV/Mdh3mOlStX4tq1a9i7dy8rkE0YK5JNkLe3N77//nt8+eWXCA8P5zsOU4fo6GgsXrwYP/zwA9zd3fmOw7wGdrrdRBER3n77bcTFxSE2NhampqZ8R2L+p6ysDJ6enrCwsMBff/0FoZC1RZowdrrdVAkEAmzfvh1KpRIzZ87kOw7zhI8//hiZmZnYs2cPK5DNAHsHmzBzc3Ps378ff/zxB3777Te+4zQJKpUKa9euRe/evTWy/DNnzmDr1q3YvHlzi+wjUtPblxfENHlfffUV6enpUWxsLN9RtNqtW7fIx8eHAJC7u3uDLz8rK4ssLS1p2rRpDb7spkDT25cneawl2QwsXrwYXl5emDRpEsrKynjJUF5ervHWw+us4/r16/jiiy8we/ZsdOvWrYGTPb5GPGPGDEilUo10XtHSty+fWJFsBkQiEQ4cOICsrCwsWLCAlwy///47srOztXYd7u7uOHr0KPz8/DTydZwNGzbg9OnT2LlzJ4yNjRt8+S19+/KK77Ys03CCgoJIIBDQvn37XjivSqWi1atXk5OTE+nq6pKJiQmNGjWKEhMTuXnmzp1LYrGYLC0tucfmzJlDUqmUAJBcLiciovnz55Ouri4BIADUvn17+uWXX0gikZC5uTnNmjWLrKysSCKRUK9evejq1asNso5X1aNHjwY9HUxISCB9fX367rvvuMfY9m0+p9usSDYzH374IRkaGlJycvJz5/v2229JV1eX9uzZQwUFBRQXF0fdu3cnMzMzyszM5Obz8/NT+wMjIlq1apXaHxgR0bhx42r9Yc2aNYsMDAwoISGBKioq6ObNm+Tt7U1GRkaUmpraIOt4FQ35R1xVVUXe3t7k5eVFVVVV3ONs+zafIslOt5uZn376CR07doSfnx+qqqrqnKe8vBxr1qzB2LFjMXnyZMhkMri5uWHz5s3IycnB1q1bGyyPSCSCs7MzJBIJXFxcsHHjRhQXF2PHjh0Ntg4+ffXVV0hMTMSBAwcgFosBsO3b3LAi2cxIJBIEBgYiOTkZX331VZ3z3Lx5EyUlJfDy8lJ73NvbG7q6uoiIiNBYPi8vL0il0mYxwFlISAhWr16NX375BR06dOAeZ9u3eWFFshnq0KEDtm7ditWrV+PkyZO1ni8oKACAOrvtMjExQXFxsUbzSSQSyOVyja5D0+RyOaZOnYqxY8fivffeU3uObd/mhRXJZ
mrixInw9/fHjBkzkJ6ervaciYkJANT5x1pQUAAbGxuN5VIoFBpfh6YREd577z2IxeI6T53Z9m1eWJFsxjZu3IjWrVtj0qRJqK6u5h53dXWFoaEhIiMj1eaPiIhAVVUVPD09ucdEIhEUCkWDZbp48SKICD179tTYOjRtzZo1CA4OxsGDB+u8Z55t3+aFFclmzMDAAIGBgbh27RqWLVvGPa6np4dPPvkEx44dw969e1FUVIT4+HjMnj0b1tbWmDVrFjdvhw4dkJeXh+PHj0OhUEAulyMlJaXWulq1aoX09HQ8ePAAxcXF3B+lSqVCfn4+lEol4uLisGDBAtjZ2WHatGkNto7GFBUVhS+//BLff/89evXqVec8bPs2Mzx/vM40gnXr1pFQKKT//ve/3GMqlYpWrVpFHTt2JLFYTKampjRmzJhaXx3Kzc2lAQMGkJ6eHjk6OtLcuXNp4cKFBIA6dOjAfdUkOjqa7O3tSV9fn3x9fSkzM5NmzZpFYrGY2rZtSyKRiIyNjWn06NF09+7dBlvHywoPDycfHx+ytrbmvgtoZWVFvXv3pkuXLr3UMoqLi6lz587Ur18/UiqVz52Xbd/6b18tlce6SmshxowZg8jISMTExMDMzKxR1vnBBx/g8OHDyM3NbZT1adqUKVMQHByM2NhYtGnThu84zW77ainWVVpLsX37dohEIkydOhWNeVx88lpoUxYYGIi9e/fi999/14oCWaO5bF9txopkC2FqaoqDBw8iJCQE69ev5zvOa0tKSoJAIHjhNHHixNde1927dxEQEIAFCxZg5MiRDZBe+zXm9tV6PJ/vM43s+++/J4lEQlFRURpdz6JFi7h7gR0cHOjw4cMaXZ+mVFVVUY8ePcjNzY3Ky8v5jsNpLtu3CWDXJFsalUqFIUOG4P79+4iKitJIjzXNyaeffootW7YgKioKnTp14jsO0/jYNcmWRigUYs+ePSgpKWHDPrzAyZMnsWbNGvz666+sQLZgrCXZQp09exbDhg3D9u3bMXXqVL7jaJ2UlBR4enpi9OjR2LZtG99xGP7ksyLZgn322WfYuHEj/vnnHzg7O/MdR2tUVFTA19cXCoUC4eHhkEqlfEdi+MOKZEumVCrRt29fFBcX49q1a9DX1+c7klaYOXMmDh48iH/++QedO3fmOw7DL3ZNsiUTiUQ4ePAg0tPTsXDhQr7jaIX9+/dj27Zt2LlzJyuQDAD2PckWz87ODlu2bMHGjRvxxx9/8B2HV/Hx8QgICMCnn36KsWPH8h2H0RLsdJsBAMyaNQuBgYGIiYmBg4MD33EaXUlJCby9vWFqaopLly5xvYwzLR67Jsk8VlFRgZ49e0IikSAsLKxFFQkiwoQJExAaGoro6Gituu2Q4R27Jsk8pqenh/379+PGjRtYvHgx33Ea1dq1a/HHH39gz549rEAytbAiyXBcXFzwyy+/YMWKFQgJCeE7TqO4evUqFi1ahKVLl2Lw4MF8x2G0EDvdZmqZPHkyQkJCEBsbC2tra7Xn9u3bhxEjRkAmk/GUrv4yMzNx4cIFTJo0Se3x7OxsdO/eHR4eHjhx4gQEAgFPCRktxk63mdo2b94MExMT+Pn5QaVSAQBKS0sxdepUTJ48GcHBwTwnrJ9Dhw7Bz88Pn3zyCZRKJYDH97D7+/tDJBJh586drEAyz8ZDrxpME/DPP/+Qrq4urVixgmJjY6l9+/YkEolIKBTSu+++y3e8eunevTsJBALS0dGhfv36kVwup6+++ookEglFRkbyHY/RbqwXIObZ1qxZg1WrViE/Px/V1dVcK8zAwAB5eXnQ1dXlOeGL3b17Fx07duQ6GhaLxTAyMkJBQQE2bdrEOvlgXoSdbjN1KywsRHh4OLKyslBZWckVSODxqXdoaCiP6V7e/v37IRKJuJ8VCgWKioogEAha1NecmFfHiiRTy7Vr1+Dm5objx4/XOdSDWCzGiRMneEhWf3v27Kk16p9SqUR1dTXee+89zJw5k40Ky
DwXK5KMmmXLlqF3795IT09Xaz0+SaFQ4MiRI42crP5iY2Nx+/bt586zfft2DBo0CHK5vJFSMU0NK5KMGmdnZxgbG0MofP6ukZ6ejuvXrzdSqldz8ODBF143FQgEiImJwV9//dVIqZimhgVtUAAAABTvSURBVBVJRs2YMWOQnJyMYcOGAcAzvxqjq6ur1afcRIQ9e/agqqqqzudrDgJvvvkmEhISMGHChMaMxzQhrEgytZibm+P48eMIDAyEkZFRnR9wVFVVafUpd2hoKNLT0+t8TiQSwczMDEeOHMGpU6dga2vbyOmYpoQVSeaZ3nnnHSQlJWHgwIF1nn7Hx8fj4cOHPCR7sQMHDtQ61RaJRBAKhZg9ezbu3LmDcePG8ZSOaUpYkWSey9raGmfOnMGmTZugr6+v1qrU0dHByZMneUxXN6VSiUOHDqmdaguFQjg5OeHq1atYt24djIyMeEzINCWsSDIvJBAIMHPmTNy8eRPe3t7Q0dEB8PjWvmPHjvGcrrazZ88iPz8fwOOvK0kkEixbtgyxsbHw9vbmOR3T1LA7bph6qa6uxsqVK/Htt9+iuroaIpEIubm5tVpmBQUFKCsrQ1lZGQoLCwGAK1w1KioqUF5ervaYvr4+9PT01B4zNTUFAMhkMkilUkilUpiYmDwz4+TJk7Fv3z4AwIgRI7Bp0ybY2Ni82gtmWjrW6S7zfAqFAhkZGUhNTYVcLuem5ORknDhxAoWFhejQoQNUKhVKSkpQWlqK0tLSRslmYGAAqVQKIyMjmJmZoXXr1jAxMcGRI0cgkUjg7++PwYMHw9zcHHZ2drC2tmZ32TD1xYpkS6dUKvHgwQPcvn0bt27dQkpKCh4+fIiHDx8iNTUVmZmZqK6u5uaXyWSwsLCAmZkZTE1NkZaWBiKCn58fjI2Noa+vD0NDQ67Vp6+vz7UEjYyM1G4RFIlEtVqgJSUlanfAKJVKFBcXA3jcEi0vL+dapyUlJSgvL0dxcTFycnKQk5PDfZhkaGiInJwcrhULPL4uaWVlBXt7e9jY2MDGxgb29vbo2LEjOnXqBAcHB7V8DANWJFuO8vJy3LhxAzExMUhOTsatW7dw69Yt3L9/nytKlpaWcHBwUCsgNjY2aNu2Lezt7WFhYVFnSywpKQlOTk6N/ZLqVFpaCgMDA+5nhUKB7OxspKamcsX/yQPBgwcPkJWVBeDx9UtHR0d06tQJnTp1QufOneHh4QFXV1c23G7LxYpkc1RYWIhr164hJiYG169fR2xsLJKTk1FdXQ0jIyM4OTlxradOnTpx/zc2NuY7Oi+Kioq4lvTt27eRnJyM27dvIykpCcXFxdDR0UHnzp3RrVs3uLu7w8PDA//3f//XpDoeZl4ZK5LNQXp6Oq5cuYKwsDBcuXIFMTExUKlUMDU1hYuLCzw9PbnJ2dn5hbccMv9feno6oqKiuCkhIQH37t0DALRr1w4+Pj7w9fWFj48PXFxcWOe9zQ8rkk2RXC7HuXPnEBwcjP/+97/IyMiARCKBp6cnevfuDR8fH/Tq1QuWlpZ8R22WsrKycPXqVYSFheHvv/9GVFQUKisrYW1tjUGDBmHIkCEYPHgwLCws+I7KvD5WJJsCIkJERAROnTqF4OBgREdHQyQSwcfHB2+++Sb69OkDLy8vSCQSvqO2SJWVlYiMjERYWBjOnTuHsLAwKJVKdO/eHW+99RaGDx+Onj17slZm08SKpDa7efMmDh8+jL179+Lu3btwdHTE4MGD8cYbb+DNN99k18S0VFlZGf7++2+cP38eJ0+eREJCAmxsbDB27Fi888478PHxYQWz6WBFUtvcv38fv/32Gw4cOIAHDx6gY8eOmDBhAiZMmICuXbvyHY95BfHx8QgMDERgYCBu3boFBwcHTJw4EQEBAWjXrh3f8ZjnY0VSG6hUKpw5cwYbN25EcHAwrK2t4e/vjwkTJsDDw4PveEwDiomJQWBgIPbs2YOMjAwMGTIEs2fPxrBhw9gHatqJFUk+V
VZWYtu2bfjpp5+QkpKCQYMGYfbs2Xj77bfZl5qbOaVSiZMnT2LTpk04f/487O3t8cknnyAgIIBdW9YurEjyoaqqCtu3b8eyZcsgl8sREBCADz/8EJ06deI7GsODW7du4ddff8Vvv/0Gc3NzLFq0CDNmzGgSo1G2AKxINrY//vgDH330ETIzMxEQEIAvvvgCbdu25TsWowXS09OxYsUKbN26FVZWVli7di3GjBnDd6yWjg0p21jS09Mxbtw4jBs3Dv3798ft27exfv16ViAZTps2bbBu3TrcuXMHAwYMwLhx4zB27Nhn9rDONA5WJBvBgQMH0KVLF8TGxuLcuXPYuXMnGzKAeSYbGxvs2LED58+fR1xcHFxcXHDgwAG+Y7VYrEhqEBHhm2++gZ+fH/z9/REfH4833niD71hMEzFw4EDEx8dj2rRp8PPzwzfffFPnOOiMhhGjERUVFTR+/HjS1dWlbdu2Nco6AwICyMTEhACQWCwmb2/vev3+qVOnyNjYmE6cOPHaWfbt20cAqFevXq+9LIZo+/btpKurS++88w5VVFTwHaclyWMtSQ2orq7GpEmTEBISgnPnzmHGjBmNst6tW7ciJCQEADBz5kxcu3atXr9PDdhK2b9/P9q3b4/w8HDcuXOnwZbbUk2fPh0hISEICQmBn5+fWh+fjGaxIqkB33//PU6fPo2TJ0+iX79+fMd5acOHD0dhYSFGjhz5WsvJzc1FQkIClixZAgDYvXt3Q8Rr8fr27YuTJ0/i1KlT+M9//sN3nBaDFckGFh0djaVLl2L16tXo06cP33H+X3v3GtPk2cYB/F85lBYopxZBaBmEQ5gKyClySiDZQLfpkAlDgjrCBjKTzcxFzFic2STGzYxMBss0LjMoJ3cA8cNwzrjJcA4GiJwKyqmcBlUolDKqcL0fDM+7vjjfzWGLcP+SfuDp7X1fEPyHPof7MojS0lI8//zz2Lx5M8zMzFBQUMDOpS2Q8PBw5OTkIDs7G7/99puhy1kWWEgusH379iEkJAQZGRmGLoVDRPj444/h7e0NPp8PGxsbxMbGoq2tjRtTVVUFmUwGHo+HTz/9FACQn5/P9ZEpLy/Hxo0bIRKJ4Ozs/NCrrYWFhYiLi4OlpSWio6PR3d2NK1euzBv3T+b/8ccfERwcDKFQCJFIhLVr12J8fByBgYHg8Xjg8Xjw8fGBQqF4YE0HDx6Era0tzMzMcOjQIQD3T4scOHAAMpkMAoEAPj4+KCkpAQB8+OGHXP+c4eFh7N27F05OTpDL5X9Zi76kp6cjJCQE+/bt09uay5qBT4ouKW1tbcTj8aiystJgNdTU1BAA2r17N3fswIEDZGpqSgUFBTQ2NkaNjY3k7+9PYrGYhoaGuHEKhYIAUG5uLncsKyuLANAPP/xAKpWKhoeHKSIigszNzUmr1c5bv6enhyQSCd27d4+IiAoKCggApaamPrDevzO/Wq0mkUhER44coampKRoaGqK4uDgaGRkhIqKwsDCSSqU0OzvLzVtRUUGenp46ax07doyys7O5r99++23i8/n01Vdf0ejoKL3zzju0YsUKqqmp0antzTffpNzcXIqLi6Pa2tqH1qIvlZWVBIBaW1v1uu4ydIeF5AL65JNPyNbWlmZmZgxWw/+GpEajIQsLC0pMTNQZ9+uvvxIAev/997ljDwvJqakp7lheXh4BoJs3b85b//Dhw5SSksJ9rVKpiM/nk0gkIo1GM2/835m/qamJAND58+cf+D2fOHGCANClS5e4Y1u3biUAVF1dzR0LCwujnp4eIiKampoioVCo83PRaDTE5/Pp9ddf/8va/l8t+jI7O0tisZhycnIMWscywK5uL6TW1lb4+fktqt1cmpuboVarERgYqHM8KCgIpqamuHbt2j+ec+6Z4j93NZwz91F7jkgkQnR0NMbHx1FeXv5I87u5ucHe3h7Jyck4ePAguru7dca//PLLEAqF3AWi0dFR3Lp1C3w+nzvW3d0NU1NTyGQyA
IBcLodGo8GaNWu4eQQCARwcHHROQ/yv/1eLvvB4PPj5+aGlpcUg6y8ni+d/8xKgVqt1OvUZ0tymrmNjYwAACwuLeWOsra25dq0LoampCTdu3MCmTZu484Q8Hg8VFRUAHv0qt0AgwKVLlxAeHo7s7Gy4ubkhMTERU1NTAO63qo2Li8PXX38NjUaDoqIipKamYtOmTSgpKcH09DSKioqQnJzMzTnXG/zdd9/VqbWnpwcajeaRa9EnS0tLqNVqva+73LCQXEASiQRDQ0OGLgMAYGdnB+B+EAJ4YBiOjY3B2dl5wdY8c+YMtm3bBiLSed25cwcCgQAXLlx45J/P6tWrUVFRgYGBAWRmZqKkpARHjx7l3k9JScHExAS+/fZbFBUVITExESkpKRgdHcX58+dRVlaGrVu3cuMlEgkAICcnZ169V69e/Ve16Mvg4CDro6MHLCQXUHBwMBoaGvR6pRMAdu3ahf7+fgD396gEwO1ivmbNGlhYWKC2tlbn31y7dg1arRYBAQELUgMRobi4GLt37573no2NDeLj4zEzM4PCwsJ/PPfAwAD3sVIikeDw4cPw9/fX+agZFRUFFxcXHDp0CPb29rCzs0NMTAwcHR3x3nvvwdXVVadlrlQqhZmZGRoaGha8Fn0YHx9HXV0dgoOD9brucsRCcgFt3LgRpqamOHXqlN7XLiwshFqtRnFxMTw8PPDCCy8AAMzMzLB371588803OH36NMbHx3Hjxg1kZGTA0dER6enpC7J+dXU1RCIRwsLCHvj+3C1Rj/KRe2BgALt27UJbWxu0Wi3q6+vR09OD9evXc2N4PB527tyJtrY27Ny5EwBgZGSE7du3o7m5Gdu3b9eZ08zMDCkpKSgqKkJ+fj7Gx8cxMzODvr4+DA4O/qta9OHUqVMwMTHBc889p9d1lyWDXTNaovbs2UP29vY0OjqqtzWPHj1K1tbWxOfzacOGDdTZ2anz/uzsLH300Ufk4eFBJiYmZGNjQ1u2bCG5XM6Nyc3NJQcHBwJAQqGQNm/eTHl5eSQUCgkAeXh40K1bt+j48eMkEokIALm4uFB7ezulpqaSubk5GRsbk6+vL9XV1ems/8EHH5CjoyMBIADk5OREeXl5f3v+7u5uCg0NJRsbGzIyMqJVq1ZRVlYWd5vRnM7OTrK3t9e5Nam1tZXs7e3p7t27835u09PTlJmZSTKZjIyNjUkikdBLL71Ezc3NdOTIERIIBASApFIpFRQUEBH97Voep7GxMVq5ciXt2bNHb2suY3fYprsLbHR0FKtXr0ZYWBhKS0tZVzxmQREREhISUFVVhZaWFtjY2Bi6pKWObbq70GxsbHDmzBmUlZUhKyvL0OUwS0xWVhbKyspQWFjIAlJPWLepxyAqKgonT57EK6+8ArVajZycHBgZGRm6LOYJNjMzg7feegu5ubn48ssvERUVZeiSlg0Wko/Jjh07YG5ujh07dkAul6O0tBRWVlaGLot5AqnVam7rvdOnTyMpKcnQJS0r7JzkY1ZbW4sXX3wR5ubmOHHixBO1dRpjeD/99BNee+01TExMoLy8HEFBQYYuablh5yQft8DAQNTU1MDb2xtRUVFIS0vjnoJhmL8yNjaG9PR0REZGwsvLCzU1NSwgDYSFpB6sWrUK5eXlKCkpQUVFBby9vXHs2DH88ccfhi6NWWSmp6eRm5sLb29vnDt3DsXFxTh37hzrqmlALCT1KD4+Hi0tLUhKSsL+/fvh7u6OvLw87ikZZvnSarXIz8+Hu7s7MjMzsW3bNrS0tCAhIcHQpS177JykgQwODnKN6O3s7JCeno5XX30Vjo6Ohi6N0aPBwUGcPHkSn3/+OZRKJdLS0rB//372e7B4jLKQNLD+/n4cO3YMX3zxBVQqFWJjY5GRkYHIyEh2I/oSRUS4fPkyPvvsM5SVlcHKygopKSl44403FnTDEWZBsJBcLLRaLcrLy3H8+HFcvHgRUqkUW7ZsQXx8PMLCwlhgLgHNzc04e/YsC
gsL0dHRgYCAAKSlpSE5ORlCodDQ5TEPxkJyMWpsbERRURFKSkrQ1dUFd3d3JCQkIDY2FgEBAYtqU1/mr83OzqKurg5lZWUoKSnBzZs34erqioSEBCQlJXE7NTGLGgvJxa6mpgalpaU4e/Ysenp6IBaL8eyzz2LDhg2Ijo6Gg4ODoUtk/mRoaAgXLlxAZWUlvv/+e4yMjEAmkyE+Ph4JCQlsa7MnDwvJJ0lTUxO+++47VFZWoqqqCtPT0/Dx8UFERARCQkIQHh7OtSdg9EOhUKCqqgrV1dW4cuUKGhsbYWpqioiICMTExCAmJgZr1641dJnMo2Mh+aTSaDS4fPkyLl68iOrqatTV1eHu3btwdnZGWFgYQkNDsW7dOvj4+LDHIReISqVCY2Mj6uvrcfXqVVRVVaGvrw8mJibw9/dHaGgonnnmGURGRrJzjEsHC8mlQqPRoKamBj///DOqq6vxyy+/4Pbt2+DxeHB1dYWfnx98fX3h5+eHp59+Gk899RSMjdmj+w9y7949dHd3o6WlBQ0NDbh+/ToaGhrQ1dUFIoKtrS3Wr1+P0NBQhIeHIygoiIXi0sVCcinr7e3F9evXuVd9fT06OztBRDA1NYWrqyu8vLzg6ekJDw8PeHp6wsXFBU5OTlzHwqVKq9ViYGAA3d3d6OjoQHt7O9rb2yGXy9HV1QWtVgsejwc3NzesW7cOvr6+3Iud0lhWWEguNxMTE5DL5ejo6IBcLufCoaOjg+vNw+Px4ODgAGdnZzg7O0MqlUIqlWLlypUQi8UQi8Wwt7eHWCxeNN0h50xOTkKpVGJ4eBhKpRJKpRK///47FAoFFAoF+vv7oVAoMDQ0hLlffUtLS3h6enIvLy8veHh4wMvLC5aWlgb+jhgDYyHJ/NfQ0BB6e3vR19cHhUKB3t5eLlR6e3sxMjIy7xFKgUAAOzs7iMViWFhYQCAQwNraGubm5hAKhbC0tISlpSWMjY1hZGSk04wLuN/Nce4eUCKat/nHXO+ZmZkZjI+PY2JiAhqNBpOTk1CpVNBoNFCr1bh9+zaUSuW81q58Ph8SiQQymYwLfZlMBqlUCicnJ7i4uLA7BJiHYSHJ/DMTExMYGRnByMgIlEolF05KpRKTk5PQaDRQqVRQq9VcgKlUKszOzmJ6elqnp/Xs7CxUKpXO/FZWVjr3gQqFQvD5fKxYsQJWVlawsLCAUCiEhYUFrKysIBQKYW5uzv2FOxfYEokEEomE/SXI/FssJBmGYR6C7SfJMAzzMCwkGYZhHoKFJMMwzEMYAzhr6CIYhmEWqcn/AIW1+j6FlqSOAAAAAElFTkSuQmCC\n", + "text/plain": [ + "" ] - }, + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a text passage\n", + "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" + ], + "metadata": { + "id": "strPNduPoBLe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "9YiK75tSoOGA", + "outputId": "65362f83-4313-4ac1-f6ce-9b92522598c1", + "colab": { + 
"base_uri": "https://localhost:8080/" + } + }, + "execution_count": 57, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "RyeK3s28_X1C" - }, - "source": [ - "## About us\n", - "\n", - "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", - "\n", - "We bring NLP to the industry via open source! \n", - "Our focus: Industry specific language models & large scale QA systems. \n", - " \n", - "Some of our other work: \n", - "- [German BERT](https://deepset.ai/german-bert)\n", - "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", - "- [FARM](https://github.com/deepset-ai/FARM)\n", - "\n", - "Get in touch:\n", - "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", - "\n", - "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: Who is Aleksandar Trifunovic?\n", + "Answers:\n", + "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", + " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", + " 'professional basketball coach and former player .'},\n", + " { 'answer': 'Johnny Höglin',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson 
Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Ivar Eriksen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Magne Thomassen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen 
Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': '5',\n", + " 'context': Position # Player Moving from\n", + "0 F 12 Nikola Kalinić Radnički Kragujevac\n", + "1 SF 6 Nemanja Dangubić Mega Vizura\n", + "2 C 33 Maik Zirbes Brose Baskets\n", + "3 PG 3 Marcus Williams Lokomotiv Kuban\n", + "4 PG 24 Stefan Jović Radnički Kragujevac\n", + "5 C 14 Đorđe Kaplanović FMP\n", + "6 SF 5 Nikola Čvorović FMP\n", + "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", + "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", + " { 'answer': 'Vasile Sărucan',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Belgium',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 
Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Poland',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Hafþór Júlíus Björnsson',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Estonia',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Iceland',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", 
+ "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman',\n", + " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman . He is currently '\n", + " 'playing with Piráti Chomutov of the Czech Extral'},\n", + " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player',\n", + " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player . In 2010 , he '\n", + " 'played for FC Angusht Nazran in the'}]\n" + ] } - ], - "metadata": { - "accelerator": "GPU", + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a table\n", + "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" + ], + "metadata": { + "id": "QYOHDSmLpzEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "4kw53uWep3zj", + "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", "colab": { - "name": "Tutorial15_TableQA.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" + "base_uri": "https://localhost:8080/" } + }, + "execution_count": 67, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: What is Cuba's national tree?\n", + "Answers:\n", + "[ { 'answer': 'Cuban royal palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... 
Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", + " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", + " 'evergreen tree native to western Cuba in the Cuban pine '\n", + " 'forests ecoregion .'},\n", + " { 'answer': \"Glenn O'Brien\",\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... 
The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... 
Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Belize',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... 
Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Palmyra palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Guadeloupe',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Basse-Terre',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... 
English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'East Caribbean dollar',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Jenkins',\n", + " 'context': NRHP reference number ... County\n", + "0 72000402 ... Wilkes\n", + "1 ... Meriwether\n", + "2 ... Bartow\n", + "3 71000280 ... Jenkins\n", + "4 ... Chatham\n", + "5 89002015 ... Thomas\n", + "6 ... Glynn\n", + "7 75000615 ... Walton\n", + "8 84001156 ... Sumter\n", + "9 79000713 ... Cobb\n", + "10 82002491 ... Twiggs\n", + "11 74000703 ... Taliaferro\n", + "12 80001039 ... Floyd\n", + "13 90000805 ... Gwinnett\n", + "14 73000620 ... Decatur\n", + "15 79000731 ... Houston\n", + "16 95000741 ... Grady\n", + "17 97000559 ... Greene\n", + "18 74000662 ... Brooks\n", + "19 75000616 ... Washington\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", + " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", + " 'perennial plant in the family Primulaceae , native to '\n", + " 'Northern Europe and northern Asia , and '},\n", + " { 'answer': 'Poospiza',\n", + " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", + " 'the South American lowlands and the Andes mountains . 
'\n", + " 'Generally they are arboreal feeders in '},\n", + " { 'answer': 'golden-crowned sparrow',\n", + " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", + " 'a large American sparrow found in the western part of '\n", + " 'North America .'},\n", + " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush',\n", + " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", + " 'flower heads . It is a rare variety '},\n", + " { 'answer': 'rain',\n", + " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", + " 'the southeastern rain forest of the Amazon in Puerto '\n", + " 'Maldonado , Tambopata , the Sacred Valley'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RyeK3s28_X1C" + }, + "source": [ + "## About us\n", + "\n", + "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", + "\n", + "We bring NLP to the industry via open source! \n", + "Our focus: Industry specific language models & large scale QA systems. 
\n", + " \n", + "Some of our other work: \n", + "- [German BERT](https://deepset.ai/german-bert)\n", + "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", + "- [FARM](https://github.com/deepset-ai/FARM)\n", + "\n", + "Get in touch:\n", + "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", + "\n", + "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "Tutorial15_TableQA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file diff --git a/tutorials/Tutorial15_TableQA.py b/tutorials/Tutorial15_TableQA.py index fb1ac2d68a..0ae1bbb3d0 100644 --- a/tutorials/Tutorial15_TableQA.py +++ b/tutorials/Tutorial15_TableQA.py @@ -115,7 +115,6 @@ def read_ottqa_tables(filename): prediction = table_qa_pipeline.run("How many twin buildings are under construction?") print_answers(prediction, details="minimum") - ### Pipeline for QA on Combination of Text and Tables # We are using one node for retrieving both texts and tables, the TableTextRetriever. 
# In order to do question-answering on the Documents coming from the TableTextRetriever, we need to route From d24fb22f53d9f83d50b63657479524725651127f Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Mon, 28 Feb 2022 17:27:27 +0100 Subject: [PATCH 05/14] Remove branch from installation path in Tutorial --- tutorials/Tutorial15_TableQA.ipynb | 2676 ++++++++++++++-------------- 1 file changed, 1338 insertions(+), 1338 deletions(-) diff --git a/tutorials/Tutorial15_TableQA.ipynb b/tutorials/Tutorial15_TableQA.ipynb index a292658e12..c14b322e80 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -1,1359 +1,1359 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "DeAkZwDhufYA" - }, - "source": [ - "# Open-Domain QA on Tables\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", - "\n", - "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbR3bETlvi-3" - }, - "source": [ - "### Prepare environment\n", - "\n", - "#### Colab: Enable the GPU runtime\n", - "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", - "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HW66x0rfujyO" - }, - "outputs": [], - "source": [ - "# Make sure you have a GPU running\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_ZXoyhOAvn7M" - }, - "outputs": [], - "source": [ - "# Install the latest release of Haystack in your own environment\n", - "#! 
pip install farm-haystack\n", - "\n", - "# Install the latest master of Haystack\n", - "!pip install --upgrade pip\n", - "!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab]\n", - "\n", - "# The TaPAs-based TableReader requires the torch-scatter library\n", - "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", - "\n", - "# Install pygraphviz for visualization of Pipelines\n", - "!apt install libgraphviz-dev\n", - "!pip install pygraphviz" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K_XJhluXwF5_" - }, - "source": [ - "### Start an Elasticsearch server\n", - "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "frDqgzK7v2i1" - }, - "outputs": [], - "source": [ - "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", - "from haystack.utils import launch_es\n", - "\n", - "launch_es()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "S4PGj1A6wKWu" - }, - "outputs": [], - "source": [ - "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.9.2\n", - "\n", - "import os\n", - "from subprocess import Popen, PIPE, STDOUT\n", - "\n", - "es_server = Popen(\n", - " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", - ")\n", - "# wait until ES has started\n", - "! 
sleep 30" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RmxepXZtwQ0E" - }, - "outputs": [], - "source": [ - "# Connect to Elasticsearch\n", - "from haystack.document_stores import ElasticsearchDocumentStore\n", - "\n", - "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", - "document_index = \"document\"\n", - "document_store = ElasticsearchDocumentStore(\n", - " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFh26LIlxldw" - }, - "source": [ - "## Add Tables to DocumentStore\n", - "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", - "\n", - "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM63uwbd8zd6" - }, - "outputs": [], - "source": [ - "# Let's first fetch some tables that we want to query\n", - "# Here: 1000 tables from OTT-QA\n", - "from haystack.utils import fetch_archive_from_http\n", - "\n", - "doc_dir = \"data\"\n", - "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", - "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SKjw2LuXxlGh", - "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Result ... Score\n", - "0 Winner ... 6-1 , 6-1\n", - "1 Winner ... 
6-2 , 4-6 , 6-3\n", - "2 Winner ... 6-2 , 6-2\n", - "3 Runner-up ... 3-6 , 2-6\n", - "4 Winner ... 6-7 , 6-3 , 6-0\n", - "5 Winner ... 6-1 , 6-0\n", - "6 Winner ... 6-2 , 2-6 , 6-2\n", - "7 Winner ... 6-0 , 6-4\n", - "\n", - "[8 rows x 8 columns]\n", - "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" - ] - } - ], - "source": [ - "# Add the tables to the DocumentStore\n", - "\n", - "import json\n", - "from haystack import Document\n", - "import pandas as pd\n", - "\n", - "\n", - "def read_ottqa_tables(filename):\n", - " processed_tables = []\n", - " with open(filename) as tables:\n", - " tables = json.load(tables)\n", - " for key, table in tables.items():\n", - " current_columns = table[\"header\"]\n", - " current_rows = table[\"data\"]\n", - " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", - " current_doc_title = table[\"title\"]\n", - " current_section_title = table[\"section_title\"]\n", - " document = Document(\n", - " content=current_df,\n", - " content_type=\"table\",\n", - " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", - " id=key,\n", - " )\n", - " processed_tables.append(document)\n", - "\n", - " return processed_tables\n", - "\n", - "\n", - "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", - "document_store.write_documents(tables, index=document_index)\n", - "\n", - "# Showing content field and meta field of one of the Documents of content_type 'table'\n", - "print(tables[0].content)\n", - "print(tables[0].meta)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hmQC1sDmw3d7" - }, - "source": [ - "## Initalize Retriever, Reader, & Pipeline\n", - "\n", - "### Retriever\n", - "\n", - "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", - "They use some simple but fast algorithm.\n", - "\n", - "**Here:** We use the `TableTextRetriever` capable of 
retrieving relevant content among a database\n", - "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", - "\n", - "**Alternatives:**\n", - "\n", - "- `ElasticsearchRetriever` that uses BM25 algorithm\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EY_qvdV6wyK5" - }, - "outputs": [], - "source": [ - "from haystack.nodes.retriever import TableTextRetriever\n", - "\n", - "retriever = TableTextRetriever(\n", - " document_store=document_store,\n", - " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", - " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", - " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", - " embed_meta_fields=[\"title\", \"section_title\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jasi1RM2zIJ7" - }, - "outputs": [], - "source": [ - "# Add table embeddings to the tables in DocumentStore\n", - "document_store.update_embeddings(retriever=retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XM-ijy6Zz11L" - }, - "outputs": [], - "source": [ - "## Alternative: ElasticsearchRetriever\n", - "# from haystack.nodes.retriever import ElasticsearchRetriever\n", - "# retriever = ElasticsearchRetriever(document_store=document_store)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YHfQWxVI0N2e", - "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": 
[ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the Retriever\n", - "from haystack.utils import print_documents\n", - "\n", - "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", - "# Get highest scored table\n", - "print(retrieved_tables[0].content)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zbwkXScm2-gy" - }, - "source": [ - "### Reader\n", - "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", - "\n", - "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 
1 or close to 1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4APcRoio2RxG" - }, - "outputs": [], - "source": [ - "from haystack.nodes import TableReader\n", - "\n", - "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ILuAXkyN4F7x", - "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... 
Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", - "\n", - "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", - "print(table_doc.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ilbsecgA4vfN", - "outputId": "e0095547-fb82-4b76-f826-284bcff61257" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ ]\n" - ] - } - ], - "source": [ - "from haystack.utils import print_answers\n", - "\n", - "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", - "print_answers(prediction, details=\"all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jkAYNMb7R9qu" - }, - "source": [ - "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", - "\n", - "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "It8XYT2ZTVJs", - "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Predicted answer: 12\n", - "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" - ] - } - ], - "source": [ - "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", - "print(f\"Meta field: {prediction['answers'][0].meta}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgmG7pzL5ceh" - }, - "source": [ - "### Pipeline\n", - "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", - "\n", - "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "G-aZZvyv4-Mf" - }, - "outputs": [], - "source": [ - "# Initialize pipeline\n", - "from haystack import Pipeline\n", - "\n", - "table_qa_pipeline = Pipeline()\n", - "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "m8evexnW6dev", - "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ { 'answer': '12',\n", - " 'context': Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '7',\n", - " 'context': Building or structure ... 
Listing\n", - "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", - "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", - "2 Jensen Block ... Seattle landmark\n", - "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", - "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", - "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", - "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", - "\n", - "[7 rows x 3 columns]},\n", - " { 'answer': '8',\n", - " 'context': Years Venue Location\n", - "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", - "1 1987-88 Navesink Country Club Middletown , New Jersey\n", - "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", - "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", - "4 1982 Wykagyl Country Club New Rochelle , New York\n", - "5 1981 Ridgewood Country Club Paramus , New Jersey\n", - "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", - "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", - " { 'answer': '8',\n", - " 'context': Model Specification ... Prime mover Power output\n", - "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", - "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", - "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", - "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", - "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", - "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", - "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", - "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", - "\n", - "[8 rows x 7 columns]},\n", - " { 'answer': '10',\n", - " 'context': Name or designation ... Notes\n", - "0 Aluminum Overcast ... One of only ten flyable B-17s\n", - "1 Avro Lancaster PA474 ... 
One of only two Lancasters in flying condition in the world\n", - "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", - "3 Douglas DC-7B N836D ... \n", - "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", - "5 FIFI ... One of only two B-29s flying\n", - "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", - "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", - "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", - "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", - "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", - "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", - "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", - "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", - "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", - "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", - "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", - "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", - "18 Yankee Lady ... Flyable\n", - "\n", - "[19 rows x 6 columns]},\n", - " { 'answer': '13',\n", - " 'context': N Year Country ... Link Remark K\n", - "0 003+ 2013 INDIA ... LK RK K\n", - "1 005 2006 USA ... LK RK K\n", - "2 010 2014 ZAF ... LK RK K\n", - "3 020 2010 USA ... LK RK K\n", - "4 030 201 ? USA ... LK RK K\n", - "5 040 2007 USA ... 
LK RK K\n", - "6 042 2004 USA ... LK Only G-S With Large Battery K\n", - "7 050 201 ? USA ... LK RK K\n", - "8 100 20 ? ? USA ... LK RK K\n", - "9 200 20 ? ? USA ... LK RK K\n", - "10 300 2013 EUR ... LK RK K\n", - "11 400 20 ? ? USA ... LK RK K\n", - "12 995 20 ? ? USA ... LK RK K\n", - "\n", - "[13 rows x 12 columns]},\n", - " { 'answer': '5',\n", - " 'context': Team ... Capacity\n", - "0 Barnsley ... 23,009\n", - "1 Blackpool ... 16,750\n", - "2 Bradford City ... 25,136\n", - "3 Burton Albion ... 6,912\n", - "4 Bury ... 11,840\n", - "5 Chesterfield ... 10,400\n", - "6 Colchester United ... 10,105\n", - "7 Coventry City ... 32,500\n", - "8 Crewe Alexandra ... 10,066\n", - "9 Doncaster Rovers ... 15,231\n", - "10 Fleetwood Town ... 5,311\n", - "11 Gillingham ... 11,582\n", - "12 Millwall ... 20,146\n", - "13 Oldham Athletic ... 13,512\n", - "14 Peterborough United ... 14,319\n", - "15 Port Vale ... 18,947\n", - "16 Rochdale ... 10,249\n", - "17 Scunthorpe United ... 9,183\n", - "18 Sheffield United ... 32,702\n", - "19 Shrewsbury Town ... 9,875\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': '7',\n", - " 'context': Resource Name ... Added\n", - "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", - "1 John M. Beasley House ... March 5 , 1996\n", - "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", - "3 Austin House ... February 5 , 1998\n", - "4 Reid-Woods House ... August 31 , 2000\n", - "5 Villa Serena Apartments ... September 29 , 2000\n", - "6 Paul M. Souder House ... November 2 , 2000\n", - "7 Stevens-Gilchrist House ... August 17 , 2001\n", - "\n", - "[8 rows x 3 columns]},\n", - " { 'answer': '19',\n", - " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", - "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", - "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", - "2 Cowen Park Bridge ... 15th Avenue NE\n", - "3 First Avenue South Bridge ... 
State Route 99\n", - "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", - "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", - "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", - "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", - "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", - "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", - "10 Magnolia Bridge ... W Garfield Street\n", - "11 Montlake Bridge ... State Route 513\n", - "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", - "13 Salmon Bay Bridge ... BNSF Railway\n", - "14 Ship Canal Bridge ... Interstate 5\n", - "15 Schmitz Park Bridge ... SW Admiral Way\n", - "16 Spokane Street Bridge ... SW Spokane Street\n", - "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", - "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", - "19 University Bridge ... Eastlake Avenue NE\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '8',\n", - " 'context': Location ... Comments\n", - "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", - "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", - "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", - "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", - "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", - "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", - "6 Minehead ... Known as Somerwest World 1986-1998 . 
Still open as Butlins Minehead 30 April...\n", - "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", - "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", - "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", - "\n", - "[10 rows x 4 columns]}]\n" - ] - } - ], - "source": [ - "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", - "print_answers(prediction, details=\"minimum\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Open-Domain QA on Text and Tables\n", - "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", - "\n", - "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." - ], - "metadata": { - "id": "8uMzl9Ml_D1B" - } - }, - { - "cell_type": "code", - "source": [ - "# Add 1,000 text passages from OTT-QA to our document store.\n", - "\n", - "def read_ottqa_texts(filename):\n", - " processed_passages = []\n", - " with open(filename) as passages:\n", - " passages = json.load(passages)\n", - " for title, content in passages.items():\n", - " title = title[6:]\n", - " title = title.replace(\"_\", \" \")\n", - " document = Document(\n", - " content=content,\n", - " content_type=\"text\",\n", - " meta={\"title\": title}\n", - " )\n", - " processed_passages.append(document)\n", - " \n", - " return processed_passages\n", - "\n", - "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", - "document_store.write_documents(passages, index=document_index)" - ], - "metadata": { - "id": "4CBcIjIq_uFx" - }, - "execution_count": 16, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" - ], - "metadata": { - "id": "j1TaNF7SiKgH" - }, - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Pipeline for QA on Combination of Text and Tables\n", - "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", - "\n", - "To achieve this, we make use of two additional nodes:\n", - "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", - "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." - ], - "metadata": { - "id": "c2sk_uNHj0DY" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "DeAkZwDhufYA" + }, + "source": [ + "# Open-Domain QA on Tables\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", + "\n", + "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbR3bETlvi-3" + }, + "source": [ + "### Prepare environment\n", + "\n", + "#### Colab: Enable the GPU runtime\n", + "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", + "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HW66x0rfujyO" + }, + "outputs": [], + "source": [ + "# Make sure you have a GPU running\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ZXoyhOAvn7M" + }, + "outputs": [], + "source": [ + "# Install the latest release of Haystack in your own environment\n", + "#! pip install farm-haystack\n", + "\n", + "# Install the latest master of Haystack\n", + "!pip install --upgrade pip\n", + "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", + "\n", + "# The TaPAs-based TableReader requires the torch-scatter library\n", + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", + "\n", + "# Install pygraphviz for visualization of Pipelines\n", + "!apt install libgraphviz-dev\n", + "!pip install pygraphviz" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K_XJhluXwF5_" + }, + "source": [ + "### Start an Elasticsearch server\n", + "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "frDqgzK7v2i1" + }, + "outputs": [], + "source": [ + "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", + "from haystack.utils import launch_es\n", + "\n", + "launch_es()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "S4PGj1A6wKWu" + }, + "outputs": [], + "source": [ + "# In Colab / No Docker environments: Start Elasticsearch from source\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", + "\n", + "import os\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "\n", + "es_server = Popen(\n", + " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", + ")\n", + "# wait until ES has started\n", + "! 
sleep 30" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmxepXZtwQ0E" + }, + "outputs": [], + "source": [ + "# Connect to Elasticsearch\n", + "from haystack.document_stores import ElasticsearchDocumentStore\n", + "\n", + "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", + "document_index = \"document\"\n", + "document_store = ElasticsearchDocumentStore(\n", + " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fFh26LIlxldw" + }, + "source": [ + "## Add Tables to DocumentStore\n", + "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", + "\n", + "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM63uwbd8zd6" + }, + "outputs": [], + "source": [ + "# Let's first fetch some tables that we want to query\n", + "# Here: 1000 tables from OTT-QA\n", + "from haystack.utils import fetch_archive_from_http\n", + "\n", + "doc_dir = \"data\"\n", + "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", + "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "SKjw2LuXxlGh", + "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", - "\n", - "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", - "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", - "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", - "# that they are not capable of doing aggregations over multiple table cells.\n", - "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", - "split_documents = SplitDocumentList()\n", - "join_answers = JoinAnswers()" - ], - "metadata": { - "id": "Ej_j8Q3wlxXE" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Result ... Score\n", + "0 Winner ... 6-1 , 6-1\n", + "1 Winner ... 6-2 , 4-6 , 6-3\n", + "2 Winner ... 6-2 , 6-2\n", + "3 Runner-up ... 3-6 , 2-6\n", + "4 Winner ... 6-7 , 6-3 , 6-0\n", + "5 Winner ... 6-1 , 6-0\n", + "6 Winner ... 6-2 , 2-6 , 6-2\n", + "7 Winner ... 
6-0 , 6-4\n", + "\n", + "[8 rows x 8 columns]\n", + "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" + ] + } + ], + "source": [ + "# Add the tables to the DocumentStore\n", + "\n", + "import json\n", + "from haystack import Document\n", + "import pandas as pd\n", + "\n", + "\n", + "def read_ottqa_tables(filename):\n", + " processed_tables = []\n", + " with open(filename) as tables:\n", + " tables = json.load(tables)\n", + " for key, table in tables.items():\n", + " current_columns = table[\"header\"]\n", + " current_rows = table[\"data\"]\n", + " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", + " current_doc_title = table[\"title\"]\n", + " current_section_title = table[\"section_title\"]\n", + " document = Document(\n", + " content=current_df,\n", + " content_type=\"table\",\n", + " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", + " id=key,\n", + " )\n", + " processed_tables.append(document)\n", + "\n", + " return processed_tables\n", + "\n", + "\n", + "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", + "document_store.write_documents(tables, index=document_index)\n", + "\n", + "# Showing content field and meta field of one of the Documents of content_type 'table'\n", + "print(tables[0].content)\n", + "print(tables[0].meta)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hmQC1sDmw3d7" + }, + "source": [ + "## Initialize Retriever, Reader, & Pipeline\n", + "\n", + "### Retriever\n", + "\n", + "Retrievers help narrow down the scope for the Reader to a subset of tables where a given question could be answered.\n", + "They use some simple but fast algorithm.\n", + "\n", + "**Here:** We use the `TableTextRetriever` capable of retrieving relevant content among a database\n", + "of texts and tables using dense embeddings. 
It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", + "\n", + "**Alternatives:**\n", + "\n", + "- `ElasticsearchRetriever` that uses BM25 algorithm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EY_qvdV6wyK5" + }, + "outputs": [], + "source": [ + "from haystack.nodes.retriever import TableTextRetriever\n", + "\n", + "retriever = TableTextRetriever(\n", + " document_store=document_store,\n", + " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", + " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", + " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", + " embed_meta_fields=[\"title\", \"section_title\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jasi1RM2zIJ7" + }, + "outputs": [], + "source": [ + "# Add table embeddings to the tables in DocumentStore\n", + "document_store.update_embeddings(retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XM-ijy6Zz11L" + }, + "outputs": [], + "source": [ + "## Alternative: ElasticsearchRetriever\n", + "# from haystack.nodes.retriever import ElasticsearchRetriever\n", + "# retriever = ElasticsearchRetriever(document_store=document_store)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "YHfQWxVI0N2e", + "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "text_table_qa_pipeline = Pipeline()\n", - "text_table_qa_pipeline.add_node(component=retriever, 
name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", - "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", - "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", - "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" - ], - "metadata": { - "id": "Zdq6JnF5m3aP" - }, - "execution_count": 54, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... 
Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the Retriever\n", + "from haystack.utils import print_documents\n", + "\n", + "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", + "# Get highest scored table\n", + "print(retrieved_tables[0].content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbwkXScm2-gy" + }, + "source": [ + "### Reader\n", + "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the answer. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", + "\n", + "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 1 or close to 1)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4APcRoio2RxG" + }, + "outputs": [], + "source": [ + "from haystack.nodes import TableReader\n", + "\n", + "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ILuAXkyN4F7x", + "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", - "from IPython import display\n", - "\n", - "text_table_qa_pipeline.draw()\n", - "display.Image(\"pipeline.png\")" - ], - "metadata": { - "id": "K4vH1ZEnniut", - "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 540 - } - }, - "execution_count": 55, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 55 - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... 
On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", + "\n", + "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", + "print(table_doc.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ilbsecgA4vfN", + "outputId": "e0095547-fb82-4b76-f826-284bcff61257" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a text passage\n", - "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" - ], - "metadata": { - "id": "strPNduPoBLe" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ ]\n" + ] + } + ], + "source": [ + "from haystack.utils import print_answers\n", + "\n", + "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", + "print_answers(prediction, details=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jkAYNMb7R9qu" + }, + "source": [ + "The offsets in the `offsets_in_document` and `offsets_in_context` fields indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", + "\n", + "In the `Answer`'s meta field, you can find the aggregation operator used to construct the answer (in this case `COUNT`) and the answer cells as strings."
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "It8XYT2ZTVJs", + "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "9YiK75tSoOGA", - "outputId": "65362f83-4313-4ac1-f6ce-9b92522598c1", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 57, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: Who is Aleksandar Trifunovic?\n", - "Answers:\n", - "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", - " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", - " 'professional basketball coach and former player .'},\n", - " { 'answer': 'Johnny Höglin',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Ivar Eriksen',\n", - " 'context': Rank Athlete Country Time\n", - "0 
1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Magne Thomassen',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': '5',\n", - " 'context': Position # Player Moving from\n", - "0 F 12 Nikola Kalinić Radnički Kragujevac\n", - "1 SF 6 Nemanja Dangubić Mega Vizura\n", - "2 C 33 Maik 
Zirbes Brose Baskets\n", - "3 PG 3 Marcus Williams Lokomotiv Kuban\n", - "4 PG 24 Stefan Jović Radnički Kragujevac\n", - "5 C 14 Đorđe Kaplanović FMP\n", - "6 SF 5 Nikola Čvorović FMP\n", - "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", - "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", - " { 'answer': 'Vasile Sărucan',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Belgium',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Poland',\n", - " 'context': Rank Name Nationality Result\n", - "0 
1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Hafþór Júlíus Björnsson',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Estonia',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Iceland',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman',\n", - " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman . 
He is currently '\n", - " 'playing with Piráti Chomutov of the Czech Extral'},\n", - " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player',\n", - " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player . In 2010 , he '\n", - " 'played for FC Angusht Nazran in the'}]\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted answer: 12\n", + "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" + ] + } + ], + "source": [ + "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", + "print(f\"Meta field: {prediction['answers'][0].meta}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgmG7pzL5ceh" + }, + "source": [ + "### Pipeline\n", + "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", + "\n", + "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "G-aZZvyv4-Mf" + }, + "outputs": [], + "source": [ + "# Initialize pipeline\n", + "from haystack import Pipeline\n", + "\n", + "table_qa_pipeline = Pipeline()\n", + "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "m8evexnW6dev", + "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a table\n", - "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" - ], - "metadata": { - "id": "QYOHDSmLpzEg" - }, - "execution_count": null, - "outputs": [] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ { 'answer': '12',\n", + " 'context': Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... 
Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '7',\n", + " 'context': Building or structure ... Listing\n", + "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", + "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", + "2 Jensen Block ... Seattle landmark\n", + "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", + "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", + "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", + "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", + "\n", + "[7 rows x 3 columns]},\n", + " { 'answer': '8',\n", + " 'context': Years Venue Location\n", + "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", + "1 1987-88 Navesink Country Club Middletown , New Jersey\n", + "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", + "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", + "4 1982 Wykagyl Country Club New Rochelle , New York\n", + "5 1981 Ridgewood Country Club Paramus , New Jersey\n", + "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", + "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", + " { 'answer': '8',\n", + " 'context': Model Specification ... Prime mover Power output\n", + "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", + "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", + "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", + "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", + "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", + "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", + "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", + "7 RS-36 DL-701XAP ... 
12-251 1,800 hp ( 1.34 MW )\n", + "\n", + "[8 rows x 7 columns]},\n", + " { 'answer': '10',\n", + " 'context': Name or designation ... Notes\n", + "0 Aluminum Overcast ... One of only ten flyable B-17s\n", + "1 Avro Lancaster PA474 ... One of only two Lancasters in flying condition in the world\n", + "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", + "3 Douglas DC-7B N836D ... \n", + "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", + "5 FIFI ... One of only two B-29s flying\n", + "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", + "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", + "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", + "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", + "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", + "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", + "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", + "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", + "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", + "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", + "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", + "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", + "18 Yankee Lady ... 
Flyable\n", + "\n", + "[19 rows x 6 columns]},\n", + " { 'answer': '13',\n", + " 'context': N Year Country ... Link Remark K\n", + "0 003+ 2013 INDIA ... LK RK K\n", + "1 005 2006 USA ... LK RK K\n", + "2 010 2014 ZAF ... LK RK K\n", + "3 020 2010 USA ... LK RK K\n", + "4 030 201 ? USA ... LK RK K\n", + "5 040 2007 USA ... LK RK K\n", + "6 042 2004 USA ... LK Only G-S With Large Battery K\n", + "7 050 201 ? USA ... LK RK K\n", + "8 100 20 ? ? USA ... LK RK K\n", + "9 200 20 ? ? USA ... LK RK K\n", + "10 300 2013 EUR ... LK RK K\n", + "11 400 20 ? ? USA ... LK RK K\n", + "12 995 20 ? ? USA ... LK RK K\n", + "\n", + "[13 rows x 12 columns]},\n", + " { 'answer': '5',\n", + " 'context': Team ... Capacity\n", + "0 Barnsley ... 23,009\n", + "1 Blackpool ... 16,750\n", + "2 Bradford City ... 25,136\n", + "3 Burton Albion ... 6,912\n", + "4 Bury ... 11,840\n", + "5 Chesterfield ... 10,400\n", + "6 Colchester United ... 10,105\n", + "7 Coventry City ... 32,500\n", + "8 Crewe Alexandra ... 10,066\n", + "9 Doncaster Rovers ... 15,231\n", + "10 Fleetwood Town ... 5,311\n", + "11 Gillingham ... 11,582\n", + "12 Millwall ... 20,146\n", + "13 Oldham Athletic ... 13,512\n", + "14 Peterborough United ... 14,319\n", + "15 Port Vale ... 18,947\n", + "16 Rochdale ... 10,249\n", + "17 Scunthorpe United ... 9,183\n", + "18 Sheffield United ... 32,702\n", + "19 Shrewsbury Town ... 9,875\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': '7',\n", + " 'context': Resource Name ... Added\n", + "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", + "1 John M. Beasley House ... March 5 , 1996\n", + "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", + "3 Austin House ... February 5 , 1998\n", + "4 Reid-Woods House ... August 31 , 2000\n", + "5 Villa Serena Apartments ... September 29 , 2000\n", + "6 Paul M. Souder House ... November 2 , 2000\n", + "7 Stevens-Gilchrist House ... 
August 17 , 2001\n", + "\n", + "[8 rows x 3 columns]},\n", + " { 'answer': '19',\n", + " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", + "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", + "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", + "2 Cowen Park Bridge ... 15th Avenue NE\n", + "3 First Avenue South Bridge ... State Route 99\n", + "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", + "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", + "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", + "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", + "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", + "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", + "10 Magnolia Bridge ... W Garfield Street\n", + "11 Montlake Bridge ... State Route 513\n", + "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", + "13 Salmon Bay Bridge ... BNSF Railway\n", + "14 Ship Canal Bridge ... Interstate 5\n", + "15 Schmitz Park Bridge ... SW Admiral Way\n", + "16 Spokane Street Bridge ... SW Spokane Street\n", + "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", + "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", + "19 University Bridge ... Eastlake Avenue NE\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '8',\n", + " 'context': Location ... Comments\n", + "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", + "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", + "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", + "3 Bognor Regis ... 
Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", + "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", + "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", + "6 Minehead ... Known as Somerwest World 1986-1998 . Still open as Butlins Minehead 30 April...\n", + "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", + "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", + "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", + "\n", + "[10 rows x 4 columns]}]\n" + ] + } + ], + "source": [ + "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", + "print_answers(prediction, details=\"minimum\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Open-Domain QA on Text and Tables\n", + "With Haystack, you can do QA not only on texts alone or tables alone, but also on both texts and tables together as your source of information.\n", + "\n", + "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." 
+ ], + "metadata": { + "id": "8uMzl9Ml_D1B" + } + }, + { + "cell_type": "code", + "source": [ + "# Add 1,000 text passages from OTT-QA to our document store.\n", + "\n", + "def read_ottqa_texts(filename):\n", + " processed_passages = []\n", + " with open(filename) as passages:\n", + " passages = json.load(passages)\n", + " for title, content in passages.items():\n", + " title = title[6:]\n", + " title = title.replace(\"_\", \" \")\n", + " document = Document(\n", + " content=content,\n", + " content_type=\"text\",\n", + " meta={\"title\": title}\n", + " )\n", + " processed_passages.append(document)\n", + " \n", + " return processed_passages\n", + "\n", + "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", + "document_store.write_documents(passages, index=document_index)" + ], + "metadata": { + "id": "4CBcIjIq_uFx" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" + ], + "metadata": { + "id": "j1TaNF7SiKgH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Pipeline for QA on Combination of Text and Tables\n", + "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", + "\n", + "To achieve this, we make use of two additional nodes:\n", + "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", + "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them into a single list of Answers." 
+ ], + "metadata": { + "id": "c2sk_uNHj0DY" + } + }, + { + "cell_type": "code", + "source": [ + "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", + "\n", + "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", + "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", + "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", + "# that they are not capable of doing aggregations over multiple table cells.\n", + "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", + "split_documents = SplitDocumentList()\n", + "join_answers = JoinAnswers()" + ], + "metadata": { + "id": "Ej_j8Q3wlxXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text_table_qa_pipeline = Pipeline()\n", + "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", + "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", + "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", + "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" + ], + "metadata": { + "id": "Zdq6JnF5m3aP" + }, + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's have a look at the structure of the combined Table and Text QA pipeline.\n", + "from IPython import display\n", + "\n", + "text_table_qa_pipeline.draw()\n", + "display.Image(\"pipeline.png\")" + ], + "metadata": { + "id": "K4vH1ZEnniut", + "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540 + 
} + }, + "execution_count": 55, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "4kw53uWep3zj", - "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 67, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: What is Cuba's national tree?\n", - "Answers:\n", - "[ { 'answer': 'Cuban royal palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", - " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", - " 'evergreen tree native to western Cuba in the Cuban pine '\n", - " 'forests ecoregion .'},\n", - " { 'answer': \"Glenn O'Brien\",\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... 
The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . 
Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... 
Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Belize',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Palmyra palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... 
Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Guadeloupe',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Basse-Terre',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'East Caribbean dollar',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Jenkins',\n", - " 'context': NRHP reference number ... County\n", - "0 72000402 ... Wilkes\n", - "1 ... Meriwether\n", - "2 ... Bartow\n", - "3 71000280 ... Jenkins\n", - "4 ... Chatham\n", - "5 89002015 ... Thomas\n", - "6 ... Glynn\n", - "7 75000615 ... Walton\n", - "8 84001156 ... Sumter\n", - "9 79000713 ... 
Cobb\n", - "10 82002491 ... Twiggs\n", - "11 74000703 ... Taliaferro\n", - "12 80001039 ... Floyd\n", - "13 90000805 ... Gwinnett\n", - "14 73000620 ... Decatur\n", - "15 79000731 ... Houston\n", - "16 95000741 ... Grady\n", - "17 97000559 ... Greene\n", - "18 74000662 ... Brooks\n", - "19 75000616 ... Washington\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", - " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", - " 'perennial plant in the family Primulaceae , native to '\n", - " 'Northern Europe and northern Asia , and '},\n", - " { 'answer': 'Poospiza',\n", - " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", - " 'the South American lowlands and the Andes mountains . '\n", - " 'Generally they are arboreal feeders in '},\n", - " { 'answer': 'golden-crowned sparrow',\n", - " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", - " 'a large American sparrow found in the western part of '\n", - " 'North America .'},\n", - " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush',\n", - " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", - " 'flower heads . 
It is a rare variety '},\n", - " { 'answer': 'rain',\n", - " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", - " 'the southeastern rain forest of the Amazon in Puerto '\n", - " 'Maldonado , Tambopata , the Sacred Valley'}]\n" - ] - } + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" ] - }, + }, + "metadata": {}, + "execution_count": 55 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a text passage\n", + "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" + ], + "metadata": { + "id": "strPNduPoBLe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "9YiK75tSoOGA", + "outputId": "65362f83-4313-4ac1-f6ce-9b92522598c1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 57, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "RyeK3s28_X1C" - }, - "source": [ - "## About us\n", - "\n", - "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", - "\n", - "We bring NLP to the industry via open source! \n", - "Our focus: Industry specific language models & large scale QA systems. 
\n", - " \n", - "Some of our other work: \n", - "- [German BERT](https://deepset.ai/german-bert)\n", - "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", - "- [FARM](https://github.com/deepset-ai/FARM)\n", - "\n", - "Get in touch:\n", - "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", - "\n", - "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: Who is Aleksandar Trifunovic?\n", + "Answers:\n", + "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", + " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", + " 'professional basketball coach and former player .'},\n", + " { 'answer': 'Johnny Höglin',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Ivar Eriksen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk 
Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Magne Thomassen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': '5',\n", + " 'context': Position # Player Moving from\n", + "0 F 12 Nikola Kalinić Radnički Kragujevac\n", + "1 SF 6 Nemanja Dangubić Mega Vizura\n", + "2 C 33 Maik Zirbes Brose 
Baskets\n", + "3 PG 3 Marcus Williams Lokomotiv Kuban\n", + "4 PG 24 Stefan Jović Radnički Kragujevac\n", + "5 C 14 Đorđe Kaplanović FMP\n", + "6 SF 5 Nikola Čvorović FMP\n", + "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", + "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", + " { 'answer': 'Vasile Sărucan',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Belgium',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Poland',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans 
Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Hafþór Júlíus Björnsson',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Estonia',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Iceland',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman',\n", + " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman . 
He is currently '\n", + " 'playing with Piráti Chomutov of the Czech Extral'},\n", + " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player',\n", + " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player . In 2010 , he '\n", + " 'played for FC Angusht Nazran in the'}]\n" + ] } - ], - "metadata": { - "accelerator": "GPU", + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a table\n", + "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" + ], + "metadata": { + "id": "QYOHDSmLpzEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "4kw53uWep3zj", + "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", "colab": { - "name": "Tutorial15_TableQA.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" + "base_uri": "https://localhost:8080/" } + }, + "execution_count": 67, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: What is Cuba's national tree?\n", + "Answers:\n", + "[ { 'answer': 'Cuban royal palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... 
Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", + " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", + " 'evergreen tree native to western Cuba in the Cuban pine '\n", + " 'forests ecoregion .'},\n", + " { 'answer': \"Glenn O'Brien\",\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... 
Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... 
Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Belize',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... 
Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Palmyra palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Guadeloupe',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Basse-Terre',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... 
French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'East Caribbean dollar',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Jenkins',\n", + " 'context': NRHP reference number ... County\n", + "0 72000402 ... Wilkes\n", + "1 ... Meriwether\n", + "2 ... Bartow\n", + "3 71000280 ... Jenkins\n", + "4 ... Chatham\n", + "5 89002015 ... Thomas\n", + "6 ... Glynn\n", + "7 75000615 ... Walton\n", + "8 84001156 ... Sumter\n", + "9 79000713 ... Cobb\n", + "10 82002491 ... Twiggs\n", + "11 74000703 ... Taliaferro\n", + "12 80001039 ... Floyd\n", + "13 90000805 ... Gwinnett\n", + "14 73000620 ... Decatur\n", + "15 79000731 ... Houston\n", + "16 95000741 ... Grady\n", + "17 97000559 ... Greene\n", + "18 74000662 ... Brooks\n", + "19 75000616 ... Washington\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", + " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", + " 'perennial plant in the family Primulaceae , native to '\n", + " 'Northern Europe and northern Asia , and '},\n", + " { 'answer': 'Poospiza',\n", + " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", + " 'the South American lowlands and the Andes mountains . 
'\n", + " 'Generally they are arboreal feeders in '},\n", + " { 'answer': 'golden-crowned sparrow',\n", + " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", + " 'a large American sparrow found in the western part of '\n", + " 'North America .'},\n", + " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush',\n", + " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", + " 'flower heads . It is a rare variety '},\n", + " { 'answer': 'rain',\n", + " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", + " 'the southeastern rain forest of the Amazon in Puerto '\n", + " 'Maldonado , Tambopata , the Sacred Valley'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RyeK3s28_X1C" + }, + "source": [ + "## About us\n", + "\n", + "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", + "\n", + "We bring NLP to the industry via open source! \n", + "Our focus: Industry specific language models & large scale QA systems. 
\n", + " \n", + "Some of our other work: \n", + "- [German BERT](https://deepset.ai/german-bert)\n", + "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", + "- [FARM](https://github.com/deepset-ai/FARM)\n", + "\n", + "Get in touch:\n", + "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", + "\n", + "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "Tutorial15_TableQA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From 5674eff6b160c4e7908158de42ee057245120cdc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 28 Feb 2022 16:34:01 +0000 Subject: [PATCH 06/14] Update Documentation & Code Style --- docs/_src/tutorials/tutorials/15.md | 2 +- .../haystack-pipeline-1.2.0.schema.json | 92 +++++++++++++++++++ tutorials/Tutorial15_TableQA.ipynb | 10 +- 3 files changed, 97 insertions(+), 7 deletions(-) diff --git a/docs/_src/tutorials/tutorials/15.md b/docs/_src/tutorials/tutorials/15.md index 6c66d47a81..3b61b00a0f 100644 --- a/docs/_src/tutorials/tutorials/15.md +++ b/docs/_src/tutorials/tutorials/15.md @@ -33,7 +33,7 @@ Make sure you enable the GPU runtime to experience decent speed in this tutorial # Install the latest master of Haystack !pip install --upgrade pip -!pip install git+https://github.com/deepset-ai/haystack.git@split_tables_and_texts#egg=farm-haystack[colab] +!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab] # The TaPAs-based TableReader 
requires the torch-scatter library !pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html diff --git a/json-schemas/haystack-pipeline-1.2.0.schema.json b/json-schemas/haystack-pipeline-1.2.0.schema.json index d425ee0986..0dce877897 100644 --- a/json-schemas/haystack-pipeline-1.2.0.schema.json +++ b/json-schemas/haystack-pipeline-1.2.0.schema.json @@ -59,6 +59,9 @@ { "$ref": "#/definitions/ImageToTextConverterComponent" }, + { + "$ref": "#/definitions/JoinAnswersComponent" + }, { "$ref": "#/definitions/JoinDocumentsComponent" }, @@ -95,6 +98,9 @@ { "$ref": "#/definitions/SklearnQueryClassifierComponent" }, + { + "$ref": "#/definitions/SplitDocumentListComponent" + }, { "$ref": "#/definitions/TableReaderComponent" }, @@ -1093,6 +1099,51 @@ ], "additionalProperties": false }, + "JoinAnswersComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "JoinAnswers" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "join_mode": { + "title": "Join Mode", + "default": "concatenate", + "type": "string" + }, + "weights": { + "title": "Weights", + "type": "array", + "items": { + "type": "number" + } + }, + "top_k_join": { + "title": "Top K Join", + "type": "integer" + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, "JoinDocumentsComponent": { "type": "object", "properties": { @@ -1836,6 +1887,47 @@ ], "additionalProperties": false }, + "SplitDocumentListComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. 
Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "SplitDocumentList" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." + } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, "TableReaderComponent": { "type": "object", "properties": { diff --git a/tutorials/Tutorial15_TableQA.ipynb b/tutorials/Tutorial15_TableQA.ipynb index c14b322e80..4c2a4b028c 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -740,6 +740,7 @@ "source": [ "# Add 1,000 text passages from OTT-QA to our document store.\n", "\n", + "\n", "def read_ottqa_texts(filename):\n", " processed_passages = []\n", " with open(filename) as passages:\n", @@ -747,15 +748,12 @@ " for title, content in passages.items():\n", " title = title[6:]\n", " title = title.replace(\"_\", \" \")\n", - " document = Document(\n", - " content=content,\n", - " content_type=\"text\",\n", - " meta={\"title\": title}\n", - " )\n", + " document = Document(content=content, content_type=\"text\", meta={\"title\": title})\n", " processed_passages.append(document)\n", - " \n", + "\n", " return processed_passages\n", "\n", + "\n", "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", "document_store.write_documents(passages, index=document_index)" ], From 48198b7c6be46f39b0724fb932ed4d8e6d75e2ba Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Mon, 28 Feb 2022 17:41:49 +0100 Subject: [PATCH 07/14] Fix typing --- haystack/nodes/other/join_answers.py | 
5 +++-- haystack/nodes/other/split_documents.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/haystack/nodes/other/join_answers.py b/haystack/nodes/other/join_answers.py index 8e4759b547..db2b850e9f 100644 --- a/haystack/nodes/other/join_answers.py +++ b/haystack/nodes/other/join_answers.py @@ -33,7 +33,7 @@ def __init__( self.weights = [float(i) / sum(weights) for i in weights] if weights else None self.top_k_join = top_k_join - def run(self, inputs: List[Dict], top_k_join: Optional[int] = None) -> Tuple[Dict, str]: + def run(self, inputs: List[Dict], top_k_join: Optional[int] = None) -> Tuple[Dict, str]: # type: ignore reader_results = [inp["answers"] for inp in inputs] if self.join_mode == "concatenate": @@ -57,6 +57,7 @@ def _merge_answers(self, reader_results: List[List[Answer]]) -> List[Answer]: for result, weight in zip(reader_results, weights): for answer in result: - answer.score *= weight + if isinstance(answer.score, float): + answer.score *= weight return sorted([answer for cur_reader_result in reader_results for answer in cur_reader_result], reverse=True) diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/split_documents.py index d20a18d01c..803fc2500c 100644 --- a/haystack/nodes/other/split_documents.py +++ b/haystack/nodes/other/split_documents.py @@ -41,9 +41,9 @@ def __init__(self, split_by: str = "content_type", metadata_values: Optional[Lis if split_by != "content_type" and metadata_values is not None: self.outgoing_edges = len(metadata_values) - def run(self, documents: List[Document]) -> Tuple[Dict, str]: + def run(self, documents: List[Document]) -> Tuple[Dict, str]: # type: ignore if self.split_by == "content_type": - split_documents = {"output_1": [], "output_2": []} + split_documents: Dict[str, List[Document]] = {"output_1": [], "output_2": []} for doc in documents: if doc.content_type == "text": @@ -52,6 +52,8 @@ def run(self, documents: List[Document]) -> Tuple[Dict, str]: 
split_documents["output_2"].append(doc) else: + assert isinstance(self.metadata_values, list), "You need to provide metadata_values if you want to split" \ + " a list of Documents by a metadata field." split_documents = {f"output_{i+1}": [] for i in range(len(self.metadata_values))} for doc in documents: current_metadata_value = doc.meta.get(self.split_by, None) From 665133edf94495984d540b713d68cc9f36f33ed3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 28 Feb 2022 16:52:28 +0000 Subject: [PATCH 08/14] Update Documentation & Code Style --- haystack/nodes/other/split_documents.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/split_documents.py index 803fc2500c..add54df4e8 100644 --- a/haystack/nodes/other/split_documents.py +++ b/haystack/nodes/other/split_documents.py @@ -52,8 +52,9 @@ def run(self, documents: List[Document]) -> Tuple[Dict, str]: # type: ignore split_documents["output_2"].append(doc) else: - assert isinstance(self.metadata_values, list), "You need to provide metadata_values if you want to split" \ - " a list of Documents by a metadata field." + assert isinstance(self.metadata_values, list), ( + "You need to provide metadata_values if you want to split" " a list of Documents by a metadata field." 
+ ) split_documents = {f"output_{i+1}": [] for i in range(len(self.metadata_values))} for doc in documents: current_metadata_value = doc.meta.get(self.split_by, None) From 867d5efaaced6ae185f8e9d867e3c791f9a65e28 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Tue, 1 Mar 2022 15:40:23 +0100 Subject: [PATCH 09/14] Change name of SplitDocumentList to RouteDocuments --- haystack/__init__.py | 2 +- haystack/nodes/__init__.py | 2 +- haystack/nodes/other/__init__.py | 2 +- .../nodes/other/{split_documents.py => route_documents.py} | 7 ++++--- 4 files changed, 7 insertions(+), 6 deletions(-) rename haystack/nodes/other/{split_documents.py => route_documents.py} (95%) diff --git a/haystack/__init__.py b/haystack/__init__.py index 203f692897..f023dc7efb 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -102,7 +102,7 @@ def __getattr__(self, attr): from haystack.modeling.evaluation import eval from haystack.modeling.logger import MLFlowLogger, StdoutLogger, TensorBoardLogger -from haystack.nodes.other import JoinDocuments, Docs2Answers, JoinAnswers, SplitDocumentList +from haystack.nodes.other import JoinDocuments, Docs2Answers, JoinAnswers, RouteDocuments from haystack.nodes.query_classifier import SklearnQueryClassifier, TransformersQueryClassifier from haystack.nodes.file_classifier import FileTypeClassifier from haystack.utils import preprocessing diff --git a/haystack/nodes/__init__.py b/haystack/nodes/__init__.py index d4410995c0..bc3d86239e 100644 --- a/haystack/nodes/__init__.py +++ b/haystack/nodes/__init__.py @@ -21,7 +21,7 @@ AzureConverter, ParsrConverter, ) -from haystack.nodes.other import Docs2Answers, JoinDocuments, SplitDocumentList, JoinAnswers +from haystack.nodes.other import Docs2Answers, JoinDocuments, RouteDocuments, JoinAnswers from haystack.nodes.preprocessor import BasePreProcessor, PreProcessor from haystack.nodes.query_classifier import SklearnQueryClassifier, TransformersQueryClassifier from haystack.nodes.question_generator 
import QuestionGenerator diff --git a/haystack/nodes/other/__init__.py b/haystack/nodes/other/__init__.py index 5552d7821e..4cfb55ceab 100644 --- a/haystack/nodes/other/__init__.py +++ b/haystack/nodes/other/__init__.py @@ -1,4 +1,4 @@ from haystack.nodes.other.docs2answers import Docs2Answers from haystack.nodes.other.join_docs import JoinDocuments -from haystack.nodes.other.split_documents import SplitDocumentList +from haystack.nodes.other.route_documents import RouteDocuments from haystack.nodes.other.join_answers import JoinAnswers diff --git a/haystack/nodes/other/split_documents.py b/haystack/nodes/other/route_documents.py similarity index 95% rename from haystack/nodes/other/split_documents.py rename to haystack/nodes/other/route_documents.py index add54df4e8..f9ba7e3ed3 100644 --- a/haystack/nodes/other/split_documents.py +++ b/haystack/nodes/other/route_documents.py @@ -4,9 +4,10 @@ from haystack.schema import Document -class SplitDocumentList(BaseComponent): +class RouteDocuments(BaseComponent): """ - A node to split a list of `Document`s by `content_type` or by the values of a metadata field. + A node to split a list of `Document`s by `content_type` or by the values of a metadata field and route them to + different nodes. """ # By default (split_by == "content_type"), the node has two outgoing edges. @@ -14,7 +15,7 @@ class SplitDocumentList(BaseComponent): def __init__(self, split_by: str = "content_type", metadata_values: Optional[List[str]] = None): """ - :param split_by: Field to split the documents by. Either `"content_type"` or a metadata field name. + :param split_by: Field to split the documents by, either `"content_type"` or a metadata field name. If this parameter is set to `"content_type"`, the list of `Document`s will be split into a list containing only `Document`s of type `"text"` (will be routed to `"output_1"`) and a list containing only `Document`s of type `"text"` (will be routed to `"output_2"`). 
From 4b4c6b09c02723cc4d499b1f6d8bcdc29eeb3fa3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Mar 2022 14:46:18 +0000 Subject: [PATCH 10/14] Update Documentation & Code Style --- .../haystack-pipeline-1.2.0.schema.json | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/json-schemas/haystack-pipeline-1.2.0.schema.json b/json-schemas/haystack-pipeline-1.2.0.schema.json index 0dce877897..ac86f1de80 100644 --- a/json-schemas/haystack-pipeline-1.2.0.schema.json +++ b/json-schemas/haystack-pipeline-1.2.0.schema.json @@ -89,6 +89,9 @@ { "$ref": "#/definitions/RCIReaderComponent" }, + { + "$ref": "#/definitions/RouteDocumentsComponent" + }, { "$ref": "#/definitions/SentenceTransformersRankerComponent" }, @@ -98,9 +101,6 @@ { "$ref": "#/definitions/SklearnQueryClassifierComponent" }, - { - "$ref": "#/definitions/SplitDocumentListComponent" - }, { "$ref": "#/definitions/TableReaderComponent" }, @@ -1697,6 +1697,47 @@ ], "additionalProperties": false }, + "RouteDocumentsComponent": { + "type": "object", + "properties": { + "name": { + "title": "Name", + "description": "Custom name for the component. Helpful for visualization and debugging.", + "type": "string" + }, + "type": { + "title": "Type", + "description": "Haystack Class name for the component.", + "type": "string", + "const": "RouteDocuments" + }, + "params": { + "title": "Parameters", + "type": "object", + "properties": { + "split_by": { + "title": "Split By", + "default": "content_type", + "type": "string" + }, + "metadata_values": { + "title": "Metadata Values", + "type": "array", + "items": { + "type": "string" + } + } + }, + "additionalProperties": false, + "description": "Each parameter can reference other components defined in the same YAML file." 
+ } + }, + "required": [ + "type", + "name" + ], + "additionalProperties": false + }, "SentenceTransformersRankerComponent": { "type": "object", "properties": { @@ -1887,47 +1928,6 @@ ], "additionalProperties": false }, - "SplitDocumentListComponent": { - "type": "object", - "properties": { - "name": { - "title": "Name", - "description": "Custom name for the component. Helpful for visualization and debugging.", - "type": "string" - }, - "type": { - "title": "Type", - "description": "Haystack Class name for the component.", - "type": "string", - "const": "SplitDocumentList" - }, - "params": { - "title": "Parameters", - "type": "object", - "properties": { - "split_by": { - "title": "Split By", - "default": "content_type", - "type": "string" - }, - "metadata_values": { - "title": "Metadata Values", - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "description": "Each parameter can reference other components defined in the same YAML file." - } - }, - "required": [ - "type", - "name" - ], - "additionalProperties": false - }, "TableReaderComponent": { "type": "object", "properties": { From 1842da34e1fbad36846d8f263ca740c4070afae8 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Tue, 1 Mar 2022 15:55:40 +0100 Subject: [PATCH 11/14] Adapt tutorials to new name --- tutorials/Tutorial15_TableQA.ipynb | 2674 ++++++++++++++-------------- tutorials/Tutorial15_TableQA.py | 10 +- 2 files changed, 1343 insertions(+), 1341 deletions(-) diff --git a/tutorials/Tutorial15_TableQA.ipynb b/tutorials/Tutorial15_TableQA.ipynb index 4c2a4b028c..75a326922b 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -1,1357 +1,1359 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "DeAkZwDhufYA" - }, - "source": [ - "# Open-Domain QA on Tables\n", - "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", - "\n", - "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbR3bETlvi-3" - }, - "source": [ - "### Prepare environment\n", - "\n", - "#### Colab: Enable the GPU runtime\n", - "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", - "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HW66x0rfujyO" - }, - "outputs": [], - "source": [ - "# Make sure you have a GPU running\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_ZXoyhOAvn7M" - }, - "outputs": [], - "source": [ - "# Install the latest release of Haystack in your own environment\n", - "#! pip install farm-haystack\n", - "\n", - "# Install the latest master of Haystack\n", - "!pip install --upgrade pip\n", - "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", - "\n", - "# The TaPAs-based TableReader requires the torch-scatter library\n", - "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", - "\n", - "# Install pygraphviz for visualization of Pipelines\n", - "!apt install libgraphviz-dev\n", - "!pip install pygraphviz" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K_XJhluXwF5_" - }, - "source": [ - "### Start an Elasticsearch server\n", - "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. 
in Colab notebooks), then you can manually download and execute Elasticsearch from source." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "frDqgzK7v2i1" - }, - "outputs": [], - "source": [ - "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", - "from haystack.utils import launch_es\n", - "\n", - "launch_es()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "S4PGj1A6wKWu" - }, - "outputs": [], - "source": [ - "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.9.2\n", - "\n", - "import os\n", - "from subprocess import Popen, PIPE, STDOUT\n", - "\n", - "es_server = Popen(\n", - " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", - ")\n", - "# wait until ES has started\n", - "! 
sleep 30" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RmxepXZtwQ0E" - }, - "outputs": [], - "source": [ - "# Connect to Elasticsearch\n", - "from haystack.document_stores import ElasticsearchDocumentStore\n", - "\n", - "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", - "document_index = \"document\"\n", - "document_store = ElasticsearchDocumentStore(\n", - " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFh26LIlxldw" - }, - "source": [ - "## Add Tables to DocumentStore\n", - "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", - "\n", - "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM63uwbd8zd6" - }, - "outputs": [], - "source": [ - "# Let's first fetch some tables that we want to query\n", - "# Here: 1000 tables from OTT-QA\n", - "from haystack.utils import fetch_archive_from_http\n", - "\n", - "doc_dir = \"data\"\n", - "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", - "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "DeAkZwDhufYA" + }, + "source": [ + "# Open-Domain QA on Tables\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", + "\n", + "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." + ] }, - "id": "SKjw2LuXxlGh", - "outputId": "5ca79348-3eb8-4423-8ed4-2e08d4288765" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - " Result ... Score\n", - "0 Winner ... 6-1 , 6-1\n", - "1 Winner ... 6-2 , 4-6 , 6-3\n", - "2 Winner ... 6-2 , 6-2\n", - "3 Runner-up ... 3-6 , 2-6\n", - "4 Winner ... 6-7 , 6-3 , 6-0\n", - "5 Winner ... 6-1 , 6-0\n", - "6 Winner ... 6-2 , 2-6 , 6-2\n", - "7 Winner ... 
6-0 , 6-4\n", - "\n", - "[8 rows x 8 columns]\n", - "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" - ] - } - ], - "source": [ - "# Add the tables to the DocumentStore\n", - "\n", - "import json\n", - "from haystack import Document\n", - "import pandas as pd\n", - "\n", - "\n", - "def read_ottqa_tables(filename):\n", - " processed_tables = []\n", - " with open(filename) as tables:\n", - " tables = json.load(tables)\n", - " for key, table in tables.items():\n", - " current_columns = table[\"header\"]\n", - " current_rows = table[\"data\"]\n", - " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", - " current_doc_title = table[\"title\"]\n", - " current_section_title = table[\"section_title\"]\n", - " document = Document(\n", - " content=current_df,\n", - " content_type=\"table\",\n", - " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", - " id=key,\n", - " )\n", - " processed_tables.append(document)\n", - "\n", - " return processed_tables\n", - "\n", - "\n", - "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", - "document_store.write_documents(tables, index=document_index)\n", - "\n", - "# Showing content field and meta field of one of the Documents of content_type 'table'\n", - "print(tables[0].content)\n", - "print(tables[0].meta)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hmQC1sDmw3d7" - }, - "source": [ - "## Initalize Retriever, Reader, & Pipeline\n", - "\n", - "### Retriever\n", - "\n", - "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", - "They use some simple but fast algorithm.\n", - "\n", - "**Here:** We use the `TableTextRetriever` capable of retrieving relevant content among a database\n", - "of texts and tables using dense embeddings. 
It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", - "\n", - "**Alternatives:**\n", - "\n", - "- `ElasticsearchRetriever` that uses BM25 algorithm\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EY_qvdV6wyK5" - }, - "outputs": [], - "source": [ - "from haystack.nodes.retriever import TableTextRetriever\n", - "\n", - "retriever = TableTextRetriever(\n", - " document_store=document_store,\n", - " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", - " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", - " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", - " embed_meta_fields=[\"title\", \"section_title\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jasi1RM2zIJ7" - }, - "outputs": [], - "source": [ - "# Add table embeddings to the tables in DocumentStore\n", - "document_store.update_embeddings(retriever=retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XM-ijy6Zz11L" - }, - "outputs": [], - "source": [ - "## Alternative: ElasticsearchRetriever\n", - "# from haystack.nodes.retriever import ElasticsearchRetriever\n", - "# retriever = ElasticsearchRetriever(document_store=document_store)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "vbR3bETlvi-3" + }, + "source": [ + "### Prepare environment\n", + "\n", + "#### Colab: Enable the GPU runtime\n", + "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", + 
"**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", + "\n", + "" + ] }, - "id": "YHfQWxVI0N2e", - "outputId": "3e5bf3b8-1c4f-4c7d-855c-dc82b0dc4b52" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the Retriever\n", - "from haystack.utils import print_documents\n", - "\n", - "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", - "# Get highest scored table\n", - "print(retrieved_tables[0].content)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zbwkXScm2-gy" - }, - "source": [ - "### Reader\n", - "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. 
The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", - "\n", - "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 1 or close to 1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4APcRoio2RxG" - }, - "outputs": [], - "source": [ - "from haystack.nodes import TableReader\n", - "\n", - "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HW66x0rfujyO" + }, + "outputs": [], + "source": [ + "# Make sure you have a GPU running\n", + "!nvidia-smi" + ] }, - "id": "ILuAXkyN4F7x", - "outputId": "142457f0-322b-4ef7-acf2-f89f52bffac3" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... 
Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", - "\n", - "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", - "print(table_doc.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ZXoyhOAvn7M" + }, + "outputs": [], + "source": [ + "# Install the latest release of Haystack in your own environment\n", + "#! pip install farm-haystack\n", + "\n", + "# Install the latest master of Haystack\n", + "!pip install --upgrade pip\n", + "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", + "\n", + "# The TaPAs-based TableReader requires the torch-scatter library\n", + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", + "\n", + "# Install pygraphviz for visualization of Pipelines\n", + "!apt install libgraphviz-dev\n", + "!pip install pygraphviz" + ] }, - "id": "ilbsecgA4vfN", - "outputId": "e0095547-fb82-4b76-f826-284bcff61257" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ ]\n" - ] - } - ], - "source": [ - "from haystack.utils import print_answers\n", - "\n", - "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", - "print_answers(prediction, details=\"all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jkAYNMb7R9qu" - }, - "source": [ - "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the 
model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", - "\n", - "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "markdown", + "metadata": { + "id": "K_XJhluXwF5_" + }, + "source": [ + "### Start an Elasticsearch server\n", + "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." + ] }, - "id": "It8XYT2ZTVJs", - "outputId": "b617b61c-1e92-4301-c73c-913b40d464a4" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "Predicted answer: 12\n", - "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" - ] - } - ], - "source": [ - "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", - "print(f\"Meta field: {prediction['answers'][0].meta}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgmG7pzL5ceh" - }, - "source": [ - "### Pipeline\n", - "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", - "\n", - "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "G-aZZvyv4-Mf" - }, - "outputs": [], - "source": [ - "# Initialize pipeline\n", - "from haystack import Pipeline\n", - "\n", - "table_qa_pipeline = Pipeline()\n", - "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "frDqgzK7v2i1" + }, + "outputs": [], + "source": [ + "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", + "from haystack.utils import launch_es\n", + "\n", + "launch_es()" + ] }, - "id": "m8evexnW6dev", - "outputId": "099c0ec6-4306-46c5-e9fa-983c57305787" - }, - "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ { 'answer': '12',\n", - " 'context': Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... 
Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '7',\n", - " 'context': Building or structure ... Listing\n", - "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", - "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", - "2 Jensen Block ... Seattle landmark\n", - "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", - "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", - "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", - "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", - "\n", - "[7 rows x 3 columns]},\n", - " { 'answer': '8',\n", - " 'context': Years Venue Location\n", - "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", - "1 1987-88 Navesink Country Club Middletown , New Jersey\n", - "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", - "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", - "4 1982 Wykagyl Country Club New Rochelle , New York\n", - "5 1981 Ridgewood Country Club Paramus , New Jersey\n", - "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", - "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", - " { 'answer': '8',\n", - " 'context': Model Specification ... Prime mover Power output\n", - "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", - "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", - "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", - "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", - "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", - "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", - "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", - "7 RS-36 DL-701XAP ... 
12-251 1,800 hp ( 1.34 MW )\n", - "\n", - "[8 rows x 7 columns]},\n", - " { 'answer': '10',\n", - " 'context': Name or designation ... Notes\n", - "0 Aluminum Overcast ... One of only ten flyable B-17s\n", - "1 Avro Lancaster PA474 ... One of only two Lancasters in flying condition in the world\n", - "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", - "3 Douglas DC-7B N836D ... \n", - "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", - "5 FIFI ... One of only two B-29s flying\n", - "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", - "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", - "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", - "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", - "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", - "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", - "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", - "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", - "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", - "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", - "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", - "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", - "18 Yankee Lady ... 
Flyable\n", - "\n", - "[19 rows x 6 columns]},\n", - " { 'answer': '13',\n", - " 'context': N Year Country ... Link Remark K\n", - "0 003+ 2013 INDIA ... LK RK K\n", - "1 005 2006 USA ... LK RK K\n", - "2 010 2014 ZAF ... LK RK K\n", - "3 020 2010 USA ... LK RK K\n", - "4 030 201 ? USA ... LK RK K\n", - "5 040 2007 USA ... LK RK K\n", - "6 042 2004 USA ... LK Only G-S With Large Battery K\n", - "7 050 201 ? USA ... LK RK K\n", - "8 100 20 ? ? USA ... LK RK K\n", - "9 200 20 ? ? USA ... LK RK K\n", - "10 300 2013 EUR ... LK RK K\n", - "11 400 20 ? ? USA ... LK RK K\n", - "12 995 20 ? ? USA ... LK RK K\n", - "\n", - "[13 rows x 12 columns]},\n", - " { 'answer': '5',\n", - " 'context': Team ... Capacity\n", - "0 Barnsley ... 23,009\n", - "1 Blackpool ... 16,750\n", - "2 Bradford City ... 25,136\n", - "3 Burton Albion ... 6,912\n", - "4 Bury ... 11,840\n", - "5 Chesterfield ... 10,400\n", - "6 Colchester United ... 10,105\n", - "7 Coventry City ... 32,500\n", - "8 Crewe Alexandra ... 10,066\n", - "9 Doncaster Rovers ... 15,231\n", - "10 Fleetwood Town ... 5,311\n", - "11 Gillingham ... 11,582\n", - "12 Millwall ... 20,146\n", - "13 Oldham Athletic ... 13,512\n", - "14 Peterborough United ... 14,319\n", - "15 Port Vale ... 18,947\n", - "16 Rochdale ... 10,249\n", - "17 Scunthorpe United ... 9,183\n", - "18 Sheffield United ... 32,702\n", - "19 Shrewsbury Town ... 9,875\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': '7',\n", - " 'context': Resource Name ... Added\n", - "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", - "1 John M. Beasley House ... March 5 , 1996\n", - "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", - "3 Austin House ... February 5 , 1998\n", - "4 Reid-Woods House ... August 31 , 2000\n", - "5 Villa Serena Apartments ... September 29 , 2000\n", - "6 Paul M. Souder House ... November 2 , 2000\n", - "7 Stevens-Gilchrist House ... 
August 17 , 2001\n", - "\n", - "[8 rows x 3 columns]},\n", - " { 'answer': '19',\n", - " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", - "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", - "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", - "2 Cowen Park Bridge ... 15th Avenue NE\n", - "3 First Avenue South Bridge ... State Route 99\n", - "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", - "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", - "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", - "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", - "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", - "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", - "10 Magnolia Bridge ... W Garfield Street\n", - "11 Montlake Bridge ... State Route 513\n", - "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", - "13 Salmon Bay Bridge ... BNSF Railway\n", - "14 Ship Canal Bridge ... Interstate 5\n", - "15 Schmitz Park Bridge ... SW Admiral Way\n", - "16 Spokane Street Bridge ... SW Spokane Street\n", - "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", - "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", - "19 University Bridge ... Eastlake Avenue NE\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '8',\n", - " 'context': Location ... Comments\n", - "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", - "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", - "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", - "3 Bognor Regis ... 
Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", - "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", - "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", - "6 Minehead ... Known as Somerwest World 1986-1998 . Still open as Butlins Minehead 30 April...\n", - "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", - "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", - "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", - "\n", - "[10 rows x 4 columns]}]\n" - ] - } - ], - "source": [ - "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", - "print_answers(prediction, details=\"minimum\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Open-Domain QA on Text and Tables\n", - "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", - "\n", - "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." 
- ], - "metadata": { - "id": "8uMzl9Ml_D1B" - } - }, - { - "cell_type": "code", - "source": [ - "# Add 1,000 text passages from OTT-QA to our document store.\n", - "\n", - "\n", - "def read_ottqa_texts(filename):\n", - " processed_passages = []\n", - " with open(filename) as passages:\n", - " passages = json.load(passages)\n", - " for title, content in passages.items():\n", - " title = title[6:]\n", - " title = title.replace(\"_\", \" \")\n", - " document = Document(content=content, content_type=\"text\", meta={\"title\": title})\n", - " processed_passages.append(document)\n", - "\n", - " return processed_passages\n", - "\n", - "\n", - "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", - "document_store.write_documents(passages, index=document_index)" - ], - "metadata": { - "id": "4CBcIjIq_uFx" - }, - "execution_count": 16, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" - ], - "metadata": { - "id": "j1TaNF7SiKgH" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Pipeline for QA on Combination of Text and Tables\n", - "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", - "\n", - "To achieve this, we make use of two additional nodes:\n", - "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", - "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." 
- ], - "metadata": { - "id": "c2sk_uNHj0DY" - } - }, - { - "cell_type": "code", - "source": [ - "from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers\n", - "\n", - "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", - "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", - "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", - "# that they are not capable of doing aggregations over multiple table cells.\n", - "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", - "split_documents = SplitDocumentList()\n", - "join_answers = JoinAnswers()" - ], - "metadata": { - "id": "Ej_j8Q3wlxXE" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "text_table_qa_pipeline = Pipeline()\n", - "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "text_table_qa_pipeline.add_node(component=split_documents, name=\"SplitDocumentList\", inputs=[\"TableTextRetriever\"])\n", - "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"SplitDocumentList.output_1\"])\n", - "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"SplitDocumentList.output_2\"])\n", - "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" - ], - "metadata": { - "id": "Zdq6JnF5m3aP" - }, - "execution_count": 54, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", - "from IPython import display\n", - "\n", - "text_table_qa_pipeline.draw()\n", - "display.Image(\"pipeline.png\")" - ], - "metadata": { - "id": "K4vH1ZEnniut", - "outputId": "5926d4ac-aa5a-41d3-df19-73950f88ea2c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 540 - 
} - }, - "execution_count": 55, - "outputs": [ + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "S4PGj1A6wKWu" + }, + "outputs": [], + "source": [ + "# In Colab / No Docker environments: Start Elasticsearch from source\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", + "\n", + "import os\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "\n", + "es_server = Popen(\n", + " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", + ")\n", + "# wait until ES has started\n", + "! sleep 30" + ] + }, { - "output_type": "execute_result", - "data": { - "image/png": "\n", - "text/plain": [ - "" + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmxepXZtwQ0E" + }, + "outputs": [], + "source": [ + "# Connect to Elasticsearch\n", + "from haystack.document_stores import ElasticsearchDocumentStore\n", + "\n", + "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", + "document_index = \"document\"\n", + "document_store = ElasticsearchDocumentStore(\n", + " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", + ")" ] - }, - "metadata": {}, - "execution_count": 55 - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a text passage\n", - "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" - ], - "metadata": { - "id": "strPNduPoBLe" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "9YiK75tSoOGA", - "outputId": 
"65362f83-4313-4ac1-f6ce-9b92522598c1", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 57, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fFh26LIlxldw" + }, + "source": [ + "## Add Tables to DocumentStore\n", + "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", + "\n", + "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM63uwbd8zd6" + }, + "outputs": [], + "source": [ + "# Let's first fetch some tables that we want to query\n", + "# Here: 1000 tables from OTT-QA\n", + "from haystack.utils import fetch_archive_from_http\n", + "\n", + "doc_dir = \"data\"\n", + "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", + "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SKjw2LuXxlGh", + "outputId": "92c67d24-d6fb-413e-8dd7-53075141d508" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Result ... Score\n", + "0 Winner ... 6-1 , 6-1\n", + "1 Winner ... 6-2 , 4-6 , 6-3\n", + "2 Winner ... 6-2 , 6-2\n", + "3 Runner-up ... 3-6 , 2-6\n", + "4 Winner ... 6-7 , 6-3 , 6-0\n", + "5 Winner ... 6-1 , 6-0\n", + "6 Winner ... 6-2 , 2-6 , 6-2\n", + "7 Winner ... 
6-0 , 6-4\n", + "\n", + "[8 rows x 8 columns]\n", + "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" + ] + } + ], + "source": [ + "# Add the tables to the DocumentStore\n", + "\n", + "import json\n", + "from haystack import Document\n", + "import pandas as pd\n", + "\n", + "\n", + "def read_ottqa_tables(filename):\n", + " processed_tables = []\n", + " with open(filename) as tables:\n", + " tables = json.load(tables)\n", + " for key, table in tables.items():\n", + " current_columns = table[\"header\"]\n", + " current_rows = table[\"data\"]\n", + " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", + " current_doc_title = table[\"title\"]\n", + " current_section_title = table[\"section_title\"]\n", + " document = Document(\n", + " content=current_df,\n", + " content_type=\"table\",\n", + " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", + " id=key,\n", + " )\n", + " processed_tables.append(document)\n", + "\n", + " return processed_tables\n", + "\n", + "\n", + "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", + "document_store.write_documents(tables, index=document_index)\n", + "\n", + "# Showing content field and meta field of one of the Documents of content_type 'table'\n", + "print(tables[0].content)\n", + "print(tables[0].meta)" + ] + }, { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: Who is Aleksandar Trifunovic?\n", - "Answers:\n", - "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", - " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", - " 'professional basketball coach and former player .'},\n", - " { 'answer': 'Johnny Höglin',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin 
Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Ivar Eriksen',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Magne Thomassen',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", 
- "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': '5',\n", - " 'context': Position # Player Moving from\n", - "0 F 12 Nikola Kalinić Radnički Kragujevac\n", - "1 SF 6 Nemanja Dangubić Mega Vizura\n", - "2 C 33 Maik Zirbes Brose Baskets\n", - "3 PG 3 Marcus Williams Lokomotiv Kuban\n", - "4 PG 24 Stefan Jović Radnički Kragujevac\n", - "5 C 14 Đorđe Kaplanović FMP\n", - "6 SF 5 Nikola Čvorović FMP\n", - "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", - "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", - " { 'answer': 'Vasile Sărucan',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Belgium',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - 
"1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Poland',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Hafþór Júlíus Björnsson',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Estonia',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 
24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Iceland',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman',\n", - " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman . He is currently '\n", - " 'playing with Piráti Chomutov of the Czech Extral'},\n", - " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player',\n", - " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player . In 2010 , he '\n", - " 'played for FC Angusht Nazran in the'}]\n" - ] + "cell_type": "markdown", + "metadata": { + "id": "hmQC1sDmw3d7" + }, + "source": [ + "## Initialize Retriever, Reader, & Pipeline\n", + "\n", + "### Retriever\n", + "\n", + "Retrievers help narrow down the scope for the Reader to a subset of tables where a given question could be answered.\n", + "They use some simple but fast algorithm.\n", + "\n", + "**Here:** We use the `TableTextRetriever` capable of retrieving relevant content among a database\n", + "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. 
More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", + "\n", + "**Alternatives:**\n", + "\n", + "- `ElasticsearchRetriever` that uses BM25 algorithm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EY_qvdV6wyK5" + }, + "outputs": [], + "source": [ + "from haystack.nodes.retriever import TableTextRetriever\n", + "\n", + "retriever = TableTextRetriever(\n", + " document_store=document_store,\n", + " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", + " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", + " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", + " embed_meta_fields=[\"title\", \"section_title\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jasi1RM2zIJ7" + }, + "outputs": [], + "source": [ + "# Add table embeddings to the tables in DocumentStore\n", + "document_store.update_embeddings(retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "XM-ijy6Zz11L" + }, + "outputs": [], + "source": [ + "## Alternative: ElasticsearchRetriever\n", + "# from haystack.nodes.retriever import ElasticsearchRetriever\n", + "# retriever = ElasticsearchRetriever(document_store=document_store)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YHfQWxVI0N2e", + "outputId": "1d8dc4d2-a184-489e-defa-d445d76c458f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... 
On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the Retriever\n", + "from haystack.utils import print_documents\n", + "\n", + "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", + "# Get highest scored table\n", + "print(retrieved_tables[0].content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbwkXScm2-gy" + }, + "source": [ + "### Reader\n", + "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the answer. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", + "\n", + "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 1 or close to 1)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4APcRoio2RxG" + }, + "outputs": [], + "source": [ + "from haystack.nodes import TableReader\n", + "\n", + "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ILuAXkyN4F7x", + "outputId": "4bd19dcb-df8e-4a4d-b9d2-d34650e9e5c2" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... 
Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", + "\n", + "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", + "print(table_doc.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ilbsecgA4vfN", + "outputId": "f845f43e-43e8-48fe-d0ef-91b17a5eff0e" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ ]\n" + ] + } + ], + "source": [ + "from haystack.utils import print_answers\n", + "\n", + "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", + "print_answers(prediction, details=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jkAYNMb7R9qu" + }, + "source": [ + "The offsets in the `offsets_in_document` and `offsets_in_context` fields indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", + "\n", + "In the `Answer`'s meta field, you can find the aggregation operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "It8XYT2ZTVJs", + "outputId": "7d31af60-e04a-485d-f0ee-f29592b03928" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted answer: 12\n", + "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" + ] + } + ], + "source": [ + "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", + "print(f\"Meta field: {prediction['answers'][0].meta}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgmG7pzL5ceh" + }, + "source": [ + "### Pipeline\n", + "The Retriever and the Reader can be combined into a pipeline in order to first retrieve relevant tables and then extract the answer.\n", + "\n", + "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might not be helpful." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "G-aZZvyv4-Mf" + }, + "outputs": [], + "source": [ + "# Initialize pipeline\n", + "from haystack import Pipeline\n", + "\n", + "table_qa_pipeline = Pipeline()\n", + "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "m8evexnW6dev", + "outputId": "40514084-f516-4f13-fb48-6a55cb578366" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ { 'answer': '12',\n", + " 'context': Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '7',\n", + " 'context': Building or structure ... 
Listing\n", + "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", + "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", + "2 Jensen Block ... Seattle landmark\n", + "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", + "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", + "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", + "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", + "\n", + "[7 rows x 3 columns]},\n", + " { 'answer': '8',\n", + " 'context': Years Venue Location\n", + "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", + "1 1987-88 Navesink Country Club Middletown , New Jersey\n", + "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", + "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", + "4 1982 Wykagyl Country Club New Rochelle , New York\n", + "5 1981 Ridgewood Country Club Paramus , New Jersey\n", + "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", + "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", + " { 'answer': '8',\n", + " 'context': Model Specification ... Prime mover Power output\n", + "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", + "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", + "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", + "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", + "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", + "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", + "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", + "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", + "\n", + "[8 rows x 7 columns]},\n", + " { 'answer': '10',\n", + " 'context': Name or designation ... Notes\n", + "0 Aluminum Overcast ... One of only ten flyable B-17s\n", + "1 Avro Lancaster PA474 ... 
One of only two Lancasters in flying condition in the world\n", + "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", + "3 Douglas DC-7B N836D ... \n", + "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", + "5 FIFI ... One of only two B-29s flying\n", + "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", + "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", + "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", + "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", + "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", + "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", + "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", + "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", + "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", + "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", + "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", + "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", + "18 Yankee Lady ... Flyable\n", + "\n", + "[19 rows x 6 columns]},\n", + " { 'answer': '13',\n", + " 'context': N Year Country ... Link Remark K\n", + "0 003+ 2013 INDIA ... LK RK K\n", + "1 005 2006 USA ... LK RK K\n", + "2 010 2014 ZAF ... LK RK K\n", + "3 020 2010 USA ... LK RK K\n", + "4 030 201 ? USA ... LK RK K\n", + "5 040 2007 USA ... 
LK RK K\n", + "6 042 2004 USA ... LK Only G-S With Large Battery K\n", + "7 050 201 ? USA ... LK RK K\n", + "8 100 20 ? ? USA ... LK RK K\n", + "9 200 20 ? ? USA ... LK RK K\n", + "10 300 2013 EUR ... LK RK K\n", + "11 400 20 ? ? USA ... LK RK K\n", + "12 995 20 ? ? USA ... LK RK K\n", + "\n", + "[13 rows x 12 columns]},\n", + " { 'answer': '5',\n", + " 'context': Team ... Capacity\n", + "0 Barnsley ... 23,009\n", + "1 Blackpool ... 16,750\n", + "2 Bradford City ... 25,136\n", + "3 Burton Albion ... 6,912\n", + "4 Bury ... 11,840\n", + "5 Chesterfield ... 10,400\n", + "6 Colchester United ... 10,105\n", + "7 Coventry City ... 32,500\n", + "8 Crewe Alexandra ... 10,066\n", + "9 Doncaster Rovers ... 15,231\n", + "10 Fleetwood Town ... 5,311\n", + "11 Gillingham ... 11,582\n", + "12 Millwall ... 20,146\n", + "13 Oldham Athletic ... 13,512\n", + "14 Peterborough United ... 14,319\n", + "15 Port Vale ... 18,947\n", + "16 Rochdale ... 10,249\n", + "17 Scunthorpe United ... 9,183\n", + "18 Sheffield United ... 32,702\n", + "19 Shrewsbury Town ... 9,875\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': '7',\n", + " 'context': Resource Name ... Added\n", + "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", + "1 John M. Beasley House ... March 5 , 1996\n", + "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", + "3 Austin House ... February 5 , 1998\n", + "4 Reid-Woods House ... August 31 , 2000\n", + "5 Villa Serena Apartments ... September 29 , 2000\n", + "6 Paul M. Souder House ... November 2 , 2000\n", + "7 Stevens-Gilchrist House ... August 17 , 2001\n", + "\n", + "[8 rows x 3 columns]},\n", + " { 'answer': '19',\n", + " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", + "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", + "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", + "2 Cowen Park Bridge ... 15th Avenue NE\n", + "3 First Avenue South Bridge ... 
State Route 99\n", + "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", + "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", + "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", + "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", + "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", + "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", + "10 Magnolia Bridge ... W Garfield Street\n", + "11 Montlake Bridge ... State Route 513\n", + "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", + "13 Salmon Bay Bridge ... BNSF Railway\n", + "14 Ship Canal Bridge ... Interstate 5\n", + "15 Schmitz Park Bridge ... SW Admiral Way\n", + "16 Spokane Street Bridge ... SW Spokane Street\n", + "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", + "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", + "19 University Bridge ... Eastlake Avenue NE\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '8',\n", + " 'context': Location ... Comments\n", + "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", + "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", + "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", + "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", + "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", + "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", + "6 Minehead ... Known as Somerwest World 1986-1998 . 
Still open as Butlins Minehead 30 April...\n", + "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", + "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", + "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", + "\n", + "[10 rows x 4 columns]}]\n" + ] + } + ], + "source": [ + "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", + "print_answers(prediction, details=\"minimum\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Open-Domain QA on Text and Tables\n", + "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", + "\n", + "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." + ], + "metadata": { + "id": "8uMzl9Ml_D1B" + } + }, + { + "cell_type": "code", + "source": [ + "# Add 1,000 text passages from OTT-QA to our document store.\n", + "\n", + "def read_ottqa_texts(filename):\n", + " processed_passages = []\n", + " with open(filename) as passages:\n", + " passages = json.load(passages)\n", + " for title, content in passages.items():\n", + " title = title[6:]\n", + " title = title.replace(\"_\", \" \")\n", + " document = Document(\n", + " content=content,\n", + " content_type=\"text\",\n", + " meta={\"title\": title}\n", + " )\n", + " processed_passages.append(document)\n", + " \n", + " return processed_passages\n", + "\n", + "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", + "document_store.write_documents(passages, index=document_index)" + ], + "metadata": { + "id": "4CBcIjIq_uFx" + }, + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" + ], + "metadata": { + "id": "j1TaNF7SiKgH" + }, + 
"execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Pipeline for QA on Combination of Text and Tables\n", + "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", + "\n", + "To achieve this, we make use of two additional nodes:\n", + "- `RouteDocuments`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", + "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them into a single list of Answers." + ], + "metadata": { + "id": "c2sk_uNHj0DY" + } + }, + { + "cell_type": "code", + "source": [ + "from haystack.nodes import FARMReader, RouteDocuments, JoinAnswers\n", + "\n", + "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", + "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", + "# \"deepset/tapas-large-nq-reader\" as TableReader models. 
The disadvantage of these models is, however,\n", + "# that they are not capable of doing aggregations over multiple table cells.\n", + "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", + "route_documents = RouteDocuments()\n", + "join_answers = JoinAnswers()" + ], + "metadata": { + "id": "Ej_j8Q3wlxXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text_table_qa_pipeline = Pipeline()\n", + "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "text_table_qa_pipeline.add_node(component=route_documents, name=\"RouteDocuments\", inputs=[\"TableTextRetriever\"])\n", + "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"RouteDocuments.output_1\"])\n", + "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"RouteDocuments.output_2\"])\n", + "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" + ], + "metadata": { + "id": "Zdq6JnF5m3aP" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", + "from IPython import display\n", + "\n", + "text_table_qa_pipeline.draw()\n", + "display.Image(\"pipeline.png\")" + ], + "metadata": { + "id": "K4vH1ZEnniut", + "outputId": "85aa17a8-227d-40e4-c8c0-5d0532faa47a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540 + } + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "image/png": "\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a text passage\n", + "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" + ], + "metadata": { + "id": 
"strPNduPoBLe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "9YiK75tSoOGA", + "outputId": "bd52f841-3846-441f-dd6f-53b02111691e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: Who is Aleksandar Trifunovic?\n", + "Answers:\n", + "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", + " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", + " 'professional basketball coach and former player .'},\n", + " { 'answer': 'Johnny Höglin',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Ivar Eriksen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + 
"4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Magne Thomassen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': '5',\n", + " 'context': Position # Player Moving from\n", + "0 F 12 Nikola Kalinić Radnički Kragujevac\n", + "1 SF 6 Nemanja Dangubić Mega Vizura\n", + "2 C 33 Maik Zirbes Brose Baskets\n", + "3 PG 3 Marcus Williams Lokomotiv Kuban\n", + "4 PG 24 Stefan Jović Radnički Kragujevac\n", + "5 C 14 Đorđe Kaplanović FMP\n", + "6 SF 5 
Nikola Čvorović FMP\n", + "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", + "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", + " { 'answer': 'Vasile Sărucan',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Belgium',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Poland',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 
7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Hafþór Júlíus Björnsson',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Estonia',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Iceland',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman',\n", + " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman . 
He is currently '\n", + " 'playing with Piráti Chomutov of the Czech Extral'},\n", + " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player',\n", + " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player . In 2010 , he '\n", + " 'played for FC Angusht Nazran in the'}]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a table\n", + "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" + ], + "metadata": { + "id": "QYOHDSmLpzEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "4kw53uWep3zj", + "outputId": "b332cc17-3cb8-4e20-d79d-bb4cf656f277", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: What is Cuba's national tree?\n", + "Answers:\n", + "[ { 'answer': 'Cuban royal palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... 
Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", + " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", + " 'evergreen tree native to western Cuba in the Cuban pine '\n", + " 'forests ecoregion .'},\n", + " { 'answer': \"Glenn O'Brien\",\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... 
The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... 
In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Belize',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Palmyra palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... 
Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Guadeloupe',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Basse-Terre',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'East Caribbean dollar',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... 
English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Jenkins',\n", + " 'context': NRHP reference number ... County\n", + "0 72000402 ... Wilkes\n", + "1 ... Meriwether\n", + "2 ... Bartow\n", + "3 71000280 ... Jenkins\n", + "4 ... Chatham\n", + "5 89002015 ... Thomas\n", + "6 ... Glynn\n", + "7 75000615 ... Walton\n", + "8 84001156 ... Sumter\n", + "9 79000713 ... Cobb\n", + "10 82002491 ... Twiggs\n", + "11 74000703 ... Taliaferro\n", + "12 80001039 ... Floyd\n", + "13 90000805 ... Gwinnett\n", + "14 73000620 ... Decatur\n", + "15 79000731 ... Houston\n", + "16 95000741 ... Grady\n", + "17 97000559 ... Greene\n", + "18 74000662 ... Brooks\n", + "19 75000616 ... Washington\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", + " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", + " 'perennial plant in the family Primulaceae , native to '\n", + " 'Northern Europe and northern Asia , and '},\n", + " { 'answer': 'Poospiza',\n", + " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", + " 'the South American lowlands and the Andes mountains . '\n", + " 'Generally they are arboreal feeders in '},\n", + " { 'answer': 'golden-crowned sparrow',\n", + " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", + " 'a large American sparrow found in the western part of '\n", + " 'North America .'},\n", + " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush',\n", + " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", + " 'flower heads . 
It is a rare variety '},\n", + " { 'answer': 'rain',\n", + " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", + " 'the southeastern rain forest of the Amazon in Puerto '\n", + " 'Maldonado , Tambopata , the Sacred Valley'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RyeK3s28_X1C" + }, + "source": [ + "## About us\n", + "\n", + "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", + "\n", + "We bring NLP to the industry via open source! \n", + "Our focus: Industry specific language models & large scale QA systems. \n", + " \n", + "Some of our other work: \n", + "- [German BERT](https://deepset.ai/german-bert)\n", + "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", + "- [FARM](https://github.com/deepset-ai/FARM)\n", + "\n", + "Get in touch:\n", + "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", + "\n", + "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" + ] } - ] - }, - { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a table\n", - "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" - ], - "metadata": { - "id": "QYOHDSmLpzEg" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "4kw53uWep3zj", - "outputId": "6316f9af-ef44-426a-ec42-75a3c0d293a1", + ], + "metadata": { + "accelerator": "GPU", "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 67, - "outputs": [ - { - "output_type": 
"stream", - "name": "stdout", - "text": [ - "\n", - "Query: What is Cuba's national tree?\n", - "Answers:\n", - "[ { 'answer': 'Cuban royal palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", - " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", - " 'evergreen tree native to western Cuba in the Cuban pine '\n", - " 'forests ecoregion .'},\n", - " { 'answer': \"Glenn O'Brien\",\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... 
Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... 
Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Belize',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... 
Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Palmyra palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Guadeloupe',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... 
English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Basse-Terre',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'East Caribbean dollar',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Jenkins',\n", - " 'context': NRHP reference number ... County\n", - "0 72000402 ... Wilkes\n", - "1 ... Meriwether\n", - "2 ... Bartow\n", - "3 71000280 ... Jenkins\n", - "4 ... Chatham\n", - "5 89002015 ... Thomas\n", - "6 ... Glynn\n", - "7 75000615 ... Walton\n", - "8 84001156 ... Sumter\n", - "9 79000713 ... Cobb\n", - "10 82002491 ... Twiggs\n", - "11 74000703 ... Taliaferro\n", - "12 80001039 ... Floyd\n", - "13 90000805 ... Gwinnett\n", - "14 73000620 ... Decatur\n", - "15 79000731 ... Houston\n", - "16 95000741 ... Grady\n", - "17 97000559 ... Greene\n", - "18 74000662 ... Brooks\n", - "19 75000616 ... 
Washington\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", - " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", - " 'perennial plant in the family Primulaceae , native to '\n", - " 'Northern Europe and northern Asia , and '},\n", - " { 'answer': 'Poospiza',\n", - " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", - " 'the South American lowlands and the Andes mountains . '\n", - " 'Generally they are arboreal feeders in '},\n", - " { 'answer': 'golden-crowned sparrow',\n", - " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", - " 'a large American sparrow found in the western part of '\n", - " 'North America .'},\n", - " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush',\n", - " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", - " 'flower heads . It is a rare variety '},\n", - " { 'answer': 'rain',\n", - " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", - " 'the southeastern rain forest of the Amazon in Puerto '\n", - " 'Maldonado , Tambopata , the Sacred Valley'}]\n" - ] + "name": "Tutorial15_TableQA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RyeK3s28_X1C" - }, - "source": [ - "## About us\n", - "\n", - "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", - "\n", - "We bring NLP to the industry via open source! \n", - "Our focus: Industry specific language models & large scale QA systems. 
\n", - " \n", - "Some of our other work: \n", - "- [German BERT](https://deepset.ai/german-bert)\n", - "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", - "- [FARM](https://github.com/deepset-ai/FARM)\n", - "\n", - "Get in touch:\n", - "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", - "\n", - "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "Tutorial15_TableQA.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 0 + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file diff --git a/tutorials/Tutorial15_TableQA.py b/tutorials/Tutorial15_TableQA.py index 0ae1bbb3d0..b77e719deb 100644 --- a/tutorials/Tutorial15_TableQA.py +++ b/tutorials/Tutorial15_TableQA.py @@ -6,7 +6,7 @@ from haystack.document_stores import ElasticsearchDocumentStore from haystack import Document, Pipeline from haystack.nodes.retriever import TableTextRetriever -from haystack.nodes import TableReader, FARMReader, SplitDocumentList, JoinAnswers +from haystack.nodes import TableReader, FARMReader, RouteDocuments, JoinAnswers def tutorial15_tableqa(): @@ -126,14 +126,14 @@ def read_ottqa_tables(filename): # "deepset/tapas-large-nq-reader" as TableReader models. The disadvantage of these models is, however, # that they are not capable of doing aggregations over multiple table cells. 
table_reader = TableReader("deepset/tapas-large-nq-hn-reader") - split_documents = SplitDocumentList() + route_documents = RouteDocuments() join_answers = JoinAnswers() text_table_qa_pipeline = Pipeline() text_table_qa_pipeline.add_node(component=retriever, name="TableTextRetriever", inputs=["Query"]) - text_table_qa_pipeline.add_node(component=split_documents, name="SplitDocumentList", inputs=["TableTextRetriever"]) - text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["SplitDocumentList.output_1"]) - text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["SplitDocumentList.output_2"]) + text_table_qa_pipeline.add_node(component=route_documents, name="RouteDocuments", inputs=["TableTextRetriever"]) + text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["RouteDocuments.output_1"]) + text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["RouteDocuments.output_2"]) text_table_qa_pipeline.add_node(component=join_answers, name="JoinAnswers", inputs=["TextReader", "TableReader"]) # Example query whose answer resides in a text passage From 13b0297ced266e7c6c251e508688569d26c98dce Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Tue, 1 Mar 2022 16:50:17 +0100 Subject: [PATCH 12/14] Add test for JoinAnswers --- haystack/nodes/other/join_answers.py | 9 ++++---- test/test_pipeline.py | 34 ++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/haystack/nodes/other/join_answers.py b/haystack/nodes/other/join_answers.py index db2b850e9f..e96652dfcf 100644 --- a/haystack/nodes/other/join_answers.py +++ b/haystack/nodes/other/join_answers.py @@ -16,7 +16,7 @@ def __init__( :param join_mode: `"concatenate"` to combine documents from multiple `Reader`s. `"merge"` to aggregate scores of individual `Answer`s. 
:param weights: A node-wise list (length of list must be equal to the number of input nodes) of weights for - adjusting `Answer` scores when using the `"merge"` join_mode. By default, equal weight is assignef to each + adjusting `Answer` scores when using the `"merge"` join_mode. By default, equal weight is assigned to each `Reader` score. This parameter is not compatible with the `"concatenate"` join_mode. :param top_k_join: Limit `Answer`s to top_k based on the resulting scored of the join. """ @@ -36,16 +36,17 @@ def __init__( def run(self, inputs: List[Dict], top_k_join: Optional[int] = None) -> Tuple[Dict, str]: # type: ignore reader_results = [inp["answers"] for inp in inputs] + if not top_k_join: + top_k_join = self.top_k_join + if self.join_mode == "concatenate": concatenated_answers = [answer for cur_reader_result in reader_results for answer in cur_reader_result] - concatenated_answers = sorted(concatenated_answers, reverse=True) + concatenated_answers = sorted(concatenated_answers, reverse=True)[:top_k_join] return {"answers": concatenated_answers, "labels": inputs[0].get("labels", None)}, "output_1" elif self.join_mode == "merge": merged_answers = self._merge_answers(reader_results) - if not top_k_join: - top_k_join = self.top_k_join if self.top_k_join is not None else len(merged_answers) merged_answers = merged_answers[:top_k_join] return {"answers": merged_answers, "labels": inputs[0].get("labels", None)}, "output_1" diff --git a/test/test_pipeline.py b/test/test_pipeline.py index cb11af97bb..94799c3f81 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -8,7 +8,7 @@ import pytest import responses -from haystack import __version__, Document +from haystack import __version__, Document, Answer, JoinAnswers from haystack.document_stores.base import BaseDocumentStore from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore @@ -19,7 +19,7 @@ from 
haystack.nodes.retriever.sparse import ElasticsearchRetriever from haystack.pipelines import Pipeline, DocumentSearchPipeline, RootNode, ExtractiveQAPipeline from haystack.pipelines.base import _PipelineCodeGen -from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, SplitDocumentList +from haystack.nodes import DensePassageRetriever, EmbeddingRetriever, RouteDocuments from conftest import MOCK_DC, DC_API_ENDPOINT, DC_API_KEY, DC_TEST_INDEX, SAMPLES_PATH, deepset_cloud_fixture @@ -1043,8 +1043,8 @@ def test_documentsearch_document_store_authentication(retriever_with_docs, docum assert kwargs["headers"] == auth_headers -def test_split_document_list_content_type(test_docs_xs): - # Test splitting by content_type +def test_route_documents_by_content_type(): + # Test routing by content_type docs = [ Document(content="text document", content_type="text"), Document( @@ -1053,17 +1053,19 @@ def test_split_document_list_content_type(test_docs_xs): ), ] - split_documents = SplitDocumentList() - result, _ = split_documents.run(documents=docs) + route_documents = RouteDocuments() + result, _ = route_documents.run(documents=docs) assert len(result["output_1"]) == 1 assert len(result["output_2"]) == 1 assert result["output_1"][0].content_type == "text" assert result["output_2"][0].content_type == "table" - # Test splitting by metadata field + +def test_route_documents_by_metafield(test_docs_xs): + # Test routing by metadata field docs = [Document.from_dict(doc) if isinstance(doc, dict) else doc for doc in test_docs_xs] - split_documents = SplitDocumentList(split_by="meta_field", metadata_values=["test1", "test3", "test5"]) - result, _ = split_documents.run(docs) + route_documents = RouteDocuments(split_by="meta_field", metadata_values=["test1", "test3", "test5"]) + result, _ = route_documents.run(docs) assert len(result["output_1"]) == 1 assert len(result["output_2"]) == 1 assert len(result["output_3"]) == 1 @@ -1072,6 +1074,20 @@ def 
test_split_document_list_content_type(test_docs_xs): assert result["output_3"][0].meta["meta_field"] == "test5" +@pytest.mark.parametrize("join_mode", ["concatenate", "merge"]) +def test_join_answers_concatenate(join_mode): + inputs =[{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] + + join_answers = JoinAnswers(join_mode=join_mode) + result, _ = join_answers.run(inputs) + assert len(result["answers"]) == 2 + assert result["answers"] == sorted(result["answers"], reverse=True) + + result, _ = join_answers.run(inputs, top_k_join=1) + assert len(result["answers"]) == 1 + assert result["answers"][0].answer == "answer 2" + + def clean_faiss_document_store(): if Path("existing_faiss_document_store").exists(): os.remove("existing_faiss_document_store") From a6042b6ca8aa2d36a0e0630bf8b9027f27792d5a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 1 Mar 2022 15:53:05 +0000 Subject: [PATCH 13/14] Update Documentation & Code Style --- docs/_src/tutorials/tutorials/15.md | 10 +- test/test_pipeline.py | 2 +- tutorials/Tutorial15_TableQA.ipynb | 2674 +++++++++++++-------------- 3 files changed, 1342 insertions(+), 1344 deletions(-) diff --git a/docs/_src/tutorials/tutorials/15.md b/docs/_src/tutorials/tutorials/15.md index 3b61b00a0f..b2c606c5f5 100644 --- a/docs/_src/tutorials/tutorials/15.md +++ b/docs/_src/tutorials/tutorials/15.md @@ -288,14 +288,14 @@ To achieve this, we make use of two additional nodes: ```python -from haystack.nodes import FARMReader, SplitDocumentList, JoinAnswers +from haystack.nodes import FARMReader, RouteDocuments, JoinAnswers text_reader = FARMReader("deepset/roberta-base-squad2") # In order to get meaningful scores from the TableReader, use "deepset/tapas-large-nq-hn-reader" or # "deepset/tapas-large-nq-reader" as TableReader models. 
The disadvantage of these models is, however, # that they are not capable of doing aggregations over multiple table cells. table_reader = TableReader("deepset/tapas-large-nq-hn-reader") -split_documents = SplitDocumentList() +route_documents = RouteDocuments() join_answers = JoinAnswers() ``` @@ -303,9 +303,9 @@ join_answers = JoinAnswers() ```python text_table_qa_pipeline = Pipeline() text_table_qa_pipeline.add_node(component=retriever, name="TableTextRetriever", inputs=["Query"]) -text_table_qa_pipeline.add_node(component=split_documents, name="SplitDocumentList", inputs=["TableTextRetriever"]) -text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["SplitDocumentList.output_1"]) -text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["SplitDocumentList.output_2"]) +text_table_qa_pipeline.add_node(component=route_documents, name="RouteDocuments", inputs=["TableTextRetriever"]) +text_table_qa_pipeline.add_node(component=text_reader, name="TextReader", inputs=["RouteDocuments.output_1"]) +text_table_qa_pipeline.add_node(component=table_reader, name="TableReader", inputs=["RouteDocuments.output_2"]) text_table_qa_pipeline.add_node(component=join_answers, name="JoinAnswers", inputs=["TextReader", "TableReader"]) ``` diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 94799c3f81..f36e0faeb1 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -1076,7 +1076,7 @@ def test_route_documents_by_metafield(test_docs_xs): @pytest.mark.parametrize("join_mode", ["concatenate", "merge"]) def test_join_answers_concatenate(join_mode): - inputs =[{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] + inputs = [{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] join_answers = JoinAnswers(join_mode=join_mode) result, _ = join_answers.run(inputs) diff --git a/tutorials/Tutorial15_TableQA.ipynb 
b/tutorials/Tutorial15_TableQA.ipynb index 75a326922b..d448118cef 100644 --- a/tutorials/Tutorial15_TableQA.ipynb +++ b/tutorials/Tutorial15_TableQA.ipynb @@ -1,1359 +1,1357 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "DeAkZwDhufYA" - }, - "source": [ - "# Open-Domain QA on Tables\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", - "\n", - "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vbR3bETlvi-3" - }, - "source": [ - "### Prepare environment\n", - "\n", - "#### Colab: Enable the GPU runtime\n", - "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", - "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HW66x0rfujyO" - }, - "outputs": [], - "source": [ - "# Make sure you have a GPU running\n", - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_ZXoyhOAvn7M" - }, - "outputs": [], - "source": [ - "# Install the latest release of Haystack in your own environment\n", - "#! 
pip install farm-haystack\n", - "\n", - "# Install the latest master of Haystack\n", - "!pip install --upgrade pip\n", - "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", - "\n", - "# The TaPAs-based TableReader requires the torch-scatter library\n", - "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", - "\n", - "# Install pygraphviz for visualization of Pipelines\n", - "!apt install libgraphviz-dev\n", - "!pip install pygraphviz" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K_XJhluXwF5_" - }, - "source": [ - "### Start an Elasticsearch server\n", - "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "frDqgzK7v2i1" - }, - "outputs": [], - "source": [ - "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", - "from haystack.utils import launch_es\n", - "\n", - "launch_es()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "id": "S4PGj1A6wKWu" - }, - "outputs": [], - "source": [ - "# In Colab / No Docker environments: Start Elasticsearch from source\n", - "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", - "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", - "! chown -R daemon:daemon elasticsearch-7.9.2\n", - "\n", - "import os\n", - "from subprocess import Popen, PIPE, STDOUT\n", - "\n", - "es_server = Popen(\n", - " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", - ")\n", - "# wait until ES has started\n", - "! 
sleep 30" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RmxepXZtwQ0E" - }, - "outputs": [], - "source": [ - "# Connect to Elasticsearch\n", - "from haystack.document_stores import ElasticsearchDocumentStore\n", - "\n", - "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", - "document_index = \"document\"\n", - "document_store = ElasticsearchDocumentStore(\n", - " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fFh26LIlxldw" - }, - "source": [ - "## Add Tables to DocumentStore\n", - "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", - "\n", - "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nM63uwbd8zd6" - }, - "outputs": [], - "source": [ - "# Let's first fetch some tables that we want to query\n", - "# Here: 1000 tables from OTT-QA\n", - "from haystack.utils import fetch_archive_from_http\n", - "\n", - "doc_dir = \"data\"\n", - "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", - "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "SKjw2LuXxlGh", - "outputId": "92c67d24-d6fb-413e-8dd7-53075141d508" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Result ... Score\n", - "0 Winner ... 6-1 , 6-1\n", - "1 Winner ... 
6-2 , 4-6 , 6-3\n", - "2 Winner ... 6-2 , 6-2\n", - "3 Runner-up ... 3-6 , 2-6\n", - "4 Winner ... 6-7 , 6-3 , 6-0\n", - "5 Winner ... 6-1 , 6-0\n", - "6 Winner ... 6-2 , 2-6 , 6-2\n", - "7 Winner ... 6-0 , 6-4\n", - "\n", - "[8 rows x 8 columns]\n", - "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" - ] - } - ], - "source": [ - "# Add the tables to the DocumentStore\n", - "\n", - "import json\n", - "from haystack import Document\n", - "import pandas as pd\n", - "\n", - "\n", - "def read_ottqa_tables(filename):\n", - " processed_tables = []\n", - " with open(filename) as tables:\n", - " tables = json.load(tables)\n", - " for key, table in tables.items():\n", - " current_columns = table[\"header\"]\n", - " current_rows = table[\"data\"]\n", - " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", - " current_doc_title = table[\"title\"]\n", - " current_section_title = table[\"section_title\"]\n", - " document = Document(\n", - " content=current_df,\n", - " content_type=\"table\",\n", - " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", - " id=key,\n", - " )\n", - " processed_tables.append(document)\n", - "\n", - " return processed_tables\n", - "\n", - "\n", - "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", - "document_store.write_documents(tables, index=document_index)\n", - "\n", - "# Showing content field and meta field of one of the Documents of content_type 'table'\n", - "print(tables[0].content)\n", - "print(tables[0].meta)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hmQC1sDmw3d7" - }, - "source": [ - "## Initalize Retriever, Reader, & Pipeline\n", - "\n", - "### Retriever\n", - "\n", - "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", - "They use some simple but fast algorithm.\n", - "\n", - "**Here:** We use the `TableTextRetriever` capable of 
retrieving relevant content among a database\n", - "of texts and tables using dense embeddings. It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", - "\n", - "**Alternatives:**\n", - "\n", - "- `ElasticsearchRetriever` that uses BM25 algorithm\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EY_qvdV6wyK5" - }, - "outputs": [], - "source": [ - "from haystack.nodes.retriever import TableTextRetriever\n", - "\n", - "retriever = TableTextRetriever(\n", - " document_store=document_store,\n", - " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", - " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", - " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", - " embed_meta_fields=[\"title\", \"section_title\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jasi1RM2zIJ7" - }, - "outputs": [], - "source": [ - "# Add table embeddings to the tables in DocumentStore\n", - "document_store.update_embeddings(retriever=retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "XM-ijy6Zz11L" - }, - "outputs": [], - "source": [ - "## Alternative: ElasticsearchRetriever\n", - "# from haystack.nodes.retriever import ElasticsearchRetriever\n", - "# retriever = ElasticsearchRetriever(document_store=document_store)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "YHfQWxVI0N2e", - "outputId": "1d8dc4d2-a184-489e-defa-d445d76c458f" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ 
- " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the Retriever\n", - "from haystack.utils import print_documents\n", - "\n", - "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", - "# Get highest scored table\n", - "print(retrieved_tables[0].content)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zbwkXScm2-gy" - }, - "source": [ - "### Reader\n", - "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", - "\n", - "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 
1 or close to 1)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4APcRoio2RxG" - }, - "outputs": [], - "source": [ - "from haystack.nodes import TableReader\n", - "\n", - "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ILuAXkyN4F7x", - "outputId": "4bd19dcb-df8e-4a4d-b9d2-d34650e9e5c2" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - " Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... 
Under construction\n", - "\n", - "[20 rows x 6 columns]\n" - ] - } - ], - "source": [ - "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", - "\n", - "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", - "print(table_doc.content)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ilbsecgA4vfN", - "outputId": "f845f43e-43e8-48fe-d0ef-91b17a5eff0e" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ ]\n" - ] - } - ], - "source": [ - "from haystack.utils import print_answers\n", - "\n", - "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", - "print_answers(prediction, details=\"all\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jkAYNMb7R9qu" - }, - "source": [ - "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", - "\n", - "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "It8XYT2ZTVJs", - "outputId": "7d31af60-e04a-485d-f0ee-f29592b03928" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Predicted answer: 12\n", - "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" - ] - } - ], - "source": [ - "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", - "print(f\"Meta field: {prediction['answers'][0].meta}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pgmG7pzL5ceh" - }, - "source": [ - "### Pipeline\n", - "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", - "\n", - "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "id": "G-aZZvyv4-Mf" - }, - "outputs": [], - "source": [ - "# Initialize pipeline\n", - "from haystack import Pipeline\n", - "\n", - "table_qa_pipeline = Pipeline()\n", - "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "m8evexnW6dev", - "outputId": "40514084-f516-4f13-fb48-6a55cb578366" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: How many twin buildings are under construction?\n", - "Answers:\n", - "[ { 'answer': '12',\n", - " 'context': Name ... Status\n", - "0 Twin Towers II ... Never built\n", - "1 World Trade Center ... Destroyed\n", - "2 Three Sixty West ... Under construction\n", - "3 Gateway Towers ... Under construction\n", - "4 Rustomjee Crown ... Under construction\n", - "5 Orchid Heights ... On-hold\n", - "6 Hermitage Towers ... Proposed\n", - "7 Lokhandwala Minerva ... Under construction\n", - "8 Lamar Towers ... Under construction\n", - "9 Indonesia One Towers ... Under construction\n", - "10 Sky link ... Approved\n", - "11 Vida Za'abeel ... Proposed\n", - "12 Broadway Corridor Twin Towers ... Never built\n", - "13 India Bulls Sky Forest Tower ... Under construction\n", - "14 Capital Towers ... Under construction\n", - "15 One Avighna Park ... Under construction\n", - "16 NEB Towers ... On hold\n", - "17 The Destiny ( Tower ) ... Under construction\n", - "18 Oberoi Esquire Towers ... Under construction\n", - "19 Bhoomi Celestia ... Under construction\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '7',\n", - " 'context': Building or structure ... 
Listing\n", - "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", - "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", - "2 Jensen Block ... Seattle landmark\n", - "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", - "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", - "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", - "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", - "\n", - "[7 rows x 3 columns]},\n", - " { 'answer': '8',\n", - " 'context': Years Venue Location\n", - "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", - "1 1987-88 Navesink Country Club Middletown , New Jersey\n", - "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", - "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", - "4 1982 Wykagyl Country Club New Rochelle , New York\n", - "5 1981 Ridgewood Country Club Paramus , New Jersey\n", - "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", - "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", - " { 'answer': '8',\n", - " 'context': Model Specification ... Prime mover Power output\n", - "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", - "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", - "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", - "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", - "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", - "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", - "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", - "7 RS-36 DL-701XAP ... 12-251 1,800 hp ( 1.34 MW )\n", - "\n", - "[8 rows x 7 columns]},\n", - " { 'answer': '10',\n", - " 'context': Name or designation ... Notes\n", - "0 Aluminum Overcast ... One of only ten flyable B-17s\n", - "1 Avro Lancaster PA474 ... 
One of only two Lancasters in flying condition in the world\n", - "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", - "3 Douglas DC-7B N836D ... \n", - "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", - "5 FIFI ... One of only two B-29s flying\n", - "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", - "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", - "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", - "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", - "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", - "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", - "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", - "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", - "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", - "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", - "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", - "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", - "18 Yankee Lady ... Flyable\n", - "\n", - "[19 rows x 6 columns]},\n", - " { 'answer': '13',\n", - " 'context': N Year Country ... Link Remark K\n", - "0 003+ 2013 INDIA ... LK RK K\n", - "1 005 2006 USA ... LK RK K\n", - "2 010 2014 ZAF ... LK RK K\n", - "3 020 2010 USA ... LK RK K\n", - "4 030 201 ? USA ... LK RK K\n", - "5 040 2007 USA ... 
LK RK K\n", - "6 042 2004 USA ... LK Only G-S With Large Battery K\n", - "7 050 201 ? USA ... LK RK K\n", - "8 100 20 ? ? USA ... LK RK K\n", - "9 200 20 ? ? USA ... LK RK K\n", - "10 300 2013 EUR ... LK RK K\n", - "11 400 20 ? ? USA ... LK RK K\n", - "12 995 20 ? ? USA ... LK RK K\n", - "\n", - "[13 rows x 12 columns]},\n", - " { 'answer': '5',\n", - " 'context': Team ... Capacity\n", - "0 Barnsley ... 23,009\n", - "1 Blackpool ... 16,750\n", - "2 Bradford City ... 25,136\n", - "3 Burton Albion ... 6,912\n", - "4 Bury ... 11,840\n", - "5 Chesterfield ... 10,400\n", - "6 Colchester United ... 10,105\n", - "7 Coventry City ... 32,500\n", - "8 Crewe Alexandra ... 10,066\n", - "9 Doncaster Rovers ... 15,231\n", - "10 Fleetwood Town ... 5,311\n", - "11 Gillingham ... 11,582\n", - "12 Millwall ... 20,146\n", - "13 Oldham Athletic ... 13,512\n", - "14 Peterborough United ... 14,319\n", - "15 Port Vale ... 18,947\n", - "16 Rochdale ... 10,249\n", - "17 Scunthorpe United ... 9,183\n", - "18 Sheffield United ... 32,702\n", - "19 Shrewsbury Town ... 9,875\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': '7',\n", - " 'context': Resource Name ... Added\n", - "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", - "1 John M. Beasley House ... March 5 , 1996\n", - "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", - "3 Austin House ... February 5 , 1998\n", - "4 Reid-Woods House ... August 31 , 2000\n", - "5 Villa Serena Apartments ... September 29 , 2000\n", - "6 Paul M. Souder House ... November 2 , 2000\n", - "7 Stevens-Gilchrist House ... August 17 , 2001\n", - "\n", - "[8 rows x 3 columns]},\n", - " { 'answer': '19',\n", - " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", - "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", - "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", - "2 Cowen Park Bridge ... 15th Avenue NE\n", - "3 First Avenue South Bridge ... 
State Route 99\n", - "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", - "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", - "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", - "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", - "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", - "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", - "10 Magnolia Bridge ... W Garfield Street\n", - "11 Montlake Bridge ... State Route 513\n", - "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", - "13 Salmon Bay Bridge ... BNSF Railway\n", - "14 Ship Canal Bridge ... Interstate 5\n", - "15 Schmitz Park Bridge ... SW Admiral Way\n", - "16 Spokane Street Bridge ... SW Spokane Street\n", - "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", - "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", - "19 University Bridge ... Eastlake Avenue NE\n", - "\n", - "[20 rows x 6 columns]},\n", - " { 'answer': '8',\n", - " 'context': Location ... Comments\n", - "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", - "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", - "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", - "3 Bognor Regis ... Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", - "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", - "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", - "6 Minehead ... Known as Somerwest World 1986-1998 . 
Still open as Butlins Minehead 30 April...\n", - "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", - "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", - "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", - "\n", - "[10 rows x 4 columns]}]\n" - ] - } - ], - "source": [ - "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", - "print_answers(prediction, details=\"minimum\")" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Open-Domain QA on Text and Tables\n", - "With haystack, you not only have the possibility to do QA on texts or tables, solely, but you can also use both texts and tables as your source of information.\n", - "\n", - "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset." - ], - "metadata": { - "id": "8uMzl9Ml_D1B" - } - }, - { - "cell_type": "code", - "source": [ - "# Add 1,000 text passages from OTT-QA to our document store.\n", - "\n", - "def read_ottqa_texts(filename):\n", - " processed_passages = []\n", - " with open(filename) as passages:\n", - " passages = json.load(passages)\n", - " for title, content in passages.items():\n", - " title = title[6:]\n", - " title = title.replace(\"_\", \" \")\n", - " document = Document(\n", - " content=content,\n", - " content_type=\"text\",\n", - " meta={\"title\": title}\n", - " )\n", - " processed_passages.append(document)\n", - " \n", - " return processed_passages\n", - "\n", - "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", - "document_store.write_documents(passages, index=document_index)" - ], - "metadata": { - "id": "4CBcIjIq_uFx" - }, - "execution_count": 18, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" - ], - "metadata": { - "id": "j1TaNF7SiKgH" - }, - 
"execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## Pipeline for QA on Combination of Text and Tables\n", - "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", - "\n", - "To achieve this, we make use of two additional nodes:\n", - "- `SplitDocumentList`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", - "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them to a single list of Answers." - ], - "metadata": { - "id": "c2sk_uNHj0DY" - } + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "DeAkZwDhufYA" + }, + "source": [ + "# Open-Domain QA on Tables\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/deepset-ai/haystack/blob/master/tutorials/Tutorial15_TableQA.ipynb)\n", + "\n", + "This tutorial shows you how to perform question-answering on tables using the `TableTextRetriever` or `ElasticsearchRetriever` as retriever node and the `TableReader` as reader node." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vbR3bETlvi-3" + }, + "source": [ + "### Prepare environment\n", + "\n", + "#### Colab: Enable the GPU runtime\n", + "Make sure you enable the GPU runtime to experience decent speed in this tutorial.\n", + "**Runtime -> Change Runtime type -> Hardware accelerator -> GPU**\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HW66x0rfujyO" + }, + "outputs": [], + "source": [ + "# Make sure you have a GPU running\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_ZXoyhOAvn7M" + }, + "outputs": [], + "source": [ + "# Install the latest release of Haystack in your own environment\n", + "#! pip install farm-haystack\n", + "\n", + "# Install the latest master of Haystack\n", + "!pip install --upgrade pip\n", + "!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]\n", + "\n", + "# The TaPAs-based TableReader requires the torch-scatter library\n", + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.10.0+cu113.html\n", + "\n", + "# Install pygraphviz for visualization of Pipelines\n", + "!apt install libgraphviz-dev\n", + "!pip install pygraphviz" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K_XJhluXwF5_" + }, + "source": [ + "### Start an Elasticsearch server\n", + "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), then you can manually download and execute Elasticsearch from source." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "frDqgzK7v2i1" + }, + "outputs": [], + "source": [ + "# Recommended: Start Elasticsearch using Docker via the Haystack utility function\n", + "from haystack.utils import launch_es\n", + "\n", + "launch_es()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "S4PGj1A6wKWu" + }, + "outputs": [], + "source": [ + "# In Colab / No Docker environments: Start Elasticsearch from source\n", + "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q\n", + "! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz\n", + "! chown -R daemon:daemon elasticsearch-7.9.2\n", + "\n", + "import os\n", + "from subprocess import Popen, PIPE, STDOUT\n", + "\n", + "es_server = Popen(\n", + " [\"elasticsearch-7.9.2/bin/elasticsearch\"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1) # as daemon\n", + ")\n", + "# wait until ES has started\n", + "! 
sleep 30" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RmxepXZtwQ0E" + }, + "outputs": [], + "source": [ + "# Connect to Elasticsearch\n", + "from haystack.document_stores import ElasticsearchDocumentStore\n", + "\n", + "# We want to use a small model producing 512-dimensional embeddings, so we need to set embedding_dim to 512\n", + "document_index = \"document\"\n", + "document_store = ElasticsearchDocumentStore(\n", + " host=\"localhost\", username=\"\", password=\"\", index=document_index, embedding_dim=512\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fFh26LIlxldw" + }, + "source": [ + "## Add Tables to DocumentStore\n", + "To quickly demonstrate the capabilities of the `TableTextRetriever` and the `TableReader` we use a subset of 1000 tables of the [Open Table-and-Text Question Answering (OTT-QA) dataset](https://github.com/wenhuchen/OTT-QA).\n", + "\n", + "Just as text passages, tables are represented as `Document` objects in Haystack. The content field, though, is a pandas DataFrame instead of a string." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nM63uwbd8zd6" + }, + "outputs": [], + "source": [ + "# Let's first fetch some tables that we want to query\n", + "# Here: 1000 tables from OTT-QA\n", + "from haystack.utils import fetch_archive_from_http\n", + "\n", + "doc_dir = \"data\"\n", + "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/ottqa_sample.zip\"\n", + "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "SKjw2LuXxlGh", + "outputId": "92c67d24-d6fb-413e-8dd7-53075141d508" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "from haystack.nodes import FARMReader, RouteDocuments, JoinAnswers\n", - "\n", - "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", - "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", - "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", - "# that they are not capable of doing aggregations over multiple table cells.\n", - "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", - "route_documents = RouteDocuments()\n", - "join_answers = JoinAnswers()" - ], - "metadata": { - "id": "Ej_j8Q3wlxXE" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Result ... Score\n", + "0 Winner ... 6-1 , 6-1\n", + "1 Winner ... 6-2 , 4-6 , 6-3\n", + "2 Winner ... 6-2 , 6-2\n", + "3 Runner-up ... 3-6 , 2-6\n", + "4 Winner ... 6-7 , 6-3 , 6-0\n", + "5 Winner ... 6-1 , 6-0\n", + "6 Winner ... 6-2 , 2-6 , 6-2\n", + "7 Winner ... 
6-0 , 6-4\n", + "\n", + "[8 rows x 8 columns]\n", + "{'title': 'Rewa Hudson', 'section_title': 'ITF finals ( 7–3 ) -- Doubles ( 7–1 )'}\n" + ] + } + ], + "source": [ + "# Add the tables to the DocumentStore\n", + "\n", + "import json\n", + "from haystack import Document\n", + "import pandas as pd\n", + "\n", + "\n", + "def read_ottqa_tables(filename):\n", + " processed_tables = []\n", + " with open(filename) as tables:\n", + " tables = json.load(tables)\n", + " for key, table in tables.items():\n", + " current_columns = table[\"header\"]\n", + " current_rows = table[\"data\"]\n", + " current_df = pd.DataFrame(columns=current_columns, data=current_rows)\n", + " current_doc_title = table[\"title\"]\n", + " current_section_title = table[\"section_title\"]\n", + " document = Document(\n", + " content=current_df,\n", + " content_type=\"table\",\n", + " meta={\"title\": current_doc_title, \"section_title\": current_section_title},\n", + " id=key,\n", + " )\n", + " processed_tables.append(document)\n", + "\n", + " return processed_tables\n", + "\n", + "\n", + "tables = read_ottqa_tables(f\"{doc_dir}/ottqa_tables_sample.json\")\n", + "document_store.write_documents(tables, index=document_index)\n", + "\n", + "# Showing content field and meta field of one of the Documents of content_type 'table'\n", + "print(tables[0].content)\n", + "print(tables[0].meta)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hmQC1sDmw3d7" + }, + "source": [ + "## Initalize Retriever, Reader, & Pipeline\n", + "\n", + "### Retriever\n", + "\n", + "Retrievers help narrowing down the scope for the Reader to a subset of tables where a given question could be answered.\n", + "They use some simple but fast algorithm.\n", + "\n", + "**Here:** We use the `TableTextRetriever` capable of retrieving relevant content among a database\n", + "of texts and tables using dense embeddings. 
It is an extension of the `DensePassageRetriever` and consists of three encoders (one query encoder, one text passage encoder and one table encoder) that create embeddings in the same vector space. More details on the `TableTextRetriever` and how it is trained can be found in [this paper](https://arxiv.org/abs/2108.04049).\n", + "\n", + "**Alternatives:**\n", + "\n", + "- `ElasticsearchRetriever` that uses BM25 algorithm\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EY_qvdV6wyK5" + }, + "outputs": [], + "source": [ + "from haystack.nodes.retriever import TableTextRetriever\n", + "\n", + "retriever = TableTextRetriever(\n", + " document_store=document_store,\n", + " query_embedding_model=\"deepset/bert-small-mm_retrieval-question_encoder\",\n", + " passage_embedding_model=\"deepset/bert-small-mm_retrieval-passage_encoder\",\n", + " table_embedding_model=\"deepset/bert-small-mm_retrieval-table_encoder\",\n", + " embed_meta_fields=[\"title\", \"section_title\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jasi1RM2zIJ7" + }, + "outputs": [], + "source": [ + "# Add table embeddings to the tables in DocumentStore\n", + "document_store.update_embeddings(retriever=retriever)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "XM-ijy6Zz11L" + }, + "outputs": [], + "source": [ + "## Alternative: ElasticsearchRetriever\n", + "# from haystack.nodes.retriever import ElasticsearchRetriever\n", + "# retriever = ElasticsearchRetriever(document_store=document_store)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "YHfQWxVI0N2e", + "outputId": "1d8dc4d2-a184-489e-defa-d445d76c458f" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "text_table_qa_pipeline = Pipeline()\n", - "text_table_qa_pipeline.add_node(component=retriever, 
name=\"TableTextRetriever\", inputs=[\"Query\"])\n", - "text_table_qa_pipeline.add_node(component=route_documents, name=\"RouteDocuments\", inputs=[\"TableTextRetriever\"])\n", - "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"RouteDocuments.output_1\"])\n", - "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"RouteDocuments.output_2\"])\n", - "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" - ], - "metadata": { - "id": "Zdq6JnF5m3aP" - }, - "execution_count": 21, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... 
Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the Retriever\n", + "from haystack.utils import print_documents\n", + "\n", + "retrieved_tables = retriever.retrieve(\"How many twin buildings are under construction?\", top_k=5)\n", + "# Get highest scored table\n", + "print(retrieved_tables[0].content)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zbwkXScm2-gy" + }, + "source": [ + "### Reader\n", + "The `TableReader` is based on TaPas, a transformer-based language model capable of grasping the two-dimensional structure of a table. It scans the tables returned by the retriever and extracts the anser. The available TableReader models can be found [here](https://huggingface.co/models?pipeline_tag=table-question-answering&sort=downloads).\n", + "\n", + "**Notice**: The `TableReader` will return an answer for each table, even if the query cannot be answered by the table. Furthermore, the confidence scores are not useful as of now, given that they will *always* be very high (i.e. 1 or close to 1)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4APcRoio2RxG" + }, + "outputs": [], + "source": [ + "from haystack.nodes import TableReader\n", + "\n", + "reader = TableReader(model_name_or_path=\"google/tapas-base-finetuned-wtq\", max_seq_len=512)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ILuAXkyN4F7x", + "outputId": "4bd19dcb-df8e-4a4d-b9d2-d34650e9e5c2" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Let's have a look on the structure of the combined Table an Text QA pipeline.\n", - "from IPython import display\n", - "\n", - "text_table_qa_pipeline.draw()\n", - "display.Image(\"pipeline.png\")" - ], - "metadata": { - "id": "K4vH1ZEnniut", - "outputId": "85aa17a8-227d-40e4-c8c0-5d0532faa47a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 540 - } - }, - "execution_count": 22, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "image/png": "\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 22 - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + " Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... 
On hold\n", + "17 The Destiny ( Tower ) ... Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]\n" + ] + } + ], + "source": [ + "# Try the TableReader on one Table (highest-scored retrieved table from previous section)\n", + "\n", + "table_doc = document_store.get_document_by_id(\"List_of_tallest_twin_buildings_and_structures_in_the_world_1\")\n", + "print(table_doc.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ilbsecgA4vfN", + "outputId": "f845f43e-43e8-48fe-d0ef-91b17a5eff0e" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a text passage\n", - "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" - ], - "metadata": { - "id": "strPNduPoBLe" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ ]\n" + ] + } + ], + "source": [ + "from haystack.utils import print_answers\n", + "\n", + "prediction = reader.predict(query=\"How many twin buildings are under construction?\", documents=[table_doc])\n", + "print_answers(prediction, details=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jkAYNMb7R9qu" + }, + "source": [ + "The offsets in the `offsets_in_document` and `offsets_in_context` field indicate the table cells that the model predicts to be part of the answer. They need to be interpreted on the linearized table, i.e., a flat list containing all of the table cells.\n", + "\n", + "In the `Answer`'s meta field, you can find the aggreagtion operator used to construct the answer (in this case `COUNT`) and the answer cells as strings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "It8XYT2ZTVJs", + "outputId": "7d31af60-e04a-485d-f0ee-f29592b03928" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "9YiK75tSoOGA", - "outputId": "bd52f841-3846-441f-dd6f-53b02111691e", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 24, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: Who is Aleksandar Trifunovic?\n", - "Answers:\n", - "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", - " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", - " 'professional basketball coach and former player .'},\n", - " { 'answer': 'Johnny Höglin',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Ivar Eriksen',\n", - " 'context': Rank Athlete Country Time\n", - "0 
1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': 'Magne Thomassen',\n", - " 'context': Rank Athlete Country Time\n", - "0 1 Kees Verkerk Netherlands 2:03.4\n", - "1 2 Ivar Eriksen Norway 2:05.0\n", - "2 3 Ard Schenk Netherlands 2:05.0\n", - "3 4 Magne Thomassen Norway 2:05.1\n", - "4 5 Johnny Höglin Sweden 2:05.2\n", - "5 5 Bjørn Tveter Norway 2:05.2\n", - "6 7 Svein-Erik Stiansen Norway 2:05.5\n", - "7 8 Eduard Matusevich Soviet Union 2:06.1\n", - "8 9 Peter Nottet Netherlands 2:06.3\n", - "9 10 Örjan Sandler Sweden 2:07.0\n", - "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", - "11 12 Ants Antson Soviet Union 2:07.2\n", - "12 12 Valery Kaplan Soviet Union 2:07.2\n", - "13 14 Jouko Launonen Finland 2:07.5\n", - "14 15 Günter Traub West Germany 2:07.7\n", - "15 16 Jan Bols Netherlands 2:07.8\n", - "16 16 Manne Lavås Sweden 2:07.8\n", - "17 18 Kimmo Koskinen Finland 2:07.9\n", - "18 19 Richard Wurster United States 2:08.4\n", - "19 20 Göran Claeson Sweden 2:08.6},\n", - " { 'answer': '5',\n", - " 'context': Position # Player Moving from\n", - "0 F 12 Nikola Kalinić Radnički Kragujevac\n", - "1 SF 6 Nemanja Dangubić Mega Vizura\n", - "2 C 33 Maik 
Zirbes Brose Baskets\n", - "3 PG 3 Marcus Williams Lokomotiv Kuban\n", - "4 PG 24 Stefan Jović Radnički Kragujevac\n", - "5 C 14 Đorđe Kaplanović FMP\n", - "6 SF 5 Nikola Čvorović FMP\n", - "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", - "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", - " { 'answer': 'Vasile Sărucan',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Belgium',\n", - " 'context': Rank Name Nationality Result\n", - "0 1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Poland',\n", - " 'context': Rank Name Nationality Result\n", - "0 
1 Hans Baumgartner West Germany 8.12\n", - "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", - "2 3 Vasile Sărucan Romania 7.88\n", - "3 4 Valeriu Jurcă Romania 7.72\n", - "4 5 Philippe Housiaux Belgium 7.70\n", - "5 6 Andreas Gloerfeld West Germany 7.70\n", - "6 7 Jan Kobuszewski Poland 7.66\n", - "7 8 Jaroslav Brož Czechoslovakia 7.66\n", - "8 9 Alan Lerwill Great Britain 7.61\n", - "9 10 Mikhail Bariban Soviet Union 7.58\n", - "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", - "11 12 Kari Palmen Finland 7.51\n", - "12 13 Georgi Marin Bulgaria 7.51\n", - "13 14 Jesper Tørring Denmark 7.46\n", - "14 15 Milan Spasojević Yugoslavia 7.23\n", - "15 16 Salih Mercan Turkey 6.98\n", - "16 17 Henrik Kalocsai Hungary 5.67},\n", - " { 'answer': 'Hafþór Júlíus Björnsson',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Estonia',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Iceland',\n", - " 'context': # Name Nationality Pts\n", - "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", - "1 2 Robert Oberst United States 29\n", - "2 3 Lauri Nami Estonia 24\n", - "3 4 Nick Best United States 14.5\n", - "4 5 Laurence Shahlaei UK 12\n", - "5 6 Wu Long China 6},\n", - " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman',\n", - " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", - " 'professional ice hockey defenceman . 
He is currently '\n", - " 'playing with Piráti Chomutov of the Czech Extral'},\n", - " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player',\n", - " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", - " 'is a Russian professional football player . In 2010 , he '\n", - " 'played for FC Angusht Nazran in the'}]\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Predicted answer: 12\n", + "Meta field: {'aggregation_operator': 'COUNT', 'answer_cells': ['Three Sixty West', 'Gateway Towers', 'Rustomjee Crown', 'Lokhandwala Minerva', 'Lamar Towers', 'Indonesia One Towers', 'India Bulls Sky Forest Tower', 'Capital Towers', 'One Avighna Park', 'The Destiny ( Tower )', 'Oberoi Esquire Towers', 'Bhoomi Celestia']}\n" + ] + } + ], + "source": [ + "print(f\"Predicted answer: {prediction['answers'][0].answer}\")\n", + "print(f\"Meta field: {prediction['answers'][0].meta}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pgmG7pzL5ceh" + }, + "source": [ + "### Pipeline\n", + "The Retriever and the Reader can be sticked together to a pipeline in order to first retrieve relevant tables and then extract the answer.\n", + "\n", + "**Notice**: Given that the `TableReader` does not provide useful confidence scores and returns an answer for each of the tables, the sorting of the answers might be not helpful." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "G-aZZvyv4-Mf" + }, + "outputs": [], + "source": [ + "# Initialize pipeline\n", + "from haystack import Pipeline\n", + "\n", + "table_qa_pipeline = Pipeline()\n", + "table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "table_qa_pipeline.add_node(component=reader, name=\"TableReader\", inputs=[\"TableTextRetriever\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "m8evexnW6dev", + "outputId": "40514084-f516-4f13-fb48-6a55cb578366" + }, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Example query whose answer resides in a table\n", - "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" - ], - "metadata": { - "id": "QYOHDSmLpzEg" - }, - "execution_count": null, - "outputs": [] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: How many twin buildings are under construction?\n", + "Answers:\n", + "[ { 'answer': '12',\n", + " 'context': Name ... Status\n", + "0 Twin Towers II ... Never built\n", + "1 World Trade Center ... Destroyed\n", + "2 Three Sixty West ... Under construction\n", + "3 Gateway Towers ... Under construction\n", + "4 Rustomjee Crown ... Under construction\n", + "5 Orchid Heights ... On-hold\n", + "6 Hermitage Towers ... Proposed\n", + "7 Lokhandwala Minerva ... Under construction\n", + "8 Lamar Towers ... Under construction\n", + "9 Indonesia One Towers ... Under construction\n", + "10 Sky link ... Approved\n", + "11 Vida Za'abeel ... Proposed\n", + "12 Broadway Corridor Twin Towers ... Never built\n", + "13 India Bulls Sky Forest Tower ... Under construction\n", + "14 Capital Towers ... Under construction\n", + "15 One Avighna Park ... Under construction\n", + "16 NEB Towers ... On hold\n", + "17 The Destiny ( Tower ) ... 
Under construction\n", + "18 Oberoi Esquire Towers ... Under construction\n", + "19 Bhoomi Celestia ... Under construction\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '7',\n", + " 'context': Building or structure ... Listing\n", + "0 Ford Assembly Plant Building Now Public Storage ... Seattle landmark\n", + "1 Immanuel Lutheran Church ... Seattle landmark NRHP\n", + "2 Jensen Block ... Seattle landmark\n", + "3 Lake Union Steam Plant and Hydro House Now Zymogenetics ... Seattle landmark\n", + "4 New Richmond Laundry Now part of the Alley24 development ... Seattle landmark\n", + "5 St. Spiridon Russian Orthodox Cathedral ... Seattle landmark\n", + "6 Supply Laundry Building Now part of the Stackhouse development ... Seattle landmark NRHP\n", + "\n", + "[7 rows x 3 columns]},\n", + " { 'answer': '8',\n", + " 'context': Years Venue Location\n", + "0 1989 Bamm Hollow Country Club Lincroft , New Jersey\n", + "1 1987-88 Navesink Country Club Middletown , New Jersey\n", + "2 1985-86 Fairmount Country Club Chatham , New Jersey\n", + "3 1983-84 Upper Montclair Country Club Clifton , New Jersey\n", + "4 1982 Wykagyl Country Club New Rochelle , New York\n", + "5 1981 Ridgewood Country Club Paramus , New Jersey\n", + "6 1979-80 Upper Montclair Country Club Clifton , New Jersey\n", + "7 1976-78 Forsgate Country Club Monroe Township , New Jersey},\n", + " { 'answer': '8',\n", + " 'context': Model Specification ... Prime mover Power output\n", + "0 RS-1 E-1641A ... 6-539T 1,000 hp ( 0.75 MW )\n", + "1 RS-2 E-1661 , E-1661A , E-1661B ... 12-244 1,500 hp ( 1.12 MW )\n", + "2 RS-2 E-1661C ... 12-244 1,600 hp ( 1.19 MW )\n", + "3 RS-3 E-1662 , E-1662A , E-1662B ... 12-244 1,600 hp ( 1.19 MW )\n", + "4 RS-11 DL-701 ... 12-251 1,800 hp ( 1.34 MW )\n", + "5 RS-27 DL-640 ... 16-251 2,400 hp ( 1.79 MW )\n", + "6 RS-32 DL-721 ... 12-251 2,000 hp ( 1.49 MW )\n", + "7 RS-36 DL-701XAP ... 
12-251 1,800 hp ( 1.34 MW )\n", + "\n", + "[8 rows x 7 columns]},\n", + " { 'answer': '10',\n", + " 'context': Name or designation ... Notes\n", + "0 Aluminum Overcast ... One of only ten flyable B-17s\n", + "1 Avro Lancaster PA474 ... One of only two Lancasters in flying condition in the world\n", + "2 Avro Vulcan XH558 , aka Spirit of Great Britain ... The only Cold War / Falklands War -era Vulcan bomber to fly after 1986 . Res...\n", + "3 Douglas DC-7B N836D ... \n", + "4 Douglas R4D-3 N763A ... Used by the US Navy during World War II . Placed on the National Register of...\n", + "5 FIFI ... One of only two B-29s flying\n", + "6 Glacier Girl ... Forced to land in Greenland in 1942 along with five other P-38s and two B-17...\n", + "7 Hawker Hurricane PZ865 ... Last Hurricane produced . Retained by Hawker Aircraft for trials work . Give...\n", + "8 My Gal Sal ... Forced to land on the Greenland icecap during World War II and abandoned , a...\n", + "9 Piccadilly Lilly II ... Last B-17 to serve in the US Air Force , flying her last mission in 1959 . U...\n", + "10 The Pink Lady ... Only flying B-17 survivor to have seen action in Europe during World War II\n", + "11 Sally B ... Only airworthy B-17 left in Europe . Used in the 1990 film Memphis Belle\n", + "12 Sentimental Journey ... Based at the Commemorative Air Force Museum in Mesa , Arizona , and regularl...\n", + "13 Shoo Shoo Baby ... Crash-landed in Sweden in 1944 . Restored from 1978 to 1988\n", + "14 Swamp Ghost ... Ran out of fuel and crash-landed in a swamp in Papua New Guinea . Recovered ...\n", + "15 Texas Raiders ... Maintained and flown by the Commemorative Air Force ( formerly Confederate A...\n", + "16 Thunderbird ... Housed at the Lone Star Flight Museum in Galveston , Texas\n", + "17 Worry Bird ... Served in World War II and the Korean War before being retired in 1957 and p...\n", + "18 Yankee Lady ... 
Flyable\n", + "\n", + "[19 rows x 6 columns]},\n", + " { 'answer': '13',\n", + " 'context': N Year Country ... Link Remark K\n", + "0 003+ 2013 INDIA ... LK RK K\n", + "1 005 2006 USA ... LK RK K\n", + "2 010 2014 ZAF ... LK RK K\n", + "3 020 2010 USA ... LK RK K\n", + "4 030 201 ? USA ... LK RK K\n", + "5 040 2007 USA ... LK RK K\n", + "6 042 2004 USA ... LK Only G-S With Large Battery K\n", + "7 050 201 ? USA ... LK RK K\n", + "8 100 20 ? ? USA ... LK RK K\n", + "9 200 20 ? ? USA ... LK RK K\n", + "10 300 2013 EUR ... LK RK K\n", + "11 400 20 ? ? USA ... LK RK K\n", + "12 995 20 ? ? USA ... LK RK K\n", + "\n", + "[13 rows x 12 columns]},\n", + " { 'answer': '5',\n", + " 'context': Team ... Capacity\n", + "0 Barnsley ... 23,009\n", + "1 Blackpool ... 16,750\n", + "2 Bradford City ... 25,136\n", + "3 Burton Albion ... 6,912\n", + "4 Bury ... 11,840\n", + "5 Chesterfield ... 10,400\n", + "6 Colchester United ... 10,105\n", + "7 Coventry City ... 32,500\n", + "8 Crewe Alexandra ... 10,066\n", + "9 Doncaster Rovers ... 15,231\n", + "10 Fleetwood Town ... 5,311\n", + "11 Gillingham ... 11,582\n", + "12 Millwall ... 20,146\n", + "13 Oldham Athletic ... 13,512\n", + "14 Peterborough United ... 14,319\n", + "15 Port Vale ... 18,947\n", + "16 Rochdale ... 10,249\n", + "17 Scunthorpe United ... 9,183\n", + "18 Sheffield United ... 32,702\n", + "19 Shrewsbury Town ... 9,875\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': '7',\n", + " 'context': Resource Name ... Added\n", + "0 Whitfield Estates-Broughton Street Historic District ... October 29 , 1993\n", + "1 John M. Beasley House ... March 5 , 1996\n", + "2 Whitfield Estates-Lantana Avenue Historic District ... March 8 , 1997\n", + "3 Austin House ... February 5 , 1998\n", + "4 Reid-Woods House ... August 31 , 2000\n", + "5 Villa Serena Apartments ... September 29 , 2000\n", + "6 Paul M. Souder House ... November 2 , 2000\n", + "7 Stevens-Gilchrist House ... 
August 17 , 2001\n", + "\n", + "[8 rows x 3 columns]},\n", + " { 'answer': '19',\n", + " 'context': Name ( Alternative names in parenthesis ) ... Carries\n", + "0 Arboretum Sewer Trestle ... Sewer and a footpath\n", + "1 Ballard Bridge ( 15th Avenue Bridge ) ... 15th Avenue NW\n", + "2 Cowen Park Bridge ... 15th Avenue NE\n", + "3 First Avenue South Bridge ... State Route 99\n", + "4 Fremont Bridge ( Fremont Avenue Bridge ) ... Road connecting Fremont Avenue N and 4th Avenue N\n", + "5 George Washington Memorial Bridge ( Aurora Bridge ) ... State Route 99\n", + "6 Homer M. Hadley Memorial Bridge ( Third Lake Washington Bridge ) ... Interstate 90\n", + "7 Jeanette Williams Memorial Bridge ( West Seattle Bridge ) ... Road connecting Fauntleroy Way SW and the Spokane Street Viaduct\n", + "8 Jose Rizal Bridge ( 12th Avenue South Bridge ) ... 12th Avenue S and Interstate 90\n", + "9 Lacey V. Murrow Memorial Bridge ... Interstate 90\n", + "10 Magnolia Bridge ... W Garfield Street\n", + "11 Montlake Bridge ... State Route 513\n", + "12 North Queen Anne Drive Bridge ... N Queen Anne Drive\n", + "13 Salmon Bay Bridge ... BNSF Railway\n", + "14 Ship Canal Bridge ... Interstate 5\n", + "15 Schmitz Park Bridge ... SW Admiral Way\n", + "16 Spokane Street Bridge ... SW Spokane Street\n", + "17 SR 520 Albert D. Rosellini Evergreen Point Floating Bridge ( Evergreen Point... ... State Route 520\n", + "18 20th Avenue NE Bridge ( Ravenna Park Bridge ) ... 20th Avenue NE ( pedestrian access only )\n", + "19 University Bridge ... Eastlake Avenue NE\n", + "\n", + "[20 rows x 6 columns]},\n", + " { 'answer': '8',\n", + " 'context': Location ... Comments\n", + "0 Ayr ... Known as Wonderwest World 1988-1998 ; operated as Craig Tara by Haven since ...\n", + "1 Bahamas ... The site is now occupied by a new hotel and marina complex known as Old Baha...\n", + "2 Barry Island ... Operated independently until closure in 1996 . Demolished in 2005\n", + "3 Bognor Regis ... 
Known as Southcoast World 1987-1998 . Still open as Butlins Bognor Regis\n", + "4 Clacton ... Demolished , now a housing estate . Small area yet to be redeveloped\n", + "5 Filey Holiday Camp ... Operated independently for six weeks in 1986 , but the venture failed and it...\n", + "6 Minehead ... Known as Somerwest World 1986-1998 . Still open as Butlins Minehead 30 April...\n", + "7 Mosney ... Operated independently until closure and conversion into an Irish Government...\n", + "8 Pwllheli ... Known as Starcoast World 1990-1998 ; operated as Hafan Y Mor by Haven since ...\n", + "9 Skegness ... Known as Funcoast World 1987-1998 . Still open as Butlins Skegness\n", + "\n", + "[10 rows x 4 columns]}]\n" + ] + } + ], + "source": [ + "prediction = table_qa_pipeline.run(\"How many twin buildings are under construction?\")\n", + "print_answers(prediction, details=\"minimum\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Open-Domain QA on Text and Tables\n", + "With Haystack, you can not only do QA on texts or tables alone, but also use both texts and tables together as your source of information.\n", + "\n", + "To demonstrate this, we add 1,000 sample text passages from the OTT-QA dataset."
+ ], + "metadata": { + "id": "8uMzl9Ml_D1B" + } + }, + { + "cell_type": "code", + "source": [ + "# Add 1,000 text passages from OTT-QA to our document store.\n", + "\n", + "\n", + "def read_ottqa_texts(filename):\n", + " processed_passages = []\n", + " with open(filename) as passages:\n", + " passages = json.load(passages)\n", + " for title, content in passages.items():\n", + " title = title[6:]\n", + " title = title.replace(\"_\", \" \")\n", + " document = Document(content=content, content_type=\"text\", meta={\"title\": title})\n", + " processed_passages.append(document)\n", + "\n", + " return processed_passages\n", + "\n", + "\n", + "passages = read_ottqa_texts(f\"{doc_dir}/ottqa_texts_sample.json\")\n", + "document_store.write_documents(passages, index=document_index)" + ], + "metadata": { + "id": "4CBcIjIq_uFx" + }, + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "document_store.update_embeddings(retriever=retriever, update_existing_embeddings=False)" + ], + "metadata": { + "id": "j1TaNF7SiKgH" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Pipeline for QA on Combination of Text and Tables\n", + "We are using one node for retrieving both texts and tables, the `TableTextRetriever`. In order to do question-answering on the Documents coming from the `TableTextRetriever`, we need to route Documents of type `\"text\"` to a `FARMReader` (or alternatively `TransformersReader`) and Documents of type `\"table\"` to a `TableReader`.\n", + "\n", + "To achieve this, we make use of two additional nodes:\n", + "- `RouteDocuments`: Splits the List of Documents retrieved by the `TableTextRetriever` into two lists containing only Documents of type `\"text\"` or `\"table\"`, respectively.\n", + "- `JoinAnswers`: Takes Answers coming from two different Readers (in this case `FARMReader` and `TableReader`) and joins them into a single list of Answers."
+ ], + "metadata": { + "id": "c2sk_uNHj0DY" + } + }, + { + "cell_type": "code", + "source": [ + "from haystack.nodes import FARMReader, RouteDocuments, JoinAnswers\n", + "\n", + "text_reader = FARMReader(\"deepset/roberta-base-squad2\")\n", + "# In order to get meaningful scores from the TableReader, use \"deepset/tapas-large-nq-hn-reader\" or\n", + "# \"deepset/tapas-large-nq-reader\" as TableReader models. The disadvantage of these models is, however,\n", + "# that they are not capable of doing aggregations over multiple table cells.\n", + "table_reader = TableReader(\"deepset/tapas-large-nq-hn-reader\")\n", + "route_documents = RouteDocuments()\n", + "join_answers = JoinAnswers()" + ], + "metadata": { + "id": "Ej_j8Q3wlxXE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "text_table_qa_pipeline = Pipeline()\n", + "text_table_qa_pipeline.add_node(component=retriever, name=\"TableTextRetriever\", inputs=[\"Query\"])\n", + "text_table_qa_pipeline.add_node(component=route_documents, name=\"RouteDocuments\", inputs=[\"TableTextRetriever\"])\n", + "text_table_qa_pipeline.add_node(component=text_reader, name=\"TextReader\", inputs=[\"RouteDocuments.output_1\"])\n", + "text_table_qa_pipeline.add_node(component=table_reader, name=\"TableReader\", inputs=[\"RouteDocuments.output_2\"])\n", + "text_table_qa_pipeline.add_node(component=join_answers, name=\"JoinAnswers\", inputs=[\"TextReader\", \"TableReader\"])" + ], + "metadata": { + "id": "Zdq6JnF5m3aP" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Let's have a look at the structure of the combined Table and Text QA pipeline.\n", + "from IPython import display\n", + "\n", + "text_table_qa_pipeline.draw()\n", + "display.Image(\"pipeline.png\")" + ], + "metadata": { + "id": "K4vH1ZEnniut", + "outputId": "85aa17a8-227d-40e4-c8c0-5d0532faa47a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 540 + } + }, +
"execution_count": 22, + "outputs": [ { - "cell_type": "code", - "source": [ - "# We can see both text passages and tables as contexts of the predicted answers.\n", - "print_answers(predictions, details=\"minimum\")" - ], - "metadata": { - "id": "4kw53uWep3zj", - "outputId": "b332cc17-3cb8-4e20-d79d-bb4cf656f277", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n", - "Query: What is Cuba's national tree?\n", - "Answers:\n", - "[ { 'answer': 'Cuban royal palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", - " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", - " 'evergreen tree native to western Cuba in the Cuban pine '\n", - " 'forests ecoregion .'},\n", - " { 'answer': \"Glenn O'Brien\",\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... 
The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . 
Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Guy Oseary',\n", - " 'context': Book title ... Notes\n", - "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", - "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", - "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", - "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", - "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", - "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", - "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", - "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", - "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", - "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", - "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... 
Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", - "\n", - "[11 rows x 6 columns]},\n", - " { 'answer': 'Belize',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Palmyra palm',\n", - " 'context': Country ... Scientific name\n", - "0 Afghanistan ... \n", - "1 Albania ... Olea europaea\n", - "2 Antigua and Barbuda ... Bucida buceras\n", - "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", - "4 Australia ... Acacia pycnantha\n", - "5 Bahamas ... Guaiacum sanctum\n", - "6 Bangladesh ... Mangifera indica\n", - "7 Belize ... Swietenia macrophylla\n", - "8 Bhutan ... Cupressus cashmeriana\n", - "9 Brazil ... Caesalpinia echinata\n", - "10 Cambodia ... Borassus flabellifer\n", - "11 Canada ... Acer\n", - "12 Chile ... Araucaria araucana\n", - "13 Colombia ... Ceroxylon quindiuense\n", - "14 Costa Rica ... Enterolobium cyclocarpum\n", - "15 Croatia ... Quercus robur\n", - "16 Cuba ... Roystonea regia\n", - "17 Cyprus ... Quercus alnifolia\n", - "18 Czech Republic ... Tilia cordata\n", - "19 Denmark ... 
Fagus sylvatica\n", - "\n", - "[20 rows x 3 columns]},\n", - " { 'answer': 'Guadeloupe',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Basse-Terre',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'East Caribbean dollar',\n", - " 'context': State ... Official Language ( s )\n", - "0 Antigua and Barbuda ... English\n", - "1 Dominica ... English\n", - "2 Grenada ... English\n", - "3 Montserrat ... English\n", - "4 Saint Kitts and Nevis ... English\n", - "5 Saint Lucia ... English\n", - "6 Saint Vincent and the Grenadines ... English\n", - "7 Anguilla ... English\n", - "8 British Virgin Islands ... English\n", - "9 Guadeloupe ... French\n", - "10 Martinique ... French\n", - "\n", - "[11 rows x 10 columns]},\n", - " { 'answer': 'Jenkins',\n", - " 'context': NRHP reference number ... County\n", - "0 72000402 ... Wilkes\n", - "1 ... Meriwether\n", - "2 ... Bartow\n", - "3 71000280 ... Jenkins\n", - "4 ... Chatham\n", - "5 89002015 ... Thomas\n", - "6 ... Glynn\n", - "7 75000615 ... Walton\n", - "8 84001156 ... Sumter\n", - "9 79000713 ... 
Cobb\n", - "10 82002491 ... Twiggs\n", - "11 74000703 ... Taliaferro\n", - "12 80001039 ... Floyd\n", - "13 90000805 ... Gwinnett\n", - "14 73000620 ... Decatur\n", - "15 79000731 ... Houston\n", - "16 95000741 ... Grady\n", - "17 97000559 ... Greene\n", - "18 74000662 ... Brooks\n", - "19 75000616 ... Washington\n", - "\n", - "[20 rows x 4 columns]},\n", - " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", - " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", - " 'perennial plant in the family Primulaceae , native to '\n", - " 'Northern Europe and northern Asia , and '},\n", - " { 'answer': 'Poospiza',\n", - " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", - " 'the South American lowlands and the Andes mountains . '\n", - " 'Generally they are arboreal feeders in '},\n", - " { 'answer': 'golden-crowned sparrow',\n", - " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", - " 'a large American sparrow found in the western part of '\n", - " 'North America .'},\n", - " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush',\n", - " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", - " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", - " 'flower heads . 
It is a rare variety '},\n", - " { 'answer': 'rain',\n", - " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", - " 'the southeastern rain forest of the Amazon in Puerto '\n", - " 'Maldonado , Tambopata , the Sacred Valley'}]\n" - ] - } + "output_type": "execute_result", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUkAAAILCAYAAABl8m5SAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nOzdeVxUdds/8M+wDsMygKzKoiyKILiAgYAL6VOWeodoZqKmZbl0u5Rad7Y899PdbqlZWplkJrmkopm2mOYekqIIskmQICr7wAybzHL9/ujHuSVwRBs4I1zv1+u80DNnvt/rHJjPnP1IiIjAGGOsLQoTsStgjDFjxiHJGGN6cEgyxpgeZmIXwLoHIkJlZSUqKipQV1cHhUIBALhx4wbq6+sBADKZDJaWlgAABwcHWFtbw8nJCT169IBEIhGtdta9cUgygykqKsLFixeRn5+PgoICFBQUID8/H+Xl5aioqIBOp7urdk1MTODk5ARnZ2f4+vrCx8cHPj4+8PX1xYABA+Dl5WXgOWHsvyR8dJvdjYqKChw/fhynT5/G+fPncf78eVRWVgIAXFxc0KdPHyHM3Nzc4OTkBFdXVzg5OcHW1hZyuRwAYGFhAWtrawBAXV0dmpqaAAA1NTVQqVSoqKhAaWkpKioqUFJSIoTvH3/8gbKyMgBAjx49MHjwYAwePBgREREYMWIEnJycRFgqrAtScEiydmlsbMThw4fx008/4ejRo7h48SJMTEwQFBSEIUOGCCE1cOBA2NnZdUpNSqUSFy5cEEL63LlzyMzMhE6nw4ABAzBq1Cg8+OCDGD16NKRSaafUxLocDkl2a3V1ddi/fz+SkpLwww8/oLa2FkOGDMHIkSMxatQoDB8+HPb29mKX2UJNTQ2OHz+Oo0eP4ujRozh//jxsbGzw0EMPIS4uDuPHjxfWXBlrBw5J1lpqaiq++uorJCYmoqamBhEREXj00UcxefJk9OrVS+zy7kh5eTl++OEH7Ny5EwcPHoRUKsUjjzyCmTNnYsyYMWKXx4wfhyT7k1arxa5du/Duu+/i/PnzCAwMxJw5czB9+nQ4OzuLXZ5BlJeXY8uWLUhISEBWVhYGDx6MF154AY8++ihMTU3FLo8ZJwWIdWtqtZoSEhLI39+fTE1NaerUqfTrr7+KXVaHO3XqFD322GNkampKfn5+tHHjRlKr1WKXxYxPFZ9M3o0dPHgQgwYNwvz58zFixAhkZ2dj27ZtGDZsmNildbjIyEhs374dOTk5GDVqFBYsWICBAwfip59+Ers0ZmQ4JLuhK1euYMKECXjwwQfh7++PzMxMbNy4Ef7+/mKX1un8/Pzw+eefIysrCwEBARg7diwmTJiAK1euiF0aMxIckt3Mtm3bEBISgvz8fBw+fBh79uyBn5+f2GWJztfXF7t378Yvv/yC/Px8hISEYOvWrWKXxYwAh2Q30djYiJkzZyI+Ph7Tp09Hamoq7r//frHLMjoxMTFITU3FjBkzMH36dMycORONjY1il8VExEe3u4GKigrExsYiKysLW7duxdixY8Uu6Z7w448/Ytq0aejfvz++/fZbvoqne+JTgLq64uJijBo1CkSE/fv3o3///mKXdE/JycnBuHHjIJFIcPToUXh4eIhdEutcHJJdWWVlJUaMGAETExMcPnwYLi4uYpd0TyovL8fo0aOh0Whw/PhxXqPsXjgku6obN25g5MiRKCsrw8mTJ9GzZ0+xS7qnXbt2DdHR0XB2dsaxY8f4WvDugx/f0FWtWLEC2dnZ+Omnn
zggDaBnz544ePAgcnNzsWLFCrHLYZ2IQ7ILOnz4MNasWYOPPvrI4Oc+HjlyBI899hi8vLwglUpha2uL4OBgLFu2DMXFxQbty9j4+fnho48+wpo1a3Do0CGxy2GdRbSLfViH0Ol0FBISQrGxsQZv++WXXyYANHv2bDp//jw1NDRQTU0N/fjjjxQaGkpyuZyOHj1q8H6NzcSJEyk4OJh0Op3YpbCOV8Uh2cUkJSWRRCKh9PR0g7a7b98+AkBPPfVUm68rlUry8/MjJycnqqysNGjfxiYzM5NMTExo9+7dYpfCOh5fu93VfPjhh4iNjUVwcLBB2125ciUA4LXXXmvzdVtbWyxduhQVFRVISEgwaN/GJjAwEBMnTsSaNWvELoV1Ag7JLkShUODUqVN4/PHHDdpufX09kpOT4enpqfd5MpGRkQD+vHEGACxatAgWFhZwc3MTpnn22WdhbW0NiUSCiooKYbxWq8Vrr70GLy8vWFlZISQkBDt27AAAvPfee5DJZLC1tUVZWRmWLl2KXr16Yfjw4ZBIJJBIJPD19cX58+cBALNnz4ZMJoNcLse+ffsMuiyaPf744/j111+FR1awLkzsdVlmODt27CBzc3NSKBQGbTc7O5sAUGhoqN7pSkpKCAD16dNHGBcfH0+urq4tplu5ciUBoPLycmHcsmXLyNLSknbt2kUKhYJWrFhBJiYmdObMGSL67/7QxYsX00cffURxcXGUnZ1NkyZNIlNTU7p69WqLPqZNm0b79u37u7N+SzU1NWRubk7bt2/vsD6YUeDN7a4kNzcXPj4+Bn+kQm1tLQAID++6FQcHhxbTt1djYyPWr1+PiRMnYtKkSbC3t8crr7wCc3NzbNq0qcW077zzDv75z39i9+7dCAgIwPz586HValtMp1QqcebMGTz88MN3VMedsLOzg5+fH3JzczusD2YcOCS7kJKSkhabtoZia2sL4M/nx+jT/CztO72TeW5uLurr6zFgwABhnJWVFdzc3JCTk6P3vffffz/69u2LL774AvT/r4vYvn07pk6d2uF3G3d3d8f169c7tA8mPg7JLkSlUgmBZkheXl4wNzdHaWmp3ulKSkqE6e9EXV0dAOCVV14R9jFKJBIUFhaivr5e73slEgnmzZuHgoICHD58GADw1Vdf4amnnrqjGu6GnZ0dlEplh/fDxMUh2YW4uLgIz6I2JCsrK0RFRaG4uBiXL1++5XQnT54EAMTGxt5R+81rnqtXrwYRtRiSk5Nv+/5Zs2ZBKpVi48aNyM3NhZ2dHby9ve+ohrtRUlICV1fXDu+HiYtDsgtxd3fH1atXO6Ttl156CQDwf//3f22+rlQqsWrVKnh6emLq1KnCeDMzM6jVar1te3p6QiqVIi0t7a5qc3BwwGOPPYa9e/fi/fffx9NPP31X7dypq1evdsjuDWZcOCS7kKFDh+Lq1avIy8szeNsPPPAA3nrrLWzevBmzZs3ChQsX0NjYCKVSiYMHDyImJga1tbXYu3dviwM8fn5+qKqqwt69e6FWq1FeXo7CwsIWbUulUsyePRvbtm3D+vXroVQqodVqUVxc3O59fvPnz8eNGzewf/9+TJgwwaDz3paCggJcuXIF4eHhHd4XE5moB9eZQanVanJwcKA1a9Z0WB/Jyck0bdo08vLyIgsLC5JIJASAPDw8qKqqqtX0lZWVFBMTQ1KplPr06UMLFy6k5cuXEwDy8/OjoqIiIiK6ceMGvfjii+Tl5UVmZmbk7OxMkyZNoszMTHr33XfJysqKAJCnpydt2bKlzdoGDx5ML730UofN+80+/PBDsre3p6ampk7pj4mmim+V1sU8+eSTSElJQUZGBkxMOn5DobKyEqGhoSgsLMRbb70lbJaLYdy4cfj444/Rp0+fDu1Hp9MhJCQEoaGh2Lx5c4f2xUTHt0rral566SXk5uZi165dndJfjx49sHfvXtjZ2eHll1/Gm2++ifr6enTGd+/N+zrT09MhlUo7PCABYNeuX
cjOzsby5cs7vC9mBERelWUdYNq0aeTr60sqlarT+jx37hxFRkaSpaUleXl50cGDBzu8z+eee44uXbpEubm5NGTIEMrMzOzwPlUqFfn6+tLjjz/e4X0xo1BlJnZIM8NbvXo1QkJCsGTJEmzcuLFT+hw8eDBOnTrVKX01k8lkCAgIQK9evbBu3ToEBgZ2eJ/PPfccampq8MEHH3R4X8w48D7JLmrv3r2Ii4vDF198gVmzZoldTpfw1VdfYdasWdi9ezcmTpwodjmsc/A+ya4qNjYWK1aswNNPP91hd8LpTvbt24ennnoK//rXvzgguxlek+zi5s+fjy+//BLbt2/HI488InY596Rvv/0WU6dOxRNPPIFPPvkEEolE7JJY5+E1ya5u3bp1mDVrFiZNmsQ3ib0LH374ISZNmoQnnngC69at44DshvjATRdnYmKCTz75BL6+vli6dCkuXLiAtWvXdsiNMLoSlUqFxYsXY/PmzXj33XexbNkysUtiIuHN7W5k//79eOqpp2BtbY0tW7YgKipK7JKM0qlTpzBz5kyoVCp88cUXGD9+vNglMfHw5nZ3Mn78eGRkZCAkJAQjRozAzJkzb3v7s+6ksrISixcvxsiRI+Hv74+0tDQOSMY3uOhuXFxcsHfvXmzZsgVHjhxBv379sHLlytvet7Era2howAcffABfX18kJSVh8+bN+PHHH9GzZ0+xS2NGgDe3u7H6+nq8++67eP/992FjY4PFixfj2Wefve1jGrqKmpoarF+/HmvWrEFtbS2WLl2KF198EdbW1mKXxoyHgkOSoaysDB9++CHWrVsHIsITTzyBOXPmICQkROzSOkR6ejoSEhKEm1MsWLAAS5YsgYuLi8iVMSPEIcn+q6amBp999hk+//xz/P777xg6dChmz56NuLi4e/4O3KWlpdizZw82bdqE3377Df7+/pgzZw7mzp3bbdac2V3hkGStERGOHTuGhIQEJCUlobGxEcOGDcPEiRMxfvx49OvXT+wS2yU3Nxf79+/Hnj17kJycDKlUiri4ODz11FMYOXIkn/PI2oNDkulXX1+PgwcPYs+ePdi/fz+qqqrQs2dPxMTEYOTIkYiOjkbfvn07/MmEt6PT6XDp0iWcPHkSR48exZEjR3Dt2jU4Ojpi/PjxmDhxIh544AHIZDJR62T3HA5J1n4ajQZnzpzBkSNHcPToUZw6dQr19fWwtrZGSEgIBg8ejIEDB8LX1xc+Pj7w9PSEmZlhr1fQaDS4cuUKCgoKkJ+fjwsXLiAtLQ0XLlxAXV0dZDIZoqKiMGrUKMTExGDo0KEGr4F1KxyS7O41NTUhIyMD58+fR1paGtLS0pCRkSE8ZtXc3BxeXl5wd3eHk5MTnJyc4OLiAjs7O9ja2sLMzAympqaws7MDAOHZNhqNBiqVCkqlEmVlZaioqEBFRQVKSkpQWFgo3GzXzs4OwcHBGDRoEAYNGoTBgwcjODgYFhYWoi0T1uVwSDLDq6ioQH5+vrC2d3PQlZWVQaVSoa6uDk1NTUIgAhCC08LCAtbW1rC1tYWLi0uLgG1eS/X19YWTk5PIc8q6AQ5JJr6pU6dCo9F02iMnGLsDfFkiY4zpwyHJGGN6cEgyxpgeHJKMMaYHhyRjjOnBIckYY3pwSDLGmB4ckowxpgeHJGOM6cEhyRhjenBIMsaYHhySjDGmB4ckY4zpwSHJGGN6cEgyxpgeHJKMMaYHhyRjjOnBIckYY3pwSDLGmB4ckowxpgeHJGOM6cEhyRhjenBIMsaYHhySjDGmB4ckYyLT6XRYvXo1IiMjxS6FtYFDkjER5eXlYcSIEXj++edRX18vdjmsDRySrMtqaGjo8LWzv9PHhQsX8K9//Qvz58/HoEGDDFwZMxQOSdZlJSQkoKyszGj7GDhwIHbv3o34+HhYWloauDJmKBySzGgQEVatWoX+/fvD0tISDg4OiI2NRU5OjjDNokWLYGFhATc3N2Hcs88+C2tra0gkElRUVAAAl
ixZgqVLlyI/Px8SiQR+fn5Yu3YtpFIpXFxcMG/ePLi7u0MqlSIyMhIpKSkG6YN1QcSYyB577DGaNGkSvfbaa2RhYUFbtmyh6upqSk9PpyFDhpCTkxOVlJQI08fHx5Orq2uLNlauXEkAqLy8XBg3adIk8vX1bTHd3LlzydramrKysqixsZEyMzNp6NChZGtrS0VFRQbp426Eh4fTwIED/3Y7zOCqeE2SGQWtVotVq1YhLi4O06dPh1wuR3BwMD799FNUVFRgw4YNBuvLzMxMWFsNDAzE+vXroVKpsGnTJoP1wboODklmFJRKJWpraxEWFtZi/NChQ2FhYdFic9jQwsLCIJPJWmzWM9aMQ5IZBbVaDQCwsbFp9Zq9vT1UKlWH9m9paYny8vIO7YPdmzgkmVEwNzcHgDbDsLq6Gh4eHh3Wt1qt7vA+2L2LQ5IZBTs7O9jY2ODs2bMtxqekpKCpqQmhoaHCODMzM2HN0xCOHj0KIkJERESH9cHuXRySzCiYmppi6dKlSEpKQmJiIpRKJTIyMjB//ny4u7tj7ty5wrR+fn6oqqrC3r17oVarUV5ejsLCwlZtOjo64tq1a7h8+TJUKpUQejqdDgqFAhqNBunp6ViyZAm8vLwwa9Ysg/XBuhCxj68z1nwKkE6no5UrV5K/vz+Zm5uTg4MDTZw4kXJzc1tMX1lZSTExMSSVSqlPnz60cOFCWr58OQEgPz8/4VSec+fOkbe3N1lZWVF0dDSVlJTQ3LlzydzcnHr16kVmZmZkZ2dHsbGxlJ+fb7A+2is5OZmioqLI3d2dABAAcnNzo8jISDp27NjfXKrMQKokRETixjTr7qZOnQqNRoNdu3Z1eF/z5s3Dzp07UVlZ2eF9sS5BwZvbrNvRarVil8DuIRySjBlITk4OJBLJbYepU6eKXSq7AxySrNtYsWIFNm3ahJqaGvTp08fgm/cBAQEgotsO27dvN2i/rGPxPkkmus7cJ8nYHeJ9kowxpg+HJGOM6cEhyRhjenBIMsaYHhySjDGmB4ckY4zpwSHJGGN6cEgyxpgeHJKMMaYHhyRjjOnBlyWyTpWUlNTq2uWMjAwQEUJCQlqMnzJlCiZPntyZ5TH2VwoOSdap0tLSMHjw4HZNe/bs2RaPbWBMBBySrPP17dsXeXl5eqfx9vbG5cuXO6cgxm6Nb3DBOt+MGTOEpyO2xcLCArNnz+7Eihi7NV6TZJ0uPz8f/v7+0Penl5OTg379+nViVYy1idckWefz9fXFoEGDIJFIWr0mkUgQEhLCAcmMBockE8XMmTNhamraaryZmRlmzpwpQkWMtY03t5korl+/Dg8PD+h0uhbjJRIJioqK4OHhIVJljLXAm9tMHO7u7hg+fHiLtUkTExNERkZyQDKjwiHJRDNjxowW/5dIJLypzYwOb24z0VRXV8PFxQVqtRoAYGpqitLSUvTo0UPkyhgT8OY2E4+9vT3Gjh0LMzMzmJqa4sEHH+SAZEaHQ5KJKj4+HlqtFkSE+Ph4scthrBUzsQtg9776+nrcuHFD+FlXV4empiYAgEqlgkajafWe5um1Wi0sLCyg0+mg1Wqxc+dOyGQyWFpatnqPmZkZbG1tAfx5VY61tbXw09LSEjKZrGNnlHVLvE+ym9JqtaioqBAGhUIBpVIJlUoFpVKJmpoaVFdXQ6lUthhUKhUaGhrQ2Ngo/DQmUqkUVlZWwk9bW1vY2dm1GOzt7SGXy2FnZye87uDgACcnJ2Fo6xxO1i3xDS66ErVajdLSUhQVFeH69esoLi5GRUUFysrKUFZW1iIUKyoqWr3fysqqRZg4ODi0ChgbGxthTe92P4H/htZfmZqaws7ODgDw/fffQ6vVYsKECQAApVIJrVbb6j03h/LNa676ftbW1rYK+uYvhOahoaGhVV83B6aTkxNcXFzg4uICJycneHh4wN3dHV5eXnB1ddV7HTq753FI3kvKysqQn5+PgoICXL58GdeuXUNxcTGuXbuGq1evorS0V
Dg5WyKRwNXVFc7OznB2doarq2urD72zs7Pwf0dHR9E+7Gq1GkQECwsL0fqvqqoSvjzKy8tbfamUlpaivLwc5eXlKC0tFa47NzExgaurK3r16oWePXvC09MT7u7u6N27N3x9feHj4wMXFxdR5osZBIekMSEiXL58Gbm5uSgoKBCG5mCsra0F8Of+OC8vL/Ts2RNeXl5wd3dHr169WnxQ3dzceA2ng6jVapSUlODKlSvCF9TVq1dx7do1YVxRUZGwX9bGxgY+Pj5CaDb/u1+/fvD29m7zGnZmNDgkxXLt2jVkZWUhMzNT+HnhwgUhCB0cHIQP1F8Hb29v3md2D1AoFC2+7P46AH9+4fn5+SEoKAiBgYHCz4CAAP4dGwcOyY6mVqtx8eJFpKam4uzZs0hNTUVmZqawH8zLywv9+/dHUFCQ8DMgIAAODg4iV846kkKhQHZ2NrKyspCdnY2LFy8iJycHRUVFAP7cPxwUFITQ0FCEhYUhNDQUAwYM4K2DzschaUhEhOzsbCQnJyM1NRWpqam4cOECbty4AWtrawwePBihoaEICQnBgAEDEBAQIBy8YAz486BVTk4OLl68iPT0dKSmpuL8+fOoq6uDpaUlBg4ciNDQUISGhmLYsGHo378/b653LA7Jv0Or1SInJwenTp3CyZMnceTIERQXF8Pc3Bz+/v7CH3NoaCjuu+8+0Q5MsHtb899Z8xdvamoq0tLSUFdXBzs7O9x3330YM2YMoqKiEB4ezmubhsUheafy8vKwf/9+HDp0CCdPnoRSqYSjoyOio6MxYsQIDB8+HEOGDIGZGZ+nzzqORqPBuXPncOLECRw/fhwnT55EVVUV5HI5oqOjMWbMGIwbNw7+/v5il3qv45C8HbVajRMnTuD777/Hd999h0uXLsHBwQFjxozBiBEjMHLkSAQFBcHEhK/wZOLR6XTIzMzEsWPHcPz4cRw6dAgKhQJ9+/bFhAkTMG7cOERHR/Na5p3jkGyLRqPBwYMH8fXXX+PAgQOoqalBYGAgxo0bh3HjxiEqKorXFJlR02g0OHXqFA4cOID9+/cjOzsbcrkc48aNQ3x8PB544AH+G24fDsmbnTt3Dlu2bMG2bdtQVlaGqKgoTJkyBePGjYOPj4/Y5TF21woKCnDgwAF88803OHXqFFxcXPD4449jxowZGDJkiNjlGTMOycbGRnz11VdYu3YtMjMz4e/vj+nTp2P69OkcjKxLKigoQGJiIhITE5GXl4egoCAsXLgQTzzxBKRSqdjlGZvuG5JVVVX45JNP8NFHH6GmpgYzZszAk08+iYiICLFLY6zTnD59Gl988QW2bNkCuVyOhQsXYv78+XB0dBS7NGPR/UJSpVLhjTfewPr162Fubo758+dj4cKFcHNzE7s0xkRTWlqKtWvX4pNPPoFarcb8+fPxyiuv8Hm83S0kt2/fjqVLl+LGjRt4+eWX8fTTT8PGxkbsshgzGrW1tdi4cSPeeOMNWFpa4v3338fjjz8udlliUoC6gby8PIqJiSETExN6+umnqaKiolP7f+yxxwhAu4bvvvtOb1thYWFkYmJCAwcO1DvdypUrydnZmQDQJ5980ul1/h27du2iPn36tOrT0tKSevfuTbNnz6aCgoIO65+I6MCBA2RnZ0f79u3r0H6MVUVFBT3zzDNkYmJCMTExlJeXJ3ZJYqnq8if3ffvttwgLC4NCoUBycjI2bNggynNUDh48iOrqaqjValy/fh0A8I9//ANNTU2oq6tDWVkZnn766du2c+bMGcTExNx2umXLluHXX38Vrc6/Y9KkSSgoKICvry/kcjmICFqtFkVFRXj99dexY8cOREREoLKyssNqoO6zgdWmHj164LPPPsPp06dRXV2NsLAw7Nu3T+yyRNGlQ3LTpk2YNGkSHn30USQnJ+O+++4TpQ6JRIKoqCjI5fIW56ZJJBKYm5tDJpPB2dkZoaGhd9TmvVBnezQ0NCAyMlLvNCYmJnBxccGMGTPwz3/+E
2VlZTh06JDB+2k2btw41NTUCDcC7q6GDh2K5ORkTJkyBXFxcfjiiy/ELqnTddmzSb/99lvMmTMHK1aswH/+8x9Ra9m2bVu7pps7d2672+yIKyc6os72SEhIQFlZWbun9/PzAwCUlJR0aD/sT5aWltiwYQPc3NwwZ84cODo6IjY2VuyyOk2XXJO8evUqZs6ciTlz5ogekHfjxIkTCAwMhFwuh1QqRXBwMH766acW0/z+++8ICAiAtbU1rKysMHz4cJw8efK2bWu1Wrz22mvw8vKClZUVQkJCsGPHjruqU19bX375JWxsbCCRSODg4IC9e/fi7Nmzwr0wp02bBgBYsmQJli5divz8fEgkEiEA9cnLywMADBw4sN31tNXPe++9B5lMBltbW5SVlWHp0qXo1asXEhIS4OXlBYlEgo8//rhd7TffjcfExAShoaGor68HALzwwgvC7/HLL7+8bTu3qik3N/eufkeG9Prrr+OZZ57BzJkzUVxcLHY5nUfsvaId4cknnyQfHx9qbGwUu5Q2Xb9+nQDQI4880ubrO3fupH//+99UVVVFlZWVFBERQT169BBeHz16NPn4+NAff/xBarWaLl68SOHh4SSVSunSpUvCdHl5ea0O3CxbtowsLS1p165dpFAoaMWKFWRiYkJnzpy54zpv11ZWVhbJZDJ64oknhPe89NJLtHHjxhbtTJo0iXx9fVu17+vrS3K5XPi/QqGgL7/8kmQyGY0bN+6O62mrn5dffpkA0OLFi+mjjz6iuLg4ys7OpitXrhAA+uijj9rVvkajod69e5OXlxdpNJoWfTz33HO0evXqdtd5q5qMQWNjI/n6+tKsWbPELqWzVHW5kKytrSVra2v69NNPxS7llm4XPn/11ltvEQAqKysjoj9D8q9Ht9PT0wkALVu2TBj315BsaGggmUxGU6dOFaapr68nS0tLWrBgwR3V2d62PvvsMwJAiYmJtHXrVnr++edbtaUvJPGXI9wSiYTeeOMNampquuN69IVkQ0NDi/F/Dcn2tL969WoCQN98840wTV1dHXl5eVFNTU2727lVTcZiw4YNJJPJSKVSiV1KZ+h6R7cvXryIuro6jB07VuxSDKZ5/2NbTxBsFhwcDLlcjvT09FtOk5ubi/r6egwYMEAYZ2VlBTc3N+Tk5NxRTe1t65lnnsHkyZMxb948fPPNN3jvvffuqJ/mo9tEhOXLl4OIIJfLW+2TNeS8taU97c+ZMwdyuRxr1qwRpklMTERsbKxwUnZH19kZxo4di/r6er1/a11JlwvJ6upqAIC9vb3Ildy9AwcOYNSoUXB2doalpSVeeI6W50MAACAASURBVOGFdr3P3NwcarX6lq/X1dUBAF555RVIJBJhKCwsFPahtdedtPXmm2+itrb2bx80efXVV+Hm5oYVK1bgypUrd13P3WhP+zY2NnjmmWfw66+/4rfffgMAfPLJJ1i0aFGn1dkZmi9ZbP6sdXVdLiQ9PDwAAPn5+SJXcneKioowceJEuLm5ISUlBTU1NXj33Xdv+z6NRoOqqip4eXndchpnZ2cAwOrVq4W1s+YhOTn5jupsb1tqtRqLFy/GqlWrkJycjDfeeOOO+rmZra0t3nnnHahUKixYsKDD5q0t7W1/0aJFMDc3x+rVq3H8+HF4enrC19e30+rsDM0Hzjw9PUWupHN0uZAMDAyEt7c3vv76a7FLuSsZGRlQq9VYsGABfHx8IJVK23VO5JEjR6DT6fTe9srT0xNSqRRpaWl/u872trVw4UI8/fTTeO655/D888/jP//5z98Kg5kzZyI8PBz79+/HN998c8f13K32tu/h4YEpU6Zg165dePXVV7FkyZK7aseYff311/Dy8mqxy6Ar63IhKZFI8Nxzz2H9+vW4dOmS2OXcseY1wUOHDqGxsRF5eXlISUlpNV1TUxNqamqE2/gvWrQI3t7emDVr1i3blkqlmD17NrZt24b169dDqVRCq9WiuLhYuLqmvdrT1rp16
9CrVy/ExcUBAN566y0EBgYiPj4eSqVSaMvR0RHXrl3D5cuXoVKp9O4ykEgkWLt2LSQSCRYtWgSFQtHueu6kn7+z7JYuXQqNRgOFQoH777//rtsxRnl5efj444+xZMmS7vMAsk48StRpmpqaKCwsjIKCgqi6ulrscgRKpZJGjBhBjo6OBIBMTEzIz8+P3nzzzRbTvfjii+To6Ej29vb06KOP0scff0wAyNfXl4qKimjTpk0UExNDLi4uZGZmRj169KDHH3+cCgsLhTY++OADcnV1JQBkbW1NcXFxRER048YNevHFF8nLy4vMzMzI2dmZJk2aRJmZmXdcp762JkyYQBKJhBwdHenXX38loj9PhTExMSEAJJfL6ezZs0REdO7cOfL29iYrKyuKjo6mpKQk6tu3r3BEu2fPnjRv3rwWfc+aNYsAkL29Pb399tvtmre/9vP888+TlZUVASBPT0/asmULERF99NFH5ObmRgBIJpPRP/7xj3Yvu2YxMTGtTnVqz3J7991326zJGFRXV9OAAQMoLCys1dkFXVhVl70LUHFxMYYNGwY3Nzf88MMPcHJyErskxu5ZFRUVeOihh3D9+nUkJyd3m/2RABRdbnO7mYeHB44dO4bKykqEhYXh7NmzYpfE2D3p7NmzCAsLQ1VVlXAwqjvpsiEJAD4+Pjhz5gz69++PYcOGYfHixS32hTHGbq2+vh7//ve/ERUVBR8fHyQnJ3fLR5p06ZAE/rzl0/fff4+EhARs27YNAQEB+Oqrr7r9rbAY0+e7775D//79sXbtWrz33nv4+eef4eLiInZZoujyIQn8eUR05syZyM7Oxvjx4zF79mwMGzYMu3fvhk6nE7s8xoyCTqdDUlIShg0bhtjYWDzwwAO4dOkSFi9eDFNTU7HLE023CMlmPXr0wIYNG5CSkoJevXphypQp6NevHz799FM0NDSIXR5jomhoaMCnn36KgIAAPProo3B3d8fp06fx+eef8wFPAF326HZ75OXl4YMPPsDmzZtha2uLadOmYfr06QgLCxO7NMY63NmzZ5GYmIitW7dCpVJh5syZWLp0Kfr27St2acakez0I7FbKysqwYcMGJCYmIjc3F4GBgZg+fTri4+P1XubH2L2mqKgIX3/9NRITE5GVlYV+/fph+vTpePrpp+Hq6ip2ecaIQ/KvUlJSkJiYiO3bt6OqqgoREREYP348Hn744VY3eWXsXnDhwgUcOHAABw4cwOnTp+Ho6IipU6di+vTpCA8PF7s8Y8cheStqtRo//vgj9u7di++//x4lJSXw9PTEuHHjMG7cONx///2QyWRil8lYK/X19fjll1+EYLxy5Qrc3Nzw8MMPIzY2FmPHju2Qx390URyS7ZWZmYn9+/fju+++Q3JyMkxMTDBw4EBERUUhOjoaY8aMgYODg9hlsm6orq4OycnJOHnyJE6dOoWTJ0+isbERgYGBmDBhAsaPH4/IyEiYmHSr47SGwiF5N65fv45Dhw7h+PHjOHHiBHJzc2FmZoZBgwZhxIgRiI6ORlhYWLe7MoF1jitXriA1NRUnTpzAiRMncP78eWg0GvTr1w/Dhw/HiBEjMGbMGLi7u4tdalfAIWkIJSUlQmAeO3YMmZmZ0Ol0cHFxQWhoqDCEhYUJ97tkrD2aA/HmoaysDCYmJggMDMTIkSMxYsQIjBgxAm5ubmKX2xVxSHYEpVKJ8+fPt/jDzsvLg06ng6urK0JCQhAUFITAwEBh4E317k2hUCArKwuZmZnIzs5GZmYm0tPTUVpaChMTE/j7+7f4wh08eLDwSAjWoTgkO4tSqcS5c+eQmpqKixcvCh+G2tpaAIC7uzuCgoLQv39/9O/fH76+vvDx8YG3tzfvZO8i1Go1CgsLUVBQgPz8fGRnZyMrKwtZWVnCvSRtbW0REBCAoKAgBAcHcyCKj0NSbIWFhcjOzsbFixeRk5ODixcvIjc3V3h+iKmpqfAIAB8fH2Ho3bs3PDw84ObmxjvkjYROp
0NJSQmKi4tx+fJlFBQUCIFYUFCAK1euCA9zs7e3R79+/TBgwAAEBARgwIAB6N+/P7y9vUWeC/YXHJLGqqqqSviQ/XW4cuUKNBoNAMDMzAyurq7w8vKCu7s7PDw84OHhAXd3d3h5ecHZ2RlOTk5wcnLqPneSNjAiQkVFBSoqKlBeXo6ioiJcv34dxcXFwh3Fi4qKUFpa2uL34unp2eKL7eah+WFazOhxSN6L1Go1iouLcfXqVRQXF+PatWu4cuUKrl27hqtXr+LKlSu4fv16i8cTmJiYCGHZPLi4uMDZ2Rk9evSAvb095HI57OzsIJfLYW9vDzs7O9jZ2cHS0lLEuTWcGzduQKlUQqlUorq6GjU1NVAqlaipqUF1dTUqKytRXl6OsrIyIRSbh5tvhGJubg53d3d4enqiV69e6NmzJzw9PdGzZ094eHigV69e8PDw4N0kXQOHZFdFRCgtLUV5eTkqKipQVlYm/LutMKipqbnlI00tLS2F8JTL5TAzM4Otre0tf5qbm8PGxkZ4v7W1NSwsLFq1a2Nj0ypI1Gq1sJ/2Zk1NTcLjWAGgtrZWmPbmnyqVChqNRvhZU1MjhOGNGzfanD+ZTAa5XN7ml4eTkxOcnZ3h4uIivO7m5sZr5d0HhyT7r5tDpbq6Wljrunmorq6GVquFUqkUwuivYfXXQKupqWnzlnTV1dVt3tezrSP9JiYmkMvlwv+bg7c5aJt/Noe1nZ0dTE1NW6wR3zw0rzk3hz5jt8AhycQ3depUaDQa7Nq1S+xSGPurrvuMG8YYMwQOScYY04NDkjHG9OCQZIwxPTgkGWNMDw5JxhjTg0OSMcb04JBkjDE9OCQZY0wPDknGGNODQ5IxxvTgkGSMMT04JBljTA8OScYY04NDkjHG9OCQZIwxPTgkGWNMDw5JxhjTg0OSMcb04JBkjDE9OCQZY0wPDknGGNODQ5IxxvTgkGSMMT04JBkTmU6nw+rVqxEZGSl2KawNHJKMiSgvLw8jRozA888/j/r6erHLYW3gkGRdVkNDQ4evnf2dPi5cuIB//etfmD9/PgYNGmTgypihcEiyLishIQFlZWVG28fAgQOxe/duxMfHw9LS0sCVMUPhkGRGg4iwatUq9O/fH5aWlnBwcEBsbCxycnKEaRYtWgQLCwu4ubkJ45599llYW1tDIpGgoqICALBkyRIsXboU+fn5kEgk8PPzw9q1ayGVSuHi4oJ58+bB3d0dUqkUkZGRSElJMUgfrAsixkT22GOP0aRJk+i1114jCwsL2rJlC1VXV1N6ejoNGTKEnJycqKSkRJg+Pj6eXF1dW7SxcuVKAkDl5eXCuEmTJpGvr2+L6ebOnUvW1taUlZVFjY2NlJmZSUOHDiVbW1sqKioySB93Izw8nAYOHPi322EGV8VrkswoaLVarFq1CnFxcZg+fTrkcjmCg4Px6aefoqKiAhs2bDBYX2ZmZsLaamBgINavXw+VSoVNmzYZrA/WdXBIMqOgVCpRW1uLsLCwFuOHDh0KCwuLFpvDhhYWFgaZTNZis56xZhySzCio1WoAgI2NTavX7O3toVKpOrR/S0tLlJeXd2gf7N7EIcmMgrm5OQC0GYbV1dXw8PDosL7VanWH98HuXRySzCjY2dnBxsYGZ8+ebTE+JSUFTU1NCA0NFcaZmZkJa56GcPToURARIiIiOqwPdu/ikGRGwdTUFEuXLkVSUhISExOhVCqRkZGB+fPnw93dHXPnzhWm9fPzQ1VVFfbu3Qu1Wo3y8nIUFha2atPR0RHXrl3D5cuXoVKphNDT6XRQKBTQaDRIT0/HkiVL4OXlhVmzZhmsD9aFiH18nbHmU4B0Oh2tXLmS/P39ydzcnBwcHGjixImUm5vbYvrKykqKiYkhqVRKffr0oYULF9Ly5csJAPn5+Qmn8pw7d468vb3JysqKoqOjqaSkhObOnUvm5ubUq1cvMjMzIzs7O4qNjaX8/HyD9dFeycnJFBUVRe7u7gSAAJCbmxtFR
kbSsWPH/uZSZQZSJSEiEjemWXc3depUaDQa7Nq1q8P7mjdvHnbu3InKysoO74t1CQre3GbdjlarFbsEdg/hkGTMQHJyciCRSG47TJ06VexS2R3gkGTdxooVK7Bp0ybU1NSgT58+Bt+8DwgIABHddti+fbtB+2Udi/dJMtF15j5Jxu4Q75NkjDF9OCQZY0wPDknGGNODQ5IxxvTgkGSMMT04JBljTA8OScYY04NDkjHG9OCQZIwxPTgkGWNMD74skXWqpKSkVtcuZ2RkgIgQEhLSYvyUKVMwefLkziyPsb9ScEiyTpWWlobBgwe3a9qzZ8+2eGwDYyLgkGSdr2/fvsjLy9M7jbe3Ny5fvtw5BTF2a3yDC9b5ZsyYITwdsS0WFhaYPXt2J1bE2K3xmiTrdPn5+fD394e+P72cnBz069evE6tirE28Jsk6n6+vLwYNGgSJRNLqNYlEgpCQEA5IZjQ4JJkoZs6cCVNT01bjzczMMHPmTBEqYqxtvLnNRHH9+nV4eHhAp9O1GC+RSFBUVAQPDw+RKmOsBd7cZuJwd3fH8OHDW6xNmpiYIDIykgOSGRUOSSaaGTNmtPi/RCLhTW1mdHhzm4mmuroaLi4uUKvVAABTU1OUlpaiR48eIlfGmIA3t5l47O3tMXbsWJiZmcHU1BQPPvggByQzOhySTFTx8fHQarUgIsTHx4tdDmOtmIldAOsampqaUFdXh4aGBjQ2NqK2tlbYjFYoFK2m12q1UCqV0Gq1sLCwgE6ng1arxc6dO2FnZ9fm6UH29vaQSCQwNzeHjY0NpFIprKysYG1tDQsLiw6fR9Y98T5Jhvr6epSVlaGkpARVVVWorq5GTU0NampqUF1dDYVCIfy7efyNGzegUCiEsDMGzeHq4OAAS0tLyOVyyOVy2NvbQy6Xw8HBQfh383hHR0e4ubnBxcUFMplM7FlgxodvcNGVVVRU4MqVKyguLkZhYaEQhKWlpSgvL0dpaSlKS0tRV1fX4n1mZmZCmNjb28PBwaFF2Njb28PS0hL29vYwNTWFnZ0dLCwsYG1t3ebana2tLczMWm+0NK8Zfv/999BqtZgwYQKICNXV1a2m1Wg0UKlUAP671trY2IiGhgbU1dWhqakJKpUKGo0G1dXVuHHjhhDqzT8VCkWLcRqNpkUf1tbWcHV1haurK5ydneHq6ioEqLe3Nzw8PODp6QknJydD/YqY8eOQvJeVlZXh0qVLyMvLw+XLl1FUVCSEYlFRERoaGoRpnZyc4ObmJnzwnZ2d4eLi0uLf7u7ucHR0hLW1dafOh1qtBhF1+iZzXV0dqqqqcP36dZSVlaG8vBwlJSUt/l1aWoqSkhJUVFQI77OysoKXlxc8PT3h4eEBb29veHt7w9/fH3379oWLi0unzgfrUBySxq6urg5ZWVm4dOmSEIjNQ01NDYA/P7Q+Pj7Ch9bT01NY8/Hw8ICXlxesrKxEnpN7W0NDA4qKilBcXCysmd/8hfTHH38IX0pyuRx+fn7w9/cXgrNv374ICgrq9C8g9rdxSBqTa9euITU1FVlZWcjMzERqaipyc3Oh1Wphbm4OT09P+Pj4wMfHB4GBgQgKCoKPjw969+4NExM+UUFsCoUCBQUFyMzMRFZWFgoKClBQUIDs7GzU19cD+PNKo9DQUAQFBSEwMBChoaEICAho80AVMwockmIpLCzE6dOnkZKSgpSUFFy4cAF1dXUwNTWFr68vQkJCEBwcjODgYISEhKB37978QbpHabVa/PHHH0hPT0dGRgYyMjKQnp6O/Px86HQ6WFtbY+DAgQgPD0d4eDgiIiLg7e0tdtnsTxySnaGpqQmnT5/Gr7/+KgRjSUkJzMzMEBISgoiICAwZMgQDBw5EYGAgH2XtJurr65GZmYn09HScO3cOp0+fRnp6OjQaDdzc3ITAjIyMREREBJ/mJA4OyY6g0+mQnZ2NU6dO4dChQ/jpp5+gVCqFTa3Q0FBER0cjMjKSA5G1oFarkZ6ejpMnT
yI1NRWnTp1CQUEBrKysEBUVhTFjxiAqKgrh4eF67+7ODIZD0lAqKyuxb98+7N+/H0ePHkVVVRVcXFxw//33Y/To0bj//vvh4+MjdpnsHvTHH3/g8OHDOHz4MH755ReUlZXB0dERo0aNwvjx4/GPf/yDL+fsOBySf8fVq1exd+9eJCUl4fjx4zAzM8P999+P//mf/8Ho0aMxYMCANu++zdjdIiJcvHgRhw8fxs8//4xffvkFGo0GI0eOxMSJExEbG4tevXqJXWZXwiF5p5RKJbZt24Yvv/wSKSkpsLGxwcMPP4y4uDg89NBDsLW1FbtE1o2oVCr88MMP2L17N3744QfU1tYiIiICTzzxBB5//HHY2dmJXeK9jkOyvX799Vds3LgR33zzDXQ6HSZPnowpU6ZgzJgxkEqlYpfHGBobG3Ho0CHs2LEDu3fvhomJCaZMmYI5c+YgMjJS7PLuVRyS+mg0GmzduhXvvfceMjMzMWjQIMyZMwfx8fGwt7cXuzzGbqm6uhpff/01Nm7ciLS0NAQFBeGFF17AtGnT2rxElN0Sh2RbNBoNEhMT8eabb+Ly5cuYPn06nn32WYSFhYldGmN37OzZs1i3bh0SExPRu3dvvPzyy5g+fTqHZfsoQKyF3bt3k4+PD5mbm9OTTz5J+fn5YpfEmEHk5+fTk08+Sebm5uTj40O7d+8Wu6R7QRVfy/b/lZSUYPLkyZg8eTKio6ORm5uLhIQEPm2HdRk+Pj5ISEhAbm4uoqOjhb/3kpISsUszahySALZu3YqgoCCcO3cOP/30EzZv3ow+ffp0eL+7d++Gj48PJBJJi0EqlaJPnz548skn8ccff3R4HXfqVnVbWFjAxcUFo0aNwsqVK9u82S4TX58+fbB582YcPHgQ586dQ1BQELZu3Sp2WcZL7HVZMWm1Wlq2bBlJJBJauHAh1dbWilKHr68vyeVyoabS0lL66quvSCaTkYuLC1VUVIhS1+3cXLdOpyOFQkFHjhyhWbNmkUQiIXd3dzpz5ozIVTJ9amtradGiRSSRSGj58uWk0+nELsnYVHXbkNTpdDR//nyytLSkxMREUWu5OWxu9sILLxAA2r59e4f2X19fT8OGDbvj992qbiKinTt3komJCbm4uFB1dfXfLfGedrfLtzNt2bKFLC0tacGCBRyULXXffZLvv/8+Pv/8c2zfvt1oH0Dl5+cHAB2+zyghIQFlZWUGbXPy5MmYNWsWysrK8Omnnxq07XtNRyxfQ5s+fTq2b9+Ozz//HB988IHY5RgXsWNaDOnp6WRubk4rV64UuxQiuvUa2fLlywkAHTlypMV4nU5HH3zwAQUEBJCFhQXZ29vTI488QtnZ2cI0CxcuJHNzc3J1dRXGLViwgGQyGQGg8vJyIiJavHgxWVhYEAACQL6+vkREpNFo6NVXXyVPT0+SSqUUHBzcao1W35okEdHx48cJAI0cOfKOam/21VdfUWhoKFlaWpJMJiNvb296/fXX2z1vq1evJplMRhKJhIYMGUIuLi5kZmZGMpmMBg8eTNHR0eTh4UGWlpYkl8tp+fLlLfrXtwzWrVtHMpmMrKysaO/evTR27FiytbWlXr160datW4U2brV8jx49SkOHDiUrKyuytbWlAQMGUE1NzS2XZWd5//33ydzcnNLT08UuxVh0z83tiRMnUmhoqNFsVvw1bBQKBX355Zckk8lo3LhxraZ/7bXXyMLCgrZs2ULV1dWUnp5OQ4YMIScnJyopKRGmi4+PbxEkREQrV65sESRERJMmTRI+vM2WLVtGlpaWtGvXLlIoFLRixQoyMTFpsY/xdiGpVCoJAHl6et5x7atXryYA9Pbbb1NlZSVVVVXRZ599RvHx8Xc0b//7v/9LACglJYXq6uqooqKCxo4dSwDowIEDVF5eTnV1dbRo0SICQGlpae1eBi+//DIBoMOHD1NNTQ2VlZXR8OHDydrampqamm65fGtra8nOzo7effddamhooJKSEoqLi2tRt
1h0Oh2FhoZSbGys2KUYi+4XkgqFgiwsLOjrr78WuxSBr6+vsKbRPEgkEnrjjTdafNiI/ty/ZWNjQ1OnTm0x/rfffiMA9Prrrwvj7jYkGxoaSCaTteijvr5e2Gd1c936QpKISCKRkL29/R3V3tTURPb29hQTE9NiOo1GQ2vWrLmjeWsOSZVKJYzbvHkzAaCMjIxWNTSvKbZnGTSHZENDgzDNunXrCAD9/vvvwri/Lt+LFy8SANq/f7/eZSeWrVu3krm5OVVVVYldijHofvskMzIy0NTUhJiYGLFLaUEul4OIQERYvnw5iAhyubzVPQMzMzNRW1vb6uqfoUOHwsLCAikpKX+7ltzcXNTX12PAgAHCOCsrK7i5uSEnJ6fd7dTV1YGIhJsstLf29PR0VFdX48EHH2wxnampKRYvXny3syVovnntzU9LbF7Ozc8Kv9tl0Nx2cztt8fHxgYuLC6ZPn45///vfuHz58l3PS0eIiYkR7mvJuuF5ks0PzzLma69fffVVuLm5YcWKFbhy5UqL15oft2pjY9Pqffb29sJjV/+O5kfMvvLKKy3OgywsLBSe1dIely5dAgAEBAQAaH/tzc/xFvN3ZKhl0BYrKyv88ssviI6OxptvvgkfHx9MnTq1xdMtxeTg4AAAbT7atzvqdiHZs2dPADDKk7Sb2dra4p133oFKpcKCBQtavNYcHG2FYXV1NTw8PP52/87OzgCA1atXC2u3zUNycnK72/nxxx8BAA899BCA9tfe/Du6+TGunc1Qy+BWgoKC8N133+HatWt48cUXsWPHDrz//vt/u11DKCgoAACD/C11Bd0uJAcOHAhnZ2ckJSWJXYpeM2fORHh4OPbv349vvvlGGD9gwADY2Njg7NmzLaZPSUlBU1MTQkNDhXFmZmZ6N/tuxdPTE1KpFGlpaXddf0lJCVavXg0PDw88+eSTd1R779694ejoiIMHD96y/budt/YyxDK4lWvXriErKwvAn2H89ttvY8iQIcI4sSUlJcHJyQmDBg0SuxSj0O1C0tTUFHPnzsXq1atFXVO5HYlEgrVr10IikWDRokXCJX5SqRRLly5FUlISEhMToVQqkZGRgfnz58Pd3R1z584V2vDz80NVVRX27t0LtVqN8vJyFBYWturL0dER165dw+XLl6FSqWBqaorZs2dj27ZtWL9+PZRKJbRaLYqLi3H9+vUW7yUi1NbWQqfTgYhQXl6OHTt2ICoqCqampti7d6+wT7K9tVtaWmLFihU4fvw4Fi1ahKtXr0Kn00GlUglB0t55u1tSqbTdy+B2/rp8CwsLMW/ePOTk5KCpqQnnz59HYWEhIiIiDFb/3aqoqMCqVaswd+5cfjpnMzEOF4mturqavL296eGHHyaNRiNaHadOnaK+ffsKR7R79uxJ8+bNazHNrFmzCADZ29vT22+/TUR/nqaxcuVK8vf3J3Nzc3JwcKCJEydSbm5ui/dWVlZSTEwMSaVS6tOnDy1cuFA499LPz4+KioqIiOjcuXPk7e1NVlZWFB0dTSUlJXTjxg168cUXycvLi8zMzMjZ2ZkmTZpEmZmZtG/fPgoJCSGZTEYWFhZkYmIiHJG3t7en++67j15//XWqrKxsNc/trZ2I6OOPP6bg4GCSSqUklUpp8ODBtG7dunbP25o1a4RzJ3v37k0nTpygd955h+RyOQEgV1dX+vrrr2n79u3k6upKAMjBwYG2bdtGRKR3GTSfJwmA/P39KT8/nzZs2EB2dnYEgLy9venSpUttLt+UlBSKjIwkBwcHMjU1pZ49e9LLL78s6t8i0Z9nDzz88MPk7e1tFOdsGomqbns/yZSUFMTExOCxxx5DQkICTEy63Uo1YwKtVos5c+Zgx44dOHLkCMLDw8UuyVgoum0yhIeHY8+ePdi+fTtiY2MNclSYsXuRSqXCxIkTsX37duzZs4cD8i+6bUgCwIMPPogjR47gzJkzCAkJwc8//yx2SYx1qhMnTiA0NBTJyck4ePBgq
3NTWTcPSQCIiIhAWloahg4digceeABTpkxBVVWV2GUx1qFqamowd+5cjBw5Ev369UNaWhqGDx8udllGqdvuk2zL7t278eyzzwIAli9fjnnz5sHa2lrkqhgznLq6Onz66adYuXIlAODjjz/G5MmTRa7KqPGDwP6qqqoK77zzDtavXw9ra2ssXboUCxYsaPMqEcbuFbW1tVi/fj0++OAD1NXVYf78+XjppZfg6OgodmnGjkPyVsrLy7Fq1SqsW7cOFhYWmDVrKC3H3AAAIABJREFUFubMmSNcYsfYvSA7OxsbN27E5s2b0dTUhGeffRbPP/+8cEURuy0OyduprKzEJ598goSEBFy+fBnR0dGYM2cOHn30UchkMrHLY6yV+vp67Ny5Exs3bsTJkyfRu3dvPPXUU5g/fz569Oghdnn3Gg7J9tLpdDh06BA2btyIb7/9FlZWVhg/fjwmTpyIhx56iAOTiaq+vh4//PAD9uzZg++++w6NjY145JFHMGfOHIwZM4bPA757HJJ3o7y8HFu3bkVSUhJOnToFCwsLPPjgg4iLi8P48eOFu6gw1pEUCgW+++477NmzBz/99BOampoQFRWFuLg4TJs2jTepDYND8u+qrKzEgQMHsHPnTvz888/QaDQYNGgQxowZgzFjxiA6OhpSqVTsMlkXoNFocOHCBRw6dAiHDh3CsWPHoNPpEBERgUcffRRTpkyBu7u72GV2NRyShlRTU4Off/4Zhw8fxuHDh5GXlwcrKytERUVh9OjRiI6ORmhoKKysrMQuld0DGhoakJqaipMnT+Lw4cM4deoUGhoa4O/vj9GjR2P06NF44IEHhBuIsA7BIdmRioqKhMD85ZdfcP36dZibmyMkJATh4eHC0LdvX0gkErHLZSIiIly6dAkpKSn47bffcPr0aaSnp0OtVsPNzU0IxdGjR8PLy0vscrsTDsnO9Mcff+D06dPCB+HcuXO4ceMGHB0dMWTIEISEhCA4OBghISEIDAzkzfQuqrGxEZmZmcjIyEBGRgbS09ORmpoKhUIBS0tLDBkyBPfddx/Cw8MRERGBPn36iF1yd8YhKaampiakpaUhJSUFaWlpSE9PR2ZmJhoaGmBmZgZ/f38hNPv16wd/f3/4+/vzkfR7RF1dHfLy8vD7778jJycH6enpyMjIQF5eHrRaLaysrBAUFISQkBAMGjQI4eHhGDRokPCcHGYUOCSNjVarxe+//y58oJrXNAoLC6HVagEAvXr1EgLTz+//sXffYVFc7d/AvwtbYCm7KFVpKiJiQQUeC9hjYi+YGB8RSwwaTSyJmsQUo09s0aiJxhpjw4olRlGxxGjQKAYEISLFQjG0pS+wlGXv9w9/zCuCBWWZBc7nuvZSdoeZL7Oz95yZnTnHCW3btkWbNm1ga2vL7qCoZzk5OUhJScGDBw+QkJBQ5ZGamgoA0NPTg4ODQ5Ujhc6dO8PJyYl1bKv7WJFsKMrKyvDgwQPEx8dzH8J79+4hISEBKSkpqHwbjYyM4ODgADs7O9jZ2cHW1haOjo6wtbWFjY0NLCws2KUhL0mhUEChUCAtLQ2PHj1CYmIiUlJS8OjRI6SkpCApKYkbMEwgEMDOzq7KjsvJyQnOzs5o3bo1JBIJz38N84pYkWwMSkpK8PDhQ+7Dm5ycjOTkZKSkpHA/PzkSn1Ao5IqltbU1LC0tYWFhASsrKzRv3hxyubzKQyaTQS6XVxvetqEoLy9HXl4e8vPzkZeXV+WRnZ2NjIwMKBQKZGZmIj09nSuOTw45a2hoCHt7e27nY29vz/1sa2uLVq1asXPIjRMrkk2FQqFARkZGtUKQnp6OzMxM7vWcnBxu2N2nGRkZcQXT0NAQJiYmEAqFkMvl0NPTg1wuh1AohImJCcRicZUelGq6wN7Q0LBaYSkpKalxaNXKMX6Ax+f6ysrKoFQqoVarkZeXB41Gg7y8PO7nwsJCFBcXIz8/n2vtPU0mk6FZs2awsrLidhpPtrYrdyCVrzNNE
iuSTHVEVKW1VdkCe7IlplKpcPr0aZiYmMDe3h4VFRVckVIqlVWKXeVzT6ssck+qLLJPqyzIwP8vrsbGxhCJRJDL5dDX14dMJoNIJMKVK1eQlpaGBQsWwMbGhivslf9WPthlV8xLYEWSeTVHjhzBu+++i+PHj2P06NF8x6kiIyMDHh4ecHV1xZkzZ9iXI8zraLpj3DCv7sGDB/D398dHH32kcwUSAKysrHD06FFcuXIFS5cu5TsO08CxliRTK+Xl5ejbty8KCwsRGhqq07dY7t27F1OmTEFgYCDrfZt5VblCvhMwDcuiRYsQFRWFsLAwnS6QADBp0iT89ddfmDp1Ktq3b48OHTrwHYlpgFhLknlpwcHBGDp0KHbv3o1JkybxHeellJeXY+DAgcjIyMDNmzchk8n4jsQ0LOyLG+blZGRkoEuXLnjrrbewe/duvuPUSnp6Ojw8PODu7o5ff/2VdUDL1Ab74oZ5MY1Gg4kTJ8LY2BgbN27kO06tWVtb48iRIwgODsayZcv4jsM0MKxIMi+0bNkyhISEIDAwsMZrGBuCnj17Yv369Vi6dCmCgoL4jsM0IOxwm3mukJAQDBgwAOvXr8dHH33Ed5zX5u/vj8OHDyM0NBTt27fnOw6j+9g5SebZcnNz0bVrV3Ts2BGnTp1qFHeolJaWok+fPigoKEBoaCjr1Zt5EXZOkqkZEWHq1KmoqKjAnj17GkWBBACJRIKjR48iJycH06ZNA2sjMC/CiiRTow0bNuD06dM4dOhQoxur2c7ODoGBgThx4gR++OEHvuMwOo4VSaaaqKgofP7551iyZAm8vLz4jqMVffv2xbJly/Dpp58iJCSE7ziMDmPnJJkqCgsL4eHhARsbG1y8eLFRdw5BRBg7diyuX7+OiIgIWFtb8x2J0T3sixumqkmTJiE4OBiRkZFo0aIF33G0Li8vD+7u7nBwcMCFCxca9U6BeSXsixvm/9u9ezf27duHnTt3NokCCQByuRzHjx/HjRs3WI9BTI1YS5IBACQkJMDd3R0zZ87Ed999x3ecerdt2zbMmjULQUFBGDJkCN9xGN3BDreZx9cO9uzZE0KhEFevXm2yQ5pOmTIFJ0+eRHh4OBvrmqnEiiQDfPjhh9i/fz8iIiKadHFQqVTo1asXxGIx/vzzTzbCIQOwc5JMUFAQtmzZgi1btjTpAgk8HjsnMDAQsbGxWLhwId9xGB3BWpJNWEpKCrp27Yq3334bW7du5TuOzggMDMS7776LvXv3ws/Pj+84DL/Y4XZTpVar0a9fP+Tl5eHmzZuQSqV8R9Ipc+fOxc6dOxEaGgpXV1e+4zD8YUWyqfriiy+wfv16hIaGonPnznzH0TllZWXo27cvlEolQkNDq4whzjQp7JxkU3T58mWsXr0aP/30EyuQzyAWi3H48GGkp6djzpw5fMdheMRakk1MZmYmunTpAm9vbwQGBvIdR+edOXMGw4cPx/79+/Hf//6X7zhM/WMtyaZEo9HAz88PhoaG+Pnnn/mOwwuNRoP169ejV69eLzX90KFDMXPmTMycOROJiYnaDdeA/e9//4OrqytMTU0hkUjg5OSETz/9FIWFhXxHe33ENBkrV64kkUhE169f5zsKL+Lj48nLy4sAkJub20v/nkqlIjc3N/rPf/5DZWVlWkzYcPXt25c2bdpE2dnZVFBQQIcPHyaRSESDBw/mO9rrymFFsokIDQ0lsVhMa9eu1cr8i4uLqWfPnlqZd10sIzIyknx8fGjfvn3UpUuXWhVJIqJ//vmHDA0NafHixa+0/Nel6+t32LBhpFarqzw3btw4AkDJycl1EY8vOexwuwnIy8vD+PHjMXDgQHz88cdaWcYvv/yCzMxMrcy7Lpbh5uaGY8eOwdfX95XupOnQoQO+//57LFu2DJcuXXqlDK9D19dvUFBQtR6UzM3NAQDFxcWvnY1XfJdpRvvGjx9PVlZWlJ6ezj2n0Who7dq15OLiQmKxmORyOY0aNYru3r3LT
TN79mwSiURkZWXFPTdr1iySSqUEgBQKBRERzZ07l8RiMQEgANSmTRv68ccfSSKRkIWFBc2YMYOsra1JIpFQz5496caNG3WyjFfVvXv3WrckK40ePZpsbW0pKyvrudM15fVbadSoUWRoaEilpaWvPS8escPtxm7z5s2kp6dHFy9erPL84sWLSSwWU0BAAOXl5VFUVBR169aNzM3NqxRTX1/fKh8wIqI1a9ZU+YAREY0dO7baB2vGjBlkZGREMTExVFJSQnfu3CFPT08yMTGpcgj2Ost4Fa9TJHNycsje3p7GjBnz3Oma8volIioqKiITExOaM2dOncyPR+xwuzH7559/MH/+fHz99dcYOHAg97xKpcK6devg4+ODiRMnQiaToVOnTti6dSuysrKwffv2OssgFArRvn17SCQSuLq6YvPmzVAqldi1a1edLaM+mZmZISAgACdPnnzmemLrF1ixYgVsbGywbNkyXnPUBVYkG6mioiKMGzcOHh4e+Prrr6u8dufOHW6Yhid5enpCLBYjNDRUa7k8PDwglUoRGxurtWVoW58+ffD5559j7ty5iI6OrvZ6U1+/x48fR2BgIM6dOwcTExPectQVViQbqdmzZyM9PR379u2rdkI9Ly8PAGBsbFzt9+RyOZRKpVazSSQSKBQKrS5D25YsWYJu3bphwoQJUKlUVV5ryuv30KFDWLVqFS5fvgxHR0deMtQ1ViQbocDAQOzevRu7du2Cvb19tdflcjkA1PhhzcvLg62trdaylZeXa30Z9UEoFOLQoUP4999/8fnnn1d5ramu340bN2Lfvn24dOlSoxr+gxXJRub+/fvw9/fH3LlzMWrUqBqn6dixI4yNjREWFlbl+dDQUJSVlcHd3Z17TigUory8vM7yXb58GUSEHj16aG0Z9cXOzg7btm3Dxo0bcfLkSe75prZ+iQifffYZoqOjceLEiRpb0A0ZK5KNSHl5OXx9fdG2bVusWrXqmdMZGBhg/vz5OH78OPbt24eCggJER0dj5syZsLGxwYwZM7hpnZyckJOTgxMnTqC8vBwKhQJJSUnV5tmsWTOkpqYiMTERSqWS+1BqNBrk5uZCrVYjKioK8+bNg729PaZMmVJny+DTO++8g8mTJ2PatGlITU0F0PTWb0xMDFavXo2ff/4ZIpEIAoGgyuP777+vzSrVPTx/vc7UoXnz5pGxsTHFxsa+cFqNRkNr1qyhtm3bkkgkIjMzMxozZgzFxcVVmS47O5v69+9PBgYG1KpVK5o9ezYtXLiQAJCTkxN3qcmtW7fIwcGBDA0Nydvbm9LT02nGjBkkEomoZcuWJBQKydTUlEaPHk3379+vs2W8rOvXr5OXlxfZ2Nhw1wJaW1tTr1696MqVKy89n5oUFhZSu3btqG/fvtxdJ01p/UZHR3PrtKbHmjVrXmv98oxdJ9lYnDlzhgQCAQUEBPAdhTNjxgxq1qwZ3zHqRXh4OInFYvruu+/qbZlNaf3yiF0n2Rj8+++/mDRpEqZOnYqJEyfyHaeKiooKviPUi27dumHFihX46quvqp2L1Kamsn75xIpkA6fRaDBp0iSYmZnhhx9+4DtOvYmNja127qumx/jx4+st0yeffIJ+/frBz8+vwd+vrIvrlzd8t2WZ1/PNN9+QgYEBRURE8B2likWLFnH3Ajs6OtKRI0f4jlQv/v33X2rWrBl99NFHWl1OU12/PMhhPZM3YH/++ScGDBiAjRs3YubMmXzHYf7PsWPH8M477+D06dMYMmQI33GY18MGAmuocnJy0LVrV3h4eODYsWN8x2GeMmHCBFy5cgVRUVFo3rw533GYV8eKZENERBg9ejQiIiIQGRmJZs2a8R2JeUpeXh7c3NzYTqzhY2PcNETr16/HmTNncOjQIVYgdZRcLscvv/yCX3/9FQcOHOA7DvMaWEuygQkPD0evXr2wdOnSavcMM7pn7ty52LNnD6Kiomq8j57ReexwuyEpLCyEu7s7WrZsiQsXLlTr3
YfRPSUlJfD09ISVlRXOnz8PPT128NbAsMPthuSDDz5AXl4e9u/fzwpkA2FgYIA9e/YgJCQEGzdu5DsO8wpYkWwgduzYgYMHD2Lfvn2wsbHhOw5TC926dcNXX33F9ZTDNCzscLsBiImJgaenJ+bOnYsVK1bwHYd5BWq1Gr1790ZpaSlu3LgBsVjMdyTm5bBzkrqupKQEPXr0gEQiwdWrVyESifiOxLyiBw8eoEuXLpgzZ06jGPuliWDnJHXdvHnzkJSUhMOHD7MC2cC1bt0aq1evxsqVK/Hnn3/yHYd5SaxI6oCLFy9iz5491Z4/duwYtm3bhi1btjSa8UKauhkzZmDIkCGYOnVqteEdzp8/jw8++ICnZMwz8XDDOPOU999/nwCQr68vFRQUEBFRUlISNWvWjD788EOe0zF1LSMjgywtLWn69OlERFRQUEDTp08ngUBAhoaGVFpaynNC5gmsgwu+aTQaWFpaIjs7G0KhEHZ2dggMDMScOXOgVCpx8+ZNGBoa8h2TqWMnTpyAj48Pli9fjs2bNyMjI4MbLuHPP/9E7969eU7I/B/2xQ3frl+/jl69enE/C4VCAIBUKsWNGzfQvn17vqIxWqRSqeDt7Y2IiAjo6elxneeKxWIsWrQIS5Ys4TcgU4l9ccO3kydPVrkcRK1WQ61Wo7CwEAsXLkR2djaP6RhtuHHjBjp27IioqCgQUZXexcvKyhAcHMxjOuZprCXJs7Zt2+LevXs1viYSidCsWTMEBgaiT58+9ZyMqWslJSX45ptv8P3330MgEDxz6AV9fX3k5ubCxMSknhMyNWAtST7du3fvmQUSADf854ABA7Bp06Z6TMZoQ1BQENavXw/g+WPTVFRUsEuEdAgrkjw6efIkdw7yWYgIw4YNwzvvvFNPqRhtefvtt3H9+nXY2dk995pXsViM33//vR6TMc/DiiSPjh079swWhUgkglQqxdatW/Hbb7/B0tKyntMx2uDu7o6oqCiMGTPmmdOUlZXhzJkz9ZiKeR52TpIn2dnZsLKyqrFI6unpoXfv3ggICICdnR0P6Zj6sHfvXkyfPh0VFRVQq9VVXhMIBEhNTYW1tTVP6Zj/w85J8iUoKAhP75+EQiHEYjFWrFiBS5cusQLZyE2aNAm3bt1C69atqx1+CwQCXLp0iadkzJNYkeTJiRMnqnTAqq+vDzc3N0RFReGzzz5jnbM2Ea6urggPD8e4ceMAPC6OwOOjiYsXL/IZjfk/7HCbB6WlpTAzM4NKpYJQKAQR4auvvsLXX3/NOtNtwvbu3YsZM2Zw18paW1sjLS2N71hNHbvjBnh8AbdSqUReXh4KCwtRVlYGpVJZ5TxRcXExSktLuZ/19PQgk8mqzEcul0MgEMDMzAzGxsYwNjaGVCqttryzZ89i6NChAB63JA4ePIjOnTtr6a9jGpKYmBiMGTMG8fHxAID4+Hi0bdsWwOMvdIqKipCbm8ttjyqVCiUlJdzvExHy8vKqzFMoFFa75tLU1BT6+vowMTGBVCqFkZERzMzMtPzXNUi5z7/+pIEqKSlBUlISUlJSkJ6eDoVCgczMTGRkZEChUEChUCArKwv5+fkoLCysspHVtcpiKpPJYGpqCisrKyQmJkIgEKBPnz7w9fVFcnIyKioq4OjoyDbUJqKkpAQpKSn4999/kZ6ejqysLO7RuXNnKJVKpKWloWfPntBoNNV22tpSWTBNTEzQvHlzmJubV3lYWVnB3NwclpaWcHBwgLW1daM/NdRgW5LZ2dm4e/cu7t69i/v37yMpKQmJiYlISkqqcogiFou5N9Xa2hoWFhbcQyaTwdjYGEZGRjA1NeV+lkgkMDQ0hIGBATcfiURSpVVYXl6OwsJC7meNRoP8/HzuX6VSicLCQhQWFqKgoAD5+fnIz89Heno6Tpw4AWtra6hUKmRkZKCoqIibj6mpKRwcHODo6IhWrVrB0dERLi4ucHFxgaOjI3fOitFt5eXlePjwIeLi4pCQkIDk5GQkJ
ycjJSUFKSkpyMjI4KbV09OrsRgpFAokJydj2rRp3HZqZGQEuVwOqVQKAwMDiMViGBkZVVm2TCarUrhKSkqgUqmqTJObmwsAKCgoQHFxMYqLi7kWanFxMQoKCqoU7qysLK6x8WQXbyKRCC1btoSdnR0cHBxgZ2cHR0dHODs7w9nZGS1atNDG6q1Pun+4rVKpEBkZifDwcNy5cwexsbG4c+cOFAoFAMDExARt2rThCkvlo/INMzc35/kvqKq0tBQVFRVVCm5xcTFSUlK4Il/5b1JSEu7fv4/09HQAj/fylQWzQ4cO6NKlCzw8PNg1lDwqLS3FP//8g4iICMTGxiIuLg7x8fF4+PAh16uPjY0NWrVqBTs7O+7h4OAAW1tb2NrawtLS8pk7v4yMDFhZWdXnn/RCpaWlSE9PR0pKCpKSkvDo0SOkpKRwO4KHDx+ioKAAwOPPZ2XBdHZ2RqdOndC1a1e0atWqoezwdatIajQaREdH48aNGwgLC0NYWBj++ecfqNVqmJmZoVOnTmjfvj33cHFxaRJjGefl5eHu3buIiYlBbGwsYmJicPfuXTx8+BAA4ODgAA8PD3h6esLT0xPdu3ev1rpgXp9KpUJYWBhu3bqFyMhIREREICYmBuXl5TAyMkL79u3h7OyMdu3aVSkMxsbGfEevd+np6dwOo/IRGxuL+/fvo6KiAjKZDF26dEHXrl3RtWtXuLu7w9XVVRcLJ/9F8sGDB7h48SIuXryIS5cuITs7G8bGxnBzc4O7uzv30NEVyKv8/HxER0cjPDyce8TExEAoFMLNzQ1vvPEGvLy80LdvX5iamvIdt8EpKCjAzZs3cfXqVVy7dg1Xr15FSUkJZDIZOnbsWGX7dHFxYVcmvISysjIkJCRU2WYjIiJQXFwMExMTdO/eHV5eXvD29oa3t3eVU148qf8imZ+fj+DgYPz22284d+4ccnJy0Lx5c/Tu3Rv9+/dHv3790LFjx0Z/MlhbUlNTcfnyZe6RkJAAsVgMLy8vjBgxAiNHjkSbNm34jqmTSktL8eeffyI4OBjBwcG4e/cuBAIBXF1d0bt3b3h5ecHLy4sNpVHH1Go1bt++jatXr3I7pLS0NBgYGKBnz54YPHgwhgwZgk6dOvERr36KZFpaGo4dO4aTJ0/iypUr0Gg06NOnD4YPH44BAwagU6dOrChqSWpqKv744w+cPXsWZ86cQW5uLjp06ICRI0di7NixcHd35zsir1JTU/Hbb7/h7NmzuHTpEoqKitCxY0cMHjwY/fr1g5eXF+RyOd8xm5z79+/j2rVruHDhAs6dOweFQgE7OzsMHjwYQ4cOxVtvvVVfPfZrr0iWlpbi/PnzCAgIwIkTJyASiTBgwACMGDECo0ePZl828KCiogLXr19HUFAQTpw4gbi4OLi4uODdd9/FlClTmkwLKScnB0FBQThy5AiCg4MhFovRq1cvDB8+HKNHj4aDgwPfEZknaDQaREREcKflLl++DJFIhOHDh8PPzw9vvfWWNscxz63zgcCio6PJ39+fZDIZCYVCGjZsGB0+fJhUKlVdL4p5TTdu3KBZs2ZRs2bNSE9PjwYNGkTHjx+niooKvqPVubKyMgoMDKS33nqLhEIhSaVSevfdd+nXX39l22YDk5aWRhs2bKBevXqRQCCgZs2a0YwZM+jWrVvaWFxOnRXJCxcu0ODBg0kgEFD79u1p3bp1lJ6eXlezZ7SopKSEjh07RsOGDSM9PT1q27Ytbd68mYqKiviO9trS0tJo6dKl1KJFC9LX16cRI0bQgQMHqLCwkO9oTB1ISkqi1atXU4cOHQgA9erVi/bv31+XI06+fpE8ffo0denShQBQ//79KSgoiDQaTV2EY3gQGxtL06dPJ0NDQzI3N6eVK1dScXEx37FqLT4+nvz8/EgsFpO5uTl99tln9PDhQ75jMVr0xx9/0Ntvv01CoZCsrKxo5cqVdbEzfPUieevWLRo4cCABIB8fHwoPD3/dMIwOyczMpK+++
oqMjIzI3t6e9u7d2yAOwxMTE2natGkkFArJxcWFdu3axQ6nm5iUlBRatGgRmZiYkJWVFf3www+vsw3UvkgWFBTQ+++/T3p6etSjRw+6evXqqy6caQBSU1PJ39+f9PX1yd3dnaKjo/mOVKOioiJasGABicViat26Ne3evZvUajXfsRgeKRQKWrBgAUmlUrK1taUjR468ymxqVySvXbtGrVu3JktLSzp06BA7rG5CoqOjycvLiwwMDGjt2rU61aq8fPkyOTk5kVwup82bN1NZWRnfkRgdkpqaStOmTSOBQEA+Pj6UlpZWm19/+SL57bffkr6+Pg0fPpx9IdNEqdVqWr58OYlEIho4cCDl5OTwmqesrIxmz55NAoGARo4cSf/++y+veRjddvHiRWrdujWZmZnRsWPHXvbXXlwk1Wo1ffDBByQUCmnTpk2s9chQWFgY2dvbU8eOHXkrTLm5ufTGG2+QsbEx7d+/n5cMTMNTVFREH3zwAenp6dF33333Mr/y/CJZXl5Ob7/9NhkaGtLJkyfrJmUN3n33XQLwUo9Tp05pLcfRo0epVatW1ZYpkUjI0dGRpk6dSg8ePNDa8itNmzaNjI2NCQBFRERofXmvIiUlhVxdXcnR0ZHu379fr8tOSkoiV1dXatmypbaujSOiut0uPTw8SE9Pj9zc3J473Zo1a8jCwoIA0JYtW14qJ9tua2/Dhg2kr69P/v7+Lzp19Pwi+emnn5KRkRH9+eefdZvwKe+++y6dP3+e8vLyqLy8nNLS0ggAjRw5ksrKyqioqIgyMzPJ399fq0WyUps2bUgmkxERUUVFBWVkZNDevXtJKpWSpaUlZWVlaT3DwYMHdX5jy8rKom7dulGnTp3q7ZrKzMxMcnZ2ps6dO9OjR4+0uqy63i4HDhz4wiJJRJSQkFCrIlmJbbe1ExQURBKJhL744ovnTZbzzBumg4KCsGbNGmzatAm9e/euk/t7nkUgEMDLywsymQxCobDK85XjT1tYWNT5fcYqlQq9evV67jR6enqwtLSEn58fPvroI2RmZrIBmv5P8+bNceLECaSlpcHf31/ry9NoNJg4cSLKy8tx7tw5tGzZUqvL08Z2WV89WbHt9sWGDRuGbdu2YeXKlTh06NAzp6tx+Ib8/HxMnToV06ZNw+TJk7UWstLBgwdfaroZM2bU6XJ/+eUXZGZmvvT0Tk5OAMB0dPQGAAAgAElEQVR1gqtNDaVbODs7O+zZswfDhw/H+PHjMWLECK0ta8eOHfjjjz9w7dq1ehmPWhvb5dNDx9YHtt0+2+TJkxEVFYX3338f3t7esLW1rTZNjS3JDRs2QKPRYM2aNVoP+SoqKiqwePFi2Nvbw9DQEJ07d8bhw4cBALt374axsTE3INeJEycQFhYGBwcH6OvrY8KECQCAefPmYf78+bh//z4EAgG3IT1PQkICAMDNze2l8wBASEgIXF1dIZPJYGBggE6dOuHcuXPc60SENWvWoF27dpBIJJDJZFi4cGGt/u7Vq1dDKpXCxMQEmZmZmD9/Plq2bIm4uLhart3aGzp0KMaOHYtvvvlGa8soKSnB119/jQ8//BCenp5aW87reNH7DAD37t2Di4sLjIyMYGhoiN69e+Pq1asvnPeLtrHnYdvt861YsQLW1tZYunRpzRPUdBDeunVrWrBggTZOA7yUynM/o0aNqvH1BQsWkEQioaNHj1Jubi598cUXpKenR3///TcREcXExJBUKqXJkydzv7No0SLasWNHlfmMHTuW2rRpU23+T57bIXr8Teru3btJKpXSsGHDap3nyJEjtGTJEsrJyaHs7Gzq0aMHNW/enPv9L7/8kgQCAa1du5Zyc3OpuLiYNm3aVO3czouW8+WXXxIAmjt3Lm3cuJF8fHzo7t27L1rddeKvv/7S6rmoAwcOkEgkotTUVK3M/2W8aLt80fs8cOBAat26NT18+JDKy8vpn3/+oe7du5OBgQHFx8dz09V0TvJF7z0R225fx+bNm
0kqlVJ+fv7TL1X/4ubBgwcEgEJCQuonXQ2etzGqVCqSSqU0fvx47rni4mKSSCQ0a9Ys7rlt27YRANq3bx8dOHCAPvnkk2rzel6RxFPfFAoEAlq2bFm1C5VfNs+TVqxYQQAoMzOTiouLSSqV0qBBg6pM8/QJ8JdZTuXGxtdteFZWVrRmzRqtzHvKlCnUv39/rcz7Zb2oSD7tyfeZqOYvbqKioghAlUbJ00XyZbcxtt2+OoVCQQKBoKYv4Kp/cVM53i9PvQC/UFxcHIqLi9GxY0fuOUNDQ1hbWyM2NpZ7bvr06Xj77bfxwQcfIDAwEKtXr67VcmQyGYgIRISFCxeCiCCTyaqdU3rZPE+qnEdFRQXu3buH4uJiDBw4sE7+bj516tSJ237qWmxsLLp27aqVeWvLk+/zs3Tq1AkymQxRUVHPnKY27z3bbl+Nubk57OzsasxUrUgWFxcDQJXR/HRJ5fCrX331FQQCAfdISkrisldavnw5CgsLa/XlTE2+/vprWFtb44svvkBKSkqt85w+fRr9+vWDhYUFJBIJPv30U+73Hz16BACwsLCos7+bL0ZGRlWGx61LRUVFOj+42fPe5+cRiUTcyIo1edX3nm23tWNiYlJlmOhK1Ypk8+bNAYAbslXXVL4p69ev5/aYlY/r169z05WXl2Pu3LlYt24drl+/jmXLlr3yMk1MTLBq1SoolUrMmjWrVnmSk5MxZswYWFtbIzQ0FPn5+fjuu++4368c6Ki0tLRO/m4+ZWZmam0IX3Nz83r5dvZVveh9fha1Wo2cnJznjvr5qu89225fHhEhPT29xhETqhVJNzc36Onp6dQf8CQ7OzsYGBggMjLyudPNnj0b/v7++Pjjj/HJJ5/g22+/fa2/adKkSejevTuCgoIQGBj40nmio6NRXl6OWbNmoXXr1jAwMKhymUTloGdXrlx57vJf9u/mS+X46N26ddPK/D08PBASEqKVedeFF73Pz/LHH39Ao9E8d729znvPttuXExMTg+zsbHh4eFR7rVqRlMlk6Nu3L/bv318v4WrLwMAAU6dOxcGDB7F582YUFBSgoqICjx49QlpaGgBg06ZNaNmyJXx8fAA8/orf1dUVvr6+3KDpANCsWTOkpqYiMTERSqXyuYc8AoEAGzZsgEAgwJw5c5Cbm/tSeSpbCBcvXkRJSQkSEhIQGhrKzdfCwgJjx47F0aNH8csvv6CgoABRUVHYvn17rf9uPh09ehRqtRpDhgzRyvzHjh2L2NhY/PXXX1qZ/+t60ftcqaysDPn5+VCr1bh16xbmzJkDBwcHTJky5Znzfp33nm23L2fHjh1wdHSssUjWeAnQkSNHSE9Pr95vLSooKKA+ffpQs2bNCADp6emRk5MTLV++vMp0paWl9Nlnn5G9vT0JhUKysLCgsWPH0p07d2jEiBHcuBd//fUXERF9/PHHpKenRwBIJpNRWFgYET3uONjBwYEMDQ3J29ubjh8/Ts7Oztw3gy1atKAPPvigyrKnTJlCAEgul9PKlStfmIeI6LPPPqNmzZqRXC6nd955h3766ScCQG3atKHk5GRSKpX0/vvvU/PmzcnY2Ji8vb1p8eLFBIBsbW3p9u3bL1zOd999R4aGhgSA7OzsKCAgQKvv1ZPKy8vJxcWFJkyYoNXleHl5UZ8+feq9k5WX3S5f9D7v2rWL+vfvT5aWliQUCql58+b03//+l5KSkrh5rF27lqysrAgAGRkZkY+PDxE9/72/du0a225fw8OHD8nAwIB++OGHml7OqXG0RCJC7969kZ+fjxs3buj8CXOGXwsWLMDWrVtx+/ZtrY7pHRYWhp49e2LNmjWYN2+e1pbDNB1qtRoDBgxATk4Obt26VdOoi88eLTElJYUsLCzov//9r5bqN9MY/PbbbyQQCGjPnj31sryVK1eSUCisl45OmMZNo9HQe++9R1KplKKiop412fN7AQoODiY9PT365JNPWD+STDXBwcFkZGRU7dBOmzQaDb3//
vskkUhYP5LMKystLaVJkyaRSCSioKCg50364k53jx8/TgYGBjRx4kTWLT7DOX78OEkkEl62C41GQ9988w0JBAL65ptv2A6cqZWcnBzq378/GRsbv6hAEr3s8A3nzp0jY2NjGjBgACUnJ79+SqbBKisr4+695fsIY8uWLSQUCmnUqFFs6AbmpVy6dInatGlD9vb2zzvEftLLj3Fz69Ytat++Pcnlcjpw4MCrp2QarJiYGOrWrRsZGRnR1q1b+Y5DRI8HAWvTpg3J5XL65ZdfWKuSqVFeXh75+/uTQCCg0aNH12acrtqNllhcXMwNvDRq1CiKi4urfVqmwVEqlbR48WIyNDSk7t27V+mxRhcUFRXRJ598Qvr6+tSnTx9eO2dhdEtpaSlt2bKFWrRoQVZWVhQYGFjbWdR+3G2ix6OOdezYkUQiEX300UekUCheZTaMjlOr1bR9+3aytrYmuVxOq1evpvLycr5jPdPNmzepX79+BIAGDx5cpRsxpmlRq9W0e/duatWqFUkkEpo9e/arDl/xakWyMsTPP/9MNjY2ZGpqSgsXLqSUlJRXnR2jQ1QqFW3fvp1cXFxIJBLR7NmzG9SO8MKFC9SjRw8SCAT01ltv0alTp3RqnHBGe3Jzc2n9+vXk5OREQqGQ3n///SoX67+CVy+SlQoLC2nlypXUokULEolENHHiRK2OYMdoT2ZmJi1dupQsLS1JIpHQe++9p3OH1rURFBREgwYNIoFAQK1bt6bVq1dTdnY237EYLbh9+zZNnz6djIyMyNjYmGbOnEkJCQl1MevXL5KVSktLKTAwkDw9PQkAubq60qpVq7gORxndpFar6cKFC+Tn50dSqZRkMhnNmTOnUR0VJCQkcLfYSSQSGj58OO3Zs4cKCgr4jsa8hpSUFPrhhx/Iy8uLAFDbtm1p1apVdb0jrLsiWUmj0dClS5do0qRJZGRkRGKxmHx8fOj48eP1Nuwo83wajYZCQ0Pp448/JisrKxIIBNS3b1/auXMnFRYW8h1Pa5RKJe3evZsGDx5MQqGQ6zH72LFjrGA2EA8fPqSNGzeSl5cX10eDv78/Xb58WVtXNtR873ZdKSwsxLFjx7Bnzx5cuXIFEokEAwcOxMiRIzFixIh6GfGOeUylUuHSpUs4efIkTp06hbS0NDg5OcHPzw9+fn5o1aoV3xHrVVZWFo4ePYpDhw4hJCQE+vr68Pb2xuDBgzFkyBCd7Zm/qSktLcWff/6J4OBgnD17Fnfv3oWJiQlGjRqF8ePH480339T2CJS5Wi2ST8rIyEBQUBBOnjzJdb/k4eGB/v37o1+/fvD29oaxsXF9RGkSKioqEBkZicuXL+Py5cv4448/UFxcDA8PD24n9fToeU1VVlYWzp8/j7Nnz+LcuXNQKBSwtbXltktvb2+4uro2uOFSGyKVSoWbN28iJCQE165dQ0hICIqKitChQwcMGTIEgwcPhre3NyQSSX1Fqr8i+SSVSoWLFy/i7NmzuHz5Mu7evQuhUAhPT0/069cPPXv2hIeHB2xsbOo7WoNVVFSEiIgI3Lx5E5cvX0ZISAjy8vJgYWGBfv364Y033sDw4cPRokULvqPqNI1Gg/DwcAQHByMkJAQ3btyAUqlEs2bN0KtXL3h5ecHDwwNdu3blevFnXg0R4f79+9x2e+3aNYSFhaG8vBx2dnbo3bs3+vbtiyFDhsDOzo6vmPwUyaelp6dzLZ4rV64gLi4ORISWLVvCw8ODe3Ts2LHGwcObmvz8fMTExCA8PBxhYWEICwtDbGwsKioqYGlpid69e6Nfv37o378/awG9psoW+dWrV3H16lX89ddfSE1NBfC41+0uXbqga9eu6NKlCzp27IhWrVpBKBTynFr3FBYWIi4uDlFRUYiMjERERARu376NgoIC6Ovrw9XVFd7e3vDy8kLv3r2fO5xFPdONIvm0/Px87sNf+UhMTAQAmJqaon379nB1deX+dXJygoODAzfuRmNQUVGBtLQ03L9/H
7GxsYiJiUFMTAzu3r2Lf//9FwAgl8ur7EQ8PDzg4ODAc/LGLyMjA5GRkdyHPSIiAvfu3YNGo4FIJELr1q3Rvn17ODs7cw9HR0fY2Ng06gKqUqmQlJSExMRExMbGIi4uDvHx8YiPj+cGDjM0NETnzp2r7Fw6deqkswMPQleLZE2ys7Nx584d3L17lysWd+/e5VY+AFhbW8PBwYF72Nvbw8bGBpaWlrCwsICVlRXMzMx4/CseKy4uhkKhQFpaGhQKBRQKBZKTk5GUlMQ9UlJSuOEkKncMHTp0gIuLC7eDaNWqFWsl6ojKllJ8fHy1AlE5YqC+vj5sbGzg4OAAW1tb2NnZwc7ODhYWFtw2am5uDnNz85o6f+VNUVERFAoFMjIykJWVhaysLKSlpeHRo0dITk5GcnIyHj16hOzsbO53rKys4OLiwu0k2rVrh3bt2qFNmzbQ19fn8a+ptYZTJJ8lPz8fDx484IpLYmIiEhMTkZSUhEePHlUb9VEsFsPCwgIWFhYwMTGBsbExjI2NYWZmBmNjYxgZGXE9sT9ZUPX09CCTybifVSoVSkpKuJ+LiopQVlaG8vJyFBYWIjc3F4WFhSgqKkJhYSHy8vKQl5eH9PT0asOuGhkZwc7OrkqBd3BwgKOjI1q1asXOIzZw//77L7fje7qwVG6jT4/NLZPJYGpqCktLS8jlckilUkilUshkMhgbG0MqlXJfdJqYmFRpoUql0ipfbCiVSqjVau7n4uJilJaWgoiQl5eHoqIiFBcXQ6lUoqCgAMXFxSgsLOQKokqlqpLN0NAQVlZW3DZra2sLW1tb2Nvbw97eHg4ODpDL5dpYlXxo+EXyRdRqNddaS09PR2ZmJrKysqBQKKBUKqFUKrkiVvl/lUqFioqKKoOGVRa/ShKJpMohgoGBAQwNDSEUCmFiYgK5XM4VYGNjY8jlcshkMlhbWyMuLg5r1qzB7t27MXbsWF0+1GDqSWVBqnwcOXIEhw8fxvTp00FEXCHLz8+HUqlEcXExt7PNy8vDkx/jp4vi00XzyW1XLpfDyMgIUqkUpqamMDU15QpwZau28lHZ2m1iw7k0/iKpq/z8/HD+/HlERkayb/GZKh48eAA3NzfMmzcP3377Ld9xmjpWJPlSWFjIXeZ08eLFhnaehtESjUaD/v37Iz8/Hzdv3tSpc5NNVG61cbeZ+mFsbIz9+/fj+vXr+O677/iOw+iINWvWIDQ0FHv37mUFUkewliTPfvjhByxcuBCXL1+Gl5cX33EYHsXExMDd3R1LlizBZ599xncc5jF2uM03IsKYMWNw69YtREREsLs4mii1Wo2ePXtCJBJx95IzOoEdbvNNIBBg165d0NPTw5QpU8D2WU3TkiVLEBMTg927d7MCqWNYkdQBZmZmCAgIwNmzZ7F582a+4zD1LDw8HKtXr8aaNWvg7OzMdxzmKexwW4csXboUK1euxPXr19G1a1e+4zD1oLI3LGtra1y4cIHdQaV72DlJXaLRaPDmm28iKSkJt27dgomJCd+RGC2bO3cu9uzZg6ioKF3q1IH5/9g5SV2ip6eH/fv3Q6lUYs6cOXzHYbQsJCQEP/30EzZu3MgKpA5jLUkddPbsWQwbNgx79uyBn58f33EYLSgoKOB6wzlx4gTfcZhnYy1JXTRkyBB8/PHHmDlzJmJjY/mOw2jBxx9/jKKiImzbto3vKMwLsJakjiovL0efPn1QVFSEmzdvNqq+Mpu6oKAgjBgxAkeOHMHbb7/Ndxzm+dgXN7rswYMH6NatGyZPnowff/yR7zhMHcjOzkbHjh0xaNAg7N27l+84zIuxw21d1rp1a/z888/YuHEjO2/VSMyaNQv6+vpsp9eAsJZkA/D+++/j+PHjiIiIYMMzNGAHDx6Er68vTp8+jSFDhvAdh3k57HC7ISgpKUH37t0hk8lw6dKlRj1OSmOVmpqKTp06Yfz48di0aRPfcZiXxw63GwIDAwMcOHAA4eHh+N///sd3HOYV+Pv7Q
y6Xs27xGiBWJBuIDh06YN26dVi+fDkuXrzIdxymFrZv347g4GDs2rWLG5eGaTjY4XYDM2HCBFy6dAmRkZGwtrbmOw7zAg8fPoSbmxtmzZqFVatW8R2HqT12TrKhyc/PR9euXdGuXTucOXOGdYigwyoqKtC/f3/k5uYiLCysymBcTIPBzkk2NDKZDIcPH8alS5ewdu1avuMwz7F69WrcvHkT+/btYwWyAWNFsgHy9PTEt99+iy+++ALXr1/nOw5Tg1u3bmHJkiVYvnw53Nzc+I7DvAZ2uN1AERFGjhyJqKgoREZGwszMjO9IzP8pLi6Gu7s7LC0t8ccff0BPj7VFGjB2uN1QCQQC7Ny5E2q1GtOnT+c7DvOETz75BOnp6QgICGAFshFg72ADZmFhgQMHDuDXX3/Fzz//zHecBkGj0WD9+vXo1auXVuZ/9uxZbN++HVu3bm2SfURqe/3ygpgG78svvyQDAwOKjIzkO4pOi4+PJy8vLwJAbm5udT7/jIwMsrKyoilTptT5vBsCba9fnuSwlmQjsGTJEnh4eGDChAkoLi7mJYNKpdJ66+F1lnH79m18/vnnmDlzJrp06VLHyR6fI542bRqkUqlWOq9o6uuXT6xINgJCoRAHDx5ERkYG5s2bx0uGX375BZmZmTq7DDc3Nxw7dgy+vr5auRxn06ZNOHPmDHbv3g1TU9M6n39TX7+84rsty9SdoKAgEggEtH///hdOq9FoaO3ateTi4kJisZjkcjmNGjWK7t69y00ze/ZsEolEZGVlxT03a9YskkqlBIAUCgUREc2dO5fEYjEBIADUpk0b+vHHH0kikZCFhQXNmDGDrK2tSSKRUM+ePenGjRt1soxX1b179zo9HIyJiSFDQ0P65ptvuOfY+m08h9usSDYyH330ERkbG1NcXNxzp1u8eDGJxWIKCAigvLw8ioqKom7dupG5uTmlp6dz0/n6+lb5gBERrVmzpsoHjIho7Nix1T5YM2bMICMjI4qJiaGSkhK6c+cOeXp6komJCSUnJ9fJMl5FXX6Iy8rKyNPTkzw8PKisrIx7nq3fxlMk2eF2I/P999+jbdu28PX1RVlZWY3TqFQqrFu3Dj4+Ppg4cSJkMhk6deqErVu3IisrC9u3b6+zPEKhEO3bt4dEIoGrqys2b94MpVKJXbt21dky+PTll1/i7t27OHjwIEQiEQC2fhsbViQbGYlEgsDAQMTFxeHLL7+scZo7d+6gsLAQHh4eVZ739PSEWCxGaGio1vJ5eHhAKpU2igHOLly4gLVr1+LHH3+Ek5MT9zxbv40LK5KNkJOTE7Zv3461a9fi1KlT1V7Py8sDgBq77ZLL5VAqlVrNJ5FIoFAotLoMbVMoFJg8eTJ8fHzw3nvvVXmNrd/GhRXJRmr8+PHw8/PDtGnTkJqaWuU1uVwOADV+WPPy8mBra6u1XOXl5VpfhrYREd577z2IRKIaD53Z+m1cWJFsxDZv3ozmzZtjwoQJqKio4J7v2LEjjI2NERYWVmX60NBQlJWVwd3dnXtOKBSivLy8zjJdvnwZRIQePXpobRnatm7dOgQHB+PQoUM13jPP1m/jwopkI2ZkZITAwEDcvHkTK1as4J43MDDA/Pnzcfz4cezbtw8FBQWIjo7GzJkzYWNjgxkzZnDTOjk5IScnBydOnEB5eTkUCgWSkpKqLatZs2ZITU1FYmIilEol96HUaDTIzc2FWq1GVFQU5s2bB3t7e0yZMqXOllGfwsPD8cUXX+Dbb79Fz549a5yGrd9Ghuev15l6sGHDBtLT06Pff/+de06j0dCaNWuobdu2JBKJyMzMjMaMGVPt0qHs7Gzq378/GRgYUKtWrWj27Nm0cOFCAkBOTk7cpSa3bt0iBwcHMjQ0JG9vb0pPT6cZM2aQSCSili1bklAoJFNTUxo9ejTdv3+/zpbxsq5fv05eXl5kY2PDXQtobW1NvXr1oitXrrzUPJRKJbVr14769u1LarX6udOy9Vv79aujc
lhXaU3EmDFjEBYWhoiICJibm9fLMj/44AMcOXIE2dnZ9bI8bZs0aRKCg4MRGRmJFi1a8B2n0a1fHcW6Smsqdu7cCaFQiMmTJ6M+94tPngttyAIDA7Fv3z788ssvOlEgKzWW9avLWJFsIszMzHDo0CFcuHABGzdu5DvOa4uNjYVAIHjhY/z48a+9rPv378Pf3x/z5s3DiBEj6iC97qvP9avzeD7eZ+rZt99+SxKJhMLDw7W6nEWLFnH3Ajs6OtKRI0e0ujxtKSsro+7du1OnTp1IpVLxHYfTWNZvA8DOSTY1Go0GgwcPxsOHDxEeHq6VHmsakwULFmDbtm0IDw+Hs7Mz33GY+sfOSTY1enp6CAgIQGFhIRv24QVOnTqFdevW4aeffmIFsgljLckm6ty5cxg6dCh27tyJyZMn8x1H5yQlJcHd3R2jR4/Gjh07+I7D8CeXFckm7NNPP8XmzZvx999/o3379nzH0RklJSXw9vZGeXk5rl+/DqlUynckhj+sSDZlarUaffr0gVKpxM2bN2FoaMh3JJ0wffp0HDp0CH///TfatWvHdxyGX+ycZFMmFApx6NAhpKamYuHChXzH0QkHDhzAjh07sHv3blYgGQDsOskmz97eHtu2bcPmzZvx66+/8h2HV9HR0fD398eCBQvg4+PDdxxGR7DDbQYAMGPGDAQGBiIiIgKOjo58x6l3hYWF8PT0hJmZGa5cucL1Ms40eeycJPNYSUkJevToAYlEgqtXrzapIkFEGDduHEJCQnDr1i2duu2Q4R07J8k8ZmBggAMHDuCff/7BkiVL+I5Tr9avX49ff/0VAQEBrEAy1bAiyXBcXV3x448/YtWqVbhw4QLfcerFjRs3sGjRIixbtgyDBg3iOw6jg9jhNlPNxIkTceHCBURGRsLGxqbKa/v378fw4cMhk8l4Sld76enpuHTpEiZMmFDl+czMTHTr1g1du3bFyZMnIRAIeErI6DB2uM1Ut3XrVsjlcvj6+kKj0QAAioqKMHnyZEycOBHBwcE8J6ydw4cPw9fXF/Pnz4darQbw+B52Pz8/CIVC7N69mxVI5tl46FWDaQD+/vtvEovFtGrVKoqMjKQ2bdqQUCgkPT09evfdd/mOVyvdunUjgUBA+vr61LdvX1IoFPTll1+SRCKhsLAwvuMxuo31AsQ827p167BmzRrk5uaioqKCa4UZGRkhJycHYrGY54Qvdv/+fbRt25braFgkEsHExAR5eXnYsmUL6+SDeRF2uM3ULD8/H9evX0dGRgZKS0u5Agk8PvQOCQnhMd3LO3DgAIRCIfdzeXk5CgoKIBAImtRlTsyrY0WSqebmzZvo1KkTTpw4UeNQDyKRCCdPnuQhWe0FBARUG/VPrVajoqIC7733HqZPn85GBWSeixVJpooVK1agV69eSE1NrdJ6fFJ5eTmOHj1az8lqLzIyEgkJCc+dZufOnRg4cCAUCkU9pWIaGlYkmSrat28PU1NT6Ok9f9NITU3F7du36ynVqzl06NALz5sKBAJERETgjz/+qKdUTEPDiiRTxZgxYxAXF4ehQ4cCwDMvjRGLxTp9yE1ECAgIQFlZWY2vV+4E3nzzTcTExGDcuHH1GY9pQFiRZKqxsLDAiRMnEBgYCBMTkxq/4CgrK9PpQ+6QkBCkpqbW+JpQKIS5uTmOHj2K06dPw87Orp7TMQ0JK5LMM73zzjuIjY3FgAEDajz8jo6OxqNHj3hI9mIHDx6sdqgtFAqhp6eHmTNn4t69exg7dixP6ZiGhBVJ5rlsbGxw9uxZbNmyBYaGhlValfr6+jh16hSP6WqmVqtx+PDhKofaenp6cHFxwY0bN7BhwwaYmJjwmJBpSFiRZF5IIBBg+vTpuHPnDjw9PaGvrw/g8a19x48f5zlddefOnUNubi6Ax5crSSQSrFixApGRkfD09OQ5HdPQsDtumFqpqKjA6tWrsXjxYlRUVEAoFCI7O7tayywvLw/FxcUoLi5Gfn4+AHCFq1JJS
QlUKlWV5wwNDWFgYFDlOTMzMwCATCaDVCqFVCqFXC5/ZsaJEydi//79AIDhw4djy5YtsLW1fbU/mGnqWKe7zPOVl5cjLS0NycnJUCgU3CMuLg4nT55Efn4+nJycoNFoUFhYiKKiIhQVFcqJv/EAABMoSURBVNVLNiMjI0ilUpiYmMDc3BzNmzeHXC7H0aNHIZFI4Ofnh0GDBsHCwgL29vawsbFhd9kwtcWKZFOnVquRmJiIhIQExMfHIykpCY8ePcKjR4+QnJyM9PR0VFRUcNPLZDJYWlrC3NwcZmZmSElJARHB19cXpqamMDQ0hLGxMdfqMzQ05FqCJiYmVW4RFAqF1VqghYWFVe6AUavVUCqVAB63RFUqFdc6LSwshEqlglKpRFZWFrKysrgvk4yNjZGVlcW1YoHH5yWtra3h4OAAW1tb2NrawsHBAW3btoWzszMcHR2r5GMYsCLZdKhUKvzzzz+IiIhAXFwc4uPjER8fj4cPH3JFycrKCo6OjlUKiK2tLVq2bAkHBwdYWlrW2BKLjY2Fi4tLff9JNSoqKoKRkRH3c3l5OTIzM5GcnMwV/yd3BImJicjIyADw+Pxlq1at4OzsDGdnZ7Rr1w5du3ZFx44d2XC7TRcrko1Rfn4+bt68iYiICNy+fRuRkZGIi4tDRUUFTExM4OLiwrWenJ2duf+bmpryHZ0XBQUFXEs6ISEBcXFxSEhIQGxsLJRKJfT19dGuXTt06dIFbm5u6Nq1K/7zn/80qI6HmVfGimRjkJqaimvXruHq1au4du0aIiIioNFoYGZmBldXV7i7u3OP9u3bv/CWQ+b/S01NRXh4OPeIiYnBgwcPAACtW7eGl5cXvL294eXlBVdXV9Z5b+PDimRDpFAocP78eQQHB+P3339HWloaJBIJ3N3d0atXL3h5eaFnz56wsrLiO2qjlJGRgRs3buDq1av466+/EB4ejtLSUtjY2GDgwIEYPHgwBg0aBEtLS76jMq+PFcmGgIgQGhqK06dPIzg4GLdu3YJQKISXlxfefPNN9O7dGx4eHpBIJHxHbZJKS0sRFhaGq1ev4vz587h69SrUajW6deuGt956C8OGDUOPHj1YK7NhYkVSl925cwdHjhzBvn37cP/+fbRq1QqDBg3CG2+8gTfffJOdE9NRxcXF+Ouvv3Dx4kWcOnUKMTExsLW1hY+PD9555x14eXmxgtlwsCKpax4+fIiff/4ZBw8eRGJiItq2bYtx48Zh3Lhx6Ny5M9/xmFcQHR2NwMBABAYGIj4+Ho6Ojhg/fjz8/f3RunVrvuMxz8eKpC7QaDQ4e/YsNm/ejODgYNjY2MDPzw/jxo1D165d+Y7H1KGIiAgEBgYiICAAaWlpGDx4MGbOnImhQ4eyL9R0EyuSfCotLcWOHTvw/fffIykpCQMHDsTMmTMxcuRIdlFzI6dWq3Hq1Cls2bIFFy9ehIODA+bPnw9/f392blm3sCLJh7KyMuzcuRMrVqyAQqGAv78/PvroIzg7O/MdjeFBfHw8fvrpJ/z888+wsLDAokWLMG3atAYxGmUTwIpkffv111/x8ccfIz09Hf7+/vj888/RsmVLvmMxOiA1NRWrVq3C9u3bYW1tjfXr12PMmDF8x2rq2JCy9SU1NRVjx47F2LFj0a9fPyQkJGDjxo2sQDKcFi1aYMOGDbh37x769++PsWPHwsfH55k9rDP1gxXJenDw4EF06NABkZGROH/+PHbv3s2GDGCeydbWFrt27cLFixcRFRUFV1dXHDx4kO9YTRYrklpERPj666/h6+sLPz8/REdH44033uA7FtNADBgwANHR0ZgyZQp8fX3x9ddf1zgOOqNlxGhFSUkJvf322yQWi2nHjh31skx/f3+Sy+UEgEQiEXl6etbq90+fPk2mpqZ08uTJ186yf/9+AkA9e/Z87XkxRDt37iSxWEzvvPMOlZSU8B2nKclhLUktqKiowIQJE3DhwgWcP38e06ZNq5flbt++HRcuXAAAT
J8+HTdv3qzV71MdtlIOHDiANm3a4Pr167h3716dzbepmjp1Ki5cuIALFy7A19e3Sh+fjHaxIqkF3377Lc6cOYNTp06hb9++fMd5acOGDUN+fj5GjBjxWvPJzs5GTEwMli5dCgDYu3dvXcRr8vr06YNTp07h9OnT+N///sd3nCaDFck6duvWLSxbtgxr165F7969+Y7Di8DAQAwbNgwjR46EgYEBAgIC2Lm0OuLt7Y3169dj+fLlCA8P5ztOk8CKZB379NNP0bNnT8ycOZPvKBwiwrp169C+fXtIJBKYmZlh9OjRiI2N5aa5evUq7O3tIRAI8NNPPwEANm/ezI0j89tvv2HIkCEwNTWFra3tc79tPXDgAHx8fGBiYoI333wTiYmJCAkJqTZdbeZ/5coV/Oc//4FUKoWpqSk6deqEgoICeHh4QCAQQCAQoHPnzkhJSakx05IlS9CsWTMYGBhg2bJlAB6fFlm8eDHs7e1haGiIzp074/DhwwCA1atXc+PnZGZmYv78+WjZsiXi4uKemaW+zJgxAz179sSnn35ab8ts0ng+KdqoxMbGkkAgoHPnzvGW4e+//yYA9OGHH3LPLV68mMRiMQUEBFBeXh5FRUVRt27dyNzcnNLT07npUlJSCABt3LiRe+7LL78kAPT7779Tfn4+ZWZmUu/evcnIyIjKysqqLT8pKYksLCxIrVYTEVFAQAABoGnTptWY92XmX1hYSKampvTdd9+RSqWi9PR08vHxIYVCQUREXl5eZGdnRxqNhpvvqVOnyNnZucqyNmzYQMuXL+d+XrBgAUkkEjp69Cjl/r/27j2m6vqP4/gTuRw43OUcBOFAMC4jTVDEGeAGW4m1LDUhZKgxC7S2cvUHLtpqXeZabiwZtnS2XMrNVSD+EeacK8IMBohcBJT7rTgKBw7HOAaf3x+N8+v8MH9leA7C57GdP/ieL5/P+5zBa+d8L5/3yIh46623xJIlS0R1dbVZba+//rrIy8sT27ZtEzU1NfesxVIqKioEIFpaWiw67yJ0S4bkHPrkk0/E0qVLxdTUlNVq+N+QNBgMwsXFRaSmpprt9/PPPwtAvPfee6Zt9wrJ27dvm7bl5+cLQFy/fn3W/AcPHhQZGRmmn3U6nVAoFMLNzU0YDIZZ+/+d8RsbGwUgzp49e9fXfOzYMQGICxcumLZt375dAKKqqsq0LS4uTnR3dwshhLh9+7ZQKpVm74vBYBAKhUK88sorf1nb/6vFUqanp4VKpRK5ublWrWMRkGe351JLSwtRUVHzajWXpqYm9Ho9a9euNdseExODg4MDly9f/sdjztxT/OeuhjNmvmrPcHNzY+PGjYyNjVFWVnZf4wcHB+Pt7U16ejrvvvsuXV1dZvu/8MILKJVK0wmikZERbty4gUKhMG3r6urCwcGBgIAAAFpbWzEYDKxcudI0jpOTEz4+PmaHIf7X/6vFUmxsbIiKiqK5udkq8y8m8+e/eQHQ6/VmnfqsaWZR19HRUQBcXFxm7ePh4WFq1zoXGhsbuXr1Kps3bzYdJ7SxsaG8vBy4/7PcTk5OXLhwgfj4eD788EOCg4NJTU3l9u3bwB+tardt28ZXX32FwWCgsLCQPXv2sHnzZoqLi5mcnKSwsJD09HTTmDO9wd9++22zWru7uzEYDPddiyW5urqi1+stPu9iI0NyDqnVaoaGhqxdBgBeXl7AH0EI3DUMR0dH8ff3n7M5T506xY4dOxBCmD1u3bqFk5MT586du+/3Z8WKFZSXlzMwMEB2djbFxcUcOnTI9HxGRgbj4+N88803FBYWkpqaSkZGBiMjI5w9e5bS0lK2b99u2l+tVgOQm5s7q95Lly79q1osZXBwUPbRsQAZknNo3bp11NfXW/RMJ8DevXvp7+8H/lijEjCtYr5y5UpcXFyoqakx+53Lly9jNBqJjo6ekxqEEBQVFfHqq6/Oes7T05Pk5GSmpqYoKCj4x2MPDAyYvlaq1WoOHjzImjVrzL5qJiYmE
hgYyAcffIC3tzdeXl4kJSXh6+vLO++8Q1BQkFnLXI1Gg6OjI/X19XNeiyWMjY1RW1vLunXrLDrvYiRDcg499dRTODg4cOLECYvPXVBQgF6vp6ioiNDQUJ555hkAHB0defPNN/n66685efIkY2NjXL16lX379uHr60tWVtaczF9VVYWbmxtxcXF3fX7mkqj7+co9MDDA3r17uXbtGkajkbq6Orq7u1m/fr1pHxsbG3bv3s21a9fYvXs3ALa2tuzcuZOmpiZ27txpNqajoyMZGRkUFhZy5MgRxsbGmJqaoq+vj8HBwX9ViyWcOHECe3t7nn76aYvOuyhZ7ZzRArV//37h7e0tRkZGLDbnoUOHhIeHh1AoFGLTpk2io6PD7Pnp6Wnx8ccfi9DQUGFvby88PT3F1q1bRWtrq2mfvLw84ePjIwChVCrFs88+K/Lz84VSqRSACA0NFTdu3BBHjx4Vbm5uAhCBgYGira1N7NmzRzg7Ows7OzsRGRkpamtrzeZ///33ha+vrwAEIPz8/ER+fv7fHr+rq0vExsYKT09PYWtrK5YvXy5ycnJMlxnN6OjoEN7e3maXJrW0tAhvb29x586dWe/b5OSkyM7OFgEBAcLOzk6o1Wrx/PPPi6amJvHRRx8JJycnAQiNRiO+/PJLIYT427U8SKOjo2LZsmVi//79FptzEbslF92dYyMjI6xYsYK4uDhKSkpkVzxpTgkhSElJobKykubmZjw9Pa1d0kInF92da56enpw6dYrS0lJycnKsXY60wOTk5FBaWkpBQYEMSAuR3aYegMTERI4fP86LL76IXq8nNzcXW1tba5clPcSmpqZ44403yMvL44svviAxMdHaJS0aMiQfkF27duHs7MyuXbtobW2lpKQEd3d3a5clPYT0er1p6b2TJ0+SlpZm7ZIWFXlM8gGrqanhueeew9nZmWPHjj1US6dJ1vf999/z8ssvMz4+TllZGTExMdYuabGRxyQftLVr11JdXU1ERASJiYlkZmaa7oKRpL8yOjpKVlYWCQkJhIeHU11dLQPSSmRIWsDy5cspKyujuLiY8vJyIiIiOHz4ML/99pu1S5PmmcnJSfLy8oiIiODMmTMUFRVx5swZ2VXTimRIWlBycjLNzc2kpaVx4MABQkJCyM/PN90lIy1eRqORI0eOEBISQnZ2Njt27KC5uZmUlBRrl7boyWOSVjI4OGhqRO/l5UVWVhYvvfQSvr6+1i5NsqDBwUGOHz/OZ599hlarJTMzkwMHDsi/g/ljRIaklfX393P48GE+//xzdDodW7ZsYd++fSQkJMgL0RcoIQQXL17k008/pbS0FHd3dzIyMnjttdfmdMERaU7IkJwvjEYjZWVlHD16lPPnz6PRaNi6dSvJycnExcXJwFwAmpqaOH36NAUFBbS3txMdHU1mZibp6ekolUprlyfdnQzJ+aihoYHCwkKKi4vp7OwkJCSElJQUtmzZQnR09Lxa1Ff6a9PT09TW1lJaWkpxcTHXr18nKCiIlJQU0tLSTCs1SfOaDMn5rrq6mpKSEk6fPk13dzcqlYonn3ySTZs2sXHjRnx8fKxdovQnQ0NDnDt3joqKCr777juGh4cJCAggOTmZlJQUubTZw0eG5MOksbGRb7/9loqKCiorK5mcnGTVqlVs2LCBxx9/nPj4eFN7Askyent7qayspKqqih9++IGGhgYcHBzYsGEDSUlJJCUl8dhjj1m7TOn+yZB8WBkMBi5evMj58+epqqqitraWO3fu4O/vT1xcHLGxsaxevZpVq1bJ2yHniE6no6Ghgbq6Oi5dukRlZSV9fX3Y29uzZs0aYmNjeeKJJ0hISJDHGBcOGZILhcFgoLq6mh9//JGqqip++uknbt68iY2NDUFBQURFRREZGUlUVBSPPvoojzzyCHZ28tb9u/n999/p6uqiubmZ+vp6rly5Qn19PZ2dnQghWLp0KevXryc2Npb4+HhiYmJkKC5cMiQXsp6eHq5cuWJ61
NXV0dHRgRACBwcHgoKCCA8PJywsjNDQUMLCwggMDMTPz8/UsXChMhqNDAwM0NXVRXt7O21tbbS1tdHa2kpnZydGoxEbGxuCg4NZvXo1kZGRpoc8pLGoyJBcbMbHx2ltbaW9vZ3W1lZTOLS3t5t689jY2ODj44O/vz/+/v5oNBo0Gg3Lli1DpVKhUqnw9vZGpVLNm+6QMyYmJtBqtfz6669otVq0Wi2//PILvb299Pb20t/fT29vL0NDQ8z86bu6uhIWFmZ6hIeHExoaSnh4OK6urlZ+RZKVyZCU/mtoaIienh76+vro7e2lp6fHFCo9PT0MDw/PuoXSyckJLy8vVCoVLi4uODk54eHhgbOzM0qlEldXV1xdXbGzs8PW1tasGRf80c1x5hpQIcSsxT9mes9MTU0xNjbG+Pg4BoOBiYkJdDodBoMBvV7PzZs30Wq1s1q7KhQK1Go1AQEBptAPCAhAo9Hg5+dHYGCgvEJAuhcZktI/Mz4+zvDwMMPDw2i1WlM4abVaJiYmMBgM6HQ69Hq9KcB0Oh3T09NMTk6a9bSenp5Gp9OZje/u7m52HahSqUShULBkyRLc3d1xcXFBqVTi4uKCu7s7SqUSZ2dn0yfcmcBWq9Wo1Wr5SVD6t2RISpIk3YNcT1KSJOleZEhKkiTdgwxJSZKke7ADTlu7CEmSpHlq4j9hlUo6aIutGwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" ] - }, + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a text passage\n", + "predictions = text_table_qa_pipeline.run(query=\"Who is Aleksandar Trifunovic?\")" + ], + "metadata": { + "id": "strPNduPoBLe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "9YiK75tSoOGA", + "outputId": "bd52f841-3846-441f-dd6f-53b02111691e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 24, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "RyeK3s28_X1C" - }, - "source": [ - "## About us\n", - "\n", - "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", - "\n", - "We bring NLP to the industry via open source! \n", - "Our focus: Industry specific language models & large scale QA systems. 
\n", - " \n", - "Some of our other work: \n", - "- [German BERT](https://deepset.ai/german-bert)\n", - "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", - "- [FARM](https://github.com/deepset-ai/FARM)\n", - "\n", - "Get in touch:\n", - "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", - "\n", - "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: Who is Aleksandar Trifunovic?\n", + "Answers:\n", + "[ { 'answer': 'a Serbian professional basketball coach and former player',\n", + " 'context': 'Aleksandar Trifunović ( ; born 30 May 1967 ) is a Serbian '\n", + " 'professional basketball coach and former player .'},\n", + " { 'answer': 'Johnny Höglin',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Ivar Eriksen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk 
Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': 'Magne Thomassen',\n", + " 'context': Rank Athlete Country Time\n", + "0 1 Kees Verkerk Netherlands 2:03.4\n", + "1 2 Ivar Eriksen Norway 2:05.0\n", + "2 3 Ard Schenk Netherlands 2:05.0\n", + "3 4 Magne Thomassen Norway 2:05.1\n", + "4 5 Johnny Höglin Sweden 2:05.2\n", + "5 5 Bjørn Tveter Norway 2:05.2\n", + "6 7 Svein-Erik Stiansen Norway 2:05.5\n", + "7 8 Eduard Matusevich Soviet Union 2:06.1\n", + "8 9 Peter Nottet Netherlands 2:06.3\n", + "9 10 Örjan Sandler Sweden 2:07.0\n", + "10 11 Aleksandr Kerchenko Soviet Union 2:07.1\n", + "11 12 Ants Antson Soviet Union 2:07.2\n", + "12 12 Valery Kaplan Soviet Union 2:07.2\n", + "13 14 Jouko Launonen Finland 2:07.5\n", + "14 15 Günter Traub West Germany 2:07.7\n", + "15 16 Jan Bols Netherlands 2:07.8\n", + "16 16 Manne Lavås Sweden 2:07.8\n", + "17 18 Kimmo Koskinen Finland 2:07.9\n", + "18 19 Richard Wurster United States 2:08.4\n", + "19 20 Göran Claeson Sweden 2:08.6},\n", + " { 'answer': '5',\n", + " 'context': Position # Player Moving from\n", + "0 F 12 Nikola Kalinić Radnički Kragujevac\n", + "1 SF 6 Nemanja Dangubić Mega Vizura\n", + "2 C 33 Maik Zirbes Brose 
Baskets\n", + "3 PG 3 Marcus Williams Lokomotiv Kuban\n", + "4 PG 24 Stefan Jović Radnički Kragujevac\n", + "5 C 14 Đorđe Kaplanović FMP\n", + "6 SF 5 Nikola Čvorović FMP\n", + "7 SG 7 Aleksandar Aranitović Crvena zvezda U18\n", + "8 SG 20 Aleksa Radanov Crvena zvezda U18},\n", + " { 'answer': 'Vasile Sărucan',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Belgium',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Poland',\n", + " 'context': Rank Name Nationality Result\n", + "0 1 Hans 
Baumgartner West Germany 8.12\n", + "1 2 Igor Ter-Ovanesyan Soviet Union 7.91\n", + "2 3 Vasile Sărucan Romania 7.88\n", + "3 4 Valeriu Jurcă Romania 7.72\n", + "4 5 Philippe Housiaux Belgium 7.70\n", + "5 6 Andreas Gloerfeld West Germany 7.70\n", + "6 7 Jan Kobuszewski Poland 7.66\n", + "7 8 Jaroslav Brož Czechoslovakia 7.66\n", + "8 9 Alan Lerwill Great Britain 7.61\n", + "9 10 Mikhail Bariban Soviet Union 7.58\n", + "10 11 Valeriy Podluzhniy Soviet Union 7.54\n", + "11 12 Kari Palmen Finland 7.51\n", + "12 13 Georgi Marin Bulgaria 7.51\n", + "13 14 Jesper Tørring Denmark 7.46\n", + "14 15 Milan Spasojević Yugoslavia 7.23\n", + "15 16 Salih Mercan Turkey 6.98\n", + "16 17 Henrik Kalocsai Hungary 5.67},\n", + " { 'answer': 'Hafþór Júlíus Björnsson',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Estonia',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Iceland',\n", + " 'context': # Name Nationality Pts\n", + "0 1 Hafþór Júlíus Björnsson Iceland 31.5\n", + "1 2 Robert Oberst United States 29\n", + "2 3 Lauri Nami Estonia 24\n", + "3 4 Nick Best United States 14.5\n", + "4 5 Laurence Shahlaei UK 12\n", + "5 6 Wu Long China 6},\n", + " { 'answer': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman',\n", + " 'context': 'Egor Antropov ( born May 8 , 1992 ) is a Russian '\n", + " 'professional ice hockey defenceman . 
He is currently '\n", + " 'playing with Piráti Chomutov of the Czech Extral'},\n", + " { 'answer': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player',\n", + " 'context': 'Zurab Magomedovich Yevloyev ( ; born February 20 , 1980 ) '\n", + " 'is a Russian professional football player . In 2010 , he '\n", + " 'played for FC Angusht Nazran in the'}]\n" + ] } - ], - "metadata": { - "accelerator": "GPU", + ] + }, + { + "cell_type": "code", + "source": [ + "# Example query whose answer resides in a table\n", + "predictions = text_table_qa_pipeline.run(query=\"What is Cuba's national tree?\")" + ], + "metadata": { + "id": "QYOHDSmLpzEg" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# We can see both text passages and tables as contexts of the predicted answers.\n", + "print_answers(predictions, details=\"minimum\")" + ], + "metadata": { + "id": "4kw53uWep3zj", + "outputId": "b332cc17-3cb8-4e20-d79d-bb4cf656f277", "colab": { - "name": "Tutorial15_TableQA.ipynb", - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "name": "python" + "base_uri": "https://localhost:8080/" } + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Query: What is Cuba's national tree?\n", + "Answers:\n", + "[ { 'answer': 'Cuban royal palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... 
Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Quercus sagraeana , the Cuban oak',\n", + " 'context': 'Quercus sagraeana , the Cuban oak , is a medium-sized '\n", + " 'evergreen tree native to western Cuba in the Cuban pine '\n", + " 'forests ecoregion .'},\n", + " { 'answer': \"Glenn O'Brien\",\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... 
Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Guy Oseary',\n", + " 'context': Book title ... Notes\n", + "0 Sex ... The book contains erotica influenced photographs taken by Steven Meisel and ...\n", + "1 Madonna : The Girlie Show ... The photographs in the book showcased behind-the-scenes of the 1993 Girlie S...\n", + "2 The Making of Evita ... 
Featuring an introduction by Madonna , The Making of Evita chronicles the cr...\n", + "3 The Emperor 's New Clothes : An All-Star Retelling of the Classic Fairy Tale ... This fully illustrated retelling of the classic fairy tale by Hans Christian...\n", + "4 X-Static Process ... In 2002 , Madonna had collaborated with photographer Steven Klein for an art...\n", + "5 Nobody Knows Me ... Available for one month only via Madonna 's official website . Contained 52 ...\n", + "6 Madonna Confessions ... Behind-the-scenes and on-stage pictures from Madonna 's 2006 Confessions Tou...\n", + "7 I Am Because We Are ... The book contains excerpts from interviews with Malawian children , their bi...\n", + "8 Madonna : Sticky & Sweet ... Behind-the-scenes and on-stage photography from Madonna 's Sticky & Sweet To...\n", + "9 Tom Munro ... Munro 's self-titled first monograph book consists of photographs taken by h...\n", + "10 Mayumi 's Kitchen : Macrobiotic Cooking for Body and Soul ... Mayumi Nishimura worked as Madonna 's private chef for seven years , and she...\n", + "\n", + "[11 rows x 6 columns]},\n", + " { 'answer': 'Belize',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... 
Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Palmyra palm',\n", + " 'context': Country ... Scientific name\n", + "0 Afghanistan ... \n", + "1 Albania ... Olea europaea\n", + "2 Antigua and Barbuda ... Bucida buceras\n", + "3 Argentina ... Erythrina crista-galli , Schinopsis balansae\n", + "4 Australia ... Acacia pycnantha\n", + "5 Bahamas ... Guaiacum sanctum\n", + "6 Bangladesh ... Mangifera indica\n", + "7 Belize ... Swietenia macrophylla\n", + "8 Bhutan ... Cupressus cashmeriana\n", + "9 Brazil ... Caesalpinia echinata\n", + "10 Cambodia ... Borassus flabellifer\n", + "11 Canada ... Acer\n", + "12 Chile ... Araucaria araucana\n", + "13 Colombia ... Ceroxylon quindiuense\n", + "14 Costa Rica ... Enterolobium cyclocarpum\n", + "15 Croatia ... Quercus robur\n", + "16 Cuba ... Roystonea regia\n", + "17 Cyprus ... Quercus alnifolia\n", + "18 Czech Republic ... Tilia cordata\n", + "19 Denmark ... Fagus sylvatica\n", + "\n", + "[20 rows x 3 columns]},\n", + " { 'answer': 'Guadeloupe',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Basse-Terre',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... 
French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'East Caribbean dollar',\n", + " 'context': State ... Official Language ( s )\n", + "0 Antigua and Barbuda ... English\n", + "1 Dominica ... English\n", + "2 Grenada ... English\n", + "3 Montserrat ... English\n", + "4 Saint Kitts and Nevis ... English\n", + "5 Saint Lucia ... English\n", + "6 Saint Vincent and the Grenadines ... English\n", + "7 Anguilla ... English\n", + "8 British Virgin Islands ... English\n", + "9 Guadeloupe ... French\n", + "10 Martinique ... French\n", + "\n", + "[11 rows x 10 columns]},\n", + " { 'answer': 'Jenkins',\n", + " 'context': NRHP reference number ... County\n", + "0 72000402 ... Wilkes\n", + "1 ... Meriwether\n", + "2 ... Bartow\n", + "3 71000280 ... Jenkins\n", + "4 ... Chatham\n", + "5 89002015 ... Thomas\n", + "6 ... Glynn\n", + "7 75000615 ... Walton\n", + "8 84001156 ... Sumter\n", + "9 79000713 ... Cobb\n", + "10 82002491 ... Twiggs\n", + "11 74000703 ... Taliaferro\n", + "12 80001039 ... Floyd\n", + "13 90000805 ... Gwinnett\n", + "14 73000620 ... Decatur\n", + "15 79000731 ... Houston\n", + "16 95000741 ... Grady\n", + "17 97000559 ... Greene\n", + "18 74000662 ... Brooks\n", + "19 75000616 ... Washington\n", + "\n", + "[20 rows x 4 columns]},\n", + " { 'answer': \"Primula farinosa , the bird's-eye primrose\",\n", + " 'context': \"Primula farinosa , the bird's-eye primrose , is a small \"\n", + " 'perennial plant in the family Primulaceae , native to '\n", + " 'Northern Europe and northern Asia , and '},\n", + " { 'answer': 'Poospiza',\n", + " 'context': 'Poospiza is a genus of finch-like tanagers found in both '\n", + " 'the South American lowlands and the Andes mountains . 
'\n", + " 'Generally they are arboreal feeders in '},\n", + " { 'answer': 'golden-crowned sparrow',\n", + " 'context': 'The golden-crowned sparrow ( Zonotrichia atricapilla ) is '\n", + " 'a large American sparrow found in the western part of '\n", + " 'North America .'},\n", + " { 'answer': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush',\n", + " 'context': 'Banksia sessilis var . cordata is a variety of Banksia '\n", + " 'sessilis ( Parrot Bush ) , with unusually large leaves and '\n", + " 'flower heads . It is a rare variety '},\n", + " { 'answer': 'rain',\n", + " 'context': 's and operates hotels at Machu Picchu Natural Reserve , '\n", + " 'the southeastern rain forest of the Amazon in Puerto '\n", + " 'Maldonado , Tambopata , the Sacred Valley'}]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RyeK3s28_X1C" + }, + "source": [ + "## About us\n", + "\n", + "This [Haystack](https://github.com/deepset-ai/haystack/) notebook was made with love by [deepset](https://deepset.ai/) in Berlin, Germany\n", + "\n", + "We bring NLP to the industry via open source! \n", + "Our focus: Industry specific language models & large scale QA systems. 
\n", + " \n", + "Some of our other work: \n", + "- [German BERT](https://deepset.ai/german-bert)\n", + "- [GermanQuAD and GermanDPR](https://deepset.ai/germanquad)\n", + "- [FARM](https://github.com/deepset-ai/FARM)\n", + "\n", + "Get in touch:\n", + "[Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Slack](https://haystack.deepset.ai/community/join) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)\n", + "\n", + "By the way: [we're hiring!](https://www.deepset.ai/jobs)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "Tutorial15_TableQA.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From 2ad75f53df290ee49d34f62423e0b8bf27dc81b9 Mon Sep 17 00:00:00 2001 From: bogdankostic Date: Tue, 1 Mar 2022 17:22:53 +0100 Subject: [PATCH 14/14] Adapt name of test for JoinAnswers node --- test/test_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index f36e0faeb1..1b216e8f2c 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -1075,7 +1075,7 @@ def test_route_documents_by_metafield(test_docs_xs): @pytest.mark.parametrize("join_mode", ["concatenate", "merge"]) -def test_join_answers_concatenate(join_mode): +def test_join_answers(join_mode): inputs = [{"answers": [Answer(answer="answer 1", score=0.7)]}, {"answers": [Answer(answer="answer 2", score=0.8)]}] join_answers = JoinAnswers(join_mode=join_mode)