Commit

Generate docstrings and deploy to branches to Staging (Website) (#731)
* test pre commit hook

* test status

* test on this branch

* push generated docstrings and tutorials to branch

* fixed syntax error

* Add latest docstring and tutorial changes

* add files before commit

* catch commit error

* separate generation from deployment

* add deployment process for staging

* add current branch to payload

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
PiffPaffM and github-actions[bot] authored Jan 21, 2021
1 parent 0f62e0b commit 0b583b8
Showing 8 changed files with 285 additions and 187 deletions.
23 changes: 1 addition & 22 deletions .github/workflows/deploy_website.yml
@@ -13,28 +13,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: 3.7

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install 'pydoc-markdown>=3.0.0,<4.0.0'
          pip install mkdocs
          pip install jupytercontrib

      # Generates the docstrings and tutorials so that we have the latest for the deployment
      - name: Generate Docstrings and Tutorials
        run: |
          cd docs/_src/api/api/
          ./generate_docstrings.sh
          cd ../../tutorials/tutorials/
          python3 convert_ipynb.py

      # Creates dispatch event for haystack-website repo
      - name: Repository Dispatch
        uses: peter-evans/repository-dispatch@v1
26 changes: 26 additions & 0 deletions .github/workflows/deploy_website_staging.yml
@@ -0,0 +1,26 @@
name: Deploy website

# Controls when the action will run. Triggers the workflow on push
# events to any branch except master and benchmarks
on:
  push:
    branches-ignore:
      - master
      - benchmarks

jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    steps:

      # Creates dispatch event for haystack-website repo
      - name: Repository Dispatch
        uses: peter-evans/repository-dispatch@v1
        with:
          token: ${{ secrets.PUBLIC_REPO_ACCESS_TOKEN }}
          repository: deepset-ai/haystack-website
          event-type: deploy-website-staging
          client-payload: '{"ref": "${{ github.ref }}"}'
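
For context, the Repository Dispatch step above is roughly equivalent to the Python sketch below: it POSTs a `repository_dispatch` event to the haystack-website repo with the pushed branch ref as the client payload. This is illustration only; it assumes the `requests` library and a repo-scoped token exported as `GH_PAT`, while the workflow itself relies on the peter-evans/repository-dispatch action.

```python
# Illustrative sketch of the dispatch call made by the repository-dispatch action.
# Assumes `requests` is installed and a repo-scoped token is exported as GH_PAT.
import os

import requests


def trigger_staging_deploy(ref: str) -> None:
    """Send a repository_dispatch event so haystack-website redeploys staging."""
    response = requests.post(
        "https://api.github.com/repos/deepset-ai/haystack-website/dispatches",
        headers={
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"token {os.environ['GH_PAT']}",
        },
        # Mirrors the client-payload in the workflow: the ref of the pushed branch.
        json={"event_type": "deploy-website-staging", "client_payload": {"ref": ref}},
    )
    response.raise_for_status()  # GitHub returns 204 No Content on success


if __name__ == "__main__":
    trigger_staging_deploy("refs/heads/my-feature-branch")
```
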
55 changes: 55 additions & 0 deletions .github/workflows/update_docs.yml
@@ -0,0 +1,55 @@
name: Update Docstrings and Tutorials

# Controls when the action will run. Triggers the workflow on push
# events to any branch except master and benchmarks
on:
  push:
    branches-ignore:
      - master
      - benchmarks

jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
        with:
          persist-credentials: false # otherwise, the token used is the GITHUB_TOKEN, instead of your personal token
          fetch-depth: 0 # otherwise, you will fail to push refs to the dest repo

      - name: Set up Python 3.7
        uses: actions/setup-python@v2
        with:
          python-version: 3.7

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install 'pydoc-markdown>=3.0.0,<4.0.0'
          pip install mkdocs
          pip install jupytercontrib

      # Generates the docstrings and tutorials so that we have the latest for the deployment
      - name: Generate Docstrings and Tutorials
        run: |
          cd docs/_src/api/api/
          ./generate_docstrings.sh
          cd ../../tutorials/tutorials/
          python3 convert_ipynb.py
          cd ../../../../
          git status

      - name: Commit files
        run: |
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add .
          git commit -m "Add latest docstring and tutorial changes" -a || echo "No changes to commit"

      - name: Push changes
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          branch: ${{ github.ref }}
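
As a rough illustration of the "Generate Docstrings and Tutorials" step, the tutorial conversion could look like the sketch below. This is an assumption about what a script like `convert_ipynb.py` might do (it uses `nbconvert`, which the workflow does not explicitly install); the actual script in the repository may differ.

```python
# Hypothetical sketch of a notebook-to-markdown conversion step such as
# convert_ipynb.py; the real script may work differently. Requires nbconvert.
from pathlib import Path

from nbconvert import MarkdownExporter


def convert_notebooks(source_dir: str = ".") -> None:
    """Convert every *.ipynb in source_dir into a sibling *.md file."""
    exporter = MarkdownExporter()
    for notebook in sorted(Path(source_dir).glob("*.ipynb")):
        body, _resources = exporter.from_filename(str(notebook))
        # e.g. Tutorial1.ipynb -> Tutorial1.md (filenames here are illustrative)
        notebook.with_suffix(".md").write_text(body, encoding="utf-8")


if __name__ == "__main__":
    convert_notebooks()
```
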
118 changes: 59 additions & 59 deletions docs/_src/api/api/file_converter.md
@@ -1,3 +1,62 @@
<a name="base"></a>
# Module base

<a name="base.BaseConverter"></a>
## BaseConverter Objects

```python
class BaseConverter()
```

Base class for implementing file converters that transform input documents into text format for ingestion into a DocumentStore.

<a name="base.BaseConverter.__init__"></a>
#### \_\_init\_\_

```python
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
```

**Arguments**:

- `remove_numeric_tables`: This option uses heuristics to remove numeric rows from tables.
The tabular structures in documents might be noise for the reader model if it
does not have table-parsing capability for finding answers. However, tables
may also contain long strings that could be possible candidates for answers.
Rows containing strings are therefore retained when this option is enabled.
- `valid_languages`: validate languages from a list of languages specified in the ISO 639-1
(https://en.wikipedia.org/wiki/ISO_639-1) format.
This option can be used to add a test for encoding errors. If the extracted text is
not in one of the valid languages, it is likely an encoding error that resulted
in garbled text.

<a name="base.BaseConverter.convert"></a>
#### convert

```python
| @abstractmethod
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
```

Convert a file to a dictionary containing the text and any associated metadata.

File converters may extract file metadata such as name or size. In addition,
user-supplied metadata such as author, URL, or external IDs can be passed as a dictionary.

**Arguments**:

- `file_path`: path of the file to convert
- `meta`: dictionary of metadata key-value pairs to append to the returned document.

<a name="base.BaseConverter.validate_language"></a>
#### validate\_language

```python
| validate_language(text: str) -> bool
```

Validate whether the language of the text is one of the valid languages.
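
Based only on the signatures documented above, a minimal converter might look roughly like the sketch below. This is a hedged illustration: the import path and the keys of the returned dictionary are assumptions, not Haystack's exact implementation.

```python
# Minimal sketch of a BaseConverter subclass; the import path and the
# return-dict keys are assumptions based on the docs above.
from pathlib import Path
from typing import Any, Dict, Optional

from haystack.file_converter.base import BaseConverter  # assumed import path


class PlainTextConverter(BaseConverter):
    def convert(self, file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]:
        text = Path(file_path).read_text(encoding="utf-8")
        if not self.validate_language(text):
            # A failed language check often points to an encoding problem (garbled text)
            print(f"Warning: {file_path} may not be in one of the configured valid languages")
        return {"text": text, "meta": meta or {}}
```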

<a name="txt"></a>
# Module txt

@@ -118,65 +177,6 @@ in garbled text.

a list of pages and the extracted meta data of the file.

<a name="base"></a>
# Module base

<a name="base.BaseConverter"></a>
## BaseConverter Objects

```python
class BaseConverter()
```

Base class for implementing file converters that transform input documents into text format for ingestion into a DocumentStore.

<a name="base.BaseConverter.__init__"></a>
#### \_\_init\_\_

```python
| __init__(remove_numeric_tables: Optional[bool] = None, valid_languages: Optional[List[str]] = None)
```

**Arguments**:

- `remove_numeric_tables`: This option uses heuristics to remove numeric rows from tables.
The tabular structures in documents might be noise for the reader model if it
does not have table-parsing capability for finding answers. However, tables
may also contain long strings that could be possible candidates for answers.
Rows containing strings are therefore retained when this option is enabled.
- `valid_languages`: validate languages from a list of languages specified in the ISO 639-1
(https://en.wikipedia.org/wiki/ISO_639-1) format.
This option can be used to add a test for encoding errors. If the extracted text is
not in one of the valid languages, it is likely an encoding error that resulted
in garbled text.

<a name="base.BaseConverter.convert"></a>
#### convert

```python
| @abstractmethod
| convert(file_path: Path, meta: Optional[Dict[str, str]]) -> Dict[str, Any]
```

Convert a file to a dictionary containing the text and any associated metadata.

File converters may extract file metadata such as name or size. In addition,
user-supplied metadata such as author, URL, or external IDs can be passed as a dictionary.

**Arguments**:

- `file_path`: path of the file to convert
- `meta`: dictionary of metadata key-value pairs to append to the returned document.

<a name="base.BaseConverter.validate_language"></a>
#### validate\_language

```python
| validate_language(text: str) -> bool
```

Validate whether the language of the text is one of the valid languages.

<a name="pdf"></a>
# Module pdf

64 changes: 32 additions & 32 deletions docs/_src/api/api/generator.md
@@ -1,3 +1,35 @@
<a name="base"></a>
# Module base

<a name="base.BaseGenerator"></a>
## BaseGenerator Objects

```python
class BaseGenerator(ABC)
```

Abstract class for Generators

<a name="base.BaseGenerator.predict"></a>
#### predict

```python
| @abstractmethod
| predict(query: str, documents: List[Document], top_k: Optional[int]) -> Dict
```

Abstract method to generate answers.

**Arguments**:

- `query`: Query
- `documents`: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
- `top_k`: Number of returned answers

**Returns**:

Generated answers plus additional information in a dict
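
As a hedged illustration of the contract above, a toy subclass could look like the following. The import paths, the `Document.text` attribute, and the shape of the returned dict are assumptions made for the example; the concrete format used in practice is documented in the transformers module below.

```python
# Toy BaseGenerator subclass following the predict() signature documented above.
# Import paths and the return-dict shape are assumptions made for illustration.
from typing import Dict, List, Optional

from haystack import Document                      # assumed import path
from haystack.generator.base import BaseGenerator  # assumed import path


class EchoGenerator(BaseGenerator):
    """Fake generator that 'answers' by echoing the text of the top documents."""

    def predict(self, query: str, documents: List[Document], top_k: Optional[int]) -> Dict:
        top_docs = documents[: top_k or 1]
        answers = [{"query": query, "answer": doc.text} for doc in top_docs]
        return {"query": query, "answers": answers}
```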

<a name="transformers"></a>
# Module transformers

@@ -106,35 +138,3 @@ Generated answers plus additional infos in a dict like this:
| }}]}
```

<a name="base"></a>
# Module base

<a name="base.BaseGenerator"></a>
## BaseGenerator Objects

```python
class BaseGenerator(ABC)
```

Abstract class for Generators

<a name="base.BaseGenerator.predict"></a>
#### predict

```python
| @abstractmethod
| predict(query: str, documents: List[Document], top_k: Optional[int]) -> Dict
```

Abstract method to generate answers.

**Arguments**:

- `query`: Query
- `documents`: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
- `top_k`: Number of returned answers

**Returns**:

Generated answers plus additional information in a dict

38 changes: 38 additions & 0 deletions docs/_src/api/api/pipelines.md
@@ -207,6 +207,44 @@ Initialize a Pipeline for Generative Question Answering.
- `generator`: Generator instance
- `retriever`: Retriever instance

<a name="pipeline.SearchSummarizationPipeline"></a>
## SearchSummarizationPipeline Objects

```python
class SearchSummarizationPipeline(BaseStandardPipeline)
```

<a name="pipeline.SearchSummarizationPipeline.__init__"></a>
#### \_\_init\_\_

```python
| __init__(summarizer: BaseSummarizer, retriever: BaseRetriever)
```

Initialize a Pipeline that retrieves documents for a query and then summarizes those documents.

**Arguments**:

- `summarizer`: Summarizer instance
- `retriever`: Retriever instance

<a name="pipeline.SearchSummarizationPipeline.run"></a>
#### run

```python
| run(query: str, filters: Optional[Dict] = None, top_k_retriever: int = 10, generate_single_summary: bool = False, return_in_answer_format=False)
```

**Arguments**:

- `query`: Your search query
- `filters`:
- `top_k_retriever`: Number of top docs the retriever should pass to the summarizer.
The higher this value, the slower your pipeline.
- `generate_single_summary`: Whether to generate a single summary from all retrieved docs (True) or one summary per doc (False).
- `return_in_answer_format`: Whether the results should be returned as documents (False) or in the answer format used in other QA pipelines (True).
With the latter, you can use this pipeline as a "drop-in replacement" for other QA pipelines.
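
A hedged usage sketch for this pipeline, based only on the signature above; the import paths are assumptions, and the `summarizer`/`retriever` arguments are expected to be already-initialized instances.

```python
# Usage sketch for SearchSummarizationPipeline; import paths are assumptions and
# the summarizer/retriever must be already-initialized instances.
from haystack.pipeline import SearchSummarizationPipeline  # assumed import path
from haystack.retriever.base import BaseRetriever          # assumed import path
from haystack.summarizer.base import BaseSummarizer        # assumed import path


def summarize_search(summarizer: BaseSummarizer, retriever: BaseRetriever, query: str) -> dict:
    pipeline = SearchSummarizationPipeline(summarizer=summarizer, retriever=retriever)
    # One summary per retrieved doc by default; set generate_single_summary=True for a
    # single summary over all docs, and return_in_answer_format=True for QA-style output.
    return pipeline.run(query=query, top_k_retriever=5)
```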

<a name="pipeline.FAQPipeline"></a>
## FAQPipeline Objects

6 changes: 3 additions & 3 deletions docs/_src/api/api/reader.md
@@ -1,3 +1,6 @@
<a name="base"></a>
# Module base

<a name="farm"></a>
# Module farm

@@ -378,6 +381,3 @@ Example:

Dict containing query and answers

<a name="base"></a>
# Module base
