From 6ee9ddc90dedbe0202cfdfa366f821fe587a882d Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Wed, 16 Feb 2022 17:22:37 +0100 Subject: [PATCH 01/22] pipeline.to_code() with jupyter support --- haystack/pipelines/base.py | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 760883d9b8..f9bb957afc 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -6,6 +6,7 @@ import inspect import logging import os +import re import traceback import numpy as np import pandas as pd @@ -1040,6 +1041,50 @@ def get_config(self, return_defaults: bool = False) -> dict: config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"} return config + def to_code(self, pipeline_name="pipeline", create_ipython_cell=True): + code = self._generate_pipeline_code(pipeline_name=pipeline_name) + if create_ipython_cell: + try: + get_ipython().set_next_input(code) #type: ignore + except NameError: + return code + else: + return code + + def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]]=None): + ordered = [] + keys = keys or depedency.keys() #type: ignore + for k in keys: #type: ignore + v = depedency[k] + if len(v) > 0: + ordered += [k for k in self._order_component_code(depedency, v) if k not in ordered] + if k not in ordered: + ordered.append(k) + return ordered + + def _generate_pipeline_code(self, pipeline_name="pipeline"): + config = self.get_config() + code = "" + name_to_variable_name = {c['name']: re.sub(r'(? Date: Wed, 16 Feb 2022 16:37:14 +0000 Subject: [PATCH 02/22] Update Documentation & Code Style --- docs/_src/api/openapi/openapi.json | 2 +- haystack/pipelines/base.py | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/_src/api/openapi/openapi.json b/docs/_src/api/openapi/openapi.json index af6d9ce457..f5fcee5b74 100644 --- a/docs/_src/api/openapi/openapi.json +++ b/docs/_src/api/openapi/openapi.json @@ -278,7 +278,7 @@ "file-upload" ], "summary": "Upload File", - "description": "You can use this endpoint to upload a file for indexing\n(see [http://localhost:3000/guides/rest-api#indexing-documents-in-the-haystack-rest-api-document-store]).", + "description": "You can use this endpoint to upload a file for indexing\n(see https://haystack.deepset.ai/guides/rest-api#indexing-documents-in-the-haystack-rest-api-document-store).", "operationId": "upload_file_file_upload_post", "requestBody": { "content": { diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index f9bb957afc..70df39246a 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1045,35 +1045,42 @@ def to_code(self, pipeline_name="pipeline", create_ipython_cell=True): code = self._generate_pipeline_code(pipeline_name=pipeline_name) if create_ipython_cell: try: - get_ipython().set_next_input(code) #type: ignore + get_ipython().set_next_input(code) # type: ignore except NameError: return code else: return code - def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]]=None): + def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]] = None): ordered = [] - keys = keys or depedency.keys() #type: ignore - for k in keys: #type: ignore + keys = keys or depedency.keys() # type: ignore + for k in keys: # type: ignore v = depedency[k] if len(v) > 0: ordered += [k for k in 
self._order_component_code(depedency, v) if k not in ordered] - if k not in ordered: + if k not in ordered: ordered.append(k) return ordered def _generate_pipeline_code(self, pipeline_name="pipeline"): config = self.get_config() code = "" - name_to_variable_name = {c['name']: re.sub(r'(? Date: Wed, 16 Feb 2022 18:01:52 +0100 Subject: [PATCH 03/22] add imports --- haystack/pipelines/base.py | 53 ++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index f9bb957afc..64319fab24 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1,4 +1,5 @@ from __future__ import annotations +import sys from typing import Dict, List, Optional, Any import copy @@ -1041,39 +1042,63 @@ def get_config(self, return_defaults: bool = False) -> dict: config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"} return config - def to_code(self, pipeline_name="pipeline", create_ipython_cell=True): - code = self._generate_pipeline_code(pipeline_name=pipeline_name) + def to_code(self, pipeline_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True): + code = self._generate_pipeline_code(pipeline_name=pipeline_name, generate_imports=generate_imports) if create_ipython_cell: try: - get_ipython().set_next_input(code) #type: ignore + get_ipython().set_next_input(code) # type: ignore except NameError: return code else: return code - def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]]=None): + def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]] = None): ordered = [] - keys = keys or depedency.keys() #type: ignore - for k in keys: #type: ignore + keys = keys or depedency.keys() # type: ignore + for k in keys: # type: ignore v = depedency[k] if len(v) > 0: ordered += [k for k in self._order_component_code(depedency, v) if k not in ordered] - if k not in ordered: + if k not in ordered: ordered.append(k) return ordered - def _generate_pipeline_code(self, pipeline_name="pipeline"): + def _generate_pipeline_code(self, pipeline_name: str = "pipeline", generate_imports: bool = True): config = self.get_config() code = "" - name_to_variable_name = {c['name']: re.sub(r'(? Date: Wed, 16 Feb 2022 19:20:57 +0100 Subject: [PATCH 04/22] refactoring --- haystack/pipelines/base.py | 113 ++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 39 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 64319fab24..36caab407e 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -40,6 +40,7 @@ ROOT_NODE_TO_PIPELINE_NAME = {"query": "query", "file": "indexing"} +CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(? 
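The `CAMEL_CASE_TO_SNAKE_CASE_REGEX` constant introduced here drives the component-name-to-variable-name conversion used by the code generator. A standalone sketch that is consistent with the variable names asserted by the tests later in this series; the exact pattern literal is an assumption reconstructed from that behaviour:

```python
import re

# Assumed pattern: match the (empty) position before any uppercase letter
# that is not at the start of the string.
CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(?<!^)(?=[A-Z])")

def camel_to_snake_case(name: str) -> str:
    # Insert "_" at every matched position, then lowercase the result.
    return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", name).lower()

assert camel_to_snake_case("EsRetriever") == "es_retriever"
assert camel_to_snake_case("JoinResults") == "join_results"
assert camel_to_snake_case("ElasticsearchDocumentStore") == "elasticsearch_document_store"
```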
dict: return config def to_code(self, pipeline_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True): - code = self._generate_pipeline_code(pipeline_name=pipeline_name, generate_imports=generate_imports) + config = self.get_config() + code = self._generate_code(config=config, pipeline_name=pipeline_name, generate_imports=generate_imports) if create_ipython_cell: try: get_ipython().set_next_input(code) # type: ignore @@ -1052,62 +1054,95 @@ def to_code(self, pipeline_name: str = "pipeline", create_ipython_cell: bool = T else: return code - def _order_component_code(self, depedency: Dict[str, List[str]], keys: Optional[List[str]] = None): + @classmethod + def _order_component_code(cls, depedency: Dict[str, List[str]], keys: Optional[List[str]] = None): ordered = [] keys = keys or depedency.keys() # type: ignore for k in keys: # type: ignore v = depedency[k] if len(v) > 0: - ordered += [k for k in self._order_component_code(depedency, v) if k not in ordered] + ordered += [k for k in cls._order_component_code(depedency, v) if k not in ordered] if k not in ordered: ordered.append(k) return ordered - def _generate_pipeline_code(self, pipeline_name: str = "pipeline", generate_imports: bool = True): - config = self.get_config() - code = "" - if generate_imports: - types = [c["type"] for c in config["components"]] - allowed_imports = ["haystack.nodes", "haystack.document_stores"] - importable_classes = { - name: mod - for mod in allowed_imports - for name, obj in inspect.getmembers(sys.modules[mod]) - if inspect.isclass(obj) - } + @classmethod + def _camel_to_snake_case(cls, input: str) -> str: + return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() - for t in types: - if t in importable_classes: - code += f"from {importable_classes[t]} import {t}\n" - else: - code += f"# from MODULE_NOT_FOUND import {t}\n" - code += "\n" + @classmethod + def _generate_code(cls, config: dict, pipeline_name: str = "pipeline", generate_imports: bool = True): + component_definitions = config["components"] + pipeline_definition = config["pipelines"][0] + code_parts = [] + if generate_imports: + types_to_import = [c["type"] for c in component_definitions] + code_parts.append(cls._generate_imports_code(types_to_import=types_to_import)) - name_to_variable_name = { - c["name"]: re.sub(r"(? 
Date: Wed, 16 Feb 2022 18:23:02 +0000 Subject: [PATCH 05/22] Update Documentation & Code Style --- haystack/pipelines/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 36caab407e..4c5243a893 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -1079,9 +1079,7 @@ def _generate_code(cls, config: dict, pipeline_name: str = "pipeline", generate_ types_to_import = [c["type"] for c in component_definitions] code_parts.append(cls._generate_imports_code(types_to_import=types_to_import)) - component_variable_names = { - c["name"]: cls._camel_to_snake_case(c["name"]) for c in component_definitions - } + component_variable_names = {c["name"]: cls._camel_to_snake_case(c["name"]) for c in component_definitions} code_parts.append( cls._generate_components_code( component_definitions=component_definitions, component_variable_names=component_variable_names From 6ed77a0b8fd3071e175e1e12ee2ed3e9bf358c7c Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Thu, 17 Feb 2022 15:27:43 +0100 Subject: [PATCH 06/22] docstrings added and refactoring --- haystack/pipelines/base.py | 246 +++++++++++++++++++++---------------- 1 file changed, 142 insertions(+), 104 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 36caab407e..463df75084 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -71,6 +71,36 @@ def get_config(self, return_defaults: bool = False) -> dict: """ raise NotImplementedError + def to_code( + self, pipeline_variable_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True + ): + """ + Returns the respecting code to create this pipeline. + If we're running in a notebook environment and create_ipyhton_cell is set to True, + this will automatically create a new notebook cell containing the code instead of returning a string. + + :param pipeline_variable_name: The variable name of the generated pipeline. + Default value is 'pipeline'. + :param create_ipython_cell: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. + Otherwise to_code() will return the code as string. + Default value is True. + :param generate_imports: Whether to include the required import statements into the code. + Default value is True. 
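A minimal usage sketch for `to_code()` as it stands after this patch (the store, retriever, and model name are illustrative; outside IPython the method returns the generated code as a string):

```python
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import Pipeline

document_store = InMemoryDocumentStore(index="my-index")
retriever = EmbeddingRetriever(
    document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2"
)
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])

# With create_ipython_cell=False (or outside a notebook) the generated
# code comes back as a plain string instead of a new notebook cell.
code = pipeline.to_code(pipeline_variable_name="p", create_ipython_cell=False)
print(code)
```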
+ """ + pipeline_config = self.get_config() + code = self._generate_code( + pipeline_config=pipeline_config, + pipeline_variable_name=pipeline_variable_name, + generate_imports=generate_imports, + ) + if create_ipython_cell: + try: + get_ipython().set_next_input(code) # type: ignore + except NameError: + return code + else: + return code + @classmethod def load_from_config( cls, pipeline_config: Dict, pipeline_name: Optional[str] = None, overwrite_with_env_variables: bool = True @@ -338,7 +368,100 @@ def save_to_deepset_cloud( logger.info(f"Pipeline config '{pipeline_config_name}' successfully created.") @classmethod - def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None): + def _camel_to_snake_case(cls, input: str) -> str: + return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() + + @classmethod + def _generate_code( + cls, pipeline_config: dict, pipeline_variable_name: str = "pipeline", generate_imports: bool = True + ) -> str: + component_definitions = cls._get_component_definitions( + pipeline_config=pipeline_config, overwrite_with_env_variables=False + ) + component_variable_names = {name: cls._camel_to_snake_case(name) for name in component_definitions.keys()} + pipeline_definition = cls._get_pipeline_definition(pipeline_config=pipeline_config) + + code_parts = [] + if generate_imports: + types_to_import = [component["type"] for component in component_definitions.values()] + imports_code = cls._generate_imports_code(types_to_import=types_to_import) + code_parts.append(imports_code) + + components_code = cls._generate_components_code( + component_definitions=component_definitions, component_variable_names=component_variable_names + ) + pipeline_code = cls._generate_pipeline_code( + pipeline_definition=pipeline_definition, + component_variable_names=component_variable_names, + pipeline_variable_name=pipeline_variable_name, + ) + + code_parts.append(components_code) + code_parts.append(pipeline_code) + code = "\n\n".join(code_parts) + return code + + @classmethod + def _generate_pipeline_code( + cls, pipeline_definition: dict, component_variable_names: Dict[str, str], pipeline_variable_name: str + ) -> str: + code_lines = [f"{pipeline_variable_name} = Pipeline()"] + for node in pipeline_definition["nodes"]: + node_name = node["name"] + component_variable_name = component_variable_names[node_name] + inputs = ", ".join(f'"{name}"' for name in node["inputs"]) + code_lines.append( + f'{pipeline_variable_name}.add_node(component={component_variable_name}, name="{node_name}", inputs=[{inputs}])' + ) + + code = "\n".join(code_lines) + return code + + @classmethod + def _generate_components_code(cls, component_definitions: dict, component_variable_names: Dict[str, str]) -> str: + code = "" + declarations = {} + dependency_map = {} + for name, definition in component_definitions.items(): + variable_name = component_variable_names[name] + class_name = definition["type"] + param_value_dict = { + key: component_variable_names.get(value, f'"{value}"') if type(value) == str else value + for key, value in definition["params"].items() + } + init_args = ", ".join(f"{key}={value}" for key, value in param_value_dict.items()) + declarations[name] = f"{variable_name} = {class_name}({init_args})" + dependency_map[name] = [ + param_value for param_value in definition["params"].values() if param_value in component_variable_names + ] + + ordered_components = cls._order_components(dependency_map=dependency_map) + ordered_declarations = [declarations[component] for 
component in ordered_components] + code = "\n".join(ordered_declarations) + return code + + @classmethod + def _generate_imports_code(cls, types_to_import: List[str]) -> str: + code_lines = [] + allowed_imports = ["haystack.nodes", "haystack.document_stores"] + importable_classes = { + name: mod + for mod in allowed_imports + for name, obj in inspect.getmembers(sys.modules[mod]) + if inspect.isclass(obj) + } + + for t in types_to_import: + if t in importable_classes: + code_lines.append(f"from {importable_classes[t]} import {t}") + else: + code_lines.append(f"# from MODULE_NOT_FOUND import {t}") + + code = "\n".join(code_lines) + return code + + @classmethod + def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None) -> dict: """ Get the definition of Pipeline from a given pipeline config. If the config contains more than one Pipeline, then the pipeline_name must be supplied. @@ -360,7 +483,7 @@ def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional return pipeline_definition @classmethod - def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_variables: bool): + def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_variables: bool) -> dict: """ Returns the definitions of all components from a given pipeline config. @@ -380,6 +503,23 @@ def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_va return component_definitions + @classmethod + def _order_components( + cls, dependency_map: Dict[str, List[str]], components_to_order: Optional[List[str]] = None + ) -> List[str]: + ordered_components = [] + if components_to_order is None: + components_to_order = list(dependency_map.keys()) + for component in components_to_order: + dependencies = dependency_map[component] + ordered_dependencies = cls._order_components( + dependency_map=dependency_map, components_to_order=dependencies + ) + ordered_components += [d for d in ordered_dependencies if d not in ordered_components] + if component not in ordered_components: + ordered_components.append(component) + return ordered_components + @classmethod def _overwrite_with_env_variables(cls, definition: dict): """ @@ -1043,108 +1183,6 @@ def get_config(self, return_defaults: bool = False) -> dict: config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"} return config - def to_code(self, pipeline_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True): - config = self.get_config() - code = self._generate_code(config=config, pipeline_name=pipeline_name, generate_imports=generate_imports) - if create_ipython_cell: - try: - get_ipython().set_next_input(code) # type: ignore - except NameError: - return code - else: - return code - - @classmethod - def _order_component_code(cls, depedency: Dict[str, List[str]], keys: Optional[List[str]] = None): - ordered = [] - keys = keys or depedency.keys() # type: ignore - for k in keys: # type: ignore - v = depedency[k] - if len(v) > 0: - ordered += [k for k in cls._order_component_code(depedency, v) if k not in ordered] - if k not in ordered: - ordered.append(k) - return ordered - - @classmethod - def _camel_to_snake_case(cls, input: str) -> str: - return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() - - @classmethod - def _generate_code(cls, config: dict, pipeline_name: str = "pipeline", generate_imports: bool = True): - component_definitions = config["components"] - pipeline_definition = 
config["pipelines"][0] - code_parts = [] - if generate_imports: - types_to_import = [c["type"] for c in component_definitions] - code_parts.append(cls._generate_imports_code(types_to_import=types_to_import)) - - component_variable_names = { - c["name"]: cls._camel_to_snake_case(c["name"]) for c in component_definitions - } - code_parts.append( - cls._generate_components_code( - component_definitions=component_definitions, component_variable_names=component_variable_names - ) - ) - code_parts.append( - cls._generate_pipeline_code( - pipeline_definition=pipeline_definition, - component_variable_names=component_variable_names, - pipeline_name=pipeline_name, - ) - ) - code = "\n".join(code_parts) - return code - - @classmethod - def _generate_pipeline_code( - cls, pipeline_definition: Dict, component_variable_names: Dict[str, str], pipeline_name: str - ): - code = f"{pipeline_name} = Pipeline()\n" - for node in pipeline_definition["nodes"]: - component_variable_name = component_variable_names[node["name"]] - inputs = ", ".join(f'"{name}"' for name in node["inputs"]) - code += f"{pipeline_name}.add_node(component={component_variable_name}, name=\"{node['name']}\", inputs=[{inputs}])\n" - - @classmethod - def _generate_components_code(cls, component_definitions: List[Dict], component_variable_names: Dict[str, str]): - code = "" - declarations = {} - dependencies = {} - for component in component_definitions: - param_value_dict = { - k: component_variable_names.get(v, f'"{v}"') if type(v) == str else v - for k, v in component["params"].items() - } - args = ", ".join(f"{k}={v}" for k, v in param_value_dict.items()) - variable_name = component_variable_names[component["name"]] - declarations[variable_name] = f"{variable_name} = {component['type']}({args})\n" - dependencies[variable_name] = [ - component_variable_names[v] for v in component["params"].values() if v in component_variable_names - ] - for v in cls._order_component_code(dependencies): - code += declarations[v] - return code - - @classmethod - def _generate_imports_code(cls, types_to_import: List[str]): - code = "" - allowed_imports = ["haystack.nodes", "haystack.document_stores"] - importable_classes = { - name: mod - for mod in allowed_imports - for name, obj in inspect.getmembers(sys.modules[mod]) - if inspect.isclass(obj) - } - - for t in types_to_import: - if t in importable_classes: - code += f"from {importable_classes[t]} import {t}\n" - else: - code += f"# from MODULE_NOT_FOUND import {t}\n" - return code - def _format_document_answer(self, document_or_answer: dict): return "\n \t".join([f"{name}: {value}" for name, value in document_or_answer.items()]) From 0c33bc717a0c45ae9b5dea80e53fc733a8e68e0e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 14:34:39 +0000 Subject: [PATCH 07/22] Update Documentation & Code Style --- docs/_src/api/api/pipelines.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index b68750be81..76cce4558c 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -37,6 +37,29 @@ Returns a configuration for the Pipeline that can be used with `BasePipeline.loa - `return_defaults`: whether to output parameters that have the default values. 
+ + +#### to\_code + +```python +def to_code(pipeline_variable_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True) +``` + +Returns the respecting code to create this pipeline. + +If we're running in a notebook environment and create_ipyhton_cell is set to True, +this will automatically create a new notebook cell containing the code instead of returning a string. + +**Arguments**: + +- `pipeline_variable_name`: The variable name of the generated pipeline. +Default value is 'pipeline'. +- `create_ipython_cell`: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. +Otherwise to_code() will return the code as string. +Default value is True. +- `generate_imports`: Whether to include the required import statements into the code. +Default value is True. + #### load\_from\_config From 896616bb4f572db6358ba7820b37753eb0638ed0 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Thu, 17 Feb 2022 16:10:02 +0100 Subject: [PATCH 08/22] improve imports code generation --- haystack/pipelines/base.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 463df75084..0fd667d98a 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -41,6 +41,8 @@ ROOT_NODE_TO_PIPELINE_NAME = {"query": "query", "file": "indexing"} CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(? str: code_lines = [] - allowed_imports = ["haystack.nodes", "haystack.document_stores"] importable_classes = { name: mod - for mod in allowed_imports + for mod in CODE_GEN_ALLOWED_IMPORTS for name, obj in inspect.getmembers(sys.modules[mod]) if inspect.isclass(obj) } + imports_by_module: Dict[str, List[str]] = {} for t in types_to_import: - if t in importable_classes: - code_lines.append(f"from {importable_classes[t]} import {t}") + mod = importable_classes.get(t, MODULE_NOT_FOUND) + if mod in imports_by_module: + imports_by_module[mod].append(t) else: - code_lines.append(f"# from MODULE_NOT_FOUND import {t}") + imports_by_module[mod] = [t] + + for mod in sorted(imports_by_module.keys()): + sorted_types = sorted(set(imports_by_module[mod])) + import_types = ", ".join(sorted_types) + line_prefix = "# " if mod == MODULE_NOT_FOUND else "" + code_lines.append(f"{line_prefix}from {mod} import {import_types}") code = "\n".join(code_lines) return code From b27d73bcb8480a0eb0b885535839cdc93cd4368c Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Thu, 17 Feb 2022 17:19:08 +0100 Subject: [PATCH 09/22] add comment param --- haystack/pipelines/base.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 0fd667d98a..76ceb9a548 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -43,6 +43,7 @@ CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(? dict: raise NotImplementedError def to_code( - self, pipeline_variable_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True + self, + pipeline_variable_name: str = "pipeline", + create_notebook_cell: bool = True, + generate_imports: bool = True, + comment: Optional[str] = CODE_GEN_DEFAULT_COMMENT, ): """ Returns the respecting code to create this pipeline. @@ -83,19 +88,22 @@ def to_code( :param pipeline_variable_name: The variable name of the generated pipeline. Default value is 'pipeline'. 
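The import-generation change above groups classes by their source module before emitting `from ... import ...` lines. The same idea as a self-contained sketch (the module map and the unknown class name are illustrative):

```python
from typing import Dict, List

MODULE_NOT_FOUND = "MODULE_NOT_FOUND"

def generate_imports_code(types_to_import: List[str], importable_classes: Dict[str, str]) -> str:
    # Group the requested class names by the module they are importable from;
    # unknown classes fall under MODULE_NOT_FOUND and are emitted commented out.
    imports_by_module: Dict[str, List[str]] = {}
    for type_name in types_to_import:
        module = importable_classes.get(type_name, MODULE_NOT_FOUND)
        imports_by_module.setdefault(module, []).append(type_name)

    code_lines = []
    for module in sorted(imports_by_module):
        import_types = ", ".join(sorted(set(imports_by_module[module])))
        line_prefix = "# " if module == MODULE_NOT_FOUND else ""
        code_lines.append(f"{line_prefix}from {module} import {import_types}")
    return "\n".join(code_lines)

print(
    generate_imports_code(
        ["ElasticsearchRetriever", "ElasticsearchDocumentStore", "MyCustomNode"],
        {
            "ElasticsearchRetriever": "haystack.nodes",
            "ElasticsearchDocumentStore": "haystack.document_stores",
        },
    )
)
# # from MODULE_NOT_FOUND import MyCustomNode
# from haystack.document_stores import ElasticsearchDocumentStore
# from haystack.nodes import ElasticsearchRetriever
```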
- :param create_ipython_cell: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. + :param create_notebook_cell: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. Otherwise to_code() will return the code as string. Default value is True. :param generate_imports: Whether to include the required import statements into the code. Default value is True. + :param comment: Preceding comment to add. + Defaults to "This code has been generated by Pipeline.to_code()" """ pipeline_config = self.get_config() code = self._generate_code( pipeline_config=pipeline_config, pipeline_variable_name=pipeline_variable_name, generate_imports=generate_imports, + comment=comment, ) - if create_ipython_cell: + if create_notebook_cell: try: get_ipython().set_next_input(code) # type: ignore except NameError: @@ -375,7 +383,11 @@ def _camel_to_snake_case(cls, input: str) -> str: @classmethod def _generate_code( - cls, pipeline_config: dict, pipeline_variable_name: str = "pipeline", generate_imports: bool = True + cls, + pipeline_config: dict, + pipeline_variable_name: str = "pipeline", + generate_imports: bool = True, + comment: Optional[str] = None, ) -> str: component_definitions = cls._get_component_definitions( pipeline_config=pipeline_config, overwrite_with_env_variables=False @@ -401,6 +413,11 @@ def _generate_code( code_parts.append(components_code) code_parts.append(pipeline_code) code = "\n\n".join(code_parts) + + if comment: + comment = re.sub(r"^(#\s)?", "# ", comment, flags=re.MULTILINE) + code = "\n".join([comment, code]) + return code @classmethod From f9fea7a9cf69b876d29ab0ec1611f61515fcf135 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Feb 2022 16:28:45 +0000 Subject: [PATCH 10/22] Update Documentation & Code Style --- docs/_src/api/api/pipelines.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 76cce4558c..425a2b6a41 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -42,7 +42,7 @@ Returns a configuration for the Pipeline that can be used with `BasePipeline.loa #### to\_code ```python -def to_code(pipeline_variable_name: str = "pipeline", create_ipython_cell: bool = True, generate_imports: bool = True) +def to_code(pipeline_variable_name: str = "pipeline", create_notebook_cell: bool = True, generate_imports: bool = True, comment: Optional[str] = CODE_GEN_DEFAULT_COMMENT) ``` Returns the respecting code to create this pipeline. @@ -54,11 +54,13 @@ this will automatically create a new notebook cell containing the code instead o - `pipeline_variable_name`: The variable name of the generated pipeline. Default value is 'pipeline'. -- `create_ipython_cell`: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. +- `create_notebook_cell`: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. Otherwise to_code() will return the code as string. Default value is True. - `generate_imports`: Whether to include the required import statements into the code. Default value is True. +- `comment`: Preceding comment to add. 
+Defaults to "This code has been generated by Pipeline.to_code()" From 62d77a38f7cf44825e3eb1de42283b2f7c5bae60 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Fri, 18 Feb 2022 10:20:01 +0100 Subject: [PATCH 11/22] add simple test --- test/test_pipeline.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 75b94c2f86..af9d80438b 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -162,6 +162,24 @@ def test_load_tfidfretriever_yaml(tmp_path): assert prediction["answers"][0].answer == "haystack" +@pytest.mark.elasticsearch +def test_to_code(): + index_pipeline = Pipeline.load_from_yaml( + SAMPLES_PATH / "pipeline" / "test_pipeline.yaml", pipeline_name="indexing_pipeline" + ) + query_pipeline = Pipeline.load_from_yaml( + SAMPLES_PATH / "pipeline" / "test_pipeline.yaml", pipeline_name="query_pipeline" + ) + query_pipeline_code = query_pipeline.to_code(pipeline_variable_name="query_pipeline_from_code") + index_pipeline_code = index_pipeline.to_code(pipeline_variable_name="index_pipeline_from_code") + exec(query_pipeline_code) + exec(index_pipeline_code) + assert locals()["query_pipeline_from_code"] is not None + assert locals()["index_pipeline_from_code"] is not None + assert query_pipeline.get_config() == locals()["query_pipeline_from_code"].get_config() + assert index_pipeline.get_config() == locals()["index_pipeline_from_code"].get_config() + + @pytest.mark.usefixtures(deepset_cloud_fixture.__name__) @responses.activate def test_load_from_deepset_cloud_query(): From cf2248fbfe3a1fee963bc52e8109617b86c2f04c Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Mon, 21 Feb 2022 20:16:28 +0100 Subject: [PATCH 12/22] add to_notebook_cell() --- haystack/pipelines/base.py | 52 +++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 76ceb9a548..6c572b0416 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -43,7 +43,7 @@ CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(? dict: def to_code( self, pipeline_variable_name: str = "pipeline", - create_notebook_cell: bool = True, generate_imports: bool = True, - comment: Optional[str] = CODE_GEN_DEFAULT_COMMENT, - ): + add_comment: bool = False, + ) -> str: """ - Returns the respecting code to create this pipeline. - If we're running in a notebook environment and create_ipyhton_cell is set to True, - this will automatically create a new notebook cell containing the code instead of returning a string. + Returns the code to create this pipeline as string. :param pipeline_variable_name: The variable name of the generated pipeline. Default value is 'pipeline'. - :param create_notebook_cell: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. - Otherwise to_code() will return the code as string. - Default value is True. :param generate_imports: Whether to include the required import statements into the code. Default value is True. - :param comment: Preceding comment to add. - Defaults to "This code has been generated by Pipeline.to_code()" + :param add_comment: Whether to add a preceding comment that this code has been generated. + Default value is False. 
""" pipeline_config = self.get_config() code = self._generate_code( pipeline_config=pipeline_config, pipeline_variable_name=pipeline_variable_name, generate_imports=generate_imports, - comment=comment, + comment=CODE_GEN_DEFAULT_COMMENT if add_comment else None, ) - if create_notebook_cell: - try: - get_ipython().set_next_input(code) # type: ignore - except NameError: - return code - else: - return code + return code + + def to_notebook_cell( + self, + pipeline_variable_name: str = "pipeline", + generate_imports: bool = True, + add_comment: bool = True, + ): + """ + Creates a new notebook cell with the code to create this pipeline. + + :param pipeline_variable_name: The variable name of the generated pipeline. + Default value is 'pipeline'. + :param generate_imports: Whether to include the required import statements into the code. + Default value is True. + :param add_comment: Whether to add a preceding comment that this code has been generated. + Default value is True. + """ + code = self.to_code( + pipeline_variable_name=pipeline_variable_name, generate_imports=generate_imports, add_comment=add_comment + ) + try: + get_ipython().set_next_input(code) # type: ignore + except NameError: + logger.error("Could not create notebook cell. Make sure you're running in a notebook environment.") @classmethod def load_from_config( From 97634232cae819cc61cadf1020be7eeadc1662ff Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 21 Feb 2022 19:24:28 +0000 Subject: [PATCH 13/22] Update Documentation & Code Style --- docs/_src/api/api/pipelines.md | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/docs/_src/api/api/pipelines.md b/docs/_src/api/api/pipelines.md index 425a2b6a41..48bbb7eda8 100644 --- a/docs/_src/api/api/pipelines.md +++ b/docs/_src/api/api/pipelines.md @@ -42,25 +42,38 @@ Returns a configuration for the Pipeline that can be used with `BasePipeline.loa #### to\_code ```python -def to_code(pipeline_variable_name: str = "pipeline", create_notebook_cell: bool = True, generate_imports: bool = True, comment: Optional[str] = CODE_GEN_DEFAULT_COMMENT) +def to_code(pipeline_variable_name: str = "pipeline", generate_imports: bool = True, add_comment: bool = False) -> str ``` -Returns the respecting code to create this pipeline. - -If we're running in a notebook environment and create_ipyhton_cell is set to True, -this will automatically create a new notebook cell containing the code instead of returning a string. +Returns the code to create this pipeline as string. **Arguments**: - `pipeline_variable_name`: The variable name of the generated pipeline. Default value is 'pipeline'. -- `create_notebook_cell`: If set to True and we're running in a notebook environment, to_code() will automatically create a new notebook cell containing the code. -Otherwise to_code() will return the code as string. +- `generate_imports`: Whether to include the required import statements into the code. Default value is True. +- `add_comment`: Whether to add a preceding comment that this code has been generated. +Default value is False. + + + +#### to\_notebook\_cell + +```python +def to_notebook_cell(pipeline_variable_name: str = "pipeline", generate_imports: bool = True, add_comment: bool = True) +``` + +Creates a new notebook cell with the code to create this pipeline. + +**Arguments**: + +- `pipeline_variable_name`: The variable name of the generated pipeline. +Default value is 'pipeline'. 
- `generate_imports`: Whether to include the required import statements into the code. Default value is True. -- `comment`: Preceding comment to add. -Defaults to "This code has been generated by Pipeline.to_code()" +- `add_comment`: Whether to add a preceding comment that this code has been generated. +Default value is True. From 975119429ddeb6f122dd7b930f00256a6940fda0 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Tue, 22 Feb 2022 10:20:24 +0100 Subject: [PATCH 14/22] introduce helper classes for code gen and eval report gen --- haystack/pipelines/base.py | 494 +++++++++++++++++++------------------ 1 file changed, 258 insertions(+), 236 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 6c572b0416..84c58ffa27 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -90,9 +90,8 @@ def to_code( :param add_comment: Whether to add a preceding comment that this code has been generated. Default value is False. """ - pipeline_config = self.get_config() - code = self._generate_code( - pipeline_config=pipeline_config, + code = _PipelineCodeGen.generate_code( + pipeline=self, pipeline_variable_name=pipeline_variable_name, generate_imports=generate_imports, comment=CODE_GEN_DEFAULT_COMMENT if add_comment else None, @@ -389,115 +388,6 @@ def save_to_deepset_cloud( client.save_pipeline_config(config=config, pipeline_config_name=pipeline_config_name) logger.info(f"Pipeline config '{pipeline_config_name}' successfully created.") - @classmethod - def _camel_to_snake_case(cls, input: str) -> str: - return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() - - @classmethod - def _generate_code( - cls, - pipeline_config: dict, - pipeline_variable_name: str = "pipeline", - generate_imports: bool = True, - comment: Optional[str] = None, - ) -> str: - component_definitions = cls._get_component_definitions( - pipeline_config=pipeline_config, overwrite_with_env_variables=False - ) - component_variable_names = {name: cls._camel_to_snake_case(name) for name in component_definitions.keys()} - pipeline_definition = cls._get_pipeline_definition(pipeline_config=pipeline_config) - - code_parts = [] - if generate_imports: - types_to_import = [component["type"] for component in component_definitions.values()] - imports_code = cls._generate_imports_code(types_to_import=types_to_import) - code_parts.append(imports_code) - - components_code = cls._generate_components_code( - component_definitions=component_definitions, component_variable_names=component_variable_names - ) - pipeline_code = cls._generate_pipeline_code( - pipeline_definition=pipeline_definition, - component_variable_names=component_variable_names, - pipeline_variable_name=pipeline_variable_name, - ) - - code_parts.append(components_code) - code_parts.append(pipeline_code) - code = "\n\n".join(code_parts) - - if comment: - comment = re.sub(r"^(#\s)?", "# ", comment, flags=re.MULTILINE) - code = "\n".join([comment, code]) - - return code - - @classmethod - def _generate_pipeline_code( - cls, pipeline_definition: dict, component_variable_names: Dict[str, str], pipeline_variable_name: str - ) -> str: - code_lines = [f"{pipeline_variable_name} = Pipeline()"] - for node in pipeline_definition["nodes"]: - node_name = node["name"] - component_variable_name = component_variable_names[node_name] - inputs = ", ".join(f'"{name}"' for name in node["inputs"]) - code_lines.append( - f'{pipeline_variable_name}.add_node(component={component_variable_name}, name="{node_name}", inputs=[{inputs}])' - ) - - 
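This patch moves the generator helpers onto `_PipelineCodeGen` without changing their behaviour. The dependency-aware ordering they rely on is the least obvious piece, so here is a standalone sketch of the same depth-first idea with a worked example (the component names are illustrative):

```python
from typing import Dict, List, Optional

def order_components(
    dependency_map: Dict[str, List[str]], components_to_order: Optional[List[str]] = None
) -> List[str]:
    # Depth-first walk: a component's dependencies are emitted before the
    # component itself, and nothing is emitted twice.
    ordered: List[str] = []
    if components_to_order is None:
        components_to_order = list(dependency_map.keys())
    for component in components_to_order:
        for dependency in order_components(dependency_map, dependency_map[component]):
            if dependency not in ordered:
                ordered.append(dependency)
        if component not in ordered:
            ordered.append(component)
    return ordered

# The store has no dependencies and the retriever depends on the store, so the
# generated declarations come out dependency-first:
assert order_components({"DocumentStore": [], "Retriever": ["DocumentStore"]}) == [
    "DocumentStore",
    "Retriever",
]
```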
code = "\n".join(code_lines) - return code - - @classmethod - def _generate_components_code(cls, component_definitions: dict, component_variable_names: Dict[str, str]) -> str: - code = "" - declarations = {} - dependency_map = {} - for name, definition in component_definitions.items(): - variable_name = component_variable_names[name] - class_name = definition["type"] - param_value_dict = { - key: component_variable_names.get(value, f'"{value}"') if type(value) == str else value - for key, value in definition["params"].items() - } - init_args = ", ".join(f"{key}={value}" for key, value in param_value_dict.items()) - declarations[name] = f"{variable_name} = {class_name}({init_args})" - dependency_map[name] = [ - param_value for param_value in definition["params"].values() if param_value in component_variable_names - ] - - ordered_components = cls._order_components(dependency_map=dependency_map) - ordered_declarations = [declarations[component] for component in ordered_components] - code = "\n".join(ordered_declarations) - return code - - @classmethod - def _generate_imports_code(cls, types_to_import: List[str]) -> str: - code_lines = [] - importable_classes = { - name: mod - for mod in CODE_GEN_ALLOWED_IMPORTS - for name, obj in inspect.getmembers(sys.modules[mod]) - if inspect.isclass(obj) - } - - imports_by_module: Dict[str, List[str]] = {} - for t in types_to_import: - mod = importable_classes.get(t, MODULE_NOT_FOUND) - if mod in imports_by_module: - imports_by_module[mod].append(t) - else: - imports_by_module[mod] = [t] - - for mod in sorted(imports_by_module.keys()): - sorted_types = sorted(set(imports_by_module[mod])) - import_types = ", ".join(sorted_types) - line_prefix = "# " if mod == MODULE_NOT_FOUND else "" - code_lines.append(f"{line_prefix}from {mod} import {import_types}") - - code = "\n".join(code_lines) - return code - @classmethod def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None) -> dict: """ @@ -541,23 +431,6 @@ def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_va return component_definitions - @classmethod - def _order_components( - cls, dependency_map: Dict[str, List[str]], components_to_order: Optional[List[str]] = None - ) -> List[str]: - ordered_components = [] - if components_to_order is None: - components_to_order = list(dependency_map.keys()) - for component in components_to_order: - dependencies = dependency_map[component] - ordered_dependencies = cls._order_components( - dependency_map=dependency_map, components_to_order=dependencies - ) - ordered_components += [d for d in ordered_dependencies if d not in ordered_components] - if component not in ordered_components: - ordered_components.append(component) - return ordered_components - @classmethod def _overwrite_with_env_variables(cls, definition: dict): """ @@ -1221,85 +1094,6 @@ def get_config(self, return_defaults: bool = False) -> dict: config = {"components": list(components.values()), "pipelines": list(pipelines.values()), "version": "0.8"} return config - def _format_document_answer(self, document_or_answer: dict): - return "\n \t".join([f"{name}: {value}" for name, value in document_or_answer.items()]) - - def _format_wrong_sample(self, query: dict): - metrics = "\n \t".join([f"{name}: {value}" for name, value in query["metrics"].items()]) - documents = "\n\n \t".join([self._format_document_answer(doc) for doc in query.get("documents", [])]) - documents = f"Documents: \n \t{documents}\n" if len(documents) > 0 else "" - answers = "\n\n 
\t".join([self._format_document_answer(answer) for answer in query.get("answers", [])]) - answers = f"Answers: \n \t{answers}\n" if len(answers) > 0 else "" - gold_document_ids = "\n \t".join(query["gold_document_ids"]) - gold_answers = "\n \t".join(query.get("gold_answers", [])) - gold_answers = f"Gold Answers: \n \t{gold_answers}\n" if len(gold_answers) > 0 else "" - s = ( - f"Query: \n \t{query['query']}\n" - f"{gold_answers}" - f"Gold Document Ids: \n \t{gold_document_ids}\n" - f"Metrics: \n \t{metrics}\n" - f"{answers}" - f"{documents}" - f"_______________________________________________________" - ) - return s - - def _format_wrong_samples_node(self, node_name: str, wrong_samples_formatted: str): - s = ( - f" Wrong {node_name} Examples\n" - f"=======================================================\n" - f"{wrong_samples_formatted}\n" - f"=======================================================\n" - ) - return s - - def _format_wrong_samples_report(self, eval_result: EvaluationResult, n_wrong_examples: int = 3): - examples = { - node: eval_result.wrong_examples(node, doc_relevance_col="gold_id_or_answer_match", n=n_wrong_examples) - for node in eval_result.node_results.keys() - } - examples_formatted = { - node: "\n".join([self._format_wrong_sample(example) for example in examples]) - for node, examples in examples.items() - } - - return "\n".join( - [self._format_wrong_samples_node(node, examples) for node, examples in examples_formatted.items()] - ) - - def _format_pipeline_node(self, node: str, calculated_metrics: dict): - node_metrics: dict = {} - for metric_mode in calculated_metrics: - for metric, value in calculated_metrics[metric_mode].get(node, {}).items(): - node_metrics[f"{metric}{metric_mode}"] = value - - node_metrics_formatted = "\n".join( - sorted([f" | {metric}: {value:5.3}" for metric, value in node_metrics.items()]) - ) - node_metrics_formatted = f"{node_metrics_formatted}\n" if len(node_metrics_formatted) > 0 else "" - s = ( - f" {node}\n" - f" |\n" - f"{node_metrics_formatted}" - f" |" - ) - return s - - def _format_pipeline_overview(self, calculated_metrics: dict): - pipeline_overview = "\n".join( - [self._format_pipeline_node(node, calculated_metrics) for node in self.graph.nodes] - ) - s = ( - f"================== Evaluation Report ==================\n" - f"=======================================================\n" - f" Pipeline Overview\n" - f"=======================================================\n" - f"{pipeline_overview}\n" - f" Output\n" - f"=======================================================\n" - ) - return s - def print_eval_report( self, eval_result: EvaluationResult, @@ -1313,36 +1107,10 @@ def print_eval_report( :param n_wrong_examples: The number of worst queries to show. :param metrics_filter: The metrics to show per node. If None all metrics will be shown. 
""" - if any(degree > 1 for node, degree in self.graph.out_degree): - logger.warning("Pipelines with junctions are currently not supported.") - return - - calculated_metrics = { - "": eval_result.calculate_metrics(doc_relevance_col="gold_id_or_answer_match"), - "_top_1": eval_result.calculate_metrics( - doc_relevance_col="gold_id_or_answer_match", simulated_top_k_reader=1 - ), - " upper bound": eval_result.calculate_metrics( - doc_relevance_col="gold_id_or_answer_match", eval_mode="isolated" - ), - } - - if metrics_filter is not None: - for metric_mode in calculated_metrics: - calculated_metrics[metric_mode] = { - node: metrics - if node not in metrics_filter - else {metric: value for metric, value in metrics.items() if metric in metrics_filter[node]} - for node, metrics in calculated_metrics[metric_mode].items() - } - - pipeline_overview = self._format_pipeline_overview(calculated_metrics) - wrong_samples_report = self._format_wrong_samples_report( - eval_result=eval_result, n_wrong_examples=n_wrong_examples + _PipelineEvalReportGen.print_eval_report( + eval_result=eval_result, pipeline=self, n_wrong_examples=n_wrong_examples, metrics_filter=metrics_filter ) - print(f"{pipeline_overview}\n" f"{wrong_samples_report}") - class RayPipeline(Pipeline): """ @@ -1627,3 +1395,257 @@ def __call__(self, *args, **kwargs): Ray calls this method which is then re-directed to the corresponding component's run(). """ return self.node._dispatch_run(*args, **kwargs) + + +class _PipelineCodeGen: + @classmethod + def _camel_to_snake_case(cls, input: str) -> str: + return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() + + @classmethod + def generate_code( + cls, + pipeline: BasePipeline, + pipeline_variable_name: str = "pipeline", + generate_imports: bool = True, + comment: Optional[str] = None, + ) -> str: + pipeline_config = pipeline.get_config() + component_definitions = pipeline._get_component_definitions( + pipeline_config=pipeline_config, overwrite_with_env_variables=False + ) + component_variable_names = {name: cls._camel_to_snake_case(name) for name in component_definitions.keys()} + pipeline_definition = pipeline._get_pipeline_definition(pipeline_config=pipeline_config) + + code_parts = [] + if generate_imports: + types_to_import = [component["type"] for component in component_definitions.values()] + imports_code = cls._generate_imports_code(types_to_import=types_to_import) + code_parts.append(imports_code) + + components_code = cls._generate_components_code( + component_definitions=component_definitions, component_variable_names=component_variable_names + ) + pipeline_code = cls._generate_pipeline_code( + pipeline_definition=pipeline_definition, + component_variable_names=component_variable_names, + pipeline_variable_name=pipeline_variable_name, + ) + + code_parts.append(components_code) + code_parts.append(pipeline_code) + code = "\n\n".join(code_parts) + + if comment: + comment = re.sub(r"^(#\s)?", "# ", comment, flags=re.MULTILINE) + code = "\n".join([comment, code]) + + return code + + @classmethod + def _generate_pipeline_code( + cls, pipeline_definition: dict, component_variable_names: Dict[str, str], pipeline_variable_name: str + ) -> str: + code_lines = [f"{pipeline_variable_name} = Pipeline()"] + for node in pipeline_definition["nodes"]: + node_name = node["name"] + component_variable_name = component_variable_names[node_name] + inputs = ", ".join(f'"{name}"' for name in node["inputs"]) + code_lines.append( + f'{pipeline_variable_name}.add_node(component={component_variable_name}, 
name="{node_name}", inputs=[{inputs}])' + ) + + code = "\n".join(code_lines) + return code + + @classmethod + def _generate_components_code(cls, component_definitions: dict, component_variable_names: Dict[str, str]) -> str: + code = "" + declarations = {} + dependency_map = {} + for name, definition in component_definitions.items(): + variable_name = component_variable_names[name] + class_name = definition["type"] + param_value_dict = { + key: component_variable_names.get(value, f'"{value}"') if type(value) == str else value + for key, value in definition["params"].items() + } + init_args = ", ".join(f"{key}={value}" for key, value in param_value_dict.items()) + declarations[name] = f"{variable_name} = {class_name}({init_args})" + dependency_map[name] = [ + param_value for param_value in definition["params"].values() if param_value in component_variable_names + ] + + ordered_components = cls._order_components(dependency_map=dependency_map) + ordered_declarations = [declarations[component] for component in ordered_components] + code = "\n".join(ordered_declarations) + return code + + @classmethod + def _generate_imports_code(cls, types_to_import: List[str]) -> str: + code_lines = [] + importable_classes = { + name: mod + for mod in CODE_GEN_ALLOWED_IMPORTS + for name, obj in inspect.getmembers(sys.modules[mod]) + if inspect.isclass(obj) + } + + imports_by_module: Dict[str, List[str]] = {} + for t in types_to_import: + mod = importable_classes.get(t, MODULE_NOT_FOUND) + if mod in imports_by_module: + imports_by_module[mod].append(t) + else: + imports_by_module[mod] = [t] + + for mod in sorted(imports_by_module.keys()): + sorted_types = sorted(set(imports_by_module[mod])) + import_types = ", ".join(sorted_types) + line_prefix = "# " if mod == MODULE_NOT_FOUND else "" + code_lines.append(f"{line_prefix}from {mod} import {import_types}") + + code = "\n".join(code_lines) + return code + + @classmethod + def _order_components( + cls, dependency_map: Dict[str, List[str]], components_to_order: Optional[List[str]] = None + ) -> List[str]: + ordered_components = [] + if components_to_order is None: + components_to_order = list(dependency_map.keys()) + for component in components_to_order: + dependencies = dependency_map[component] + ordered_dependencies = cls._order_components( + dependency_map=dependency_map, components_to_order=dependencies + ) + ordered_components += [d for d in ordered_dependencies if d not in ordered_components] + if component not in ordered_components: + ordered_components.append(component) + return ordered_components + + +class _PipelineEvalReportGen: + @classmethod + def print_eval_report( + cls, + eval_result: EvaluationResult, + pipeline: Pipeline, + n_wrong_examples: int = 3, + metrics_filter: Optional[Dict[str, List[str]]] = None, + ): + if any(degree > 1 for node, degree in pipeline.graph.out_degree): + logger.warning("Pipelines with junctions are currently not supported.") + return + + calculated_metrics = { + "": eval_result.calculate_metrics(doc_relevance_col="gold_id_or_answer_match"), + "_top_1": eval_result.calculate_metrics( + doc_relevance_col="gold_id_or_answer_match", simulated_top_k_reader=1 + ), + " upper bound": eval_result.calculate_metrics( + doc_relevance_col="gold_id_or_answer_match", eval_mode="isolated" + ), + } + + if metrics_filter is not None: + for metric_mode in calculated_metrics: + calculated_metrics[metric_mode] = { + node: metrics + if node not in metrics_filter + else {metric: value for metric, value in metrics.items() if metric in 
metrics_filter[node]} + for node, metrics in calculated_metrics[metric_mode].items() + } + + pipeline_overview = cls._format_pipeline_overview(calculated_metrics=calculated_metrics, pipeline=pipeline) + wrong_samples_report = cls._format_wrong_samples_report( + eval_result=eval_result, n_wrong_examples=n_wrong_examples + ) + + print(f"{pipeline_overview}\n" f"{wrong_samples_report}") + + @classmethod + def _format_document_answer(cls, document_or_answer: dict): + return "\n \t".join([f"{name}: {value}" for name, value in document_or_answer.items()]) + + @classmethod + def _format_wrong_sample(cls, query: dict): + metrics = "\n \t".join([f"{name}: {value}" for name, value in query["metrics"].items()]) + documents = "\n\n \t".join([cls._format_document_answer(doc) for doc in query.get("documents", [])]) + documents = f"Documents: \n \t{documents}\n" if len(documents) > 0 else "" + answers = "\n\n \t".join([cls._format_document_answer(answer) for answer in query.get("answers", [])]) + answers = f"Answers: \n \t{answers}\n" if len(answers) > 0 else "" + gold_document_ids = "\n \t".join(query["gold_document_ids"]) + gold_answers = "\n \t".join(query.get("gold_answers", [])) + gold_answers = f"Gold Answers: \n \t{gold_answers}\n" if len(gold_answers) > 0 else "" + s = ( + f"Query: \n \t{query['query']}\n" + f"{gold_answers}" + f"Gold Document Ids: \n \t{gold_document_ids}\n" + f"Metrics: \n \t{metrics}\n" + f"{answers}" + f"{documents}" + f"_______________________________________________________" + ) + return s + + @classmethod + def _format_wrong_samples_node(cls, node_name: str, wrong_samples_formatted: str): + s = ( + f" Wrong {node_name} Examples\n" + f"=======================================================\n" + f"{wrong_samples_formatted}\n" + f"=======================================================\n" + ) + return s + + @classmethod + def _format_wrong_samples_report(cls, eval_result: EvaluationResult, n_wrong_examples: int = 3): + examples = { + node: eval_result.wrong_examples(node, doc_relevance_col="gold_id_or_answer_match", n=n_wrong_examples) + for node in eval_result.node_results.keys() + } + examples_formatted = { + node: "\n".join([cls._format_wrong_sample(example) for example in examples]) + for node, examples in examples.items() + } + + return "\n".join( + [cls._format_wrong_samples_node(node, examples) for node, examples in examples_formatted.items()] + ) + + @classmethod + def _format_pipeline_node(cls, node: str, calculated_metrics: dict): + node_metrics: dict = {} + for metric_mode in calculated_metrics: + for metric, value in calculated_metrics[metric_mode].get(node, {}).items(): + node_metrics[f"{metric}{metric_mode}"] = value + + node_metrics_formatted = "\n".join( + sorted([f" | {metric}: {value:5.3}" for metric, value in node_metrics.items()]) + ) + node_metrics_formatted = f"{node_metrics_formatted}\n" if len(node_metrics_formatted) > 0 else "" + s = ( + f" {node}\n" + f" |\n" + f"{node_metrics_formatted}" + f" |" + ) + return s + + @classmethod + def _format_pipeline_overview(cls, calculated_metrics: dict, pipeline: Pipeline): + pipeline_overview = "\n".join( + [cls._format_pipeline_node(node, calculated_metrics) for node in pipeline.graph.nodes] + ) + s = ( + f"================== Evaluation Report ==================\n" + f"=======================================================\n" + f" Pipeline Overview\n" + f"=======================================================\n" + f"{pipeline_overview}\n" + f" Output\n" + 
f"=======================================================\n" + ) + return s From f20b402a50bffc712ad4ab023d9ec5ca39f93ad9 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Tue, 22 Feb 2022 12:06:35 +0100 Subject: [PATCH 15/22] add more tests --- test/test_pipeline.py | 116 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index af9d80438b..b8ecd020a1 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -8,13 +8,16 @@ from haystack.document_stores.deepsetcloud import DeepsetCloudDocumentStore from haystack.document_stores.elasticsearch import ElasticsearchDocumentStore +from haystack.document_stores.memory import InMemoryDocumentStore +from haystack.nodes.other.join_docs import JoinDocuments from haystack.nodes.retriever.sparse import ElasticsearchRetriever from haystack.pipelines import ( Pipeline, DocumentSearchPipeline, RootNode, + ExtractiveQAPipeline ) -from haystack.pipelines import ExtractiveQAPipeline +from haystack.pipelines.base import _PipelineCodeGen from haystack.nodes import DensePassageRetriever, EmbeddingRetriever from conftest import MOCK_DC, DC_API_ENDPOINT, DC_API_KEY, DC_TEST_INDEX, SAMPLES_PATH, deepset_cloud_fixture @@ -180,6 +183,117 @@ def test_to_code(): assert index_pipeline.get_config() == locals()["index_pipeline_from_code"].get_config() +@pytest.mark.elasticsearch +def test_PipelineCodeGen_simple_dense_pipeline(): + doc_store = InMemoryDocumentStore(index="my-index") + retriever = EmbeddingRetriever(document_store=doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2") + pipeline = Pipeline() + pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) + + code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=False) + assert code == ("in_memory_document_store = InMemoryDocumentStore(index=\"my-index\")\n" + "retri = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" + "\n" + "p = Pipeline()\n" + "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + + +@pytest.mark.elasticsearch +def test_PipelineCodeGen_simple_sparse_pipeline(): + doc_store = ElasticsearchDocumentStore(index="my-index") + retriever = ElasticsearchRetriever(document_store=doc_store, top_k=20) + pipeline = Pipeline() + pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) + + code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=False) + assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" + "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "\n" + "p = Pipeline()\n" + "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + + +@pytest.mark.elasticsearch +def test_PipelineCodeGen_dual_retriever_pipeline(): + es_doc_store = ElasticsearchDocumentStore(index="my-index") + es_retriever = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) + dense_doc_store = InMemoryDocumentStore(index="my-index") + emb_retriever = EmbeddingRetriever(document_store=dense_doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2") + p_ensemble = Pipeline() + p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) + p_ensemble.add_node(component=emb_retriever, name="EmbeddingRetriever", inputs=["Query"]) + p_ensemble.add_node( + 
component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"] + ) + + code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) + assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" + "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "in_memory_document_store = InMemoryDocumentStore(index=\"my-index\")\n" + "embedding_retriever = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" + "join_results = JoinDocuments(join_mode=\"merge\")\n" + "\n" + "p = Pipeline()\n" + "p.add_node(component=es_retriever, name=\"EsRetriever\", inputs=[\"Query\"])\n" + "p.add_node(component=embedding_retriever, name=\"EmbeddingRetriever\", inputs=[\"Query\"])\n" + "p.add_node(component=join_results, name=\"JoinResults\", inputs=[\"EsRetriever\", \"EmbeddingRetriever\"])") + + +@pytest.mark.elasticsearch +def test_PipelineCodeGen_dual_retriever_pipeline_same_docstore(): + es_doc_store = ElasticsearchDocumentStore(index="my-index") + es_retriever = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) + emb_retriever = EmbeddingRetriever(document_store=es_retriever, embedding_model="sentence-transformers/all-MiniLM-L6-v2") + p_ensemble = Pipeline() + p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) + p_ensemble.add_node(component=emb_retriever, name="EmbeddingRetriever", inputs=["Query"]) + p_ensemble.add_node( + component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"] + ) + + code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) + assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" + "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" + "join_results = JoinDocuments(join_mode=\"merge\")\n" + "\n" + "p = Pipeline()\n" + "p.add_node(component=es_retriever, name=\"EsRetriever\", inputs=[\"Query\"])\n" + "p.add_node(component=embedding_retriever, name=\"EmbeddingRetriever\", inputs=[\"Query\"])\n" + "p.add_node(component=join_results, name=\"JoinResults\", inputs=[\"EsRetriever\", \"EmbeddingRetriever\"])") + + +@pytest.mark.elasticsearch +def test_PipelineCodeGen_imports(): + doc_store = ElasticsearchDocumentStore(index="my-index") + retriever = ElasticsearchRetriever(document_store=doc_store, top_k=20) + pipeline = Pipeline() + pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) + + code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=True) + assert code == ("from haystack.document_stores import ElasticsearchDocumentStore\n" + "from haystack.nodes import ElasticsearchRetriever\n" + "\n" + "elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" + "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "\n" + "p = Pipeline()\n" + "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + + +def test_PipelineCodeGen_order_components(): + dependency_map = { + "a": ["aa", "ab"], + "aa": [], + "ab": ["aba"], + "aba": [], + "b": ["a", 
"c"], + "c": ["a"] + } + ordered = _PipelineCodeGen._order_components(dependency_map=dependency_map) + assert ordered == ["aa", "aba", "ab", "a", "c", "b"] + + @pytest.mark.usefixtures(deepset_cloud_fixture.__name__) @responses.activate def test_load_from_deepset_cloud_query(): From 4f63666957f21514ac2f2bf8e9dc18b43bd8739a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 22 Feb 2022 11:09:04 +0000 Subject: [PATCH 16/22] Update Documentation & Code Style --- test/test_pipeline.py | 108 +++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index b8ecd020a1..07c103410f 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -11,12 +11,7 @@ from haystack.document_stores.memory import InMemoryDocumentStore from haystack.nodes.other.join_docs import JoinDocuments from haystack.nodes.retriever.sparse import ElasticsearchRetriever -from haystack.pipelines import ( - Pipeline, - DocumentSearchPipeline, - RootNode, - ExtractiveQAPipeline -) +from haystack.pipelines import Pipeline, DocumentSearchPipeline, RootNode, ExtractiveQAPipeline from haystack.pipelines.base import _PipelineCodeGen from haystack.nodes import DensePassageRetriever, EmbeddingRetriever @@ -191,11 +186,13 @@ def test_PipelineCodeGen_simple_dense_pipeline(): pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=False) - assert code == ("in_memory_document_store = InMemoryDocumentStore(index=\"my-index\")\n" - "retri = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" - "\n" - "p = Pipeline()\n" - "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + assert code == ( + 'in_memory_document_store = InMemoryDocumentStore(index="my-index")\n' + 'retri = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n' + "\n" + "p = Pipeline()\n" + 'p.add_node(component=retri, name="retri", inputs=["Query"])' + ) @pytest.mark.elasticsearch @@ -206,11 +203,13 @@ def test_PipelineCodeGen_simple_sparse_pipeline(): pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=False) - assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" - "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" - "\n" - "p = Pipeline()\n" - "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + assert code == ( + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n' + "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "\n" + "p = Pipeline()\n" + 'p.add_node(component=retri, name="retri", inputs=["Query"])' + ) @pytest.mark.elasticsearch @@ -218,7 +217,9 @@ def test_PipelineCodeGen_dual_retriever_pipeline(): es_doc_store = ElasticsearchDocumentStore(index="my-index") es_retriever = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) dense_doc_store = InMemoryDocumentStore(index="my-index") - emb_retriever = EmbeddingRetriever(document_store=dense_doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2") + emb_retriever = EmbeddingRetriever( 
+ document_store=dense_doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2" + ) p_ensemble = Pipeline() p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) p_ensemble.add_node(component=emb_retriever, name="EmbeddingRetriever", inputs=["Query"]) @@ -227,23 +228,27 @@ def test_PipelineCodeGen_dual_retriever_pipeline(): ) code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) - assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" - "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" - "in_memory_document_store = InMemoryDocumentStore(index=\"my-index\")\n" - "embedding_retriever = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" - "join_results = JoinDocuments(join_mode=\"merge\")\n" - "\n" - "p = Pipeline()\n" - "p.add_node(component=es_retriever, name=\"EsRetriever\", inputs=[\"Query\"])\n" - "p.add_node(component=embedding_retriever, name=\"EmbeddingRetriever\", inputs=[\"Query\"])\n" - "p.add_node(component=join_results, name=\"JoinResults\", inputs=[\"EsRetriever\", \"EmbeddingRetriever\"])") + assert code == ( + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n' + "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + 'in_memory_document_store = InMemoryDocumentStore(index="my-index")\n' + 'embedding_retriever = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n' + 'join_results = JoinDocuments(join_mode="merge")\n' + "\n" + "p = Pipeline()\n" + 'p.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"])\n' + 'p.add_node(component=embedding_retriever, name="EmbeddingRetriever", inputs=["Query"])\n' + 'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"])' + ) @pytest.mark.elasticsearch def test_PipelineCodeGen_dual_retriever_pipeline_same_docstore(): es_doc_store = ElasticsearchDocumentStore(index="my-index") es_retriever = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) - emb_retriever = EmbeddingRetriever(document_store=es_retriever, embedding_model="sentence-transformers/all-MiniLM-L6-v2") + emb_retriever = EmbeddingRetriever( + document_store=es_retriever, embedding_model="sentence-transformers/all-MiniLM-L6-v2" + ) p_ensemble = Pipeline() p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) p_ensemble.add_node(component=emb_retriever, name="EmbeddingRetriever", inputs=["Query"]) @@ -252,15 +257,17 @@ def test_PipelineCodeGen_dual_retriever_pipeline_same_docstore(): ) code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) - assert code == ("elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" - "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" - "embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store, embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\")\n" - "join_results = JoinDocuments(join_mode=\"merge\")\n" - "\n" - "p = Pipeline()\n" - "p.add_node(component=es_retriever, name=\"EsRetriever\", inputs=[\"Query\"])\n" - "p.add_node(component=embedding_retriever, name=\"EmbeddingRetriever\", inputs=[\"Query\"])\n" 
- "p.add_node(component=join_results, name=\"JoinResults\", inputs=[\"EsRetriever\", \"EmbeddingRetriever\"])") + assert code == ( + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n' + "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + 'embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n' + 'join_results = JoinDocuments(join_mode="merge")\n' + "\n" + "p = Pipeline()\n" + 'p.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"])\n' + 'p.add_node(component=embedding_retriever, name="EmbeddingRetriever", inputs=["Query"])\n' + 'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"])' + ) @pytest.mark.elasticsearch @@ -271,25 +278,20 @@ def test_PipelineCodeGen_imports(): pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=True) - assert code == ("from haystack.document_stores import ElasticsearchDocumentStore\n" - "from haystack.nodes import ElasticsearchRetriever\n" - "\n" - "elasticsearch_document_store = ElasticsearchDocumentStore(index=\"my-index\")\n" - "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" - "\n" - "p = Pipeline()\n" - "p.add_node(component=retri, name=\"retri\", inputs=[\"Query\"])") + assert code == ( + "from haystack.document_stores import ElasticsearchDocumentStore\n" + "from haystack.nodes import ElasticsearchRetriever\n" + "\n" + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n' + "retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "\n" + "p = Pipeline()\n" + 'p.add_node(component=retri, name="retri", inputs=["Query"])' + ) def test_PipelineCodeGen_order_components(): - dependency_map = { - "a": ["aa", "ab"], - "aa": [], - "ab": ["aba"], - "aba": [], - "b": ["a", "c"], - "c": ["a"] - } + dependency_map = {"a": ["aa", "ab"], "aa": [], "ab": ["aba"], "aba": [], "b": ["a", "c"], "c": ["a"]} ordered = _PipelineCodeGen._order_components(dependency_map=dependency_map) assert ordered == ["aa", "aba", "ab", "a", "c", "b"] From dec2f96a44ba4451f7ad1d044dbebd94fa0ba5dc Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Tue, 22 Feb 2022 18:42:55 +0100 Subject: [PATCH 17/22] fix Dict typings --- haystack/pipelines/base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 1793aa3476..bc3e37d4c5 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -401,7 +401,7 @@ def save_to_deepset_cloud( logger.info(f"Pipeline config '{pipeline_config_name}' successfully created.") @classmethod - def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional[str] = None) -> dict: + def _get_pipeline_definition(cls, pipeline_config: Dict[str, Any], pipeline_name: Optional[str] = None) -> Dict[str, Any]: """ Get the definition of Pipeline from a given pipeline config. If the config contains more than one Pipeline, then the pipeline_name must be supplied. 
@@ -423,7 +423,7 @@ def _get_pipeline_definition(cls, pipeline_config: Dict, pipeline_name: Optional return pipeline_definition @classmethod - def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_variables: bool) -> dict: + def _get_component_definitions(cls, pipeline_config: Dict[str, Any], overwrite_with_env_variables: bool) -> Dict[str, Any]: """ Returns the definitions of all components from a given pipeline config. @@ -444,7 +444,7 @@ def _get_component_definitions(cls, pipeline_config: Dict, overwrite_with_env_va return component_definitions @classmethod - def _overwrite_with_env_variables(cls, definition: dict): + def _overwrite_with_env_variables(cls, definition: Dict[str, Any]): """ Overwrite the pipeline config with environment variables. For example, to change index name param for an ElasticsearchDocumentStore, an env variable 'MYDOCSTORE_PARAMS_INDEX=documents-2021' can be set. Note that an @@ -1484,7 +1484,7 @@ def generate_code( @classmethod def _generate_pipeline_code( - cls, pipeline_definition: dict, component_variable_names: Dict[str, str], pipeline_variable_name: str + cls, pipeline_definition: Dict[str, Any], component_variable_names: Dict[str, str], pipeline_variable_name: str ) -> str: code_lines = [f"{pipeline_variable_name} = Pipeline()"] for node in pipeline_definition["nodes"]: @@ -1499,7 +1499,7 @@ def _generate_pipeline_code( return code @classmethod - def _generate_components_code(cls, component_definitions: dict, component_variable_names: Dict[str, str]) -> str: + def _generate_components_code(cls, component_definitions: Dict[str, Any], component_variable_names: Dict[str, str]) -> str: code = "" declarations = {} dependency_map = {} From 14c11fb8cf9a9e4d5a5e16fd91c0310bc04edf6d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 22 Feb 2022 18:03:47 +0000 Subject: [PATCH 18/22] Update Documentation & Code Style --- haystack/pipelines/base.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index bc3e37d4c5..8ed8385f6d 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -401,7 +401,9 @@ def save_to_deepset_cloud( logger.info(f"Pipeline config '{pipeline_config_name}' successfully created.") @classmethod - def _get_pipeline_definition(cls, pipeline_config: Dict[str, Any], pipeline_name: Optional[str] = None) -> Dict[str, Any]: + def _get_pipeline_definition( + cls, pipeline_config: Dict[str, Any], pipeline_name: Optional[str] = None + ) -> Dict[str, Any]: """ Get the definition of Pipeline from a given pipeline config. If the config contains more than one Pipeline, then the pipeline_name must be supplied. @@ -423,7 +425,9 @@ def _get_pipeline_definition(cls, pipeline_config: Dict[str, Any], pipeline_name return pipeline_definition @classmethod - def _get_component_definitions(cls, pipeline_config: Dict[str, Any], overwrite_with_env_variables: bool) -> Dict[str, Any]: + def _get_component_definitions( + cls, pipeline_config: Dict[str, Any], overwrite_with_env_variables: bool + ) -> Dict[str, Any]: """ Returns the definitions of all components from a given pipeline config. 
@@ -1499,7 +1503,9 @@ def _generate_pipeline_code( return code @classmethod - def _generate_components_code(cls, component_definitions: Dict[str, Any], component_variable_names: Dict[str, str]) -> str: + def _generate_components_code( + cls, component_definitions: Dict[str, Any], component_variable_names: Dict[str, str] + ) -> str: code = "" declarations = {} dependency_map = {} From 1fd94b9cf5d650fb597c12fe824318f5351a6040 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Tue, 22 Feb 2022 23:19:39 +0100 Subject: [PATCH 19/22] validate user input before code gen --- haystack/pipelines/base.py | 76 ++++++++++++++--- test/test_pipeline.py | 170 +++++++++++++++++++++++++++++++++---- 2 files changed, 215 insertions(+), 31 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 8ed8385f6d..676cc8c7e1 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -42,7 +42,8 @@ ROOT_NODE_TO_PIPELINE_NAME = {"query": "query", "file": "indexing"} -CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(? dict: :param return_defaults: whether to output parameters that have the default values. """ - nodes = self.graph.nodes - pipeline_name = ROOT_NODE_TO_PIPELINE_NAME[self.root_node.lower()] pipelines: dict = {pipeline_name: {"name": pipeline_name, "type": self.__class__.__name__, "nodes": []}} components = {} - for node in nodes: + for node in self.graph.nodes: if node == self.root_node: continue component_instance = self.graph.nodes.get(node)["component"] @@ -1096,25 +1095,37 @@ def get_config(self, return_defaults: bool = False) -> dict: for component_parent in component_parent_classes: component_signature = {**component_signature, **inspect.signature(component_parent).parameters} - for key, value in component_params.items(): + for param_key, param_value in component_params.items(): # A parameter for a Component could be another Component. For instance, a Retriever has # the DocumentStore as a parameter. # Component configs must be a dict with a "type" key. The "type" keys distinguishes between # other parameters like "custom_mapping" that are dicts. # This currently only checks for the case single-level nesting case, wherein, "a Component has another # Component as a parameter". For deeper nesting cases, this function should be made recursive. 
- if isinstance(value, dict) and "type" in value.keys(): # the parameter is a Component - components[node]["params"][key] = value["type"] - sub_component_signature = inspect.signature(BaseComponent.subclasses[value["type"]]).parameters - params = { + if isinstance(param_value, dict) and "type" in param_value.keys(): # the parameter is a Component + sub_component = param_value + sub_component_type_name = sub_component["type"] + sub_component_signature = inspect.signature( + BaseComponent.subclasses[sub_component_type_name] + ).parameters + sub_component_params = { k: v - for k, v in value["params"].items() + for k, v in sub_component["params"].items() if sub_component_signature[k].default != v or return_defaults is True } - components[value["type"]] = {"name": value["type"], "type": value["type"], "params": params} + + sub_component_name = self._generate_component_name( + type_name=sub_component_type_name, params=sub_component_params, existing_components=components + ) + components[sub_component_name] = { + "name": sub_component_name, + "type": sub_component_type_name, + "params": sub_component_params, + } + components[node]["params"][param_key] = sub_component_name else: - if component_signature[key].default != value or return_defaults is True: - components[node]["params"][key] = value + if component_signature[param_key].default != param_value or return_defaults is True: + components[node]["params"][param_key] = param_value # create the Pipeline definition with how the Component are connected pipelines[pipeline_name]["nodes"].append({"name": node, "inputs": list(self.graph.predecessors(node))}) @@ -1126,6 +1137,22 @@ def get_config(self, return_defaults: bool = False) -> dict: } return config + def _generate_component_name( + self, + type_name: str, + params: Dict[str, Any], + existing_components: Dict[str, Any], + ): + component_name: str = type_name + # add number if there are multiple distinct ones of the same type + while component_name in existing_components and params != existing_components[component_name]["params"]: + occupied_num = 1 + if len(component_name) > len(type_name): + occupied_num = int(component_name[len(type_name) + 1 :]) + new_num = occupied_num + 1 + component_name = f"{type_name}_{new_num}" + return component_name + def print_eval_report( self, eval_result: EvaluationResult, @@ -1446,6 +1473,27 @@ class _PipelineCodeGen: def _camel_to_snake_case(cls, input: str) -> str: return CAMEL_CASE_TO_SNAKE_CASE_REGEX.sub("_", input).lower() + @classmethod + def _validate_user_input(cls, input: str): + if isinstance(input, str) and not VALID_CODE_GEN_INPUT_REGEX.match(input): + raise ValueError(f"'{input}' is not a valid code gen variable name. 
Use word characters only.") + + @classmethod + def _validate_config(cls, pipeline_config: Dict[str, Any]): + for component in pipeline_config["components"]: + cls._validate_user_input(component["name"]) + cls._validate_user_input(component["type"]) + for k, v in component.get("params", {}).items(): + cls._validate_user_input(k) + cls._validate_user_input(v) + for pipeline in pipeline_config["pipelines"]: + cls._validate_user_input(pipeline["name"]) + cls._validate_user_input(pipeline["type"]) + for node in pipeline["nodes"]: + cls._validate_user_input(node["name"]) + for input in node["inputs"]: + cls._validate_user_input(input) + @classmethod def generate_code( cls, @@ -1455,6 +1503,8 @@ def generate_code( comment: Optional[str] = None, ) -> str: pipeline_config = pipeline.get_config() + cls._validate_config(pipeline_config) + component_definitions = pipeline._get_component_definitions( pipeline_config=pipeline_config, overwrite_with_env_variables=False ) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 17b34c7831..b522843ebe 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -181,23 +181,6 @@ def test_to_code(): assert index_pipeline.get_config() == locals()["index_pipeline_from_code"].get_config() -@pytest.mark.elasticsearch -def test_PipelineCodeGen_simple_dense_pipeline(): - doc_store = InMemoryDocumentStore(index="my-index") - retriever = EmbeddingRetriever(document_store=doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2") - pipeline = Pipeline() - pipeline.add_node(component=retriever, name="retri", inputs=["Query"]) - - code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=False) - assert code == ( - 'in_memory_document_store = InMemoryDocumentStore(index="my-index")\n' - 'retri = EmbeddingRetriever(document_store=in_memory_document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n' - "\n" - "p = Pipeline()\n" - 'p.add_node(component=retri, name="retri", inputs=["Query"])' - ) - - @pytest.mark.elasticsearch def test_PipelineCodeGen_simple_sparse_pipeline(): doc_store = ElasticsearchDocumentStore(index="my-index") @@ -250,7 +233,7 @@ def test_PipelineCodeGen_dual_retriever_pipeline_same_docstore(): es_doc_store = ElasticsearchDocumentStore(index="my-index") es_retriever = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) emb_retriever = EmbeddingRetriever( - document_store=es_retriever, embedding_model="sentence-transformers/all-MiniLM-L6-v2" + document_store=es_doc_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2" ) p_ensemble = Pipeline() p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) @@ -273,6 +256,62 @@ def test_PipelineCodeGen_dual_retriever_pipeline_same_docstore(): ) +@pytest.mark.elasticsearch +def test_PipelineCodeGen_dual_retriever_pipeline_different_docstore(): + es_doc_store_a = ElasticsearchDocumentStore(index="my-index-a") + es_doc_store_b = ElasticsearchDocumentStore(index="my-index-b") + es_retriever = ElasticsearchRetriever(document_store=es_doc_store_a, top_k=20) + emb_retriever = EmbeddingRetriever( + document_store=es_doc_store_b, embedding_model="sentence-transformers/all-MiniLM-L6-v2" + ) + p_ensemble = Pipeline() + p_ensemble.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"]) + p_ensemble.add_node(component=emb_retriever, name="EmbeddingRetriever", inputs=["Query"]) + p_ensemble.add_node( + component=JoinDocuments(join_mode="merge"), name="JoinResults", 
inputs=["EsRetriever", "EmbeddingRetriever"] + ) + + code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) + assert code == ( + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index-a")\n' + "es_retriever = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + 'elasticsearch_document_store_2 = ElasticsearchDocumentStore(index="my-index-b")\n' + 'embedding_retriever = EmbeddingRetriever(document_store=elasticsearch_document_store_2, embedding_model="sentence-transformers/all-MiniLM-L6-v2")\n' + 'join_results = JoinDocuments(join_mode="merge")\n' + "\n" + "p = Pipeline()\n" + 'p.add_node(component=es_retriever, name="EsRetriever", inputs=["Query"])\n' + 'p.add_node(component=embedding_retriever, name="EmbeddingRetriever", inputs=["Query"])\n' + 'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever", "EmbeddingRetriever"])' + ) + + +@pytest.mark.elasticsearch +def test_PipelineCodeGen_dual_retriever_pipeline_same_type(): + es_doc_store = ElasticsearchDocumentStore(index="my-index") + es_retriever_1 = ElasticsearchRetriever(document_store=es_doc_store, top_k=20) + es_retriever_2 = ElasticsearchRetriever(document_store=es_doc_store, top_k=10) + p_ensemble = Pipeline() + p_ensemble.add_node(component=es_retriever_1, name="EsRetriever1", inputs=["Query"]) + p_ensemble.add_node(component=es_retriever_2, name="EsRetriever2", inputs=["Query"]) + p_ensemble.add_node( + component=JoinDocuments(join_mode="merge"), name="JoinResults", inputs=["EsRetriever1", "EsRetriever2"] + ) + + code = _PipelineCodeGen.generate_code(pipeline=p_ensemble, pipeline_variable_name="p", generate_imports=False) + assert code == ( + 'elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")\n' + "es_retriever_1 = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)\n" + "es_retriever_2 = ElasticsearchRetriever(document_store=elasticsearch_document_store)\n" + 'join_results = JoinDocuments(join_mode="merge")\n' + "\n" + "p = Pipeline()\n" + 'p.add_node(component=es_retriever_1, name="EsRetriever1", inputs=["Query"])\n' + 'p.add_node(component=es_retriever_2, name="EsRetriever2", inputs=["Query"])\n' + 'p.add_node(component=join_results, name="JoinResults", inputs=["EsRetriever1", "EsRetriever2"])' + ) + + @pytest.mark.elasticsearch def test_PipelineCodeGen_imports(): doc_store = ElasticsearchDocumentStore(index="my-index") @@ -299,6 +338,101 @@ def test_PipelineCodeGen_order_components(): assert ordered == ["aa", "aba", "ab", "a", "c", "b"] +@pytest.mark.parametrize("input", ["\btest", " test", "%test", "+test"]) +def test_PipelineCodeGen_validate_user_input_invalid(input): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_user_input(input) + + +@pytest.mark.parametrize("input", ["test", "testName", "test_name", "test-name", "test-name1234"]) +def test_PipelineCodeGen_validate_user_input_valid(input): + _PipelineCodeGen._validate_user_input(input) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_component_name(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config({"components": [{"name": "\btest"}]}) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_component_type(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config({"components": [{"name": "test", "type": "\btest"}]}) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_component_param(): + with 
pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + {"components": [{"name": "test", "type": "test", "params": {"key": "\btest"}}]} + ) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_component_param_key(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + {"components": [{"name": "test", "type": "test", "params": {"\btest": "test"}}]} + ) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_pipeline_name(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + { + "components": [ + { + "name": "test", + "type": "test", + } + ], + "pipelines": [{"name": "\btest"}], + } + ) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_pipeline_type(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + { + "components": [ + { + "name": "test", + "type": "test", + } + ], + "pipelines": [{"name": "test", "type": "\btest"}], + } + ) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_pipeline_node_name(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + { + "components": [ + { + "name": "test", + "type": "test", + } + ], + "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "\btest"}]}], + } + ) + + +def test_PipelineCodeGen_validate_pipeline_config_invalid_pipeline_node_inputs(): + with pytest.raises(ValueError): + _PipelineCodeGen._validate_config( + { + "components": [ + { + "name": "test", + "type": "test", + } + ], + "pipelines": [{"name": "test", "type": "test", "nodes": [{"name": "test", "inputs": ["\btest"]}]}], + } + ) + + @pytest.mark.usefixtures(deepset_cloud_fixture.__name__) @responses.activate def test_load_from_deepset_cloud_query(): From 53abe8a82f596b616e7d2aff7c731788d5176bd6 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Wed, 23 Feb 2022 10:02:19 +0100 Subject: [PATCH 20/22] enable urls for to_code() --- haystack/pipelines/base.py | 2 +- test/test_pipeline.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 676cc8c7e1..48a66b17c8 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -43,7 +43,7 @@ ROOT_NODE_TO_PIPELINE_NAME = {"query": "query", "file": "indexing"} CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(?<=[a-z])(?=[A-Z0-9])") -VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.]+$") +VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.?=:%&$§#@,;()+/*~]+$") MODULE_NOT_FOUND = "MODULE_NOT_FOUND" CODE_GEN_ALLOWED_IMPORTS = ["haystack.document_stores", "haystack.nodes"] CODE_GEN_DEFAULT_COMMENT = "This code has been generated." 
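# Illustrative sketch (editorial, not part of the diff above or below): the
# broadened VALID_CODE_GEN_INPUT_REGEX introduced by this patch is meant to let
# URL-valued parameters pass validation while whitespace still fails. This is a
# standalone check that assumes only the regex as defined above:
import re

VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.?=:%&$§#@,;()+/*~]+$")

assert VALID_CODE_GEN_INPUT_REGEX.match("http://localhost:8000/my-path?param=value#anchor")
assert VALID_CODE_GEN_INPUT_REGEX.match(" test") is None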
diff --git a/test/test_pipeline.py b/test/test_pipeline.py index b522843ebe..715acd335d 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -338,13 +338,13 @@ def test_PipelineCodeGen_order_components(): assert ordered == ["aa", "aba", "ab", "a", "c", "b"] -@pytest.mark.parametrize("input", ["\btest", " test", "%test", "+test"]) +@pytest.mark.parametrize("input", ["\btest", " test", "\ttest", "\ntest"]) def test_PipelineCodeGen_validate_user_input_invalid(input): with pytest.raises(ValueError): _PipelineCodeGen._validate_user_input(input) -@pytest.mark.parametrize("input", ["test", "testName", "test_name", "test-name", "test-name1234"]) +@pytest.mark.parametrize("input", ["test", "testName", "test_name", "test-name", "test-name1234", "http://localhost:8000/my-path?param=value#anchor"]) def test_PipelineCodeGen_validate_user_input_valid(input): _PipelineCodeGen._validate_user_input(input) From f356f92f7ab479d680d0060fd6cef3a76748422c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 23 Feb 2022 09:04:54 +0000 Subject: [PATCH 21/22] Update Documentation & Code Style --- test/test_pipeline.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 715acd335d..29e0dd48ae 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -344,7 +344,10 @@ def test_PipelineCodeGen_validate_user_input_invalid(input): _PipelineCodeGen._validate_user_input(input) -@pytest.mark.parametrize("input", ["test", "testName", "test_name", "test-name", "test-name1234", "http://localhost:8000/my-path?param=value#anchor"]) +@pytest.mark.parametrize( + "input", + ["test", "testName", "test_name", "test-name", "test-name1234", "http://localhost:8000/my-path?param=value#anchor"], +) def test_PipelineCodeGen_validate_user_input_valid(input): _PipelineCodeGen._validate_user_input(input) From 278358e561f7b4479b469684309f4e5750a5dab8 Mon Sep 17 00:00:00 2001 From: Thomas Stadelmann Date: Wed, 23 Feb 2022 10:24:19 +0100 Subject: [PATCH 22/22] remove all chars except colon from validation regex --- haystack/pipelines/base.py | 2 +- test/test_pipeline.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/haystack/pipelines/base.py b/haystack/pipelines/base.py index 48a66b17c8..028e9acf73 100644 --- a/haystack/pipelines/base.py +++ b/haystack/pipelines/base.py @@ -43,7 +43,7 @@ ROOT_NODE_TO_PIPELINE_NAME = {"query": "query", "file": "indexing"} CAMEL_CASE_TO_SNAKE_CASE_REGEX = re.compile(r"(?<=[a-z])(?=[A-Z0-9])") -VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.?=:%&$§#@,;()+/*~]+$") +VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.:]+$") MODULE_NOT_FOUND = "MODULE_NOT_FOUND" CODE_GEN_ALLOWED_IMPORTS = ["haystack.document_stores", "haystack.nodes"] CODE_GEN_DEFAULT_COMMENT = "This code has been generated." 
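# Illustrative sketch (editorial, not part of the diff above or below): the
# narrowed regex from this patch keeps word characters plus '-', '/', '.' and
# ':', so plain URLs still validate while query strings, anchors, and call
# syntax are rejected. Standalone check, assuming only the regex defined above:
import re

VALID_CODE_GEN_INPUT_REGEX = re.compile(r"^[-a-zA-Z0-9_/.:]+$")

assert VALID_CODE_GEN_INPUT_REGEX.match("http://localhost:8000/my-path")
assert VALID_CODE_GEN_INPUT_REGEX.match("test()") is None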
diff --git a/test/test_pipeline.py b/test/test_pipeline.py index 29e0dd48ae..9c2c8ec33e 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -338,15 +338,14 @@ def test_PipelineCodeGen_order_components(): assert ordered == ["aa", "aba", "ab", "a", "c", "b"] -@pytest.mark.parametrize("input", ["\btest", " test", "\ttest", "\ntest"]) +@pytest.mark.parametrize("input", ["\btest", " test", "#test", "+test", "\ttest", "\ntest", "test()"]) def test_PipelineCodeGen_validate_user_input_invalid(input): with pytest.raises(ValueError): _PipelineCodeGen._validate_user_input(input) @pytest.mark.parametrize( - "input", - ["test", "testName", "test_name", "test-name", "test-name1234", "http://localhost:8000/my-path?param=value#anchor"], + "input", ["test", "testName", "test_name", "test-name", "test-name1234", "http://localhost:8000/my-path"] ) def test_PipelineCodeGen_validate_user_input_valid(input): _PipelineCodeGen._validate_user_input(input)
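
Illustrative end-to-end sketch (editorial addition, not one of the 22 patches): the snippet below exercises the code generation path this series builds up, using only names that appear in the tests above (_PipelineCodeGen.generate_code, ElasticsearchDocumentStore, ElasticsearchRetriever). A reachable Elasticsearch instance is assumed, and the expected output mirrors the assertion in test_PipelineCodeGen_imports.

from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import ElasticsearchRetriever
from haystack.pipelines import Pipeline
from haystack.pipelines.base import _PipelineCodeGen

# Build a small query pipeline, then emit equivalent Python code for it.
doc_store = ElasticsearchDocumentStore(index="my-index")
retriever = ElasticsearchRetriever(document_store=doc_store, top_k=20)
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="retri", inputs=["Query"])

code = _PipelineCodeGen.generate_code(pipeline=pipeline, pipeline_variable_name="p", generate_imports=True)
print(code)
# Expected output (per test_PipelineCodeGen_imports):
#   from haystack.document_stores import ElasticsearchDocumentStore
#   from haystack.nodes import ElasticsearchRetriever
#
#   elasticsearch_document_store = ElasticsearchDocumentStore(index="my-index")
#   retri = ElasticsearchRetriever(document_store=elasticsearch_document_store, top_k=20)
#
#   p = Pipeline()
#   p.add_node(component=retri, name="retri", inputs=["Query"])

Because generation goes through get_config(), the emitted code contains only non-default parameters: top_k=20 is printed, while a retriever left at the default top_k=10 is not (see test_PipelineCodeGen_dual_retriever_pipeline_same_type above).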