From 495cd4f5aac9dd0b7eb57a2d8be653f3c6c1293d Mon Sep 17 00:00:00 2001
From: Gabriel Pelouze <gabriel@pelouze.net>
Date: Mon, 18 Mar 2024 11:51:43 +0100
Subject: [PATCH 1/5] add Extractor base class to prepare combining extractors

---
 .../component_containerizer/handlers.py       | 87 ++++++++-----------
 .../services/extractor/extractor.py           | 56 ++++++++++++
 .../services/extractor/headerextractor.py     | 49 +++++++----
 .../services/extractor/pyextractor.py         | 26 +++---
 .../services/extractor/rextractor.py          | 25 +++---
 5 files changed, 155 insertions(+), 88 deletions(-)
 create mode 100644 jupyterlab_vre/services/extractor/extractor.py

diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py
index 876974ae7..cd055aaa8 100644
--- a/jupyterlab_vre/component_containerizer/handlers.py
+++ b/jupyterlab_vre/component_containerizer/handlers.py
@@ -29,6 +29,7 @@
 from jupyterlab_vre.database.cell import Cell
 from jupyterlab_vre.services.containerizer.Rcontainerizer import Rcontainerizer
 from jupyterlab_vre.services.converter.converter import ConverterReactFlowChart
+from jupyterlab_vre.services.extractor.extractor import DummyExtractor
 from jupyterlab_vre.services.extractor.pyextractor import PyExtractor
 from jupyterlab_vre.services.extractor.rextractor import RExtractor
 from jupyterlab_vre.services.extractor.headerextractor import HeaderExtractor
@@ -124,27 +125,31 @@ async def post(self, *args, **kwargs):
 
         source = notebook.cells[cell_index].source
 
-        # extractor based on the cell header
-        try:
-            extractor = HeaderExtractor(notebook, source)
-        except jsonschema.ValidationError as e:
-            self.set_status(400, f"Invalid cell header")
-            self.write(
-                {
-                    'message': f"Error in cell header: {e}",
-                    'reason': None,
-                    'traceback': traceback.format_exception(e),
-                }
-            )
-            self.flush()
-            return
+        if notebook.cells[cell_index].cell_type != 'code':
+            # dummy extractor for non-code cells (e.g. markdown)
+            extractor = DummyExtractor(notebook, source)
+        else:
+            # extractor based on the cell header
+            try:
+                extractor = HeaderExtractor(notebook, source)
+            except jsonschema.ValidationError as e:
+                self.set_status(400, f"Invalid cell header")
+                self.write(
+                    {
+                        'message': f"Error in cell header: {e}",
+                        'reason': None,
+                        'traceback': traceback.format_exception(e),
+                    }
+                )
+                self.flush()
+                return
 
-        # extractor based on the kernel (if cell header is not defined)
-        if not extractor.enabled():
-            if kernel == "IRkernel":
-                extractor = RExtractor(notebook)
-            else:
-                extractor = PyExtractor(notebook)
+            # extractor based on the kernel (if cell header is not defined)
+            if not extractor.enabled():
+                if kernel == "IRkernel":
+                    extractor = RExtractor(notebook, source)
+                else:
+                    extractor = PyExtractor(notebook, source)
 
         extracted_nb = extract_cell_by_index(notebook, cell_index)
         if kernel == "IRkernel":
@@ -162,49 +167,33 @@ async def post(self, *args, **kwargs):
                 '.', '-').replace('@',
                                   '-at-').strip()
 
-        ins = {}
-        outs = {}
-        params = {}
-        confs = []
-        dependencies = []
-
-        # Check if cell is code. If cell is for example markdown we get execution from 'extractor.infer_cell_inputs(
-        # source)'
-        if notebook.cells[cell_index].cell_type == 'code':
-            ins = extractor.infer_cell_inputs(source)
-            outs = extractor.infer_cell_outputs(source)
-
-            confs = extractor.extract_cell_conf_ref(source)
-            dependencies = extractor.infer_cell_dependencies(source, confs)
-
         node_id = str(uuid.uuid4())[:7]
         cell = Cell(
             node_id=node_id,
             title=title,
             task_name=title.lower().replace(' ', '-').replace('.', '-'),
             original_source=source,
-            inputs=ins,
-            outputs=outs,
-            params=params,
-            confs=confs,
-            dependencies=dependencies,
+            inputs=extractor.ins,
+            outputs=extractor.outs,
+            params={},
+            confs=extractor.confs,
+            dependencies=extractor.dependencies,
             container_source="",
             kernel=kernel,
             notebook_dict=extracted_nb.dict()
         )
-        if notebook.cells[cell_index].cell_type == 'code':
-            cell.integrate_configuration()
-            params = extractor.extract_cell_params(cell.original_source)
-            cell.add_params(params)
-            cell.add_param_values(params)
+        cell.integrate_configuration()
+        extractor.params = extractor.extract_cell_params(cell.original_source)
+        cell.add_params(extractor.params)
+        cell.add_param_values(extractor.params)
 
         node = ConverterReactFlowChart.get_node(
             node_id,
             title,
-            set(ins),
-            set(outs),
-            params,
-            dependencies
+            set(extractor.ins),
+            set(extractor.outs),
+            extractor.params,
+            extractor.dependencies
         )
 
         chart = {
diff --git a/jupyterlab_vre/services/extractor/extractor.py b/jupyterlab_vre/services/extractor/extractor.py
new file mode 100644
index 000000000..dcd3fb867
--- /dev/null
+++ b/jupyterlab_vre/services/extractor/extractor.py
@@ -0,0 +1,56 @@
+import abc
+
+
+class Extractor(abc.ABC):  # base class: runs all five extraction steps on init
+    ins: dict  # result of infer_cell_inputs()
+    outs: dict  # result of infer_cell_outputs()
+    params: dict  # result of extract_cell_params(cell_source)
+    confs: dict  # mapping returned by extract_cell_conf_ref()
+    dependencies: list  # result of infer_cell_dependencies(confs)
+
+    def __init__(self, notebook, cell_source):
+        self.notebook = notebook
+        self.cell_source = cell_source
+        # Eager extraction: subclasses must set their own state before super().
+        self.ins = self.infer_cell_inputs()
+        self.outs = self.infer_cell_outputs()
+        self.params = self.extract_cell_params(cell_source)
+        self.confs = self.extract_cell_conf_ref()
+        self.dependencies = self.infer_cell_dependencies(self.confs)
+
+    @abc.abstractmethod
+    def infer_cell_inputs(self):
+        """Return the inputs of ``self.cell_source`` (kept in ``self.ins``)."""
+
+    @abc.abstractmethod
+    def infer_cell_outputs(self):
+        """Return the outputs of ``self.cell_source`` (kept in ``self.outs``)."""
+
+    @abc.abstractmethod
+    def extract_cell_params(self, source):
+        """Return the params found in ``source`` (kept in ``self.params``)."""
+
+    @abc.abstractmethod
+    def extract_cell_conf_ref(self):
+        """Return the configurations referenced by the cell, as a dict."""
+
+    @abc.abstractmethod
+    def infer_cell_dependencies(self, confs):
+        """Return the cell's dependencies as a list, given its ``confs``."""
+
+
+class DummyExtractor(Extractor):  # no-op extractor: every result is empty
+    def infer_cell_inputs(self):
+        return {}
+
+    def infer_cell_outputs(self):
+        return {}
+
+    def extract_cell_params(self, source):
+        return {}
+
+    def extract_cell_conf_ref(self):
+        return {}  # a dict, like every other extractor's confs
+
+    def infer_cell_dependencies(self, confs):
+        return []
diff --git a/jupyterlab_vre/services/extractor/headerextractor.py b/jupyterlab_vre/services/extractor/headerextractor.py
index 753c85409..381f8c569 100644
--- a/jupyterlab_vre/services/extractor/headerextractor.py
+++ b/jupyterlab_vre/services/extractor/headerextractor.py
@@ -7,8 +7,10 @@
 import logging
 import yaml
 
+from .extractor import Extractor
 
-class HeaderExtractor:
+
+class HeaderExtractor(Extractor):
     """ Extracts cells using information defined by the user in its header
 
     Cells should contain a comment with a yaml block defining inputs, outputs,
@@ -49,11 +51,10 @@ def __init__(self, notebook, cell_source):
              ),
             re.MULTILINE)
         self.schema = self._load_schema()
-
-        self.notebook = notebook
-        self.cell_source = cell_source
         self.cell_header = self._extract_header(cell_source)
 
+        super().__init__(notebook, cell_source)
+
     @staticmethod
     def _load_schema():
         filename = os.path.join(
@@ -155,32 +156,44 @@ def _parse_inputs_outputs_param_items(
 
     def _infer_cell_inputs_outputs_params(
             self,
-            source,
+            header: Union[dict, None],
             item_type: Literal['inputs', 'outputs', 'params'],
             ) -> dict:
-        header = self._extract_header(source)
+        if header is None:
+            return {}
         items = header['NaaVRE']['cell'].get(item_type, [])
         items = [self._parse_inputs_outputs_param_items(it, item_type)
                  for it in items]
         return {it['name']: it for it in items}
 
-    def infer_cell_inputs(self, source):
-        return self._infer_cell_inputs_outputs_params(source, 'inputs')
+    def infer_cell_inputs(self):
+        """Return the inputs declared in the cell header, keyed by name."""
+        header = self.cell_header
+        inputs = self._infer_cell_inputs_outputs_params(header, 'inputs')
+        return inputs
 
-    def infer_cell_outputs(self, source):
-        return self._infer_cell_inputs_outputs_params(source, 'outputs')
+    def infer_cell_outputs(self):
+        """Return the outputs declared in the cell header, keyed by name."""
+        header = self.cell_header
+        outputs = self._infer_cell_inputs_outputs_params(header, 'outputs')
+        return outputs
 
     def extract_cell_params(self, source):
-        return self._infer_cell_inputs_outputs_params(source, 'params')
-
-    def extract_cell_conf_ref(self, source):
-        header = self._extract_header(source)
-        items = header['NaaVRE']['cell'].get('confs', [])
+        return self._infer_cell_inputs_outputs_params(
+            self._extract_header(source),
+            'params',
+            )
+
+    def extract_cell_conf_ref(self):
+        if self.cell_header is None:
+            return {}
+        items = self.cell_header['NaaVRE']['cell'].get('confs', [])
         return {k: v['assignation'] for it in items for k, v in it.items()}
 
-    def infer_cell_dependencies(self, source, confs):
-        header = self._extract_header(source)
-        items = header['NaaVRE']['cell'].get('dependencies', [])
+    def infer_cell_dependencies(self, confs):
+        if self.cell_header is None:
+            return []
+        items = self.cell_header['NaaVRE']['cell'].get('dependencies', [])
         return [
             {
                 'name': it.get('name'),
diff --git a/jupyterlab_vre/services/extractor/pyextractor.py b/jupyterlab_vre/services/extractor/pyextractor.py
index d08bd4636..dae2978a0 100644
--- a/jupyterlab_vre/services/extractor/pyextractor.py
+++ b/jupyterlab_vre/services/extractor/pyextractor.py
@@ -8,15 +8,17 @@
 from pytype import config as pytype_config
 from pytype.tools.annotate_ast import annotate_ast
 
+from .extractor import Extractor
 
-class PyExtractor:
+
+class PyExtractor(Extractor):
     sources: list
     imports: dict
     configurations: dict
     global_params: dict
     undefined: dict
 
-    def __init__(self, notebook):
+    def __init__(self, notebook, cell_source):
         # If cell_type is code and not starting with '!'
         self.sources = [nbcell.source for nbcell in notebook.cells if
                         nbcell.cell_type == 'code' and len(nbcell.source) > 0 and nbcell.source[0] != '!']
@@ -31,6 +33,8 @@ def __init__(self, notebook):
         for source in self.sources:
             self.undefined.update(self.__extract_cell_undefined(source))
 
+        super().__init__(notebook, cell_source)
+
     def __extract_imports(self, sources):
         imports = {}
         for s in sources:
@@ -92,20 +96,20 @@ def __extract_params(self, sources):
                         }
         return params
 
-    def infer_cell_outputs(self, cell_source):
-        cell_names = self.__extract_cell_names(cell_source)
+    def infer_cell_outputs(self):
+        cell_names = self.__extract_cell_names(self.cell_source)
         return {
             name: properties
             for name, properties in cell_names.items()
-            if name not in self.__extract_cell_undefined(cell_source)
+            if name not in self.__extract_cell_undefined(self.cell_source)
                and name not in self.imports
                and name in self.undefined
                and name not in self.configurations
                and name not in self.global_params
         }
 
-    def infer_cell_inputs(self, cell_source):
-        cell_undefined = self.__extract_cell_undefined(cell_source)
+    def infer_cell_inputs(self):
+        cell_undefined = self.__extract_cell_undefined(self.cell_source)
         return {
             und: properties
             for und, properties in cell_undefined.items()
@@ -114,9 +118,9 @@ def infer_cell_inputs(self, cell_source):
                and und not in self.global_params
         }
 
-    def infer_cell_dependencies(self, cell_source, confs):
+    def infer_cell_dependencies(self, confs):
         dependencies = []
-        names = self.__extract_cell_names(cell_source)
+        names = self.__extract_cell_names(self.cell_source)
 
         for ck in confs:
             names.update(self.__extract_cell_names(confs[ck]))
@@ -236,9 +240,9 @@ def extract_cell_params(self, cell_source):
                 params[u] = self.global_params[u]
         return params
 
-    def extract_cell_conf_ref(self, cell_source):
+    def extract_cell_conf_ref(self):
         confs = {}
-        cell_unds = self.__extract_cell_undefined(cell_source)
+        cell_unds = self.__extract_cell_undefined(self.cell_source)
         conf_unds = [und for und in cell_unds if und in self.configurations]
         for u in conf_unds:
             if u not in confs:
diff --git a/jupyterlab_vre/services/extractor/rextractor.py b/jupyterlab_vre/services/extractor/rextractor.py
index 027d932d4..228074c3f 100644
--- a/jupyterlab_vre/services/extractor/rextractor.py
+++ b/jupyterlab_vre/services/extractor/rextractor.py
@@ -7,6 +7,9 @@
 import rpy2.robjects.packages as rpackages
 from rpy2.robjects.packages import importr
 
+from .extractor import Extractor
+
+
 # Create an R environment
 r_env = robjects.globalenv
 
@@ -99,14 +102,14 @@
 
 # TODO: create an interface such that it can be easily extended to other kernels
 
-class RExtractor:
+class RExtractor(Extractor):
     sources: list
     imports: dict
     configurations: dict
     global_params: dict
     undefined: dict
 
-    def __init__(self, notebook):
+    def __init__(self, notebook, cell_source):
         self.sources = [nbcell.source for nbcell in notebook.cells if
                         nbcell.cell_type == 'code' and len(nbcell.source) > 0]
 
@@ -117,6 +120,8 @@ def __init__(self, notebook):
         for source in self.sources:
             self.undefined.update(self.__extract_cell_undefined(source))
 
+        super().__init__(notebook, cell_source)
+
     def __extract_imports(self, sources):
         imports = {}
         for s in sources:
@@ -213,20 +218,20 @@ def __extract_params(self, sources):  # check source https://adv-r.hadley.nz/exp
                 }
         return params
 
-    def infer_cell_outputs(self, cell_source):
-        cell_names = self.__extract_cell_names(cell_source)
+    def infer_cell_outputs(self):
+        cell_names = self.__extract_cell_names(self.cell_source)
         return {
             name: properties
             for name, properties in cell_names.items()
-            if name not in self.__extract_cell_undefined(cell_source)
+            if name not in self.__extract_cell_undefined(self.cell_source)
                and name not in self.imports
                and name in self.undefined
                and name not in self.configurations
                and name not in self.global_params
         }
 
-    def infer_cell_inputs(self, cell_source):
-        cell_undefined = self.__extract_cell_undefined(cell_source)
+    def infer_cell_inputs(self):
+        cell_undefined = self.__extract_cell_undefined(self.cell_source)
         return {
             und: properties
             for und, properties in cell_undefined.items()
@@ -235,7 +240,7 @@ def infer_cell_inputs(self, cell_source):
                and und not in self.global_params
         }
 
-    def infer_cell_dependencies(self, cell_source, confs):
+    def infer_cell_dependencies(self, confs):
         # TODO: check this code, you have removed logic. 
         # we probably like to only use dependencies that are necessary to execute the cell
         # however this is challenging in R as functions are non-scoped
@@ -381,9 +386,9 @@ def extract_cell_params(self, cell_source):
                 params[u] = self.global_params[u]
         return params
 
-    def extract_cell_conf_ref(self, cell_source):
+    def extract_cell_conf_ref(self):
         confs = {}
-        cell_unds = self.__extract_cell_undefined(cell_source)
+        cell_unds = self.__extract_cell_undefined(self.cell_source)
         conf_unds = [und for und in cell_unds if und in self.configurations]
         for u in conf_unds:
             if u not in confs:

From f9ef870241efaea1ea4c6d665f14342bd23ad961 Mon Sep 17 00:00:00 2001
From: Gabriel Pelouze <gabriel@pelouze.net>
Date: Wed, 20 Mar 2024 14:45:41 +0100
Subject: [PATCH 2/5] fix test_extractor

---
 jupyterlab_vre/tests/test_extractor.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/jupyterlab_vre/tests/test_extractor.py b/jupyterlab_vre/tests/test_extractor.py
index 8d5601af2..d755c7184 100644
--- a/jupyterlab_vre/tests/test_extractor.py
+++ b/jupyterlab_vre/tests/test_extractor.py
@@ -27,12 +27,12 @@ def create_cell(payload_path=None):
 
     cell_index = payload['cell_index']
     notebook = nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT)
+    source = notebook.cells[cell_index].source
     if payload['kernel'] == "IRkernel":
-        extractor = RExtractor(notebook)
+        extractor = RExtractor(notebook, source)
     else:
-        extractor = PyExtractor(notebook)
+        extractor = PyExtractor(notebook, source)
 
-    source = notebook.cells[cell_index].source
     title = source.partition('\n')[0]
     title = title.replace('#', '').replace(
         '_', '-').replace('(', '-').replace(')', '-').replace('.', '-').strip() if title and title[
@@ -53,11 +53,11 @@ def create_cell(payload_path=None):
     # Check if cell is code. If cell is for example markdown we get execution from 'extractor.infere_cell_inputs(
     # source)'
     if notebook.cells[cell_index].cell_type == 'code':
-        ins = extractor.infer_cell_inputs(source)
-        outs = extractor.infer_cell_outputs(source)
+        ins = extractor.infer_cell_inputs()
+        outs = extractor.infer_cell_outputs()
 
-        confs = extractor.extract_cell_conf_ref(source)
-        dependencies = extractor.infer_cell_dependencies(source, confs)
+        confs = extractor.extract_cell_conf_ref()
+        dependencies = extractor.infer_cell_dependencies(confs)
 
     node_id = str(uuid.uuid4())[:7]
     cell = Cell(

From 6eb9ca3f6117769a02ac0290d36320c71ba8f788 Mon Sep 17 00:00:00 2001
From: Gabriel Pelouze <gabriel@pelouze.net>
Date: Mon, 25 Mar 2024 15:39:05 +0100
Subject: [PATCH 3/5] remove notebook_containerizer

---
 .github/dependabot.yml                        |   5 -
 Makefile                                      |   4 -
 docker/vanilla/dev.Dockerfile                 |   5 +-
 jupyterlab_vre/__init__.py                    |   3 -
 .../notebook_containerizer/__init__.py        |   0
 .../notebook_containerizer/handlers.py        | 104 ------------------
 packages/notebook-containerizer/package.json  |  48 --------
 .../src/NotebookContainerizerDialog.tsx       |  62 -----------
 packages/notebook-containerizer/src/Theme.ts  |  16 ---
 packages/notebook-containerizer/src/index.tsx |  77 -------------
 .../notebook-containerizer/style/index.css    |   1 -
 packages/notebook-containerizer/tsconfig.json |  13 ---
 12 files changed, 2 insertions(+), 336 deletions(-)
 delete mode 100644 jupyterlab_vre/notebook_containerizer/__init__.py
 delete mode 100644 jupyterlab_vre/notebook_containerizer/handlers.py
 delete mode 100644 packages/notebook-containerizer/package.json
 delete mode 100644 packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx
 delete mode 100644 packages/notebook-containerizer/src/Theme.ts
 delete mode 100644 packages/notebook-containerizer/src/index.tsx
 delete mode 100644 packages/notebook-containerizer/style/index.css
 delete mode 100644 packages/notebook-containerizer/tsconfig.json

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 62799f60d..549d4ba2b 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -25,11 +25,6 @@ updates:
     schedule:
       interval: "weekly"
 
-  - package-ecosystem: "npm"
-    directory: "/packages/notebook-containerizer"
-    schedule:
-      interval: "weekly"
-
   - package-ecosystem: "npm"
     directory: "/packages/notebook-search"
     schedule:
diff --git a/Makefile b/Makefile
index bfb28abef..190b72162 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,6 @@ install-backend: build-backend
 build-frontend: jlpm-install
 	npx lerna run build --scope @jupyter_vre/chart-customs
 	npx lerna run build --scope @jupyter_vre/core
-	npx lerna run build --scope @jupyter_vre/notebook-containerizer
 	npx lerna run build --scope @jupyter_vre/notebook-search
 	npx lerna run build --scope @jupyter_vre/dataset-search
 	npx lerna run build --scope @jupyter_vre/components
@@ -59,7 +58,6 @@ install-ui:
 	$(call INSTALL_LAB_EXTENSION,notebook-search)
 	$(call INSTALL_LAB_EXTENSION,dataset-search)
 	$(call INSTALL_LAB_EXTENSION,core)
-	$(call INSTALL_LAB_EXTENSION,notebook-containerizer)
 	$(call INSTALL_LAB_EXTENSION,chart-customs)
 	$(call INSTALL_LAB_EXTENSION,components)
 	$(call INSTALL_LAB_EXTENSION,experiment-manager)
@@ -70,7 +68,6 @@ link-ui:
 	$(call LINK_LAB_EXTENSION,notebook-search)
 	$(call LINK_LAB_EXTENSION,dataset-search)
 	$(call LINK_LAB_EXTENSION,core)
-	$(call LINK_LAB_EXTENSION,notebook-containerizer)
 	$(call LINK_LAB_EXTENSION,chart-customs)
 	$(call LINK_LAB_EXTENSION,components)
 	$(call LINK_LAB_EXTENSION,experiment-manager)
@@ -80,7 +77,6 @@ link-ui:
 dist-ui: build-frontend
 	mkdir -p dist
 	$(call PACKAGE_LAB_EXTENSION,core)
-	$(call PACKAGE_LAB_EXTENSION,notebook-containerizer)
 	$(call PACKAGE_LAB_EXTENSION,chart-customs)
 	$(call PACKAGE_LAB_EXTENSION,components)
 	$(call PACKAGE_LAB_EXTENSION,experiment-manager)
diff --git a/docker/vanilla/dev.Dockerfile b/docker/vanilla/dev.Dockerfile
index 81d13818c..02b15ff26 100644
--- a/docker/vanilla/dev.Dockerfile
+++ b/docker/vanilla/dev.Dockerfile
@@ -41,7 +41,6 @@ COPY packages/chart-customs/package.json packages/chart-customs/
 COPY packages/components/package.json packages/components/
 COPY packages/core/package.json packages/core/
 COPY packages/experiment-manager/package.json packages/experiment-manager/
-COPY packages/notebook-containerizer/package.json packages/notebook-containerizer/
 COPY packages/notebook-search/package.json packages/notebook-search/
 COPY packages/vre-menu/package.json packages/vre-menu/
 COPY packages/vre-panel/package.json packages/vre-panel/
@@ -69,11 +68,11 @@ RUN jupyter serverextension enable --py jupyterlab_vre --user
 WORKDIR /live/ts
 COPY --chown=$NB_USER:users packages/ packages/
 COPY --chown=$NB_USER:users tsconfig-base.json .
-RUN extensions="chart-customs core notebook-containerizer notebook-search components experiment-manager vre-panel vre-menu"; \
+RUN extensions="chart-customs core notebook-search components experiment-manager vre-panel vre-menu"; \
     for ext in $extensions; do \
       npx lerna run build --scope "@jupyter_vre/$ext"; \
     done
-RUN extensions="chart-customs core notebook-containerizer notebook-search components experiment-manager vre-panel vre-menu"; \
+RUN extensions="chart-customs core notebook-search components experiment-manager vre-panel vre-menu"; \
     for ext in $extensions; do \
       jupyter labextension link --no-build "packages/$ext"; \
     done
diff --git a/jupyterlab_vre/__init__.py b/jupyterlab_vre/__init__.py
index 2e3ad83e2..d7344a1c8 100644
--- a/jupyterlab_vre/__init__.py
+++ b/jupyterlab_vre/__init__.py
@@ -6,7 +6,6 @@
 from .dataset_search.handlers import DatasetSearchHandler, DatasetDownloadHandler
 from .experiment_manager.handlers import ExportWorkflowHandler, ExecuteWorkflowHandler
 from .handlers import CatalogGetAllHandler
-from .notebook_containerizer.handlers import NotebookExtractorHandler
 from .notebook_search.handlers import NotebookSearchHandler, NotebookSearchRatingHandler, NotebookDownloadHandler, \
     NotebookSeachHistoryHandler, NotebookSourceHandler
 from .registries.handlers import RegistriesHandler
@@ -33,8 +32,6 @@ def load_jupyter_server_extension(lab_app):
         (
         url_path_join(lab_app.web_app.settings['base_url'], r'/vre/notebooksearchrating'), NotebookSearchRatingHandler),
         (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/extract'), ExtractorHandler),
-        (
-        url_path_join(lab_app.web_app.settings['base_url'], r'/vre/nbcontainerizer/extract'), NotebookExtractorHandler),
         (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/types'), TypesHandler),
         (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/baseimage'), BaseImageHandler),
         (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/addcell'), CellsHandler),
diff --git a/jupyterlab_vre/notebook_containerizer/__init__.py b/jupyterlab_vre/notebook_containerizer/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/jupyterlab_vre/notebook_containerizer/handlers.py b/jupyterlab_vre/notebook_containerizer/handlers.py
deleted file mode 100644
index b1a871783..000000000
--- a/jupyterlab_vre/notebook_containerizer/handlers.py
+++ /dev/null
@@ -1,104 +0,0 @@
-import copy
-import json
-import logging
-import os
-import uuid
-
-import nbformat as nb
-from notebook.base.handlers import APIHandler
-from tornado import web
-
-from jupyterlab_vre.database.catalog import Catalog
-from jupyterlab_vre.database.cell import Cell
-from jupyterlab_vre.services.converter.converter import ConverterReactFlowChart
-from jupyterlab_vre.services.extractor.pyextractor import PyExtractor
-from jupyterlab_vre.services.extractor.rextractor import RExtractor
-
-
-# TODO: we might have to do something similar here where we have to determine the kernel and based on that get the extractor
-
-class NotebookExtractorHandler(APIHandler, Catalog):
-
-    @web.authenticated
-    async def get(self):
-        msg_json = dict(title='Operation not supported.')
-        self.write(msg_json)
-        self.flush()
-
-    @web.authenticated
-    async def post(self, *args, **kwargs):
-
-        payload = self.get_json_body()
-        logging.getLogger(__name__).debug('NotebookExtractorHandler. payload: ' + json.dumps(payload, indent=4))
-        print('----------------------------------------------')
-        print('NotebookExtractorHandler. payload: ' + json.dumps(payload, indent=4))
-        print('----------------------------------------------')
-        notebook = nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT)
-        kernel = payload['kernel']
-        if kernel == "IRkernel":
-            extractor = RExtractor(notebook)
-        else:
-            extractor = PyExtractor(notebook)
-        source = ''
-        params = set()
-        confs = set()
-        ins = dict()
-        outs = extractor.infer_cell_outputs(notebook.cells[len(notebook.cells) - 1].source)
-        title = ''
-        for cell_source in extractor.sources:
-            p = extractor.extract_cell_params(cell_source)
-            params.update(p)
-            c = extractor.extract_cell_conf_ref(source)
-            confs.update(c)
-            source += cell_source + '\n'
-
-            if not title:
-                title = cell_source.partition('\n')[0].strip()
-                title = 'notebook-' + title.replace('#', '').replace('_', '-').replace('(', '-').replace(')', '-'). \
-                    replace('.', '-').strip() if title[0] == '#' \
-                    else 'Untitled'
-                if 'JUPYTERHUB_USER' in os.environ:
-                    title += '-' + os.environ['JUPYTERHUB_USER'].replace('_', '-').replace('(', '-').replace(')',
-                                                                                                             '-').replace(
-                        '.', '-').replace('@',
-                                          '-at-').strip()
-        dependencies = extractor.infer_cell_dependencies(source, confs)
-
-        node_id = str(uuid.uuid4())[:7]
-        cell = Cell(
-            node_id=node_id,
-            title=title,
-            task_name=title.lower().replace(' ', '-').replace('.', '-'),
-            original_source=source,
-            inputs=ins,
-            outputs=outs,
-            params=list(params),
-            confs=list(confs),
-            dependencies=list(dependencies),
-            container_source=''
-        )
-        cell.integrate_configuration()
-        node = ConverterReactFlowChart.get_node(
-            node_id,
-            title,
-            set(ins),
-            set(outs),
-            params,
-        )
-
-        chart = {
-            'offset': {
-                'x': 0,
-                'y': 0,
-            },
-            'scale': 1,
-            'nodes': {node_id: node},
-            'links': {},
-            'selected': {},
-            'hovered': {},
-        }
-        cell.chart_obj = chart
-        Catalog.editor_buffer = copy.deepcopy(cell)
-        logging.getLogger(__name__).debug('NotebookExtractorHandler. cell: ' + str(cell.toJSON()))
-        self.write(cell.toJSON())
-        self.flush()
diff --git a/packages/notebook-containerizer/package.json b/packages/notebook-containerizer/package.json
deleted file mode 100644
index b99c23816..000000000
--- a/packages/notebook-containerizer/package.json
+++ /dev/null
@@ -1,48 +0,0 @@
-{
-    "name": "@jupyter_vre/notebook-containerizer",
-    "version": "1.0.0",
-    "description": "NaaVRE Notebook Containerizer",
-    "keywords": [
-        "jupyter",
-        "jupyterlab",
-        "jupyterlab-extension"
-    ],
-    "homepage": "https://github.com/QCDIS/NaaVRE",
-    "bugs": {
-        "url": "https://github.com/QCDIS/NaaVRE/issues"
-    },
-    "license": "BSD-3-Clause",
-    "author": "Riccardo Bianchi",
-    "files": [
-        "lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}",
-        "src/**/*.{ts,tsx}",
-        "style/**/*.{css,eot,gif,html,jpg,json,png,svg,woff2,ttf}"
-    ],
-    "main": "lib/index.js",
-    "types": "lib/index.d.ts",
-    "style": "style/index.css",
-    "repository": {
-        "type": "git",
-        "url": "https://github.com/QCDIS/NaaVRE.git"
-    },
-    "scripts": {
-        "build": "tsc",
-        "dist": "yarn pack",
-        "clean": "rimraf lib",
-        "watch": "tsc -w"
-    },
-    "dependencies": {
-        "@jupyterlab/notebook": "3.5.3"
-    },
-    "devDependencies": {
-        "@types/node": "20.2.5",
-        "rimraf": "5.0.0",
-        "typescript": "5.0.2"
-    },
-    "publishConfig": {
-        "access": "public"
-    },
-    "jupyterlab": {
-        "extension": true
-    }
-}
\ No newline at end of file
diff --git a/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx b/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx
deleted file mode 100644
index cd4e4abf3..000000000
--- a/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx
+++ /dev/null
@@ -1,62 +0,0 @@
-import { INotebookModel, INotebookTracker } from '@jupyterlab/notebook';
-import { styled, ThemeProvider } from '@material-ui/core';
-import * as React from 'react';
-import { theme } from './Theme';
-import { requestAPI } from '@jupyter_vre/core';
-
-interface IState {
-}
-
-export const DefaultState: IState = {
-}
-
-const CatalogBody = styled('div')({
-    display: 'flex',
-    overflow: 'hidden',
-    flexDirection: 'row',
-})
-
-interface NotebookContainerizerDialogProps {
-
-    notebookTracker: INotebookTracker
-}
-
-export class NotebookContainerizerDialog extends React.Component<NotebookContainerizerDialogProps> {
-
-    state = DefaultState
-
-    constructor(props: NotebookContainerizerDialogProps) {
-        super(props);
-    }
-
-    exctractor = async (notebookModel: INotebookModel, _save = false) => {
-        try {
-            const resp = await requestAPI<any>('nbcontainerizer/extract', {
-                body: JSON.stringify({
-                    notebook: notebookModel.toJSON()
-                }),
-                method: 'POST'
-            });
-    
-            console.log(resp);
-            
-        } catch (error) {
-            console.log(error);
-        }
-    }
-
-    componentDidMount(): void {
-        
-        this.exctractor(this.props.notebookTracker.currentWidget.model);
-    }
-
-    render(): React.ReactElement {
-        return (
-            <ThemeProvider theme={theme}>
-                <p className='section-header'>Containerize Notebook</p>
-                <CatalogBody>
-                </CatalogBody>
-            </ThemeProvider>
-        )
-    }
-}
\ No newline at end of file
diff --git a/packages/notebook-containerizer/src/Theme.ts b/packages/notebook-containerizer/src/Theme.ts
deleted file mode 100644
index 5a98d6fdf..000000000
--- a/packages/notebook-containerizer/src/Theme.ts
+++ /dev/null
@@ -1,16 +0,0 @@
-import { createTheme } from '@material-ui/core';
-
-export const theme = createTheme({
-  palette: {
-    secondary: {
-      main: '#ea5b2d',
-      dark: '#b12800',
-      light: '#ff8c5a',
-    },
-    primary: {
-      main: '#0f4e8a',
-      dark: '#00275c',
-      light: '#4e79ba',
-    },
-  },
-});
diff --git a/packages/notebook-containerizer/src/index.tsx b/packages/notebook-containerizer/src/index.tsx
deleted file mode 100644
index 3e36ebacc..000000000
--- a/packages/notebook-containerizer/src/index.tsx
+++ /dev/null
@@ -1,77 +0,0 @@
-import {
-    JupyterFrontEnd,
-    JupyterFrontEndPlugin
-} from '@jupyterlab/application';
-import { ToolbarButton, ReactWidget, Dialog, showDialog } from '@jupyterlab/apputils';
-import { DocumentRegistry } from '@jupyterlab/docregistry';
-import {
-    INotebookModel, INotebookTracker, NotebookPanel
-} from '@jupyterlab/notebook';
-import { DisposableDelegate, IDisposable } from '@lumino/disposable';
-import * as React from 'react';
-import { NotebookContainerizerDialog } from './NotebookContainerizerDialog';
-
-/**
- * The plugin registration information.
- */
-const plugin: JupyterFrontEndPlugin<void> = {
-    activate,
-    id: 'toolbar-containerize-notebook',
-    autoStart: true,
-    requires: [INotebookTracker]
-};
-
-export class NotebookSearchExtension implements DocumentRegistry.IWidgetExtension<NotebookPanel, INotebookModel> {
-
-    notebookTracker: INotebookTracker
-
-    constructor(notebookTracker: INotebookTracker) {
-        this.notebookTracker = notebookTracker;
-    }
-
-    createNew(
-        panel: NotebookPanel,
-        _context: DocumentRegistry.IContext<INotebookModel>
-    ): IDisposable {
-
-        const containerizeNotebook = () => {
-
-            const catalogOptions: Partial<Dialog.IOptions<any>> = {
-                title: '',
-                body: ReactWidget.create(
-                    <NotebookContainerizerDialog notebookTracker={this.notebookTracker}/>
-                ) as Dialog.IBodyWidget<any>,
-                buttons: []
-            };
-        
-
-            showDialog(catalogOptions);
-        };
-
-        const button = new ToolbarButton({
-            className: 'notebook-containerizer',
-            label: 'Notebooks Containerizer',
-            onClick: containerizeNotebook,
-            tooltip: 'Notebooks Containerizer',
-        });
-
-        panel.toolbar.insertItem(10, 'containerizeNotebooks', button);
-        return new DisposableDelegate(() => {
-            button.dispose();
-        });
-    }
-}
-
-/**
- * Activate the extension.
- *
- * @param app Main application object
- */
-function activate(app: JupyterFrontEnd, notebookTracker: INotebookTracker): void {
-    app.docRegistry.addWidgetExtension('Notebook', new NotebookSearchExtension(notebookTracker));
-}
-
-/**
- * Export the plugin as default.
- */
-export default plugin;
\ No newline at end of file
diff --git a/packages/notebook-containerizer/style/index.css b/packages/notebook-containerizer/style/index.css
deleted file mode 100644
index 6e02bdfae..000000000
--- a/packages/notebook-containerizer/style/index.css
+++ /dev/null
@@ -1 +0,0 @@
-/* Fill */
\ No newline at end of file
diff --git a/packages/notebook-containerizer/tsconfig.json b/packages/notebook-containerizer/tsconfig.json
deleted file mode 100644
index 5df522e9b..000000000
--- a/packages/notebook-containerizer/tsconfig.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "extends": "../../tsconfig-base",
-  "compilerOptions": {
-    "module": "commonjs",
-    "outDir": "lib",
-    "rootDir": "src",
-    "target": "es6",
-    "types": ["node"],
-    "typeRoots": ["./node_modules/@types"],
-    "skipLibCheck": true
-  },
-  "include": ["src/*", "src/*/*"]
-}
\ No newline at end of file

From 4c9ff69a925cdf8e88744286f50710cf85e97fe3 Mon Sep 17 00:00:00 2001
From: Gabriel Pelouze <gabriel@pelouze.net>
Date: Mon, 25 Mar 2024 16:41:25 +0100
Subject: [PATCH 4/5] enable partial override for code analyzer

---
 .../component_containerizer/handlers.py       | 10 ++--
 .../services/extractor/headerextractor.py     | 56 ++++++++++++++++---
 2 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py
index 27e3757b2..904d38d7c 100644
--- a/jupyterlab_vre/component_containerizer/handlers.py
+++ b/jupyterlab_vre/component_containerizer/handlers.py
@@ -144,12 +144,14 @@ async def post(self, *args, **kwargs):
                 self.flush()
                 return
 
-            # extractor based on the kernel (if cell header is not defined)
-            if not extractor.enabled():
+            # Extractor based on code analysis. Used if the cell has no header,
+            # or if some values are not specified in the header
+            if not extractor.is_complete():
                 if kernel == "IRkernel":
-                    extractor = RExtractor(notebook, source)
+                    code_extractor = RExtractor(notebook, source)
                 else:
-                    extractor = PyExtractor(notebook, source)
+                    code_extractor = PyExtractor(notebook, source)
+                extractor.add_missing_values(code_extractor)
 
         extracted_nb = extract_cell_by_index(notebook, cell_index)
         if kernel == "IRkernel":
diff --git a/jupyterlab_vre/services/extractor/headerextractor.py b/jupyterlab_vre/services/extractor/headerextractor.py
index 9e22ac362..38429c800 100644
--- a/jupyterlab_vre/services/extractor/headerextractor.py
+++ b/jupyterlab_vre/services/extractor/headerextractor.py
@@ -41,6 +41,11 @@ class HeaderExtractor(Extractor):
     The document is validated with the schema `cell_header.schema.json`
 
     """
+    ins: Union[dict, None]
+    outs: Union[dict, None]
+    params: Union[dict, None]
+    confs: Union[list, None]
+    dependencies: Union[list, None]
 
     def __init__(self, notebook, cell_source):
         self.re_yaml_doc_in_comment = re.compile(
@@ -52,6 +57,7 @@ def __init__(self, notebook, cell_source):
             re.MULTILINE)
         self.schema = self._load_schema()
         self.cell_header = self._extract_header(cell_source)
+        self._external_extract_cell_params = None
 
         super().__init__(notebook, cell_source)
 
@@ -67,6 +73,15 @@ def _load_schema():
     def enabled(self):
         return self.cell_header is not None
 
+    def is_complete(self):
+        return (
+                (self.ins is not None)
+                and (self.outs is not None)
+                and (self.params is not None)
+                and (self.confs is not None)
+                and (self.dependencies is not None)
+            )
+
     def _extract_header(self, cell_source):
         # get yaml document from cell comments
         m = self.re_yaml_doc_in_comment.match(cell_source)
@@ -88,6 +103,25 @@ def _extract_header(self, cell_source):
             raise e
         return header
 
+    def add_missing_values(self, extractor: Extractor):
+        """ Add values not specified in the header from another extractor
+        (e.g. PyExtractor or RExtractor)
+        """
+        if self.ins is None:
+            self.ins = extractor.ins
+        if self.outs is None:
+            self.outs = extractor.outs
+        if self.params is None:
+            self.params = extractor.params
+            # We store a reference to extractor.extract_cell_params because
+            # self.extract_cell_params is called after self.add_missing_values
+            # in component_containerizer.handlers.ExtractorHandler.post()
+            self._external_extract_cell_params = extractor.extract_cell_params
+        if self.confs is None:
+            self.confs = extractor.confs
+        if self.dependencies is None:
+            self.dependencies = extractor.dependencies
+
     @staticmethod
     def _parse_inputs_outputs_param_items(
             item: Union[str, dict],
@@ -158,10 +192,12 @@ def _infer_cell_inputs_outputs_params(
             self,
             header: Union[dict, None],
             item_type: Literal['inputs', 'outputs', 'params'],
-            ) -> dict:
+            ) -> Union[dict, None]:
         if header is None:
-            return {}
-        items = header['NaaVRE']['cell'].get(item_type, [])
+            return None
+        items = header['NaaVRE']['cell'].get(item_type)
+        if items is None:
+            return None
         items = [self._parse_inputs_outputs_param_items(it, item_type)
                  for it in items]
         return {it['name']: it for it in items}
@@ -179,6 +215,8 @@ def infer_cell_outputs(self):
             )
 
     def extract_cell_params(self, source):
+        if self._external_extract_cell_params is not None:
+            return self._external_extract_cell_params(source)
         return self._infer_cell_inputs_outputs_params(
             self._extract_header(source),
             'params',
@@ -186,14 +224,18 @@ def extract_cell_params(self, source):
 
     def extract_cell_conf_ref(self):
         if self.cell_header is None:
-            return {}
-        items = self.cell_header['NaaVRE']['cell'].get('confs', [])
+            return None
+        items = self.cell_header['NaaVRE']['cell'].get('confs')
+        if items is None:
+            return None
         return {k: v['assignation'] for it in items for k, v in it.items()}
 
     def infer_cell_dependencies(self, confs):
         if self.cell_header is None:
-            return []
-        items = self.cell_header['NaaVRE']['cell'].get('dependencies', [])
+            return None
+        items = self.cell_header['NaaVRE']['cell'].get('dependencies')
+        if items is None:
+            return None
         return [
             {
                 'name': it.get('name'),

From 027ccba6cb62d376f49aabc4252d11bc583b5017 Mon Sep 17 00:00:00 2001
From: Gabriel Pelouze <gabriel@pelouze.net>
Date: Mon, 25 Mar 2024 16:56:31 +0100
Subject: [PATCH 5/5] remove deps from cell identity

This change should have directly followed commit
5166a15d3a5b1328a70a4566ef54e48be4d6e36c in PR #1257.
---
 jupyterlab_vre/component_containerizer/handlers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py
index 904d38d7c..fd3936207 100644
--- a/jupyterlab_vre/component_containerizer/handlers.py
+++ b/jupyterlab_vre/component_containerizer/handlers.py
@@ -173,7 +173,6 @@ async def post(self, *args, **kwargs):
             'params': extractor.params,
             'inputs': extractor.ins,
             'outputs': extractor.outs,
-            'deps': sorted(extractor.dependencies, key=lambda x: x['name']),
             }
         cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
         node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]