From 495cd4f5aac9dd0b7eb57a2d8be653f3c6c1293d Mon Sep 17 00:00:00 2001 From: Gabriel Pelouze Date: Mon, 18 Mar 2024 11:51:43 +0100 Subject: [PATCH 1/5] add Extractor base class to prepare combining extractors --- .../component_containerizer/handlers.py | 87 ++++++++----------- .../services/extractor/extractor.py | 56 ++++++++++++ .../services/extractor/headerextractor.py | 49 +++++++---- .../services/extractor/pyextractor.py | 26 +++--- .../services/extractor/rextractor.py | 25 +++--- 5 files changed, 155 insertions(+), 88 deletions(-) create mode 100644 jupyterlab_vre/services/extractor/extractor.py diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py index 876974ae7..cd055aaa8 100644 --- a/jupyterlab_vre/component_containerizer/handlers.py +++ b/jupyterlab_vre/component_containerizer/handlers.py @@ -29,6 +29,7 @@ from jupyterlab_vre.database.cell import Cell from jupyterlab_vre.services.containerizer.Rcontainerizer import Rcontainerizer from jupyterlab_vre.services.converter.converter import ConverterReactFlowChart +from jupyterlab_vre.services.extractor.extractor import DummyExtractor from jupyterlab_vre.services.extractor.pyextractor import PyExtractor from jupyterlab_vre.services.extractor.rextractor import RExtractor from jupyterlab_vre.services.extractor.headerextractor import HeaderExtractor @@ -124,27 +125,31 @@ async def post(self, *args, **kwargs): source = notebook.cells[cell_index].source - # extractor based on the cell header - try: - extractor = HeaderExtractor(notebook, source) - except jsonschema.ValidationError as e: - self.set_status(400, f"Invalid cell header") - self.write( - { - 'message': f"Error in cell header: {e}", - 'reason': None, - 'traceback': traceback.format_exception(e), - } - ) - self.flush() - return + if notebook.cells[cell_index].cell_type != 'code': + # dummy extractor for non-code cells (e.g. markdown) + extractor = DummyExtractor(notebook, source) + else: + # extractor based on the cell header + try: + extractor = HeaderExtractor(notebook, source) + except jsonschema.ValidationError as e: + self.set_status(400, f"Invalid cell header") + self.write( + { + 'message': f"Error in cell header: {e}", + 'reason': None, + 'traceback': traceback.format_exception(e), + } + ) + self.flush() + return - # extractor based on the kernel (if cell header is not defined) - if not extractor.enabled(): - if kernel == "IRkernel": - extractor = RExtractor(notebook) - else: - extractor = PyExtractor(notebook) + # extractor based on the kernel (if cell header is not defined) + if not extractor.enabled(): + if kernel == "IRkernel": + extractor = RExtractor(notebook, source) + else: + extractor = PyExtractor(notebook, source) extracted_nb = extract_cell_by_index(notebook, cell_index) if kernel == "IRkernel": @@ -162,49 +167,33 @@ async def post(self, *args, **kwargs): '.', '-').replace('@', '-at-').strip() - ins = {} - outs = {} - params = {} - confs = [] - dependencies = [] - - # Check if cell is code. If cell is for example markdown we get execution from 'extractor.infer_cell_inputs( - # source)' - if notebook.cells[cell_index].cell_type == 'code': - ins = extractor.infer_cell_inputs(source) - outs = extractor.infer_cell_outputs(source) - - confs = extractor.extract_cell_conf_ref(source) - dependencies = extractor.infer_cell_dependencies(source, confs) - node_id = str(uuid.uuid4())[:7] cell = Cell( node_id=node_id, title=title, task_name=title.lower().replace(' ', '-').replace('.', '-'), original_source=source, - inputs=ins, - outputs=outs, - params=params, - confs=confs, - dependencies=dependencies, + inputs=extractor.ins, + outputs=extractor.outs, + params={}, + confs=extractor.confs, + dependencies=extractor.dependencies, container_source="", kernel=kernel, notebook_dict=extracted_nb.dict() ) - if notebook.cells[cell_index].cell_type == 'code': - cell.integrate_configuration() - params = extractor.extract_cell_params(cell.original_source) - cell.add_params(params) - cell.add_param_values(params) + cell.integrate_configuration() + extractor.params = extractor.extract_cell_params(cell.original_source) + cell.add_params(extractor.params) + cell.add_param_values(extractor.params) node = ConverterReactFlowChart.get_node( node_id, title, - set(ins), - set(outs), - params, - dependencies + set(extractor.ins), + set(extractor.outs), + extractor.params, + extractor.dependencies ) chart = { diff --git a/jupyterlab_vre/services/extractor/extractor.py b/jupyterlab_vre/services/extractor/extractor.py new file mode 100644 index 000000000..dcd3fb867 --- /dev/null +++ b/jupyterlab_vre/services/extractor/extractor.py @@ -0,0 +1,56 @@ +import abc + + +class Extractor(abc.ABC): + ins: dict + outs: dict + params: dict + confs: list + dependencies: list + + def __init__(self, notebook, cell_source): + self.notebook = notebook + self.cell_source = cell_source + + self.ins = self.infer_cell_inputs() + self.outs = self.infer_cell_outputs() + self.params = self.extract_cell_params(cell_source) + self.confs = self.extract_cell_conf_ref() + self.dependencies = self.infer_cell_dependencies(self.confs) + + @abc.abstractmethod + def infer_cell_inputs(self): + pass + + @abc.abstractmethod + def infer_cell_outputs(self): + pass + + @abc.abstractmethod + def extract_cell_params(self, source): + pass + + @abc.abstractmethod + def extract_cell_conf_ref(self): + pass + + @abc.abstractmethod + def infer_cell_dependencies(self, confs): + pass + + +class DummyExtractor(Extractor): + def infer_cell_inputs(self): + return {} + + def infer_cell_outputs(self): + return {} + + def extract_cell_params(self, source): + return {} + + def extract_cell_conf_ref(self): + return [] + + def infer_cell_dependencies(self, confs): + return [] diff --git a/jupyterlab_vre/services/extractor/headerextractor.py b/jupyterlab_vre/services/extractor/headerextractor.py index 753c85409..381f8c569 100644 --- a/jupyterlab_vre/services/extractor/headerextractor.py +++ b/jupyterlab_vre/services/extractor/headerextractor.py @@ -7,8 +7,10 @@ import logging import yaml +from .extractor import Extractor -class HeaderExtractor: + +class HeaderExtractor(Extractor): """ Extracts cells using information defined by the user in its header Cells should contain a comment with a yaml block defining inputs, outputs, @@ -49,11 +51,10 @@ def __init__(self, notebook, cell_source): ), re.MULTILINE) self.schema = self._load_schema() - - self.notebook = notebook - self.cell_source = cell_source self.cell_header = self._extract_header(cell_source) + super().__init__(notebook, cell_source) + @staticmethod def _load_schema(): filename = os.path.join( @@ -155,32 +156,44 @@ def _parse_inputs_outputs_param_items( def _infer_cell_inputs_outputs_params( self, - source, + header: Union[dict, None], item_type: Literal['inputs', 'outputs', 'params'], ) -> dict: - header = self._extract_header(source) + if header is None: + return {} items = header['NaaVRE']['cell'].get(item_type, []) items = [self._parse_inputs_outputs_param_items(it, item_type) for it in items] return {it['name']: it for it in items} - def infer_cell_inputs(self, source): - return self._infer_cell_inputs_outputs_params(source, 'inputs') + def infer_cell_inputs(self): + return self._infer_cell_inputs_outputs_params( + self.cell_header, + 'inputs', + ) - def infer_cell_outputs(self, source): - return self._infer_cell_inputs_outputs_params(source, 'outputs') + def infer_cell_outputs(self): + return self._infer_cell_inputs_outputs_params( + self.cell_header, + 'outputs', + ) def extract_cell_params(self, source): - return self._infer_cell_inputs_outputs_params(source, 'params') - - def extract_cell_conf_ref(self, source): - header = self._extract_header(source) - items = header['NaaVRE']['cell'].get('confs', []) + return self._infer_cell_inputs_outputs_params( + self._extract_header(source), + 'params', + ) + + def extract_cell_conf_ref(self): + if self.cell_header is None: + return {} + items = self.cell_header['NaaVRE']['cell'].get('confs', []) return {k: v['assignation'] for it in items for k, v in it.items()} - def infer_cell_dependencies(self, source, confs): - header = self._extract_header(source) - items = header['NaaVRE']['cell'].get('dependencies', []) + def infer_cell_dependencies(self, confs): + if self.cell_header is None: + return [] + items = self.cell_header['NaaVRE']['cell'].get('dependencies', []) return [ { 'name': it.get('name'), diff --git a/jupyterlab_vre/services/extractor/pyextractor.py b/jupyterlab_vre/services/extractor/pyextractor.py index d08bd4636..dae2978a0 100644 --- a/jupyterlab_vre/services/extractor/pyextractor.py +++ b/jupyterlab_vre/services/extractor/pyextractor.py @@ -8,15 +8,17 @@ from pytype import config as pytype_config from pytype.tools.annotate_ast import annotate_ast +from .extractor import Extractor -class PyExtractor: + +class PyExtractor(Extractor): sources: list imports: dict configurations: dict global_params: dict undefined: dict - def __init__(self, notebook): + def __init__(self, notebook, cell_source): # If cell_type is code and not starting with '!' self.sources = [nbcell.source for nbcell in notebook.cells if nbcell.cell_type == 'code' and len(nbcell.source) > 0 and nbcell.source[0] != '!'] @@ -31,6 +33,8 @@ def __init__(self, notebook): for source in self.sources: self.undefined.update(self.__extract_cell_undefined(source)) + super().__init__(notebook, cell_source) + def __extract_imports(self, sources): imports = {} for s in sources: @@ -92,20 +96,20 @@ def __extract_params(self, sources): } return params - def infer_cell_outputs(self, cell_source): - cell_names = self.__extract_cell_names(cell_source) + def infer_cell_outputs(self): + cell_names = self.__extract_cell_names(self.cell_source) return { name: properties for name, properties in cell_names.items() - if name not in self.__extract_cell_undefined(cell_source) + if name not in self.__extract_cell_undefined(self.cell_source) and name not in self.imports and name in self.undefined and name not in self.configurations and name not in self.global_params } - def infer_cell_inputs(self, cell_source): - cell_undefined = self.__extract_cell_undefined(cell_source) + def infer_cell_inputs(self): + cell_undefined = self.__extract_cell_undefined(self.cell_source) return { und: properties for und, properties in cell_undefined.items() @@ -114,9 +118,9 @@ def infer_cell_inputs(self, cell_source): and und not in self.global_params } - def infer_cell_dependencies(self, cell_source, confs): + def infer_cell_dependencies(self, confs): dependencies = [] - names = self.__extract_cell_names(cell_source) + names = self.__extract_cell_names(self.cell_source) for ck in confs: names.update(self.__extract_cell_names(confs[ck])) @@ -236,9 +240,9 @@ def extract_cell_params(self, cell_source): params[u] = self.global_params[u] return params - def extract_cell_conf_ref(self, cell_source): + def extract_cell_conf_ref(self): confs = {} - cell_unds = self.__extract_cell_undefined(cell_source) + cell_unds = self.__extract_cell_undefined(self.cell_source) conf_unds = [und for und in cell_unds if und in self.configurations] for u in conf_unds: if u not in confs: diff --git a/jupyterlab_vre/services/extractor/rextractor.py b/jupyterlab_vre/services/extractor/rextractor.py index 027d932d4..228074c3f 100644 --- a/jupyterlab_vre/services/extractor/rextractor.py +++ b/jupyterlab_vre/services/extractor/rextractor.py @@ -7,6 +7,9 @@ import rpy2.robjects.packages as rpackages from rpy2.robjects.packages import importr +from .extractor import Extractor + + # Create an R environment r_env = robjects.globalenv @@ -99,14 +102,14 @@ # TODO: create an interface such that it can be easily extended to other kernels -class RExtractor: +class RExtractor(Extractor): sources: list imports: dict configurations: dict global_params: dict undefined: dict - def __init__(self, notebook): + def __init__(self, notebook, cell_source): self.sources = [nbcell.source for nbcell in notebook.cells if nbcell.cell_type == 'code' and len(nbcell.source) > 0] @@ -117,6 +120,8 @@ def __init__(self, notebook): for source in self.sources: self.undefined.update(self.__extract_cell_undefined(source)) + super().__init__(notebook, cell_source) + def __extract_imports(self, sources): imports = {} for s in sources: @@ -213,20 +218,20 @@ def __extract_params(self, sources): # check source https://adv-r.hadley.nz/exp } return params - def infer_cell_outputs(self, cell_source): - cell_names = self.__extract_cell_names(cell_source) + def infer_cell_outputs(self): + cell_names = self.__extract_cell_names(self.cell_source) return { name: properties for name, properties in cell_names.items() - if name not in self.__extract_cell_undefined(cell_source) + if name not in self.__extract_cell_undefined(self.cell_source) and name not in self.imports and name in self.undefined and name not in self.configurations and name not in self.global_params } - def infer_cell_inputs(self, cell_source): - cell_undefined = self.__extract_cell_undefined(cell_source) + def infer_cell_inputs(self): + cell_undefined = self.__extract_cell_undefined(self.cell_source) return { und: properties for und, properties in cell_undefined.items() @@ -235,7 +240,7 @@ def infer_cell_inputs(self, cell_source): and und not in self.global_params } - def infer_cell_dependencies(self, cell_source, confs): + def infer_cell_dependencies(self, confs): # TODO: check this code, you have removed logic. # we probably like to only use dependencies that are necessary to execute the cell # however this is challenging in R as functions are non-scoped @@ -381,9 +386,9 @@ def extract_cell_params(self, cell_source): params[u] = self.global_params[u] return params - def extract_cell_conf_ref(self, cell_source): + def extract_cell_conf_ref(self): confs = {} - cell_unds = self.__extract_cell_undefined(cell_source) + cell_unds = self.__extract_cell_undefined(self.cell_source) conf_unds = [und for und in cell_unds if und in self.configurations] for u in conf_unds: if u not in confs: From f9ef870241efaea1ea4c6d665f14342bd23ad961 Mon Sep 17 00:00:00 2001 From: Gabriel Pelouze Date: Wed, 20 Mar 2024 14:45:41 +0100 Subject: [PATCH 2/5] fix test_extractor --- jupyterlab_vre/tests/test_extractor.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/jupyterlab_vre/tests/test_extractor.py b/jupyterlab_vre/tests/test_extractor.py index 8d5601af2..d755c7184 100644 --- a/jupyterlab_vre/tests/test_extractor.py +++ b/jupyterlab_vre/tests/test_extractor.py @@ -27,12 +27,12 @@ def create_cell(payload_path=None): cell_index = payload['cell_index'] notebook = nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT) + source = notebook.cells[cell_index].source if payload['kernel'] == "IRkernel": - extractor = RExtractor(notebook) + extractor = RExtractor(notebook, source) else: - extractor = PyExtractor(notebook) + extractor = PyExtractor(notebook, source) - source = notebook.cells[cell_index].source title = source.partition('\n')[0] title = title.replace('#', '').replace( '_', '-').replace('(', '-').replace(')', '-').replace('.', '-').strip() if title and title[ @@ -53,11 +53,11 @@ def create_cell(payload_path=None): # Check if cell is code. If cell is for example markdown we get execution from 'extractor.infere_cell_inputs( # source)' if notebook.cells[cell_index].cell_type == 'code': - ins = extractor.infer_cell_inputs(source) - outs = extractor.infer_cell_outputs(source) + ins = extractor.infer_cell_inputs() + outs = extractor.infer_cell_outputs() - confs = extractor.extract_cell_conf_ref(source) - dependencies = extractor.infer_cell_dependencies(source, confs) + confs = extractor.extract_cell_conf_ref() + dependencies = extractor.infer_cell_dependencies(confs) node_id = str(uuid.uuid4())[:7] cell = Cell( From 6eb9ca3f6117769a02ac0290d36320c71ba8f788 Mon Sep 17 00:00:00 2001 From: Gabriel Pelouze Date: Mon, 25 Mar 2024 15:39:05 +0100 Subject: [PATCH 3/5] remove notebook_containerizer --- .github/dependabot.yml | 5 - Makefile | 4 - docker/vanilla/dev.Dockerfile | 5 +- jupyterlab_vre/__init__.py | 3 - .../notebook_containerizer/__init__.py | 0 .../notebook_containerizer/handlers.py | 104 ------------------ packages/notebook-containerizer/package.json | 48 -------- .../src/NotebookContainerizerDialog.tsx | 62 ----------- packages/notebook-containerizer/src/Theme.ts | 16 --- packages/notebook-containerizer/src/index.tsx | 77 ------------- .../notebook-containerizer/style/index.css | 1 - packages/notebook-containerizer/tsconfig.json | 13 --- 12 files changed, 2 insertions(+), 336 deletions(-) delete mode 100644 jupyterlab_vre/notebook_containerizer/__init__.py delete mode 100644 jupyterlab_vre/notebook_containerizer/handlers.py delete mode 100644 packages/notebook-containerizer/package.json delete mode 100644 packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx delete mode 100644 packages/notebook-containerizer/src/Theme.ts delete mode 100644 packages/notebook-containerizer/src/index.tsx delete mode 100644 packages/notebook-containerizer/style/index.css delete mode 100644 packages/notebook-containerizer/tsconfig.json diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 62799f60d..549d4ba2b 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -25,11 +25,6 @@ updates: schedule: interval: "weekly" - - package-ecosystem: "npm" - directory: "/packages/notebook-containerizer" - schedule: - interval: "weekly" - - package-ecosystem: "npm" directory: "/packages/notebook-search" schedule: diff --git a/Makefile b/Makefile index bfb28abef..190b72162 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ install-backend: build-backend build-frontend: jlpm-install npx lerna run build --scope @jupyter_vre/chart-customs npx lerna run build --scope @jupyter_vre/core - npx lerna run build --scope @jupyter_vre/notebook-containerizer npx lerna run build --scope @jupyter_vre/notebook-search npx lerna run build --scope @jupyter_vre/dataset-search npx lerna run build --scope @jupyter_vre/components @@ -59,7 +58,6 @@ install-ui: $(call INSTALL_LAB_EXTENSION,notebook-search) $(call INSTALL_LAB_EXTENSION,dataset-search) $(call INSTALL_LAB_EXTENSION,core) - $(call INSTALL_LAB_EXTENSION,notebook-containerizer) $(call INSTALL_LAB_EXTENSION,chart-customs) $(call INSTALL_LAB_EXTENSION,components) $(call INSTALL_LAB_EXTENSION,experiment-manager) @@ -70,7 +68,6 @@ link-ui: $(call LINK_LAB_EXTENSION,notebook-search) $(call LINK_LAB_EXTENSION,dataset-search) $(call LINK_LAB_EXTENSION,core) - $(call LINK_LAB_EXTENSION,notebook-containerizer) $(call LINK_LAB_EXTENSION,chart-customs) $(call LINK_LAB_EXTENSION,components) $(call LINK_LAB_EXTENSION,experiment-manager) @@ -80,7 +77,6 @@ link-ui: dist-ui: build-frontend mkdir -p dist $(call PACKAGE_LAB_EXTENSION,core) - $(call PACKAGE_LAB_EXTENSION,notebook-containerizer) $(call PACKAGE_LAB_EXTENSION,chart-customs) $(call PACKAGE_LAB_EXTENSION,components) $(call PACKAGE_LAB_EXTENSION,experiment-manager) diff --git a/docker/vanilla/dev.Dockerfile b/docker/vanilla/dev.Dockerfile index 81d13818c..02b15ff26 100644 --- a/docker/vanilla/dev.Dockerfile +++ b/docker/vanilla/dev.Dockerfile @@ -41,7 +41,6 @@ COPY packages/chart-customs/package.json packages/chart-customs/ COPY packages/components/package.json packages/components/ COPY packages/core/package.json packages/core/ COPY packages/experiment-manager/package.json packages/experiment-manager/ -COPY packages/notebook-containerizer/package.json packages/notebook-containerizer/ COPY packages/notebook-search/package.json packages/notebook-search/ COPY packages/vre-menu/package.json packages/vre-menu/ COPY packages/vre-panel/package.json packages/vre-panel/ @@ -69,11 +68,11 @@ RUN jupyter serverextension enable --py jupyterlab_vre --user WORKDIR /live/ts COPY --chown=$NB_USER:users packages/ packages/ COPY --chown=$NB_USER:users tsconfig-base.json . -RUN extensions="chart-customs core notebook-containerizer notebook-search components experiment-manager vre-panel vre-menu"; \ +RUN extensions="chart-customs core notebook-search components experiment-manager vre-panel vre-menu"; \ for ext in $extensions; do \ npx lerna run build --scope "@jupyter_vre/$ext"; \ done -RUN extensions="chart-customs core notebook-containerizer notebook-search components experiment-manager vre-panel vre-menu"; \ +RUN extensions="chart-customs core notebook-search components experiment-manager vre-panel vre-menu"; \ for ext in $extensions; do \ jupyter labextension link --no-build "packages/$ext"; \ done diff --git a/jupyterlab_vre/__init__.py b/jupyterlab_vre/__init__.py index 2e3ad83e2..d7344a1c8 100644 --- a/jupyterlab_vre/__init__.py +++ b/jupyterlab_vre/__init__.py @@ -6,7 +6,6 @@ from .dataset_search.handlers import DatasetSearchHandler, DatasetDownloadHandler from .experiment_manager.handlers import ExportWorkflowHandler, ExecuteWorkflowHandler from .handlers import CatalogGetAllHandler -from .notebook_containerizer.handlers import NotebookExtractorHandler from .notebook_search.handlers import NotebookSearchHandler, NotebookSearchRatingHandler, NotebookDownloadHandler, \ NotebookSeachHistoryHandler, NotebookSourceHandler from .registries.handlers import RegistriesHandler @@ -33,8 +32,6 @@ def load_jupyter_server_extension(lab_app): ( url_path_join(lab_app.web_app.settings['base_url'], r'/vre/notebooksearchrating'), NotebookSearchRatingHandler), (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/extract'), ExtractorHandler), - ( - url_path_join(lab_app.web_app.settings['base_url'], r'/vre/nbcontainerizer/extract'), NotebookExtractorHandler), (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/types'), TypesHandler), (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/baseimage'), BaseImageHandler), (url_path_join(lab_app.web_app.settings['base_url'], r'/vre/containerizer/addcell'), CellsHandler), diff --git a/jupyterlab_vre/notebook_containerizer/__init__.py b/jupyterlab_vre/notebook_containerizer/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/jupyterlab_vre/notebook_containerizer/handlers.py b/jupyterlab_vre/notebook_containerizer/handlers.py deleted file mode 100644 index b1a871783..000000000 --- a/jupyterlab_vre/notebook_containerizer/handlers.py +++ /dev/null @@ -1,104 +0,0 @@ -import copy -import json -import logging -import os -import uuid - -import nbformat as nb -from notebook.base.handlers import APIHandler -from tornado import web - -from jupyterlab_vre.database.catalog import Catalog -from jupyterlab_vre.database.cell import Cell -from jupyterlab_vre.services.converter.converter import ConverterReactFlowChart -from jupyterlab_vre.services.extractor.pyextractor import PyExtractor -from jupyterlab_vre.services.extractor.rextractor import RExtractor - - -# TODO: we might have to do something similar here where we have to determine the kernel and based on that get the extractor - -class NotebookExtractorHandler(APIHandler, Catalog): - - @web.authenticated - async def get(self): - msg_json = dict(title='Operation not supported.') - self.write(msg_json) - self.flush() - - @web.authenticated - async def post(self, *args, **kwargs): - - payload = self.get_json_body() - logging.getLogger(__name__).debug('NotebookExtractorHandler. payload: ' + json.dumps(payload, indent=4)) - print('----------------------------------------------') - print('NotebookExtractorHandler. payload: ' + json.dumps(payload, indent=4)) - print('----------------------------------------------') - notebook = nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT) - kernel = payload['kernel'] - if kernel == "IRkernel": - extractor = RExtractor(notebook) - else: - extractor = PyExtractor(notebook) - source = '' - params = set() - confs = set() - ins = dict() - outs = extractor.infer_cell_outputs(notebook.cells[len(notebook.cells) - 1].source) - title = '' - for cell_source in extractor.sources: - p = extractor.extract_cell_params(cell_source) - params.update(p) - c = extractor.extract_cell_conf_ref(source) - confs.update(c) - source += cell_source + '\n' - - if not title: - title = cell_source.partition('\n')[0].strip() - title = 'notebook-' + title.replace('#', '').replace('_', '-').replace('(', '-').replace(')', '-'). \ - replace('.', '-').strip() if title[0] == '#' \ - else 'Untitled' - if 'JUPYTERHUB_USER' in os.environ: - title += '-' + os.environ['JUPYTERHUB_USER'].replace('_', '-').replace('(', '-').replace(')', - '-').replace( - '.', '-').replace('@', - '-at-').strip() - dependencies = extractor.infer_cell_dependencies(source, confs) - - node_id = str(uuid.uuid4())[:7] - cell = Cell( - node_id=node_id, - title=title, - task_name=title.lower().replace(' ', '-').replace('.', '-'), - original_source=source, - inputs=ins, - outputs=outs, - params=list(params), - confs=list(confs), - dependencies=list(dependencies), - container_source='' - ) - cell.integrate_configuration() - node = ConverterReactFlowChart.get_node( - node_id, - title, - set(ins), - set(outs), - params, - ) - - chart = { - 'offset': { - 'x': 0, - 'y': 0, - }, - 'scale': 1, - 'nodes': {node_id: node}, - 'links': {}, - 'selected': {}, - 'hovered': {}, - } - cell.chart_obj = chart - Catalog.editor_buffer = copy.deepcopy(cell) - logging.getLogger(__name__).debug('NotebookExtractorHandler. cell: ' + str(cell.toJSON())) - self.write(cell.toJSON()) - self.flush() diff --git a/packages/notebook-containerizer/package.json b/packages/notebook-containerizer/package.json deleted file mode 100644 index b99c23816..000000000 --- a/packages/notebook-containerizer/package.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "name": "@jupyter_vre/notebook-containerizer", - "version": "1.0.0", - "description": "NaaVRE Notebook Containerizer", - "keywords": [ - "jupyter", - "jupyterlab", - "jupyterlab-extension" - ], - "homepage": "https://github.com/QCDIS/NaaVRE", - "bugs": { - "url": "https://github.com/QCDIS/NaaVRE/issues" - }, - "license": "BSD-3-Clause", - "author": "Riccardo Bianchi", - "files": [ - "lib/**/*.{d.ts,eot,gif,html,jpg,js,js.map,json,png,svg,woff2,ttf}", - "src/**/*.{ts,tsx}", - "style/**/*.{css,eot,gif,html,jpg,json,png,svg,woff2,ttf}" - ], - "main": "lib/index.js", - "types": "lib/index.d.ts", - "style": "style/index.css", - "repository": { - "type": "git", - "url": "https://github.com/QCDIS/NaaVRE.git" - }, - "scripts": { - "build": "tsc", - "dist": "yarn pack", - "clean": "rimraf lib", - "watch": "tsc -w" - }, - "dependencies": { - "@jupyterlab/notebook": "3.5.3" - }, - "devDependencies": { - "@types/node": "20.2.5", - "rimraf": "5.0.0", - "typescript": "5.0.2" - }, - "publishConfig": { - "access": "public" - }, - "jupyterlab": { - "extension": true - } -} \ No newline at end of file diff --git a/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx b/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx deleted file mode 100644 index cd4e4abf3..000000000 --- a/packages/notebook-containerizer/src/NotebookContainerizerDialog.tsx +++ /dev/null @@ -1,62 +0,0 @@ -import { INotebookModel, INotebookTracker } from '@jupyterlab/notebook'; -import { styled, ThemeProvider } from '@material-ui/core'; -import * as React from 'react'; -import { theme } from './Theme'; -import { requestAPI } from '@jupyter_vre/core'; - -interface IState { -} - -export const DefaultState: IState = { -} - -const CatalogBody = styled('div')({ - display: 'flex', - overflow: 'hidden', - flexDirection: 'row', -}) - -interface NotebookContainerizerDialogProps { - - notebookTracker: INotebookTracker -} - -export class NotebookContainerizerDialog extends React.Component { - - state = DefaultState - - constructor(props: NotebookContainerizerDialogProps) { - super(props); - } - - exctractor = async (notebookModel: INotebookModel, _save = false) => { - try { - const resp = await requestAPI('nbcontainerizer/extract', { - body: JSON.stringify({ - notebook: notebookModel.toJSON() - }), - method: 'POST' - }); - - console.log(resp); - - } catch (error) { - console.log(error); - } - } - - componentDidMount(): void { - - this.exctractor(this.props.notebookTracker.currentWidget.model); - } - - render(): React.ReactElement { - return ( - -

Containerize Notebook

- - -
- ) - } -} \ No newline at end of file diff --git a/packages/notebook-containerizer/src/Theme.ts b/packages/notebook-containerizer/src/Theme.ts deleted file mode 100644 index 5a98d6fdf..000000000 --- a/packages/notebook-containerizer/src/Theme.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { createTheme } from '@material-ui/core'; - -export const theme = createTheme({ - palette: { - secondary: { - main: '#ea5b2d', - dark: '#b12800', - light: '#ff8c5a', - }, - primary: { - main: '#0f4e8a', - dark: '#00275c', - light: '#4e79ba', - }, - }, -}); diff --git a/packages/notebook-containerizer/src/index.tsx b/packages/notebook-containerizer/src/index.tsx deleted file mode 100644 index 3e36ebacc..000000000 --- a/packages/notebook-containerizer/src/index.tsx +++ /dev/null @@ -1,77 +0,0 @@ -import { - JupyterFrontEnd, - JupyterFrontEndPlugin -} from '@jupyterlab/application'; -import { ToolbarButton, ReactWidget, Dialog, showDialog } from '@jupyterlab/apputils'; -import { DocumentRegistry } from '@jupyterlab/docregistry'; -import { - INotebookModel, INotebookTracker, NotebookPanel -} from '@jupyterlab/notebook'; -import { DisposableDelegate, IDisposable } from '@lumino/disposable'; -import * as React from 'react'; -import { NotebookContainerizerDialog } from './NotebookContainerizerDialog'; - -/** - * The plugin registration information. - */ -const plugin: JupyterFrontEndPlugin = { - activate, - id: 'toolbar-containerize-notebook', - autoStart: true, - requires: [INotebookTracker] -}; - -export class NotebookSearchExtension implements DocumentRegistry.IWidgetExtension { - - notebookTracker: INotebookTracker - - constructor(notebookTracker: INotebookTracker) { - this.notebookTracker = notebookTracker; - } - - createNew( - panel: NotebookPanel, - _context: DocumentRegistry.IContext - ): IDisposable { - - const containerizeNotebook = () => { - - const catalogOptions: Partial> = { - title: '', - body: ReactWidget.create( - - ) as Dialog.IBodyWidget, - buttons: [] - }; - - - showDialog(catalogOptions); - }; - - const button = new ToolbarButton({ - className: 'notebook-containerizer', - label: 'Notebooks Containerizer', - onClick: containerizeNotebook, - tooltip: 'Notebooks Containerizer', - }); - - panel.toolbar.insertItem(10, 'containerizeNotebooks', button); - return new DisposableDelegate(() => { - button.dispose(); - }); - } -} - -/** - * Activate the extension. - * - * @param app Main application object - */ -function activate(app: JupyterFrontEnd, notebookTracker: INotebookTracker): void { - app.docRegistry.addWidgetExtension('Notebook', new NotebookSearchExtension(notebookTracker)); -} - -/** - * Export the plugin as default. - */ -export default plugin; \ No newline at end of file diff --git a/packages/notebook-containerizer/style/index.css b/packages/notebook-containerizer/style/index.css deleted file mode 100644 index 6e02bdfae..000000000 --- a/packages/notebook-containerizer/style/index.css +++ /dev/null @@ -1 +0,0 @@ -/* Fill */ \ No newline at end of file diff --git a/packages/notebook-containerizer/tsconfig.json b/packages/notebook-containerizer/tsconfig.json deleted file mode 100644 index 5df522e9b..000000000 --- a/packages/notebook-containerizer/tsconfig.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "extends": "../../tsconfig-base", - "compilerOptions": { - "module": "commonjs", - "outDir": "lib", - "rootDir": "src", - "target": "es6", - "types": ["node"], - "typeRoots": ["./node_modules/@types"], - "skipLibCheck": true - }, - "include": ["src/*", "src/*/*"] -} \ No newline at end of file From 4c9ff69a925cdf8e88744286f50710cf85e97fe3 Mon Sep 17 00:00:00 2001 From: Gabriel Pelouze Date: Mon, 25 Mar 2024 16:41:25 +0100 Subject: [PATCH 4/5] enable partial override for code analyzer --- .../component_containerizer/handlers.py | 10 ++-- .../services/extractor/headerextractor.py | 56 ++++++++++++++++--- 2 files changed, 55 insertions(+), 11 deletions(-) diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py index 27e3757b2..904d38d7c 100644 --- a/jupyterlab_vre/component_containerizer/handlers.py +++ b/jupyterlab_vre/component_containerizer/handlers.py @@ -144,12 +144,14 @@ async def post(self, *args, **kwargs): self.flush() return - # extractor based on the kernel (if cell header is not defined) - if not extractor.enabled(): + # Extractor based on code analysis. Used if the cell has no header, + # or if some values are not specified in the header + if not extractor.is_complete(): if kernel == "IRkernel": - extractor = RExtractor(notebook, source) + code_extractor = RExtractor(notebook, source) else: - extractor = PyExtractor(notebook, source) + code_extractor = PyExtractor(notebook, source) + extractor.add_missing_values(code_extractor) extracted_nb = extract_cell_by_index(notebook, cell_index) if kernel == "IRkernel": diff --git a/jupyterlab_vre/services/extractor/headerextractor.py b/jupyterlab_vre/services/extractor/headerextractor.py index 9e22ac362..38429c800 100644 --- a/jupyterlab_vre/services/extractor/headerextractor.py +++ b/jupyterlab_vre/services/extractor/headerextractor.py @@ -41,6 +41,11 @@ class HeaderExtractor(Extractor): The document is validated with the schema `cell_header.schema.json` """ + ins: Union[dict, None] + outs: Union[dict, None] + params: Union[dict, None] + confs: Union[list, None] + dependencies: Union[list, None] def __init__(self, notebook, cell_source): self.re_yaml_doc_in_comment = re.compile( @@ -52,6 +57,7 @@ def __init__(self, notebook, cell_source): re.MULTILINE) self.schema = self._load_schema() self.cell_header = self._extract_header(cell_source) + self._external_extract_cell_params = None super().__init__(notebook, cell_source) @@ -67,6 +73,15 @@ def _load_schema(): def enabled(self): return self.cell_header is not None + def is_complete(self): + return ( + (self.ins is not None) + and (self.outs is not None) + and (self.params is not None) + and (self.confs is not None) + and (self.dependencies is not None) + ) + def _extract_header(self, cell_source): # get yaml document from cell comments m = self.re_yaml_doc_in_comment.match(cell_source) @@ -88,6 +103,25 @@ def _extract_header(self, cell_source): raise e return header + def add_missing_values(self, extractor: Extractor): + """ Add values not specified in the header from another extractor + (e.g. PyExtractor or RExtractor) + """ + if self.ins is None: + self.ins = extractor.ins + if self.outs is None: + self.outs = extractor.outs + if self.params is None: + self.params = extractor.params + # We store a reference to extractor.extract_cell_params because + # self.extract_cell_params is called after self.add_missing_values + # in component_containerizer.handlers.ExtractorHandler.post() + self._external_extract_cell_params = extractor.extract_cell_params + if self.confs is None: + self.confs = extractor.confs + if self.dependencies is None: + self.dependencies = extractor.dependencies + @staticmethod def _parse_inputs_outputs_param_items( item: Union[str, dict], @@ -158,10 +192,12 @@ def _infer_cell_inputs_outputs_params( self, header: Union[dict, None], item_type: Literal['inputs', 'outputs', 'params'], - ) -> dict: + ) -> Union[dict, None]: if header is None: - return {} - items = header['NaaVRE']['cell'].get(item_type, []) + return None + items = header['NaaVRE']['cell'].get(item_type) + if items is None: + return None items = [self._parse_inputs_outputs_param_items(it, item_type) for it in items] return {it['name']: it for it in items} @@ -179,6 +215,8 @@ def infer_cell_outputs(self): ) def extract_cell_params(self, source): + if self._external_extract_cell_params is not None: + return self._external_extract_cell_params(source) return self._infer_cell_inputs_outputs_params( self._extract_header(source), 'params', @@ -186,14 +224,18 @@ def extract_cell_params(self, source): def extract_cell_conf_ref(self): if self.cell_header is None: - return {} - items = self.cell_header['NaaVRE']['cell'].get('confs', []) + return None + items = self.cell_header['NaaVRE']['cell'].get('confs') + if items is None: + return None return {k: v['assignation'] for it in items for k, v in it.items()} def infer_cell_dependencies(self, confs): if self.cell_header is None: - return [] - items = self.cell_header['NaaVRE']['cell'].get('dependencies', []) + return None + items = self.cell_header['NaaVRE']['cell'].get('dependencies') + if items is None: + return None return [ { 'name': it.get('name'), From 027ccba6cb62d376f49aabc4252d11bc583b5017 Mon Sep 17 00:00:00 2001 From: Gabriel Pelouze Date: Mon, 25 Mar 2024 16:56:31 +0100 Subject: [PATCH 5/5] remove deps from cell identity should have followed 5166a15d3a5b1328a70a4566ef54e48be4d6e36c in PR #1257 --- jupyterlab_vre/component_containerizer/handlers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/jupyterlab_vre/component_containerizer/handlers.py b/jupyterlab_vre/component_containerizer/handlers.py index 904d38d7c..fd3936207 100644 --- a/jupyterlab_vre/component_containerizer/handlers.py +++ b/jupyterlab_vre/component_containerizer/handlers.py @@ -173,7 +173,6 @@ async def post(self, *args, **kwargs): 'params': extractor.params, 'inputs': extractor.ins, 'outputs': extractor.outs, - 'deps': sorted(extractor.dependencies, key=lambda x: x['name']), } cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True) node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]