Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1435 extract secrets from notebooks #1436

Merged
merged 10 commits into from
May 29, 2024
140 changes: 16 additions & 124 deletions jupyterlab_vre/component_containerizer/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,11 @@
from github.GithubException import UnknownObjectException
from jinja2 import Environment, PackageLoader
from notebook.base.handlers import APIHandler
from slugify import slugify
from tornado import web

from jupyterlab_vre.database.catalog import Catalog
from jupyterlab_vre.database.cell import Cell
from jupyterlab_vre.services.containerizer.Rcontainerizer import Rcontainerizer
from jupyterlab_vre.services.converter.converter import ConverterReactFlowChart
from jupyterlab_vre.services.extractor.extractor import DummyExtractor
from jupyterlab_vre.services.extractor.pyextractor import PyExtractor
from jupyterlab_vre.services.extractor.rextractor import RExtractor
from jupyterlab_vre.services.extractor.pyheaderextractor import PyHeaderExtractor
from jupyterlab_vre.services.extractor.rheaderextractor import RHeaderExtractor
from jupyterlab_vre.services.extractor.extract_cell import extract_cell

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,22 +55,6 @@ def git_hash(contents):
return s.hexdigest()


def extract_cell_by_index(notebook, cell_index):
    """Return a deep copy of ``notebook`` reduced to a single cell.

    Args:
        notebook: Notebook object exposing a ``cells`` sequence.
        cell_index: Position of the cell to keep. Negative values index
            from the end, as with any Python sequence.

    Returns:
        A deep copy of ``notebook``. If ``cell_index`` is smaller than the
        number of cells, the copy's ``cells`` holds only the selected cell;
        otherwise the copy is returned with all of its cells.
    """
    new_nb = copy.deepcopy(notebook)
    if cell_index < len(notebook.cells):
        # Take the cell from the copy, not from the input notebook, so the
        # result shares no mutable state with the caller's notebook.
        new_nb.cells = [new_nb.cells[cell_index]]
    return new_nb


def set_notebook_kernel(notebook, kernel):
    """Return a deep copy of ``notebook`` whose kernelspec points at ``kernel``.

    The ``name``, ``display_name`` and ``language`` fields of
    ``metadata.kernelspec`` are all set to the same ``kernel`` value.
    The input notebook is left untouched.
    """
    updated = copy.deepcopy(notebook)
    kernelspec = updated.metadata.kernelspec
    # Replace kernel name in the notebook metadata
    for field in ('name', 'display_name', 'language'):
        setattr(kernelspec, field, kernel)
    return updated


def query_registry_for_image(image_repo, image_name):
m = re.match(r'^docker.io/(\w+)', image_name)
if m:
Expand Down Expand Up @@ -120,110 +97,25 @@ async def post(self, *args, **kwargs):
payload = self.get_json_body()
logging.getLogger(__name__).debug('ExtractorHandler. payload: ' + json.dumps(payload, indent=4))
print('ExtractorHandler. payload: ' + json.dumps(payload, indent=4))
kernel = payload['kernel']
cell_index = payload['cell_index']
notebook = nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT)

source = notebook.cells[cell_index].source

if notebook.cells[cell_index].cell_type != 'code':
# dummy extractor for non-code cells (e.g. markdown)
extractor = DummyExtractor(notebook, source)
else:
# extractor based on the cell header
try:
if 'python' in kernel.lower():
extractor = PyHeaderExtractor(notebook, source)
elif 'r' in kernel.lower():
extractor = RHeaderExtractor(notebook, source)
except jsonschema.ValidationError as e:
self.set_status(400, f"Invalid cell header")
self.write(
{
'message': f"Error in cell header: {e}",
'reason': None,
'traceback': traceback.format_exception(e),
try:
cell = extract_cell(
nb.reads(json.dumps(payload['notebook']), nb.NO_CONVERT),
payload['cell_index'],
payload['kernel'],
)
except jsonschema.ValidationError as e:
self.set_status(400, f"Invalid cell header")
self.write(
{
'message': f"Error in cell header: {e}",
'reason': None,
'traceback': traceback.format_exception(e),
}
)
self.flush()
return
params = extractor.params
inputs = extractor.ins
outputs = extractor.outs
confs = extractor.confs
# Extractor based on code analysis. Used if the cell has no header,
# or if some values are not specified in the header
if not extractor.is_complete():
if kernel == "IRkernel":
code_extractor = RExtractor(notebook, source)
else:
code_extractor = PyExtractor(notebook, source)
extractor.add_missing_values(code_extractor)

extracted_nb = extract_cell_by_index(notebook, cell_index)
if kernel == "IRkernel":
extracted_nb = set_notebook_kernel(extracted_nb, 'R')
else:
extracted_nb = set_notebook_kernel(extracted_nb, 'python3')

# initialize variables
title = source.partition('\n')[0].strip()
title = slugify(title) if title and title[0] == "#" else "Untitled"

if 'JUPYTERHUB_USER' in os.environ:
title += '-' + slugify(os.environ['JUPYTERHUB_USER'])

# If any of these change, we create a new cell in the catalog.
# This matches the cell properties saved in workflows.
cell_identity_dict = {
'title': title,
'params': extractor.params,
'inputs': extractor.ins,
'outputs': extractor.outs,
}
cell_identity_str = json.dumps(cell_identity_dict, sort_keys=True)
node_id = hashlib.sha1(cell_identity_str.encode()).hexdigest()[:7]

cell = Cell(
node_id=node_id,
title=title,
task_name=slugify(title.lower()),
original_source=source,
inputs=extractor.ins,
outputs=extractor.outs,
params={},
confs=extractor.confs,
dependencies=extractor.dependencies,
container_source="",
kernel=kernel,
notebook_dict=extracted_nb.dict()
)
cell.integrate_configuration()
extractor.params = extractor.extract_cell_params(cell.original_source)
cell.add_params(extractor.params)
cell.add_param_values(extractor.params)

node = ConverterReactFlowChart.get_node(
node_id,
title,
set(extractor.ins),
set(extractor.outs),
extractor.params,
)

chart = {
'offset': {
'x': 0,
'y': 0,
},
'scale': 1,
'nodes': {node_id: node},
'links': {},
'selected': {},
'hovered': {},
}
self.flush()
return

cell.chart_obj = chart
Catalog.editor_buffer = copy.deepcopy(cell)
self.write(cell.toJSON())
self.flush()
Expand Down
11 changes: 9 additions & 2 deletions jupyterlab_vre/database/cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Cell:
outputs: list
params: list
param_values: dict
secrets: list
confs: dict
dependencies: list
chart_obj: dict
Expand All @@ -38,6 +39,7 @@ def __init__(
inputs,
outputs,
params,
secrets,
confs,
dependencies,
container_source,
Expand All @@ -56,8 +58,9 @@ def __init__(
self.add_outputs(outputs)
self.add_params(params)
self.add_param_values(params)
self.add_secrets(secrets)
self.confs = confs
self.all_inputs = list(inputs) + list(params)
self.all_inputs = list(inputs) + list(params) + list(secrets)
self.dependencies = list(sorted(dependencies, key=lambda x: x['name']))
self.chart_obj = chart_obj
self.node_id = node_id
Expand Down Expand Up @@ -94,10 +97,14 @@ def add_params(self, params):
params = self._extract_types(params)
self.params = params

def add_secrets(self, secrets):
    """Store ``secrets`` on the cell as ``self.secrets``.

    A dict is first passed through ``self._extract_types``; any other
    value is stored unchanged.
    """
    if not isinstance(secrets, dict):
        self.secrets = secrets
        return
    self.secrets = self._extract_types(secrets)

def set_image_version(self, image_version):
    """Record ``image_version`` on the cell as ``self.image_version``."""
    self.image_version = image_version


def add_param_values(self, params):
self.param_values = {}
if isinstance(params, dict):
Expand Down
3 changes: 2 additions & 1 deletion jupyterlab_vre/services/converter/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
class ConverterReactFlowChart:

@staticmethod
def get_node(node_id, title, ins, outs, params):
def get_node(node_id, title, ins, outs, params, secrets):

node = {}
position = {}
Expand All @@ -21,6 +21,7 @@ def get_node(node_id, title, ins, outs, params):
properties['title'] = title
properties['vars'] = list()
properties['params'] = list(params)
properties['secrets'] = list(secrets)
properties['inputs'] = list(ins)
properties['outputs'] = list(outs)
properties['og_node_id'] = node_id
Expand Down
31 changes: 31 additions & 0 deletions jupyterlab_vre/services/extractor/cell_header.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
"$ref": "#/definitions/ParamElement"
}
},
"secrets": {
"type": "array",
"items": {
"$ref": "#/definitions/SecretElement"
}
},
"confs": {
"type": "array",
"items": {
Expand Down Expand Up @@ -134,6 +140,22 @@
}
}
},
"SecretElementVarDict": {
"title": "Full secret variable description",
"type": "object",
"additionalProperties": false,
"minProperties": 1,
"maxProperties": 1,
"patternProperties": {
"^secret_.*$": {
"type": "object",
"additionalProperties": false,
"properties": {
"type": {"$ref": "#/definitions/VarType"}
}
}
}
},
"ConfElementVarDict": {
"title": "Full param variable description",
"type": "object",
Expand Down Expand Up @@ -180,6 +202,15 @@
{"$ref": "#/definitions/ParamElementVarDict"}
]
},
"SecretElement": {
"title": "Secret element",
"description": "description secret variable elements",
"anyOf": [
{"$ref": "#/definitions/ElementVarName"},
{"$ref": "#/definitions/ElementVarNameType"},
{"$ref": "#/definitions/SecretElementVarDict"}
]
},
"ConfElement": {
"title": "Conf element",
"description": "description conf variable elements",
Expand Down
Loading
Loading