diff --git a/deployments/w261/config/common.yaml b/deployments/w261/config/common.yaml index af5bcd694..fcd8037a4 100644 --- a/deployments/w261/config/common.yaml +++ b/deployments/w261/config/common.yaml @@ -1,4 +1,15 @@ jupyterhub: + hub: + extraConfig: + 90-sparklyspawner: | + import z2jh + c.JupyterHub.spawner_class = 'sparklyspawner.SparklySpawner' + + sparkly_config = z2jh.get_config('custom.sparkly', {}) + c.SparklySpawner.gcp_service_key = sparkly_config.get('gcp_service_key', None) + c.SparklySpawner.storage_bucket_template = 'ucb-datahub-2018-w261-test-user-{username}' + c.SparklySpawner.service_account_template = 'w261-s-{username}' + c.SparklySpawner.gcp_project = 'ucb-datahub-2018' auth: type: google admin: diff --git a/deployments/w261/secrets/staging.yaml b/deployments/w261/secrets/staging.yaml index 1d3870a29..3718fb6d6 100644 Binary files a/deployments/w261/secrets/staging.yaml and b/deployments/w261/secrets/staging.yaml differ diff --git a/hub/Chart.yaml b/hub/Chart.yaml index 2b081ba58..3c328fba1 100644 --- a/hub/Chart.yaml +++ b/hub/Chart.yaml @@ -2,4 +2,4 @@ apiVersion: v1 appVersion: '1.0' description: Deployment Chart for JupyterHub name: hub -version: 0.1.0-facec6f +version: 0.1.0-a34aa77 diff --git a/hub/values.yaml b/hub/values.yaml index 530beb026..292330843 100644 --- a/hub/values.yaml +++ b/hub/values.yaml @@ -35,7 +35,7 @@ jupyterhub: # Generated by chartpress image: name: gcr.io/ucb-datahub-2018/jupyterhub-hub - tag: '0.1.0-401ee46' + tag: '0.1.0-a34aa77' networkPolicy: enabled: true extraConfig: @@ -47,7 +47,7 @@ jupyterhub: hosted_domain = 'berkeley.edu' course_profile_tmpl = '/srv/jupyterhub/profiles.d/{}-{}.txt' - + def memparse(val): '''Parse memory for relative comparisons.''' if type(val) != str or len(val) == 0: return val @@ -55,7 +55,7 @@ jupyterhub: unit = val[-1] n = {'B':0, 'K':1, 'M':2, 'G':3}[unit] return mem * 1024**n - + def mem_cmp(a, b): '''Compare memory values.''' x = memparse(a) ; y = memparse(b) diff --git 
a/images/hub/Dockerfile b/images/hub/Dockerfile index 9d8cf8d61..3340bce66 100644 --- a/images/hub/Dockerfile +++ b/images/hub/Dockerfile @@ -1,3 +1,20 @@ # Should match the hub image used by version of chart in hub/requirements.yaml # If that changes, this should be changed too! FROM jupyterhub/k8s-hub:7b33351 + +USER root +RUN apt update && apt install --yes curl python + +RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-245.0.0-linux-x86_64.tar.gz && \ + tar xzf google-cloud-sdk-245.0.0-linux-x86_64.tar.gz && \ + mv google-cloud-sdk /usr/local/google-cloud-sdk && \ + rm google-cloud-sdk-245.0.0-linux-x86_64.tar.gz + +ENV PATH /usr/local/google-cloud-sdk/bin:${PATH} +RUN gcloud components install kubectl + +COPY sparklyspawner /srv/sparklyspawner + +RUN python3 -m pip install --no-cache /srv/sparklyspawner + +USER ${NB_USER} \ No newline at end of file diff --git a/images/hub/sparklyspawner/setup.py b/images/hub/sparklyspawner/setup.py new file mode 100644 index 000000000..85f0724e5 --- /dev/null +++ b/images/hub/sparklyspawner/setup.py @@ -0,0 +1,13 @@ +from setuptools import setup, find_packages + +setup( + name='jupyterhub-sparklykubespawner', + version='0.1dev', + python_requires='>=3.5', + packages=find_packages(), + install_requires=[ + 'google-cloud-storage', + 'google-cloud-iam', + 'google-api-python-client', + ] +) diff --git a/images/hub/sparklyspawner/sparklyspawner/__init__.py b/images/hub/sparklyspawner/sparklyspawner/__init__.py new file mode 100644 index 000000000..14471800c --- /dev/null +++ b/images/hub/sparklyspawner/sparklyspawner/__init__.py @@ -0,0 +1,124 @@ +from kubespawner import KubeSpawner +from traitlets import Unicode +from google.cloud import storage +from google.api_core.exceptions import Conflict +from google.api_core.iam import Policy +from googleapiclient.errors import HttpError +import tempfile +from google.oauth2 import service_account +import googleapiclient.discovery +import json +import 
base64


class SparklySpawner(KubeSpawner):
    """KubeSpawner that provisions per-user GCP resources before spawning.

    Before a user server is started, this spawner makes sure that a storage
    bucket and an IAM service account exist for the user, grants the service
    account ``roles/storage.objectAdmin`` on the bucket, and passes the
    bucket name and a private key for the service account to the user server
    via the ``SPARK_GCS_BUCKET`` and ``SPARK_GCS_KEY`` environment variables.
    """

    gcp_service_key = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Google Service Account JSON key for authenticating to GCP.

        *Must* be set for SparklySpawner to work
        """
    )

    gcp_project = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Name of Google Cloud Project this hub is running in.

        *Must* be set.
        """
    )

    storage_bucket_template = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Template to use when creating storage buckets for users.

        {username} is expanded.
        """
    )

    service_account_template = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Template to use when creating service accounts for users.

        {username} is expanded.
        """
    )

    # Name of the object inside the user's bucket that caches the service
    # account key, so a new key is not minted on every spawn.
    _key_blob_name = '__key__.json'

    def _check_required_config(self):
        """Raise ValueError naming every required trait that is unset."""
        required = {
            'gcp_service_key': self.gcp_service_key,
            'gcp_project': self.gcp_project,
            'storage_bucket_template': self.storage_bucket_template,
            'service_account_template': self.service_account_template,
        }
        missing = sorted(name for name, value in required.items() if not value)
        if missing:
            raise ValueError(
                'SparklySpawner requires these settings to be configured: '
                + ', '.join(missing)
            )

    def _build_gcp_clients(self):
        """Return (storage client, IAM API service) authenticated with gcp_service_key."""
        # The client factories used here read the key from a file, so the
        # key is written to a temporary file that is removed when the
        # ``with`` block exits.
        with tempfile.NamedTemporaryFile() as f:
            f.write(self.gcp_service_key.encode())
            f.flush()
            storage_client = storage.Client.from_service_account_json(f.name)
            credentials = service_account.Credentials.from_service_account_file(
                f.name, scopes=['https://www.googleapis.com/auth/cloud-platform'])
        iam_service = googleapiclient.discovery.build(
            'iam', 'v1', credentials=credentials)
        return storage_client, iam_service

    def _ensure_bucket(self, storage_client, bucket_name):
        """Create the bucket if it does not exist yet and return its handle."""
        bucket = storage.Bucket(storage_client, bucket_name)
        try:
            bucket.create()
        except Conflict:
            # Bucket already exists
            self.log.info(f'Not creating {bucket_name}, it already exists')
        else:
            self.log.info(f'Creating {bucket_name}')
        return bucket

    def _ensure_service_account(self, iam_service, sa_name, sa_email):
        """Create the service account unless the API reports it already exists."""
        try:
            sa = iam_service.projects().serviceAccounts().create(
                name=f'projects/{self.gcp_project}',
                body={
                    'accountId': sa_name,
                    'serviceAccount': {'displayName': sa_name}
                }).execute()
        except HttpError as e:
            # 409 Conflict means the account is already there; anything else
            # is a real failure.
            if e.resp.status != 409:
                raise
            self.log.info(
                f'Not creating service account {sa_email}, it already exists')
        else:
            # Everything below relies on the account having the email we
            # computed. Checked explicitly (not with assert) so the check
            # survives ``python -O``.
            if sa['email'] != sa_email:
                raise RuntimeError(
                    f'Expected service account email {sa_email}, '
                    f'got {sa["email"]}')
            self.log.info(f'Created service account {sa["email"]}')

    def _grant_bucket_access(self, bucket, sa_email):
        """Give the service account objectAdmin on the bucket if it lacks it."""
        policy = bucket.get_iam_policy()
        role = 'roles/storage.objectAdmin'
        member = Policy.service_account(sa_email)
        if member not in policy.get(role, set()):
            policy[role].add(member)
            bucket.set_iam_policy(policy)

    def _ensure_service_account_key(self, iam_service, bucket, sa_email):
        """Return the service account key, creating and caching it if needed."""
        key_blob = bucket.blob(self._key_blob_name)
        if key_blob.exists():
            return json.loads(key_blob.download_as_string())

        key = iam_service.projects().serviceAccounts().keys().create(
            name=f'projects/{self.gcp_project}/serviceAccounts/{sa_email}',
            body={}
        ).execute()
        # Persist the new key so later spawns reuse it instead of creating
        # an additional key for the same service account each time.
        key_blob.upload_from_string(
            json.dumps(key), content_type='application/json')
        return key

    async def ensure_gcp_resources(self):
        """Make sure the user's bucket, service account and key exist.

        Sets ``SPARK_GCS_BUCKET`` and ``SPARK_GCS_KEY`` in
        ``self.environment``.

        Raises:
            ValueError: if any required configuration trait is unset.
            RuntimeError: if a newly created service account does not have
                the expected email address.
            HttpError: if the IAM API fails with anything other than a 409
                on service account creation.
        """
        # NOTE(review): the Google client calls below are synchronous and
        # are not awaited, so they run on the calling event loop. Consider
        # moving them to an executor if spawn latency becomes a problem.
        self._check_required_config()
        storage_client, iam_service = self._build_gcp_clients()

        # Create bucket if it doesn't exist
        # FIXME: Don't use a private method?
        bucket_name = self._expand_all(self.storage_bucket_template)
        bucket = self._ensure_bucket(storage_client, bucket_name)
        self.environment['SPARK_GCS_BUCKET'] = bucket_name

        # This is how service account emails are formatted
        # FIXME: Clip this to 30char
        sa_name = self._expand_all(self.service_account_template)
        sa_email = f'{sa_name}@{self.gcp_project}.iam.gserviceaccount.com'
        self._ensure_service_account(iam_service, sa_name, sa_email)

        # Grant SA access to bucket if it isn't present
        self._grant_bucket_access(bucket, sa_email)

        # The key is cached in the bucket; 'privateKeyData' holds the
        # base64-encoded key material.
        key = self._ensure_service_account_key(iam_service, bucket, sa_email)
        self.environment['SPARK_GCS_KEY'] = base64.b64decode(
            key['privateKeyData']).decode()

    async def start(self):
        """Provision the user's GCP resources, then start the server as KubeSpawner does."""
        self.log.info('Testing Sparkly spawner')
        await self.ensure_gcp_resources()
        return await super().start()