Skip to content

Commit

Permalink
Merge pull request #714 from berkeley-dsep-infra/staging
Browse files Browse the repository at this point in the history
Merge #713 to prod
  • Loading branch information
yuvipanda authored May 14, 2019
2 parents 93453ca + acf463e commit 15f06f0
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 4 deletions.
11 changes: 11 additions & 0 deletions deployments/w261/config/common.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
jupyterhub:
hub:
extraConfig:
90-sparklyspawner: |
import z2jh
c.JupyterHub.spawner_class = 'sparklyspawner.SparklySpawner'
sparkly_config = z2jh.get_config('custom.sparkly', {})
c.SparklySpawner.gcp_service_key = sparkly_config.get('gcp_service_key', None)
c.SparklySpawner.storage_bucket_template = 'ucb-datahub-2018-w261-test-user-{username}'
c.SparklySpawner.service_account_template = 'w261-s-{username}'
c.SparklySpawner.gcp_project = 'ucb-datahub-2018'
auth:
type: google
admin:
Expand Down
Binary file modified deployments/w261/secrets/staging.yaml
Binary file not shown.
2 changes: 1 addition & 1 deletion hub/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ apiVersion: v1
appVersion: '1.0'
description: Deployment Chart for JupyterHub
name: hub
version: 0.1.0-facec6f
version: 0.1.0-a34aa77
6 changes: 3 additions & 3 deletions hub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jupyterhub:
# Generated by chartpress
image:
name: gcr.io/ucb-datahub-2018/jupyterhub-hub
tag: '0.1.0-401ee46'
tag: '0.1.0-a34aa77'
networkPolicy:
enabled: true
extraConfig:
Expand All @@ -47,15 +47,15 @@ jupyterhub:
hosted_domain = 'berkeley.edu'
course_profile_tmpl = '/srv/jupyterhub/profiles.d/{}-{}.txt'
def memparse(val):
    '''Parse memory for relative comparisons.

    Converts a memory string made of an integer followed by a one-letter
    unit suffix (B, K, M or G, in either case) into a number of bytes,
    using powers of 1024.

    Values that are not strings, and the empty string, are returned
    unchanged so they can be passed straight through to a comparison.

    Raises ValueError if the part before the suffix is not an integer,
    and KeyError if the suffix is not one of B, K, M, G.
    '''
    if not isinstance(val, str) or len(val) == 0:
        return val
    # Upper-case once so that the suffix lookup is case-insensitive;
    # previously only the numeric part was upper-cased, so '512m' failed.
    text = val.upper()
    mem = int(text[:-1])
    unit = text[-1]
    n = {'B': 0, 'K': 1, 'M': 2, 'G': 3}[unit]
    return mem * 1024**n
def mem_cmp(a, b):
'''Compare memory values.'''
x = memparse(a) ; y = memparse(b)
Expand Down
17 changes: 17 additions & 0 deletions images/hub/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Custom JupyterHub hub image: the upstream k8s-hub image plus the Google
# Cloud SDK, kubectl and the sparklyspawner package.
#
# Should match the hub image used by version of chart in hub/requirements.yaml
# If that changes, this should be changed too!
FROM jupyterhub/k8s-hub:7b33351

# The package installs and writes to /usr/local and /srv below are done as root.
USER root
# curl is used below to download the Cloud SDK tarball.
# NOTE(review): python is presumably required by the Cloud SDK tooling - confirm.
RUN apt update && apt install --yes curl python

# Download the pinned Cloud SDK release (245.0.0), unpack it into
# /usr/local/google-cloud-sdk and delete the tarball afterwards.
RUN curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-245.0.0-linux-x86_64.tar.gz && \
tar xzf google-cloud-sdk-245.0.0-linux-x86_64.tar.gz && \
mv google-cloud-sdk /usr/local/google-cloud-sdk && \
rm google-cloud-sdk-245.0.0-linux-x86_64.tar.gz

# Put the SDK's bin directory first on PATH so gcloud can be invoked below.
ENV PATH /usr/local/google-cloud-sdk/bin:${PATH}
# Install kubectl as a Cloud SDK component.
RUN gcloud components install kubectl

# Copy the sparklyspawner package source from the build context into the image.
COPY sparklyspawner /srv/sparklyspawner

# Install sparklyspawner (and the dependencies declared in its setup.py) with python3's pip.
RUN python3 -m pip install --no-cache /srv/sparklyspawner

# Switch away from root for the running container.
# NOTE(review): NB_USER is not defined in this file; presumably it is set by
# the base image - confirm.
USER ${NB_USER}
13 changes: 13 additions & 0 deletions images/hub/sparklyspawner/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from setuptools import setup, find_packages

# Google client libraries that are installed alongside the sparklyspawner
# package.
REQUIREMENTS = [
    'google-cloud-storage',
    'google-cloud-iam',
    'google-api-python-client',
]

# Package metadata for the spawner; every package found next to this
# file is included in the distribution.
setup(
    name='jupyterhub-sparklykubespawner',
    version='0.1dev',
    python_requires='>=3.5',
    packages=find_packages(),
    install_requires=REQUIREMENTS,
)
124 changes: 124 additions & 0 deletions images/hub/sparklyspawner/sparklyspawner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from kubespawner import KubeSpawner
from traitlets import Unicode
from google.cloud import storage
from google.api_core.exceptions import Conflict
from google.api_core.iam import Policy
from googleapiclient.errors import HttpError
import tempfile
from google.oauth2 import service_account
import googleapiclient.discovery
import json
import base64

class SparklySpawner(KubeSpawner):
    """
    KubeSpawner that sets up per-user Google Cloud resources before spawning.

    Before the user server is started, a storage bucket and a service
    account are created for the user (if they do not already exist), the
    service account is granted object admin rights on the bucket, and the
    bucket name and a service account key are passed to the server via the
    SPARK_GCS_BUCKET and SPARK_GCS_KEY environment variables.
    """

    gcp_service_key = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Google Service Account JSON key for authenticating to GCP.
        *Must* be set for SparklySpawner to work
        """
    )

    gcp_project = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Name of Google Cloud Project this hub is running in.
        *Must* be set.
        """
    )

    storage_bucket_template = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Template to use when creating storage buckets for users.
        {username} is expanded.
        """
    )

    service_account_template = Unicode(
        None,
        allow_none=True,
        config=True,
        help="""
        Template to use when creating service accounts for users.
        {username} is expanded.
        """
    )

    async def ensure_gcp_resources(self):
        """
        Create the user's bucket, service account and key if they are missing.

        Sets SPARK_GCS_BUCKET and SPARK_GCS_KEY in ``self.environment``.

        Raises ValueError if any of the required configurables is unset, and
        re-raises any HttpError other than a 409 (conflict) from service
        account creation.
        """
        # Fail early with a readable message instead of an AttributeError
        # on None further down.
        required = (
            'gcp_service_key',
            'gcp_project',
            'storage_bucket_template',
            'service_account_template',
        )
        for trait_name in required:
            if not getattr(self, trait_name):
                raise ValueError(f'SparklySpawner.{trait_name} must be set')

        # The client constructors used here take a path to a key file, so
        # the configured key is written to a temporary file for them.
        with tempfile.NamedTemporaryFile() as f:
            f.write(self.gcp_service_key.encode())
            f.flush()
            storage_client = storage.Client.from_service_account_json(f.name)

            credentials = service_account.Credentials.from_service_account_file(
                f.name, scopes=['https://www.googleapis.com/auth/cloud-platform'])
            service = googleapiclient.discovery.build('iam', 'v1', credentials=credentials)

        # Create bucket if it doesn't exist
        # FIXME: Don't use a private method?
        bucket_name = self._expand_all(self.storage_bucket_template)

        bucket = storage.Bucket(storage_client, bucket_name)

        try:
            bucket.create()
            self.log.info(f'Creating {bucket_name}')
        except Conflict:
            # Bucket already exists
            self.log.info(f'Not creating {bucket_name}, it already exists')
        self.environment['SPARK_GCS_BUCKET'] = bucket_name

        # This is how service account emails are formatted
        # FIXME: Clip this to 30char
        sa_name = self._expand_all(self.service_account_template)
        sa_email = f'{sa_name}@{self.gcp_project}.iam.gserviceaccount.com'
        try:
            sa = service.projects().serviceAccounts().create(
                name=f'projects/{self.gcp_project}',
                body={
                    'accountId': sa_name,
                    'serviceAccount': {'displayName': sa_name}
                }).execute()
            # We assume this create call will create a service account with email sa_email
            assert sa_email == sa['email']
            self.log.info(f'Created service account {sa["email"]}')
        except HttpError as e:
            if e.resp.status == 409:
                # 409 means the account is already there; nothing was created
                self.log.info(f'Not creating service account {sa_email}, it already exists')
            else:
                raise

        # Grant SA access to bucket if it isn't present
        policy = bucket.get_iam_policy()
        role = 'roles/storage.objectAdmin'
        member = Policy.service_account(sa_email)
        if member not in policy.get(role, set()):
            policy[role].add(member)
            bucket.set_iam_policy(policy)

        # Check if _key exists in bucket. This is where we store private key
        key_blob = bucket.blob('__key__.json')
        if not key_blob.exists():
            key = service.projects().serviceAccounts().keys().create(
                name=f'projects/{self.gcp_project}/serviceAccounts/{sa_email}', body={}
            ).execute()
            # Persist the key so later spawns reuse it instead of creating a
            # new service account key on every start.
            key_blob.upload_from_string(json.dumps(key))
        else:
            key = json.loads(key_blob.download_as_string())

        # privateKeyData is base64 encoded; the decoded text is what gets
        # handed to the user server.
        self.environment['SPARK_GCS_KEY'] = base64.b64decode(key['privateKeyData']).decode()

    async def start(self):
        """
        Ensure the user's GCP resources exist, then start the server as usual.
        """
        self.log.info('Testing Sparkly spawner')
        await self.ensure_gcp_resources()
        return await super().start()

0 comments on commit 15f06f0

Please sign in to comment.