From 5bf17dda912b8a81c125482f02f0b531e2bb0a72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Mon, 22 May 2023 08:15:44 +0200 Subject: [PATCH] feat: upgrade to Palm This upgrade includes SSO login. Finally! We can now login to Superset via the LMS. Thanks to OARS for providing the base security manager implementation! --- CHANGELOG.md | 9 ++ README.rst | 46 +++--- changelog.d/20230519_161836_regis.md | 1 - changelog.d/20230602_095408_regis.md | 1 - setup.py | 4 +- tutorcairn/__about__.py | 2 +- tutorcairn/patches/k8s-deployments | 2 +- tutorcairn/patches/k8s-jobs | 2 +- .../patches/local-docker-compose-dev-services | 15 ++ .../local-docker-compose-jobs-services | 2 +- .../local-docker-compose-permissions-command | 2 + .../local-docker-compose-permissions-volumes | 2 + .../patches/local-docker-compose-services | 27 ++-- tutorcairn/plugin.py | 49 ++++-- .../cairn/apps/superset/superset_config.py | 71 ++++++-- .../build/cairn-clickhouse/scripts/cairn | 48 +----- .../cairn/build/cairn-superset/Dockerfile | 23 +-- .../cairn/__init__.py} | 0 .../build/cairn-superset/cairn/bootstrap.py | 151 ++++++++++++++++++ .../{scripts/cairn => cairn/ctl.py} | 99 ++++-------- .../cairn/build/cairn-superset/cairn/sso.py | 99 ++++++++++++ .../templates/cairn/tasks/cairn-openedx/init | 28 ++++ .../templates/cairn/tasks/cairn-superset/init | 2 +- 23 files changed, 495 insertions(+), 190 deletions(-) delete mode 100644 changelog.d/20230519_161836_regis.md delete mode 100644 changelog.d/20230602_095408_regis.md create mode 100644 tutorcairn/patches/local-docker-compose-dev-services create mode 100644 tutorcairn/patches/local-docker-compose-permissions-command create mode 100644 tutorcairn/patches/local-docker-compose-permissions-volumes rename tutorcairn/templates/cairn/build/{cairn-clickhouse/scripts/clickhouse-auth.json => cairn-superset/cairn/__init__.py} (100%) mode change 100755 => 100644 create mode 100644 
tutorcairn/templates/cairn/build/cairn-superset/cairn/bootstrap.py rename tutorcairn/templates/cairn/build/cairn-superset/{scripts/cairn => cairn/ctl.py} (76%) create mode 100644 tutorcairn/templates/cairn/build/cairn-superset/cairn/sso.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d5d920..a5be16f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,3 +19,12 @@ instructions, because git commits are used to generate release notes: + +## v16.0.0 (2023-06-15) + +- 💥[Feature] Upgrade to Palm. +- 💥[Feature] Add single sign-on (SSO) authentication with the LMS. User accounts no longer need to be created manually. Instead, users log in via the LMS and are automatically granted access to their course data. With this change, users will no longer have access to the accounts that were created manually, unless they used the same username in Superset and the LMS. To revert to the previous behaviour, set `CAIRN_ENABLE_SSO=false`. (by @regisb) + - The `cairn` utility scripts were removed from the Superset and Clickhouse images. +- [Bugfix] Support Superset passwords that include an empty space. (by @regisb) +- [Improvement] Add a scriv-compliant changelog. (by @regisb) + diff --git a/README.rst b/README.rst index f7ccb31..081c633 100644 --- a/README.rst +++ b/README.rst @@ -61,22 +61,10 @@ Then, restart your platform and run the initialization scripts:: tutor local launch -Create a user to access both in the Clickhouse database and the Superset frontend:: - - tutor local do cairn-createuser YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM - -You can use the ``--password=`` option to provide a password on the command line. - -To make this user an administrator, add the ``--admin`` option:: +Open http(s)://data. in your browser. When running locally, this will be http://data.local.overhang.io. (http://data.local.overhang.io:2247 in development). Users authenticate with their LMS user. By default, they have access to the data generated by the courses in which they have the "staff role". 
To convert an existing user to administrator status, run:: tutor local do cairn-createuser --admin YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM -To add the default dashboards to the new user, add the ``--bootstrap-dashboards`` option:: - - tutor local do cairn-createuser --bootstrap-dashboards YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM - -You can then access the frontend with the user credentials you just created. Open http(s)://data. in your browser. When running locally, this will be http://data.local.overhang.io. The admin user will automatically be granted access to the "openedx" database in Superset and will be able to query all tables. - Some event data might be missing from your dashboards: just start using your LMS and refresh your dashboard. The new events should appear immediately. .. image:: https://raw.githubusercontent.com/overhangio/tutor-cairn/master/screenshots/courseoverview-01.png @@ -86,6 +74,9 @@ Some event data might be missing from your dashboards: just start using your LMS .. image:: https://raw.githubusercontent.com/overhangio/tutor-cairn/master/screenshots/courseoverview-03.png :alt: Course overview dashboard part 3 +⚠️ WARNING ⚠️ Previous versions of Cairn required manual user management. If you have an existing installation of Cairn, this behaviour will change when you upgrade to v16. To revert to the previous behaviour, see `"manual user management" <#manual-user-management>`__ below. + + Available metrics ~~~~~~~~~~~~~~~~~ @@ -109,19 +100,32 @@ Cairn allows you to collect and view just any metric from your Open edX platform - Total watch time - Second-per-second statistics: Number of unique viewers, Total number of views +.. _manual_user_management: -Data-based access control -~~~~~~~~~~~~~~~~~~~~~~~~~ +Manual user management +~~~~~~~~~~~~~~~~~~~~~~ -Most of your users should probably not have access to all data from all courses. 
To restrict a given user to one or more courses or organizations, select the course IDs and/or organization IDS to which the user should have access and create a user with limited access to the datalake:: +By default, authentication uses single sign-on (SSO) with the LMS such that users do not have to create separate accounts in Superset. In previous versions of Cairn (v15 and earlier), user accounts had to be created manually. To restore this behaviour, modify the ``CAIRN_ENABLE_SSO`` setting:: - tutor local run cairn-clickhouse cairn createuser --course-id='course-v1:edX+DemoX+Demo_Course' --org-id='edX' YOURUSERNAME + tutor config save --set CAIRN_ENABLE_SSO=false + tutor local restart -Then, create the corresponding user on the frontend with the same command as above (but without the ``--admin`` option):: +SSO will then be disabled, and only manually created users will be able to log in. To create a user, run:: - tutor local run cairn-superset cairn createuser YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM + tutor local do cairn-createuser --password=yourpassword YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM + +To make this user an administrator, add the ``--admin`` option:: + + tutor local do cairn-createuser --admin YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM + +To add the default dashboards to the new user, add the ``--bootstrap-dashboards`` option:: + + tutor local do cairn-createuser --bootstrap-dashboards YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM + + To restrict a given user to one or more courses or organizations, select the course IDs and/or organization IDs to which the user should have access:: + + tutor local do cairn-createuser --course-id='course-v1:edX+DemoX+Demo_Course' YOURUSERNAME YOURUSERNAME@YOUREMAIL.COM -Your frontend user will automatically be associated to the datalake database you created. 
Refreshing course block data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -179,6 +183,8 @@ In this example, the following folder should be created in the plugin:: ``tutorc Development ----------- +In development, the Superset user interface will be available at http://data.local.overhang.io:2247. + To reload Vector configuration after changes to vector.toml, run:: tutor config save && tutor local exec cairn-vector sh -c "kill -s HUP 1" diff --git a/changelog.d/20230519_161836_regis.md b/changelog.d/20230519_161836_regis.md deleted file mode 100644 index 7d796ae..0000000 --- a/changelog.d/20230519_161836_regis.md +++ /dev/null @@ -1 +0,0 @@ -- [Improvement] Add a scriv-compliant changelog. (by @regisb) diff --git a/changelog.d/20230602_095408_regis.md b/changelog.d/20230602_095408_regis.md deleted file mode 100644 index 36acf0e..0000000 --- a/changelog.d/20230602_095408_regis.md +++ /dev/null @@ -1 +0,0 @@ -- [Bugfix] Support Superset passwords that include an empty space. (by @regisb) diff --git a/setup.py b/setup.py index 643b84a..3e2d5f3 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ def load_about(): packages=find_packages(exclude=["tests*"]), include_package_data=True, python_requires=">=3.7", - install_requires=["tutor>=15.0.0,<16.0.0"], + install_requires=["tutor>=16.0.0,<17.0.0"], entry_points={"tutor.plugin.v1": ["cairn = tutorcairn.plugin"]}, classifiers=[ "Development Status :: 3 - Alpha", @@ -47,9 +47,9 @@ def load_about(): "License :: OSI Approved :: GNU Affero General Public License v3", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], ) diff --git a/tutorcairn/__about__.py b/tutorcairn/__about__.py index 10dccc1..60b692a 100644 --- a/tutorcairn/__about__.py +++ b/tutorcairn/__about__.py @@ -1,4 +1,4 @@ -__version__ = 
"15.0.6" +__version__ = "16.0.0" # Handle version suffix for nightly, just like tutor core. __version_suffix__ = "" diff --git a/tutorcairn/patches/k8s-deployments b/tutorcairn/patches/k8s-deployments index 78e4ced..cd0ef9f 100644 --- a/tutorcairn/patches/k8s-deployments +++ b/tutorcairn/patches/k8s-deployments @@ -196,7 +196,7 @@ spec: subPath: superset_config.py - mountPath: /app/bootstrap/ name: bootstrap - - mountPath: /scripts/clickhouse-auth.json + - mountPath: /app/superset/cairn/clickhouse-auth.json name: clickhouse-auth subPath: auth.json securityContext: diff --git a/tutorcairn/patches/k8s-jobs b/tutorcairn/patches/k8s-jobs index f763460..c401fb3 100644 --- a/tutorcairn/patches/k8s-jobs +++ b/tutorcairn/patches/k8s-jobs @@ -53,7 +53,7 @@ spec: subPath: superset_config.py - mountPath: /app/bootstrap/ name: bootstrap - - mountPath: /scripts/clickhouse-auth.json + - mountPath: /app/superset/cairn/clickhouse-auth.json name: clickhouse-auth subPath: auth.json volumes: diff --git a/tutorcairn/patches/local-docker-compose-dev-services b/tutorcairn/patches/local-docker-compose-dev-services new file mode 100644 index 0000000..c3f0fa5 --- /dev/null +++ b/tutorcairn/patches/local-docker-compose-dev-services @@ -0,0 +1,15 @@ +cairn-superset: + command: ["superset", "run", "--host=0.0.0.0", "--port=2247"] + environment: + FLASK_ENV: development + ports: + - "2247:2247" + +cairn-superset-worker: + environment: + FLASK_ENV: development + +cairn-superset-worker-beat: + environment: + FLASK_ENV: development + diff --git a/tutorcairn/patches/local-docker-compose-jobs-services b/tutorcairn/patches/local-docker-compose-jobs-services index c12d90e..407fe30 100644 --- a/tutorcairn/patches/local-docker-compose-jobs-services +++ b/tutorcairn/patches/local-docker-compose-jobs-services @@ -17,7 +17,7 @@ cairn-superset-job: image: {{ CAIRN_SUPERSET_DOCKER_IMAGE }} volumes: - ../plugins/cairn/apps/superset/superset_config.py:/app/superset_config.py:ro - - 
../plugins/cairn/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro + - ../plugins/cairn/apps/clickhouse/auth.json:/app/superset/cairn/clickhouse-auth.json:ro - ../plugins/cairn/apps/superset/bootstrap:/app/bootstrap:ro healthcheck: disable: true diff --git a/tutorcairn/patches/local-docker-compose-permissions-command b/tutorcairn/patches/local-docker-compose-permissions-command new file mode 100644 index 0000000..e2443a0 --- /dev/null +++ b/tutorcairn/patches/local-docker-compose-permissions-command @@ -0,0 +1,2 @@ +setowner 1000 /data/cairn-clickhouse +{% if CAIRN_RUN_POSTGRESQL %}setowner 70 /data/cairn-postgresql{% endif %} diff --git a/tutorcairn/patches/local-docker-compose-permissions-volumes b/tutorcairn/patches/local-docker-compose-permissions-volumes new file mode 100644 index 0000000..5a5f485 --- /dev/null +++ b/tutorcairn/patches/local-docker-compose-permissions-volumes @@ -0,0 +1,2 @@ +- ../../data/cairn/clickhouse:/data/cairn-clickhouse +{% if CAIRN_RUN_POSTGRESQL %}- ../../data/cairn/postgresql:/data/cairn-postgresql{% endif %} diff --git a/tutorcairn/patches/local-docker-compose-services b/tutorcairn/patches/local-docker-compose-services index 82200b9..a963b52 100644 --- a/tutorcairn/patches/local-docker-compose-services +++ b/tutorcairn/patches/local-docker-compose-services @@ -28,20 +28,17 @@ cairn-clickhouse: hard: 262144 restart: unless-stopped depends_on: - - cairn-clickhouse-permissions -cairn-clickhouse-permissions: - image: {{ DOCKER_IMAGE_PERMISSIONS }} - command: ["1000", "/data/clickhouse"] - restart: on-failure - volumes: - - ../../data/cairn/clickhouse:/data/clickhouse + - permissions {% endif %} cairn-superset: image: {{ CAIRN_SUPERSET_DOCKER_IMAGE }} volumes: - ../plugins/cairn/apps/superset/superset_config.py:/app/superset_config.py:ro - - ../plugins/cairn/apps/clickhouse/auth.json:/scripts/clickhouse-auth.json:ro + - ../plugins/cairn/apps/clickhouse/auth.json:/app/superset/cairn/clickhouse-auth.json:ro - 
../plugins/cairn/apps/superset/bootstrap:/app/bootstrap:ro + {%- for mount in iter_mounts(MOUNTS, "cairn-superset") %} + - {{ mount }} + {%- endfor %} restart: unless-stopped depends_on: {% if RUN_REDIS %}- redis{% endif %} @@ -50,6 +47,9 @@ cairn-superset-worker: image: {{ CAIRN_SUPERSET_DOCKER_IMAGE }} volumes: - ../plugins/cairn/apps/superset/superset_config.py:/app/superset_config.py:ro + {%- for mount in iter_mounts(MOUNTS, "cairn-superset") %} + - {{ mount }} + {%- endfor %} command: celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO restart: unless-stopped healthcheck: @@ -61,6 +61,9 @@ cairn-superset-worker-beat: image: {{ CAIRN_SUPERSET_DOCKER_IMAGE }} volumes: - ../plugins/cairn/apps/superset/superset_config.py:/app/superset_config.py:ro + {%- for mount in iter_mounts(MOUNTS, "cairn-superset") %} + - {{ mount }} + {%- endfor %} command: celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO --schedule=/tmp/celerybeat-schedule restart: unless-stopped healthcheck: @@ -80,11 +83,5 @@ cairn-postgresql: restart: unless-stopped user: "70:70" depends_on: - - cairn-postgresql-permissions -cairn-postgresql-permissions: - image: {{ DOCKER_IMAGE_PERMISSIONS }} - command: ["70", "/data/postgresql"] - restart: on-failure - volumes: - - ../../data/cairn/postgresql:/data/postgresql + - permissions {% endif %} diff --git a/tutorcairn/plugin.py b/tutorcairn/plugin.py index 3e6e862..09c830b 100644 --- a/tutorcairn/plugin.py +++ b/tutorcairn/plugin.py @@ -19,6 +19,7 @@ ("CAIRN_CLICKHOUSE_PASSWORD", "{{ 20|random_string }}"), ("CAIRN_POSTGRESQL_PASSWORD", "{{ 20|random_string }}"), ("CAIRN_SUPERSET_SECRET_KEY", "{{ 20|random_string }}"), + ("CAIRN_SSO_CLIENT_SECRET", "{{ 20|random_string }}"), ] ) hooks.Filters.CONFIG_DEFAULTS.add_items( @@ -47,6 +48,9 @@ "{{ DOCKER_REGISTRY }}overhangio/cairn-superset:{{ CAIRN_VERSION }}", ), ("CAIRN_SUPERSET_LANGUAGE_CODE", "{{ LANGUAGE_CODE[:2] }}"), + # SSO + ("CAIRN_ENABLE_SSO", True), + 
("CAIRN_SSO_CLIENT_ID", "cairn"), # Vector # https://hub.docker.com/r/timberio/vector/tags # https://github.com/vectordotdev/vector/releases @@ -106,6 +110,15 @@ ) +@hooks.Filters.APP_PUBLIC_HOSTS.add() +def _print_superset_host(hosts: list[str], context_name: t.Literal["local", "dev"]): + if context_name == "dev": + hosts.append("{{ CAIRN_HOST }}:2247") + else: + hosts.append("{{ CAIRN_HOST }}") + return hosts + + @click.command( name="cairn-createuser", help="Create a Cairn user, both in Clickhouse and Superset" ) @@ -118,27 +131,41 @@ @click.option( "-p", "--password", - help="Specify password from the command line. If undefined, you will be prompted to input a password", - prompt=True, + help="Specify password from the command line. If undefined, no password will be set. (Ignored with SSO)", + hide_input=True, +) +@click.option( + "-c", + "--course-id", + "course_ids", + help="Limit access to a selection of courses (Ignored with SSO).", + multiple=True, hide_input=True, ) @click.argument("username") @click.argument("email") def create_user_command( - bootstrap_dashboards: bool, admin: bool, password: str, username: str, email: str + bootstrap_dashboards: bool, admin: bool, password: str, course_ids: list[str], username: str, email: str ) -> t.Iterable[tuple[str, str]]: admin_opt = " --admin" if admin else "" - yield from [ - ("cairn-clickhouse", f"cairn createuser {username}"), - ( - "cairn-superset", - f"cairn createuser{admin_opt} --password {shlex.quote(password)} {username} {email}", - ), - ] + + # TODO can we now simplify the clickhouse image? 
+ # - get rid of the cairn utility + # - remove the auth.json file + + create_superset_user = "python ./superset/cairn/ctl.py createuser" + if password: + create_superset_user += f" --password={shlex.quote(password)}" + for course_id in course_ids: + create_superset_user += f" --course-id={course_id}" + create_superset_user += f" {admin_opt} {username} {email}" + yield ("cairn-superset", create_superset_user) + + # Bootstrap dashboards if bootstrap_dashboards: yield ( "cairn-superset", - f"cairn bootstrap-dashboards {username} /app/bootstrap/courseoverview.json", + f"python ./superset/cairn/ctl.py bootstrap-dashboards {username} /app/bootstrap/courseoverview.json", ) diff --git a/tutorcairn/templates/cairn/apps/superset/superset_config.py b/tutorcairn/templates/cairn/apps/superset/superset_config.py index 75ce0b3..026cfe3 100644 --- a/tutorcairn/templates/cairn/apps/superset/superset_config.py +++ b/tutorcairn/templates/cairn/apps/superset/superset_config.py @@ -1,8 +1,14 @@ import logging +import os +import typing as t from cachelib.redis import RedisCache from celery.schedules import crontab +from superset.extensions import security_manager +from superset.cairn import bootstrap as cairn_bootstrap +from superset.cairn import sso as cairn_sso + # https://superset.apache.org/docs/installation/configuring-superset SECRET_KEY = "{{ CAIRN_SUPERSET_SECRET_KEY }}" SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://{{ CAIRN_POSTGRESQL_USERNAME }}:{{ CAIRN_POSTGRESQL_PASSWORD }}@cairn-postgresql/{{ CAIRN_POSTGRESQL_DATABASE }}" @@ -46,16 +52,24 @@ REDIS_CELERY_DB = {{ OPENEDX_CELERY_REDIS_DB + 2 }} REDIS_CACHE_DB = {{ OPENEDX_CACHE_REDIS_DB + 2 }} -# Charting data queried from datasets cache (optional) -DATA_CACHE_CONFIG = { +# Cache configuration +CACHE_CONFIG = { "CACHE_TYPE": "redis", - "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24, # 1 day default (in secs) + "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24 * 1, # 1 day default (in secs) "CACHE_KEY_PREFIX": "superset_data_cache", 
"CACHE_REDIS_URL": f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CACHE_DB}", } -# Metadata cache (optional) -CACHE_CONFIG = DATA_CACHE_CONFIG -# SQL Lab query results cache (optional) +DATA_CACHE_CONFIG = CACHE_CONFIG.copy() +FILTER_STATE_CACHE_CONFIG = CACHE_CONFIG.copy() +FILTER_STATE_CACHE_CONFIG.update({ + "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24 * 90, # 90 days + "REFRESH_TIMEOUT_ON_RETRIEVAL": True, +}) +EXPLORE_FORM_DATA_CACHE_CONFIG = CACHE_CONFIG.copy() +EXPLORE_FORM_DATA_CACHE_CONFIG.update({ + "CACHE_DEFAULT_TIMEOUT": 60 * 60 * 24 * 7, # 7 days + "REFRESH_TIMEOUT_ON_RETRIEVAL": True, +}) RESULTS_BACKEND = RedisCache( host=REDIS_HOST, port=REDIS_PORT, @@ -63,7 +77,40 @@ key_prefix="superset_results", ) -# TODO implement FILTER_STATE_CACHE_CONFIG and EXPLORE_FORM_DATA_CACHE_CONFIG such that we get rid of the warning messages +{% if CAIRN_ENABLE_SSO %} +# Authentication +# https://superset.apache.org/docs/installation/configuring-superset/#custom-oauth2-configuration +# https://flask-appbuilder.readthedocs.io/en/latest/security.html#authentication-oauth +from flask_appbuilder.security.manager import AUTH_OAUTH +AUTH_TYPE = AUTH_OAUTH +OPENEDX_LMS_ROOT_URL = "{% if ENABLE_HTTPS %}https{% else %}http{% endif %}://{{ LMS_HOST }}" +OPENEDX_SSO_CLIENT_ID = "{{ CAIRN_SSO_CLIENT_ID }}" +if os.environ.get("FLASK_ENV") == "development": + OPENEDX_LMS_ROOT_URL = "http://{{ LMS_HOST }}:8000" + OPENEDX_SSO_CLIENT_ID = "{{ CAIRN_SSO_CLIENT_ID }}-dev" +OAUTH_PROVIDERS = [ + { + "name": cairn_sso.OPENEDX_SSO_PROVIDER, + "token_key": "access_token", + "icon": "fa-right-to-bracket", + "remote_app": { + "client_id": OPENEDX_SSO_CLIENT_ID, + "client_secret": "{{ CAIRN_SSO_CLIENT_SECRET }}", + "client_kwargs": {"scope": "read"}, + "access_token_method": "POST", + "api_base_url": f"{OPENEDX_LMS_ROOT_URL}", + "access_token_url": f"{OPENEDX_LMS_ROOT_URL}/oauth2/access_token/", + "authorize_url": f"{OPENEDX_LMS_ROOT_URL}/oauth2/authorize/", + } + } +] +CUSTOM_SECURITY_MANAGER = 
cairn_sso.OpenEdxSsoSecurityManager +# Update roles on login: this will cause all roles (except those that are preserved) to +# be overwritten. +AUTH_ROLES_SYNC_AT_LOGIN = True +# Login will create user +AUTH_USER_REGISTRATION = True +{% endif %} class CeleryConfig: # pylint: disable=too-few-public-methods BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/{REDIS_CELERY_DB}" @@ -101,16 +148,10 @@ class CeleryConfig: # pylint: disable=too-few-public-methods # Avoid duplicate logging because of propagation to root logger logging.getLogger("superset").propagate = False -# Enable dashboard embedding +# https://github.com/apache/superset/blob/master/RESOURCES/FEATURE_FLAGS.md FEATURE_FLAGS = { + # Enable dashboard embedding "EMBEDDED_SUPERSET": True } -# Enable some custom feature flags -# Do this once native filters are fully functional https://github.com/apache/superset/projects/15+ -# def get_cairn_feature_flags(flags): -# flags["DASHBOARD_NATIVE_FILTERS"] = True -# return flags -# GET_FEATURE_FLAGS_FUNC = get_cairn_feature_flags - {{ patch("cairn-superset-settings") }} diff --git a/tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/cairn b/tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/cairn index f854253..8bae9bf 100755 --- a/tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/cairn +++ b/tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/cairn @@ -6,7 +6,9 @@ import os import subprocess -with open(os.path.join(os.path.dirname(__file__), "clickhouse-auth.json")) as f: +with open( + os.path.join(os.path.dirname(__file__), "clickhouse-auth.json"), encoding="utf-8" +) as f: CLICKHOUSE_AUTH = json.load(f) @@ -18,23 +20,6 @@ def main(): parser_client = subparsers.add_parser("client") parser_client.set_defaults(func=command_client) - # Create user - parser_createuser = subparsers.add_parser("createuser") - parser_createuser.add_argument( - "-c", - "--course-id", - action="append", - help="Restrict user to access data only 
from these courses.", 
- ) - parser_createuser.add_argument( - "-o", - "--org-id", - action="append", - help="Restrict user to access data only from these organizations.", - ) - parser_createuser.add_argument("username") - parser_createuser.set_defaults(func=command_create_user) - # Apply migrations parser_migrate = subparsers.add_parser("migrate") parser_migrate.add_argument( @@ -64,35 +49,10 @@ def main(): parser.print_help() -def command_client(args): +def command_client(_args): subprocess.check_call(get_client_command()) -def command_create_user(args): - conditions = [] - course_ids = args.course_id or [] - org_ids = args.org_id or [] - for course_id in course_ids: - conditions.append(f"course_id = '{course_id}'") - for org_id in org_ids: - conditions.append(f"course_id LIKE 'course-v1:{org_id}+%'") - condition = " OR ".join(conditions) if conditions else "1" - username = args.username - # Note that the "CREATE TEMPORARY TABLE" grant is required to make use of "numbers()" functions. - run_query( - f"""CREATE USER IF NOT EXISTS {username}; -GRANT CREATE TEMPORARY TABLE ON *.* TO {username};""" - ) - # Find the list of tables to which the user should have access: all tables that do not start with "_" - tables = run_query("SHOW TABLES").strip().split("\n") - for table in tables: - if not table.startswith("_"): - query = f"""GRANT SELECT ON {table} TO {username}; -CREATE ROW POLICY OR REPLACE {username} ON {table} AS RESTRICTIVE FOR SELECT USING {condition} TO {username};""" - print(query) - run_query(query) - - def command_migrate(args): # Create database query = f"""CREATE DATABASE IF NOT EXISTS {CLICKHOUSE_AUTH["database"]}""" diff --git a/tutorcairn/templates/cairn/build/cairn-superset/Dockerfile b/tutorcairn/templates/cairn/build/cairn-superset/Dockerfile index 7bc1ee5..718b728 100644 --- a/tutorcairn/templates/cairn/build/cairn-superset/Dockerfile +++ b/tutorcairn/templates/cairn/build/cairn-superset/Dockerfile @@ -3,25 +3,28 @@ # https://github.com/apache/superset/releases # 
https://github.com/apache/superset/blob/master/Dockerfile # https://superset.apache.org/docs/databases/installing-database-drivers -FROM docker.io/apache/superset:2.0.0 +FROM docker.io/apache/superset:2.1.0 USER root # https://pypi.org/project/clickhouse-driver/ # https://pypi.org/project/mysqlclient/ +# https://pypi.org/project/clickhouse-connect/ # https://pypi.org/project/clickhouse-sqlalchemy/ -RUN pip install clickhouse-driver==0.2.4 mysqlclient==2.1.1 -# Later versions of clickhouse-sqlalchemy will not work. -# Note that this connector be replaced by clickhouse-connect in v2.0.1: -# https://github.com/apache/superset/pull/22039 -RUN pip install clickhouse-sqlalchemy==0.1.10 - -COPY --chown=superset:superset ./scripts /scripts -RUN chmod a+x /scripts/* -ENV PATH /scripts:${PATH} +# https://pypi.org/project/Authlib/ +# We preserve the clickhouse-sqlalchemy package to keep backward compatibility with existing dashboards +RUN {% if is_buildkit_enabled() %}--mount=type=cache,target=/root/.cache/pip,sharing=shared {% endif %}pip install \ + clickhouse-driver==0.2.6 \ + mysqlclient==2.1.1 \ + clickhouse-connect==0.5.24 \ + clickhouse-sqlalchemy==0.2.4 \ + authlib==1.2.0 USER superset +# Copy lib +COPY --chown=superset:superset ./cairn /app/superset/cairn + # This is required to have a proper healthcheck ENV SUPERSET_PORT=8000 diff --git a/tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/clickhouse-auth.json b/tutorcairn/templates/cairn/build/cairn-superset/cairn/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from tutorcairn/templates/cairn/build/cairn-clickhouse/scripts/clickhouse-auth.json rename to tutorcairn/templates/cairn/build/cairn-superset/cairn/__init__.py diff --git a/tutorcairn/templates/cairn/build/cairn-superset/cairn/bootstrap.py b/tutorcairn/templates/cairn/build/cairn-superset/cairn/bootstrap.py new file mode 100644 index 0000000..25ba62f --- /dev/null +++ 
b/tutorcairn/templates/cairn/build/cairn-superset/cairn/bootstrap.py @@ -0,0 +1,151 @@ +from functools import lru_cache +import json +import logging +import os + +import requests +from superset.extensions import db, security_manager +from superset.utils.database import get_or_create_db + +logger = logging.getLogger(__name__) + +# sql_lab is required in 2.1.0 for non-admin users to get access to sql queries +DEFAULT_ROLES = ["Gamma", "sql_lab"] + +def setup_user(username: str, course_ids=None) -> None: + """ + Create clickhouse DB, superset role, and superset DB associated to user. + This role will have access to the database with the same name. + """ + clickhouse_username = f"openedx-{username}" + superset_db = f"openedx-{username}" + superset_role = get_user_role_name(username) + + create_clickhouse_user(clickhouse_username) + grant_clickhouse_row_based_access(clickhouse_username, course_ids=course_ids) + create_superset_db(superset_db, clickhouse_username) + create_superset_db_role(superset_role, superset_db) + +def get_role_names(username: str) -> str: + """ + Return all the role names normally associated to a user. + """ + return DEFAULT_ROLES + [get_user_role_name(username)] + +def get_user_role_name(username: str) -> str: + """ + Return the user-specific role name associated to a user. + """ + return f"openedx-{username}" + +def create_superset_db(superset_database: str, clickhouse_username: str) -> None: + """ + Create a database object with the right Clickhouse URI: + + - user: clickhouse_username + - password: None + - host/port: clickhouse host/port + - database: database name + + User will be able to access the Clickhouse DB without any password, but should only + be granted access to the right rows. 
+ """ + # https://superset.apache.org/docs/databases/clickhouse + auth = get_clickhouse_credentials() + uri = f"clickhousedb://{clickhouse_username}:@{auth['host']}:{auth['http_port']}/{auth['database']}" + logger.info("Creating Superset DB: %s", uri) + superset_db = get_or_create_db(superset_database, uri, always_create=True) + db.session.add(superset_db) + db.session.commit() + + +def create_superset_db_role(role_name: str, superset_database_name: str) -> None: + """ + Create a role that has basic access permissions for a certain database. + """ + + def check_permission(permission_view) -> bool: + """ + The list of all available permissions can be obtained from the admin role: + + print(security_manager.find_role("Admin").permissions) + """ + permission_name = str(permission_view) + if permission_name in [ + "can save on Datasource", + "can sql json on Superset", + "menu access on Datasets", + # Modify "see table schema" dropdown in sql lab + "can expanded on TableSchemaView", + "can delete on TableSchemaView", + "can post on TableSchemaView", + ]: + return True + if permission_name.startswith(f"database access on [{superset_database_name}]"): + return True + if permission_name.startswith(f"schema access on [{superset_database_name}]"): + return True + return False + + # Create or update role with the same name as the user + security_manager.set_role(role_name, check_permission) + + +def create_clickhouse_user(clickhouse_username): + """ + Create a password-less clickhouse user with access to Clickhouse. + """ + make_clickhouse_query(f"""CREATE USER IF NOT EXISTS '{clickhouse_username}';""") + make_clickhouse_query( + f"""GRANT CREATE TEMPORARY TABLE ON *.* TO '{clickhouse_username}';""" + ) + + +def grant_clickhouse_row_based_access(clickhouse_username, course_ids=None): + """ + Grant row-based access to a Clickhouse user based on a selection of course IDs. + + When the list of course IDs is None, grant access to all courses. 
+ """ + if course_ids: + condition = " OR ".join( + [f"course_id = '{course_id}'" for course_id in course_ids] + ) + else: + condition = "1" + # Find the list of tables to which the user should have access: all tables that do not start with "_" + for table in make_clickhouse_query("SHOW TABLES").split("\n"): + if not table.startswith("_"): + make_clickhouse_query( + f"""GRANT SELECT ON {table} TO '{clickhouse_username}';""" + ) + make_clickhouse_query( + f"""CREATE ROW POLICY OR REPLACE '{clickhouse_username}' ON {table} AS RESTRICTIVE FOR SELECT USING {condition} TO '{clickhouse_username}';""" + ) + + +def make_clickhouse_query(query): + """ + Query Clickhouse by POSTing some content by http. + """ + logger.info("Running Clickhouse query: %s", query) + auth = get_clickhouse_credentials() + clickhouse_uri = f"{auth['http_scheme']}://{auth['username']}:{auth['password']}@{auth['host']}:{auth['http_port']}/?database={auth['database']}" + response = requests.post(clickhouse_uri, data=query.encode("utf8"), timeout=10) + if response.status_code != 200: + raise ValueError( + f"An error occurred while attempting to post a query: {response.content.decode()}" + ) + return response.content.decode("utf8").strip() + + +@lru_cache(maxsize=None) +def get_clickhouse_credentials(): + """ + Load the clickhouse credentials from file. + """ + with open( + os.path.join(os.path.dirname(__file__), "clickhouse-auth.json"), + encoding="utf-8", + ) as f: + return json.load(f) diff --git a/tutorcairn/templates/cairn/build/cairn-superset/scripts/cairn b/tutorcairn/templates/cairn/build/cairn-superset/cairn/ctl.py similarity index 76% rename from tutorcairn/templates/cairn/build/cairn-superset/scripts/cairn rename to tutorcairn/templates/cairn/build/cairn-superset/cairn/ctl.py index e800877..07c8832 100644 --- a/tutorcairn/templates/cairn/build/cairn-superset/scripts/cairn +++ b/tutorcairn/templates/cairn/build/cairn-superset/cairn/ctl.py @@ -1,9 +1,7 @@ #! 
/usr/bin/env python3 import argparse -from getpass import getpass import json -import os from time import time from superset.app import create_app @@ -16,9 +14,11 @@ from superset.models.slice import Slice from superset.extensions import db, security_manager import superset.dashboards.commands.importers.v0 as importers -from superset.utils.database import get_or_create_db from werkzeug.security import generate_password_hash +# Our convenient library +from superset.cairn import bootstrap as cairn_bootstrap + now = time() @@ -45,18 +45,10 @@ def main(): action="store_true", help=("Make the user an administrator."), ) - parser_user.add_argument( - "-r", - "--role", - help=( - "Name of the role to which the user should be assigned." - " Defaults to the username." - ), - ) parser_user.add_argument( "-p", "--password", - help="User password. If undefined, you will be prompted for one.", + help="User password.", ) parser_user.add_argument( "--firstname", default="", help="User first name (optional)." @@ -64,6 +56,12 @@ def main(): parser_user.add_argument( "--lastname", default="", help="User last name (optional)." ) + parser_user.add_argument( + "-c", + "--course-id", + action="append", + help="Restrict user to access data only from these courses.", + ) parser_user.add_argument("username") parser_user.add_argument("email") parser_user.set_defaults(func=bootstrap_user) @@ -95,30 +93,20 @@ def main(): def bootstrap_user(args): # Bootstrap database database_name = args.db or args.username - bootstrap_database(args.username, database_name) + cairn_bootstrap.create_superset_db(args.username, database_name) # Get or create user user = security_manager.find_user(args.username) if user: print(f"User '{args.username}' already exists. 
Skipping creation.") - if args.password: - print("Setting user password...") - user.password = generate_password_hash(args.password) - db.session.add(user) - db.session.commit() else: print(f"Creating user '{args.username}'...") - password = args.password - while not password: - password = getpass() - base_role_name = "Admin" if args.admin else "Gamma" user = security_manager.add_user( args.username, args.firstname, args.lastname, args.email, - security_manager.find_role(base_role_name), - password=password, + "Gamma", ) if user is None or user is False: # This may happen for instance when the email address is already associated @@ -127,52 +115,31 @@ def bootstrap_user(args): f"Failed to create user '{args.username}' email='{args.email}'" ) - # Associate role with the same name to user, if it exists - role_name = args.role or args.username - - def check_permission(permission_view): - permission_name = str(permission_view) - if permission_name in [ - "can save on Datasource", - "can sqllab on Superset", - "can sql json on Superset", - "menu access on Datasets", - "menu access on SQL Lab", - ]: - return True - if permission_name.startswith(f"database access on [{database_name}]"): - return True - if permission_name.startswith(f"schema access on [{database_name}]"): - return True - return False - - security_manager.set_role(role_name, check_permission) - role = security_manager.find_role(role_name) - if role in user.roles: - print(f"Role '{role_name}' is already associated to user.") - else: - print(f"Associating role '{role_name}' to user...") - user.roles.append(role) + # Set password + if args.password: + print("Setting user password...") + user.password = generate_password_hash(args.password) db.session.add(user) db.session.commit() - print("Done.") - -def bootstrap_database(username, database_name): - with open( - os.path.join(os.path.dirname(__file__), "clickhouse-auth.json"), - encoding="utf-8", - ) as f: - CLICKHOUSE_AUTH = json.load(f) - - host = 
CLICKHOUSE_AUTH["host"] - port = CLICKHOUSE_AUTH["port"] - database = CLICKHOUSE_AUTH["database"] - uri = f"clickhouse+native://{username}:@{host}:{port}/{database}" - database = get_or_create_db(database_name, uri, always_create=True) + # Create user role, clickhouse db, etc. + cairn_bootstrap.setup_user(args.username, course_ids=args.course_id) + + # Associate user to roles + user_roles = cairn_bootstrap.get_role_names(args.username) + if args.admin: + user_roles.append("Admin") + for role_name in user_roles: + role = security_manager.find_role(role_name) + if role in user.roles: + print(f"Role '{role_name}' is already associated to user.") + else: + print(f"Associating role '{role_name}' to user...") + user.roles.append(role) + db.session.add(user) + db.session.commit() - db.session.add(database) - db.session.commit() + print("Done.") # Note: we would like to start using superset's native export/import-dashboards command diff --git a/tutorcairn/templates/cairn/build/cairn-superset/cairn/sso.py b/tutorcairn/templates/cairn/build/cairn-superset/cairn/sso.py new file mode 100644 index 0000000..9c3011a --- /dev/null +++ b/tutorcairn/templates/cairn/build/cairn-superset/cairn/sso.py @@ -0,0 +1,99 @@ +import logging +import typing as t + +from flask import session +from superset.security import SupersetSecurityManager + +from . 
import bootstrap as cairn_bootstrap + +logger = logging.getLogger(__name__) + + +OPENEDX_SSO_PROVIDER = "openedx" + + +class OpenEdxSsoSecurityManager(SupersetSecurityManager): + def oauth_user_info(self, provider, response=None): + """ + Identify the user + """ + if provider == OPENEDX_SSO_PROVIDER: + try: + return self.get_user_info() + except Exception as e: + # Log exceptions, otherwise the stacktrace is swallowed by + # flask_appbuilder.security.views.AuthOAuthView.oauth_authorized + logger.exception(e) + raise + + def get_user_info(self): + """ + Make calls to the LMS API to fetch user information + http://local.overhang.io:8000/api-docs/#/user/user_v1_me_read + """ + username = self.get_lms_api("/api/user/v1/me")["username"] + account = self.get_lms_api(f"/api/user/v1/accounts/{username}") + + # Fetch list of courses in which user is staff + courses = [ + c["course_id"] + for c in self.get_lms_api( + f"/api/courses/v1/courses/?permissions=staff&username={username}" + )["results"] + ] + if not courses: + # User is not staff, entry is forbidden + return {} + + # Create role, db, clickhouse db associated to user + cairn_bootstrap.setup_user(username, course_ids=courses) + + # See flask_appbuilder.security.manager.BaseSecurityManager.auth_user_oauth for + # valid keys + return { + "name": account["name"], + "email": account["email"], + "id": username, + "username": username, + } + + def get_lms_api(self, endpoint): + """ + Make a call to the LMS API using the client app credentials. + """ + return ( + self.appbuilder.sm.oauth_remotes[OPENEDX_SSO_PROVIDER].get(endpoint).json() + ) + + def _oauth_calculate_user_roles(self, userinfo) -> t.List[str]: + """ + Override parent method to be able to create groups that match the user name. + + This is a bit hackish, but the cleanest solution we found. 
+ """ + roles = [] + for name in cairn_bootstrap.get_role_names(userinfo["username"]): + role = self.find_role(name) + if role: + roles.append(role) + else: + logger.error("Could not find role: %s", name) + + # If user is already a member of one of these roles, preserve them. + if user := self.find_user(username=userinfo["username"]): + roles_to_preserve = ["Admin"] + for role_to_preserve in roles_to_preserve: + role = self.find_role(role_to_preserve) + if role in user.roles: + roles.append(role) + + return roles + + def set_oauth_session(self, provider, oauth_response): + """ + Store the oauth token in the session for later retrieval. + """ + super().set_oauth_session(provider, oauth_response) + + if provider == OPENEDX_SSO_PROVIDER: + session["oauth_token"] = oauth_response diff --git a/tutorcairn/templates/cairn/tasks/cairn-openedx/init b/tutorcairn/templates/cairn/tasks/cairn-openedx/init index 7e67094..cc39b66 100644 --- a/tutorcairn/templates/cairn/tasks/cairn-openedx/init +++ b/tutorcairn/templates/cairn/tasks/cairn-openedx/init @@ -1 +1,29 @@ +{% if CAIRN_ENABLE_SSO %} +# SSO user +./manage.py lms manage_user cairn cairn@openedx + +# Production app +./manage.py lms create_dot_application \ + --grant-type authorization-code \ + --redirect-uris "{% if ENABLE_HTTPS %}https{% else %}http{% endif %}://{{ CAIRN_HOST }}/oauth-authorized/openedx" \ + --client-id {{ CAIRN_SSO_CLIENT_ID }} \ + --client-secret {{ CAIRN_SSO_CLIENT_SECRET }} \ + --scopes "user_id" \ + --skip-authorization \ + --update \ + cairn-sso cairn + +# Development app +./manage.py lms create_dot_application \ + --grant-type authorization-code \ + --redirect-uris "http://{{ CAIRN_HOST }}:2247/oauth-authorized/openedx" \ + --client-id {{ CAIRN_SSO_CLIENT_ID }}-dev \ + --client-secret {{ CAIRN_SSO_CLIENT_SECRET }} \ + --scopes "user_id" \ + --skip-authorization \ + --update \ + cairn-sso-dev cairn +{% endif %} + +# Update course blocks data python /openedx/scripts/importcoursedata.py diff --git 
a/tutorcairn/templates/cairn/tasks/cairn-superset/init b/tutorcairn/templates/cairn/tasks/cairn-superset/init index 0d38e3c..9d34624 100644 --- a/tutorcairn/templates/cairn/tasks/cairn-superset/init +++ b/tutorcairn/templates/cairn/tasks/cairn-superset/init @@ -5,4 +5,4 @@ superset db upgrade superset init # Create/Update database with full access -superset set-database-uri --database-name={{ CAIRN_CLICKHOUSE_DATABASE }} --uri='clickhouse+native://{{ CAIRN_CLICKHOUSE_USERNAME }}:{{ CAIRN_CLICKHOUSE_PASSWORD }}@{{ CAIRN_CLICKHOUSE_HOST }}:{{ CAIRN_CLICKHOUSE_PORT }}/{{ CAIRN_CLICKHOUSE_DATABASE }}' +superset set-database-uri --database-name={{ CAIRN_CLICKHOUSE_DATABASE }} --uri='clickhousedb://{{ CAIRN_CLICKHOUSE_USERNAME }}:{{ CAIRN_CLICKHOUSE_PASSWORD }}@{{ CAIRN_CLICKHOUSE_HOST }}:{{ CAIRN_CLICKHOUSE_HTTP_PORT }}/{{ CAIRN_CLICKHOUSE_DATABASE }}'