From 39a2b21b5b4f992e8a01d6a63752601d93fc2224 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 26 Feb 2024 17:37:57 -0800 Subject: [PATCH 1/4] feat: docker-compose to work off repo Dockerfile Currently our docker-compose setup pulls images that have been built recently on the `master` branch. While this works in most cases, it's non-deterministic and not guaranteed to always work. For example, if I merge a PR to master that removes a certain python library, people doing development in branches that still depend on that library will end up with a setup that does not work. In this PR, I change the docker-compose setup(s) to: - reference the local Dockerfile - point to the right cache location (apache/superset-cache:....) - make that DRY since it's repeated many times across the docker-compose files - touch up both docker-compose.yml and docker-compose-non-dev.yml with the same approach As far as testing goes, I made sure this builds and that the resulting setup is functional. It was also very fast in my experience; the cache was clearly leveraged here. --- docker-compose-non-dev.yml | 19 ++++++++++++++----- docker-compose.yml | 22 ++++++++++++++++------ 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml index 34aec9bbb7531..d5c142926a31e 100644 --- a/docker-compose-non-dev.yml +++ b/docker-compose-non-dev.yml @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest} x-superset-depends-on: &superset-depends-on - db - redis @@ -23,6 +22,12 @@ x-superset-volumes: - ./docker:/app/docker - superset_home:/app/superset_home +x-common-build: &common-build + context: . 
+ target: dev + cache_from: + - apache/superset-cache:3.9-slim-bookworm + version: "3.7" services: redis: @@ -43,7 +48,8 @@ services: superset: env_file: docker/.env-non-dev - image: *superset-image + build: + <<: *common-build container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] user: "root" @@ -54,8 +60,9 @@ services: volumes: *superset-volumes superset-init: - image: *superset-image container_name: superset_init + build: + <<: *common-build command: ["/app/docker/docker-init.sh"] env_file: docker/.env-non-dev depends_on: *superset-depends-on @@ -65,7 +72,8 @@ services: disable: true superset-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: docker/.env-non-dev @@ -81,7 +89,8 @@ services: ] superset-worker-beat: - image: *superset-image + build: + <<: *common-build container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] env_file: docker/.env-non-dev diff --git a/docker-compose.yml b/docker-compose.yml index aba88707cafd0..0c2d53c775838 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-master-dev} x-superset-user: &superset-user root x-superset-depends-on: &superset-depends-on - db @@ -27,6 +26,12 @@ x-superset-volumes: &superset-volumes - superset_home:/app/superset_home - ./tests:/app/tests +x-common-build: &common-build + context: . 
+ target: dev + cache_from: + - apache/superset-cache:3.9-slim-bookworm + version: "3.7" services: nginx: @@ -61,7 +66,8 @@ services: superset: env_file: docker/.env - image: *superset-image + build: + <<: *common-build container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app"] restart: unless-stopped @@ -106,7 +112,8 @@ services: - REDIS_SSL=false superset-init: - image: *superset-image + build: + <<: *common-build container_name: superset_init command: ["/app/docker/docker-init.sh"] env_file: docker/.env @@ -129,7 +136,8 @@ services: volumes: *superset-volumes superset-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: docker/.env @@ -146,7 +154,8 @@ services: # mem_reservation: 128M superset-worker-beat: - image: *superset-image + build: + <<: *common-build container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] env_file: docker/.env @@ -158,7 +167,8 @@ services: disable: true superset-tests-worker: - image: *superset-image + build: + <<: *common-build container_name: superset_tests_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] env_file: docker/.env From ccdc40993e514f75913228304be20a8c12696ff0 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 8 Mar 2024 17:33:01 -0800 Subject: [PATCH 2/4] more tweaks + docs --- docker-compose-non-dev.yml | 2 +- docker-compose.yml | 10 +- docker/docker-frontend.sh | 18 +- ...stalling-superset-using-docker-compose.mdx | 185 +++++++++++------- 4 files changed, 137 insertions(+), 78 deletions(-) diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml index d5c142926a31e..676bf59b53345 100644 --- a/docker-compose-non-dev.yml +++ b/docker-compose-non-dev.yml @@ -28,7 +28,7 @@ x-common-build: &common-build cache_from: - apache/superset-cache:3.9-slim-bookworm -version: "3.7" +version: "4.0" services: redis: image: redis:7 diff --git 
a/docker-compose.yml b/docker-compose.yml index 0c2d53c775838..23c040b2f6ec0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,7 +32,7 @@ x-common-build: &common-build cache_from: - apache/superset-cache:3.9-slim-bookworm -version: "3.7" +version: "4.0" services: nginx: image: nginx:latest @@ -127,12 +127,16 @@ services: superset-node: image: node:16 + environment: + # set this to false if you have perf issues running the npm i; npm run dev in-docker + # if you do so, you have to run this manually on the host, which should perform better! + BUILD_SUPERSET_FRONTEND_IN_DOCKER: ${BUILD_SUPERSET_FRONTEND_IN_DOCKER:-true} + SCARF_ANALYTICS: "${SCARF_ANALYTICS}" + PUPPETEER_SKIP_CHROMIUM_DOWNLOAD: ${BUILD_SUPERSET_FRONTEND_IN_DOCKER:-false} container_name: superset_node command: ["/app/docker/docker-frontend.sh"] env_file: docker/.env depends_on: *superset-depends-on - environment: - SCARF_ANALYTICS: "${SCARF_ANALYTICS}" volumes: *superset-volumes superset-worker: diff --git a/docker/docker-frontend.sh b/docker/docker-frontend.sh index a1ad94470ce5b..ba4ec0d2adc40 100755 --- a/docker/docker-frontend.sh +++ b/docker/docker-frontend.sh @@ -19,11 +19,17 @@ set -e # Packages needed for puppeteer: apt update -apt install -y chromium +if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then + apt install -y chromium +fi -cd /app/superset-frontend -npm install -f --no-optional --global webpack webpack-cli -npm install -f --no-optional +if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then + cd /app/superset-frontend + npm install -f --no-optional --global webpack webpack-cli + npm install -f --no-optional -echo "Running frontend" -npm run dev + echo "Running frontend" + npm run dev +else + echo "Skipping frontend build steps - YOU RUN IT MANUALLY ON THE HOST!" 
+fi diff --git a/docs/docs/installation/installing-superset-using-docker-compose.mdx b/docs/docs/installation/installing-superset-using-docker-compose.mdx index a13e49c094d74..74d26213ab2d1 100644 --- a/docs/docs/installation/installing-superset-using-docker-compose.mdx +++ b/docs/docs/installation/installing-superset-using-docker-compose.mdx @@ -5,12 +5,31 @@ sidebar_position: 1 version: 1 --- -## Installing Superset Locally Using Docker Compose +## Using Docker Compose The fastest way to try Superset locally is using Docker and Docker Compose on a Linux or Mac OSX computer. Superset does not have official support for Windows, so we have provided a VM workaround below. +It's **not** typical, nor recommended to use docker-compose to productionize an +application like Superset. docker-compose should be used for local development +or testing the app. + +Note that there are 3 major ways we support to run docker-compose: +1. **docker-compose.yml:** for interactive development, where we mount your local folder with the + frontend/backend files that you can edit and experience the changes you + make in the app in real time +1. **docker-compose-non-dev.yml** where we just build a more immutable image based on the + local branch and get all the required images running. Changes in the local branch + at the time you fire this up will be reflected, but changes to the code + while `up` won't be reflected in the app +1. **docker-compose-image-tag.yml** where we fetch an image from docker-hub say for the + `3.0.0` release for instance, and fire it up so you can try it. Here what's in + the local branch has no effect on what's running, we just fetch and run + pre-built images from docker-hub + +More on these three approaches after setting up the requirements common to all of them. + ### 1. 
Install a Docker Engine and Docker Compose **Mac OSX** @@ -31,12 +50,13 @@ part of the base Docker installation on Linux, once you have a working engine, f **Windows** -Superset is not officially supported on Windows unfortunately. One option for Windows users to -try out Superset locally is to install an Ubuntu Desktop VM via +Superset is not officially supported on Windows unfortunately. One option for Windows users to try +out Superset locally is to install an Ubuntu Desktop VM via [VirtualBox](https://www.virtualbox.org/) and proceed with the Docker on Linux instructions inside of that VM. We recommend assigning at least 8GB of RAM to the virtual machine as well as provisioning a hard drive of at least 40GB, so that there will be enough space for both the OS and -all of the required dependencies. Docker Desktop [recently added support for Windows Subsystem for Linux (WSL) 2](https://docs.docker.com/docker-for-windows/wsl/), which may be another option. +all of the required dependencies. Docker Desktop [recently added support for Windows Subsystem for +Linux (WSL) 2](https://docs.docker.com/docker-for-windows/wsl/), which may be another option. ### 2. Clone Superset's GitHub repository @@ -58,76 +78,86 @@ Navigate to the folder you created in step 1: cd superset ``` -When working on master branch, run the following commands to run `development` mode using `docker compose`: -```bash -docker compose up -``` -:::tip -When running in development mode the `superset-node` container needs to finish building assets in order for the UI to render properly. If you would just like to try out Superset without making any code changes follow the steps documented for `production` or a specific version below. 
-::: +When working on master branch, run the following commands to run `development` mode using `docker +compose`: ```bash docker compose up ``` :::tip When running in development mode the `superset-node` +container needs to finish building assets in order for the UI to render properly. If you would just +like to try out Superset without making any code changes follow the steps documented for +`production` or a specific version below. ::: -When working on master branch, run the following commands to run `production` mode using `docker compose`: +When working on master branch, run the following commands to run `production` mode using `docker +compose`: -```bash -docker compose -f docker-compose-non-dev.yml pull -docker compose -f docker-compose-non-dev.yml up -``` +```bash docker compose -f docker-compose-non-dev.yml pull docker compose -f +docker-compose-non-dev.yml up ``` -Alternatively, you can also run a specific version of Superset by first checking out -the branch/tag, and then starting `docker compose` with the `TAG` variable. -For example, to run the 3.0.0 version, run the following commands on Linux-based systems: +Alternatively, you can also run a specific version of Superset by first checking out the branch/tag, +and then starting `docker compose` with the `TAG` variable. 
For example, to run the 3.0.0 version, +run the following commands on Linux-based systems: -```bash -git checkout 3.0.0 -TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull -TAG=3.0.0 docker compose -f docker-compose-non-dev.yml up -``` +```bash git checkout 3.0.0 TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull TAG=3.0.0 +docker compose -f docker-compose-non-dev.yml up ``` If you are using Docker Desktop for Windows then run the following commands: -```bash -git checkout 3.0.0 -set TAG=3.0.0 -docker compose -f docker-compose-non-dev.yml pull -docker compose -f docker-compose-non-dev.yml up -``` - -:::tip -Note that some configuration is mandatory for production instances of Superset. In particular, Superset will not start without a user-specified value of `SECRET_KEY` in a Superset config file or `SUPERSET_SECRET_KEY` as an [environment variable](https://github.com/apache/superset/blob/master/docker/.env-non-dev). Please see [Configuring Superset](/docs/installation/configuring-superset/) for more details. -::: -:::caution -All of the content belonging to a Superset instance - charts, dashboards, users, etc. - is stored in its metadata database. In production, this database should be backed up. -The default installation with docker compose will store that data in a PostgreSQL database contained in a Docker [volume](https://docs.docker.com/storage/volumes/), -which is not backed up. To avoid risking data loss, either use a managed database for your metadata (recommended) or perform your own regular backups by extracting -and storing the contents of the default PostgreSQL database from its volume (here's an -[example of how to dump and restore](https://stackoverflow.com/questions/24718706/backup-restore-a-dockerized-postgresql-database)). -::: -You should see a wall of logging output from the containers being launched on your machine. Once -this output slows, you should have a running instance of Superset on your local machine! 
To -avoid the wall of text on future runs, add the `-d` option to the end of the `docker compose up` command. +```bash git checkout 3.0.0 set TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull docker +compose -f docker-compose-non-dev.yml up ``` + +:::tip Note that some configuration is mandatory for production instances of Superset. In +particular, Superset will not start without a user-specified value of `SECRET_KEY` in a Superset +config file or `SUPERSET_SECRET_KEY` as an [environment +variable](https://github.com/apache/superset/blob/master/docker/.env-non-dev). Please see +[Configuring Superset](/docs/installation/configuring-superset/) for more details. ::: :::caution +All of the content belonging to a Superset instance - charts, dashboards, users, etc. - is stored in +its metadata database. In production, this database should be backed up. The default installation +with docker compose will store that data in a PostgreSQL database contained in a Docker +[volume](https://docs.docker.com/storage/volumes/), which is not backed up. To avoid risking data +loss, either use a managed database for your metadata (recommended) or perform your own regular +backups by extracting and storing the contents of the default PostgreSQL database from its volume +(here's an [example of how to dump and +restore](https://stackoverflow.com/questions/24718706/backup-restore-a-dockerized-postgresql-database)). +::: You should see a wall of logging output from the containers being launched on your machine. Once +this output slows, you should have a running instance of Superset on your local machine! To avoid +the wall of text on future runs, add the `-d` option to the end of the `docker compose up` command. #### Configuring Docker Compose -The following is for users who want to configure how Superset runs in Docker Compose; otherwise, you can skip to the next section. 
+The following is for users who want to configure how Superset runs in Docker Compose; otherwise, you +can skip to the next section. -You can install additional python packages and apply config overrides by following the steps mentioned in [docker/README.md](https://github.com/apache/superset/tree/master/docker#configuration) +You can install additional python packages and apply config overrides by following the steps +mentioned in [docker/README.md](https://github.com/apache/superset/tree/master/docker#configuration) -You can configure the Docker Compose environment variables for dev and non-dev mode with `docker/.env` and `docker/.env-non-dev` respectively. These environment files set the environment for most containers in the Docker Compose setup, and some variables affect multiple containers and others only single ones. +You can configure the Docker Compose environment variables for dev and non-dev mode with +`docker/.env` and `docker/.env-non-dev` respectively. These environment files set the environment +for most containers in the Docker Compose setup, and some variables affect multiple containers and + others only single ones. -One important variable is `SUPERSET_LOAD_EXAMPLES` which determines whether the `superset_init` container will populate example data and visualizations into the metadata database. These examples are helpful for learning and testing out Superset but unnecessary for experienced users and production deployments. The loading process can sometimes take a few minutes and a good amount of CPU, so you may want to disable it on a resource-constrained device. +One important variable is `SUPERSET_LOAD_EXAMPLES` which determines whether the `superset_init` +container will populate example data and visualizations into the metadata database. These examples +are helpful for learning and testing out Superset but unnecessary for experienced users and +production deployments. 
The loading process can sometimes take a few minutes and a good amount of +CPU, so you may want to disable it on a resource-constrained device. -:::note -Users often want to connect to other databases from Superset. Currently, the easiest way to do this is to modify the `docker-compose-non-dev.yml` file and add your database as a service that the other services depend on (via `x-superset-depends-on`). Others have attempted to set `network_mode: host` on the Superset services, but these generally break the installation, because the configuration requires use of the Docker Compose DNS resolver for the service names. If you have a good solution for this, let us know! -::: +:::note Users often want to connect to other databases from Superset. Currently, the easiest way to +do this is to modify the `docker-compose-non-dev.yml` file and add your database as a service that + the other services depend on (via `x-superset-depends-on`). Others have attempted to set + `network_mode: host` on the Superset services, but these generally break the installation, + because the configuration requires use of the Docker Compose DNS resolver for the service names. + If you have a good solution for this, let us know! ::: -:::note -Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry data. Knowing the installation counts for different Superset versions informs the project's decisions about patching and long-term support. Scarf purges personally identifiable information (PII) and provides only aggregated statistics. +:::note Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry +data. Knowing the installation counts for different Superset versions informs the project's +decisions about patching and long-term support. Scarf purges personally identifiable information +(PII) and provides only aggregated statistics. 
-To opt-out of this data collection for packages downloaded through the Scarf Gateway by your docker compose based installation, edit the `x-superset-image:` line in your `docker-compose.yml` and `docker-compose-non-dev.yml` files, replacing `apachesuperset.docker.scarf.sh/apache/superset` with `apache/superset` to pull the image directly from Docker Hub. +To opt-out of this data collection for packages downloaded through the Scarf Gateway by your docker +compose based installation, edit the `x-superset-image:` line in your +`docker-compose-image-tag.yml` file, replacing `apachesuperset.docker.scarf.sh/apache/superset` with +`apache/superset` to pull the image directly from Docker Hub. -To disable the Scarf telemetry pixel, set the `SCARF_ANALYTICS` environment variable to `False` in your terminal and/or in your `docker/.env` and `docker/.env-non-dev` files. -::: +To disable the Scarf telemetry pixel, set the `SCARF_ANALYTICS` environment variable to `False` in +your terminal and/or in your `docker/.env` and `docker/.env-non-dev` files. ::: ### 4. Log in to Superset @@ -138,19 +168,38 @@ yours is one of them, please make sure it uses `http`. Log in with the default username and password: -```bash -username: admin -``` +```bash username: admin ``` -```bash -password: admin -``` +```bash password: admin ``` ### 5. Connecting Superset to your local database instance -When running Superset using `docker` or `docker compose` it runs in its own docker container, as if the Superset was running in a separate machine entirely. Therefore attempts to connect to your local database with the hostname `localhost` won't work as `localhost` refers to the docker container Superset is running in, and not your actual host machine. Fortunately, docker provides an easy way to access network resources in the host machine from inside a container, and we will leverage this capability to connect to our local database instance. 
- -Here the instructions are for connecting to postgresql (which is running on your host machine) from Superset (which is running in its docker container). Other databases may have slightly different configurations but gist would be same and boils down to 2 steps - - -1. **(Mac users may skip this step)** Configuring the local postgresql/database instance to accept public incoming connections. By default, postgresql only allows incoming connections from `localhost` and under Docker, unless you use `--network=host`, `localhost` will refer to different endpoints on the host machine and in a docker container respectively. Allowing postgresql to accept connections from the Docker involves making one-line changes to the files `postgresql.conf` and `pg_hba.conf`; you can find helpful links tailored to your OS / PG version on the web easily for this task. For Docker it suffices to only whitelist IPs `172.0.0.0/8` instead of `*`, but in any case you are _warned_ that doing this in a production database _may_ have disastrous consequences as you are opening your database to the public internet. -2. Instead of `localhost`, try using `host.docker.internal` (Mac users, Ubuntu) or `172.18.0.1` (Linux users) as the hostname when attempting to connect to the database. This is a Docker internal detail -- what is happening is that, in Mac systems, Docker Desktop creates a dns entry for the hostname `host.docker.internal` which resolves to the correct address for the host machine, whereas in Linux this is not the case (at least by default). If neither of these 2 hostnames work then you may want to find the exact hostname you want to use, for that you can do `ifconfig` or `ip addr show` and look at the IP address of `docker0` interface that must have been created by Docker for you. Alternately if you don't even see the `docker0` interface try (if needed with sudo) `docker network inspect bridge` and see if there is an entry for `"Gateway"` and note the IP address. 
+When running Superset using `docker` or `docker compose` it runs in its own docker container, as if +the Superset was running in a separate machine entirely. Therefore attempts to connect to your local +database with the hostname `localhost` won't work as `localhost` refers to the docker container +Superset is running in, and not your actual host machine. Fortunately, docker provides an easy way +to access network resources in the host machine from inside a container, and we will leverage this +capability to connect to our local database instance. + +Here the instructions are for connecting to postgresql (which is running on your host machine) from +Superset (which is running in its docker container). Other databases may have slightly different +configurations but gist would be same and boils down to 2 steps - + +1. **(Mac users may skip this step)** Configuring the local postgresql/database instance to accept +public incoming connections. By default, postgresql only allows incoming connections from +`localhost` and under Docker, unless you use `--network=host`, `localhost` will refer to different +endpoints on the host machine and in a docker container respectively. Allowing postgresql to accept +connections from the Docker involves making one-line changes to the files `postgresql.conf` and +`pg_hba.conf`; you can find helpful links tailored to your OS / PG version on the web easily for +this task. For Docker it suffices to only whitelist IPs `172.0.0.0/8` instead of `*`, but in any +case you are _warned_ that doing this in a production database _may_ have disastrous consequences as +you are opening your database to the public internet. 2. Instead of `localhost`, try using +`host.docker.internal` (Mac users, Ubuntu) or `172.18.0.1` (Linux users) as the hostname when +attempting to connect to the database. 
This is a Docker internal detail -- what is happening is +that, in Mac systems, Docker Desktop creates a dns entry for the hostname `host.docker.internal` +which resolves to the correct address for the host machine, whereas in Linux this is not the case +(at least by default). If neither of these 2 hostnames work then you may want to find the exact +hostname you want to use, for that you can do `ifconfig` or `ip addr show` and look at the IP +address of `docker0` interface that must have been created by Docker for you. Alternately if you +don't even see the `docker0` interface try (if needed with sudo) `docker network inspect bridge` and +see if there is an entry for `"Gateway"` and note the IP address. From 5cf9f2301c57b3241d92c433dade1705a8955b2a Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 8 Mar 2024 17:35:07 -0800 Subject: [PATCH 3/4] more tweaks + docs --- docker-compose-image-tag.yml | 101 +++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docker-compose-image-tag.yml diff --git a/docker-compose-image-tag.yml b/docker-compose-image-tag.yml new file mode 100644 index 0000000000000..34aec9bbb7531 --- /dev/null +++ b/docker-compose-image-tag.yml @@ -0,0 +1,101 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +x-superset-image: &superset-image apachesuperset.docker.scarf.sh/apache/superset:${TAG:-latest} +x-superset-depends-on: &superset-depends-on + - db + - redis +x-superset-volumes: + &superset-volumes # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container + - ./docker:/app/docker + - superset_home:/app/superset_home + +version: "3.7" +services: + redis: + image: redis:7 + container_name: superset_cache + restart: unless-stopped + volumes: + - redis:/data + + db: + env_file: docker/.env-non-dev + image: postgres:15 + container_name: superset_db + restart: unless-stopped + volumes: + - db_home:/var/lib/postgresql/data + - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d + + superset: + env_file: docker/.env-non-dev + image: *superset-image + container_name: superset_app + command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] + user: "root" + restart: unless-stopped + ports: + - 8088:8088 + depends_on: *superset-depends-on + volumes: *superset-volumes + + superset-init: + image: *superset-image + container_name: superset_init + command: ["/app/docker/docker-init.sh"] + env_file: docker/.env-non-dev + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + disable: true + + superset-worker: + image: *superset-image + container_name: superset_worker + command: ["/app/docker/docker-bootstrap.sh", "worker"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + test: + [ + "CMD-SHELL", + "celery -A superset.tasks.celery_app:app inspect ping -d celery@$$HOSTNAME", + ] + + superset-worker-beat: + image: *superset-image + container_name: superset_worker_beat + command: ["/app/docker/docker-bootstrap.sh", "beat"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: 
*superset-depends-on + user: "root" + volumes: *superset-volumes + healthcheck: + disable: true + +volumes: + superset_home: + external: false + db_home: + external: false + redis: + external: false From 08b7f98b5b2ef2b5a9ebca2b1c76f3e6b8659520 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 8 Mar 2024 18:54:02 -0800 Subject: [PATCH 4/4] removing env-non-dev --- UPDATING.md | 5 + docker-compose-image-tag.yml | 10 +- docker-compose-non-dev.yml | 10 +- docker/.env | 2 + docker/.env-non-dev | 53 ---------- docker/docker-frontend.sh | 2 +- docs/docs/frequently-asked-questions.mdx | 2 +- ...stalling-superset-using-docker-compose.mdx | 98 ++++++++++++------- 8 files changed, 80 insertions(+), 102 deletions(-) delete mode 100644 docker/.env-non-dev diff --git a/UPDATING.md b/UPDATING.md index 4dd68340bcdb4..c91de826bda15 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -28,6 +28,11 @@ assists people when migrating to a new version. - [26450](https://github.com/apache/superset/pull/26450): Deprecates the `KV_STORE` feature flag and its related assets such as the API endpoint and `keyvalue` table. The main dependency of this feature is the `SHARE_QUERIES_VIA_KV_STORE` feature flag which allows sharing SQL Lab queries without the necessity of saving the query. Our intention is to use the permalink feature to implement this use case before 5.0 and that's why we are deprecating the feature flag now. +- [27434](https://github.com/apache/superset/pull/27434/files): DO NOT USE our docker-compose.* + files for production use cases! While we never really supported, + nor should have tried to support, docker-compose for production use cases, we have now actively + taken a stance against supporting it. See the PR for details. + ### Breaking Changes - [27130](https://github.com/apache/superset/pull/27130): Fixes the DELETE `/database/{id}/ssh_tunnel/`` endpoint to now correctly accept a database ID as a parameter, rather than an SSH tunnel ID. 
diff --git a/docker-compose-image-tag.yml b/docker-compose-image-tag.yml index 34aec9bbb7531..07f0d0dcb14b7 100644 --- a/docker-compose-image-tag.yml +++ b/docker-compose-image-tag.yml @@ -33,7 +33,7 @@ services: - redis:/data db: - env_file: docker/.env-non-dev + env_file: docker/.env image: postgres:15 container_name: superset_db restart: unless-stopped @@ -42,7 +42,7 @@ services: - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d superset: - env_file: docker/.env-non-dev + env_file: docker/.env image: *superset-image container_name: superset_app command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] @@ -57,7 +57,7 @@ services: image: *superset-image container_name: superset_init command: ["/app/docker/docker-init.sh"] - env_file: docker/.env-non-dev + env_file: docker/.env depends_on: *superset-depends-on user: "root" volumes: *superset-volumes @@ -68,7 +68,7 @@ services: image: *superset-image container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" @@ -84,7 +84,7 @@ services: image: *superset-image container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" diff --git a/docker-compose-non-dev.yml b/docker-compose-non-dev.yml index 676bf59b53345..b49d070118bcb 100644 --- a/docker-compose-non-dev.yml +++ b/docker-compose-non-dev.yml @@ -38,7 +38,7 @@ services: - redis:/data db: - env_file: docker/.env-non-dev + env_file: docker/.env image: postgres:15 container_name: superset_db restart: unless-stopped @@ -47,7 +47,7 @@ services: - ./docker/docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d superset: - env_file: docker/.env-non-dev + env_file: docker/.env build: <<: *common-build container_name: superset_app @@ -64,7 +64,7 @@ 
services: build: <<: *common-build command: ["/app/docker/docker-init.sh"] - env_file: docker/.env-non-dev + env_file: docker/.env depends_on: *superset-depends-on user: "root" volumes: *superset-volumes @@ -76,7 +76,7 @@ services: <<: *common-build container_name: superset_worker command: ["/app/docker/docker-bootstrap.sh", "worker"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" @@ -93,7 +93,7 @@ services: <<: *common-build container_name: superset_worker_beat command: ["/app/docker/docker-bootstrap.sh", "beat"] - env_file: docker/.env-non-dev + env_file: docker/.env restart: unless-stopped depends_on: *superset-depends-on user: "root" diff --git a/docker/.env b/docker/.env index 25bdac0ab7fa0..1b7d3df8c195c 100644 --- a/docker/.env +++ b/docker/.env @@ -51,3 +51,5 @@ SUPERSET_LOAD_EXAMPLES=yes CYPRESS_CONFIG=false SUPERSET_PORT=8088 MAPBOX_API_KEY='' + +SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET diff --git a/docker/.env-non-dev b/docker/.env-non-dev deleted file mode 100644 index a86ddbd193f02..0000000000000 --- a/docker/.env-non-dev +++ /dev/null @@ -1,53 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -COMPOSE_PROJECT_NAME=superset - -# database configurations (do not modify) -DATABASE_DB=superset -DATABASE_HOST=db -DATABASE_PASSWORD=superset -DATABASE_USER=superset -DATABASE_PORT=5432 -DATABASE_DIALECT=postgresql - -EXAMPLES_DB=examples -EXAMPLES_HOST=db -EXAMPLES_USER=examples -EXAMPLES_PASSWORD=examples -EXAMPLES_PORT=5432 - -# database engine specific environment variables -# change the below if you prefer another database engine -POSTGRES_DB=superset -POSTGRES_USER=superset -POSTGRES_PASSWORD=superset -#MYSQL_DATABASE=superset -#MYSQL_USER=superset -#MYSQL_PASSWORD=superset -#MYSQL_RANDOM_ROOT_PASSWORD=yes - -# Add the mapped in /app/pythonpath_docker which allows devs to override stuff -PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev -REDIS_HOST=redis -REDIS_PORT=6379 - -SUPERSET_ENV=production -SUPERSET_LOAD_EXAMPLES=yes -SUPERSET_SECRET_KEY=TEST_NON_DEV_SECRET -CYPRESS_CONFIG=false -SUPERSET_PORT=8088 -MAPBOX_API_KEY='' diff --git a/docker/docker-frontend.sh b/docker/docker-frontend.sh index ba4ec0d2adc40..85c57cbf0fc88 100755 --- a/docker/docker-frontend.sh +++ b/docker/docker-frontend.sh @@ -19,7 +19,7 @@ set -e # Packages needed for puppeteer: apt update -if [ "$BUILD_SUPERSET_FRONTEND_IN_DOCKER" = "true" ]; then +if [ "$PUPPETEER_SKIP_CHROMIUM_DOWNLOAD" = "false" ]; then apt install -y chromium fi diff --git a/docs/docs/frequently-asked-questions.mdx b/docs/docs/frequently-asked-questions.mdx index 8e42d062a24fe..e848bc07a3e55 100644 --- a/docs/docs/frequently-asked-questions.mdx +++ b/docs/docs/frequently-asked-questions.mdx @@ -89,7 +89,7 @@ SUPERSET_WEBSERVER_TIMEOUT = 60 ### Why is the map not visible in the geospatial visualization? 
You need to register a free account at [Mapbox.com](https://www.mapbox.com), obtain an API key, and add it -to **.env** and **.env-non-dev** at the key MAPBOX_API_KEY: +to **.env** at the key MAPBOX_API_KEY: ``` MAPBOX_API_KEY = "longstringofalphanumer1c" diff --git a/docs/docs/installation/installing-superset-using-docker-compose.mdx b/docs/docs/installation/installing-superset-using-docker-compose.mdx index 74d26213ab2d1..d40ce649c4fae 100644 --- a/docs/docs/installation/installing-superset-using-docker-compose.mdx +++ b/docs/docs/installation/installing-superset-using-docker-compose.mdx @@ -15,6 +15,8 @@ It's **not** typical, nor recommended to use docker-compose to productionize an application like Superset. docker-compose should be used for local development or testing the app. +**DO NOT USE THIS FOR PRODUCTION!** + Note that there are 3 major ways we support to run docker-compose: 1. **docker-compose.yml:** for interactive development, where we mount your local folder with the frontend/backend files that you can edit and experience the changes you @@ -72,54 +74,68 @@ current directory. ### 3. Launch Superset Through Docker Compose -Navigate to the folder you created in step 1: +First let's assume you're familiar with docker-compose mechanics. Here we'll refer generally +to `docker compose up` even though in some cases you may want to force a check for newer remote +images using `docker compose pull`, force a build with `docker compose build` or force a build +on latest base images using `docker compose build --pull`. In most cases though, the simple +`up` command should do just fine. Refer to docker compose docs for more information on the topic. 
+ +### Option #1 - for an interactive development environment ```bash -cd superset +docker compose up ``` -When working on master branch, run the following commands to run `development` mode using `docker -compose`: ```bash docker compose up ``` :::tip When running in development mode the `superset-node` +:::tip +When running in development mode the `superset-node` container needs to finish building assets in order for the UI to render properly. If you would just like to try out Superset without making any code changes follow the steps documented for -`production` or a specific version below. ::: +`production` or a specific version below. +::: -When working on master branch, run the following commands to run `production` mode using `docker -compose`: +:::tip +By default, we mount the local superset-frontend folder here and run `npm install` as well +as `npm run dev`, which triggers webpack to compile/bundle the frontend code. Depending +on your local setup, especially if you have less than 16GB of memory, these operations may be +very slow. In this case, we recommend you set the env var +`BUILD_SUPERSET_FRONTEND_IN_DOCKER` to `false` and run the frontend build locally in a terminal instead. +Simply run `npm i && npm run dev`; this should be MUCH faster. +::: -```bash docker compose -f docker-compose-non-dev.yml pull docker compose -f -docker-compose-non-dev.yml up ``` +### Option #2 - build an immutable image from the local branch -Alternatively, you can also run a specific version of Superset by first checking out the branch/tag, -and then starting `docker compose` with the `TAG` variable.
For example, to run the 3.0.0 version, -run the following commands on Linux-based systems: +```bash +docker compose -f docker-compose-non-dev.yml up +``` -```bash git checkout 3.0.0 TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull TAG=3.0.0 -docker compose -f docker-compose-non-dev.yml up ``` +### Option #3 - pull and run a release image from Docker Hub + +```bash +export TAG=3.1.1 +docker compose -f docker-compose-image-tag.yml up +``` -If you are using Docker Desktop for Windows then run the following commands: +Here various release tags, GitHub SHAs, and the latest `master` can be referenced by the TAG env var. +Refer to the docker-related documentation to learn more about existing tags you can point to +from Docker Hub. -```bash git checkout 3.0.0 set TAG=3.0.0 docker compose -f docker-compose-non-dev.yml pull docker -compose -f docker-compose-non-dev.yml up ``` +## General tips & configuration -:::tip Note that some configuration is mandatory for production instances of Superset. In -particular, Superset will not start without a user-specified value of `SECRET_KEY` in a Superset -config file or `SUPERSET_SECRET_KEY` as an [environment -variable](https://github.com/apache/superset/blob/master/docker/.env-non-dev). Please see -[Configuring Superset](/docs/installation/configuring-superset/) for more details. ::: :::caution +:::caution All of the content belonging to a Superset instance - charts, dashboards, users, etc. - is stored in its metadata database. In production, this database should be backed up. The default installation with docker compose will store that data in a PostgreSQL database contained in a Docker -[volume](https://docs.docker.com/storage/volumes/), which is not backed up.
To avoid risking data -loss, either use a managed database for your metadata (recommended) or perform your own regular -backups by extracting and storing the contents of the default PostgreSQL database from its volume -(here's an [example of how to dump and -restore](https://stackoverflow.com/questions/24718706/backup-restore-a-dockerized-postgresql-database)). -::: You should see a wall of logging output from the containers being launched on your machine. Once +[volume](https://docs.docker.com/storage/volumes/), which is not backed up. + +Again **DO NOT USE THIS FOR PRODUCTION** + +::: + +You should see a wall of logging output from the containers being launched on your machine. Once this output slows, you should have a running instance of Superset on your local machine! To avoid the wall of text on future runs, add the `-d` option to the end of the `docker compose up` command. -#### Configuring Docker Compose +#### Configuring Further The following is for users who want to configure how Superset runs in Docker Compose; otherwise, you can skip to the next section. @@ -128,9 +144,9 @@ You can install additional python packages and apply config overrides by followi mentioned in [docker/README.md](https://github.com/apache/superset/tree/master/docker#configuration) You can configure the Docker Compose environment variables for dev and non-dev mode with -`docker/.env` and `docker/.env-non-dev` respectively. These environment files set the environment +`docker/.env`. This environment file sets the environment for most containers in the Docker Compose setup, and some variables affect multiple containers and - others only single ones. +others only single ones. One important variable is `SUPERSET_LOAD_EXAMPLES` which determines whether the `superset_init` container will populate example data and visualizations into the metadata database. These examples @@ -139,14 +155,17 @@ production deployments. 
The loading process can sometimes take a few minutes and CPU, so you may want to disable it on a resource-constrained device. -:::note Users often want to connect to other databases from Superset. Currently, the easiest way to +:::note +Users often want to connect to other databases from Superset. Currently, the easiest way to do this is to modify the `docker-compose-non-dev.yml` file and add your database as a service that the other services depend on (via `x-superset-depends-on`). Others have attempted to set `network_mode: host` on the Superset services, but these generally break the installation, because the configuration requires use of the Docker Compose DNS resolver for the service names. - If you have a good solution for this, let us know! ::: + If you have a good solution for this, let us know! +::: -:::note Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry +:::note +Superset uses [Scarf Gateway](https://about.scarf.sh/scarf-gateway) to collect telemetry data. Knowing the installation counts for different Superset versions informs the project's decisions about patching and long-term support. Scarf purges personally identifiable information (PII) and provides only aggregated statistics. @@ -157,7 +176,8 @@ compose based installation, edit the `x-superset-image:` line in your `docker-co `apache/superset` to pull the image directly from Docker Hub. To disable the Scarf telemetry pixel, set the `SCARF_ANALYTICS` environment variable to `False` in -your terminal and/or in your `docker/.env` and `docker/.env-non-dev` files. ::: +your terminal and/or in your `docker/.env` file. +::: ### 4. Log in to Superset @@ -168,9 +188,13 @@ yours is one of them, please make sure it uses `http`. Log in with the default username and password: -```bash username: admin ``` +```bash +username: admin +``` -```bash password: admin ``` +```bash +password: admin +``` ### 5. Connecting Superset to your local database instance