From 29037522ad0f1b25b20e323c242b6d8ade87bed9 Mon Sep 17 00:00:00 2001 From: Daniel Vaz Gaspar Date: Tue, 11 Jan 2022 15:13:04 +0000 Subject: [PATCH] fix: Change default SECRET_KEY, improve docs and banner warning (#17984) * fix: Change default SECRET_KEY, improve docs and banner warning on default * lint * Update superset/initialization/__init__.py Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> * add a secret migration procedure, update UPDATING * fix lint Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com> --- UPDATING.md | 12 +- .../pages/docs/installation/configuring.mdx | 5 +- superset/cli.py | 29 +++++ superset/config.py | 7 +- superset/constants.py | 1 + superset/initialization/__init__.py | 16 +++ superset/utils/encrypt.py | 121 +++++++++++++++++- 7 files changed, 173 insertions(+), 18 deletions(-) diff --git a/UPDATING.md b/UPDATING.md index 5a96f86f4d5fd..0f8b9a4742032 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -22,20 +22,10 @@ under the License. This file documents any backwards-incompatible changes in Superset and assists people when migrating to a new version. -## Next - -### Breaking Changes - -### Potential Downtime - -### Deprecations - -### Other - ## 1.4.0 ### Breaking Changes - +- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets - [16660](https://github.com/apache/superset/pull/16660): The `columns` Jinja parameter has been renamed `table_columns` to make the `columns` query object parameter available in the Jinja context. - [16711](https://github.com/apache/superset/pull/16711): The `url_param` Jinja function will now by default escape the result. For instance, the value `O'Brien` will now be changed to `O''Brien`. To disable this behavior, call `url_param` with `escape_result` set to `False`: `url_param("my_key", "my default", escape_result=False)`. diff --git a/docs/src/pages/docs/installation/configuring.mdx b/docs/src/pages/docs/installation/configuring.mdx index 922f97d7a2d4b..f5ae5d2d12f8b 100644 --- a/docs/src/pages/docs/installation/configuring.mdx +++ b/docs/src/pages/docs/installation/configuring.mdx @@ -21,7 +21,7 @@ SUPERSET_WEBSERVER_PORT = 8088 # Flask App Builder configuration # Your App secret key -SECRET_KEY = '\2\1thisismyscretkey\1\2\e\y\y\h' +SECRET_KEY = 'USE_YOUR_OWN_SECURE_RANDOM_KEY' # The SQLAlchemy connection string to your database backend # This connection defines the path to the database that stores your @@ -56,7 +56,8 @@ for more information on how to configure it. Make sure to change: - `SQLALCHEMY_DATABASE_URI`: by default it is stored at ~/.superset/superset.db -- `SECRET_KEY`: to a long random string +- `SECRET_KEY`: Use a strong complex alphanumeric string and use a tool + to help you generate a sufficiently random sequence, ex: openssl rand -base64 42" If you need to exempt endpoints from CSRF (e.g. if you are running a custom auth postback endpoint), you can add the endpoints to `WTF_CSRF_EXEMPT_LIST`: diff --git a/superset/cli.py b/superset/cli.py index 55a2c24311494..bbdafb3dedd95 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -41,6 +41,7 @@ from superset.extensions import celery_app, db from superset.utils import core as utils from superset.utils.celery import session_scope +from superset.utils.encrypt import SecretsMigrator from superset.utils.urls import get_url_path logger = logging.getLogger(__name__) @@ -873,3 +874,31 @@ def update_api_docs() -> None: json.dump(api_spec.to_dict(), outfile, sort_keys=True, indent=2) else: click.secho("API version not found", err=True) + + +@superset.command() +@with_appcontext +@click.option( + "--previous_secret_key", + "-a", + required=False, + help="An optional previous secret key, if PREVIOUS_SECRET_KEY " + "is not set on the config", +) +def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None: + previous_secret_key = previous_secret_key or current_app.config.get( + "PREVIOUS_SECRET_KEY" + ) + if previous_secret_key is None: + click.secho("A previous secret key must be provided", err=True) + sys.exit(1) + secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key) + try: + secrets_migrator.run() + except ValueError as exc: + click.secho( + f"An error occurred, " + f"probably an invalid previoud secret key was provided. Error:[{exc}]", + err=True, + ) + sys.exit(1) diff --git a/superset/config.py b/superset/config.py index 7f504582c5920..a060416fe302c 100644 --- a/superset/config.py +++ b/superset/config.py @@ -40,6 +40,7 @@ from pandas.io.parsers import STR_NA_VALUES from werkzeug.local import LocalProxy +from superset.constants import CHANGE_ME_SECRET_KEY from superset.jinja_context import BaseTemplateProcessor from superset.stats_logger import DummyStatsLogger from superset.typing import CacheConfig @@ -153,8 +154,10 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]: SQLALCHEMY_TRACK_MODIFICATIONS = False # --------------------------------------------------------- -# Your App secret key -SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h" +# Your App secret key. Make sure you override it on superset_config.py. +# Use a strong complex alphanumeric string and use a tool to help you generate +# a sufficiently random sequence, ex: openssl rand -base64 42" +SECRET_KEY = CHANGE_ME_SECRET_KEY # The SQLAlchemy connection string. SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db") diff --git a/superset/constants.py b/superset/constants.py index 6ea189bd81059..c08b8ac69c5c4 100644 --- a/superset/constants.py +++ b/superset/constants.py @@ -22,6 +22,7 @@ NULL_STRING = "" +CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET" # UUID for the examples database EXAMPLES_DB_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee" diff --git a/superset/initialization/__init__.py b/superset/initialization/__init__.py index b8a1098cf0b9d..cfea38f8be3e8 100644 --- a/superset/initialization/__init__.py +++ b/superset/initialization/__init__.py @@ -29,6 +29,7 @@ from werkzeug.middleware.proxy_fix import ProxyFix from superset.connectors.connector_registry import ConnectorRegistry +from superset.constants import CHANGE_ME_SECRET_KEY from superset.extensions import ( _event_logger, APP_DIR, @@ -568,12 +569,27 @@ def init_app_in_ctx(self) -> None: self.init_views() + def check_secret_key(self) -> None: + if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY: + top_banner = 80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-" + bottom_banner = 80 * "-" + "\n" + 80 * "-" + logger.warning(top_banner) + logger.warning( + "A Default SECRET_KEY was detected, please use superset_config.py " + "to override it.\n" + "Use a strong complex alphanumeric string and use a tool to help" + " you generate \n" + "a sufficiently random sequence, ex: openssl rand -base64 42" + ) + logger.warning(bottom_banner) + def init_app(self) -> None: """ Main entry point which will delegate to other methods in order to fully init the app """ self.pre_init() + self.check_secret_key() # Configuration of logging must be done first to apply the formatter properly self.configure_logging() # Configuration of feature_flags must be done first to allow init features diff --git a/superset/utils/encrypt.py b/superset/utils/encrypt.py index 240ca5aa8bf91..0fa2a6d177a06 100644 --- a/superset/utils/encrypt.py +++ b/superset/utils/encrypt.py @@ -14,13 +14,17 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional from flask import Flask -from sqlalchemy import TypeDecorator +from sqlalchemy import text, TypeDecorator +from sqlalchemy.engine import Connection, Dialect, RowProxy from sqlalchemy_utils import EncryptedType +logger = logging.getLogger(__name__) + class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-methods @abstractmethod @@ -28,7 +32,7 @@ def create( self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: pass @@ -40,7 +44,7 @@ def create( self, app_config: Optional[Dict[str, Any]], *args: List[Any], - **kwargs: Optional[Dict[str, Any]] + **kwargs: Optional[Dict[str, Any]], ) -> TypeDecorator: if app_config: return EncryptedType(*args, app_config["SECRET_KEY"], **kwargs) @@ -66,3 +70,114 @@ def create( return self._concrete_type_adapter.create(self._config, *args, **kwargs) raise Exception("App not initialized yet. Please call init_app first") + + +class SecretsMigrator: + def __init__(self, previous_secret_key: str) -> None: + from superset import db # pylint: disable=import-outside-toplevel + + self._db = db + self._previous_secret_key = previous_secret_key + self._dialect: Dialect = db.engine.url.get_dialect() + + def discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]: + """ + Iterates over SqlAlchemy's metadata, looking for EncryptedType + columns along the way. Builds up a dict of + table_name -> dict of col_name: enc type instance + :return: + """ + meta_info: Dict[str, Any] = {} + + for table_name, table in self._db.metadata.tables.items(): + for col_name, col in table.columns.items(): + if isinstance(col.type, EncryptedType): + cols = meta_info.get(table_name, {}) + cols[col_name] = col.type + meta_info[table_name] = cols + + return meta_info + + @staticmethod + def _read_bytes(col_name: str, value: Any) -> Optional[bytes]: + if value is None or isinstance(value, bytes): + return value + # Note that the Postgres Driver returns memoryview's for BLOB types + if isinstance(value, memoryview): + return value.tobytes() + if isinstance(value, str): + return bytes(value.encode("utf8")) + + # Just bail if we haven't seen this type before... + raise ValueError(f"DB column {col_name} has unknown type: {type(value)}") + + @staticmethod + def _select_columns_from_table( + conn: Connection, column_names: List[str], table_name: str + ) -> RowProxy: + return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}") + + def _re_encrypt_row( + self, + conn: Connection, + row: RowProxy, + table_name: str, + columns: Dict[str, EncryptedType], + ) -> None: + """ + Re encrypts all columns in a Row + :param row: Current row to reencrypt + :param columns: Meta info from columns + """ + re_encrypted_columns = {} + + for column_name, encrypted_type in columns.items(): + previous_encrypted_type = EncryptedType( + type_in=encrypted_type.underlying_type, key=self._previous_secret_key + ) + try: + unencrypted_value = previous_encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + except ValueError as exc: + # Failed to unencrypt + try: + encrypted_type.process_result_value( + self._read_bytes(column_name, row[column_name]), self._dialect + ) + logger.info( + "Current secret is able to decrypt value on column [%s.%s]," + " nothing to do", + table_name, + column_name, + ) + return + except Exception: + raise Exception from exc + + re_encrypted_columns[column_name] = encrypted_type.process_bind_param( + unencrypted_value, self._dialect, + ) + + set_cols = ",".join( + [f"{name} = :{name}" for name in list(re_encrypted_columns.keys())] + ) + logger.info("Processing table: %s", table_name) + conn.execute( + text(f"UPDATE {table_name} SET {set_cols} WHERE id = :id"), + id=row["id"], + **re_encrypted_columns, + ) + + def run(self) -> None: + encrypted_meta_info = self.discover_encrypted_fields() + + with self._db.engine.begin() as conn: + logger.info("Collecting info for re encryption") + for table_name, columns in encrypted_meta_info.items(): + column_names = list(columns.keys()) + rows = self._select_columns_from_table(conn, column_names, table_name) + + for row in rows: + self._re_encrypt_row(conn, row, table_name, columns) + logger.info("All tables processed")