Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Change default SECRET_KEY, improve docs and banner warning on de… #17984

Merged
merged 5 commits into from
Jan 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions UPDATING.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ assists people when migrating to a new version.

### Breaking Changes

- [17984](https://github.com/apache/superset/pull/17984): Default Flask SECRET_KEY has changed for security reasons. You should always override with your own secret. Set `PREVIOUS_SECRET_KEY` (ex: PREVIOUS_SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h") with your previous key and use `superset re-encrypt-secrets` to rotate you current secrets
- [17556](https://github.com/apache/superset/pull/17556): Bumps mysqlclient from v1 to v2
- [15254](https://github.com/apache/superset/pull/15254): Previously `QUERY_COST_FORMATTERS_BY_ENGINE`, `SQL_VALIDATORS_BY_ENGINE` and `SCHEDULED_QUERIES` were expected to be defined in the feature flag dictionary in the `config.py` file. These should now be defined as a top-level config, with the feature flag dictionary being reserved for boolean only values.
- [17290](https://github.com/apache/superset/pull/17290): Bumps pandas to `1.3.4` and pyarrow to `5.0.0`
Expand Down
5 changes: 3 additions & 2 deletions docs/src/pages/docs/installation/configuring.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ SUPERSET_WEBSERVER_PORT = 8088

# Flask App Builder configuration
# Your App secret key
SECRET_KEY = '\2\1thisismyscretkey\1\2\e\y\y\h'
SECRET_KEY = 'USE_YOUR_OWN_SECURE_RANDOM_KEY'

# The SQLAlchemy connection string to your database backend
# This connection defines the path to the database that stores your
Expand Down Expand Up @@ -56,7 +56,8 @@ for more information on how to configure it.
Make sure to change:

- `SQLALCHEMY_DATABASE_URI`: by default it is stored at ~/.superset/superset.db
- `SECRET_KEY`: to a long random string
- `SECRET_KEY`: Use a strong complex alphanumeric string and use a tool
to help you generate a sufficiently random sequence, ex: openssl rand -base64 42"

If you need to exempt endpoints from CSRF (e.g. if you are running a custom auth postback endpoint),
you can add the endpoints to `WTF_CSRF_EXEMPT_LIST`:
Expand Down
29 changes: 29 additions & 0 deletions superset/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from superset.extensions import celery_app, db
from superset.utils import core as utils
from superset.utils.celery import session_scope
from superset.utils.encrypt import SecretsMigrator
from superset.utils.urls import get_url_path

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -870,3 +871,31 @@ def update_api_docs() -> None:
json.dump(api_spec.to_dict(), outfile, sort_keys=True, indent=2)
else:
click.secho("API version not found", err=True)


@superset.command()
@with_appcontext
@click.option(
"--previous_secret_key",
"-a",
required=False,
help="An optional previous secret key, if PREVIOUS_SECRET_KEY "
"is not set on the config",
)
def re_encrypt_secrets(previous_secret_key: Optional[str] = None) -> None:
previous_secret_key = previous_secret_key or current_app.config.get(
"PREVIOUS_SECRET_KEY"
)
if previous_secret_key is None:
click.secho("A previous secret key must be provided", err=True)
sys.exit(1)
secrets_migrator = SecretsMigrator(previous_secret_key=previous_secret_key)
try:
secrets_migrator.run()
except ValueError as exc:
click.secho(
f"An error occurred, "
f"probably an invalid previoud secret key was provided. Error:[{exc}]",
err=True,
)
sys.exit(1)
7 changes: 5 additions & 2 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from typing_extensions import Literal
from werkzeug.local import LocalProxy

from superset.constants import CHANGE_ME_SECRET_KEY
from superset.jinja_context import BaseTemplateProcessor
from superset.stats_logger import DummyStatsLogger
from superset.typing import CacheConfig
Expand Down Expand Up @@ -160,8 +161,10 @@ def _try_json_readsha(filepath: str, length: int) -> Optional[str]:
SQLALCHEMY_TRACK_MODIFICATIONS = False
# ---------------------------------------------------------

# Your App secret key
SECRET_KEY = "\2\1thisismyscretkey\1\2\\e\\y\\y\\h"
# Your App secret key. Make sure you override it on superset_config.py.
# Use a strong complex alphanumeric string and use a tool to help you generate
# a sufficiently random sequence, ex: openssl rand -base64 42"
SECRET_KEY = CHANGE_ME_SECRET_KEY

# The SQLAlchemy connection string.
SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "superset.db")
Expand Down
1 change: 1 addition & 0 deletions superset/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

NULL_STRING = "<NULL>"

CHANGE_ME_SECRET_KEY = "CHANGE_ME_TO_A_COMPLEX_RANDOM_SECRET"

# UUID for the examples database
EXAMPLES_DB_UUID = "a2dc77af-e654-49bb-b321-40f6b559a1ee"
Expand Down
16 changes: 16 additions & 0 deletions superset/initialization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from werkzeug.middleware.proxy_fix import ProxyFix

from superset.connectors.connector_registry import ConnectorRegistry
from superset.constants import CHANGE_ME_SECRET_KEY
from superset.extensions import (
_event_logger,
APP_DIR,
Expand Down Expand Up @@ -572,12 +573,27 @@ def init_app_in_ctx(self) -> None:

self.init_views()

def check_secret_key(self) -> None:
if self.config["SECRET_KEY"] == CHANGE_ME_SECRET_KEY:
top_banner = 80 * "-" + "\n" + 36 * " " + "WARNING\n" + 80 * "-"
bottom_banner = 80 * "-" + "\n" + 80 * "-"
logger.warning(top_banner)
logger.warning(
"A Default SECRET_KEY was detected, please use superset_config.py "
"to override it.\n"
"Use a strong complex alphanumeric string and use a tool to help"
" you generate \n"
"a sufficiently random sequence, ex: openssl rand -base64 42"
)
logger.warning(bottom_banner)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this happens should we do a sys.exit call as well so the server doesn't even start up?

Copy link
Member Author

@dpgaspar dpgaspar Jan 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I evaluated something similar, but I'm avoiding break pip installations and docker. It's really nice to have really fast and frictionless first installations.


def init_app(self) -> None:
"""
Main entry point which will delegate to other methods in
order to fully init the app
"""
self.pre_init()
self.check_secret_key()
# Configuration of logging must be done first to apply the formatter properly
self.configure_logging()
# Configuration of feature_flags must be done first to allow init features
Expand Down
121 changes: 118 additions & 3 deletions superset/utils/encrypt.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,25 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional

from flask import Flask
from sqlalchemy import TypeDecorator
from sqlalchemy import text, TypeDecorator
from sqlalchemy.engine import Connection, Dialect, RowProxy
from sqlalchemy_utils import EncryptedType

logger = logging.getLogger(__name__)


class AbstractEncryptedFieldAdapter(ABC): # pylint: disable=too-few-public-methods
@abstractmethod
def create(
self,
app_config: Optional[Dict[str, Any]],
*args: List[Any],
**kwargs: Optional[Dict[str, Any]]
**kwargs: Optional[Dict[str, Any]],
) -> TypeDecorator:
pass

Expand All @@ -40,7 +44,7 @@ def create(
self,
app_config: Optional[Dict[str, Any]],
*args: List[Any],
**kwargs: Optional[Dict[str, Any]]
**kwargs: Optional[Dict[str, Any]],
) -> TypeDecorator:
if app_config:
return EncryptedType(*args, app_config["SECRET_KEY"], **kwargs)
Expand All @@ -66,3 +70,114 @@ def create(
return self._concrete_type_adapter.create(self._config, *args, **kwargs)

raise Exception("App not initialized yet. Please call init_app first")


class SecretsMigrator:
def __init__(self, previous_secret_key: str) -> None:
from superset import db # pylint: disable=import-outside-toplevel

self._db = db
self._previous_secret_key = previous_secret_key
self._dialect: Dialect = db.engine.url.get_dialect()

def discover_encrypted_fields(self) -> Dict[str, Dict[str, EncryptedType]]:
"""
Iterates over SqlAlchemy's metadata, looking for EncryptedType
columns along the way. Builds up a dict of
table_name -> dict of col_name: enc type instance
:return:
"""
meta_info: Dict[str, Any] = {}

for table_name, table in self._db.metadata.tables.items():
for col_name, col in table.columns.items():
if isinstance(col.type, EncryptedType):
cols = meta_info.get(table_name, {})
cols[col_name] = col.type
meta_info[table_name] = cols

return meta_info

@staticmethod
def _read_bytes(col_name: str, value: Any) -> Optional[bytes]:
if value is None or isinstance(value, bytes):
return value
# Note that the Postgres Driver returns memoryview's for BLOB types
if isinstance(value, memoryview):
return value.tobytes()
if isinstance(value, str):
return bytes(value.encode("utf8"))

# Just bail if we haven't seen this type before...
raise ValueError(f"DB column {col_name} has unknown type: {type(value)}")

@staticmethod
def _select_columns_from_table(
conn: Connection, column_names: List[str], table_name: str
) -> RowProxy:
return conn.execute(f"SELECT id, {','.join(column_names)} FROM {table_name}")

def _re_encrypt_row(
self,
conn: Connection,
row: RowProxy,
table_name: str,
columns: Dict[str, EncryptedType],
) -> None:
"""
Re encrypts all columns in a Row
:param row: Current row to reencrypt
:param columns: Meta info from columns
"""
re_encrypted_columns = {}

for column_name, encrypted_type in columns.items():
previous_encrypted_type = EncryptedType(
type_in=encrypted_type.underlying_type, key=self._previous_secret_key
)
try:
unencrypted_value = previous_encrypted_type.process_result_value(
self._read_bytes(column_name, row[column_name]), self._dialect
)
except ValueError as exc:
# Failed to unencrypt
try:
encrypted_type.process_result_value(
self._read_bytes(column_name, row[column_name]), self._dialect
)
logger.info(
"Current secret is able to decrypt value on column [%s.%s],"
" nothing to do",
table_name,
column_name,
)
return
except Exception:
raise Exception from exc

re_encrypted_columns[column_name] = encrypted_type.process_bind_param(
unencrypted_value, self._dialect,
)

set_cols = ",".join(
[f"{name} = :{name}" for name in list(re_encrypted_columns.keys())]
)
logger.info("Processing table: %s", table_name)
conn.execute(
text(f"UPDATE {table_name} SET {set_cols} WHERE id = :id"),
id=row["id"],
**re_encrypted_columns,
)

def run(self) -> None:
encrypted_meta_info = self.discover_encrypted_fields()

with self._db.engine.begin() as conn:
logger.info("Collecting info for re encryption")
for table_name, columns in encrypted_meta_info.items():
column_names = list(columns.keys())
rows = self._select_columns_from_table(conn, column_names, table_name)

for row in rows:
self._re_encrypt_row(conn, row, table_name, columns)
logger.info("All tables processed")