Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Masks secrets in traces. #1797

Merged
merged 10 commits into from
Sep 14, 2024
18 changes: 11 additions & 7 deletions dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,13 +617,17 @@ def main() -> int:
elif args.command == "deploy":
try:
deploy_args = vars(args)
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args,
)
if deploy_args.get("deployment_method") is None:
print_help(deploy_cmd)
return -1
else:
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args,
)
except (NameError, KeyError):
fmt.warning(
"Please install additional command line dependencies to use deploy command:"
Expand Down
9 changes: 2 additions & 7 deletions dlt/cli/deploy_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum
from importlib.metadata import version as pkg_version

from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY, StringTomlProvider
from dlt.common.configuration.providers import SECRETS_TOML, SECRETS_TOML_KEY
from dlt.common.configuration.paths import make_dlt_settings_path
from dlt.common.configuration.utils import serialize_value
from dlt.common.git import is_dirty
Expand Down Expand Up @@ -393,12 +393,7 @@ def _echo_instructions(self, *args: Optional[Any]) -> None:
f" {SECRETS_TOML_KEY} variable."
)
fmt.echo()
toml_provider = StringTomlProvider("")
for s_v in self.secret_envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
for s_v in self.envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
fmt.echo(toml_provider.dumps())
self._echo_secrets_toml()
else:
raise ValueError(self.secrets_format)

Expand Down
45 changes: 42 additions & 3 deletions dlt/cli/deploy_command_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import dlt

from dlt.common import git
from dlt.common.configuration.exceptions import LookupTrace
from dlt.common.configuration.exceptions import LookupTrace, ConfigFieldMissingException
from dlt.common.configuration.providers import ConfigTomlProvider, EnvironProvider
from dlt.common.configuration.providers.toml import BaseDocProvider, StringTomlProvider
from dlt.common.git import get_origin, get_repo, Repo
from dlt.common.configuration.specs.run_configuration import get_default_pipeline_name
from dlt.common.typing import StrAny
Expand Down Expand Up @@ -198,12 +199,50 @@ def _update_envs(self, trace: PipelineTrace) -> None:
# fmt.echo(f"{resolved_value.key} in {resolved_value.sections} moved to CONFIG")

def _echo_secrets(self) -> None:
display_info = False
for s_v in self.secret_envs:
fmt.secho("Name:", fg="green")
fmt.echo(fmt.bold(self.env_prov.get_key_name(s_v.key, *s_v.sections)))
fmt.secho("Secret:", fg="green")
fmt.echo(s_v.value)
try:
fmt.secho("Secret:", fg="green")
fmt.echo(self._lookup_secret_value(s_v))
except ConfigFieldMissingException:
fmt.secho("please set me up!", fg="red")
display_info = True
fmt.echo()
if display_info:
self._display_missing_secret_info()
fmt.echo()

def _echo_secrets_toml(self) -> None:
display_info = False
toml_provider = StringTomlProvider("")
for s_v in self.secret_envs:
try:
secret_value = self._lookup_secret_value(s_v)
except ConfigFieldMissingException:
secret_value = "please set me up!"
display_info = True
toml_provider.set_value(s_v.key, secret_value, None, *s_v.sections)
for s_v in self.envs:
toml_provider.set_value(s_v.key, s_v.value, None, *s_v.sections)
fmt.echo(toml_provider.dumps())
if display_info:
self._display_missing_secret_info()
fmt.echo()

def _display_missing_secret_info(self) -> None:
fmt.warning(
"We could not read and display some secrets. Starting from 1.0 version of dlt,"
" those are not stored in the traces. Instead we are trying to read them from the"
" available configuration ie. secrets.toml file. Please run the deploy command from"
" the same working directory you ran your pipeline script. If you pass the"
" credentials in code we will not be able to display them here. See"
" https://dlthub.com/docs/general-usage/credentials"
)

def _lookup_secret_value(self, trace: LookupTrace) -> Any:
return dlt.secrets[BaseDocProvider.get_key_name(trace.key, *trace.sections)]

def _echo_envs(self) -> None:
for v in self.envs:
Expand Down
8 changes: 5 additions & 3 deletions dlt/cli/echo.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ def always_choose(always_choose_default: bool, always_choose_value: Any) -> Iter
_always_choose_value = ALWAYS_CHOOSE_VALUE
ALWAYS_CHOOSE_DEFAULT = always_choose_default
ALWAYS_CHOOSE_VALUE = always_choose_value
yield
ALWAYS_CHOOSE_DEFAULT = _always_choose_default
ALWAYS_CHOOSE_VALUE = _always_choose_value
try:
yield
finally:
ALWAYS_CHOOSE_DEFAULT = _always_choose_default
ALWAYS_CHOOSE_VALUE = _always_choose_value


echo = click.echo
Expand Down
8 changes: 4 additions & 4 deletions dlt/pipeline/trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import os
import pickle
import datetime # noqa: 251
from typing import Any, List, NamedTuple, Optional, Protocol, Sequence
from typing import Any, List, NamedTuple, Optional, Protocol, Sequence, Union
import humanize

from dlt.common.pendulum import pendulum
from dlt.common.configuration import is_secret_hint
from dlt.common.configuration.exceptions import ContextDefaultCannotBeCreated
from dlt.common.configuration.specs.config_section_context import ConfigSectionContext
from dlt.common.configuration.utils import _RESOLVED_TRACES
from dlt.common.configuration.utils import _RESOLVED_TRACES, ResolvedValueTrace
from dlt.common.configuration.container import Container
from dlt.common.exceptions import ExceptionTrace, ResourceNameNotAvailable
from dlt.common.logger import suppress_and_warn
Expand Down Expand Up @@ -280,8 +280,8 @@ def end_trace_step(
resolved_values = map(
lambda v: SerializableResolvedValueTrace(
v.key,
v.value,
v.default_value,
None if is_secret_hint(v.hint) else v.value,
None if is_secret_hint(v.hint) else v.default_value,
is_secret_hint(v.hint),
v.sections,
v.provider_name,
Expand Down
52 changes: 29 additions & 23 deletions tests/cli/test_deploy_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from dlt.pipeline.exceptions import CannotRestorePipelineException
from dlt.cli.deploy_command_helpers import get_schedule_description

from tests.utils import TEST_STORAGE_ROOT, test_storage
from tests.utils import TEST_STORAGE_ROOT, reset_providers, test_storage


DEPLOY_PARAMS = [
Expand Down Expand Up @@ -134,28 +134,34 @@ def test_deploy_command(
test_storage.delete(".dlt/secrets.toml")
test_storage.atomic_rename(".dlt/secrets.toml.ci", ".dlt/secrets.toml")

# this time script will run
venv.run_script("debug_pipeline.py")
with echo.always_choose(False, always_choose_value=True):
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
deploy_command.deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
)
_out = buf.getvalue()
print(_out)
# make sure our secret and config values are all present
assert "api_key_9x3ehash" in _out
assert "dlt_data" in _out
if "schedule" in deployment_args:
assert get_schedule_description(deployment_args["schedule"])
secrets_format = deployment_args.get("secrets_format", "env")
if secrets_format == "env":
assert "API_KEY" in _out
else:
assert "api_key = " in _out
# reset toml providers to (1) CWD (2) non existing dir so API_KEY is not found
for project_dir, api_key in [
(None, "api_key_9x3ehash"),
(".", "please set me up!"),
]:
with reset_providers(project_dir=project_dir):
# this time script will run
venv.run_script("debug_pipeline.py")
with echo.always_choose(False, always_choose_value=True):
with io.StringIO() as buf, contextlib.redirect_stdout(buf):
deploy_command.deploy_command(
"debug_pipeline.py",
deployment_method,
deploy_command.COMMAND_DEPLOY_REPO_LOCATION,
**deployment_args
)
_out = buf.getvalue()
print(_out)
# make sure our secret and config values are all present
assert api_key in _out
assert "dlt_data" in _out
if "schedule" in deployment_args:
assert get_schedule_description(deployment_args["schedule"])
secrets_format = deployment_args.get("secrets_format", "env")
if secrets_format == "env":
assert "API_KEY" in _out
else:
assert "api_key = " in _out

# non existing script name
with pytest.raises(NoSuchPathError):
Expand Down
17 changes: 3 additions & 14 deletions tests/common/configuration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,11 @@
from dlt.common.configuration import configspec
from dlt.common.configuration.specs import BaseConfiguration, CredentialsConfiguration
from dlt.common.configuration.container import Container
from dlt.common.configuration.providers import (
ConfigProvider,
EnvironProvider,
ConfigTomlProvider,
SecretsTomlProvider,
)
from dlt.common.configuration.providers import ConfigProvider, EnvironProvider
from dlt.common.configuration.utils import get_resolved_traces
from dlt.common.configuration.specs.config_providers_context import ConfigProvidersContext
from dlt.common.typing import TSecretValue, StrAny
from tests.utils import _reset_providers


@configspec
Expand Down Expand Up @@ -118,14 +114,7 @@ def env_provider() -> Iterator[ConfigProvider]:

@pytest.fixture
def toml_providers() -> Iterator[ConfigProvidersContext]:
pipeline_root = "./tests/common/cases/configuration/.dlt"
ctx = ConfigProvidersContext()
ctx.providers.clear()
ctx.add_provider(EnvironProvider())
ctx.add_provider(SecretsTomlProvider(project_dir=pipeline_root))
ctx.add_provider(ConfigTomlProvider(project_dir=pipeline_root))
with Container().injectable_context(ctx):
yield ctx
yield from _reset_providers("./tests/common/cases/configuration/.dlt")


class MockProvider(ConfigProvider):
Expand Down
9 changes: 5 additions & 4 deletions tests/pipeline/test_pipeline_trace.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,18 @@ def data():
assert resolved.is_secret_hint is False
assert resolved.default_value is None
assert resolved.provider_name == "config.toml"
# dictionaries are not returned anymore
# dictionaries are not returned anymore, secrets are masked
resolved = _find_resolved_value(trace.resolved_config_values, "credentials", [])
assert resolved is None or isinstance(resolved.value, str)
resolved = _find_resolved_value(trace.resolved_config_values, "secret_value", [])
assert resolved.is_secret_hint is True
assert resolved.value == "2137"
assert resolved.default_value == "123"
assert resolved.value is None, "Credential is not masked"
assert resolved.default_value is None, "Credential is not masked"
resolved = _find_resolved_value(trace.resolved_config_values, "credentials", ["databricks"])
assert resolved.is_secret_hint is True
assert resolved.value == databricks_creds
assert resolved.value is None, "Credential is not masked"
assert_trace_serializable(trace)

# activate pipeline because other was running in assert trace
p.activate()

Expand Down
24 changes: 23 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import multiprocessing
import os
import platform
Expand All @@ -12,7 +13,12 @@

import dlt
from dlt.common.configuration.container import Container
from dlt.common.configuration.providers import DictionaryProvider
from dlt.common.configuration.providers import (
DictionaryProvider,
EnvironProvider,
SecretsTomlProvider,
ConfigTomlProvider,
)
from dlt.common.configuration.resolve import resolve_configuration
from dlt.common.configuration.specs import RunConfiguration
from dlt.common.configuration.specs.config_providers_context import (
Expand Down Expand Up @@ -382,3 +388,19 @@ def assert_query_data(
# the second is load id
if info:
assert row[1] in info.loads_ids


@contextlib.contextmanager
def reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]:
"""Context manager injecting standard set of providers where toml providers are initialized from `project_dir`"""
return _reset_providers(project_dir)


def _reset_providers(project_dir: str) -> Iterator[ConfigProvidersContext]:
ctx = ConfigProvidersContext()
ctx.providers.clear()
ctx.add_provider(EnvironProvider())
ctx.add_provider(SecretsTomlProvider(project_dir=project_dir))
ctx.add_provider(ConfigTomlProvider(project_dir=project_dir))
with Container().injectable_context(ctx):
yield ctx
Loading