From 5ac661e98c27de024125112eee4da2ff67c8f70c Mon Sep 17 00:00:00 2001 From: Mauricio Villegas <5780272+mauvilsa@users.noreply.github.com> Date: Tue, 28 Feb 2023 09:25:48 -0500 Subject: [PATCH] Add support for single json dict environment variable for protocol (#1194) * Add support for single json dict environment variable for protocol. * Update docs/source/features.rst Co-authored-by: Tim Snyder * Add warnings when environment variables are ignored. --------- Co-authored-by: Tim Snyder --- docs/source/features.rst | 12 +++++---- fsspec/config.py | 43 +++++++++++++++++++++++++----- fsspec/tests/test_config.py | 52 +++++++++++++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 14 deletions(-) diff --git a/docs/source/features.rst b/docs/source/features.rst index 9fd23d94b..4bbfc82b3 100644 --- a/docs/source/features.rst +++ b/docs/source/features.rst @@ -365,14 +365,16 @@ For instance: Obviously, you should only define default values that are appropriate for a given file system implementation. INI files only support string values. -Alternatively, you can provide overrides with environment variables of -the style ``FSSPEC_{protocol}_{kwargname}=value``. +Alternatively, you can provide overrides with environment variables of the style +``FSSPEC_{protocol}=`` and +``FSSPEC_{protocol}_{kwargname}=``. Configuration is determined in the following order, with later items winning: -#. the contents of ini files, and json files in the config directory, sorted - alphabetically -#. environment variables +#. ini and json files in the config directory (``FSSPEC_CONFIG_DIRECTORY`` or ``$HOME/.config/fsspec/``), sorted + lexically by filename +#. ``FSSPEC_{protocol}`` environment variables +#. ``FSSPEC_{protocol}_{kwargname}`` environment variables #. the contents of ``fsspec.config.conf``, which can be edited at runtime #. kwargs explicitly passed, whether with ``fsspec.open``, ``fsspec.filesystem`` or directly instantiating the implementation class. diff --git a/fsspec/config.py b/fsspec/config.py index 5db4fd78b..685582938 100644 --- a/fsspec/config.py +++ b/fsspec/config.py @@ -1,6 +1,7 @@ import configparser import json import os +import warnings conf = {} default_conf_dir = os.path.join(os.path.expanduser("~"), ".config/fsspec") @@ -10,9 +11,14 @@ def set_conf_env(conf_dict, envdict=os.environ): """Set config values from environment variables - Looks for variable of the form ``FSSPEC__``. - There is no attempt to convert strings, but the kwarg keys will - be lower-cased. + Looks for variables of the form ``FSSPEC_`` and + ``FSSPEC__``. For ``FSSPEC_`` the value is parsed + as a json dictionary and used to ``update`` the config of the + corresponding protocol. For ``FSSPEC__`` there is no + attempt to convert the string value, but the kwarg keys will be lower-cased. + + The ``FSSPEC__`` variables are applied after the + ``FSSPEC_`` ones. Parameters ---------- @@ -21,12 +27,35 @@ def set_conf_env(conf_dict, envdict=os.environ): envdict : dict-like(str, str) Source for the values - usually the real environment """ + kwarg_keys = [] for key in envdict: - if key.startswith("FSSPEC"): - if key.count("_") < 2: + if key.startswith("FSSPEC_") and len(key) > 7 and key[7] != "_": + if key.count("_") > 1: + kwarg_keys.append(key) continue - _, proto, kwarg = key.split("_", 2) - conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key] + try: + value = json.loads(envdict[key]) + except json.decoder.JSONDecodeError as ex: + warnings.warn( + f"Ignoring environment variable {key} due to a parse failure: {ex}" + ) + else: + if isinstance(value, dict): + _, proto = key.split("_", 1) + conf_dict.setdefault(proto.lower(), {}).update(value) + else: + warnings.warn( + f"Ignoring environment variable {key} due to not being a dict:" + f" {type(value)}" + ) + elif key.startswith("FSSPEC"): + warnings.warn( + f"Ignoring environment variable {key} due to having an unexpected name" + ) + + for key in kwarg_keys: + _, proto, kwarg = key.split("_", 2) + conf_dict.setdefault(proto.lower(), {})[kwarg.lower()] = envdict[key] def set_conf_files(cdir, conf_dict): diff --git a/fsspec/tests/test_config.py b/fsspec/tests/test_config.py index a30dcb0aa..dc251a0d9 100644 --- a/fsspec/tests/test_config.py +++ b/fsspec/tests/test_config.py @@ -1,4 +1,5 @@ import os +from warnings import catch_warnings import pytest @@ -14,17 +15,64 @@ def clean_conf(): conf.clear() -def test_from_env(clean_conf): +def test_from_env_ignored(clean_conf): + env = { + "FSSPEC": "missing_protocol", + "FSSPEC_": "missing_protocol", + "FSSPEC__INVALID_KEY": "invalid_protocol", + "FSSPEC_INVALID1": "not_json_dict", + "FSSPEC_INVALID2": '["not_json_dict"]', + } + cd = {} + with catch_warnings(record=True) as w: + set_conf_env(conf_dict=cd, envdict=env) + assert len(w) == 5 + assert "unexpected name" in str(w[0].message) + assert "unexpected name" in str(w[1].message) + assert "unexpected name" in str(w[2].message) + assert "parse failure" in str(w[3].message) + assert "not being a dict" in str(w[4].message) + assert cd == {} + + +def test_from_env_kwargs(clean_conf): env = { "FSSPEC_PROTO_KEY": "value", "FSSPEC_PROTO_LONG_KEY": "othervalue", "FSSPEC_MALFORMED": "novalue", } cd = {} - set_conf_env(conf_dict=cd, envdict=env) + with catch_warnings(record=True) as w: + set_conf_env(conf_dict=cd, envdict=env) + assert len(w) == 1 + assert "parse failure" in str(w[0].message) assert cd == {"proto": {"key": "value", "long_key": "othervalue"}} +def test_from_env_protocol_dict(clean_conf): + env = { + "FSSPEC_PROTO": '{"int": 1, "float": 2.3, "bool": true, "dict": {"key": "val"}}' + } + cd = {} + set_conf_env(conf_dict=cd, envdict=env) + assert cd == { + "proto": {"int": 1, "float": 2.3, "bool": True, "dict": {"key": "val"}} + } + + +def test_from_env_kwargs_override_protocol_dict(clean_conf): + env = { + "FSSPEC_PROTO_LONG_KEY": "override1", + "FSSPEC_PROTO": '{"key": "value1", "long_key": "value2", "otherkey": "value3"}', + "FSSPEC_PROTO_KEY": "override2", + } + cd = {} + set_conf_env(conf_dict=cd, envdict=env) + assert cd == { + "proto": {"key": "override2", "long_key": "override1", "otherkey": "value3"} + } + + def test_from_file_ini(clean_conf, tmpdir): file1 = os.path.join(tmpdir, "1.ini") file2 = os.path.join(tmpdir, "2.ini")