From 131c498e66351a17942cfba6e31f9e7e0d84835e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Rami=CC=81rez=20Mondrago=CC=81n?= Date: Thu, 29 Sep 2022 00:19:20 -0500 Subject: [PATCH] Allow passing custo logging config --- docs/implementation/metrics.md | 41 +++++++++++++++---- noxfile.py | 1 + poetry.lock | 25 +++++++----- pyproject.toml | 3 ++ singer_sdk/default_logging.yml | 15 +++++++ singer_sdk/helpers/_resources.py | 24 +++++++++++ singer_sdk/metrics.py | 69 ++++++++++++++++---------------- singer_sdk/streams/core.py | 34 ++++++++-------- 8 files changed, 143 insertions(+), 69 deletions(-) create mode 100644 singer_sdk/default_logging.yml create mode 100644 singer_sdk/helpers/_resources.py diff --git a/docs/implementation/metrics.md b/docs/implementation/metrics.md index c6a0a13f2..57fafc4ec 100644 --- a/docs/implementation/metrics.md +++ b/docs/implementation/metrics.md @@ -2,17 +2,42 @@ Metrics logging is specified in the [Singer Spec](https://hub.meltano.com/singer/spec#metrics). The SDK will automatically -emit two types of metrics `record_count` and `http_request_duration`. +emit two types of metrics `record_count`, `http_request_duration` and `sync_duration`. -Customization options: +## Customization options -Developers may optionally add a `metrics_log_level` config option to their taps, -which will automatically allow this metrics logging to be customized at runtime. +### `metrics_log_level` -When `metrics_log_level` is supported, users can then -set one of these values (case insensitive), `INFO`, `DEBUG`, `NONE`, to override the -default logging level for metrics. This can be helpful for REST-type sources which use -make a large number of REST calls can therefor have very noisy metrics. +Metrics are logged at the `INFO` level. Developers may optionally add a +`metrics_log_level` config option to their taps, `WARNING` or `ERROR` to disable +metrics logging. + +### `SINGER_SDK_LOG_CONFIG` + +Users of a tap can configure the SDK logging by setting the `SINGER_SDK_LOG_CONFIG` +environment variable. The value of this variable should be a path to a YAML file in the +[Python logging dict format](https://docs.python.org/3/library/logging.config.html#dictionary-schema-details). + +For example, to direct metrics records to a file, you could use the following config: + +```yaml +version: 1 +disable_existing_loggers: false +formatters: + metrics: + format: "{asctime} {message}" + style: "{" +handlers: + metrics: + class: logging.FileHandler + formatter: metrics + filename: metrics.log +loggers: + singer_sdk.metrics: + level: INFO + handlers: [ metrics ] + propagate: yes +``` ## Additional Singer Metrics References diff --git a/noxfile.py b/noxfile.py index f7cc2be2c..00f4801cd 100644 --- a/noxfile.py +++ b/noxfile.py @@ -42,6 +42,7 @@ def mypy(session: Session) -> None: "types-requests", "types-pytz", "types-simplejson", + "types-PyYAML", ) session.run("mypy", *args) if not session.posargs: diff --git a/poetry.lock b/poetry.lock index 24249b27e..de6e7cd51 100644 --- a/poetry.lock +++ b/poetry.lock @@ -894,7 +894,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [[package]] -name = "pyyaml" +name = "PyYAML" version = "6.0" description = "YAML parser and emitter for Python" category = "main" @@ -1213,6 +1213,14 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "types-PyYAML" +version = "6.0.12" +description = "Typing stubs for PyYAML" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "types-requests" version = "2.28.11" @@ -1323,7 +1331,7 @@ docs = ["sphinx", "sphinx-rtd-theme", "sphinx-copybutton", "myst-parser", "sphin [metadata] lock-version = "1.1" python-versions = "<3.11,>=3.7.1" -content-hash = "8e890e4749a821f4e55d529e6c42e968870e244225c4d7397468c44baf9ca21f" +content-hash = "ca092add73a74f70ea020de72447ca4c6fb68f253cdf8efb70a49ba1fade177b" [metadata.files] alabaster = [ @@ -1993,7 +2001,7 @@ pytzdata = [ {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, ] -pyyaml = [ +PyYAML = [ {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, @@ -2001,13 +2009,6 @@ pyyaml = [ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, - {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, - {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, - {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, - {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, @@ -2260,6 +2261,10 @@ types-pytz = [ {file = "types-pytz-2022.2.1.0.tar.gz", hash = "sha256:47cfb19c52b9f75896440541db392fd312a35b279c6307a531db71152ea63e2b"}, {file = "types_pytz-2022.2.1.0-py3-none-any.whl", hash = "sha256:50ead2254b524a3d4153bc65d00289b66898060d2938e586170dce918dbaf3b3"}, ] +types-PyYAML = [ + {file = "types-PyYAML-6.0.12.tar.gz", hash = "sha256:f6f350418125872f3f0409d96a62a5a5ceb45231af5cc07ee0034ec48a3c82fa"}, + {file = "types_PyYAML-6.0.12-py3-none-any.whl", hash = "sha256:29228db9f82df4f1b7febee06bbfb601677882e98a3da98132e31c6874163e15"}, +] types-requests = [ {file = "types-requests-2.28.11.tar.gz", hash = "sha256:7ee827eb8ce611b02b5117cfec5da6455365b6a575f5e3ff19f655ba603e6b4e"}, {file = "types_requests-2.28.11-py3-none-any.whl", hash = "sha256:af5f55e803cabcfb836dad752bd6d8a0fc8ef1cd84243061c0e27dee04ccf4fd"}, diff --git a/pyproject.toml b/pyproject.toml index bf62312ba..2e3fd8489 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ PyJWT = "~=2.4" requests = "^2.25.1" cryptography = ">=3.4.6,<39.0.0" importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +importlib-resources = {version = "^5.9.0", markers = "python_version < \"3.9\""} memoization = ">=0.3.2,<0.5.0" jsonpath-ng = "^1.5.3" joblib = "^1.0.1" @@ -54,6 +55,7 @@ typing-extensions = "^4.2.0" simplejson = "^3.17.6" jsonschema = "^4.16.0" pytz = "^2022.2.1" +PyYAML = "^6.0" # Sphinx dependencies installed as optional 'docs' extras # https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-664002569 @@ -89,6 +91,7 @@ types-python-dateutil = "^2.8.19" types-pytz = "^2022.2.1.0" types-requests = "^2.28.11" types-simplejson = "^3.17.7" +types-PyYAML = "^6.0.12" coverage = {extras = ["toml"], version = "^6.4"} # Cookiecutter tests diff --git a/singer_sdk/default_logging.yml b/singer_sdk/default_logging.yml new file mode 100644 index 000000000..c121fb47d --- /dev/null +++ b/singer_sdk/default_logging.yml @@ -0,0 +1,15 @@ +version: 1 +disable_existing_loggers: false +formatters: + console: + format: "{asctime} {message}" + style: "{" +handlers: + default: + class: logging.StreamHandler + formatter: console + stream: ext://sys.stderr +root: + level: INFO + propagate: true + handlers: [default] diff --git a/singer_sdk/helpers/_resources.py b/singer_sdk/helpers/_resources.py new file mode 100644 index 000000000..a458e0a55 --- /dev/null +++ b/singer_sdk/helpers/_resources.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import sys +from types import ModuleType + +if sys.version_info >= (3, 9): + import importlib.resources as importlib_resources + from importlib.abc import Traversable +else: + import importlib_resources + from importlib_resources.abc import Traversable + + +def get_package_files(package: str | ModuleType) -> Traversable: + """Load a file from a package. + + Args: + package: The package to load the file from. + file: The file to load. + + Returns: + The file as a Traversable object. + """ + return importlib_resources.files(package) diff --git a/singer_sdk/metrics.py b/singer_sdk/metrics.py index 6f2026551..b7f2da023 100644 --- a/singer_sdk/metrics.py +++ b/singer_sdk/metrics.py @@ -7,13 +7,19 @@ import json import logging import logging.config +import os from dataclasses import asdict, dataclass, field +from pathlib import Path from time import time from types import TracebackType from typing import Any, Generic, Mapping, TypeVar +import yaml + +from singer_sdk.helpers._resources import Traversable, get_package_files + DEFAULT_LOG_INTERVAL = 60.0 -METRICS_LOGGER_NAME = "singer.metrics" +METRICS_LOGGER_NAME = __name__ METRICS_LOG_LEVEL_SETTING = "metrics_log_level" _TVal = TypeVar("_TVal") @@ -279,7 +285,7 @@ def record_counter( ) -> Counter: """Use for counting records retrieved from the source. - with singer.metrics.record_counter(endpoint="users") as counter: + with record_counter("my_stream", endpoint="/users") as counter: for record in my_records: # Do something with the record counter.increment() @@ -302,7 +308,7 @@ def record_counter( def batch_counter(stream: str, **tags: Any) -> Counter: """Use for counting batches sent to the target. - with singer.metrics.batch_counter() as counter: + with batch_counter("my_stream") as counter: for batch in my_batches: # Do something with the batch counter.increment() @@ -326,7 +332,7 @@ def http_request_counter( ) -> Counter: """Use for counting HTTP requests. - with singer.metrics.http_request_counter() as counter: + with http_request_counter() as counter: for record in my_records: # Do something with the record counter.increment() @@ -362,42 +368,27 @@ def sync_timer(stream: str, **tags: Any) -> Timer: return Timer(Metric.SYNC_DURATION, tags) -def _get_logging_config(config: Mapping[str, Any] | None = None) -> dict: - """Get a logging configuration. +def _load_yaml_logging_config(path: Traversable | Path) -> Any: # noqa: ANN401 + """Load the logging config from the YAML file. Args: - config: A logging configuration. + path: A path to the YAML file. + + Returns: + The logging config. + """ + with path.open() as f: + return yaml.safe_load(f) + + +def _get_default_config() -> Any: # noqa: ANN401 + """Get a logging configuration. Returns: A logging configuration. """ - config = config or {} - metrics_log_level = config.get(METRICS_LOG_LEVEL_SETTING, "INFO") - - return { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "metrics": { - "format": "{asctime} {message}", - "style": "{", - }, - }, - "handlers": { - "metrics": { - "class": "logging.FileHandler", - "formatter": "metrics", - "filename": "metrics.log", - }, - }, - "loggers": { - METRICS_LOGGER_NAME: { - "level": metrics_log_level, - "handlers": ["metrics"], - "propagate": True, - }, - }, - } + log_config_path = get_package_files("singer_sdk").joinpath("default_logging.yml") + return _load_yaml_logging_config(log_config_path) def _setup_logging(config: Mapping[str, Any]) -> None: @@ -406,4 +397,12 @@ def _setup_logging(config: Mapping[str, Any]) -> None: Args: config: A plugin configuration dictionary. """ - logging.config.dictConfig(_get_logging_config(config)) + logging.config.dictConfig(_get_default_config()) + + config = config or {} + metrics_log_level = config.get(METRICS_LOG_LEVEL_SETTING, "INFO").upper() + logging.getLogger(METRICS_LOGGER_NAME).setLevel(metrics_log_level) + + if "SINGER_SDK_LOG_CONFIG" in os.environ: + log_config_path = Path(os.environ["SINGER_SDK_LOG_CONFIG"]) + logging.config.dictConfig(_load_yaml_logging_config(log_config_path)) diff --git a/singer_sdk/streams/core.py b/singer_sdk/streams/core.py index f38602478..bc41601be 100644 --- a/singer_sdk/streams/core.py +++ b/singer_sdk/streams/core.py @@ -967,20 +967,22 @@ def _sync_records( context_list = [context] if context is not None else self.partitions selected = self.selected - for current_context in context_list or [{}]: - record_counter.context = current_context - timer.context = current_context - - partition_record_count = 0 - current_context = current_context or None - state = self.get_context_state(current_context) - state_partition_context = self._get_state_partition_context(current_context) - self._write_starting_replication_value(current_context) - child_context: dict | None = ( - None if current_context is None else copy.copy(current_context) - ) + with record_counter, timer: + for current_context in context_list or [{}]: + record_counter.context = current_context + timer.context = current_context + + partition_record_count = 0 + current_context = current_context or None + state = self.get_context_state(current_context) + state_partition_context = self._get_state_partition_context( + current_context + ) + self._write_starting_replication_value(current_context) + child_context: dict | None = ( + None if current_context is None else copy.copy(current_context) + ) - with record_counter, timer: for record_result in self.get_records(current_context): if isinstance(record_result, tuple): # Tuple items should be the record and the child context @@ -1022,9 +1024,9 @@ def _sync_records( record_count += 1 partition_record_count += 1 - if current_context == state_partition_context: - # Finalize per-partition state only if 1:1 with context - finalize_state_progress_markers(state) + if current_context == state_partition_context: + # Finalize per-partition state only if 1:1 with context + finalize_state_progress_markers(state) if not context: # Finalize total stream only if we have the full full context. # Otherwise will be finalized by tap at end of sync.