diff --git a/airflow/config_templates/airflow_local_settings.py b/airflow/config_templates/airflow_local_settings.py index 317544ca7ed87..aa5791a324636 100644 --- a/airflow/config_templates/airflow_local_settings.py +++ b/airflow/config_templates/airflow_local_settings.py @@ -158,7 +158,7 @@ DEFAULT_DAG_PARSING_LOGGING_CONFIG: dict[str, dict[str, dict[str, Any]]] = { 'handlers': { 'processor_manager': { - 'class': 'logging.handlers.RotatingFileHandler', + 'class': 'airflow.utils.log.non_caching_file_handler.NonCachingRotatingFileHandler', 'formatter': 'airflow', 'filename': DAG_PROCESSOR_MANAGER_LOG_LOCATION, 'mode': 'a', diff --git a/airflow/utils/log/non_caching_file_handler.py b/airflow/utils/log/non_caching_file_handler.py index 5fbed3a1e1626..46fe69a99bc75 100644 --- a/airflow/utils/log/non_caching_file_handler.py +++ b/airflow/utils/log/non_caching_file_handler.py @@ -16,11 +16,25 @@ # under the License. from __future__ import annotations -import logging import os +from logging import FileHandler +from logging.handlers import RotatingFileHandler +from typing import IO -class NonCachingFileHandler(logging.FileHandler): +def make_file_io_non_caching(io: IO[str]) -> IO[str]: + try: + fd = io.fileno() + os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) + except Exception: + # in case either file descriptor cannot be retrieved or fadvise is not available + # we should simply return the wrapper retrieved by FileHandler's open method + # the advice to the kernel is just an advice and if we cannot give it, we won't + pass + return io + + +class NonCachingFileHandler(FileHandler): """ This is an extension of the python FileHandler that advises the Kernel to not cache the file in PageCache when it is written. While there is nothing wrong with such cache (it will be cleaned @@ -35,13 +49,22 @@ class NonCachingFileHandler(logging.FileHandler): """ def _open(self): - wrapper = super()._open() - try: - fd = wrapper.fileno() - os.posix_fadvise(fd, 0, 0, os.POSIX_FADV_DONTNEED) - except Exception: - # in case either file descriptor cannot be retrieved or fadvise is not available - # we should simply return the wrapper retrieved by FileHandler's open method - # the advise to the kernel is just an advise and if we cannot give it, we won't - pass - return wrapper + return make_file_io_non_caching(super()._open()) + + +class NonCachingRotatingFileHandler(RotatingFileHandler): + """ + This is an extension of the python RotatingFileHandler that advises the Kernel to not cache the file + in PageCache when it is written. While there is nothing wrong with such cache (it will be cleaned + when memory is needed), it causes ever-growing memory usage when scheduler is running as it keeps + on writing new log files and the files are not rotated later on. This might lead to confusion + for our users, who are monitoring memory usage of Scheduler - without realising that it is + harmless and expected in this case. + + See https://github.com/apache/airflow/issues/27065 + + Adding the advice to Kernel might help with not generating the cache memory growth in the first place. + """ + + def _open(self): + return make_file_io_non_caching(super()._open()) diff --git a/newsfragments/27065.misc.rst b/newsfragments/27065.misc.rst new file mode 100644 index 0000000000000..215bc8321d912 --- /dev/null +++ b/newsfragments/27065.misc.rst @@ -0,0 +1 @@ +In case you want to decrease cache memory when ``CONFIG_PROCESSOR_MANAGER_LOGGER=True``, and you have your local settings created before, you can update ``processor_manager_handler`` to use ``airflow.utils.log.non_caching_file_handler.NonCachingRotatingFileHandler`` handler instead of ``logging.RotatingFileHandler``.