Issue #332 less verbose log printing on failed batch job
Squashed commit of PR #374
JohanKJSchreurs authored Feb 10, 2023
1 parent 05015ed commit 9e7946b
Showing 6 changed files with 226 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

### Changed
- Less verbose log printing on failed batch job [#332](https://github.com/Open-EO/openeo-python-client/issues/332)

### Removed

2 changes: 1 addition & 1 deletion docs/api.rst
@@ -55,7 +55,7 @@ openeo.api.logs
-----------------

.. automodule:: openeo.api.logs
:members: LogEntry
:members: LogEntry, normalize_log_level


openeo.rest.connection
48 changes: 48 additions & 0 deletions openeo/api/logs.py
@@ -1,3 +1,7 @@
import logging
from typing import Optional, Union


class LogEntry(dict):
"""
Log message and info for jobs and services
@@ -46,3 +50,47 @@ def level(self):
return self["level"]

# TODO: add properties for "code", "time", "path", "links" and "data" with sensible defaults?


def normalize_log_level(log_level: Optional[Union[int, str]] = logging.DEBUG) -> int:
"""Helper function to convert a log level to the integer constants defined in logging, e.g. ``logging.ERROR``.
:param log_level: The input to be converted.
The value may be user input, or it can come from a method/function parameter
filled in by the user of the Python client, so it is not necessarily valid.
If no value is given or it is None, the empty string, or even any other 'falsy' value,
then the default return value is ``logging.DEBUG``.
:raises TypeError: when log_level is any other type than str, an int or None.
:return: One of the following log level constants from the standard module ``logging``:
``logging.ERROR``, ``logging.WARNING``, ``logging.INFO``, or ``logging.DEBUG`` .
"""

# None and the empty string could be passed explicitly (or other falsy values).
# Or the value could come from a field that is None.
if not log_level:
return logging.DEBUG

if isinstance(log_level, str):
log_level = log_level.upper()
if log_level in ["CRITICAL", "ERROR"]:
return logging.ERROR
elif log_level == "WARNING":
return logging.WARNING
elif log_level == "INFO":
return logging.INFO
elif log_level == "DEBUG":
return logging.DEBUG

# Still a string, but not a supported/standard log level.
return logging.ERROR

# Now it should be an int, otherwise the input is an unsupported type.
if not isinstance(log_level, int):
raise TypeError(
f"Value for log_level is not an int or str: type={type(log_level)}, value={log_level!r}"
)

return log_level
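
For reference, a small usage sketch of the new helper (not part of this commit; it only exercises the conversion rules shown above):

import logging
from openeo.api.logs import normalize_log_level

# Falsy values (None, empty string) fall back to DEBUG, i.e. "show everything".
assert normalize_log_level(None) == logging.DEBUG
assert normalize_log_level("") == logging.DEBUG

# String names are case-insensitive; "critical" maps to logging.ERROR.
assert normalize_log_level("warning") == logging.WARNING
assert normalize_log_level("Critical") == logging.ERROR

# Integer levels are returned unchanged.
assert normalize_log_level(logging.INFO) == logging.INFO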
55 changes: 47 additions & 8 deletions openeo/rest/job.py
@@ -9,7 +9,7 @@

import requests

from openeo.api.logs import LogEntry
from openeo.api.logs import LogEntry, normalize_log_level
from openeo.internal.jupyter import render_component, render_error, VisualDict, VisualList
from openeo.internal.warnings import deprecated
from openeo.rest import OpenEoClientException, JobFailedException, OpenEoApiError
@@ -136,12 +136,44 @@ def get_results(self) -> "JobResults":
"""
return JobResults(self)

def logs(self, offset=None) -> List[LogEntry]:
""" Retrieve job logs."""
# TODO: option to filter on level? Or move filtering functionality to a separate batch job logs class?
def logs(
self, offset=None, log_level: Optional[Union[int, str]] = None
) -> List[LogEntry]:
"""Retrieve job logs.
:param offset: The last identifier (property ``id`` of a LogEntry) the client has received.
If provided, the back-end only sends the entries that occurred after the specified identifier.
If not provided or empty, start with the first entry.
Defaults to None.
:param log_level: Show only messages with this log level or a higher (more severe) level.
You can use either constants from Python's standard ``logging`` module
or their names (case insensitive).
For example:
``logging.INFO``, ``"info"`` and ``"INFO"`` all show the messages
of level ``logging.INFO`` and above, i.e. ``logging.WARNING`` and
``logging.ERROR`` are also included.
The default is to show all log levels, in other words ``logging.DEBUG``.
This is also the behavior when you explicitly pass log_level=None or log_level="".
:return: A list containing the log entries for the batch job.
"""
url = "/jobs/{}/logs".format(self.job_id)
logs = self.connection.get(url, params={'offset': offset}, expected_status=200).json()["logs"]
entries = [LogEntry(log) for log in logs]

# Before we introduced the log_level param we printed all logs.
# So we explicitly keep that default behavior, making DEBUG the default log level.
log_level = normalize_log_level(log_level)
entries = [
LogEntry(log)
for log in logs
if normalize_log_level(log["level"]) >= log_level
]
return VisualList('logs', data=entries)
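
As a usage sketch of the new ``log_level`` parameter (illustrative only; the backend URL and job id below are hypothetical placeholders):

import logging
import openeo

connection = openeo.connect("https://openeo.example.com").authenticate_oidc()
job = connection.job("job-1234")

all_entries = job.logs()                        # default: DEBUG and up, i.e. everything
warnings_up = job.logs(log_level="warning")     # WARNING and ERROR entries only
errors_only = job.logs(log_level=logging.ERROR)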

def run_synchronous(
@@ -230,9 +262,16 @@ def soft_error(message: str):
""".format(i=self.job_id)))
# TODO: make it possible to disable printing logs automatically?
# TODO: render logs jupyter-aware in a notebook context?
# TODO: only print the error level logs? Or the tail of the logs?
print("Printing logs:")
print(self.logs())
print(
textwrap.dedent(
f"""
Printing logs (only log level ERROR):
To get logs that include all log levels, use:
BatchJob(job_id='{self.job_id}', connection=your_connection).logs(log_level='DEBUG')
"""
)
)
print(self.logs(log_level=logging.ERROR))
raise JobFailedException("Batch job {i!r} didn't finish successfully. Status: {s} (after {t}).".format(
i=self.job_id, s=status, t=elapsed()
), job=self)
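
Since a failed job now only prints the ERROR-level entries automatically, a caller who wants the full log history can fetch it after catching the exception. A minimal sketch, assuming a hypothetical backend URL and job id:

import openeo
from openeo.rest import JobFailedException

connection = openeo.connect("https://openeo.example.com").authenticate_oidc()
job = connection.job("job-1234")

try:
    job.start_and_wait()
except JobFailedException:
    # The failure handler above already printed the ERROR-level entries;
    # retrieve the full history (DEBUG and up) for deeper debugging.
    for entry in job.logs(log_level="DEBUG"):
        print(entry.level, entry.message)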
37 changes: 36 additions & 1 deletion tests/api/test_logs.py
@@ -1,4 +1,6 @@
from openeo.api.logs import LogEntry
import logging

from openeo.api.logs import LogEntry, normalize_log_level
import pytest


@@ -31,3 +33,36 @@ def test_log_entry_basic_dict():
def test_log_entry_legacy():
log = LogEntry(id="log01", level="info", message="hello")
assert log.log_id == "log01"


@pytest.mark.parametrize(
["log_level_in", "expected_log_level"],
[
(None, logging.DEBUG),
("", logging.DEBUG),
(logging.ERROR, logging.ERROR),
("error", logging.ERROR),
("ERROR", logging.ERROR),
(logging.WARNING, logging.WARNING),
("warning", logging.WARNING),
("WARNING", logging.WARNING),
(logging.INFO, logging.INFO),
("INFO", logging.INFO),
("info", logging.INFO),
(logging.DEBUG, logging.DEBUG),
("DEBUG", logging.DEBUG),
("debug", logging.DEBUG),
],
)
def test_normalize_log_level(log_level_in, expected_log_level):
assert normalize_log_level(log_level_in) == expected_log_level


def test_normalize_log_level_default_input_returns_debug_level():
assert normalize_log_level() == logging.DEBUG


@pytest.mark.parametrize("log_level", [10.0, b"not a string"])
def test_normalize_log_level_raises_type_error(log_level):
with pytest.raises(TypeError):
assert normalize_log_level(log_level)
93 changes: 93 additions & 0 deletions tests/rest/test_job.py
@@ -1,4 +1,5 @@
import json
import logging
import re
from pathlib import Path
from unittest import mock
@@ -264,6 +265,98 @@ def test_get_job_logs(session040, requests_mock):
assert log_entries[0].message == "error processing batch job"


# TODO: do we keep testing on session040, or should we switch to con100?
def test_get_job_logs_returns_debug_loglevel_by_default(session040, requests_mock):
requests_mock.get(
API_URL + "/jobs/f00ba5/logs",
json={
"logs": [
{
"id": "123abc",
"level": "error",
"message": "error processing batch job",
},
{
"id": "234abc",
"level": "debug",
"message": "Some debug info we want to filter out",
},
{
"id": "345abc",
"level": "info",
"message": "Some general info we want to filter out",
},
{
"id": "345abc",
"level": "warning",
"message": "Some warning we want to filter out",
},
]
},
)

log_entries = session040.job("f00ba5").logs()

assert len(log_entries) == 4
assert log_entries[0].level == "error"
assert log_entries[1].level == "debug"
assert log_entries[2].level == "info"
assert log_entries[3].level == "warning"


@pytest.mark.parametrize(
["log_level", "exp_num_messages"],
[
(None, 4), # Default is DEBUG / show all log levels.
(logging.ERROR, 1),
("error", 1),
("ERROR", 1),
(logging.WARNING, 2),
("warning", 2),
("WARNING", 2),
(logging.INFO, 3),
("INFO", 3),
("info", 3),
(logging.DEBUG, 4),
("DEBUG", 4),
("debug", 4),
],
)
def test_get_job_logs_keeps_loglevel_that_is_higher_or_equal(
session040, requests_mock, log_level, exp_num_messages
):
requests_mock.get(
API_URL + "/jobs/f00ba5/logs",
json={
"logs": [
{
"id": "123abc",
"level": "error",
"message": "error processing batch job",
},
{
"id": "234abc",
"level": "debug",
"message": "Some debug info we want to filter out",
},
{
"id": "345abc",
"level": "info",
"message": "Some general info we want to filter out",
},
{
"id": "345abc",
"level": "warning",
"message": "Some warning we want to filter out",
},
]
},
)

log_entries = session040.job("f00ba5").logs(log_level=log_level)
assert len(log_entries) == exp_num_messages


def test_create_job_100(con100, requests_mock):
def check_request(request):
assert request.json() == {
