Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handle RDS restarts (PP-991) #1689

Merged
merged 2 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def initialize_application() -> PalaceFlask:
container.init_resources()

# Initialize the applications error handler.
error_handler = ErrorHandler(app, container.config.logging.level())
error_handler = ErrorHandler(app)
app.register_error_handler(Exception, error_handler.handle)

# Initialize the circulation manager
Expand Down
64 changes: 23 additions & 41 deletions core/app_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
from __future__ import annotations

import gzip
import sys
import traceback
from functools import wraps
from io import BytesIO
from typing import TYPE_CHECKING

import flask
from flask import Response, make_response, url_for
from flask_pydantic_spec import FlaskPydanticSpec
from psycopg2 import DatabaseError
from psycopg2 import OperationalError
from werkzeug.exceptions import HTTPException

import core
Expand All @@ -20,10 +18,9 @@
from core.lane import Facets, Pagination
from core.model import Identifier
from core.problem_details import *
from core.service.logging.configuration import LogLevel
from core.util.log import LoggerMixin
from core.util.opds_writer import OPDSMessage
from core.util.problem_detail import ProblemDetail
from core.util.problem_detail import ProblemDetail, ProblemError

if TYPE_CHECKING:
from api.util.flask import PalaceFlask
Expand Down Expand Up @@ -170,14 +167,13 @@ def compress(response):


class ErrorHandler(LoggerMixin):
def __init__(self, app: PalaceFlask, log_level: LogLevel):
def __init__(self, app: PalaceFlask) -> None:
"""Constructor.

:param app: The Flask application object.
:param log_level: The log level set for this application.
"""
self.app = app
self.debug = log_level == LogLevel.debug

def handle(self, exception: Exception) -> Response | HTTPException:
"""Something very bad has happened. Notify the client."""
Expand All @@ -191,54 +187,40 @@ def handle(self, exception: Exception) -> Response | HTTPException:
# If there is an active database session, then roll the session back.
self.app.manager._db.rollback()

# By default, when reporting errors, we err on the side of
# terseness, to avoid leaking sensitive information. We only
# log a stack trace in the case we have debugging turned on.
# Otherwise, we just display a generic error message.
tb = traceback.format_exc()
if isinstance(exception, DatabaseError):
# The database session may have become tainted. For now
# the simplest thing to do is to kill the entire process
# and let uwsgi restart it.
self.log.error(
"Database error: %s Treating as fatal to avoid holding on to a tainted session!",
exception,
exc_info=exception,
)
shutdown = flask.request.environ.get("werkzeug.server.shutdown")
if shutdown:
shutdown()
else:
sys.exit()

# By default, the error will be logged at log level ERROR.
log_method = self.log.error
document = None
response = None

# Okay, it's not a database error. Turn it into a useful HTTP error
# response.
# If we can, we will turn the exception into a problem detail
if hasattr(exception, "as_problem_detail_document"):
# This exception can be turned directly into a problem
# detail document.
document = exception.as_problem_detail_document(self.debug)
if not self.debug:
document.debug_message = None
else:
if document.debug_message:
document.debug_message += "\n\n" + tb
else:
document.debug_message = tb
document = exception.as_problem_detail_document(debug=False)
elif isinstance(exception, ProblemError):
document = exception.problem_detail

if document:
document.debug_message = None
if document.status_code == 502:
# This is an error in integrating with some upstream
# service. It's a serious problem, but probably not
# indicative of a bug in our software. Log it at log level
# WARN.
log_method = self.log.warning
response = make_response(document.response)
else:

if isinstance(exception, OperationalError):
# This is an error, but it is probably unavoidable. Likely it was caused by
# the database dropping our connection which can happen then the database is
# restarted for maintenance. We'll log it at log level WARN.
log_method = self.log.warning
body = "Service temporarily unavailable. Please try again later."
response = make_response(body, 503, {"Content-Type": "text/plain"})
Fixed Show fixed Hide fixed

if response is None:
# There's no way to turn this exception into a problem
# document. This is probably indicative of a bug in our
# software.
body = tb if self.debug else "An internal error occurred"
body = "An internal error occurred"
response = make_response(body, 500, {"Content-Type": "text/plain"})

log_method("Exception in web app: %s", exception, exc_info=exception)
Expand Down
4 changes: 3 additions & 1 deletion core/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,9 @@ class SessionManager:
@classmethod
def engine(cls, url=None):
url = url or Configuration.database_url()
return create_engine(url, echo=DEBUG, json_serializer=json_serializer)
return create_engine(
url, echo=DEBUG, json_serializer=json_serializer, pool_pre_ping=True
)

@classmethod
def setup_event_listener(
Expand Down
95 changes: 55 additions & 40 deletions tests/core/test_app_server.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import gzip
import json
from collections.abc import Callable, Iterable
from functools import partial
from io import BytesIO
from unittest.mock import MagicMock, PropertyMock

import flask
import pytest
from _pytest.logging import LogCaptureFixture
from flask import Flask, Response, make_response
from flask_babel import Babel
from flask_babel import lazy_gettext as _
from psycopg2 import OperationalError

import core
from api.admin.config import Configuration as AdminUiConfig
Expand All @@ -27,9 +28,10 @@
from core.feed.annotator.base import Annotator
from core.lane import Facets, Pagination, SearchFacets, WorkList
from core.model import Identifier
from core.problem_details import INVALID_INPUT, INVALID_URN
from core.problem_details import INTEGRATION_ERROR, INVALID_INPUT, INVALID_URN
from core.service.logging.configuration import LogLevel
from core.util.opds_writer import OPDSFeed, OPDSMessage
from core.util.problem_detail import ProblemError
from tests.fixtures.database import DatabaseTransactionFixture
from tests.fixtures.library import LibraryFixture

Expand Down Expand Up @@ -503,14 +505,17 @@ def error_handler_fixture(
data.app = PalaceFlask(ErrorHandlerFixture.__name__)
Babel(data.app)
data.app.manager = mock_manager
data.handler = partial(ErrorHandler, app=data.app, log_level=LogLevel.error)
data.handler = partial(ErrorHandler, app=data.app)
return data


class TestErrorHandler:
def raise_exception(self, cls=Exception):
def raise_exception(self, exc: type[Exception] | Exception = Exception) -> None:
"""Simulate an exception that happens deep within the stack."""
raise cls()
if callable(exc):
raise exc()
else:
raise exc

def test_unhandled_error(self, error_handler_fixture: ErrorHandlerFixture):
handler = error_handler_fixture.handler()
Expand All @@ -522,62 +527,72 @@ def test_unhandled_error(self, error_handler_fixture: ErrorHandlerFixture):
response = handler.handle(exception)
assert isinstance(response, Response)
assert 500 == response.status_code
assert "An internal error occurred" == response.data.decode("utf8")

def test_unhandled_error_debug(self, error_handler_fixture: ErrorHandlerFixture):
# Set the sitewide log level to DEBUG to get a stack trace
# instead of a generic error message.
handler = error_handler_fixture.handler(log_level=LogLevel.debug)
assert "An internal error occurred" == response.get_data(as_text=True)

def test_handle_error_as_problem_detail_document(
self, error_handler_fixture: ErrorHandlerFixture, caplog: LogCaptureFixture
):
handler = error_handler_fixture.handler()
with error_handler_fixture.app.test_request_context("/"):
response = None
try:
self.raise_exception()
self.raise_exception(CanBeProblemDetailDocument)
except Exception as exception:
response = handler.handle(exception)
assert isinstance(response, Response)
assert 500 == response.status_code
assert response.data.startswith(b"Traceback (most recent call last)")

def test_handle_error_as_problem_detail_document(
self, error_handler_fixture: ErrorHandlerFixture
assert isinstance(response, Response)
response_json = response.json
assert isinstance(response_json, dict)
assert INVALID_URN.status_code == response.status_code
assert INVALID_URN.title == response_json["title"]

# The debug_message is destroyed.
assert "debug_message" not in response_json

# The exception was logged at the error level
assert len(caplog.records) == 1
log_record = caplog.records[0]
assert log_record.levelname == LogLevel.error.value
assert "Exception in web app" in log_record.message

def test_handle_error_problem_error(
self, error_handler_fixture: ErrorHandlerFixture, caplog: LogCaptureFixture
):
handler = error_handler_fixture.handler()
with error_handler_fixture.app.test_request_context("/"):
try:
self.raise_exception(CanBeProblemDetailDocument)
self.raise_exception(ProblemError(problem_detail=INTEGRATION_ERROR))
except Exception as exception:
response = handler.handle(exception)

assert isinstance(response, Response)
assert 400 == response.status_code
data = json.loads(response.data.decode("utf8"))
assert INVALID_URN.title == data["title"]

# Since we are not in debug mode, the debug_message is
# destroyed.
assert "debug_message" not in data

def test_handle_error_as_problem_detail_document_debug(
self, error_handler_fixture: ErrorHandlerFixture
response_json = response.json
assert isinstance(response_json, dict)
assert INTEGRATION_ERROR.status_code == response.status_code
assert INTEGRATION_ERROR.title == response_json["title"]

# The exception was logged at the warn level, because the problem
# detail document had a status code of 502.
assert len(caplog.records) == 1
log_record = caplog.records[0]
assert log_record.levelname == LogLevel.warning.value
assert "Exception in web app" in log_record.message

def test_handle_operational_error(
self, error_handler_fixture, caplog: LogCaptureFixture
):
# When in debug mode, the debug_message is preserved and a
# stack trace is appended to it.
handler = error_handler_fixture.handler(log_level=LogLevel.debug)
handler = error_handler_fixture.handler()
with error_handler_fixture.app.test_request_context("/"):
try:
self.raise_exception(CanBeProblemDetailDocument)
self.raise_exception(OperationalError("An operational error occurred"))
except Exception as exception:
response = handler.handle(exception)

assert isinstance(response, Response)
assert 400 == response.status_code
data = json.loads(response.data.decode("utf8"))
assert INVALID_URN.title == data["title"]
assert data["debug_message"].startswith(
"A debug_message which should only appear in debug mode.\n\n"
"Traceback (most recent call last)"
)
assert 503 == response.status_code

assert len(caplog.records) == 1
log_record = caplog.records[0]
assert log_record.levelname == LogLevel.warning.value


class TestCompressibleAnnotator:
Expand Down