Skip to content

Commit

Permalink
Fix misc errors that show up in the logs (#830)
Browse files Browse the repository at this point in the history
* Clean up formatting

* Log decode errors in JsonStats as WARNING instead of ERROR

* Ensure that the server will start, even if hydra is unreachable

* Log on INFO level when game setup times out

* Handle exceptions when processing game stats in a task

* Correctly check error text during deadlock handling

* Check gamestate before launching

* Reduce pop timer log level to info
  • Loading branch information
Askaholic authored Sep 18, 2021
1 parent f2e70b6 commit 5cb90fa
Show file tree
Hide file tree
Showing 12 changed files with 254 additions and 50 deletions.
3 changes: 2 additions & 1 deletion server/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ async def deadlock_retry_execute(conn, *args, max_attempts=3):
try:
return await conn.execute(*args)
except OperationalError as e:
if any(msg in e.message for msg in (
error_text = str(e)
if any(msg in error_text for msg in (
"Deadlock found",
"Lock wait timeout exceeded"
)):
Expand Down
24 changes: 18 additions & 6 deletions server/gameconnection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import asyncio
import contextlib
import json
from typing import Any, List

from sqlalchemy import select, text
Expand Down Expand Up @@ -362,7 +363,14 @@ async def handle_operation_complete(
self.game.leaderboard_saved = True

async def handle_json_stats(self, stats: str):
self.game.report_army_stats(stats)
try:
self.game.report_army_stats(stats)
except json.JSONDecodeError:
self._logger.warning(
"Malformed game stats reported by %s: '...%s'",
self._player.login,
stats[-20:]
)

async def handle_enforce_rating(self):
self.game.enforce_rating = True
Expand Down Expand Up @@ -471,11 +479,15 @@ async def handle_game_state(self, state: str):
if self.player.state != PlayerState.HOSTING:
return

self._logger.info(
"Launching game %s in state %s",
self.game,
self.game.state
)
if self.game.state is not GameState.LOBBY:
self._logger.warning(
"Trying to launch game %s in invalid state %s",
self.game,
self.game.state
)
return

self._logger.info("Launching game %s", self.game)

await self.game.launch()

Expand Down
19 changes: 7 additions & 12 deletions server/games/game.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,20 +117,16 @@ def __init__(
"RestrictedCategories": 0
}
self.mods = {}
loop = asyncio.get_event_loop()
self._is_hosted = loop.create_future()
self._launch_fut = loop.create_future()
self._hosted_event = asyncio.Event()
self._launched_event = asyncio.Event()

self._logger.debug("%s created", self)
asyncio.get_event_loop().create_task(self.timeout_game(setup_timeout))

async def timeout_game(self, timeout: int = 60):
await asyncio.sleep(timeout)
if self.state is GameState.INITIALIZING:
self._is_hosted.set_exception(
asyncio.TimeoutError("Game setup timed out")
)
self._logger.debug("Game setup timed out.. Cancelling game")
self._logger.debug("Game setup timed out, cancelling game")
await self.on_game_end()

@property
Expand Down Expand Up @@ -272,17 +268,16 @@ def get_team_sets(self) -> List[Set[Player]]:

async def wait_hosted(self, timeout: float):
return await asyncio.wait_for(
asyncio.shield(self._is_hosted),
self._hosted_event.wait(),
timeout=timeout
)

def set_hosted(self):
if not self._is_hosted.done():
self._is_hosted.set_result(None)
self._hosted_event.set()

async def wait_launched(self, timeout: float):
return await asyncio.wait_for(
asyncio.shield(self._launch_fut),
self._launched_event.wait(),
timeout=timeout
)

Expand Down Expand Up @@ -707,7 +702,7 @@ async def launch(self):
await self.on_game_launched()
await self.validate_game_settings()

self._launch_fut.set_result(None)
self._launched_event.set()
self._logger.info("Game launched")

async def on_game_launched(self):
Expand Down
13 changes: 10 additions & 3 deletions server/ladder_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,11 +466,18 @@ def game_options(player: Player) -> GameLaunchOptions:
if guest.lobby_connection is not None
])
await game.wait_launched(60 + 10 * len(all_guests))
self._logger.debug("Ladder game launched successfully")
except Exception:
self._logger.debug("Ladder game launched successfully %s", game)
except Exception as e:
if isinstance(e, asyncio.TimeoutError):
self._logger.info(
"Ladder game failed to start! %s setup timed out",
game
)
else:
self._logger.exception("Ladder game failed to start %s", game)

if game:
await game.on_game_end()
self._logger.exception("Failed to start ladder game!")

msg = {"command": "match_cancelled"}
for player in all_players:
Expand Down
2 changes: 1 addition & 1 deletion server/matchmaker/pop_timer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def time_until_next_pop(self, num_queued: int, time_queued: float) -> float:
# Obtained by solving $ NUM_PLAYERS = rate * time $ for time.
next_pop_time = desired_players * total_times / total_players
if next_pop_time > config.QUEUE_POP_TIME_MAX:
self._logger.warning(
self._logger.info(
"Required time (%.2fs) for %s is larger than max pop time (%ds). "
"Consider increasing the max pop time",
next_pop_time, self.queue.name, config.QUEUE_POP_TIME_MAX
Expand Down
56 changes: 47 additions & 9 deletions server/oauth_service.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import time

import aiocron
import aiohttp
import jwt
Expand All @@ -6,6 +8,7 @@

from server.config import config

from .asyncio_extensions import synchronizedmethod
from .core import Service
from .decorators import with_logger
from .exceptions import AuthenticationError
Expand All @@ -19,6 +22,7 @@ class OAuthService(Service, name="oauth_service"):

def __init__(self):
self.public_keys = {}
self._last_key_fetch_time = None

async def initialize(self) -> None:
await self.retrieve_public_keys()
Expand All @@ -28,25 +32,59 @@ async def initialize(self) -> None:
"*/10 * * * *", func=self.retrieve_public_keys
)

@synchronizedmethod
async def get_public_keys(self) -> dict:
"""
Return cached keys, or fetch them if they're missing
"""
if not self.public_keys:
# Rate limit requests so we don't spam the endpoint when it's down
if (
not self._last_key_fetch_time or
time.monotonic() - self._last_key_fetch_time > 5
):
await self.retrieve_public_keys()

if not self.public_keys:
raise RuntimeError("jwks could not be retrieved")

return self.public_keys

async def retrieve_public_keys(self) -> None:
"""
Get the latest jwks from the hydra endpoint
"""
async with aiohttp.ClientSession(raise_for_status=True) as session:
async with session.get(config.HYDRA_JWKS_URI) as resp:
jwks = await resp.json()
self.public_keys = {
jwk["kid"]: RSAAlgorithm.from_jwk(jwk)
for jwk in jwks["keys"]
}
self._last_key_fetch_time = time.monotonic()
try:
async with aiohttp.ClientSession(raise_for_status=True) as session:
async with session.get(config.HYDRA_JWKS_URI) as resp:
jwks = await resp.json()
self.public_keys = {
jwk["kid"]: RSAAlgorithm.from_jwk(jwk)
for jwk in jwks["keys"]
}
except Exception:
self._logger.exception(
"Unable to retrieve jwks, token login will be unavailable!"
)

async def get_player_id_from_token(self, token: str) -> int:
"""
Decode the JWT to get the player_id
"""
# Ensures that if we're missing the jwks we will try to fetch them on
# each new login request. This way our login functionality will be
# restored as soon as possible
keys = await self.get_public_keys()
try:
kid = jwt.get_unverified_header(token)["kid"]
key = self.public_keys[kid]
return int(jwt.decode(token, key=key, algorithms="RS256", options={"verify_aud": False})["sub"])
key = keys[kid]
decoded = jwt.decode(
token,
key=key,
algorithms="RS256",
options={"verify_aud": False}
)
return int(decoded["sub"])
except (InvalidTokenError, KeyError, ValueError):
raise AuthenticationError("Token signature was invalid", "token")
24 changes: 23 additions & 1 deletion server/stats/game_stats_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,29 @@ def __init__(self, event_service: EventService, achievement_service: Achievement
self._event_service = event_service
self._achievement_service = achievement_service

async def process_game_stats(self, player: Player, game: Game, army_stats_list: List):
async def process_game_stats(
self,
player: Player,
game: Game,
army_stats_list: List
):
try:
await self._process_game_stats(player, game, army_stats_list)
except KeyError as e:
self._logger.info("Malformed game stats. KeyError: %s", e)
except Exception:
self._logger.exception(
"Error processing game stats for %s in game %d",
player.login,
game.id
)

async def _process_game_stats(
self,
player: Player,
game: Game,
army_stats_list: List
):
stats = None
number_of_humans = 0
highest_score = 0
Expand Down
1 change: 0 additions & 1 deletion tests/integration_tests/test_oauth_session.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import asyncio
import os

import pytest
Expand Down
41 changes: 41 additions & 0 deletions tests/unit_tests/test_database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from unittest import mock

import asynctest
import pymysql
import pytest

from server.db import deadlock_retry_execute
from tests.utils import fast_forward


@pytest.mark.asyncio
@fast_forward(10)
async def test_deadlock_retry_execute():
mock_conn = mock.Mock()
mock_conn.execute = asynctest.CoroutineMock(
side_effect=pymysql.err.OperationalError(-1, "Deadlock found")
)

with pytest.raises(pymysql.err.OperationalError):
await deadlock_retry_execute(mock_conn, "foo")

assert mock_conn.execute.call_count == 3


@pytest.mark.asyncio
@fast_forward(10)
async def test_deadlock_retry_execute_success():
call_count = 0

async def execute(*args):
nonlocal call_count
call_count += 1
if call_count <= 1:
raise pymysql.err.OperationalError(-1, "Deadlock found")

mock_conn = mock.Mock()
mock_conn.execute = asynctest.CoroutineMock(side_effect=execute)

await deadlock_retry_execute(mock_conn, "foo")

assert mock_conn.execute.call_count == 2
Loading

0 comments on commit 5cb90fa

Please sign in to comment.