Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Limit how often GC happens by time. #9902

Merged
merged 6 commits into from
May 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/9902.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add limits to how often Synapse will GC, ensuring that large servers do not end up GC thrashing if `gc_thresholds` has not been correctly set.
10 changes: 10 additions & 0 deletions docs/sample_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ presence:
#
#gc_thresholds: [700, 10, 10]

# The minimum time in seconds between each GC for a generation, regardless of
# the GC thresholds. This ensures that we don't do GC too frequently.
#
# A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
# generation 0 GCs, etc.
#
# Defaults to `[1s, 10s, 30s]`.
#
#gc_min_interval: [0.5s, 30s, 1m]

# Set the limit on the returned events in the timeline in the get
# and sync operations. The default value is 100. -1 means no upper limit.
#
Expand Down
3 changes: 3 additions & 0 deletions synapse/app/generic_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ def start(config_options):

synapse.events.USE_FROZEN_DICTS = config.use_frozen_dicts

if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds

hs = GenericWorkerServer(
config.server_name,
config=config,
Expand Down
3 changes: 3 additions & 0 deletions synapse/app/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ def setup(config_options):

events.USE_FROZEN_DICTS = config.use_frozen_dicts

if config.server.gc_seconds:
synapse.metrics.MIN_TIME_BETWEEN_GCS = config.server.gc_seconds

hs = SynapseHomeServer(
config.server_name,
config=config,
Expand Down
31 changes: 30 additions & 1 deletion synapse/config/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import os.path
import re
from textwrap import indent
from typing import Any, Dict, Iterable, List, Optional, Set
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple

import attr
import yaml
Expand Down Expand Up @@ -572,6 +572,7 @@ def read_config(self, config, **kwargs):
_warn_if_webclient_configured(self.listeners)

self.gc_thresholds = read_gc_thresholds(config.get("gc_thresholds", None))
self.gc_seconds = self.read_gc_intervals(config.get("gc_min_interval", None))
erikjohnston marked this conversation as resolved.
Show resolved Hide resolved

@attr.s
class LimitRemoteRoomsConfig:
Expand Down Expand Up @@ -917,6 +918,16 @@ def generate_config_section(
#
#gc_thresholds: [700, 10, 10]

# The minimum time in seconds between each GC for a generation, regardless of
# the GC thresholds. This ensures that we don't do GC too frequently.
#
# A value of `[1s, 10s, 30s]` indicates that a second must pass between consecutive
# generation 0 GCs, etc.
#
# Defaults to `[1s, 10s, 30s]`.
#
#gc_min_interval: [0.5s, 30s, 1m]

# Set the limit on the returned events in the timeline in the get
# and sync operations. The default value is 100. -1 means no upper limit.
#
Expand Down Expand Up @@ -1305,6 +1316,24 @@ def add_arguments(parser):
help="Turn on the twisted telnet manhole service on the given port.",
)

def read_gc_intervals(self, durations) -> Optional[Tuple[float, float, float]]:
"""Reads the three durations for the GC min interval option, returning seconds."""
if durations is None:
return None

try:
if len(durations) != 3:
raise ValueError()
return (
self.parse_duration(durations[0]) / 1000,
self.parse_duration(durations[1]) / 1000,
self.parse_duration(durations[2]) / 1000,
)
except Exception:
raise ConfigError(
"Value of `gc_min_interval` must be a list of three durations if set"
)


def is_threepid_reserved(reserved_threepids, threepid):
"""Check the threepid against the reserved threepid config
Expand Down
18 changes: 16 additions & 2 deletions synapse/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,13 @@ def collect(self):

REGISTRY.register(ReactorLastSeenMetric())

# The minimum time in seconds between GCs for each generation, regardless of the current GC
# thresholds and counts.
MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)

# The time (in seconds since the epoch) of the last time we did a GC for each generation.
_last_gc = [0.0, 0.0, 0.0]


def runUntilCurrentTimer(reactor, func):
@functools.wraps(func)
Expand Down Expand Up @@ -575,11 +582,16 @@ def f(*args, **kwargs):
return ret

# Check if we need to do a manual GC (since its been disabled), and do
# one if necessary.
# one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
# promote an object into gen 2, and we don't want to handle the same
# object multiple times.
threshold = gc.get_threshold()
counts = gc.get_count()
for i in (2, 1, 0):
if threshold[i] < counts[i]:
# We check if we need to do one based on a straightforward
# comparison between the threshold and count. We also do an extra
# check to make sure that we don't a GC too often.
if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can become out-of-bounds if someone specifies [1, 10] or similar accidentally.

Suggested change
if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
if threshold[i] < counts[i] and len(MIN_TIME_BETWEEN_GCS) => i and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:

I'm also wary of mutating a "constant" like this, because it can give mixed signals in the code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, mutating is a bit icky, but I can't think of a better way of injecting the values at this point.

This can become out-of-bounds if someone specifies [1, 10] or similar accidentally.

We check for the right format in the config, so I think its fine that this explodes if a developer messes up and sets it incorrectly.

if i == 0:
logger.debug("Collecting gc %d", i)
else:
Expand All @@ -589,6 +601,8 @@ def f(*args, **kwargs):
unreachable = gc.collect(i)
end = time.time()

_last_gc[i] = end

gc_time.labels(i).observe(end - start)
gc_unreachable.labels(i).set(unreachable)

Expand Down