From e68f1dc3a594ff57339d841e4da870fb3783c778 Mon Sep 17 00:00:00 2001 From: Claire Date: Thu, 23 Feb 2023 16:52:40 +0100 Subject: [PATCH] Change auto-deletion throttling constants to better scale with server size (#23320) --- .../accounts_statuses_cleanup_scheduler.rb | 23 +++++++------------ ...ccounts_statuses_cleanup_scheduler_spec.rb | 10 -------- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb index bd92fe32c40c44..d245f6bbdc156c 100644 --- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb +++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb @@ -7,7 +7,7 @@ class Scheduler::AccountsStatusesCleanupScheduler # This limit is mostly to be nice to the fediverse at large and not # generate too much traffic. # This also helps limiting the running time of the scheduler itself. - MAX_BUDGET = 50 + MAX_BUDGET = 150 # This is an attempt to spread the load across instances, as various # accounts are likely to have various followers. @@ -15,28 +15,22 @@ class Scheduler::AccountsStatusesCleanupScheduler # This is an attempt to limit the workload generated by status removal # jobs to something the particular instance can handle. - PER_THREAD_BUDGET = 5 + PER_THREAD_BUDGET = 6 # Those avoid loading an instance that is already under load - MAX_DEFAULT_SIZE = 2 + MAX_DEFAULT_SIZE = 200 MAX_DEFAULT_LATENCY = 5 - MAX_PUSH_SIZE = 5 + MAX_PUSH_SIZE = 500 MAX_PUSH_LATENCY = 10 + # 'pull' queue has lower priority jobs, and it's unlikely that pushing # deletes would cause much issues with this queue if it didn't cause issues # with default and push. Yet, do not enqueue deletes if the instance is # lagging behind too much. - MAX_PULL_SIZE = 500 - MAX_PULL_LATENCY = 300 - - # This is less of an issue in general, but deleting old statuses is likely - # to cause delivery errors, and thus increase the number of jobs to be retried. - # This doesn't directly translate to load, but connection errors and a high - # number of dead instances may lead to this spiraling out of control if - # unchecked. - MAX_RETRY_SIZE = 50_000 + MAX_PULL_SIZE = 10_000 + MAX_PULL_LATENCY = 5.minutes.to_i - sidekiq_options retry: 0, lock: :until_executed + sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i def perform return if under_load? @@ -72,7 +66,6 @@ def compute_budget end def under_load? - return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY) end diff --git a/spec/workers/scheduler/accounts_statuses_cleanup_scheduler_spec.rb b/spec/workers/scheduler/accounts_statuses_cleanup_scheduler_spec.rb index 8f20725c87f256..d953cc39d41133 100644 --- a/spec/workers/scheduler/accounts_statuses_cleanup_scheduler_spec.rb +++ b/spec/workers/scheduler/accounts_statuses_cleanup_scheduler_spec.rb @@ -23,7 +23,6 @@ }, ] end - let(:retry_size) { 0 } before do queue_stub = double @@ -33,7 +32,6 @@ allow(Sidekiq::ProcessSet).to receive(:new).and_return(process_set_stub) sidekiq_stats_stub = double - allow(sidekiq_stats_stub).to receive(:retry_size).and_return(retry_size) allow(Sidekiq::Stats).to receive(:new).and_return(sidekiq_stats_stub) # Create a bunch of old statuses @@ -70,14 +68,6 @@ expect(subject.under_load?).to be true end end - - context 'when there is a huge amount of jobs to retry' do - let(:retry_size) { 1_000_000 } - - it 'returns true' do - expect(subject.under_load?).to be true - end - end end describe '#get_budget' do