Skip to content

Commit

Permalink
Clarify that the default retry strategy uses polynomial backoff and n…
Browse files Browse the repository at this point in the history
…ot exponential backoff
  • Loading branch information
victormours committed Sep 15, 2023
1 parent 19930b4 commit 1bdabc1
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 30 deletions.
6 changes: 6 additions & 0 deletions activejob/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
* Clarify the backoff strategy for the recommended `:wait` option when retrying jobs

`wait: :exponentially_longer` is waiting polynomially longer, so it is now recommended to use `wait: :polynomially_longer` to keep the same behavior.

*Victor Mours*

## Rails 7.1.0.beta1 (September 13, 2023) ##

* Fix Active Job log message to correctly report a job failed to enqueue
Expand Down
17 changes: 12 additions & 5 deletions activejob/lib/active_job/exceptions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module ClassMethods
# ==== Options
# * <tt>:wait</tt> - Re-enqueues the job with a delay specified either in seconds (default: 3 seconds),
# as a computing proc that takes the number of executions so far as an argument, or as a symbol reference of
# <tt>:exponentially_longer</tt>, which applies the wait algorithm of <tt>((executions**4) + (Kernel.rand * (executions**4) * jitter)) + 2</tt>
# <tt>:polynomially_longer</tt>, which applies the wait algorithm of <tt>((executions**4) + (Kernel.rand * (executions**4) * jitter)) + 2</tt>
# (first wait ~3s, then ~18s, then ~83s, etc)
# * <tt>:attempts</tt> - Re-enqueues the job the specified number of times (default: 5 attempts) or a symbol reference of <tt>:unlimited</tt>
# to retry the job until it succeeds
Expand All @@ -40,11 +40,11 @@ module ClassMethods
# retry_on CustomInfrastructureException, wait: 5.minutes, attempts: :unlimited
#
# retry_on ActiveRecord::Deadlocked, wait: 5.seconds, attempts: 3
# retry_on Net::OpenTimeout, Timeout::Error, wait: :exponentially_longer, attempts: 10 # retries at most 10 times for Net::OpenTimeout and Timeout::Error combined
# retry_on Net::OpenTimeout, Timeout::Error, wait: :polynomially_longer, attempts: 10 # retries at most 10 times for Net::OpenTimeout and Timeout::Error combined
# # To retry at most 10 times for each individual exception:
# # retry_on Net::OpenTimeout, wait: :exponentially_longer, attempts: 10
# # retry_on Net::OpenTimeout, wait: :polynomially_longer, attempts: 10
# # retry_on Net::ReadTimeout, wait: 5.seconds, jitter: 0.30, attempts: 10
# # retry_on Timeout::Error, wait: :exponentially_longer, attempts: 10
# # retry_on Timeout::Error, wait: :polynomially_longer, attempts: 10
#
# retry_on(YetAnotherCustomAppException) do |job, error|
# ExceptionNotifier.caught(error)
Expand All @@ -57,6 +57,12 @@ module ClassMethods
# end
# end
def retry_on(*exceptions, wait: 3.seconds, attempts: 5, queue: nil, priority: nil, jitter: JITTER_DEFAULT)
if wait == :exponentially_longer
ActiveJob.deprecator.warn(<<~MSG.squish)
`wait: :exponentially_longer` will actually wait polynomially longer and is therefore deprecated.
Prefer `wait: :polynomially_longer` to avoid confusion and keep the same behavior.
MSG
end
rescue_from(*exceptions) do |error|
executions = executions_for(exceptions)
if attempts == :unlimited || executions < attempts
Expand Down Expand Up @@ -156,7 +162,8 @@ def determine_delay(seconds_or_duration_or_algorithm:, executions:, jitter: JITT
jitter = jitter == JITTER_DEFAULT ? self.class.retry_jitter : (jitter || 0.0)

case seconds_or_duration_or_algorithm
when :exponentially_longer
when :exponentially_longer, :polynomially_longer
# This delay uses a polynomial backoff strategy, which was previously misnamed as exponential
delay = executions**4
delay_jitter = determine_jitter_for_delay(delay, jitter)
delay + delay_jitter + 2
Expand Down
62 changes: 43 additions & 19 deletions activejob/test/cases/exceptions_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,23 @@ class ExceptionsTest < ActiveSupport::TestCase
end
end

test "exponentially retrying job includes jitter" do
test "polynomially retrying job includes jitter" do
travel_to Time.now

random_amount = 2
delay_for_jitter = -> (delay) { random_amount * delay * ActiveJob::Base.retry_jitter }

Kernel.stub(:rand, random_amount) do
RetryJob.perform_later "ExponentialWaitTenAttemptsError", 5, :log_scheduled_at
RetryJob.perform_later "PolynomialWaitTenAttemptsError", 5, :log_scheduled_at

assert_equal [
"Raised ExponentialWaitTenAttemptsError for the 1st time",
"Raised PolynomialWaitTenAttemptsError for the 1st time",
"Next execution scheduled at #{(Time.now + 3.seconds + delay_for_jitter.(1)).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 2nd time",
"Raised PolynomialWaitTenAttemptsError for the 2nd time",
"Next execution scheduled at #{(Time.now + 18.seconds + delay_for_jitter.(16)).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 3rd time",
"Raised PolynomialWaitTenAttemptsError for the 3rd time",
"Next execution scheduled at #{(Time.now + 83.seconds + delay_for_jitter.(81)).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 4th time",
"Raised PolynomialWaitTenAttemptsError for the 4th time",
"Next execution scheduled at #{(Time.now + 258.seconds + delay_for_jitter.(256)).to_f}",
"Successfully completed job"
], JobBuffer.values
Expand All @@ -140,16 +140,16 @@ class ExceptionsTest < ActiveSupport::TestCase
random_amount = 1

Kernel.stub(:rand, random_amount) do
RetryJob.perform_later "ExponentialWaitTenAttemptsError", 5, :log_scheduled_at
RetryJob.perform_later "PolynomialWaitTenAttemptsError", 5, :log_scheduled_at

assert_equal [
"Raised ExponentialWaitTenAttemptsError for the 1st time",
"Raised PolynomialWaitTenAttemptsError for the 1st time",
"Next execution scheduled at #{(Time.now + 7.seconds).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 2nd time",
"Raised PolynomialWaitTenAttemptsError for the 2nd time",
"Next execution scheduled at #{(Time.now + 82.seconds).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 3rd time",
"Raised PolynomialWaitTenAttemptsError for the 3rd time",
"Next execution scheduled at #{(Time.now + 407.seconds).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 4th time",
"Raised PolynomialWaitTenAttemptsError for the 4th time",
"Next execution scheduled at #{(Time.now + 1282.seconds).to_f}",
"Successfully completed job"
], JobBuffer.values
Expand All @@ -175,16 +175,16 @@ class ExceptionsTest < ActiveSupport::TestCase
ActiveJob::Base.retry_jitter = old_jitter
end

test "random wait time for exponentially retrying job when retry jitter delay multiplier value is between 1 and 2" do
test "random wait time for polynomially retrying job when retry jitter delay multiplier value is between 1 and 2" do
old_jitter = ActiveJob::Base.retry_jitter
ActiveJob::Base.retry_jitter = 1.2

travel_to Time.now

RetryJob.perform_later "ExponentialWaitTenAttemptsError", 2, :log_scheduled_at
RetryJob.perform_later "PolynomialWaitTenAttemptsError", 2, :log_scheduled_at

assert_not_equal [
"Raised ExponentialWaitTenAttemptsError for the 1st time",
"Raised PolynomialWaitTenAttemptsError for the 1st time",
"Next execution scheduled at #{(Time.now + 3.seconds).to_f}",
"Successfully completed job"
], JobBuffer.values
Expand All @@ -198,10 +198,10 @@ class ExceptionsTest < ActiveSupport::TestCase

travel_to Time.now

RetryJob.perform_later "ExponentialWaitTenAttemptsError", 2, :log_scheduled_at
RetryJob.perform_later "PolynomialWaitTenAttemptsError", 2, :log_scheduled_at

assert_not_equal [
"Raised ExponentialWaitTenAttemptsError for the 1st time",
"Raised PolynomialWaitTenAttemptsError for the 1st time",
"Next execution scheduled at #{(Time.now + 3.seconds).to_f}",
"Successfully completed job"
], JobBuffer.values
Expand Down Expand Up @@ -258,7 +258,7 @@ class ExceptionsTest < ActiveSupport::TestCase
test "use individual execution timers when calculating retry delay" do
travel_to Time.now

exceptions_to_raise = %w(ExponentialWaitTenAttemptsError CustomWaitTenAttemptsError ExponentialWaitTenAttemptsError CustomWaitTenAttemptsError)
exceptions_to_raise = %w(PolynomialWaitTenAttemptsError CustomWaitTenAttemptsError PolynomialWaitTenAttemptsError CustomWaitTenAttemptsError)

random_amount = 1

Expand All @@ -268,11 +268,11 @@ class ExceptionsTest < ActiveSupport::TestCase
delay_for_jitter = -> (delay) { random_amount * delay * ActiveJob::Base.retry_jitter }

assert_equal [
"Raised ExponentialWaitTenAttemptsError for the 1st time",
"Raised PolynomialWaitTenAttemptsError for the 1st time",
"Next execution scheduled at #{(Time.now + 3.seconds + delay_for_jitter.(1)).to_f}",
"Raised CustomWaitTenAttemptsError for the 2nd time",
"Next execution scheduled at #{(Time.now + 2.seconds).to_f}",
"Raised ExponentialWaitTenAttemptsError for the 3rd time",
"Raised PolynomialWaitTenAttemptsError for the 3rd time",
"Next execution scheduled at #{(Time.now + 18.seconds + delay_for_jitter.(16)).to_f}",
"Raised CustomWaitTenAttemptsError for the 4th time",
"Next execution scheduled at #{(Time.now + 4.seconds).to_f}",
Expand Down Expand Up @@ -370,6 +370,30 @@ class ExceptionsTest < ActiveSupport::TestCase
assert_equal expected_array, JobBuffer.values.last(2)
end

class ::LegacyExponentialNamingError < StandardError; end
test "wait: :exponentially_longer is deprecated but still works" do
assert_deprecated(ActiveJob.deprecator) do
class LegacyRetryJob < RetryJob
retry_on LegacyExponentialNamingError, wait: :exponentially_longer, attempts: 10, jitter: nil
end
end

travel_to Time.now
LegacyRetryJob.perform_later "LegacyExponentialNamingError", 5, :log_scheduled_at

assert_equal [
"Raised LegacyExponentialNamingError for the 1st time",
"Next execution scheduled at #{(Time.now + 3.seconds).to_f}",
"Raised LegacyExponentialNamingError for the 2nd time",
"Next execution scheduled at #{(Time.now + 18.seconds).to_f}",
"Raised LegacyExponentialNamingError for the 3rd time",
"Next execution scheduled at #{(Time.now + 83.seconds).to_f}",
"Raised LegacyExponentialNamingError for the 4th time",
"Next execution scheduled at #{(Time.now + 258.seconds).to_f}",
"Successfully completed job"
], JobBuffer.values
end

private
def adapter_skips_scheduling?(queue_adapter)
[
Expand Down
4 changes: 2 additions & 2 deletions activejob/test/jobs/retry_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class FirstRetryableErrorOfTwo < StandardError; end
class SecondRetryableErrorOfTwo < StandardError; end
class LongWaitError < StandardError; end
class ShortWaitTenAttemptsError < StandardError; end
class ExponentialWaitTenAttemptsError < StandardError; end
class PolynomialWaitTenAttemptsError < StandardError; end
class CustomWaitTenAttemptsError < StandardError; end
class CustomCatchError < StandardError; end
class DiscardableError < StandardError; end
Expand All @@ -26,7 +26,7 @@ class RetryJob < ActiveJob::Base
retry_on FirstRetryableErrorOfTwo, SecondRetryableErrorOfTwo, attempts: 4
retry_on LongWaitError, wait: 1.hour, attempts: 10
retry_on ShortWaitTenAttemptsError, wait: 1.second, attempts: 10
retry_on ExponentialWaitTenAttemptsError, wait: :exponentially_longer, attempts: 10
retry_on PolynomialWaitTenAttemptsError, wait: :polynomially_longer, attempts: 10
retry_on CustomWaitTenAttemptsError, wait: ->(executions) { executions * 2 }, attempts: 10
retry_on(CustomCatchError) { |job, error| JobBuffer.add("Dealt with a job that failed to retry in a custom way after #{job.arguments.second} attempts. Message: #{error.message}") }
retry_on(ActiveJob::DeserializationError) { |job, error| JobBuffer.add("Raised #{error.class} for the #{job.executions} time") }
Expand Down
2 changes: 1 addition & 1 deletion activestorage/app/jobs/active_storage/analyze_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class ActiveStorage::AnalyzeJob < ActiveStorage::BaseJob
queue_as { ActiveStorage.queues[:analysis] }

discard_on ActiveRecord::RecordNotFound
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :exponentially_longer
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :polynomially_longer

def perform(blob)
blob.analyze
Expand Down
2 changes: 1 addition & 1 deletion activestorage/app/jobs/active_storage/mirror_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class ActiveStorage::MirrorJob < ActiveStorage::BaseJob
queue_as { ActiveStorage.queues[:mirror] }

discard_on ActiveStorage::FileNotFoundError
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :exponentially_longer
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :polynomially_longer

def perform(key, checksum:)
ActiveStorage::Blob.service.try(:mirror, key, checksum: checksum)
Expand Down
2 changes: 1 addition & 1 deletion activestorage/app/jobs/active_storage/purge_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class ActiveStorage::PurgeJob < ActiveStorage::BaseJob
queue_as { ActiveStorage.queues[:purge] }

discard_on ActiveRecord::RecordNotFound
retry_on ActiveRecord::Deadlocked, attempts: 10, wait: :exponentially_longer
retry_on ActiveRecord::Deadlocked, attempts: 10, wait: :polynomially_longer

def perform(blob)
blob.purge
Expand Down
2 changes: 1 addition & 1 deletion activestorage/app/jobs/active_storage/transform_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class ActiveStorage::TransformJob < ActiveStorage::BaseJob
queue_as { ActiveStorage.queues[:transform] }

discard_on ActiveRecord::RecordNotFound
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :exponentially_longer
retry_on ActiveStorage::IntegrityError, attempts: 10, wait: :polynomially_longer

def perform(blob, transformations)
blob.variant(transformations).processed
Expand Down

0 comments on commit 1bdabc1

Please sign in to comment.