Skip to content

Commit

Permalink
Retry on 404/410 within short window after create
Browse files Browse the repository at this point in the history
In summary, this is fallback logic to opRepoPostCreateDelay. The
opRepoPostCreateDelay is a balance between not waiting too long
where the SDK isn't being response, but long enough where the
likelihood of running into the backend replica delay issue is low.

Given the above, we can further lower the chance of this issue by
retrying requests that fail with 404 or 410 for Subscriptions.
We don't want to do this with all 404/410 requests so we utilize the
same NewRecordsState list, but added a new isInMissingRetryWindow
method and opRepoPostCreateRetryUpTo value to drive the window value.

Added tests to executors to ensure the new 404 logic is working. Some
executors were missing 404 handling altogether so we filled in those
gaps in this commit as well.
  • Loading branch information
jkasten2 committed Apr 17, 2024
1 parent 59fd1df commit 0ba8c2c
Show file tree
Hide file tree
Showing 11 changed files with 456 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ class ConfigModel : Model() {
}

/**
* The number milliseconds to delay after an operation completes
* The number of milliseconds to delay after an operation completes
* that creates or changes ids.
* This is a "cold down" period to avoid a caveat with OneSignal's backend
* replication, where you may incorrectly get a 404 when attempting a GET
Expand All @@ -154,6 +154,19 @@ class ConfigModel : Model() {
setLongProperty(::opRepoPostCreateDelay.name, value)
}

/**
* The number of milliseconds to retry operations for new models.
* This is a fallback to opRepoPostCreateDelay, where it's delay may
* not be enough. The server may be unusually overloaded so we will
* retry these (back-off rules apply to all retries) as we only want
* to re-create records as a last resort.
*/
var opRepoPostCreateRetryUpTo: Long
get() = getLongProperty(::opRepoPostCreateRetryUpTo.name) { 60_000 }
set(value) {
setLongProperty(::opRepoPostCreateRetryUpTo.name, value)
}

/**
* The minimum number of milliseconds required to pass to allow the fetching of IAM to occur.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ import com.onesignal.user.internal.builduser.IRebuildUserService
import com.onesignal.user.internal.identity.IdentityModelStore
import com.onesignal.user.internal.operations.DeleteAliasOperation
import com.onesignal.user.internal.operations.SetAliasOperation
import com.onesignal.user.internal.operations.impl.states.NewRecordsState

internal class IdentityOperationExecutor(
private val _identityBackend: IIdentityBackendService,
private val _identityModelStore: IdentityModelStore,
private val _buildUserService: IRebuildUserService,
private val _newRecordState: NewRecordsState,
) : IOperationExecutor {
override val operations: List<String>
get() = listOf(SET_ALIAS, DELETE_ALIAS)
Expand Down Expand Up @@ -67,11 +69,15 @@ internal class IdentityOperationExecutor(
NetworkUtils.ResponseStatusType.UNAUTHORIZED ->
ExecutionResponse(ExecutionResult.FAIL_UNAUTHORIZED)
NetworkUtils.ResponseStatusType.MISSING -> {
val operations = _buildUserService.getRebuildOperationsIfCurrentUser(lastOperation.appId, lastOperation.onesignalId)
if (operations == null) {
if (_newRecordState.isInMissingRetryWindow(lastOperation.onesignalId)) {
return ExecutionResponse(ExecutionResult.FAIL_RETRY)
}

val rebuildOps = _buildUserService.getRebuildOperationsIfCurrentUser(lastOperation.appId, lastOperation.onesignalId)
if (rebuildOps == null) {
return ExecutionResponse(ExecutionResult.FAIL_NORETRY)
} else {
return ExecutionResponse(ExecutionResult.FAIL_RETRY, operations = operations)
return ExecutionResponse(ExecutionResult.FAIL_RETRY, operations = rebuildOps)
}
}
}
Expand Down Expand Up @@ -103,10 +109,14 @@ internal class IdentityOperationExecutor(
NetworkUtils.ResponseStatusType.UNAUTHORIZED ->
ExecutionResponse(ExecutionResult.FAIL_UNAUTHORIZED)
NetworkUtils.ResponseStatusType.MISSING -> {
// This means either the User or the Alias was already
// deleted, either way the end state is the same, the
// alias no longer exists on that User.
ExecutionResponse(ExecutionResult.SUCCESS)
return if (_newRecordState.isInMissingRetryWindow(lastOperation.onesignalId)) {
ExecutionResponse(ExecutionResult.FAIL_RETRY)
} else {
// This means either the User or the Alias was already
// deleted, either way the end state is the same, the
// alias no longer exists on that User.
ExecutionResponse(ExecutionResult.SUCCESS)
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import com.onesignal.user.internal.builduser.IRebuildUserService
import com.onesignal.user.internal.identity.IdentityModel
import com.onesignal.user.internal.identity.IdentityModelStore
import com.onesignal.user.internal.operations.RefreshUserOperation
import com.onesignal.user.internal.operations.impl.states.NewRecordsState
import com.onesignal.user.internal.properties.PropertiesModel
import com.onesignal.user.internal.properties.PropertiesModelStore
import com.onesignal.user.internal.subscriptions.SubscriptionModel
Expand All @@ -31,6 +32,7 @@ internal class RefreshUserOperationExecutor(
private val _subscriptionsModelStore: SubscriptionModelStore,
private val _configModelStore: ConfigModelStore,
private val _buildUserService: IRebuildUserService,
private val _newRecordState: NewRecordsState,
) : IOperationExecutor {
override val operations: List<String>
get() = listOf(REFRESH_USER)
Expand Down Expand Up @@ -135,6 +137,9 @@ internal class RefreshUserOperationExecutor(
NetworkUtils.ResponseStatusType.UNAUTHORIZED ->
ExecutionResponse(ExecutionResult.FAIL_UNAUTHORIZED)
NetworkUtils.ResponseStatusType.MISSING -> {
if (_newRecordState.isInMissingRetryWindow(op.onesignalId)) {
return ExecutionResponse(ExecutionResult.FAIL_RETRY)
}
val operations = _buildUserService.getRebuildOperationsIfCurrentUser(op.appId, op.onesignalId)
if (operations == null) {
return ExecutionResponse(ExecutionResult.FAIL_NORETRY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import com.onesignal.user.internal.operations.CreateSubscriptionOperation
import com.onesignal.user.internal.operations.DeleteSubscriptionOperation
import com.onesignal.user.internal.operations.TransferSubscriptionOperation
import com.onesignal.user.internal.operations.UpdateSubscriptionOperation
import com.onesignal.user.internal.operations.impl.states.NewRecordsState
import com.onesignal.user.internal.subscriptions.SubscriptionModel
import com.onesignal.user.internal.subscriptions.SubscriptionModelStore
import com.onesignal.user.internal.subscriptions.SubscriptionType
Expand All @@ -37,6 +38,7 @@ internal class SubscriptionOperationExecutor(
private val _subscriptionModelStore: SubscriptionModelStore,
private val _configModelStore: ConfigModelStore,
private val _buildUserService: IRebuildUserService,
private val _newRecordState: NewRecordsState,
) : IOperationExecutor {
override val operations: List<String>
get() = listOf(CREATE_SUBSCRIPTION, UPDATE_SUBSCRIPTION, DELETE_SUBSCRIPTION, TRANSFER_SUBSCRIPTION)
Expand Down Expand Up @@ -136,6 +138,9 @@ internal class SubscriptionOperationExecutor(
NetworkUtils.ResponseStatusType.UNAUTHORIZED ->
ExecutionResponse(ExecutionResult.FAIL_UNAUTHORIZED)
NetworkUtils.ResponseStatusType.MISSING -> {
if (_newRecordState.isInMissingRetryWindow(createOperation.onesignalId)) {
return ExecutionResponse(ExecutionResult.FAIL_RETRY)
}
val operations = _buildUserService.getRebuildOperationsIfCurrentUser(createOperation.appId, createOperation.onesignalId)
if (operations == null) {
return ExecutionResponse(ExecutionResult.FAIL_NORETRY)
Expand Down Expand Up @@ -177,23 +182,27 @@ internal class SubscriptionOperationExecutor(
return when (responseType) {
NetworkUtils.ResponseStatusType.RETRYABLE ->
ExecutionResponse(ExecutionResult.FAIL_RETRY)
NetworkUtils.ResponseStatusType.MISSING ->
NetworkUtils.ResponseStatusType.MISSING -> {
if (listOf(lastOperation.onesignalId, lastOperation.subscriptionId).any { _newRecordState.isInMissingRetryWindow(it) }) {
return ExecutionResponse(ExecutionResult.FAIL_RETRY)
}
// toss this, but create an identical CreateSubscriptionOperation to re-create the subscription being updated.
ExecutionResponse(
ExecutionResult.FAIL_NORETRY,
operations =
listOf(
CreateSubscriptionOperation(
lastOperation.appId,
lastOperation.onesignalId,
lastOperation.subscriptionId,
lastOperation.type,
lastOperation.enabled,
lastOperation.address,
lastOperation.status,
),
listOf(
CreateSubscriptionOperation(
lastOperation.appId,
lastOperation.onesignalId,
lastOperation.subscriptionId,
lastOperation.type,
lastOperation.enabled,
lastOperation.address,
lastOperation.status,
),
),
)
}
else ->
ExecutionResponse(ExecutionResult.FAIL_NORETRY)
}
Expand Down Expand Up @@ -248,9 +257,14 @@ internal class SubscriptionOperationExecutor(
val responseType = NetworkUtils.getResponseStatusType(ex.statusCode)

return when (responseType) {
NetworkUtils.ResponseStatusType.MISSING ->
// if the subscription is missing, we are good!
ExecutionResponse(ExecutionResult.SUCCESS)
NetworkUtils.ResponseStatusType.MISSING -> {
if (listOf(op.onesignalId, op.subscriptionId).any { _newRecordState.isInMissingRetryWindow(it) }) {
ExecutionResponse(ExecutionResult.FAIL_RETRY)
} else {
// if the subscription is missing, we are good!
ExecutionResponse(ExecutionResult.SUCCESS)
}
}
NetworkUtils.ResponseStatusType.RETRYABLE ->
ExecutionResponse(ExecutionResult.FAIL_RETRY)
else ->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ import com.onesignal.user.internal.operations.SetTagOperation
import com.onesignal.user.internal.operations.TrackPurchaseOperation
import com.onesignal.user.internal.operations.TrackSessionEndOperation
import com.onesignal.user.internal.operations.TrackSessionStartOperation
import com.onesignal.user.internal.operations.impl.states.NewRecordsState
import com.onesignal.user.internal.properties.PropertiesModelStore

internal class UpdateUserOperationExecutor(
private val _userBackend: IUserBackendService,
private val _identityModelStore: IdentityModelStore,
private val _propertiesModelStore: PropertiesModelStore,
private val _buildUserService: IRebuildUserService,
private val _newRecordState: NewRecordsState,
) : IOperationExecutor {
override val operations: List<String>
get() = listOf(SET_TAG, DELETE_TAG, SET_PROPERTY, TRACK_SESSION_START, TRACK_SESSION_END, TRACK_PURCHASE)
Expand Down Expand Up @@ -163,6 +165,9 @@ internal class UpdateUserOperationExecutor(
NetworkUtils.ResponseStatusType.UNAUTHORIZED ->
ExecutionResponse(ExecutionResult.FAIL_UNAUTHORIZED)
NetworkUtils.ResponseStatusType.MISSING -> {
if (_newRecordState.isInMissingRetryWindow(onesignalId)) {
return ExecutionResponse(ExecutionResult.FAIL_RETRY)
}
val operations = _buildUserService.getRebuildOperationsIfCurrentUser(appId, onesignalId)
if (operations == null) {
return ExecutionResponse(ExecutionResult.FAIL_NORETRY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import com.onesignal.core.internal.time.ITime
/**
* Purpose: Keeps track of ids that were just created on the backend.
* This list gets used to delay network calls to ensure upcoming
* requests are ready to be accepted by the backend.
* requests are ready to be accepted by the backend. Also used for retries
* as a fallback if the server is under extra load.
*/
class NewRecordsState(
private val _time: ITime,
Expand All @@ -24,4 +25,9 @@ class NewRecordsState(
val timeLastMovedOrCreated = records[key] ?: return true
return _time.currentTimeMillis - timeLastMovedOrCreated > _configModelStore.model.opRepoPostCreateDelay
}

fun isInMissingRetryWindow(key: String): Boolean {
val timeLastMovedOrCreated = records[key] ?: return false
return _time.currentTimeMillis - timeLastMovedOrCreated < _configModelStore.model.opRepoPostCreateRetryUpTo
}
}
Loading

0 comments on commit 0ba8c2c

Please sign in to comment.