Skip to content

Commit

Permalink
[ECO-5095][CHA-RL*] Implement logging for RoomLifecycleManager
Browse files Browse the repository at this point in the history
1. Added private property logger with thread specific dynamic context
2. Added trace, debug, error logging for private doRetry method
3. Added trace, debug, error logging for attach method
  • Loading branch information
sacOO7 committed Dec 11, 2024
1 parent 11180ae commit e686835
Showing 1 changed file with 69 additions and 22 deletions.
91 changes: 69 additions & 22 deletions chat-android/src/main/java/com/ably/chat/RoomLifecycleManager.kt
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,12 @@ internal class RoomLifecycleManager(
private val roomScope: CoroutineScope,
private val statusLifecycle: DefaultRoomLifecycle,
private val contributors: List<ContributesToRoomLifecycle>,
private val logger: Logger,
roomLogger: Logger,
) {
private val logger = roomLogger.withContext(
"RoomLifecycleManager",
dynamicContext = mapOf("scope" to { Thread.currentThread().name }),
)

/**
* AtomicCoroutineScope makes sure all operations are atomic and run with given priority.
Expand Down Expand Up @@ -183,63 +187,84 @@ internal class RoomLifecycleManager(
* @returns Returns when the room is attached, or the room enters a failed state.
* Spec: CHA-RL5
*/
@Suppress("CognitiveComplexMethod", "ThrowsCount")
@Suppress("CognitiveComplexMethod", "ThrowsCount", "LongMethod")
private suspend fun doRetry(contributor: ContributesToRoomLifecycle) {
logger.trace("doRetry();")
// Handle the channel wind-down.
logger.debug("doRetry(); winding down channels except for feature: ${contributor.featureName}")
// CHA-RL5a - Handle the channel wind-down for other channels
var result = kotlin.runCatching { doChannelWindDown(contributor) }
while (result.isFailure) {
// CHA-RL5c - If in doing the wind down, we've entered failed state, then it's game over anyway
if (this.statusLifecycle.status === RoomStatus.Failed) {
logger.error("doRetry(); channels wind down failed, RoomStatus is FAILED", result.exceptionOrNull())
throw result.exceptionOrNull() ?: IllegalStateException("room is in a failed state")
}
logger.warn("doRetry(); channels wind down failed, retrying in $retryDurationInMs ms", result.exceptionOrNull())
delay(retryDurationInMs)
result = kotlin.runCatching { doChannelWindDown(contributor) }
}
logger.debug("doRetry(); channels wind down successfully completed")

// A helper that allows us to retry the attach operation
val doAttachWithRetry: suspend () -> Unit = {
logger.trace("doRetry.doAttachWithRetry();")
coroutineScope {
logger.debug("doRetry.doAttachWithRetry(); attempting to attach all feature channels")
statusLifecycle.setStatus(RoomStatus.Attaching)
val attachmentResult = doAttach()

// CHA-RL5c - If we're in failed, then we should wind down all the channels, eventually - but we're done here
if (attachmentResult.status === RoomStatus.Failed) {
logger.error("doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Failed", attachmentResult.exception)
atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
runDownChannelsOnFailedAttach()
}
logger.warn("doRetry.doAttachWithRetry(); started winding down all channels internally")
return@coroutineScope
}

// If we're in suspended, then we should wait for the channel to reattach, but wait for it to do so
if (attachmentResult.status === RoomStatus.Suspended) {
val failedFeature = attachmentResult.failedFeature
?: throw lifeCycleException(
"no failed feature in doRetry",
ErrorCode.RoomLifecycleError,
)
logger.warn(
"doRetry.doAttachWithRetry(); failed to attach channels, RoomStatus is Suspended",
attachmentResult.exception,
)
val failedFeature = attachmentResult.failedFeature ?: throw lifeCycleException(
"no failed feature in doRetry",
ErrorCode.RoomLifecycleError,
)
logger.warn("doRetry.doAttachWithRetry(); Retrying attach due to suspended feature: ${failedFeature.featureName}")
// No need to catch errors, rather they should propagate to caller method
return@coroutineScope doRetry(failedFeature)
}

// We attached, huzzah!
logger.debug("doRetry.doAttachWithRetry(); Room ATTACH success, RoomStatus: ${statusLifecycle.status}")
}
}

// If given suspended contributor channel has reattached, then we can retry the attach
if (contributor.channel.state == ChannelState.attached) {
logger.debug("doRetry(); feature: ${contributor.featureName} reattached, retrying attach for others")
return doAttachWithRetry()
}

// CHA-RL5d - Otherwise, wait for our suspended contributor channel to re-attach and try again
try {
logger.debug("doRetry(); waiting to channel ATTACHED for feature: ${contributor.featureName}")
listenToChannelAttachOrFailure(contributor)
logger.debug("doRetry(); waiting success, feature: ${contributor.featureName} ATTACHED, retrying attach for others")
delay(retryDurationInMs) // Let other channels get into ATTACHING state
// Attach successful
return doAttachWithRetry()
} catch (ex: AblyException) {
// CHA-RL5c - Channel attach failed
logger.error("doRetry(); waiting failed for channel attach, feature: ${contributor.featureName}", ex)
// No need for channels wind down, since it's taken care at the start of doRetry()
statusLifecycle.setStatus(RoomStatus.Failed, ex.errorInfo)
throw ex
}
// Attach successful, try attach for other channels
return doAttachWithRetry()
}

/**
Expand Down Expand Up @@ -292,59 +317,60 @@ internal class RoomLifecycleManager(
*/
@Suppress("ThrowsCount")
internal suspend fun attach() {
logger.trace("attach();")
val deferredAttach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL1d
if (statusLifecycle.status == RoomStatus.Attached) { // CHA-RL1a
logger.debug("attach(); room is already attached")
return@async
}

if (statusLifecycle.status == RoomStatus.Releasing) { // CHA-RL1b
throw lifeCycleException(
"unable to attach room; room is releasing",
ErrorCode.RoomIsReleasing,
)
logger.error("attach(); attach failed, room is in releasing state")
throw lifeCycleException("unable to attach room; room is releasing", ErrorCode.RoomIsReleasing)
}

if (statusLifecycle.status == RoomStatus.Released) { // CHA-RL1c
throw lifeCycleException(
"unable to attach room; room is released",
ErrorCode.RoomIsReleased,
)
logger.error("attach(); attach failed, room is in released state")
throw lifeCycleException("unable to attach room; room is released", ErrorCode.RoomIsReleased)
}

// At this point, we force the room status to be attaching
clearAllTransientDetachTimeouts()
operationInProgress = true
logger.debug("attach(); transitioned room into ATTACHING state")
statusLifecycle.setStatus(RoomStatus.Attaching) // CHA-RL1e

val attachResult = doAttach()

// CHA-RL1h4 - If we're in a failed state, then we should wind down all the channels, eventually
if (attachResult.status === RoomStatus.Failed) {
logger.error("attach(); failed to attach channels, RoomStatus is Failed", attachResult.exception)
// CHA-RL1h5 - detach all remaining channels
atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
runDownChannelsOnFailedAttach()
}
logger.warn("attach(); started winding down all channels internally")
throw attachResult.exception // CHA-RL1h1
}

// CHA-RL1h1, CHA-RL1h2 - If we're in suspended, then this attach should fail, but we'll retry after a short delay async
if (attachResult.status === RoomStatus.Suspended) {
logger.warn("attach(); failed to attach channels, RoomStatus is Suspended", attachResult.exception)
if (attachResult.failedFeature == null) {
throw lifeCycleException(
"no failed feature in attach",
ErrorCode.RoomLifecycleError,
)
throw lifeCycleException("no failed feature in attach", ErrorCode.RoomLifecycleError)
}
attachResult.failedFeature?.let {
// CHA-RL1h3 - Enter recovery for failed room feature/contributor
atomicCoroutineScope.async(LifecycleOperationPrecedence.Internal.priority) {
doRetry(it)
}
logger.warn("attach(); retrying attach due to suspended feature: ${it.featureName}")
}
throw attachResult.exception // CHA-RL1h1
}

// We attached, finally!
logger.debug("attach(): room attached successfully")
}

deferredAttach.await()
Expand All @@ -357,12 +383,17 @@ internal class RoomLifecycleManager(
* Spec: CHA-RL1f, CHA-RL1g, CHA-RL1h
*/
private suspend fun doAttach(): RoomAttachmentResult {
logger.trace("doAttach();")
val attachResult = DefaultRoomAttachmentResult()
logger.debug("doAttach(); trying to attach all features: ${contributors.joinToString { it.featureName }}")
for (feature in contributors) { // CHA-RL1f - attach each feature sequentially
try {
logger.debug("doAttach(); attaching feature: ${feature.featureName}")
feature.channel.attachCoroutine()
firstAttachesCompleted[feature] = true
logger.debug("doAttach(); attached feature: ${feature.featureName}")
} catch (ex: Throwable) { // CHA-RL1h - handle channel attach failure
logger.error("doAttach(); attach failed for feature: ${feature.featureName}", ex)
attachResult.throwable = ex
attachResult.failedFeatureField = feature
attachResult.errorField = lifeCycleErrorInfo(
Expand All @@ -387,17 +418,20 @@ internal class RoomLifecycleManager(

// Regardless of whether we're suspended or failed, run-down the other channels
// The wind-down procedure will take Precedence over any user-driven actions
logger.warn("doAttach(); transitioning room into ${attachResult.status.name} state")
statusLifecycle.setStatus(attachResult)
return attachResult
}
}

// CHA-RL1g, We successfully attached all the channels - set our status to attached, start listening changes in channel status
logger.debug("doAttach(); attach success for all features: ${contributors.joinToString { it.featureName }}")
this.statusLifecycle.setStatus(attachResult)
this.operationInProgress = false

// Iterate the pending discontinuity events and trigger them
for ((contributor, error) in pendingDiscontinuityEvents) {
logger.warn("doAttach(); emitting discontinuity event for feature: ${contributor.featureName}")
contributor.discontinuityDetected(error)
}
pendingDiscontinuityEvents.clear()
Expand All @@ -407,9 +441,11 @@ internal class RoomLifecycleManager(
/**
* If we've failed to attach, then we're in the failed state and all that is left to do is to detach all the channels.
* Spec: CHA-RL1h5, CHA-RL1h6
* @returns Returns only when all channels are detached. Doesn't throw exception.
* @returns Returns only when all channels are either detached or failed. Doesn't throw exception.
*/
private suspend fun runDownChannelsOnFailedAttach() {
logger.trace("runDownChannelsOnFailedAttach();")
logger.debug("runDownChannelsOnFailedAttach(); attempting to detach all channels")
// At this point, we have control over the channel lifecycle, so we can hold onto it until things are resolved
// Keep trying to detach the channels until they're all detached.
var channelWindDown = kotlin.runCatching { doChannelWindDown() }
Expand All @@ -419,6 +455,7 @@ internal class RoomLifecycleManager(
delay(retryDurationInMs)
channelWindDown = kotlin.runCatching { doChannelWindDown() }
}
logger.debug("runDownChannelsOnFailedAttach(); success, all channels are either detached or failed")
}

/**
Expand All @@ -431,11 +468,13 @@ internal class RoomLifecycleManager(
*/
@Suppress("CognitiveComplexMethod", "ComplexCondition")
private suspend fun doChannelWindDown(except: ContributesToRoomLifecycle? = null) = coroutineScope {
logger.trace("doChannelWindDown();")
contributors.map { contributor: ContributesToRoomLifecycle ->
async {
// CHA-RL5a1 - If its the contributor we want to wait for a conclusion on, then we should not detach it
// Unless we're in a failed state, in which case we should detach it
if (contributor.channel === except?.channel && statusLifecycle.status !== RoomStatus.Failed) {
logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} as per except param")
return@async
}
// If the room's already in the failed state, or it's releasing, we should not detach a failed channel
Expand All @@ -446,12 +485,15 @@ internal class RoomLifecycleManager(
) &&
contributor.channel.state === ChannelState.failed
) {
logger.debug("doChannelWindDown(); ignoring feature: ${contributor.featureName} since it's already in failed state")
return@async
}

try {
logger.debug("doChannelWindDown(); detaching feature: ${contributor.featureName}")
contributor.channel.detachCoroutine()
} catch (throwable: Throwable) {
logger.warn("doChannelWindDown(); failed to detach feature: ${contributor.featureName}", throwable)
// CHA-RL2h2 - If the contributor is in a failed state and we're not ignoring failed states, we should fail the room
if (
contributor.channel.state === ChannelState.failed &&
Expand Down Expand Up @@ -482,6 +524,7 @@ internal class RoomLifecycleManager(
*/
@Suppress("ThrowsCount")
internal suspend fun detach() {
logger.trace("detach();")
val deferredDetach = atomicCoroutineScope.async(LifecycleOperationPrecedence.AttachOrDetach.priority) { // CHA-RL2i
// CHA-RL2a - If we're already detached, this is a no-op
if (statusLifecycle.status === RoomStatus.Detached) {
Expand Down Expand Up @@ -529,6 +572,7 @@ internal class RoomLifecycleManager(
* Spec: CHA-RL2f
*/
private suspend fun doDetach() {
logger.trace("doDetach();")
var channelWindDown = kotlin.runCatching { doChannelWindDown() }
var firstContributorFailedError: AblyException? = null
while (channelWindDown.isFailure) { // CHA-RL2h
Expand Down Expand Up @@ -564,6 +608,7 @@ internal class RoomLifecycleManager(
* Spec: CHA-RL3
*/
internal suspend fun release() {
logger.trace("release();")
val deferredRelease = atomicCoroutineScope.async(LifecycleOperationPrecedence.Release.priority) { // CHA-RL3k
// CHA-RL3a - If we're already released, this is a no-op
if (statusLifecycle.status === RoomStatus.Released) {
Expand Down Expand Up @@ -595,6 +640,7 @@ internal class RoomLifecycleManager(
* Spec: CHA-RL3f, CHA-RL3d
*/
private suspend fun releaseChannels() {
logger.trace("releaseChannels();")
var contributorsReleased = kotlin.runCatching { doRelease() }
while (contributorsReleased.isFailure) {
// Wait a short period and then try again
Expand All @@ -610,6 +656,7 @@ internal class RoomLifecycleManager(
*/
@Suppress("RethrowCaughtException")
private suspend fun doRelease() = coroutineScope {
logger.trace("doRelease();")
contributors.map { contributor: ContributesToRoomLifecycle ->
async {
// CHA-RL3e - Failed channels, we can ignore
Expand Down

0 comments on commit e686835

Please sign in to comment.