From 05a30bb3a10fe43937c22cad5e1bd4acf0f0b59e Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 22 Aug 2023 15:47:52 -0400 Subject: [PATCH 01/29] Draft WorkflowCallbackActor --- .../finalization/WorkflowCallbackActor.scala | 105 ++++++++++++++++++ .../WorkflowCallbackJsonSupport.scala | 11 ++ 2 files changed, 116 insertions(+) create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala create mode 100644 engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala new file mode 100644 index 00000000000..9aa00ece81e --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -0,0 +1,105 @@ +package cromwell.engine.workflow.lifecycle.finalization + +import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} +import akka.event.LoggingReceive +import akka.http.scaladsl.Http +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ +import akka.http.scaladsl.marshalling.Marshal +import akka.http.scaladsl.model._ +import akka.util.ByteString +import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse +import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} +import cromwell.core.Dispatcher.IoDispatcher +import cromwell.core.retry.Retry.withRetry +import cromwell.core.retry.SimpleExponentialBackoff +import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} +import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ + +import scala.concurrent.ExecutionContextExecutor +//import cromwell.services.ServiceRegistryActor + +import java.net.URI +import scala.concurrent.{ExecutionContext, Future} +import scala.util.{Failure, Success} + +/** + * The WorkflowCallbackActor is responsible for sending a message on workflow completion to a configured endpoint. + * This allows for users to build automation around workflow completion without needing to poll Cromwell for + * workflow status. + */ +object WorkflowCallbackActor { + def props(workflowId: WorkflowId, + // serviceRegistryActor: ServiceRegistryActor, + workflowDescriptor: EngineWorkflowDescriptor, + workflowOutputs: CallOutputs, + lastWorkflowState: WorkflowState, + callbackUri: URI, + retryBackoff: SimpleExponentialBackoff + ) = Props( + new WorkflowCallbackActor( + workflowId, + /*serviceRegistryActor,*/ + workflowDescriptor, + workflowOutputs, + lastWorkflowState, + callbackUri, + retryBackoff) + ).withDispatcher(IoDispatcher) +} + +class WorkflowCallbackActor(workflowId: WorkflowId, + //serviceRegistryActor: ServiceRegistryActor, + val workflowDescriptor: EngineWorkflowDescriptor, + workflowOutputs: CallOutputs, + lastWorkflowState: WorkflowState, + callbackUri: URI, + retryBackoff: SimpleExponentialBackoff) + extends Actor with ActorLogging { + + implicit val ec: ExecutionContextExecutor = context.dispatcher + implicit val system: ActorSystem = context.system + + lazy private val callbackMessage = CallbackMessage( + workflowId.toString, lastWorkflowState.toString, workflowOutputs.outputs.map(entry => (entry._1.name, entry._2)) + ) + + override def receive = LoggingReceive { + case Finalize => performActionThenRespond( + performCallback().map( _ => FinalizationSuccess), + FinalizationFailed + ) + } + + private def performActionThenRespond(operation: => Future[BackendWorkflowLifecycleActorResponse], + onFailure: (Throwable) => BackendWorkflowLifecycleActorResponse) + (implicit ec: ExecutionContext) = { + val respondTo: ActorRef = sender() + operation onComplete { + case Success(r) => respondTo ! r + case Failure(t) => respondTo ! onFailure(t) + } + } + + def performCallback(): Future[Unit] = { + val headers = List[HttpHeader]() // TODO add auth + for { + entity <- Marshal(callbackMessage).to[RequestEntity] + request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity) + // TODO add logging for retries + response <- withRetry(() => Http().singleRequest(request.withHeaders(headers)), backoff = retryBackoff) + result <- if (response.status.isFailure()) { + response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => + Future.failed( + new RuntimeException(s"Permanently failed to send callback for workflow state $lastWorkflowState to $callbackUri : $errorBody") + ) + } + } else { + log.info(s"Successfully sent callback for workflow state $lastWorkflowState to $callbackUri") + // TODO send metadata + Future.unit + } + } yield result + } +} + diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala new file mode 100644 index 00000000000..559b6a205a5 --- /dev/null +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala @@ -0,0 +1,11 @@ +package cromwell.engine.workflow.lifecycle.finalization + +import cromwell.util.JsonFormatting.WomValueJsonFormatter.WomValueJsonFormat +import spray.json.DefaultJsonProtocol +import wom.values.WomValue + +final case class CallbackMessage(workflowId: String, state: String, outputs: Map[String, WomValue]) + +object WorkflowCallbackJsonSupport extends DefaultJsonProtocol { + implicit val callbackMessageFormat = jsonFormat3(CallbackMessage) +} From 106b669fb48f923acfcf9f80657efa4b4ab75642 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 28 Aug 2023 15:51:27 -0400 Subject: [PATCH 02/29] Configure and run WorkflowCallbackActor in finalization --- core/src/main/resources/reference.conf | 12 +++++ .../scala/cromwell/core/WorkflowOptions.scala | 1 + .../engine/workflow/WorkflowActor.scala | 29 ++++++++++-- .../finalization/WorkflowCallbackActor.scala | 47 +++++++++++++++++-- .../WorkflowFinalizationActor.scala | 25 +++++++--- 5 files changed, 102 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index d2cc9c5171f..018bd3b2239 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -626,3 +626,15 @@ ga4gh { contact-info-url = "https://cromwell.readthedocs.io/en/stable/" } } + +workflow-state-callback { + enabled: false + endpoint: "http://example.com/34893ifjk4389/f00b4r" # Can be overridden in workflow options + # Users can override default retry backoff behavior if desired +# request-backoff { +# min: "10 seconds" +# max: "5 minutes" +# multiplier: 1.1 +# } + +} diff --git a/core/src/main/scala/cromwell/core/WorkflowOptions.scala b/core/src/main/scala/cromwell/core/WorkflowOptions.scala index 010300b2d8b..91a7c30bbfe 100644 --- a/core/src/main/scala/cromwell/core/WorkflowOptions.scala +++ b/core/src/main/scala/cromwell/core/WorkflowOptions.scala @@ -62,6 +62,7 @@ object WorkflowOptions { case object WorkflowFailureMode extends WorkflowOption("workflow_failure_mode") case object UseReferenceDisks extends WorkflowOption("use_reference_disks") case object MemoryRetryMultiplier extends WorkflowOption("memory_retry_multiplier") + case object WorkflowCallbackUri extends WorkflowOption("workflow_callback_uri") private lazy val WorkflowOptionsConf = ConfigFactory.load.getConfig("workflow-options") private lazy val EncryptedFields: Seq[String] = WorkflowOptionsConf.getStringList("encrypted-fields").asScala.toList diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index b4ccf3de53b..230ef5e02aa 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -1,7 +1,6 @@ package cromwell.engine.workflow import java.util.concurrent.atomic.AtomicInteger - import akka.actor.SupervisorStrategy.Stop import akka.actor._ import com.typesafe.config.Config @@ -24,7 +23,7 @@ import cromwell.engine.workflow.lifecycle.deletion.DeleteWorkflowFilesActor.{Del import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ import cromwell.engine.workflow.lifecycle.finalization.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} -import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, CopyWorkflowOutputsActor, WorkflowFinalizationActor} +import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, CopyWorkflowOutputsActor, WorkflowCallbackActor, WorkflowCallbackConfig, WorkflowFinalizationActor} import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor @@ -257,6 +256,8 @@ class WorkflowActor(workflowToStart: WorkflowToStart, private val deleteWorkflowFiles = conf.getBoolean("system.delete-workflow-files") + private val workflowCallbackConfig = WorkflowCallbackConfig(conf.getConfig("workflow-state-callback")) + private val workflowDockerLookupActor = context.actorOf( WorkflowDockerLookupActor.props(workflowId, dockerHashActor, initialStartableState.restarted), s"WorkflowDockerLookupActor-$workflowId") @@ -664,13 +665,35 @@ class WorkflowActor(workflowToStart: WorkflowToStart, case _ => Option(CopyWorkflowOutputsActor.props(workflowIdForLogging, ioActor, workflowDescriptor, workflowOutputs, stateData.initializationData)) } + val workflowCallbackActorProps = if (workflowCallbackConfig.enabled) { + // TODO do we need to check whether this is the root workflow? + workflowDescriptor + .getWorkflowOption(WorkflowOptions.WorkflowCallbackUri) + .flatMap(WorkflowCallbackConfig.createAndValidateUri) + .orElse(workflowCallbackConfig.uri) + .map( + WorkflowCallbackActor.props( + rootWorkflowIdForLogging, + workflowDescriptor, + workflowOutputs, + stateName.workflowState, + _, + workflowCallbackConfig.retryBackoff + ) + ) + } + else { + None + } + context.actorOf(WorkflowFinalizationActor.props( workflowDescriptor = workflowDescriptor, ioActor = ioActor, jobExecutionMap = jobExecutionMap, workflowOutputs = workflowOutputs, initializationData = stateData.initializationData, - copyWorkflowOutputsActor = copyWorkflowOutputsActorProps + copyWorkflowOutputsActor = copyWorkflowOutputsActorProps, + workflowCallbackActorProps = workflowCallbackActorProps ), name = s"WorkflowFinalizationActor") } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 9aa00ece81e..f593d107317 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -7,6 +7,8 @@ import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.marshalling.Marshal import akka.http.scaladsl.model._ import akka.util.ByteString +import com.typesafe.config.Config +import com.typesafe.scalalogging.LazyLogging import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} import cromwell.core.Dispatcher.IoDispatcher @@ -15,14 +17,48 @@ import cromwell.core.retry.SimpleExponentialBackoff import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} import cromwell.engine.EngineWorkflowDescriptor import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ +import net.ceedubs.ficus.Ficus._ import scala.concurrent.ExecutionContextExecutor +import scala.concurrent.duration.DurationInt +import scala.util.Try //import cromwell.services.ServiceRegistryActor import java.net.URI import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} + +case class WorkflowCallbackConfig(enabled: Boolean, + uri: Option[URI], // May be overridden by workflow options + retryBackoff: Option[SimpleExponentialBackoff] + // TODO auth + ) + +object WorkflowCallbackConfig extends LazyLogging { + def apply(config: Config): WorkflowCallbackConfig = { + val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) + val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) + + WorkflowCallbackConfig( + config.getBoolean("enabled"), + uri = uri, + retryBackoff = backoff + ) + } + + def createAndValidateUri(uriString: String): Option[URI] = { + // TODO validate + Try(new URI(uriString)) match { + case Success(uri) => Option(uri) + case Failure(err) => + logger.warn(s"Failed to parse provided workflow callback URI (${uriString}): $err") + None + } + } +} + + /** * The WorkflowCallbackActor is responsible for sending a message on workflow completion to a configured endpoint. * This allows for users to build automation around workflow completion without needing to poll Cromwell for @@ -35,7 +71,7 @@ object WorkflowCallbackActor { workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, callbackUri: URI, - retryBackoff: SimpleExponentialBackoff + retryBackoff: Option[SimpleExponentialBackoff] = None ) = Props( new WorkflowCallbackActor( workflowId, @@ -54,7 +90,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, callbackUri: URI, - retryBackoff: SimpleExponentialBackoff) + retryBackoff: Option[SimpleExponentialBackoff]) extends Actor with ActorLogging { implicit val ec: ExecutionContextExecutor = context.dispatcher @@ -64,6 +100,9 @@ class WorkflowCallbackActor(workflowId: WorkflowId, workflowId.toString, lastWorkflowState.toString, workflowOutputs.outputs.map(entry => (entry._1.name, entry._2)) ) + private lazy val defaultRetryBackoff = SimpleExponentialBackoff(2.seconds, 30.seconds, 1.1) + private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) + override def receive = LoggingReceive { case Finalize => performActionThenRespond( performCallback().map( _ => FinalizationSuccess), @@ -87,7 +126,9 @@ class WorkflowCallbackActor(workflowId: WorkflowId, entity <- Marshal(callbackMessage).to[RequestEntity] request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity) // TODO add logging for retries - response <- withRetry(() => Http().singleRequest(request.withHeaders(headers)), backoff = retryBackoff) + // TODO retries aren't working? + // TODO don't fall over if there's a response body + response <- withRetry(() => Http().singleRequest(request.withHeaders(headers)), backoff = backoffWithDefault) result <- if (response.status.isFailure()) { response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => Future.failed( diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala index 8731367cfb7..585455e2367 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala @@ -46,14 +46,16 @@ object WorkflowFinalizationActor { jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData, - copyWorkflowOutputsActor: Option[Props]): Props = { + copyWorkflowOutputsActor: Option[Props], + workflowCallbackActorProps: Option[Props]): Props = { Props(new WorkflowFinalizationActor( workflowDescriptor, ioActor, jobExecutionMap, workflowOutputs, initializationData, - copyWorkflowOutputsActor + copyWorkflowOutputsActor, + workflowCallbackActorProps )).withDispatcher(EngineDispatcher) } } @@ -63,7 +65,8 @@ case class WorkflowFinalizationActor(workflowDescriptor: EngineWorkflowDescripto jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData, - copyWorkflowOutputsActorProps: Option[Props]) + copyWorkflowOutputsActorProps: Option[Props], + workflowCallbackActorProps: Option[Props]) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { override lazy val workflowIdForLogging = workflowDescriptor.possiblyNotRootWorkflowId @@ -99,12 +102,22 @@ case class WorkflowFinalizationActor(workflowDescriptor: EngineWorkflowDescripto } yield actor } - val engineFinalizationActor = Try { copyWorkflowOutputsActorProps.map(context.actorOf(_, "CopyWorkflowOutputsActor")).toList } + val engineFinalizationActors = Try { + List( + (copyWorkflowOutputsActorProps, "CopyWorkflowOutputsActor"), + (workflowCallbackActorProps, "WorkflowCallbackActor") + ).flatMap { engineActor => + engineActor match { + case (Some(props), actorName) => Some(context.actorOf(props, actorName)) + case _ => None + } + } + } val allActors = for { backendFinalizationActorsFromTry <- backendFinalizationActors - engineFinalizationActorFromTry <- engineFinalizationActor - } yield backendFinalizationActorsFromTry.toList ++ engineFinalizationActorFromTry + engineFinalizationActorsFromTry <- engineFinalizationActors + } yield backendFinalizationActorsFromTry.toList ++ engineFinalizationActorsFromTry allActors match { case Failure(ex) => From 5fc1e667896edcc71deefceda9eac6322dab354d Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 28 Aug 2023 16:48:11 -0400 Subject: [PATCH 03/29] Actually retry when there is a failure --- .../engine/workflow/WorkflowActor.scala | 1 - .../finalization/WorkflowCallbackActor.scala | 33 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 230ef5e02aa..a4aa33928f3 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -666,7 +666,6 @@ class WorkflowActor(workflowToStart: WorkflowToStart, } val workflowCallbackActorProps = if (workflowCallbackConfig.enabled) { - // TODO do we need to check whether this is the root workflow? workflowDescriptor .getWorkflowOption(WorkflowOptions.WorkflowCallbackUri) .flatMap(WorkflowCallbackConfig.createAndValidateUri) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index f593d107317..d3d6291f24a 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -100,7 +100,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, workflowId.toString, lastWorkflowState.toString, workflowOutputs.outputs.map(entry => (entry._1.name, entry._2)) ) - private lazy val defaultRetryBackoff = SimpleExponentialBackoff(2.seconds, 30.seconds, 1.1) + private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) override def receive = LoggingReceive { @@ -125,22 +125,31 @@ class WorkflowCallbackActor(workflowId: WorkflowId, for { entity <- Marshal(callbackMessage).to[RequestEntity] request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity) - // TODO add logging for retries - // TODO retries aren't working? - // TODO don't fall over if there's a response body - response <- withRetry(() => Http().singleRequest(request.withHeaders(headers)), backoff = backoffWithDefault) - result <- if (response.status.isFailure()) { - response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => - Future.failed( - new RuntimeException(s"Permanently failed to send callback for workflow state $lastWorkflowState to $callbackUri : $errorBody") - ) - } - } else { + response <- withRetry( + () => sendRequestOrFail(request.withHeaders(headers)), + backoff = backoffWithDefault, + onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $lastWorkflowState to $callbackUri : $err") + ) + result <- { + response.entity.discardBytes() log.info(s"Successfully sent callback for workflow state $lastWorkflowState to $callbackUri") // TODO send metadata Future.unit } + } yield result } + + def sendRequestOrFail(request: HttpRequest): Future[HttpResponse] = + Http().singleRequest(request).flatMap(response => + if (response.status.isFailure()) { + response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => + Future.failed( + new RuntimeException(errorBody) + ) + } + } else Future.successful(response) + ) + } From 3e8500f9cc9ada8872e7d3e076c25d18120f50cd Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Thu, 31 Aug 2023 16:19:07 -0400 Subject: [PATCH 04/29] Add workflow callback result to metadata --- .../engine/workflow/WorkflowActor.scala | 1 + .../finalization/WorkflowCallbackActor.scala | 49 ++++++++++++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index a4aa33928f3..c5813c3b3ff 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -673,6 +673,7 @@ class WorkflowActor(workflowToStart: WorkflowToStart, .map( WorkflowCallbackActor.props( rootWorkflowIdForLogging, + serviceRegistryActor, workflowDescriptor, workflowOutputs, stateName.workflowState, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index d3d6291f24a..fd693f494f4 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -10,7 +10,7 @@ import akka.util.ByteString import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse -import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationFailed, FinalizationSuccess, Finalize} +import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationSuccess, Finalize} import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff @@ -22,9 +22,11 @@ import net.ceedubs.ficus.Ficus._ import scala.concurrent.ExecutionContextExecutor import scala.concurrent.duration.DurationInt import scala.util.Try -//import cromwell.services.ServiceRegistryActor +import cromwell.services.metadata.MetadataService.PutMetadataAction +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import java.net.URI +import java.time.Instant import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} @@ -66,7 +68,7 @@ object WorkflowCallbackConfig extends LazyLogging { */ object WorkflowCallbackActor { def props(workflowId: WorkflowId, - // serviceRegistryActor: ServiceRegistryActor, + serviceRegistryActor: ActorRef, workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, @@ -75,7 +77,7 @@ object WorkflowCallbackActor { ) = Props( new WorkflowCallbackActor( workflowId, - /*serviceRegistryActor,*/ + serviceRegistryActor, workflowDescriptor, workflowOutputs, lastWorkflowState, @@ -85,7 +87,7 @@ object WorkflowCallbackActor { } class WorkflowCallbackActor(workflowId: WorkflowId, - //serviceRegistryActor: ServiceRegistryActor, + serviceRegistryActor: ActorRef, val workflowDescriptor: EngineWorkflowDescriptor, workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, @@ -103,20 +105,26 @@ class WorkflowCallbackActor(workflowId: WorkflowId, private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) - override def receive = LoggingReceive { + override def receive: Actor.Receive = LoggingReceive { case Finalize => performActionThenRespond( performCallback().map( _ => FinalizationSuccess), - FinalizationFailed + _ => FinalizationSuccess // Don't fail the workflow when this step fails ) } private def performActionThenRespond(operation: => Future[BackendWorkflowLifecycleActorResponse], onFailure: (Throwable) => BackendWorkflowLifecycleActorResponse) - (implicit ec: ExecutionContext) = { + (implicit ec: ExecutionContext): Unit = { val respondTo: ActorRef = sender() operation onComplete { - case Success(r) => respondTo ! r - case Failure(t) => respondTo ! onFailure(t) + case Success(r) => + log.info(s"Successfully sent callback for workflow for workflow $workflowId in state $lastWorkflowState to $callbackUri") + sendMetadata(successful = true) + respondTo ! r + case Failure(t) => + log.warning(s"Permanently failed to send callback for workflow $workflowId in state $lastWorkflowState to $callbackUri: ${t.getMessage}") + sendMetadata(successful = false) + respondTo ! onFailure(t) } } @@ -132,11 +140,8 @@ class WorkflowCallbackActor(workflowId: WorkflowId, ) result <- { response.entity.discardBytes() - log.info(s"Successfully sent callback for workflow state $lastWorkflowState to $callbackUri") - // TODO send metadata Future.unit } - } yield result } @@ -151,5 +156,23 @@ class WorkflowCallbackActor(workflowId: WorkflowId, } else Future.successful(response) ) + def sendMetadata(successful: Boolean): Unit = { + val events = List( + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "successful"), + MetadataValue(successful) + ), + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "url"), + MetadataValue(callbackUri.toString) + ), + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "timestamp"), + MetadataValue(Instant.now()) + ) + ) + serviceRegistryActor ! PutMetadataAction(events) + } + } From a11437244e313a4f2aca55e4b22a71119b588a41 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 1 Sep 2023 15:39:00 -0400 Subject: [PATCH 05/29] Add Azure auth --- .../cloudsupport/azure/AzureCredentials.scala | 2 +- .../engine/workflow/WorkflowActor.scala | 3 +- .../finalization/WorkflowCallbackActor.scala | 59 +++++++++++++++---- 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala index c29155056a9..200b162c614 100644 --- a/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala +++ b/cloudSupport/src/main/scala/cromwell/cloudsupport/azure/AzureCredentials.scala @@ -33,7 +33,7 @@ case object AzureCredentials { new DefaultAzureCredentialBuilder() .authorityHost(azureProfile.getEnvironment.getActiveDirectoryEndpoint) - def getAccessToken(identityClientId: Option[String]): ErrorOr[String] = { + def getAccessToken(identityClientId: Option[String] = None): ErrorOr[String] = { val credentials = identityClientId.foldLeft(defaultCredentialBuilder) { (builder, clientId) => builder.managedIdentityClientId(clientId) }.build() diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index c5813c3b3ff..7080ee2cd63 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -678,7 +678,8 @@ class WorkflowActor(workflowToStart: WorkflowToStart, workflowOutputs, stateName.workflowState, _, - workflowCallbackConfig.retryBackoff + workflowCallbackConfig.retryBackoff, + workflowCallbackConfig.authMethod ) ) } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index fd693f494f4..6734bd18788 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -6,16 +6,22 @@ import akka.http.scaladsl.Http import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.marshalling.Marshal import akka.http.scaladsl.model._ +import akka.http.scaladsl.model.headers.RawHeader import akka.util.ByteString +import cats.data.Validated.{Invalid, Valid} +import cats.implicits.toTraverseOps import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging +import common.validation.ErrorOr import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationSuccess, Finalize} +import cromwell.cloudsupport.azure.AzureCredentials import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ import net.ceedubs.ficus.Ficus._ @@ -33,19 +39,36 @@ import scala.util.{Failure, Success} case class WorkflowCallbackConfig(enabled: Boolean, uri: Option[URI], // May be overridden by workflow options - retryBackoff: Option[SimpleExponentialBackoff] - // TODO auth + retryBackoff: Option[SimpleExponentialBackoff], + authMethod: Option[WorkflowCallbackConfig.AuthMethod] ) object WorkflowCallbackConfig extends LazyLogging { + sealed trait AuthMethod { def getAccessToken: ErrorOr.ErrorOr[String] } + case object AzureAuth extends AuthMethod { + override def getAccessToken: ErrorOr.ErrorOr[String] = AzureCredentials.getAccessToken() + } + // TODO +// case class GoogleAuth(mode: GoogleAuthMode) extends AuthMethod { +// override def getAccessToken: String = ??? +// } + def apply(config: Config): WorkflowCallbackConfig = { val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) + val authMethod = if (config.hasPath("auth.azure")) { + Option(AzureAuth) + // TODO + // } else if (config.hasPath("auth.google")) { + // ??? + } else None + WorkflowCallbackConfig( config.getBoolean("enabled"), uri = uri, - retryBackoff = backoff + retryBackoff = backoff, + authMethod = authMethod ) } @@ -73,7 +96,8 @@ object WorkflowCallbackActor { workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, callbackUri: URI, - retryBackoff: Option[SimpleExponentialBackoff] = None + retryBackoff: Option[SimpleExponentialBackoff] = None, + authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None ) = Props( new WorkflowCallbackActor( workflowId, @@ -82,7 +106,8 @@ object WorkflowCallbackActor { workflowOutputs, lastWorkflowState, callbackUri, - retryBackoff) + retryBackoff, + authMethod) ).withDispatcher(IoDispatcher) } @@ -92,7 +117,8 @@ class WorkflowCallbackActor(workflowId: WorkflowId, workflowOutputs: CallOutputs, lastWorkflowState: WorkflowState, callbackUri: URI, - retryBackoff: Option[SimpleExponentialBackoff]) + retryBackoff: Option[SimpleExponentialBackoff], + authMethod: Option[AuthMethod]) extends Actor with ActorLogging { implicit val ec: ExecutionContextExecutor = context.dispatcher @@ -128,13 +154,22 @@ class WorkflowCallbackActor(workflowId: WorkflowId, } } - def performCallback(): Future[Unit] = { - val headers = List[HttpHeader]() // TODO add auth + private def makeHeaders: Future[List[HttpHeader]] = { + authMethod.toList.map(_.getAccessToken).map { + case Valid(header) => Future.successful(header) + case Invalid(err) => Future.failed(new RuntimeException(err.toString)) // TODO better error + } + .map(t => t.map(RawHeader("Authorization", _))) + .traverse(identity) + } + + private def performCallback(): Future[Unit] = { for { entity <- Marshal(callbackMessage).to[RequestEntity] - request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity) + headers <- makeHeaders + request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity).withHeaders(headers) response <- withRetry( - () => sendRequestOrFail(request.withHeaders(headers)), + () => sendRequestOrFail(request), backoff = backoffWithDefault, onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $lastWorkflowState to $callbackUri : $err") ) @@ -145,7 +180,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, } yield result } - def sendRequestOrFail(request: HttpRequest): Future[HttpResponse] = + private def sendRequestOrFail(request: HttpRequest): Future[HttpResponse] = Http().singleRequest(request).flatMap(response => if (response.status.isFailure()) { response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => @@ -156,7 +191,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, } else Future.successful(response) ) - def sendMetadata(successful: Boolean): Unit = { + private def sendMetadata(successful: Boolean): Unit = { val events = List( MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "successful"), From 4ff381deaa03e29420dd3f54576499b3367e591a Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 4 Sep 2023 14:12:18 -0400 Subject: [PATCH 06/29] Move workflow callback out of finalization step --- .../engine/workflow/WorkflowActor.scala | 87 ++++++++---------- .../workflow/WorkflowManagerActor.scala | 4 + .../finalization/WorkflowCallbackActor.scala | 90 ++++++++----------- .../WorkflowFinalizationActor.scala | 25 ++---- .../cromwell/server/CromwellRootActor.scala | 12 ++- .../cromwell/SimpleWorkflowActorSpec.scala | 1 + .../engine/workflow/WorkflowActorSpec.scala | 1 + 7 files changed, 100 insertions(+), 120 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 7080ee2cd63..d70513e225c 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -22,8 +22,9 @@ import cromwell.engine.workflow.lifecycle.deletion.DeleteWorkflowFilesActor import cromwell.engine.workflow.lifecycle.deletion.DeleteWorkflowFilesActor.{DeleteWorkflowFilesFailedResponse, DeleteWorkflowFilesSucceededResponse, StartWorkflowFilesDeletion} import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor._ +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} -import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, CopyWorkflowOutputsActor, WorkflowCallbackActor, WorkflowCallbackConfig, WorkflowFinalizationActor} +import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, CopyWorkflowOutputsActor, WorkflowFinalizationActor} import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationFailedResponse, WorkflowInitializationResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor @@ -51,6 +52,7 @@ object WorkflowActor { final case class AbortWorkflowWithExceptionCommand(exception: Throwable) extends WorkflowActorCommand case object SendWorkflowHeartbeatCommand extends WorkflowActorCommand case object AwaitMetadataIntegrity + case class PerformWorkflowCallback(uri: Option[String], workflowState: WorkflowState) case class WorkflowFailedResponse(workflowId: WorkflowId, inState: WorkflowActorState, reasons: Seq[Throwable]) @@ -148,7 +150,7 @@ object WorkflowActor { initializationData: AllBackendInitializationData, lastStateReached: StateCheckpoint, effectiveStartableState: StartableState, - workflowFinalOutputs: Set[WomValue] = Set.empty, + workflowFinalOutputs: Option[CallOutputs] = None, workflowAllOutputs: Set[WomValue] = Set.empty, rootAndSubworkflowIds: Set[WorkflowId] = Set.empty, failedInitializationAttempts: Int = 0) @@ -175,6 +177,7 @@ object WorkflowActor { ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, @@ -198,6 +201,7 @@ object WorkflowActor { ioActor = ioActor, serviceRegistryActor = serviceRegistryActor, workflowLogCopyRouter = workflowLogCopyRouter, + workflowCallbackActor = workflowCallbackActor, jobStoreActor = jobStoreActor, subWorkflowStoreActor = subWorkflowStoreActor, callCacheReadActor = callCacheReadActor, @@ -225,6 +229,7 @@ class WorkflowActor(workflowToStart: WorkflowToStart, ioActor: ActorRef, override val serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, @@ -256,8 +261,6 @@ class WorkflowActor(workflowToStart: WorkflowToStart, private val deleteWorkflowFiles = conf.getBoolean("system.delete-workflow-files") - private val workflowCallbackConfig = WorkflowCallbackConfig(conf.getConfig("workflow-state-callback")) - private val workflowDockerLookupActor = context.actorOf( WorkflowDockerLookupActor.props(workflowId, dockerHashActor, initialStartableState.restarted), s"WorkflowDockerLookupActor-$workflowId") @@ -517,6 +520,11 @@ class WorkflowActor(workflowToStart: WorkflowToStart, stay() case Event(AwaitMetadataIntegrity, data) => goto(MetadataIntegrityValidationState) using data.copy(lastStateReached = data.lastStateReached.copy(state = stateName)) + case Event(PerformWorkflowCallback(uri, workflowState), data) => + workflowCallbackActor.foreach { wca => + wca ! PerformCallbackCommand(workflowId, uri, workflowState, data.workflowFinalOutputs.getOrElse(CallOutputs.empty)) + } + stay() } onTransition { @@ -535,26 +543,28 @@ class WorkflowActor(workflowToStart: WorkflowToStart, case _ => // The WMA is waiting for the WorkflowActorWorkComplete message. No extra information needed here. } - // Copy/Delete workflow logs - if (WorkflowLogger.isEnabled) { - /* - * The submitted workflow options have been previously validated by the CromwellApiHandler. These are - * being recreated so that in case MaterializeWorkflowDescriptor fails, the workflow logs can still - * be copied by accessing the workflow options outside of the EngineWorkflowDescriptor. - */ - def bruteForceWorkflowOptions: WorkflowOptions = sources.workflowOptions - val system = context.system - val ec = context.system.dispatcher - def bruteForcePathBuilders: Future[List[PathBuilder]] = { - // Protect against path builders that may throw an exception instead of returning a failed future - Future(EngineFilesystems.pathBuildersForWorkflow(bruteForceWorkflowOptions, pathBuilderFactories)(system))(ec).flatten - } + /* + * The submitted workflow options have been previously validated by the CromwellApiHandler. These are + * being recreated so that in case MaterializeWorkflowDescriptor fails, the workflow logs can still + * be copied by accessing the workflow options outside of the EngineWorkflowDescriptor. Used for both + * copying workflow log and sending workflow callback. + */ + def bruteForceWorkflowOptions: WorkflowOptions = sources.workflowOptions + + val system = context.system + val ec = context.system.dispatcher + def bruteForcePathBuilders: Future[List[PathBuilder]] = { + // Protect against path builders that may throw an exception instead of returning a failed future + Future(EngineFilesystems.pathBuildersForWorkflow(bruteForceWorkflowOptions, pathBuilderFactories)(system))(ec).flatten + } - val (workflowOptions, pathBuilders) = stateData.workflowDescriptor match { - case Some(wd) => (wd.backendDescriptor.workflowOptions, Future.successful(wd.pathBuilders)) - case None => (bruteForceWorkflowOptions, bruteForcePathBuilders) - } + val (workflowOptions, pathBuilders) = stateData.workflowDescriptor match { + case Some(wd) => (wd.backendDescriptor.workflowOptions, Future.successful(wd.pathBuilders)) + case None => (bruteForceWorkflowOptions, bruteForcePathBuilders) + } + // Copy/Delete workflow logs + if (WorkflowLogger.isEnabled) { workflowOptions.get(FinalWorkflowLogDir).toOption match { case Some(destinationDir) => pathBuilders @@ -567,6 +577,10 @@ class WorkflowActor(workflowToStart: WorkflowToStart, } } + // Attempt to perform workflow completion callback + val callbackUri = workflowOptions.get(WorkflowOptions.WorkflowCallbackUri).toOption + self ! PerformWorkflowCallback(callbackUri, terminalState.workflowState) + // We can't transition from within another transition function, but we can instruct ourselves to with a message: self ! AwaitMetadataIntegrity @@ -628,7 +642,7 @@ class WorkflowActor(workflowToStart: WorkflowToStart, rootWorkflowId = rootWorkflowId, rootWorkflowRootPaths = data.initializationData.getWorkflowRoots(), rootAndSubworkflowIds = data.rootAndSubworkflowIds, - workflowFinalOutputs = data.workflowFinalOutputs, + workflowFinalOutputs = data.workflowFinalOutputs.map(out => out.outputs.values.toSet).getOrElse(Set.empty), workflowAllOutputs = data.workflowAllOutputs, pathBuilders = data.workflowDescriptor.get.pathBuilders, serviceRegistryActor = serviceRegistryActor, @@ -665,36 +679,13 @@ class WorkflowActor(workflowToStart: WorkflowToStart, case _ => Option(CopyWorkflowOutputsActor.props(workflowIdForLogging, ioActor, workflowDescriptor, workflowOutputs, stateData.initializationData)) } - val workflowCallbackActorProps = if (workflowCallbackConfig.enabled) { - workflowDescriptor - .getWorkflowOption(WorkflowOptions.WorkflowCallbackUri) - .flatMap(WorkflowCallbackConfig.createAndValidateUri) - .orElse(workflowCallbackConfig.uri) - .map( - WorkflowCallbackActor.props( - rootWorkflowIdForLogging, - serviceRegistryActor, - workflowDescriptor, - workflowOutputs, - stateName.workflowState, - _, - workflowCallbackConfig.retryBackoff, - workflowCallbackConfig.authMethod - ) - ) - } - else { - None - } - context.actorOf(WorkflowFinalizationActor.props( workflowDescriptor = workflowDescriptor, ioActor = ioActor, jobExecutionMap = jobExecutionMap, workflowOutputs = workflowOutputs, initializationData = stateData.initializationData, - copyWorkflowOutputsActor = copyWorkflowOutputsActorProps, - workflowCallbackActorProps = workflowCallbackActorProps + copyWorkflowOutputsActor = copyWorkflowOutputsActorProps ), name = s"WorkflowFinalizationActor") } @@ -713,7 +704,7 @@ class WorkflowActor(workflowToStart: WorkflowToStart, finalizationActor ! StartFinalizationCommand goto(FinalizingWorkflowState) using data.copy( lastStateReached = StateCheckpoint (lastStateOverride.getOrElse(stateName), failures), - workflowFinalOutputs = workflowFinalOutputs.outputs.values.toSet, + workflowFinalOutputs = Option(workflowFinalOutputs), workflowAllOutputs = workflowAllOutputs, rootAndSubworkflowIds = rootAndSubworkflowIds ) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala index 15d0c4321a7..e84d9091ad0 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowManagerActor.scala @@ -57,6 +57,7 @@ object WorkflowManagerActor { ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, @@ -75,6 +76,7 @@ object WorkflowManagerActor { ioActor = ioActor, serviceRegistryActor = serviceRegistryActor, workflowLogCopyRouter = workflowLogCopyRouter, + workflowCallbackActor = workflowCallbackActor, jobStoreActor = jobStoreActor, subWorkflowStoreActor = subWorkflowStoreActor, callCacheReadActor = callCacheReadActor, @@ -124,6 +126,7 @@ case class WorkflowManagerActorParams(config: Config, ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, @@ -313,6 +316,7 @@ class WorkflowManagerActor(params: WorkflowManagerActorParams) ioActor = params.ioActor, serviceRegistryActor = params.serviceRegistryActor, workflowLogCopyRouter = params.workflowLogCopyRouter, + workflowCallbackActor = params.workflowCallbackActor, jobStoreActor = params.jobStoreActor, subWorkflowStoreActor = params.subWorkflowStoreActor, callCacheReadActor = params.callCacheReadActor, diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 6734bd18788..91e468c3c53 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -13,14 +13,12 @@ import cats.implicits.toTraverseOps import com.typesafe.config.Config import com.typesafe.scalalogging.LazyLogging import common.validation.ErrorOr -import cromwell.backend.BackendLifecycleActor.BackendWorkflowLifecycleActorResponse -import cromwell.backend.BackendWorkflowFinalizationActor.{FinalizationSuccess, Finalize} import cromwell.cloudsupport.azure.AzureCredentials import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} -import cromwell.engine.EngineWorkflowDescriptor +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ import net.ceedubs.ficus.Ficus._ @@ -33,12 +31,12 @@ import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} import java.net.URI import java.time.Instant -import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.Future import scala.util.{Failure, Success} case class WorkflowCallbackConfig(enabled: Boolean, - uri: Option[URI], // May be overridden by workflow options + defaultUri: Option[URI], // May be overridden by workflow options retryBackoff: Option[SimpleExponentialBackoff], authMethod: Option[WorkflowCallbackConfig.AuthMethod] ) @@ -66,7 +64,7 @@ object WorkflowCallbackConfig extends LazyLogging { WorkflowCallbackConfig( config.getBoolean("enabled"), - uri = uri, + defaultUri = uri, retryBackoff = backoff, authMethod = authMethod ) @@ -90,33 +88,27 @@ object WorkflowCallbackConfig extends LazyLogging { * workflow status. */ object WorkflowCallbackActor { - def props(workflowId: WorkflowId, - serviceRegistryActor: ActorRef, - workflowDescriptor: EngineWorkflowDescriptor, - workflowOutputs: CallOutputs, - lastWorkflowState: WorkflowState, - callbackUri: URI, + + final case class PerformCallbackCommand(workflowId: WorkflowId, + uri: Option[String], + terminalState: WorkflowState, + workflowOutputs: CallOutputs) + + def props(serviceRegistryActor: ActorRef, + defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff] = None, authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None ) = Props( new WorkflowCallbackActor( - workflowId, serviceRegistryActor, - workflowDescriptor, - workflowOutputs, - lastWorkflowState, - callbackUri, + defaultCallbackUri, retryBackoff, authMethod) ).withDispatcher(IoDispatcher) } -class WorkflowCallbackActor(workflowId: WorkflowId, - serviceRegistryActor: ActorRef, - val workflowDescriptor: EngineWorkflowDescriptor, - workflowOutputs: CallOutputs, - lastWorkflowState: WorkflowState, - callbackUri: URI, +class WorkflowCallbackActor(serviceRegistryActor: ActorRef, + defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff], authMethod: Option[AuthMethod]) extends Actor with ActorLogging { @@ -124,34 +116,25 @@ class WorkflowCallbackActor(workflowId: WorkflowId, implicit val ec: ExecutionContextExecutor = context.dispatcher implicit val system: ActorSystem = context.system - lazy private val callbackMessage = CallbackMessage( - workflowId.toString, lastWorkflowState.toString, workflowOutputs.outputs.map(entry => (entry._1.name, entry._2)) - ) - private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) override def receive: Actor.Receive = LoggingReceive { - case Finalize => performActionThenRespond( - performCallback().map( _ => FinalizationSuccess), - _ => FinalizationSuccess // Don't fail the workflow when this step fails - ) - } - - private def performActionThenRespond(operation: => Future[BackendWorkflowLifecycleActorResponse], - onFailure: (Throwable) => BackendWorkflowLifecycleActorResponse) - (implicit ec: ExecutionContext): Unit = { - val respondTo: ActorRef = sender() - operation onComplete { - case Success(r) => - log.info(s"Successfully sent callback for workflow for workflow $workflowId in state $lastWorkflowState to $callbackUri") - sendMetadata(successful = true) - respondTo ! r - case Failure(t) => - log.warning(s"Permanently failed to send callback for workflow $workflowId in state $lastWorkflowState to $callbackUri: ${t.getMessage}") - sendMetadata(successful = false) - respondTo ! onFailure(t) - } + case PerformCallbackCommand(workflowId, uri, terminalState, outputs) => + // If no uri was provided to us here, fall back to the one in config. If there isn't + // one there, do not perform a callback. + val callbackUri: Option[URI] = uri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(defaultCallbackUri) + callbackUri.map { uri => + performCallback(workflowId, uri, terminalState, outputs) onComplete { + case Success(_) => + log.info(s"Successfully sent callback for workflow for workflow $workflowId in state $terminalState to $uri") + sendMetadata(workflowId, successful = true) + case Failure(t) => + log.warning(s"Permanently failed to send callback for workflow $workflowId in state $terminalState to $uri: ${t.getMessage}") + sendMetadata(workflowId, successful = false) + } + }.getOrElse(()) + case other => log.warning(s"WorkflowCallbackActor received an unexpected message: $other") } private def makeHeaders: Future[List[HttpHeader]] = { @@ -163,15 +146,18 @@ class WorkflowCallbackActor(workflowId: WorkflowId, .traverse(identity) } - private def performCallback(): Future[Unit] = { + private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs): Future[Unit] = { + val callbackPostBody = CallbackMessage( + workflowId.toString, terminalState.toString, outputs.outputs.map(entry => (entry._1.name, entry._2)) + ) for { - entity <- Marshal(callbackMessage).to[RequestEntity] + entity <- Marshal(callbackPostBody).to[RequestEntity] headers <- makeHeaders request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity).withHeaders(headers) response <- withRetry( () => sendRequestOrFail(request), backoff = backoffWithDefault, - onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $lastWorkflowState to $callbackUri : $err") + onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $defaultCallbackUri : $err") ) result <- { response.entity.discardBytes() @@ -191,7 +177,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, } else Future.successful(response) ) - private def sendMetadata(successful: Boolean): Unit = { + private def sendMetadata(workflowId: WorkflowId, successful: Boolean): Unit = { val events = List( MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "successful"), @@ -199,7 +185,7 @@ class WorkflowCallbackActor(workflowId: WorkflowId, ), MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "url"), - MetadataValue(callbackUri.toString) + MetadataValue(defaultCallbackUri.toString) ), MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "timestamp"), diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala index 585455e2367..8731367cfb7 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowFinalizationActor.scala @@ -46,16 +46,14 @@ object WorkflowFinalizationActor { jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData, - copyWorkflowOutputsActor: Option[Props], - workflowCallbackActorProps: Option[Props]): Props = { + copyWorkflowOutputsActor: Option[Props]): Props = { Props(new WorkflowFinalizationActor( workflowDescriptor, ioActor, jobExecutionMap, workflowOutputs, initializationData, - copyWorkflowOutputsActor, - workflowCallbackActorProps + copyWorkflowOutputsActor )).withDispatcher(EngineDispatcher) } } @@ -65,8 +63,7 @@ case class WorkflowFinalizationActor(workflowDescriptor: EngineWorkflowDescripto jobExecutionMap: JobExecutionMap, workflowOutputs: CallOutputs, initializationData: AllBackendInitializationData, - copyWorkflowOutputsActorProps: Option[Props], - workflowCallbackActorProps: Option[Props]) + copyWorkflowOutputsActorProps: Option[Props]) extends WorkflowLifecycleActor[WorkflowFinalizationActorState] { override lazy val workflowIdForLogging = workflowDescriptor.possiblyNotRootWorkflowId @@ -102,22 +99,12 @@ case class WorkflowFinalizationActor(workflowDescriptor: EngineWorkflowDescripto } yield actor } - val engineFinalizationActors = Try { - List( - (copyWorkflowOutputsActorProps, "CopyWorkflowOutputsActor"), - (workflowCallbackActorProps, "WorkflowCallbackActor") - ).flatMap { engineActor => - engineActor match { - case (Some(props), actorName) => Some(context.actorOf(props, actorName)) - case _ => None - } - } - } + val engineFinalizationActor = Try { copyWorkflowOutputsActorProps.map(context.actorOf(_, "CopyWorkflowOutputsActor")).toList } val allActors = for { backendFinalizationActorsFromTry <- backendFinalizationActors - engineFinalizationActorsFromTry <- engineFinalizationActors - } yield backendFinalizationActorsFromTry.toList ++ engineFinalizationActorsFromTry + engineFinalizationActorFromTry <- engineFinalizationActor + } yield backendFinalizationActorsFromTry.toList ++ engineFinalizationActorFromTry allActors match { case Failure(ex) => diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index df4fc5b1207..2b0985163c1 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -20,7 +20,7 @@ import cromwell.engine.io.{IoActor, IoActorProxy} import cromwell.engine.workflow.WorkflowManagerActor import cromwell.engine.workflow.WorkflowManagerActor.AbortAllWorkflowsCommand import cromwell.engine.workflow.lifecycle.execution.callcaching.{CallCache, CallCacheReadActor, CallCacheWriteActor} -import cromwell.engine.workflow.lifecycle.finalization.CopyWorkflowLogsActor +import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, WorkflowCallbackActor, WorkflowCallbackConfig} import cromwell.engine.workflow.tokens.{DynamicRateLimiter, JobTokenDispenserActor} import cromwell.engine.workflow.workflowstore.AbortRequestScanningActor.AbortConfig import cromwell.engine.workflow.workflowstore._ @@ -122,6 +122,15 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, .props(CopyWorkflowLogsActor.props(serviceRegistryActor, ioActor)), "WorkflowLogCopyRouter") + private val workflowCallbackConfig = WorkflowCallbackConfig(config.getConfig("workflow-state-callback")) + + lazy val workflowCallbackActor: Option[ActorRef] = { + if (workflowCallbackConfig.enabled) { + val props = WorkflowCallbackActor.props(serviceRegistryActor, workflowCallbackConfig.defaultUri, workflowCallbackConfig.retryBackoff, workflowCallbackConfig.authMethod) + Option(context.actorOf(props, "WorkflowCallbackActor")) + } else None + } + //Call-caching config validation lazy val callCachingConfig = config.getConfig("call-caching") lazy val callCachingEnabled = callCachingConfig.getBoolean("enabled") @@ -174,6 +183,7 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, ioActor = ioActorProxy, serviceRegistryActor = serviceRegistryActor, workflowLogCopyRouter = workflowLogCopyRouter, + workflowCallbackActor = workflowCallbackActor, jobStoreActor = jobStoreActor, subWorkflowStoreActor = subWorkflowStoreActor, callCacheReadActor = callCacheReadActor, diff --git a/server/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala b/server/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala index 375c51e886b..b3f4df64d07 100644 --- a/server/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/SimpleWorkflowActorSpec.scala @@ -68,6 +68,7 @@ class SimpleWorkflowActorSpec extends CromwellTestKitWordSpec with BeforeAndAfte invalidateBadCacheResults = invalidateBadCacheResults, serviceRegistryActor = watchActor, workflowLogCopyRouter = system.actorOf(Props.empty, s"workflow-copy-log-router-$workflowId-${UUID.randomUUID()}"), + workflowCallbackActor = None, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props), subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props), callCacheReadActor = system.actorOf(EmptyCallCacheReadActor.props), diff --git a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index d9de71b9125..d5ef975915c 100644 --- a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -379,6 +379,7 @@ class WorkflowActorWithTestAddons(val finalizationProbe: TestProbe, ioActor = ioActor, serviceRegistryActor = serviceRegistryActor, workflowLogCopyRouter = workflowLogCopyRouter, + None, jobStoreActor = jobStoreActor, subWorkflowStoreActor = subWorkflowStoreActor, callCacheReadActor = callCacheReadActor, From 14584e927f4a61a00f759ac0e40e4de54ecad32c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 4 Sep 2023 14:18:39 -0400 Subject: [PATCH 07/29] Only send outputs if the workflow succeeded --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 10 ++++++---- .../finalization/WorkflowCallbackJsonSupport.scala | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 91e468c3c53..4a1ba31d249 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -17,7 +17,7 @@ import cromwell.cloudsupport.azure.AzureCredentials import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} +import cromwell.core.{CallOutputs, WorkflowId, WorkflowState, WorkflowSucceeded} import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ @@ -147,9 +147,11 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs): Future[Unit] = { - val callbackPostBody = CallbackMessage( - workflowId.toString, terminalState.toString, outputs.outputs.map(entry => (entry._1.name, entry._2)) - ) + // Only send outputs if the workflow succeeded + val callbackPostBody = terminalState match { + case WorkflowSucceeded => CallbackMessage(workflowId.toString, terminalState.toString, Option(outputs.outputs.map(entry => (entry._1.name, entry._2)))) + case _ => CallbackMessage(workflowId.toString, terminalState.toString, None) + } for { entity <- Marshal(callbackPostBody).to[RequestEntity] headers <- makeHeaders diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala index 559b6a205a5..79d669c6dd7 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala @@ -4,7 +4,7 @@ import cromwell.util.JsonFormatting.WomValueJsonFormatter.WomValueJsonFormat import spray.json.DefaultJsonProtocol import wom.values.WomValue -final case class CallbackMessage(workflowId: String, state: String, outputs: Map[String, WomValue]) +final case class CallbackMessage(workflowId: String, state: String, outputs: Option[Map[String, WomValue]]) object WorkflowCallbackJsonSupport extends DefaultJsonProtocol { implicit val callbackMessageFormat = jsonFormat3(CallbackMessage) From a87cf071955158e8fedd5e1d5fceaf4fc33dee06 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 5 Sep 2023 11:00:29 -0400 Subject: [PATCH 08/29] Let users configure maxRetries --- core/src/main/resources/reference.conf | 16 ++++++------ .../finalization/WorkflowCallbackActor.scala | 25 ++++++++++++------- .../cromwell/server/CromwellRootActor.scala | 8 +++++- 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 018bd3b2239..c4e7dae6b7d 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -629,12 +629,14 @@ ga4gh { workflow-state-callback { enabled: false - endpoint: "http://example.com/34893ifjk4389/f00b4r" # Can be overridden in workflow options - # Users can override default retry backoff behavior if desired -# request-backoff { -# min: "10 seconds" -# max: "5 minutes" -# multiplier: 1.1 -# } + # endpoint: "http://example.com/foo" # Can be overridden in workflow options + # auth.azure: true + # Users can override default retry behavior if desired + # request-backoff { + # min: "3 seconds" + # max: "5 minutes" + # multiplier: 1.1 + # } + # max-retries = 10 } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 4a1ba31d249..ebd775a292d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -38,8 +38,8 @@ import scala.util.{Failure, Success} case class WorkflowCallbackConfig(enabled: Boolean, defaultUri: Option[URI], // May be overridden by workflow options retryBackoff: Option[SimpleExponentialBackoff], - authMethod: Option[WorkflowCallbackConfig.AuthMethod] - ) + maxRetries: Option[Int], + authMethod: Option[WorkflowCallbackConfig.AuthMethod]) object WorkflowCallbackConfig extends LazyLogging { sealed trait AuthMethod { def getAccessToken: ErrorOr.ErrorOr[String] } @@ -47,25 +47,25 @@ object WorkflowCallbackConfig extends LazyLogging { override def getAccessToken: ErrorOr.ErrorOr[String] = AzureCredentials.getAccessToken() } // TODO -// case class GoogleAuth(mode: GoogleAuthMode) extends AuthMethod { -// override def getAccessToken: String = ??? -// } + // case class GoogleAuth(mode: GoogleAuthMode) extends AuthMethod { + // override def getAccessToken: String = ??? + // } def apply(config: Config): WorkflowCallbackConfig = { val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) + val maxRetries = config.as[Option[Int]]("max-retries") val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) + // TODO add google auth val authMethod = if (config.hasPath("auth.azure")) { Option(AzureAuth) - // TODO - // } else if (config.hasPath("auth.google")) { - // ??? } else None WorkflowCallbackConfig( config.getBoolean("enabled"), defaultUri = uri, retryBackoff = backoff, + maxRetries = maxRetries, authMethod = authMethod ) } @@ -97,12 +97,14 @@ object WorkflowCallbackActor { def props(serviceRegistryActor: ActorRef, defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff] = None, + maxRetries: Option[Int] = None, authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None ) = Props( new WorkflowCallbackActor( serviceRegistryActor, defaultCallbackUri, retryBackoff, + maxRetries, authMethod) ).withDispatcher(IoDispatcher) } @@ -110,6 +112,7 @@ object WorkflowCallbackActor { class WorkflowCallbackActor(serviceRegistryActor: ActorRef, defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff], + maxRetries: Option[Int], authMethod: Option[AuthMethod]) extends Actor with ActorLogging { @@ -117,7 +120,10 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, implicit val system: ActorSystem = context.system private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) + private lazy val defaultMaxRetries = 10 + private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) + private lazy val maxRetriesWithDefault = maxRetries.getOrElse(defaultMaxRetries) override def receive: Actor.Receive = LoggingReceive { case PerformCallbackCommand(workflowId, uri, terminalState, outputs) => @@ -159,6 +165,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, response <- withRetry( () => sendRequestOrFail(request), backoff = backoffWithDefault, + maxRetries = Option(maxRetriesWithDefault), onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $defaultCallbackUri : $err") ) result <- { @@ -173,7 +180,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, if (response.status.isFailure()) { response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => Future.failed( - new RuntimeException(errorBody) + new RuntimeException(s"HTTP ${response.status.value}: $errorBody") ) } } else Future.successful(response) diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index 2b0985163c1..e4d0d58204f 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -126,7 +126,13 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, lazy val workflowCallbackActor: Option[ActorRef] = { if (workflowCallbackConfig.enabled) { - val props = WorkflowCallbackActor.props(serviceRegistryActor, workflowCallbackConfig.defaultUri, workflowCallbackConfig.retryBackoff, workflowCallbackConfig.authMethod) + val props = WorkflowCallbackActor.props( + serviceRegistryActor, + workflowCallbackConfig.defaultUri, + workflowCallbackConfig.retryBackoff, + workflowCallbackConfig.maxRetries, + workflowCallbackConfig.authMethod + ) Option(context.actorOf(props, "WorkflowCallbackActor")) } else None } From 891121e3ae16bf39f2ca9e8fc5509382f7deb449 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 5 Sep 2023 15:30:35 -0400 Subject: [PATCH 09/29] Fix auth header --- .../workflow/lifecycle/finalization/WorkflowCallbackActor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index ebd775a292d..d5a54ea066d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -148,7 +148,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, case Valid(header) => Future.successful(header) case Invalid(err) => Future.failed(new RuntimeException(err.toString)) // TODO better error } - .map(t => t.map(RawHeader("Authorization", _))) + .map(t => t.map(t => RawHeader("Authorization", s"Bearer $t"))) .traverse(identity) } From a5454aa8e8c32362ce1211155d79dd04dd96cf7c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Sep 2023 13:46:41 -0400 Subject: [PATCH 10/29] WorkflowActor test --- .../engine/workflow/WorkflowActorSpec.scala | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index d5ef975915c..9c26fdd4ff7 100644 --- a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -15,7 +15,7 @@ import cromwell.engine.workflow.WorkflowActor._ import cromwell.engine.workflow.WorkflowManagerActor.WorkflowActorWorkComplete import cromwell.engine.workflow.lifecycle.EngineLifecycleActorAbortCommand import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{ExecuteWorkflowCommand, WorkflowExecutionAbortedResponse, WorkflowExecutionFailedResponse, WorkflowExecutionSucceededResponse} -import cromwell.engine.workflow.lifecycle.finalization.CopyWorkflowLogsActor +import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, WorkflowCallbackActor} import cromwell.engine.workflow.lifecycle.finalization.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationAbortedResponse, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor.MaterializeWorkflowDescriptorFailureResponse @@ -50,7 +50,8 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB ) val mockDir: Path = DefaultPathBuilder.get("/where/to/copy/wf/logs") - val mockWorkflowOptions = s"""{ "final_workflow_log_dir" : "$mockDir" }""" + val mockUri: String = "http://example.com" + val mockWorkflowOptions = s"""{ "final_workflow_log_dir" : "$mockDir", "workflow_callback_uri": "$mockUri" }""" var currentWorkflowId: WorkflowId = _ val currentLifecycleActor: TestProbe = TestProbe("currentLifecycleActor") @@ -63,6 +64,7 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB val executionProbe: TestProbe = TestProbe("executionProbe") val finalizationProbe: TestProbe = TestProbe("finalizationProbe") var copyWorkflowLogsProbe: TestProbe = _ + val workflowCallbackProbe: TestProbe = TestProbe("workflowCallbackProbe") val AwaitAlmostNothing: FiniteDuration = 100.milliseconds val initialJobCtByRootWf = new AtomicInteger() val callCachingEnabled = true @@ -79,7 +81,11 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB private val workflowHeartbeatConfig = WorkflowHeartbeatConfig(ConfigFactory.load()) - private def createWorkflowActor(state: WorkflowActorState, extraPathBuilderFactory: Option[PathBuilderFactory] = None, initializationMaxRetries: Int = 3, initializationInterval: FiniteDuration = 10.millis) = { + private def createWorkflowActor(state: WorkflowActorState, + extraPathBuilderFactory: Option[PathBuilderFactory] = None, + workflowCallbackActor: Option[ActorRef] = None, + initializationMaxRetries: Int = 3, + initializationInterval: FiniteDuration = 10.millis) = { val actor = TestFSMRef( factory = new WorkflowActorWithTestAddons( finalizationProbe = finalizationProbe, @@ -92,6 +98,7 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB ioActor = system.actorOf(SimpleIoActor.props, s"ioActor-$currentWorkflowId"), serviceRegistryActor = mockServiceRegistryActor, workflowLogCopyRouter = copyWorkflowLogsProbe.ref, + workflowCallbackActor = workflowCallbackActor, jobStoreActor = system.actorOf(AlwaysHappyJobStoreActor.props, s"jobStoreActor-$currentWorkflowId"), subWorkflowStoreActor = system.actorOf(AlwaysHappySubWorkflowStoreActor.props, s"subWorkflowStoreActor-$currentWorkflowId"), @@ -328,6 +335,16 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB val _ = createWorkflowActor(WorkflowSucceededState, Option(new ThrowingPathBuilderFactory())) } } + + "send a workflow callback message" in { + val actor = createWorkflowActor(FinalizingWorkflowState, workflowCallbackActor = Option(workflowCallbackProbe.ref)) + deathwatch watch actor + + workflowCallbackProbe.expectNoMessage(AwaitAlmostNothing) + actor ! WorkflowFinalizationSucceededResponse + workflowCallbackProbe.expectMsg(WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowFailed, CallOutputs.empty)) + deathwatch.expectTerminated(actor) + } } } @@ -353,6 +370,7 @@ class WorkflowActorWithTestAddons(val finalizationProbe: TestProbe, ioActor: ActorRef, serviceRegistryActor: ActorRef, workflowLogCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobStoreActor: ActorRef, subWorkflowStoreActor: ActorRef, callCacheReadActor: ActorRef, @@ -379,7 +397,7 @@ class WorkflowActorWithTestAddons(val finalizationProbe: TestProbe, ioActor = ioActor, serviceRegistryActor = serviceRegistryActor, workflowLogCopyRouter = workflowLogCopyRouter, - None, + workflowCallbackActor, jobStoreActor = jobStoreActor, subWorkflowStoreActor = subWorkflowStoreActor, callCacheReadActor = callCacheReadActor, From f37d614e81b55b831d3d16edd28a940d67d6890d Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Sep 2023 16:10:12 -0400 Subject: [PATCH 11/29] Break http interactions out for mocking --- .../finalization/WorkflowCallbackActor.scala | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index d5a54ea066d..b279d13519f 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -98,14 +98,16 @@ object WorkflowCallbackActor { defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff] = None, maxRetries: Option[Int] = None, - authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None + authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None, + httpClient: CallbackHttpHandler = CallbackHttpHandlerImpl ) = Props( new WorkflowCallbackActor( serviceRegistryActor, defaultCallbackUri, retryBackoff, maxRetries, - authMethod) + authMethod, + httpClient) ).withDispatcher(IoDispatcher) } @@ -113,7 +115,8 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff], maxRetries: Option[Int], - authMethod: Option[AuthMethod]) + authMethod: Option[AuthMethod], + httpClient: CallbackHttpHandler) extends Actor with ActorLogging { implicit val ec: ExecutionContextExecutor = context.dispatcher @@ -134,10 +137,10 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, performCallback(workflowId, uri, terminalState, outputs) onComplete { case Success(_) => log.info(s"Successfully sent callback for workflow for workflow $workflowId in state $terminalState to $uri") - sendMetadata(workflowId, successful = true) + sendMetadata(workflowId, successful = true, uri) case Failure(t) => log.warning(s"Permanently failed to send callback for workflow $workflowId in state $terminalState to $uri: ${t.getMessage}") - sendMetadata(workflowId, successful = false) + sendMetadata(workflowId, successful = false, uri) } }.getOrElse(()) case other => log.warning(s"WorkflowCallbackActor received an unexpected message: $other") @@ -176,7 +179,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } private def sendRequestOrFail(request: HttpRequest): Future[HttpResponse] = - Http().singleRequest(request).flatMap(response => + httpClient.sendRequest(request).flatMap(response => if (response.status.isFailure()) { response.entity.dataBytes.runFold(ByteString(""))(_ ++ _).map(_.utf8String) flatMap { errorBody => Future.failed( @@ -186,7 +189,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } else Future.successful(response) ) - private def sendMetadata(workflowId: WorkflowId, successful: Boolean): Unit = { + private def sendMetadata(workflowId: WorkflowId, successful: Boolean, uri: URI): Unit = { val events = List( MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "successful"), @@ -194,7 +197,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, ), MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "url"), - MetadataValue(defaultCallbackUri.toString) + MetadataValue(uri.toString) ), MetadataEvent( MetadataKey(workflowId, None, "workflowCallback", "timestamp"), @@ -206,3 +209,13 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } +// Wrap the http call in a trait so it can be easily mocked for testing +trait CallbackHttpHandler { + def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse] +} + +object CallbackHttpHandlerImpl extends CallbackHttpHandler { + override def sendRequest(httpRequest: HttpRequest)(implicit actorSystem: ActorSystem): Future[HttpResponse] = { + Http().singleRequest(httpRequest) + } +} From 04b8ce766bf540d3e11695d141f372aaa2d19ce1 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Sep 2023 16:27:10 -0400 Subject: [PATCH 12/29] WorkflowCallbackActor shutdown --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 3 +++ engine/src/main/scala/cromwell/server/CromwellRootActor.scala | 1 + engine/src/main/scala/cromwell/server/CromwellShutdown.scala | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index b279d13519f..c72e07afe9e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -7,6 +7,7 @@ import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.marshalling.Marshal import akka.http.scaladsl.model._ import akka.http.scaladsl.model.headers.RawHeader +import akka.routing.Broadcast import akka.util.ByteString import cats.data.Validated.{Invalid, Valid} import cats.implicits.toTraverseOps @@ -28,6 +29,7 @@ import scala.concurrent.duration.DurationInt import scala.util.Try import cromwell.services.metadata.MetadataService.PutMetadataAction import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.util.GracefulShutdownHelper.ShutdownCommand import java.net.URI import java.time.Instant @@ -143,6 +145,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, sendMetadata(workflowId, successful = false, uri) } }.getOrElse(()) + case Broadcast(ShutdownCommand) | ShutdownCommand => context stop self case other => log.warning(s"WorkflowCallbackActor received an unexpected message: $other") } diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index e4d0d58204f..9d705755bf0 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -234,6 +234,7 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, actorSystem = context.system, workflowManagerActor = workflowManagerActor, logCopyRouter = workflowLogCopyRouter, + workflowCallbackActor = workflowCallbackActor, jobStoreActor = jobStoreActor, jobTokenDispenser = jobExecutionTokenDispenserActor, workflowStoreActor = workflowStoreActor, diff --git a/engine/src/main/scala/cromwell/server/CromwellShutdown.scala b/engine/src/main/scala/cromwell/server/CromwellShutdown.scala index 99e4041db56..ce204249b1e 100644 --- a/engine/src/main/scala/cromwell/server/CromwellShutdown.scala +++ b/engine/src/main/scala/cromwell/server/CromwellShutdown.scala @@ -1,7 +1,6 @@ package cromwell.server import java.util.concurrent.atomic.AtomicBoolean - import akka.Done import akka.actor._ import akka.http.scaladsl.Http @@ -83,6 +82,7 @@ object CromwellShutdown extends GracefulStopSupport { actorSystem: ActorSystem, workflowManagerActor: ActorRef, logCopyRouter: ActorRef, + workflowCallbackActor: Option[ActorRef], jobTokenDispenser: ActorRef, jobStoreActor: ActorRef, workflowStoreActor: ActorRef, @@ -180,6 +180,7 @@ object CromwellShutdown extends GracefulStopSupport { shutdownActor(workflowStoreActor, CoordinatedShutdown.PhaseServiceRequestsDone, ShutdownCommand) shutdownActor(logCopyRouter, CoordinatedShutdown.PhaseServiceRequestsDone, Broadcast(ShutdownCommand)) + workflowCallbackActor.foreach(wca => shutdownActor(wca, CoordinatedShutdown.PhaseServiceRequestsDone, Broadcast(ShutdownCommand))) shutdownActor(jobTokenDispenser, CoordinatedShutdown.PhaseServiceRequestsDone, ShutdownCommand) /* From a9c2ce255678daddc94cb749170a35bfc913ae91 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Sep 2023 16:41:20 -0400 Subject: [PATCH 13/29] Fix duplicated variable name --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index c72e07afe9e..18157c6605a 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -131,10 +131,10 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, private lazy val maxRetriesWithDefault = maxRetries.getOrElse(defaultMaxRetries) override def receive: Actor.Receive = LoggingReceive { - case PerformCallbackCommand(workflowId, uri, terminalState, outputs) => + case PerformCallbackCommand(workflowId, requestedCallbackUri, terminalState, outputs) => // If no uri was provided to us here, fall back to the one in config. If there isn't // one there, do not perform a callback. - val callbackUri: Option[URI] = uri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(defaultCallbackUri) + val callbackUri: Option[URI] = requestedCallbackUri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(defaultCallbackUri) callbackUri.map { uri => performCallback(workflowId, uri, terminalState, outputs) onComplete { case Success(_) => From 27dac9188a3fc2b104a807f860df5bb697159110 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 8 Sep 2023 17:01:53 -0400 Subject: [PATCH 14/29] WorkflowCallbackActor tests --- .../WorkflowCallbackActorSpec.scala | 267 ++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala new file mode 100644 index 00000000000..f50d85bd474 --- /dev/null +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala @@ -0,0 +1,267 @@ +package cromwell.engine.workflow.lifecycle.finalization + +import akka.testkit._ +import akka.http.scaladsl.client.RequestBuilding.Post +import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ +import akka.http.scaladsl.model.{HttpResponse, StatusCodes} +import akka.testkit.TestProbe +import common.mock.MockSugar +import cromwell.core.retry.SimpleExponentialBackoff +import org.mockito.Mockito._ +import cromwell.core.{TestKitSuite, WorkflowFailed, WorkflowId, WorkflowSucceeded} +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand +import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ +import cromwell.services.metadata.MetadataService.PutMetadataAction +import cromwell.services.metadata.{MetadataEvent, MetadataKey, MetadataValue} +import cromwell.util.{GracefulShutdownHelper, WomMocks} +import org.scalatest.flatspec.AnyFlatSpecLike +import org.scalatest.matchers.should.Matchers +import wom.values.WomString + +import java.net.URI +import java.time.Instant +import scala.concurrent.duration._ +import scala.concurrent.Future + +class WorkflowCallbackActorSpec + extends TestKitSuite with AnyFlatSpecLike with Matchers with MockSugar { + + behavior of "WorkflowCallbackActor" + + private implicit val ec = system.dispatcher + + private val msgWait = 10.second.dilated + private val awaitAlmostNothing = 1.second + private val serviceRegistryActor = TestProbe("testServiceRegistryActor") + private val deathWatch = TestProbe("deathWatch") + private val mockUri = new URI("http://example.com") + private val basicOutputs = WomMocks.mockOutputExpectations(List("foo" -> WomString("bar")).toMap) + + private val httpSuccess = Future.successful(HttpResponse.apply(StatusCodes.OK)) + private val httpFailure = Future.successful(HttpResponse.apply(StatusCodes.GatewayTimeout)) + + private def metadataEvents(workflowId: WorkflowId, successful: Boolean) = ( + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "successful"), + MetadataValue(successful) + ), + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "url"), + MetadataValue(mockUri.toString) + ), + MetadataEvent( + MetadataKey(workflowId, None, "workflowCallback", "timestamp"), + MetadataValue(Instant.now()) + ) + ) + + it should "send a command to the default URI and record the correct metadata" in { + // Setup + val workflowId = WorkflowId.randomId() + val mockHttpClient = mock[CallbackHttpHandler] + + val expectedPostBody = CallbackMessage( + workflowId.toString, + WorkflowSucceeded.toString, + Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))) + ) + val expectedRequest = Post(mockUri.toString, expectedPostBody) + val (expectedResultMetadata, expectedUriMetadata, expectedTimestampMetadata) = metadataEvents(workflowId, true) + + when(mockHttpClient.sendRequest(expectedRequest)).thenReturn(httpSuccess) + + val props = WorkflowCallbackActor.props( + serviceRegistryActor.ref, + Option(mockUri), + httpClient = mockHttpClient + ) + val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorSuccess") + + // Do the thing + val cmd = PerformCallbackCommand( + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + ) + workflowCallbackActor ! cmd + + // Check the result + serviceRegistryActor.expectMsgPF(msgWait) { + case PutMetadataAction(List(resultEvent, uriEvent, timestampEvent), _) => + resultEvent.key shouldBe expectedResultMetadata.key + resultEvent.value shouldBe expectedResultMetadata.value + uriEvent.key shouldBe expectedUriMetadata.key + uriEvent.value shouldBe expectedUriMetadata.value + timestampEvent.key shouldBe expectedTimestampMetadata.key + // Not checking timestamp value because it won't match + case _ => + } + + verify(mockHttpClient, times(1)).sendRequest(expectedRequest) + + // Shut it down + deathWatch.watch(workflowCallbackActor) + workflowCallbackActor ! GracefulShutdownHelper.ShutdownCommand + deathWatch.expectTerminated(workflowCallbackActor, msgWait) + + } + + it should "correctly handle a callback that fails, then succeeds" in { + // Setup + val workflowId = WorkflowId.randomId() + val mockHttpClient = mock[CallbackHttpHandler] + + val expectedPostBody = CallbackMessage( + workflowId.toString, + WorkflowSucceeded.toString, + Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))) + ) + val expectedRequest = Post(mockUri.toString, expectedPostBody) + + val (expectedResultMetadata, expectedUriMetadata, expectedTimestampMetadata) = metadataEvents(workflowId, true) + + when(mockHttpClient.sendRequest(expectedRequest)).thenReturn(httpFailure, httpFailure, httpSuccess) + + val props = WorkflowCallbackActor.props( + serviceRegistryActor.ref, + Option(mockUri), + httpClient = mockHttpClient + ) + val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorFailThenSuccees") + + // Do the thing + val cmd = PerformCallbackCommand( + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + ) + workflowCallbackActor ! cmd + + // Check the result + serviceRegistryActor.expectMsgPF(msgWait) { + case PutMetadataAction(List(resultEvent, uriEvent, timestampEvent), _) => + resultEvent.key shouldBe expectedResultMetadata.key + resultEvent.value shouldBe expectedResultMetadata.value + uriEvent.key shouldBe expectedUriMetadata.key + uriEvent.value shouldBe expectedUriMetadata.value + timestampEvent.key shouldBe expectedTimestampMetadata.key + // Not checking timestamp value because it won't match + case _ => + } + + verify(mockHttpClient, times(3)).sendRequest(expectedRequest) + + // Shut it down + deathWatch.watch(workflowCallbackActor) + workflowCallbackActor ! GracefulShutdownHelper.ShutdownCommand + deathWatch.expectTerminated(workflowCallbackActor, msgWait) + + } + + it should "retry sending the callback for a failing workflow before failing" in { + // Setup + val workflowId = WorkflowId.randomId() + val mockHttpClient = mock[CallbackHttpHandler] + + val expectedPostBody = CallbackMessage( + workflowId.toString, + WorkflowFailed.toString, + None + ) + val expectedRequest = Post(mockUri.toString, expectedPostBody) + + val (expectedResultMetadata, expectedUriMetadata, expectedTimestampMetadata) = metadataEvents(workflowId, false) + + when(mockHttpClient.sendRequest(expectedRequest)).thenReturn(httpFailure) + + val props = WorkflowCallbackActor.props( + serviceRegistryActor.ref, + None, + retryBackoff = Option(SimpleExponentialBackoff(500.millis, 1.minute, 1.1)), + maxRetries = Option(5), + httpClient = mockHttpClient + ) + val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorFail") + + // Do the thing + val cmd = PerformCallbackCommand( + workflowId = workflowId, uri = Option(mockUri.toString), terminalState = WorkflowFailed, workflowOutputs = basicOutputs + ) + workflowCallbackActor ! cmd + + // Check the result + serviceRegistryActor.expectMsgPF(msgWait) { + case PutMetadataAction(List(resultEvent, uriEvent, timestampEvent), _) => + resultEvent.key shouldBe expectedResultMetadata.key + resultEvent.value shouldBe expectedResultMetadata.value + uriEvent.key shouldBe expectedUriMetadata.key + uriEvent.value shouldBe expectedUriMetadata.value + timestampEvent.key shouldBe expectedTimestampMetadata.key + // Not checking timestamp value because it won't match + case _ => + } + + verify(mockHttpClient, times(5)).sendRequest(expectedRequest) + + // Shut it down + deathWatch.watch(workflowCallbackActor) + workflowCallbackActor ! GracefulShutdownHelper.ShutdownCommand + deathWatch.expectTerminated(workflowCallbackActor, msgWait) + + } + + it should "not send a callback if no URI is provided" in { + // Setup + val workflowId = WorkflowId.randomId() + val mockHttpClient = mock[CallbackHttpHandler] + + val props = WorkflowCallbackActor.props( + serviceRegistryActor.ref, + None, + httpClient = mockHttpClient + ) + val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorNoUri") + + // Do the thing + val cmd = PerformCallbackCommand( + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + ) + workflowCallbackActor ! cmd + + // Check the result + serviceRegistryActor.expectNoMessage(awaitAlmostNothing) + verifyNoInteractions(mockHttpClient) + + // Shut it down + deathWatch.watch(workflowCallbackActor) + workflowCallbackActor ! GracefulShutdownHelper.ShutdownCommand + deathWatch.expectTerminated(workflowCallbackActor, msgWait) + } + + it should "no nothing when given a bogus URI" in { + // Setup + val workflowId = WorkflowId.randomId() + val mockHttpClient = mock[CallbackHttpHandler] + + val props = WorkflowCallbackActor.props( + serviceRegistryActor.ref, + None, + httpClient = mockHttpClient + ) + val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorBogusUri") + + // Do the thing + val cmd = PerformCallbackCommand( + workflowId = workflowId, + uri = Option("this is not a very good URI, is it"), + terminalState = WorkflowSucceeded, + workflowOutputs = basicOutputs + ) + workflowCallbackActor ! cmd + + // Check the result + serviceRegistryActor.expectNoMessage(awaitAlmostNothing) + verifyNoInteractions(mockHttpClient) + + // Shut it down + deathWatch.watch(workflowCallbackActor) + workflowCallbackActor ! GracefulShutdownHelper.ShutdownCommand + deathWatch.expectTerminated(workflowCallbackActor, msgWait) + } +} From b190a44b4a3fd82a4f667c5fb42f5041b9d92b49 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Sun, 10 Sep 2023 14:26:23 -0400 Subject: [PATCH 15/29] Fix test --- .../test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index 9c26fdd4ff7..035ac58f89d 100644 --- a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -342,7 +342,7 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB workflowCallbackProbe.expectNoMessage(AwaitAlmostNothing) actor ! WorkflowFinalizationSucceededResponse - workflowCallbackProbe.expectMsg(WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowFailed, CallOutputs.empty)) + workflowCallbackProbe.expectMsg(WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowSucceeded, CallOutputs.empty)) deathwatch.expectTerminated(actor) } } From 84b6461e74ec472737e75a9b6ac4c4be2ceed673 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Sun, 10 Sep 2023 14:57:32 -0400 Subject: [PATCH 16/29] Docs --- CHANGELOG.md | 4 ++ docs/cromwell_features/WorkflowCallback.md | 54 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 docs/cromwell_features/WorkflowCallback.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 526c244da03..0e9b14f5789 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ WDL `size` engine function now works for HTTP files. Cromwell can now send logs to Azure Application Insights. To enable, set environment variable `APPLICATIONINSIGHTS_INSTRUMENTATIONKEY` to your account's key. [See here for information.](https://learn.microsoft.com/en-us/azure/azure-monitor/app/sdk-connection-string) +### Workflow Completion Callback + +Cromwell can be configured to send a POST request to a specified URL when a workflow completes. The request body +includes the workflow id, terminal state, and (if workflow succeeded) final outputs. ## 85 Release Notes diff --git a/docs/cromwell_features/WorkflowCallback.md b/docs/cromwell_features/WorkflowCallback.md new file mode 100644 index 00000000000..ffe392de439 --- /dev/null +++ b/docs/cromwell_features/WorkflowCallback.md @@ -0,0 +1,54 @@ +The workflow callback is a simple way to integrate Cromwell with an external system. When each workflow reaches a terminal +state, Cromwell will POST a message to a provided URL (see below for schema of this message). Messages are sent for root +workflows only, not subworkflows. + +### Configuration + +This feature will only be used if enabled via config. All config items except `enabled` are optional. + +``` +workflow-state-callback { + enabled: true + endpoint: "http://example.com" + auth.azure: true + request-backoff { + min: "3 seconds", + max: "5 minutes", + multiplier: 1.1 + } + max-retries = 10 +} +``` + + * `enabled`: This boolean controls whether a callback will be attempted or not. + * `endpoint`: This is the default URL to send the message to. If this is unset, and no URL is set in workflow options, no callback will be sent. + * `auth.azure`: If true, and if Cromwell is running in an Azure environment, Cromwell will include an auth header with bearer token generated from local Azure credentials. + * `request-backoff` and `max-retries`: Include these to override the default retry behavior (default behavior shown here). + +### Workflow Options + +You may choose to override the `endpoint` set in config by including this workflow option: +``` +{ + "workflow_callback_uri": "http://mywebsite.com" +} +``` + +### Callback schema + +Below is an example of a callback request body. + +``` +{ + "workflowId": "00001111-2222-3333-4444-555566667777", + "state": "Succeeded", + "outputs": { + "task1.out": 5, + "task2.out": "/some/file.txt" + } +} +``` + + * `workflowId`: The UUID of the workflow + * `state`: The terminal state of the workflow. The list of possible values is: `Succeeded`, `Failed`, `Aborted` + * `outputs`: The final outputs of the workflow, as would be returned from the `api/workflows/{version}/{id}/outputs` endpoint. This field will ONLY be included when the workflow was successful. \ No newline at end of file From 2870c92eaa422f7f846541ef6863f725da4c733c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 11 Sep 2023 09:56:36 -0400 Subject: [PATCH 17/29] Include workflow failure message in callback --- docs/cromwell_features/WorkflowCallback.md | 3 ++- .../engine/workflow/WorkflowActor.scala | 8 +++++- .../finalization/WorkflowCallbackActor.scala | 18 +++++++------ .../WorkflowCallbackJsonSupport.scala | 7 +++-- .../WorkflowCallbackActorSpec.scala | 27 ++++++++++++------- .../engine/workflow/WorkflowActorSpec.scala | 3 ++- 6 files changed, 44 insertions(+), 22 deletions(-) diff --git a/docs/cromwell_features/WorkflowCallback.md b/docs/cromwell_features/WorkflowCallback.md index ffe392de439..23d6be93e0a 100644 --- a/docs/cromwell_features/WorkflowCallback.md +++ b/docs/cromwell_features/WorkflowCallback.md @@ -51,4 +51,5 @@ Below is an example of a callback request body. * `workflowId`: The UUID of the workflow * `state`: The terminal state of the workflow. The list of possible values is: `Succeeded`, `Failed`, `Aborted` - * `outputs`: The final outputs of the workflow, as would be returned from the `api/workflows/{version}/{id}/outputs` endpoint. This field will ONLY be included when the workflow was successful. \ No newline at end of file + * `outputs`: The final outputs of the workflow, as would be returned from the `api/workflows/{version}/{id}/outputs` endpoint. This field will ONLY be included when the workflow was successful. + * `failures`: A list of strings describing the workflow's failures. This field will ONLY be included if the workflow failed. \ No newline at end of file diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index d70513e225c..698f8984a18 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -522,7 +522,13 @@ class WorkflowActor(workflowToStart: WorkflowToStart, goto(MetadataIntegrityValidationState) using data.copy(lastStateReached = data.lastStateReached.copy(state = stateName)) case Event(PerformWorkflowCallback(uri, workflowState), data) => workflowCallbackActor.foreach { wca => - wca ! PerformCallbackCommand(workflowId, uri, workflowState, data.workflowFinalOutputs.getOrElse(CallOutputs.empty)) + wca ! PerformCallbackCommand( + workflowId, + uri, + workflowState, + data.workflowFinalOutputs.getOrElse(CallOutputs.empty), + data.lastStateReached.failures.toList.flatMap(_.map(_.getMessage)) + ) } stay() } diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 18157c6605a..d24994f32a9 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -18,7 +18,7 @@ import cromwell.cloudsupport.azure.AzureCredentials import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{CallOutputs, WorkflowId, WorkflowState, WorkflowSucceeded} +import cromwell.core.{CallOutputs, WorkflowFailed, WorkflowId, WorkflowState, WorkflowSucceeded} import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ @@ -94,7 +94,8 @@ object WorkflowCallbackActor { final case class PerformCallbackCommand(workflowId: WorkflowId, uri: Option[String], terminalState: WorkflowState, - workflowOutputs: CallOutputs) + workflowOutputs: CallOutputs, + failureMessage: List[String]) def props(serviceRegistryActor: ActorRef, defaultCallbackUri: Option[URI], @@ -131,12 +132,12 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, private lazy val maxRetriesWithDefault = maxRetries.getOrElse(defaultMaxRetries) override def receive: Actor.Receive = LoggingReceive { - case PerformCallbackCommand(workflowId, requestedCallbackUri, terminalState, outputs) => + case PerformCallbackCommand(workflowId, requestedCallbackUri, terminalState, outputs, failures) => // If no uri was provided to us here, fall back to the one in config. If there isn't // one there, do not perform a callback. val callbackUri: Option[URI] = requestedCallbackUri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(defaultCallbackUri) callbackUri.map { uri => - performCallback(workflowId, uri, terminalState, outputs) onComplete { + performCallback(workflowId, uri, terminalState, outputs, failures) onComplete { case Success(_) => log.info(s"Successfully sent callback for workflow for workflow $workflowId in state $terminalState to $uri") sendMetadata(workflowId, successful = true, uri) @@ -158,11 +159,12 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, .traverse(identity) } - private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs): Future[Unit] = { + private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs, failures: List[String]): Future[Unit] = { // Only send outputs if the workflow succeeded val callbackPostBody = terminalState match { - case WorkflowSucceeded => CallbackMessage(workflowId.toString, terminalState.toString, Option(outputs.outputs.map(entry => (entry._1.name, entry._2)))) - case _ => CallbackMessage(workflowId.toString, terminalState.toString, None) + case WorkflowSucceeded => CallbackMessage(workflowId.toString, terminalState.toString, Option(outputs.outputs.map(entry => (entry._1.name, entry._2))), None) + case WorkflowFailed => CallbackMessage(workflowId.toString, terminalState.toString, None, Option(failures)) + case _ => CallbackMessage(workflowId.toString, terminalState.toString, None, None) } for { entity <- Marshal(callbackPostBody).to[RequestEntity] @@ -172,7 +174,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, () => sendRequestOrFail(request), backoff = backoffWithDefault, maxRetries = Option(maxRetriesWithDefault), - onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $defaultCallbackUri : $err") + onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $callbackUri : $err") ) result <- { response.entity.discardBytes() diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala index 79d669c6dd7..4acb37f4e36 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala @@ -4,8 +4,11 @@ import cromwell.util.JsonFormatting.WomValueJsonFormatter.WomValueJsonFormat import spray.json.DefaultJsonProtocol import wom.values.WomValue -final case class CallbackMessage(workflowId: String, state: String, outputs: Option[Map[String, WomValue]]) +final case class CallbackMessage(workflowId: String, + state: String, + outputs: Option[Map[String, WomValue]], + failures: Option[List[String]]) object WorkflowCallbackJsonSupport extends DefaultJsonProtocol { - implicit val callbackMessageFormat = jsonFormat3(CallbackMessage) + implicit val callbackMessageFormat = jsonFormat4(CallbackMessage) } diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala index f50d85bd474..8ec8cc40591 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala @@ -63,7 +63,8 @@ class WorkflowCallbackActorSpec val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowSucceeded.toString, - Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))) + Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))), + None ) val expectedRequest = Post(mockUri.toString, expectedPostBody) val (expectedResultMetadata, expectedUriMetadata, expectedTimestampMetadata) = metadataEvents(workflowId, true) @@ -79,7 +80,7 @@ class WorkflowCallbackActorSpec // Do the thing val cmd = PerformCallbackCommand( - workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs, List.empty ) workflowCallbackActor ! cmd @@ -112,7 +113,8 @@ class WorkflowCallbackActorSpec val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowSucceeded.toString, - Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))) + Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))), + None ) val expectedRequest = Post(mockUri.toString, expectedPostBody) @@ -129,7 +131,7 @@ class WorkflowCallbackActorSpec // Do the thing val cmd = PerformCallbackCommand( - workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs, List.empty ) workflowCallbackActor ! cmd @@ -159,10 +161,12 @@ class WorkflowCallbackActorSpec val workflowId = WorkflowId.randomId() val mockHttpClient = mock[CallbackHttpHandler] + val errorMsg = "Something bad happened :(" val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowFailed.toString, - None + None, + Option(List(errorMsg)) ) val expectedRequest = Post(mockUri.toString, expectedPostBody) @@ -181,7 +185,11 @@ class WorkflowCallbackActorSpec // Do the thing val cmd = PerformCallbackCommand( - workflowId = workflowId, uri = Option(mockUri.toString), terminalState = WorkflowFailed, workflowOutputs = basicOutputs + workflowId = workflowId, + uri = Option(mockUri.toString), + terminalState = WorkflowFailed, + workflowOutputs = basicOutputs, + List(errorMsg) ) workflowCallbackActor ! cmd @@ -220,7 +228,7 @@ class WorkflowCallbackActorSpec // Do the thing val cmd = PerformCallbackCommand( - workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs + workflowId = workflowId, uri = None, terminalState = WorkflowSucceeded, workflowOutputs = basicOutputs, List.empty ) workflowCallbackActor ! cmd @@ -234,7 +242,7 @@ class WorkflowCallbackActorSpec deathWatch.expectTerminated(workflowCallbackActor, msgWait) } - it should "no nothing when given a bogus URI" in { + it should "do nothing when given a bogus URI" in { // Setup val workflowId = WorkflowId.randomId() val mockHttpClient = mock[CallbackHttpHandler] @@ -251,7 +259,8 @@ class WorkflowCallbackActorSpec workflowId = workflowId, uri = Option("this is not a very good URI, is it"), terminalState = WorkflowSucceeded, - workflowOutputs = basicOutputs + workflowOutputs = basicOutputs, + List.empty ) workflowCallbackActor ! cmd diff --git a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index 035ac58f89d..bd8c75d943b 100644 --- a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -339,10 +339,11 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB "send a workflow callback message" in { val actor = createWorkflowActor(FinalizingWorkflowState, workflowCallbackActor = Option(workflowCallbackProbe.ref)) deathwatch watch actor + val msg = WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowSucceeded, CallOutputs.empty, List.empty) workflowCallbackProbe.expectNoMessage(AwaitAlmostNothing) actor ! WorkflowFinalizationSucceededResponse - workflowCallbackProbe.expectMsg(WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowSucceeded, CallOutputs.empty)) + workflowCallbackProbe.expectMsg(msg) deathwatch.expectTerminated(actor) } } From 7cddd109e1125b53b41c25027930bb5ba6c0397c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Sep 2023 10:40:12 -0400 Subject: [PATCH 18/29] Refer to docs in changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e9b14f5789..a3df74e4d28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ variable `APPLICATIONINSIGHTS_INSTRUMENTATIONKEY` to your account's key. [See he ### Workflow Completion Callback Cromwell can be configured to send a POST request to a specified URL when a workflow completes. The request body -includes the workflow id, terminal state, and (if workflow succeeded) final outputs. +includes the workflow id, terminal state, and (if workflow succeeded) final outputs or (if workflow failed) error message. +See `WorkflowCallback` in [ReadTheDocs](https://cromwell.readthedocs.io/en/stable/) for more information. ## 85 Release Notes From 02704486c324b6bf8b9ba07c79bd4654b75e1898 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Sep 2023 10:44:05 -0400 Subject: [PATCH 19/29] Clean up TODOs, not going TO DO them right now --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index d24994f32a9..0bcea457d9d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -48,17 +48,12 @@ object WorkflowCallbackConfig extends LazyLogging { case object AzureAuth extends AuthMethod { override def getAccessToken: ErrorOr.ErrorOr[String] = AzureCredentials.getAccessToken() } - // TODO - // case class GoogleAuth(mode: GoogleAuthMode) extends AuthMethod { - // override def getAccessToken: String = ??? - // } def apply(config: Config): WorkflowCallbackConfig = { val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) val maxRetries = config.as[Option[Int]]("max-retries") val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) - // TODO add google auth val authMethod = if (config.hasPath("auth.azure")) { Option(AzureAuth) } else None @@ -73,7 +68,6 @@ object WorkflowCallbackConfig extends LazyLogging { } def createAndValidateUri(uriString: String): Option[URI] = { - // TODO validate Try(new URI(uriString)) match { case Success(uri) => Option(uri) case Failure(err) => @@ -83,7 +77,6 @@ object WorkflowCallbackConfig extends LazyLogging { } } - /** * The WorkflowCallbackActor is responsible for sending a message on workflow completion to a configured endpoint. * This allows for users to build automation around workflow completion without needing to poll Cromwell for @@ -153,7 +146,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, private def makeHeaders: Future[List[HttpHeader]] = { authMethod.toList.map(_.getAccessToken).map { case Valid(header) => Future.successful(header) - case Invalid(err) => Future.failed(new RuntimeException(err.toString)) // TODO better error + case Invalid(err) => Future.failed(new RuntimeException(err.toString)) } .map(t => t.map(t => RawHeader("Authorization", s"Bearer $t"))) .traverse(identity) From a91ddd9231f677bf6a8eecd6018ab73a9ce30696 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 12 Sep 2023 10:58:47 -0400 Subject: [PATCH 20/29] Comments --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 0bcea457d9d..5d40c1eed79 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -153,7 +153,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs, failures: List[String]): Future[Unit] = { - // Only send outputs if the workflow succeeded + // Only send outputs if the workflow succeeded, only send an error message if it failed. val callbackPostBody = terminalState match { case WorkflowSucceeded => CallbackMessage(workflowId.toString, terminalState.toString, Option(outputs.outputs.map(entry => (entry._1.name, entry._2))), None) case WorkflowFailed => CallbackMessage(workflowId.toString, terminalState.toString, None, Option(failures)) @@ -170,6 +170,8 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $callbackUri : $err") ) result <- { + // Akka will get upset if we have a response body and leave it totally unread. + // Since there's nothing here we want to read, we need to deliberately discard it. response.entity.discardBytes() Future.unit } From c8ea146bdbb048330091f793dd5f0a25ece86666 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 15 Sep 2023 16:04:23 -0400 Subject: [PATCH 21/29] Update engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala Co-authored-by: Chris Llanwarne --- .../main/scala/cromwell/engine/workflow/WorkflowActor.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index 698f8984a18..a92ea84156d 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -551,9 +551,7 @@ class WorkflowActor(workflowToStart: WorkflowToStart, /* * The submitted workflow options have been previously validated by the CromwellApiHandler. These are - * being recreated so that in case MaterializeWorkflowDescriptor fails, the workflow logs can still - * be copied by accessing the workflow options outside of the EngineWorkflowDescriptor. Used for both - * copying workflow log and sending workflow callback. + * being recreated so that even if the MaterializeWorkflowDescriptor fails, the workflow options can still be accessed outside of the EngineWorkflowDescriptor. Used for both copying workflow log and sending workflow callbacks. */ def bruteForceWorkflowOptions: WorkflowOptions = sources.workflowOptions From 9867876f20fc4c29c7d353536271fe46e9fb577e Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 18 Sep 2023 17:07:37 -0400 Subject: [PATCH 22/29] More WorkflowActor tests --- .../engine/workflow/WorkflowActorSpec.scala | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala index bd8c75d943b..6f7c57ae985 100644 --- a/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala +++ b/server/src/test/scala/cromwell/engine/workflow/WorkflowActorSpec.scala @@ -16,15 +16,17 @@ import cromwell.engine.workflow.WorkflowManagerActor.WorkflowActorWorkComplete import cromwell.engine.workflow.lifecycle.EngineLifecycleActorAbortCommand import cromwell.engine.workflow.lifecycle.execution.WorkflowExecutionActor.{ExecuteWorkflowCommand, WorkflowExecutionAbortedResponse, WorkflowExecutionFailedResponse, WorkflowExecutionSucceededResponse} import cromwell.engine.workflow.lifecycle.finalization.{CopyWorkflowLogsActor, WorkflowCallbackActor} -import cromwell.engine.workflow.lifecycle.finalization.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationSucceededResponse} +import cromwell.engine.workflow.lifecycle.finalization.WorkflowFinalizationActor.{StartFinalizationCommand, WorkflowFinalizationFailedResponse, WorkflowFinalizationSucceededResponse} import cromwell.engine.workflow.lifecycle.initialization.WorkflowInitializationActor.{StartInitializationCommand, WorkflowInitializationAbortedResponse, WorkflowInitializationFailedResponse, WorkflowInitializationSucceededResponse} import cromwell.engine.workflow.lifecycle.materialization.MaterializeWorkflowDescriptorActor.MaterializeWorkflowDescriptorFailureResponse import cromwell.engine.workflow.workflowstore.{StartableState, Submitted, WorkflowHeartbeatConfig, WorkflowToStart} import cromwell.engine.{EngineFilesystems, EngineWorkflowDescriptor} import cromwell.services.metadata.MetadataService.{MetadataWriteSuccess, PutMetadataActionAndRespond} import cromwell.util.SampleWdl.ThreeStep +import cromwell.util.WomMocks import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.Eventually +import wom.values.WomString import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} @@ -337,15 +339,29 @@ class WorkflowActorSpec extends CromwellTestKitWordSpec with WorkflowDescriptorB } "send a workflow callback message" in { - val actor = createWorkflowActor(FinalizingWorkflowState, workflowCallbackActor = Option(workflowCallbackProbe.ref)) + val actor = createWorkflowActor(ExecutingWorkflowState, workflowCallbackActor = Option(workflowCallbackProbe.ref)) deathwatch watch actor - val msg = WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowSucceeded, CallOutputs.empty, List.empty) + val mockOutputs = WomMocks.mockOutputExpectations(Map("foo" -> WomString("bar"))) + val msg = WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowSucceeded, mockOutputs, List.empty) workflowCallbackProbe.expectNoMessage(AwaitAlmostNothing) + actor ! WorkflowExecutionSucceededResponse(Map.empty, Set(currentWorkflowId), mockOutputs, Set.empty) actor ! WorkflowFinalizationSucceededResponse workflowCallbackProbe.expectMsg(msg) deathwatch.expectTerminated(actor) } + + "send a workflow callback message for a failing workflow" in { + val actor = createWorkflowActor(FinalizingWorkflowState, workflowCallbackActor = Option(workflowCallbackProbe.ref)) + deathwatch watch actor + val errorText = "oh nooo :(" + val msg = WorkflowCallbackActor.PerformCallbackCommand(currentWorkflowId, Some(mockUri), WorkflowFailed, CallOutputs.empty, List(errorText)) + + workflowCallbackProbe.expectNoMessage(AwaitAlmostNothing) + actor ! WorkflowFinalizationFailedResponse(Seq(new RuntimeException(errorText))) + workflowCallbackProbe.expectMsg(msg) + deathwatch.expectTerminated(actor) + } } } From 6c9418950264ca4dfd2f03e842c7d2d854c86518 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 18 Sep 2023 19:42:51 -0400 Subject: [PATCH 23/29] Request callback directly from onTransition rather than sending event --- .../engine/workflow/WorkflowActor.scala | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala index a92ea84156d..3061e2e8a74 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/WorkflowActor.scala @@ -52,7 +52,6 @@ object WorkflowActor { final case class AbortWorkflowWithExceptionCommand(exception: Throwable) extends WorkflowActorCommand case object SendWorkflowHeartbeatCommand extends WorkflowActorCommand case object AwaitMetadataIntegrity - case class PerformWorkflowCallback(uri: Option[String], workflowState: WorkflowState) case class WorkflowFailedResponse(workflowId: WorkflowId, inState: WorkflowActorState, reasons: Seq[Throwable]) @@ -520,17 +519,6 @@ class WorkflowActor(workflowToStart: WorkflowToStart, stay() case Event(AwaitMetadataIntegrity, data) => goto(MetadataIntegrityValidationState) using data.copy(lastStateReached = data.lastStateReached.copy(state = stateName)) - case Event(PerformWorkflowCallback(uri, workflowState), data) => - workflowCallbackActor.foreach { wca => - wca ! PerformCallbackCommand( - workflowId, - uri, - workflowState, - data.workflowFinalOutputs.getOrElse(CallOutputs.empty), - data.lastStateReached.failures.toList.flatMap(_.map(_.getMessage)) - ) - } - stay() } onTransition { @@ -551,7 +539,8 @@ class WorkflowActor(workflowToStart: WorkflowToStart, /* * The submitted workflow options have been previously validated by the CromwellApiHandler. These are - * being recreated so that even if the MaterializeWorkflowDescriptor fails, the workflow options can still be accessed outside of the EngineWorkflowDescriptor. Used for both copying workflow log and sending workflow callbacks. + * being recreated so that even if the MaterializeWorkflowDescriptor fails, the workflow options can still be + * accessed outside of the EngineWorkflowDescriptor. Used for both copying workflow log and sending workflow callbacks. */ def bruteForceWorkflowOptions: WorkflowOptions = sources.workflowOptions @@ -582,8 +571,16 @@ class WorkflowActor(workflowToStart: WorkflowToStart, } // Attempt to perform workflow completion callback - val callbackUri = workflowOptions.get(WorkflowOptions.WorkflowCallbackUri).toOption - self ! PerformWorkflowCallback(callbackUri, terminalState.workflowState) + workflowCallbackActor.foreach { wca => + val callbackUri = workflowOptions.get(WorkflowOptions.WorkflowCallbackUri).toOption + wca ! PerformCallbackCommand( + workflowId, + callbackUri, + terminalState.workflowState, + stateData.workflowFinalOutputs.getOrElse(CallOutputs.empty), + nextStateData.lastStateReached.failures.toList.flatMap(_.map(_.getMessage)) + ) + } // We can't transition from within another transition function, but we can instruct ourselves to with a message: self ! AwaitMetadataIntegrity From fd1b1714977d8ba50becd9d0bb6c85f8b569db0c Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 18 Sep 2023 19:54:03 -0400 Subject: [PATCH 24/29] Create a consistent JSON message schema regardless of workflow status --- CHANGELOG.md | 2 +- docs/cromwell_features/WorkflowCallback.md | 4 ++-- .../finalization/WorkflowCallbackActor.scala | 9 ++------- .../WorkflowCallbackJsonSupport.scala | 4 ++-- .../finalization/WorkflowCallbackActorSpec.scala | 16 ++++++++-------- 5 files changed, 15 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3df74e4d28..76746d972c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ variable `APPLICATIONINSIGHTS_INSTRUMENTATIONKEY` to your account's key. [See he ### Workflow Completion Callback Cromwell can be configured to send a POST request to a specified URL when a workflow completes. The request body -includes the workflow id, terminal state, and (if workflow succeeded) final outputs or (if workflow failed) error message. +includes the workflow id, terminal state, and (if applicable) final outputs or error message. See `WorkflowCallback` in [ReadTheDocs](https://cromwell.readthedocs.io/en/stable/) for more information. ## 85 Release Notes diff --git a/docs/cromwell_features/WorkflowCallback.md b/docs/cromwell_features/WorkflowCallback.md index 23d6be93e0a..a4f75eb941f 100644 --- a/docs/cromwell_features/WorkflowCallback.md +++ b/docs/cromwell_features/WorkflowCallback.md @@ -51,5 +51,5 @@ Below is an example of a callback request body. * `workflowId`: The UUID of the workflow * `state`: The terminal state of the workflow. The list of possible values is: `Succeeded`, `Failed`, `Aborted` - * `outputs`: The final outputs of the workflow, as would be returned from the `api/workflows/{version}/{id}/outputs` endpoint. This field will ONLY be included when the workflow was successful. - * `failures`: A list of strings describing the workflow's failures. This field will ONLY be included if the workflow failed. \ No newline at end of file + * `outputs`: The final outputs of the workflow, as would be returned from the `api/workflows/{version}/{id}/outputs` endpoint. Expected to be empty when the workflow is not successful.. + * `failures`: A list of strings describing the workflow's failures. Expected to be empty if the workflow did not fail. diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 5d40c1eed79..1974c68ede0 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -18,7 +18,7 @@ import cromwell.cloudsupport.azure.AzureCredentials import cromwell.core.Dispatcher.IoDispatcher import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff -import cromwell.core.{CallOutputs, WorkflowFailed, WorkflowId, WorkflowState, WorkflowSucceeded} +import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ @@ -153,12 +153,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs, failures: List[String]): Future[Unit] = { - // Only send outputs if the workflow succeeded, only send an error message if it failed. - val callbackPostBody = terminalState match { - case WorkflowSucceeded => CallbackMessage(workflowId.toString, terminalState.toString, Option(outputs.outputs.map(entry => (entry._1.name, entry._2))), None) - case WorkflowFailed => CallbackMessage(workflowId.toString, terminalState.toString, None, Option(failures)) - case _ => CallbackMessage(workflowId.toString, terminalState.toString, None, None) - } + val callbackPostBody = CallbackMessage(workflowId.toString, terminalState.toString, outputs.outputs.map(entry => (entry._1.name, entry._2)), failures) for { entity <- Marshal(callbackPostBody).to[RequestEntity] headers <- makeHeaders diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala index 4acb37f4e36..eb9b1c7ea9e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackJsonSupport.scala @@ -6,8 +6,8 @@ import wom.values.WomValue final case class CallbackMessage(workflowId: String, state: String, - outputs: Option[Map[String, WomValue]], - failures: Option[List[String]]) + outputs: Map[String, WomValue], + failures: List[String]) object WorkflowCallbackJsonSupport extends DefaultJsonProtocol { implicit val callbackMessageFormat = jsonFormat4(CallbackMessage) diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala index 8ec8cc40591..3f7609acaca 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala @@ -8,7 +8,7 @@ import akka.testkit.TestProbe import common.mock.MockSugar import cromwell.core.retry.SimpleExponentialBackoff import org.mockito.Mockito._ -import cromwell.core.{TestKitSuite, WorkflowFailed, WorkflowId, WorkflowSucceeded} +import cromwell.core.{CallOutputs, TestKitSuite, WorkflowFailed, WorkflowId, WorkflowSucceeded} import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ import cromwell.services.metadata.MetadataService.PutMetadataAction @@ -63,8 +63,8 @@ class WorkflowCallbackActorSpec val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowSucceeded.toString, - Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))), - None + basicOutputs.outputs.map(entry => (entry._1.name, entry._2)), + List.empty ) val expectedRequest = Post(mockUri.toString, expectedPostBody) val (expectedResultMetadata, expectedUriMetadata, expectedTimestampMetadata) = metadataEvents(workflowId, true) @@ -113,8 +113,8 @@ class WorkflowCallbackActorSpec val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowSucceeded.toString, - Option(basicOutputs.outputs.map(entry => (entry._1.name, entry._2))), - None + basicOutputs.outputs.map(entry => (entry._1.name, entry._2)), + List.empty ) val expectedRequest = Post(mockUri.toString, expectedPostBody) @@ -165,8 +165,8 @@ class WorkflowCallbackActorSpec val expectedPostBody = CallbackMessage( workflowId.toString, WorkflowFailed.toString, - None, - Option(List(errorMsg)) + Map.empty, + List(errorMsg) ) val expectedRequest = Post(mockUri.toString, expectedPostBody) @@ -188,7 +188,7 @@ class WorkflowCallbackActorSpec workflowId = workflowId, uri = Option(mockUri.toString), terminalState = WorkflowFailed, - workflowOutputs = basicOutputs, + workflowOutputs = CallOutputs.empty, List(errorMsg) ) workflowCallbackActor ! cmd From 25e2f3a2a0eb11436ef845448cbade31534e2280 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Mon, 18 Sep 2023 20:02:45 -0400 Subject: [PATCH 25/29] PR feedback --- .../lifecycle/finalization/WorkflowCallbackActor.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index 1974c68ede0..a0116d1cc7e 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -167,8 +167,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, result <- { // Akka will get upset if we have a response body and leave it totally unread. // Since there's nothing here we want to read, we need to deliberately discard it. - response.entity.discardBytes() - Future.unit + response.entity.discardBytes().future } } yield result } From 53925256747c642214fbe2a0e9f0fa4096b69231 Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 19 Sep 2023 14:37:05 -0400 Subject: [PATCH 26/29] Dedicated thread pool for WorkflowCallbackActor with configurable size --- core/src/main/resources/reference.conf | 4 ++- .../finalization/WorkflowCallbackActor.scala | 25 ++++++++++++++----- .../cromwell/server/CromwellRootActor.scala | 1 + .../WorkflowCallbackActorSpec.scala | 5 ++++ 4 files changed, 28 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index c4e7dae6b7d..6ec05cf6025 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -629,9 +629,11 @@ ga4gh { workflow-state-callback { enabled: false + ## The number of threads to allocate for performing callbacks + # num-threads: 5 # endpoint: "http://example.com/foo" # Can be overridden in workflow options # auth.azure: true - # Users can override default retry behavior if desired + ## Users can override default retry behavior if desired # request-backoff { # min: "3 seconds" # max: "5 minutes" diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index a0116d1cc7e..c44edc2f5db 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -1,5 +1,6 @@ package cromwell.engine.workflow.lifecycle.finalization +import akka.Done import akka.actor.{Actor, ActorLogging, ActorRef, ActorSystem, Props} import akka.event.LoggingReceive import akka.http.scaladsl.Http @@ -24,7 +25,7 @@ import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.Au import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ import net.ceedubs.ficus.Ficus._ -import scala.concurrent.ExecutionContextExecutor +import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.DurationInt import scala.util.Try import cromwell.services.metadata.MetadataService.PutMetadataAction @@ -33,11 +34,12 @@ import cromwell.util.GracefulShutdownHelper.ShutdownCommand import java.net.URI import java.time.Instant -import scala.concurrent.Future +import java.util.concurrent.Executors import scala.util.{Failure, Success} case class WorkflowCallbackConfig(enabled: Boolean, + numThreads: Option[Int], defaultUri: Option[URI], // May be overridden by workflow options retryBackoff: Option[SimpleExponentialBackoff], maxRetries: Option[Int], @@ -50,6 +52,7 @@ object WorkflowCallbackConfig extends LazyLogging { } def apply(config: Config): WorkflowCallbackConfig = { + val numThreads = config.as[Option[Int]]("num-threads") val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) val maxRetries = config.as[Option[Int]]("max-retries") val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) @@ -60,6 +63,7 @@ object WorkflowCallbackConfig extends LazyLogging { WorkflowCallbackConfig( config.getBoolean("enabled"), + numThreads = numThreads, defaultUri = uri, retryBackoff = backoff, maxRetries = maxRetries, @@ -91,6 +95,7 @@ object WorkflowCallbackActor { failureMessage: List[String]) def props(serviceRegistryActor: ActorRef, + numThreads: Option[Int], defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff] = None, maxRetries: Option[Int] = None, @@ -99,6 +104,7 @@ object WorkflowCallbackActor { ) = Props( new WorkflowCallbackActor( serviceRegistryActor, + numThreads, defaultCallbackUri, retryBackoff, maxRetries, @@ -108,6 +114,7 @@ object WorkflowCallbackActor { } class WorkflowCallbackActor(serviceRegistryActor: ActorRef, + numThreads: Option[Int], defaultCallbackUri: Option[URI], retryBackoff: Option[SimpleExponentialBackoff], maxRetries: Option[Int], @@ -115,12 +122,19 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, httpClient: CallbackHttpHandler) extends Actor with ActorLogging { - implicit val ec: ExecutionContextExecutor = context.dispatcher + // Create a dedicated thread pool for this actor so its damage is limited if we end up with + // too many threads all taking a long time to do callbacks. If we're frequently saturating + // this thread pool, consider refactoring this actor to maintain a queue of callbacks and + // handle them one at a time, as opposed to starting a thread to perform each as soon as its + // received. + implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(numThreadsWithDefault)) implicit val system: ActorSystem = context.system + private lazy val defaultNumThreads = 5 private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) private lazy val defaultMaxRetries = 10 + private lazy val numThreadsWithDefault = numThreads.getOrElse(defaultNumThreads) private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) private lazy val maxRetriesWithDefault = maxRetries.getOrElse(defaultMaxRetries) @@ -152,7 +166,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, .traverse(identity) } - private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs, failures: List[String]): Future[Unit] = { + private def performCallback(workflowId: WorkflowId, callbackUri: URI, terminalState: WorkflowState, outputs: CallOutputs, failures: List[String]): Future[Done] = { val callbackPostBody = CallbackMessage(workflowId.toString, terminalState.toString, outputs.outputs.map(entry => (entry._1.name, entry._2)), failures) for { entity <- Marshal(callbackPostBody).to[RequestEntity] @@ -164,11 +178,10 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, maxRetries = Option(maxRetriesWithDefault), onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $callbackUri : $err") ) - result <- { + result <- // Akka will get upset if we have a response body and leave it totally unread. // Since there's nothing here we want to read, we need to deliberately discard it. response.entity.discardBytes().future - } } yield result } diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index 9d705755bf0..3b1bd5c96dc 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -128,6 +128,7 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, if (workflowCallbackConfig.enabled) { val props = WorkflowCallbackActor.props( serviceRegistryActor, + workflowCallbackConfig.numThreads, workflowCallbackConfig.defaultUri, workflowCallbackConfig.retryBackoff, workflowCallbackConfig.maxRetries, diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala index 3f7609acaca..a07de800217 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala @@ -73,6 +73,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, + None, Option(mockUri), httpClient = mockHttpClient ) @@ -124,6 +125,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, + None, Option(mockUri), httpClient = mockHttpClient ) @@ -177,6 +179,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, None, + None, retryBackoff = Option(SimpleExponentialBackoff(500.millis, 1.minute, 1.1)), maxRetries = Option(5), httpClient = mockHttpClient @@ -222,6 +225,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, None, + None, httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorNoUri") @@ -250,6 +254,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, None, + None, httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorBogusUri") From 9dcfe219c1e4b6aee1e462abe8690c5362a38c2d Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Tue, 19 Sep 2023 14:58:40 -0400 Subject: [PATCH 27/29] More sane default handling for config --- .../finalization/WorkflowCallbackActor.scala | 62 ++++++++----------- .../cromwell/server/CromwellRootActor.scala | 6 +- .../WorkflowCallbackActorSpec.scala | 21 +++---- 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala index c44edc2f5db..516184d023a 100644 --- a/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala +++ b/engine/src/main/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActor.scala @@ -21,7 +21,6 @@ import cromwell.core.retry.Retry.withRetry import cromwell.core.retry.SimpleExponentialBackoff import cromwell.core.{CallOutputs, WorkflowId, WorkflowState} import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackActor.PerformCallbackCommand -import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackConfig.AuthMethod import cromwell.engine.workflow.lifecycle.finalization.WorkflowCallbackJsonSupport._ import net.ceedubs.ficus.Ficus._ @@ -39,10 +38,10 @@ import scala.util.{Failure, Success} case class WorkflowCallbackConfig(enabled: Boolean, - numThreads: Option[Int], + numThreads: Int, + retryBackoff: SimpleExponentialBackoff, + maxRetries: Int, defaultUri: Option[URI], // May be overridden by workflow options - retryBackoff: Option[SimpleExponentialBackoff], - maxRetries: Option[Int], authMethod: Option[WorkflowCallbackConfig.AuthMethod]) object WorkflowCallbackConfig extends LazyLogging { @@ -51,10 +50,19 @@ object WorkflowCallbackConfig extends LazyLogging { override def getAccessToken: ErrorOr.ErrorOr[String] = AzureCredentials.getAccessToken() } + private lazy val defaultNumThreads = 5 + private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) + private lazy val defaultMaxRetries = 10 + + def empty: WorkflowCallbackConfig = WorkflowCallbackConfig( + false, defaultNumThreads, defaultRetryBackoff, defaultMaxRetries, None, None + ) + def apply(config: Config): WorkflowCallbackConfig = { - val numThreads = config.as[Option[Int]]("num-threads") - val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)) - val maxRetries = config.as[Option[Int]]("max-retries") + val enabled = config.as[Boolean]("enabled") + val numThreads = config.as[Option[Int]]("num-threads").getOrElse(defaultNumThreads) + val backoff = config.as[Option[Config]]("request-backoff").map(SimpleExponentialBackoff(_)).getOrElse(defaultRetryBackoff) + val maxRetries = config.as[Option[Int]]("max-retries").getOrElse(defaultMaxRetries) val uri = config.as[Option[String]]("endpoint").flatMap(createAndValidateUri) val authMethod = if (config.hasPath("auth.azure")) { @@ -62,11 +70,11 @@ object WorkflowCallbackConfig extends LazyLogging { } else None WorkflowCallbackConfig( - config.getBoolean("enabled"), + enabled = enabled, numThreads = numThreads, - defaultUri = uri, retryBackoff = backoff, maxRetries = maxRetries, + defaultUri = uri, authMethod = authMethod ) } @@ -95,30 +103,18 @@ object WorkflowCallbackActor { failureMessage: List[String]) def props(serviceRegistryActor: ActorRef, - numThreads: Option[Int], - defaultCallbackUri: Option[URI], - retryBackoff: Option[SimpleExponentialBackoff] = None, - maxRetries: Option[Int] = None, - authMethod: Option[WorkflowCallbackConfig.AuthMethod] = None, + callbackConfig: WorkflowCallbackConfig, httpClient: CallbackHttpHandler = CallbackHttpHandlerImpl ) = Props( new WorkflowCallbackActor( serviceRegistryActor, - numThreads, - defaultCallbackUri, - retryBackoff, - maxRetries, - authMethod, + callbackConfig, httpClient) ).withDispatcher(IoDispatcher) } class WorkflowCallbackActor(serviceRegistryActor: ActorRef, - numThreads: Option[Int], - defaultCallbackUri: Option[URI], - retryBackoff: Option[SimpleExponentialBackoff], - maxRetries: Option[Int], - authMethod: Option[AuthMethod], + config: WorkflowCallbackConfig, httpClient: CallbackHttpHandler) extends Actor with ActorLogging { @@ -127,22 +123,14 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, // this thread pool, consider refactoring this actor to maintain a queue of callbacks and // handle them one at a time, as opposed to starting a thread to perform each as soon as its // received. - implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(numThreadsWithDefault)) + implicit val ec = ExecutionContext.fromExecutor(Executors.newFixedThreadPool(config.numThreads)) implicit val system: ActorSystem = context.system - private lazy val defaultNumThreads = 5 - private lazy val defaultRetryBackoff = SimpleExponentialBackoff(3.seconds, 5.minutes, 1.1) - private lazy val defaultMaxRetries = 10 - - private lazy val numThreadsWithDefault = numThreads.getOrElse(defaultNumThreads) - private lazy val backoffWithDefault = retryBackoff.getOrElse(defaultRetryBackoff) - private lazy val maxRetriesWithDefault = maxRetries.getOrElse(defaultMaxRetries) - override def receive: Actor.Receive = LoggingReceive { case PerformCallbackCommand(workflowId, requestedCallbackUri, terminalState, outputs, failures) => // If no uri was provided to us here, fall back to the one in config. If there isn't // one there, do not perform a callback. - val callbackUri: Option[URI] = requestedCallbackUri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(defaultCallbackUri) + val callbackUri: Option[URI] = requestedCallbackUri.map(WorkflowCallbackConfig.createAndValidateUri).getOrElse(config.defaultUri) callbackUri.map { uri => performCallback(workflowId, uri, terminalState, outputs, failures) onComplete { case Success(_) => @@ -158,7 +146,7 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, } private def makeHeaders: Future[List[HttpHeader]] = { - authMethod.toList.map(_.getAccessToken).map { + config.authMethod.toList.map(_.getAccessToken).map { case Valid(header) => Future.successful(header) case Invalid(err) => Future.failed(new RuntimeException(err.toString)) } @@ -174,8 +162,8 @@ class WorkflowCallbackActor(serviceRegistryActor: ActorRef, request = HttpRequest(method = HttpMethods.POST, uri = callbackUri.toString, entity = entity).withHeaders(headers) response <- withRetry( () => sendRequestOrFail(request), - backoff = backoffWithDefault, - maxRetries = Option(maxRetriesWithDefault), + backoff = config.retryBackoff, + maxRetries = Option(config.maxRetries), onRetry = err => log.warning(s"Will retry after failure to send workflow callback for workflow $workflowId in state $terminalState to $callbackUri : $err") ) result <- diff --git a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala index 3b1bd5c96dc..c37d6466fcc 100644 --- a/engine/src/main/scala/cromwell/server/CromwellRootActor.scala +++ b/engine/src/main/scala/cromwell/server/CromwellRootActor.scala @@ -128,11 +128,7 @@ abstract class CromwellRootActor(terminator: CromwellTerminator, if (workflowCallbackConfig.enabled) { val props = WorkflowCallbackActor.props( serviceRegistryActor, - workflowCallbackConfig.numThreads, - workflowCallbackConfig.defaultUri, - workflowCallbackConfig.retryBackoff, - workflowCallbackConfig.maxRetries, - workflowCallbackConfig.authMethod + workflowCallbackConfig ) Option(context.actorOf(props, "WorkflowCallbackActor")) } else None diff --git a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala index a07de800217..c469d1dcaa2 100644 --- a/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala +++ b/engine/src/test/scala/cromwell/engine/workflow/lifecycle/finalization/WorkflowCallbackActorSpec.scala @@ -35,6 +35,7 @@ class WorkflowCallbackActorSpec private val serviceRegistryActor = TestProbe("testServiceRegistryActor") private val deathWatch = TestProbe("deathWatch") private val mockUri = new URI("http://example.com") + private val basicConfig = WorkflowCallbackConfig.empty.copy(enabled = true) private val basicOutputs = WomMocks.mockOutputExpectations(List("foo" -> WomString("bar")).toMap) private val httpSuccess = Future.successful(HttpResponse.apply(StatusCodes.OK)) @@ -73,8 +74,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, - None, - Option(mockUri), + basicConfig.copy(defaultUri = Option(mockUri)), httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorSuccess") @@ -125,8 +125,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, - None, - Option(mockUri), + basicConfig.copy(defaultUri = Option(mockUri)), httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorFailThenSuccees") @@ -178,10 +177,10 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, - None, - None, - retryBackoff = Option(SimpleExponentialBackoff(500.millis, 1.minute, 1.1)), - maxRetries = Option(5), + basicConfig.copy( + retryBackoff = SimpleExponentialBackoff(500.millis, 1.minute, 1.1), + maxRetries = 5, + ), httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorFail") @@ -224,8 +223,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, - None, - None, + basicConfig, httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorNoUri") @@ -253,8 +251,7 @@ class WorkflowCallbackActorSpec val props = WorkflowCallbackActor.props( serviceRegistryActor.ref, - None, - None, + basicConfig, httpClient = mockHttpClient ) val workflowCallbackActor = system.actorOf(props, "testWorkflowCallbackActorBogusUri") From f8983cf2b8e4247d1597ca23325aa55e69789b2d Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Wed, 20 Sep 2023 09:07:04 -0400 Subject: [PATCH 28/29] Update docs for new config --- docs/cromwell_features/WorkflowCallback.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cromwell_features/WorkflowCallback.md b/docs/cromwell_features/WorkflowCallback.md index a4f75eb941f..28ae788aaec 100644 --- a/docs/cromwell_features/WorkflowCallback.md +++ b/docs/cromwell_features/WorkflowCallback.md @@ -9,6 +9,7 @@ This feature will only be used if enabled via config. All config items except `e ``` workflow-state-callback { enabled: true + num-threads: 5 endpoint: "http://example.com" auth.azure: true request-backoff { @@ -21,6 +22,7 @@ workflow-state-callback { ``` * `enabled`: This boolean controls whether a callback will be attempted or not. + * `num-threads`: The number of threads Cromwell will allocate for performing callbacks. * `endpoint`: This is the default URL to send the message to. If this is unset, and no URL is set in workflow options, no callback will be sent. * `auth.azure`: If true, and if Cromwell is running in an Azure environment, Cromwell will include an auth header with bearer token generated from local Azure credentials. * `request-backoff` and `max-retries`: Include these to override the default retry behavior (default behavior shown here). From 029b24c8a14f51e25802781a96e6e54fb9721cbc Mon Sep 17 00:00:00 2001 From: Janet Gainer-Dewar Date: Fri, 22 Sep 2023 10:54:28 -0400 Subject: [PATCH 29/29] Comment update --- docs/cromwell_features/WorkflowCallback.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/cromwell_features/WorkflowCallback.md b/docs/cromwell_features/WorkflowCallback.md index 28ae788aaec..275a6ede0a7 100644 --- a/docs/cromwell_features/WorkflowCallback.md +++ b/docs/cromwell_features/WorkflowCallback.md @@ -1,6 +1,7 @@ The workflow callback is a simple way to integrate Cromwell with an external system. When each workflow reaches a terminal -state, Cromwell will POST a message to a provided URL (see below for schema of this message). Messages are sent for root -workflows only, not subworkflows. +state, Cromwell will attempt to POST a message to a provided URL (see below for schema of this message). +Messages are sent for root workflows only, not subworkflows. Callback status information, including success or failure, +will be recorded in workflow metadata with keys containing `workflowCallback`. ### Configuration