# cromwell.examples.conf

# This line is required. It pulls in default overrides from the embedded cromwell `application.conf` needed for proper
# performance of cromwell.
include required("application")

# Cromwell HTTP server settings
webservice {
  #port = 8000
  #interface = 0.0.0.0
  #binding-timeout = 5s
  #instance.name = "reference"
}

akka {
  # Optionally set / override any akka settings
}

# Cromwell "system" settings
system {
  # If 'true', a SIGINT will trigger Cromwell to attempt to abort all currently running jobs before exiting
  #abort-jobs-on-terminate = false

  # If 'true', a SIGTERM or SIGINT will trigger Cromwell to attempt to gracefully shut down in server mode,
  # in particular clearing up all queued database writes before letting the JVM shut down.
  # The shutdown is a multi-phase process, each phase having its own configurable timeout. See the Dev Wiki for more details.
  #graceful-server-shutdown = true

  # If 'true' then when Cromwell starts up, it tries to restart incomplete workflows
  #workflow-restart = true

  # Cromwell will cap the number of running workflows at N
  #max-concurrent-workflows = 5000

  # Cromwell will launch up to N submitted workflows at a time, regardless of how many open workflow slots exist
  #max-workflow-launch-count = 50

  # Number of seconds between workflow launches
  #new-workflow-poll-rate = 20

  # Since the WorkflowLogCopyRouter is initialized in code, this is the number of workers
  #number-of-workflow-log-copy-workers = 10

  # Default number of cache read workers
  #number-of-cache-read-workers = 25

  io {
    # Global Throttling - This is mostly useful for GCS and can be adjusted to match
    # the quota available on the GCS API
    #number-of-requests = 100000
    #per = 100 seconds

    # Number of times an I/O operation should be attempted before giving up and failing it.
    #number-of-attempts = 5
  }

  # Maximum number of input file bytes allowed in order to read each type.
  # If exceeded a FileSizeTooBig exception will be thrown.
  input-read-limits {

    #lines = 128000

    #bool = 7

    #int = 19

    #float = 50

    #string = 128000

    #json = 128000

    #tsv = 128000

    #map = 128000

    #object = 128000
  }
}

workflow-options {
  # These workflow options will be encrypted when stored in the database
  #encrypted-fields: []

  # AES-256 key to use to encrypt the values in `encrypted-fields`
  #base64-encryption-key: "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="

  # Directory where to write per workflow logs
  #workflow-log-dir: "cromwell-workflow-logs"

  # When true, per workflow logs will be deleted after copying
  #workflow-log-temporary: true

  # Workflow-failure-mode determines what happens to other calls when a call fails. Can be either ContinueWhilePossible or NoNewCalls.
  # Can also be overridden in workflow options. Defaults to NoNewCalls. Uncomment to change:
  #workflow-failure-mode: "ContinueWhilePossible"
}

# Optional call-caching configuration.
call-caching {
  # Allows re-use of existing results for jobs you've already run
  # (default: false)
  #enabled = false

  # Whether to invalidate a cache result forever if we cannot reuse it. Disable this if you expect some cache copies
  # to fail for external reasons which should not invalidate the cache (e.g. auth differences between users):
  # (default: true)
  #invalidate-bad-cache-results = true
}

# Google configuration
google {

  #application-name = "cromwell"

  # Default: just application default
  #auths = [

    # Application default
    #{
    #  name = "application-default"
    #  scheme = "application_default"
    #},

    # Use a refresh token
    #{
    #  name = "user-via-refresh"
    #  scheme = "refresh_token"
    #  client-id = "secret_id"
    #  client-secret = "secret_secret"
    #},


    # Use a service account
    #{
    #  name = "service-account"
    #  scheme = "service_account"
    #  # Choose between PEM file and JSON file as a credential format. They're mutually exclusive.
    #  # PEM format:
    #  service-account-id = "my-service-account"
    #  pem-file = "/path/to/file.pem"
    #  # JSON format:
    #  #json-file = "/path/to/file.json"
    #}

  #]
}

docker {
  hash-lookup {
    # Set this to match your available quota against the Google Container Registry (GCR) API
    #gcr-api-queries-per-100-seconds = 1000

    # Time before an entry expires from the docker hashes cache and needs to be fetched again (a duration with its unit)
    #cache-entry-ttl = "20 minutes"

    # Maximum number of elements to be kept in the cache. If the limit is reached, old elements will be removed from the cache
    #cache-size = 200

    # How should docker hashes be looked up. Possible values are "local" and "remote"
    # "local": Lookup hashes on the local docker daemon using the cli
    # "remote": Lookup hashes on docker hub and gcr
    #method = "remote"
  }
}

engine {
  # This instructs the engine which filesystems are at its disposal to perform any IO operation that it might need.
  # For instance, WDL variables declared at the Workflow level will be evaluated using the filesystems declared here.
  # If you intend to be able to run workflows with this kind of declaration:
  # workflow {
  #    String str = read_string("gs://bucket/my-file.txt")
  # }
  # You will need to provide the engine with a gcs filesystem
  # Note that the default filesystem (local) is always available.
  filesystems {
    #  gcs {
    #    auth = "application-default"
    #  }
    local {
      #enabled: true
    }
  }
}

backend {
  # Override the default backend.
  #default = "LocalExample"

  # The list of providers.
  providers {

    # The local provider is included by default in the reference.conf. This is an example.

    # Define a new backend provider.
    LocalExample {
      # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend.
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"

      # The backend custom configuration.
      config {

        # Optional limit on the number of concurrent jobs. Uncomment to enable.
        #concurrent-job-limit = 5

        # If true, submits scripts to the bash background using "&". Only useful for dispatchers that do NOT submit
        # the job and then immediately return a scheduled job id.
        run-in-background = true

        # `script-epilogue` configures a shell command to run after the execution of every command block.
        #
        # If this value is not set explicitly, the default value is `sync`, equivalent to:
        # script-epilogue = "sync"
        #
        # To turn off the default `sync` behavior set this value to an empty string:
        # script-epilogue = ""

        # The list of possible runtime custom attributes.
        # Both attributes are declared optional (`String?`); they are referenced by `submit-docker` below.
        runtime-attributes = """
        String? docker
        String? docker_user
        """

        # Submit string when there is no "docker" runtime attribute.
        submit = "/bin/bash ${script}"

        # Submit string when there is a "docker" runtime attribute.
        # The job directory `${cwd}` is mounted into the container at `${docker_cwd}`.
        # NOTE(review): `${"--user " + docker_user}` presumably expands to nothing when `docker_user` is unset --
        # confirm against the ConfigBackend documentation.
        submit-docker = """
        docker run \
          --rm -i \
          ${"--user " + docker_user} \
          --entrypoint /bin/bash \
          -v ${cwd}:${docker_cwd} \
          ${docker} ${script}
        """

        # Root directory where Cromwell writes job results. This directory must be
        # visible and writeable by the Cromwell process as well as the jobs that Cromwell
        # launches.
        root = "cromwell-executions"

        # File system configuration.
        filesystems {

          # For SFS backends, the "local" configuration specifies how files are handled.
          local {

            # Localization strategies, attempted in the order listed:
            # try to hard link (ln), then soft-link (ln -s), and if both fail, then copy the files.
            localization: [
              "hard-link", "soft-link", "copy"
            ]

            # Call caching strategies
            caching {
              # When copying a cached result, what type of file duplication should occur. Attempted in the order listed below:
              duplication-strategy: [
                "hard-link", "soft-link", "copy"
              ]

              # Possible values: file, path
              # "file" will compute an md5 hash of the file content.
              # "path" will compute an md5 hash of the file path. This strategy will only be effective if the
              # duplication-strategy (above) is set to "soft-link", in order to allow for the original file path to be hashed.
              hashing-strategy: "file"

              # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does,
              # use the content of this file as a hash.
              # If false or the md5 file does not exist, will proceed with the above-defined hashing strategy.
              check-sibling-md5: false
            }
          }
        }

        # The defaults for runtime attributes if not provided.
        default-runtime-attributes {
          failOnStderr: false
          continueOnReturnCode: 0
        }
      }
    }

    # Other backend examples. Uncomment the block/stanza for your configuration. May need more tweaking/configuration
    # for your specific environment.

    #TES {
    #  actor-factory = "cromwell.backend.impl.tes.TesBackendLifecycleActorFactory"
    #  config {
    #    root = "cromwell-executions"
    #    dockerRoot = "/cromwell-executions"
    #    endpoint = "http://127.0.0.1:9000/v1/jobs"
    #    default-runtime-attributes {
    #      cpu: 1
    #      failOnStderr: false
    #      continueOnReturnCode: 0
    #      memory: "2 GB"
    #      disk: "2 GB"
    #    }
    #  }
    #}

    #SGE {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #
    #    # Limits the number of concurrent jobs
    #    concurrent-job-limit = 5
    #
    #    runtime-attributes = """
    #    Int cpu = 1
    #    Float? memory_gb
    #    String? sge_queue
    #    String? sge_project
    #    """
    #
    #    submit = """
    #    qsub \
    #    -terse \
    #    -V \
    #    -b n \
    #    -N ${job_name} \
    #    -wd ${cwd} \
    #    -o ${out} \
    #    -e ${err} \
    #    -pe smp ${cpu} \
    #    ${"-l mem_free=" + memory_gb + "g"} \
    #    ${"-q " + sge_queue} \
    #    ${"-P " + sge_project} \
    #    ${script}
    #    """
    #
    #    kill = "qdel ${job_id}"
    #    check-alive = "qstat -j ${job_id}"
    #    job-id-regex = "(\\d+)"
    #  }
    #}

    #LSF {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    submit = "bsub -J ${job_name} -cwd ${cwd} -o ${out} -e ${err} /bin/bash ${script}"
    #    kill = "bkill ${job_id}"
    #    check-alive = "bjobs ${job_id}"
    #    job-id-regex = "Job <(\\d+)>.*"
    #  }
    #}

    #SLURM {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    runtime-attributes = """
    #    Int runtime_minutes = 600
    #    Int cpus = 2
    #    Int requested_memory_mb_per_core = 8000
    #    String queue = "short"
    #    """
    #
    #    submit = """
    #        sbatch -J ${job_name} -D ${cwd} -o ${out} -e ${err} -t ${runtime_minutes} -p ${queue} \
    #        ${"-n " + cpus} \
    #        --mem-per-cpu=${requested_memory_mb_per_core} \
    #        --wrap "/bin/bash ${script}"
    #    """
    #    kill = "scancel ${job_id}"
    #    check-alive = "squeue -j ${job_id}"
    #    job-id-regex = "Submitted batch job (\\d+).*"
    #  }
    #}

    # Example backend that _only_ runs workflows that specify docker for every command.
    #Docker {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    run-in-background = true
    #    runtime-attributes = "String docker"
    #    submit-docker = "docker run --rm -v ${cwd}:${docker_cwd} -i ${docker} /bin/bash < ${script}"
    #  }
    #}

    #HtCondor {
    #  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
    #  config {
    #    runtime-attributes = """
    #      Int cpu = 1
    #      Float memory_mb = 512.0
    #      Float disk_kb = 256000.0
    #      String? nativeSpecs
    #      String? docker
    #    """
    #
    #    submit = """
    #      chmod 755 ${script}
    #      cat > ${cwd}/execution/submitFile <<EOF
    #      Iwd=${cwd}/execution
    #      requirements=${nativeSpecs}
    #      leave_in_queue=true
    #      request_memory=${memory_mb}
    #      request_disk=${disk_kb}
    #      error=${err}
    #      output=${out}
    #      log_xml=true
    #      request_cpus=${cpu}
    #      executable=${script}
    #      log=${cwd}/execution/execution.log
    #      queue
    #      EOF
    #      condor_submit ${cwd}/execution/submitFile
    #    """
    #
    #    submit-docker = """
    #      chmod 755 ${script}
    #      cat > ${cwd}/execution/dockerScript <<EOF
    #      #!/bin/bash
    #      docker run --rm -i -v ${cwd}:${docker_cwd} ${docker} /bin/bash ${script}
    #      EOF
    #      chmod 755 ${cwd}/execution/dockerScript
    #      cat > ${cwd}/execution/submitFile <<EOF
    #      Iwd=${cwd}/execution
    #      requirements=${nativeSpecs}
    #      leave_in_queue=true
    #      request_memory=${memory_mb}
    #      request_disk=${disk_kb}
    #      error=${cwd}/execution/stderr
    #      output=${cwd}/execution/stdout
    #      log_xml=true
    #      request_cpus=${cpu}
    #      executable=${cwd}/execution/dockerScript
    #      log=${cwd}/execution/execution.log
    #      queue
    #      EOF
    #      condor_submit ${cwd}/execution/submitFile
    #    """
    #
    #    kill = "condor_rm ${job_id}"
    #    check-alive = "condor_q ${job_id}"
    #    job-id-regex = "(?sm).*cluster (\\d+)..*"
    #  }
    #}

    #Spark {
    #  actor-factory = "cromwell.backend.impl.spark.SparkBackendFactory"
    #  config {
    #    # Root directory where Cromwell writes job results.  This directory must be
    #    # visible and writeable by the Cromwell process as well as the jobs that Cromwell
    #    # launches.
    #    root: "cromwell-executions"
    #
    #    filesystems {
    #      local {
    #        localization: [
    #          "hard-link", "soft-link", "copy"
    #        ]
    #      }
    #    }
    #
    #    # Change (master, deployMode) to (yarn, client), (yarn, cluster)
    #    # or (spark://hostname:port, cluster) for Spark standalone cluster mode.
    #    master: "local"
    #    deployMode: "client"
    #  }
    #}

    #JES {
    #  actor-factory = "cromwell.backend.impl.jes.JesBackendLifecycleActorFactory"
    #  config {
    #    # Google project
    #    project = "my-cromwell-workflows"
    #
    #    # Base bucket for workflow executions
    #    root = "gs://my-cromwell-workflows-bucket"
    #
    #    # Set this to the lower of the two values "Queries per 100 seconds" and "Queries per 100 seconds per user" for
    #    # your project.
    #    #
    #    # Used to help determine maximum throughput to the Google Genomics API. Setting this value too low will
    #    # cause a drop in performance. Setting this value too high will cause QPS based locks from Google.
    #    # 1000 is the default "Queries per 100 seconds per user", 50000 is the default "Queries per 100 seconds"
    #    # See https://cloud.google.com/genomics/quotas for more information
    #    genomics-api-queries-per-100-seconds = 1000
    #
    #    # Polling for completion backs-off gradually for slower-running jobs.
    #    # This is the maximum polling interval (in seconds):
    #    maximum-polling-interval = 600
    #
    #    # Optional Dockerhub Credentials. Can be used to access private docker images.
    #    dockerhub {
    #      # account = ""
    #      # token = ""
    #    }
    #
    #    genomics {
    #      # A reference to an auth defined in the `google` stanza at the top.  This auth is used to create
    #      # Pipelines and manipulate auth JSONs.
    #      auth = "application-default"
    #
    #
    #      // alternative service account to use on the launched compute instance
    #      // NOTE: If combined with service account authorization, both that service account and this service account
    #      // must be able to read and write to the 'root' GCS path
    #      compute-service-account = "default"
    #
    #      # Endpoint for APIs, no reason to change this unless directed by Google.
    #      endpoint-url = "https://genomics.googleapis.com/"
    #
    #      # Restrict access to VM metadata. Useful in cases when untrusted containers are running under a service
    #      # account not owned by the submitting user
    #      restrict-metadata-access = false
    #    }
    #
    #    filesystems {
    #      gcs {
    #        # A reference to a potentially different auth for manipulating files via engine functions.
    #        auth = "application-default"
    #
    #        caching {
    #          # When a cache hit is found, the following duplication strategy will be followed to use the cached outputs
    #          # Possible values: "copy", "reference". Defaults to "copy"
    #          # "copy": Copy the output files
    #          # "reference": DO NOT copy the output files but point to the original output files instead.
    #          #              Will still make sure that all the original output files exist and are accessible before
    #          #              going forward with the cache hit.
    #          duplication-strategy = "copy"
    #        }
    #      }
    #    }
    #
    #    default-runtime-attributes {
    #      cpu: 1
    #      failOnStderr: false
    #      continueOnReturnCode: 0
    #      memory: "2 GB"
    #      bootDiskSizeGb: 10
    #      # Allowed to be a String, or a list of Strings
    #      disks: "local-disk 10 SSD"
    #      noAddress: false
    #      preemptible: 0
    #      zones: ["us-central1-a", "us-central1-b"]
    #    }
    #  }
    #}

    #AWS {
    #  actor-factory = "cromwell.backend.impl.aws.AwsBackendActorFactory"
    #  config {
    #    ## These two settings are required to authenticate with the ECS service:
    #    accessKeyId = "..."
    #    secretKey = "..."
    #  }
    #}

  }
}

services {
  MetadataService {
    config {
      # Set this value to "Inf" to turn off metadata summary refresh.  The default value is currently "2 seconds".
      # metadata-summary-refresh-interval = "Inf"
      # For higher scale environments, e.g. many workflows and/or jobs, DB write performance for metadata events
      # can be improved by writing to the database in batches. Increasing this value can dramatically improve overall
      # performance but will both lead to a higher memory usage as well as increase the risk that metadata events
      # might not have been persisted in the event of a Cromwell crash.
      #
      # For normal usage the default value of 200 should be fine but for larger/production environments we recommend a
      # value of at least 500. There'll be no one size fits all number here so we recommend benchmarking performance and
      # tuning the value to match your environment.
      # db-batch-size = 200
      #
      # Periodically the stored metadata events will be forcibly written to the DB regardless of whether the batch size
      # has been reached. This prevents events from sitting indefinitely in an incomplete batch when no new events
      # are being generated. The default value is currently 5 seconds
      # db-flush-rate = 5 seconds
    }
  }
}

database {
  # mysql example
  #driver = "slick.driver.MySQLDriver$"
  #db {
  #  driver = "com.mysql.jdbc.Driver"
  #  url = "jdbc:mysql://host/cromwell?rewriteBatchedStatements=true"
  #  user = "user"
  #  password = "pass"
  #  connectionTimeout = 5000
  #}

  # For batch inserts the number of inserts to send to the DB at a time
  # insert-batch-size = 2000

  migration {
    # For databases with a very large number of symbols, selecting all the rows at once can generate a variety of
    # problems. In order to avoid any issue, the selection is paginated. This value sets how many rows should be
    # retrieved and processed at a time, before asking for the next chunk.
    #read-batch-size = 100000

    # Because a symbol row can contain any arbitrary wdl value, the number of metadata rows to insert from a single
    # symbol row can vary from 1 to several thousands (or more). To keep the size of the insert batch from growing out
    # of control we monitor its size and execute/commit when it reaches or exceeds `write-batch-size`.
    #write-batch-size = 100000
  }
}