mlcube/model_mlcube/mlcube.yaml

## This YAML file contains MLCube configuration.
## The `gandlf deploy` tool looks for this file to generate your embedded-model MLCube image.
## If you are a model author, this file (or the derivative generated by `gandlf deploy`)
## can be distributed along with your model container to enable use as an MLCube.
## See the MLCube specifications (ex: https://mlcommons.github.io/mlcube/runners/) for additional options.


# Metadata. If you are a model author, change this to reflect your model and organization.
name: MLCommons GaNDLF Generic MLCube
description: MLCommons GaNDLF MLCube, containing functionality for model training, inference and data construction.
authors: 
 - {name: "MLCommons Medical Working Group", email: "gandlf@mlcommons.org", org: "MLCommons" }

## Specifies hardware and software requirements.
platform:
  # Change this to some value >0 to use GPU(s) when running
  accelerator_count: 0
  # Some other sample platform requirement parameters
  #  accelerator_maker: NVIDIA
  #  accelerator_model: A100-80GB
  #  host_memory_gb: 40
  #  need_internet_access: True
  #  host_disk_space_gb: 100

docker:
  # The image tag that will be built/pulled/used. Change to suit your organization/model:version.
  image: mlcommons/gandlf:0.0.1
  
  ## Generally, these build options will only be needed by GaNDLF maintainers.
  # Docker build context relative to $MLCUBE_ROOT. (gandlf_deploy can handle this automatically.)
  build_context: "../"
  # Docker file name within docker build context. Any "Dockerfile-*" in the GaNDLF source repo is valid.
  build_file: "Dockerfile-CUDA11.8"
  
  # These settings should be set by global MLCube configuration, generally not per-deployment.
  # However, some sane defaults (for Docker >19.03) are here:
  #env_args:
  #  CUDA_VISIBLE_DEVICES: "${oc.env:CUDA_VISIBLE_DEVICES}"
  #gpu_args: --gpus all
  

singularity:
  # Image name. Change to suit your organization/model/version
  image: mlcommons-gandlf-0.0.1.simg

## Everything below this point affects how the GaNDLF container is invoked.
## If you are a model author, it is strongly recommended that you do not edit these.
## Please request any new features for deployed containers from the GaNDLF maintainers:
## https://github.com/mlcommons/GaNDLF/issues/new?template=---feature-request.md

tasks:
  train:
  # Trains a new model, creating a model directory, or resumes training on an existing model.
    entrypoint: "gandlf run --train --device cpu"
    parameters:
      inputs: {
        # Path to a data csv such as that constructed by the "construct_csv" task.
        input-data: {type: "file", default: "data.csv"},
        # Path to a GaNDLF config file. See samples for more examples.
        config: {type: "file", default: "config.yml"},
      }
      outputs: {
        # Path to a model directory. Not used if deploying an embedded model.
        model-dir: {type: "directory", default: "model/"},
      }
      
      
  infer:
  # Runs inference on some existing model given new data
    entrypoint: "gandlf run --infer --device cpu"
    parameters:
      inputs: {
        # Path to a data csv such as that constructed by the "construct_csv" task.
        input-data: {type: "file", default: "data.csv"},
        # Path to a GaNDLF config file. See samples for more examples.
        # Currently disabled -- inference defaults to using the model's config.
        #config: {type: "file", default: "config.yml"},
        # Path to a model directory. Not used if deploying an embedded model.
        model-dir: {type: "directory", default: "model/"},
        #device: {type: "str", default: "cpu"},
        #config: {type: file, default: parameters.yaml}
      }
      outputs: {output-path: {type: "directory", default: "inference_results"}}

  construct_csv:
  # Constructs a data csv from a data directory that can be passed to future steps, to prevent issues with path translation between host and container.
    entrypoint: "gandlf construct-csv --relativize-paths"
    parameters:
      inputs: {
        # Do NOT change the position of the inputDir parameter! It is relevant due to MLCube mounting rules.
        # Path to a directory containing input data. Each subject should be a subdirectory, with consistent filenaming conventions.
        input-dir: {type: "directory", default: "data/"},
        # Path to a file containing identifying strings for each channel (and label, if performing segmentation).
        channels-id: {type: "file", default: "channelIDs.yml"},
      }
      outputs: {
        output-file: {type: "file", default: "data.csv"}
        }

  recover_config:
  # Extracts the config file from the embedded model (if any) in the MLCube.
    entrypoint: "gandlf recover-config --mlcube"
    parameters: 
      outputs: {
        output-file: {type: "file", default: "recovered_config.yml"},
      }