-
Notifications
You must be signed in to change notification settings - Fork 80
/
mlcube.yaml
106 lines (92 loc) · 4.75 KB
/
mlcube.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
## This YAML file contains MLCube configuration.
## The `gandlf deploy` tool looks for this file to generate your embedded-model MLCube image.
## If you are a model author, this file (or the derivative generated by `gandlf deploy`)
## can be distributed along with your model container to enable use as an MLCube.
## See the MLCube specifications (e.g. https://mlcommons.github.io/mlcube/runners/) for additional options.
# Metadata. If you are a model author, change this to reflect your model and organization.
name: MLCommons GaNDLF Generic MLCube
description: MLCommons GaNDLF MLCube, containing functionality for model training, inference and data construction.
authors:
  # One entry per author/maintainer group.
  # NOTE(review): the email value below looks like a redaction placeholder rather
  # than a real address -- replace it with the actual contact address.
  - name: "MLCommons Medical Working Group"
    email: "[email protected]"
    org: "MLCommons"
## Specifies hardware and software requirements.
platform:
  # Number of accelerators requested.
  # Change this to some value >0 to use GPU(s) when running.
  accelerator_count: 0
  # Some other sample platform requirement parameters (uncomment to use):
  # accelerator_maker: NVIDIA
  # accelerator_model: A100-80GB
  # host_memory_gb: 40
  # need_internet_access: True
  # host_disk_space_gb: 100
## Settings used when this MLCube is built/run with Docker.
docker:
  # The image tag that will be built/pulled/used. Change to suit your organization/model:version.
  image: mlcommons/gandlf:0.0.1

  ## Generally, these build options will only be needed by GaNDLF maintainers.
  # Docker build context relative to $MLCUBE_ROOT. (gandlf_deploy can handle this automatically.)
  build_context: "../"
  # Docker file name within docker build context. Any "Dockerfile-*" in the GaNDLF source repo is valid.
  build_file: "Dockerfile-CUDA11.8"

  # These settings should be set by global MLCube configuration, generally not per-deployment.
  # However, some sane defaults (for Docker >19.03) are here:
  # env_args:
  #   CUDA_VISIBLE_DEVICES: "${oc.env:CUDA_VISIBLE_DEVICES}"
  # gpu_args: --gpus all
## Settings used when this MLCube is run with Singularity.
singularity:
  # Image name. Change to suit your organization/model/version.
  image: mlcommons-gandlf-0.0.1.simg
## Everything below this point affects how the GaNDLF container is invoked.
## If you are a model author, it is strongly recommended that you do not edit these.
## Please request any new features for deployed containers from the GaNDLF maintainers:
## https://github.com/mlcommons/GaNDLF/issues/new?template=---feature-request.md
tasks:
  train:
    # Trains a new model, creating a model directory, or resumes training on an existing model.
    # NOTE(review): the entrypoint hard-codes `--device cpu`; presumably this must
    # also be changed to train on a GPU -- confirm.
    entrypoint: "gandlf run --train --device cpu"
    parameters:
      inputs:
        # Path to a data csv such as that constructed by the "construct_csv" task.
        input-data: {type: "file", default: "data.csv"}
        # Path to a GaNDLF config file. See samples for more examples.
        config: {type: "file", default: "config.yml"}
      outputs:
        # Path to a model directory. Not used if deploying an embedded model.
        model-dir: {type: "directory", default: "model/"}

  infer:
    # Runs inference on some existing model given new data.
    # NOTE(review): the entrypoint hard-codes `--device cpu`; presumably this must
    # also be changed to run inference on a GPU -- confirm.
    entrypoint: "gandlf run --infer --device cpu"
    parameters:
      inputs:
        # Path to a data csv such as that constructed by the "construct_csv" task.
        input-data: {type: "file", default: "data.csv"}
        # Path to a GaNDLF config file. See samples for more examples.
        # Currently disabled -- inference defaults to using the model's config.
        # config: {type: "file", default: "config.yml"}
        # Path to a model directory. Not used if deploying an embedded model.
        model-dir: {type: "directory", default: "model/"}
        # Other disabled inputs, kept for reference:
        # device: {type: "str", default: "cpu"}
        # config: {type: file, default: parameters.yaml}
      outputs:
        # Directory the inference results are written to.
        output-path: {type: "directory", default: "inference_results"}

  construct_csv:
    # Constructs a data csv from a data directory that can be passed to future steps,
    # to prevent issues with path translation between host and container.
    entrypoint: "gandlf construct-csv --relativize-paths"
    parameters:
      inputs:
        # Do NOT change the position of the input-dir parameter! It is relevant due to MLCube mounting rules.
        # Path to a directory containing input data. Each subject should be a subdirectory,
        # with consistent filenaming conventions.
        input-dir: {type: "directory", default: "data/"}
        # Path to a file containing identifying strings for each channel (and label, if performing segmentation).
        channels-id: {type: "file", default: "channelIDs.yml"}
      outputs:
        # Path the constructed data csv is written to.
        output-file: {type: "file", default: "data.csv"}

  recover_config:
    # Extracts the config file from the embedded model (if any) in the MLCube.
    entrypoint: "gandlf recover-config --mlcube"
    parameters:
      outputs:
        # Path the recovered config file is written to.
        output-file: {type: "file", default: "recovered_config.yml"}