Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MLCubes CI #942

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions GANDLF/cli/generate_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import json
from typing import Optional
from pprint import pprint
from pathlib import Path

import pandas as pd
from tqdm import tqdm
import torch
Expand Down Expand Up @@ -174,6 +176,23 @@
elif problem_type == "segmentation":
# read images and then calculate metrics
class_list = parameters["model"]["class_list"]
# check if the paths are relative or absolute, and convert them to absolute paths
cwd = Path(__file__).resolve().parent
for column in input_df.columns:
loc = input_df.columns.get_loc(column)
if (loc == "Target") or (loc == "Prediction"):
# These entries can be considered as paths to files
for index, entry in enumerate(input_df[column]):
if isinstance(entry, str):
this_path = Path(entry)
start_path = Path(cwd)
if start_path.is_file():
start_path = start_path.parent
if not this_path.is_file():
if not this_path.is_absolute():
input_df.loc[index, column] = str(

Check warning on line 193 in GANDLF/cli/generate_metrics.py

View check run for this annotation

Codecov / codecov/patch

GANDLF/cli/generate_metrics.py#L185-L193

Added lines #L185 - L193 were not covered by tests
start_path.joinpath(this_path)
)
for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]):
current_subject_id = row["SubjectID"]
overall_stats_dict[current_subject_id] = {}
Expand Down
2 changes: 2 additions & 0 deletions mlcube/ci/metrics_mlcube/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
!build.sh
!setup.sh
17 changes: 17 additions & 0 deletions mlcube/ci/metrics_mlcube/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Build:

```
sh build.sh
```

Set up the assets needed to test the MLCubes:

```
sh setup.sh
```

Test (edit `test.sh` to change which test is run):

```
sh test.sh
```
2 changes: 2 additions & 0 deletions mlcube/ci/metrics_mlcube/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Stop at the first failing command so the MLCube is never configured
# against a base image that failed to build.
set -e

# Build the GaNDLF CPU base image from the repository root (three levels up).
docker build -t gandlfcpu -f ../../../Dockerfile-CPU ../../..
# Build the metrics MLCube image on top of it, forcing a rebuild.
mlcube configure --mlcube ./mlcube -Pdocker.build_strategy=always
32 changes: 32 additions & 0 deletions mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Metadata. If you are an author, change this to reflect your organization and metrics specifics.
name: MLCommons GaNDLF Generic MLCube
description: MLCommons GaNDLF MLCube, containing functionality for calculating metrics.
authors:
- {name: "MLCommons Medical Working Group", email: "[email protected]", org: "MLCommons" }


docker:
# The image tag that will be built/pulled/used. Change to suit your organization/model:version.
image: mlcommons/gandlf-metrics-mlcube:0.0.1

build_context: "../project"
# Docker file name within docker build context, default is `Dockerfile`.
build_file: "Dockerfile"

## Everything below this point affects how the GaNDLF container is invoked.
## If you are an author, it is strongly recommended that you do not edit these.
## Please request any new features for deployed containers from the GaNDLF maintainers:
## https://github.com/mlcommons/GaNDLF/issues/new?template=---feature-request.md

tasks:
evaluate:
# Runs metrics calculation on predictions
entrypoint: "python3.9 /entrypoint.py"
parameters:
inputs: {
predictions: predictions/,
labels: labels/,
# GaNDLF config file. The name should be `parameters.yaml`
config: {type: file, default: parameters.yaml}
}
outputs: { output-file: { type: "file", default: "results.yaml" } }
4 changes: 4 additions & 0 deletions mlcube/ci/metrics_mlcube/project/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FROM gandlfcpu

COPY entrypoint.py /entrypoint.py

52 changes: 52 additions & 0 deletions mlcube/ci/metrics_mlcube/project/entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os
import argparse
import json
import yaml


def convert_json_to_yaml(tmp_json_output, output_file):
    """
    Re-serialize a temporary JSON results file as YAML and delete the JSON file.

    Args:
        tmp_json_output (str): The path to the intermediate JSON file; it is removed afterwards.
        output_file (str): The path the YAML document is written to.
    """
    with open(tmp_json_output) as json_handle:
        payload = json.load(json_handle)

    with open(output_file, "w") as yaml_handle:
        yaml.dump(payload, yaml_handle)

    # The JSON file is only an intermediate artifact; drop it once converted.
    os.remove(tmp_json_output)


def run_gandlf(predictions, labels, output_file, config):
    """
    A function that calls GaNDLF's generate metrics command.

    The command writes a temporary "results.json" next to the output file,
    which is then converted to YAML at ``output_file`` and removed.

    Args:
        predictions (str): The path to predictions folder. It must contain a "predictions.csv" file
        labels (str): The path to labels folder. It must contain a "targets.csv" file.
        output_file (str): The path to the output file
        config (str): The path to the parameters file

    Raises:
        ValueError: If ``output_file`` is None.
        RuntimeError: If the GaNDLF process cannot be started or exits with a non-zero code.

    Note: If predictions and labels CSVs contain paths,
    those paths should be relative to the containing folder.
    """
    # Local import keeps this block self-contained within the file.
    import subprocess

    if output_file is None:
        raise ValueError("An output file path is required (--output-file)")

    predictions_csv = os.path.join(predictions, "predictions.csv")
    labels_csv = os.path.join(labels, "targets.csv")

    output_folder = os.path.dirname(output_file)
    tmp_json_output = os.path.join(output_folder, "results.json")

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and prevents command injection.
    command = [
        "gandlf",
        "generate-metrics",
        "-c",
        config,
        "-i",
        f"{labels_csv},{predictions_csv}",
        "-o",
        tmp_json_output,
    ]
    try:
        exit_code = subprocess.run(command, check=False).returncode
    except OSError as err:
        # e.g. the "gandlf" executable is not on PATH
        raise RuntimeError(f"GaNDLF process could not be started: {err}") from err

    # returncode is negative when the process was killed by a signal, so that
    # case is reported as a failure too (os.WEXITSTATUS would have yielded 0).
    if exit_code != 0:
        raise RuntimeError(f"GaNDLF process failed with exit code {exit_code}")
    convert_json_to_yaml(tmp_json_output, output_file)


if __name__ == "__main__":
    # Command-line entry point: collect the four path options and hand them
    # to run_gandlf.
    cli = argparse.ArgumentParser()
    for flag, options in (
        ("--config", {"required": True}),
        ("--predictions", {"required": True}),
        ("--output-file", {"default": None}),
        ("--labels", {"required": True}),
    ):
        cli.add_argument(flag, metavar="", type=str, **options)

    parsed = cli.parse_args()

    run_gandlf(
        predictions=parsed.predictions,
        labels=parsed.labels,
        output_file=parsed.output_file,
        config=parsed.config,
    )
3 changes: 3 additions & 0 deletions mlcube/ci/metrics_mlcube/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Stop at the first failing command so a failed download is not followed by
# extraction and cleanup of a file that does not exist.
set -e

# Fetch the test assets archive, unpack it here, then remove the archive.
wget https://storage.googleapis.com/medperf-storage/gandlf_mlcube_test.tar.gz
tar -xf gandlf_mlcube_test.tar.gz
rm gandlf_mlcube_test.tar.gz
15 changes: 15 additions & 0 deletions mlcube/ci/metrics_mlcube/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# classification
# Remove results from a previous run; -f keeps this quiet when none exist.
rm -f ./mlcube/workspace/results.yaml
mlcube run --mlcube ./mlcube \
--task evaluate \
predictions=../../test_classification/predictions \
labels=../../test_classification/labels \
config=../../test_classification/config.yaml

# # segmentation (FAILS BECAUSE OF RELATIVE PATHS)
# rm -f ./mlcube/workspace/results.yaml
# mlcube run --mlcube ./mlcube \
# --task evaluate \
# predictions=../../test_segmentation/predictions \
# labels=../../test_segmentation/labels \
# config=../../test_segmentation/config.yaml
Loading