
Bug fix for regression test #496

Merged: 9 commits, Jun 21, 2021
2 changes: 2 additions & 0 deletions .amlignore
@@ -3,6 +3,8 @@
.pytest_cache
.mypy_cache
__pycache__
azure-pipelines
.github
datasets
modelweights
outputs
2 changes: 1 addition & 1 deletion .idea/runConfigurations/Template__Run_ML_on_AzureML.xml

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -17,6 +17,7 @@ created.
jobs that run in AzureML.

### Changed
- ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.

### Fixed

2 changes: 1 addition & 1 deletion InnerEye/Common/Statistics/wilcoxon_signed_rank_test.py
@@ -309,7 +309,7 @@ def main() -> None:
for line in lines:
print(line)
for basename, fig in plots.items():
fig.savefig(f"{basename}.jpg")
fig.savefig(f"{basename}.png")


if __name__ == "__main__":
68 changes: 33 additions & 35 deletions InnerEye/ML/baselines_util.py
@@ -29,10 +29,11 @@
from InnerEye.ML.visualizers.metrics_scatterplot import write_to_scatterplot_directory
from InnerEye.ML.visualizers.plot_cross_validation import convert_rows_for_comparisons, may_write_lines_to_file

REGRESSION_TEST_OUTPUT_FOLDER = "OUTPUT"
REGRESSION_TEST_AZUREML_FOLDER = "AZUREML_OUTPUT"
REGRESSION_TEST_AZUREML_PARENT_FOLDER = "AZUREML_PARENT_OUTPUT"
CONTENTS_MISMATCH = "Contents mismatch"
MISSING_FILE = "Missing file"
MISSING_FILE = "Missing"
TEXT_FILE_SUFFIXES = [".txt", ".csv", ".json", ".html", ".md"]


@@ -222,56 +223,46 @@ def print_lines(prefix: str, lines: List[str]) -> None:

def compare_folder_contents(expected_folder: Path,
actual_folder: Optional[Path] = None,
run: Optional[Run] = None) -> None:
run: Optional[Run] = None) -> List[str]:
"""
Compares a set of files in a folder, against files in either the other folder or files stored in the given
AzureML run. Each file that is present in the "expected" folder must also be present in the "actual" folder
(or the AzureML run), with exactly the same contents, in the same folder structure.
For example, if there is a file "<expected>/foo/bar/contents.txt", then there must also be a file
"<actual>/foo/bar/contents.txt"
If a file is missing, or does not have the expected contents, an exception is raised.
:param expected_folder: A folder with files that are expected to be present.
:param actual_folder: The output folder with the actually produced files.
:param run: An AzureML run
:return: A list of human readable error messages, with message and file path. If no errors are found, the list is
empty.
"""
logging.debug(f"Checking job output against expected files in folder {expected_folder}")
logging.debug(f"Current working directory: {Path.cwd()}")
messages = []
if not expected_folder.is_dir():
raise ValueError(f"Folder with expected files does not exist: {expected_folder}")
if run and is_offline_run_context(run):
logging.warning("Skipping file comparison because the given run context is an AzureML offline run.")
return
return []
files_in_run: List[str] = run.get_file_names() if run else []
temp_folder = Path(tempfile.mkdtemp()) if run else None
for file in expected_folder.rglob("*"):
# rglob also returns folders, skip those
if file.is_dir():
continue
logging.debug(f"Checking file {file}")
# All files stored in AzureML runs use Linux-style path
file_relative = file.relative_to(expected_folder).as_posix()
if str(file_relative).startswith(REGRESSION_TEST_AZUREML_FOLDER) or \
str(file_relative).startswith(REGRESSION_TEST_AZUREML_PARENT_FOLDER):
continue
actual_file: Optional[Path] = None
if actual_folder:
actual_file = actual_folder / file_relative
if not actual_file.is_file():
actual_file = None
elif temp_folder is not None and run is not None:
actual_file = temp_folder / file_relative
if file_relative in files_in_run:
actual_file = temp_folder / file_relative
run.download_file(name=str(file_relative), output_file_path=str(actual_file))
message = compare_files(expected=file, actual=actual_file) if actual_file else "Missing file"
else:
raise ValueError("One of the two arguments run, actual_folder must be provided.")
message = compare_files(expected=file, actual=actual_file) if actual_file.exists() else MISSING_FILE
if message:
logging.debug(f"Error: {message}")
messages.append(f"{message}: {file_relative}")
logging.info(f"File {file_relative}: {message or 'OK'}")
if temp_folder:
shutil.rmtree(temp_folder)
if messages:
raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
f"{os.linesep.join(messages)}")
return messages
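
A rough usage sketch, not part of this diff (folder names are hypothetical): because compare_folder_contents now returns the list of problems instead of raising, a caller can collect messages from several comparisons and fail once with a combined report.

from pathlib import Path
from InnerEye.ML.baselines_util import compare_folder_contents

# Hypothetical folders: expected regression outputs compared against the job's outputs folder.
errors = compare_folder_contents(expected_folder=Path("regression_results/OUTPUT"),
                                 actual_folder=Path("outputs"))
for error in errors:
    print(error)  # e.g. "Missing: foo/bar/contents.txt" or "Contents mismatch: metrics.csv"
if errors:
    raise ValueError("Regression test comparison failed")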


def compare_folders_and_run_outputs(expected: Path, actual: Path) -> None:
@@ -286,17 +277,24 @@ def compare_folders_and_run_outputs(expected: Path, actual: Path) -> None:
"""
if not expected.is_dir():
raise ValueError(f"Folder with expected files does not exist: {expected}")
# First compare the normal output files that the run produces
compare_folder_contents(expected, actual)
# Compare the set of files in the magic folder with the outputs stored in the run context
azureml_folder = expected / REGRESSION_TEST_AZUREML_FOLDER
if azureml_folder.is_dir():
compare_folder_contents(azureml_folder, run=RUN_CONTEXT)
# Compare the set of files in the magic folder with the outputs stored in the run context of the parent run
azureml_parent_folder = expected / REGRESSION_TEST_AZUREML_PARENT_FOLDER
if azureml_parent_folder.is_dir():
if PARENT_RUN_CONTEXT is None:
raise ValueError(f"The set of expected test results in {expected} contains a folder "
f"{REGRESSION_TEST_AZUREML_PARENT_FOLDER}, but the present run is not a cross-validation "
"child run")
compare_folder_contents(azureml_parent_folder, run=PARENT_RUN_CONTEXT)
logging.debug(f"Current working directory: {Path.cwd()}")
messages = []
for (subfolder, message_prefix, actual_folder, run_to_compare) in \
[(REGRESSION_TEST_OUTPUT_FOLDER, "run output files", actual, None),
(REGRESSION_TEST_AZUREML_FOLDER, "AzureML outputs in present run", None, RUN_CONTEXT),
(REGRESSION_TEST_AZUREML_PARENT_FOLDER, "AzureML outputs in parent run", None, PARENT_RUN_CONTEXT)]:
folder = expected / subfolder
if folder.is_dir():
logging.info(f"Comparing results in {folder} against {message_prefix}:")
if actual_folder is None and run_to_compare is None:
raise ValueError(f"The set of expected test results in {expected} contains a folder "
f"{subfolder}, but there is no (parent) run to compare against.")
new_messages = compare_folder_contents(folder, actual_folder=actual_folder, run=run_to_compare)
if new_messages:
messages.append(f"Issues in {message_prefix}:")
messages.extend(new_messages)
else:
logging.info(f"Folder {subfolder} not found, skipping comparison against {message_prefix}.")
Reviewer comment (Contributor): Is it really ok to skip here?

if messages:
raise ValueError(f"Some expected files were missing or did not have the expected contents:{os.linesep}"
f"{os.linesep.join(messages)}")
10 changes: 6 additions & 4 deletions InnerEye/ML/run_ml.py
@@ -34,7 +34,7 @@
change_working_directory, get_best_epoch_results_path, is_windows, logging_section, logging_to_file, \
print_exception, remove_file_or_directory
from InnerEye.Common.fixed_paths import INNEREYE_PACKAGE_NAME, LOG_FILE_NAME, PYTHON_ENVIRONMENT_NAME
from InnerEye.ML.baselines_util import compare_folder_contents
from InnerEye.ML.baselines_util import compare_folders_and_run_outputs
from InnerEye.ML.common import ModelExecutionMode
from InnerEye.ML.config import SegmentationModelBase
from InnerEye.ML.deep_learning_config import CHECKPOINT_FOLDER, DeepLearningConfig, FINAL_ENSEMBLE_MODEL_FOLDER, \
@@ -411,8 +411,8 @@ def run(self) -> None:
# run context.
logging.info("Comparing the current results against stored results")
if self.is_normal_run_or_crossval_child_0():
compare_folder_contents(expected_folder=self.container.regression_test_folder,
actual_folder=self.container.outputs_folder)
compare_folders_and_run_outputs(expected=self.container.regression_test_folder,
actual=self.container.outputs_folder)
else:
logging.info("Skipping because this is not cross-validation child run 0.")

@@ -712,9 +712,11 @@ def copy_file(source: Path, destination_file: str) -> None:
else:
raise ValueError(f"Expected an absolute path to a checkpoint file, but got: {checkpoint}")
model_folder.mkdir(parents=True, exist_ok=True)
# For reproducibility of the files used in regression tests, checkpoint paths need to be sorted.
checkpoints_sorted = sorted(relative_checkpoint_paths)
model_inference_config = ModelInferenceConfig(model_name=self.container.model_name,
model_configs_namespace=self.config_namespace,
checkpoint_paths=relative_checkpoint_paths)
checkpoint_paths=checkpoints_sorted)
# Inference configuration must live in the root folder of the registered model
full_path_to_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
full_path_to_config.write_text(model_inference_config.to_json(), encoding='utf-8') # type: ignore
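
A minimal sketch of what the sort buys (checkpoint paths taken from the test-data change at the end of this diff; the standalone snippet itself is illustrative): with sorting, the order of checkpoint_paths written to the inference config is deterministic, so the regression comparison of that JSON file no longer depends on enumeration order.

relative_checkpoint_paths = ["checkpoints/best_checkpoint.ckpt",
                             "checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt"]
print(sorted(relative_checkpoint_paths))
# ['checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt', 'checkpoints/best_checkpoint.ckpt']
# Uppercase "OTHER_RUNS" sorts before lowercase "best_checkpoint.ckpt", matching the
# updated model_inference_config.json at the end of this diff.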
4 changes: 2 additions & 2 deletions InnerEye/ML/visualizers/metrics_scatterplot.py
@@ -206,7 +206,7 @@ def to_dict(data: pd.DataFrame) -> Dict[str, Dict[str, float]]:

def write_to_scatterplot_directory(root_folder: Path, plots: Dict[str, plt.Figure]) -> None:
"""
Writes a file root_folder/scatterplots/basename.jpg for every plot in plots with key "basename".
Writes a file root_folder/scatterplots/basename.png for every plot in plots with key "basename".
:param root_folder: path to a folder
:param plots: dictionary from plot basenames to plots (plt.Figure objects)
"""
@@ -217,7 +217,7 @@ def write_to_scatterplot_directory(root_folder: Path, plots: Dict[str, plt.Figure]) -> None:
scatterplot_dir.mkdir(parents=True, exist_ok=True)
logging.info(f"There are {len(plots)} plots to write to {scatterplot_dir}")
for basename, fig in plots.items():
fig.savefig(scatterplot_dir / f"{basename}.jpg")
fig.savefig(scatterplot_dir / f"{basename}.png")


def main() -> None:
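
A hedged usage sketch (figure name and output folder are made up): the helper is assumed to create root_folder/scatterplots/ if needed and save every figure in the dictionary there as a PNG.

import matplotlib.pyplot as plt
from pathlib import Path

from InnerEye.ML.visualizers.metrics_scatterplot import write_to_scatterplot_directory

fig = plt.figure()
plt.plot([0.7, 0.8, 0.9], [0.72, 0.79, 0.91], "o")  # dummy metric values
write_to_scatterplot_directory(root_folder=Path("outputs"), plots={"dice_comparison": fig})
# Expected result: outputs/scatterplots/dice_comparison.png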
2 changes: 1 addition & 1 deletion InnerEye/ML/visualizers/plot_cross_validation.py
@@ -647,7 +647,7 @@ def plot_metrics(config: PlotCrossValidationConfig,

# save plot
suffix = f"_{sub_df_index}" if len(df_list) > 1 else ""
plot_dst = root / f"{metric_type}_{mode.value}_splits{suffix}.jpg"
plot_dst = root / f"{metric_type}_{mode.value}_splits{suffix}.png"
fig.savefig(plot_dst, bbox_inches='tight')
logging.info("Saved box-plots to: {}".format(plot_dst))


Five files were deleted (names not shown).

@@ -1 +1 @@
{"model_name": "BasicModel2Epochs", "checkpoint_paths": ["checkpoints/best_checkpoint.ckpt", "checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt"], "model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}
{"model_name": "BasicModel2Epochs", "checkpoint_paths": ["checkpoints/OTHER_RUNS/1/best_checkpoint.ckpt", "checkpoints/best_checkpoint.ckpt"], "model_configs_namespace": "InnerEye.ML.configs.segmentation.BasicModel2Epochs"}