Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge plot ids across dvc.yaml files #9898

Merged
merged 14 commits into from
Sep 14, 2023
Merged
13 changes: 4 additions & 9 deletions dvc/render/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from funcy import get_in, last

from dvc.repo.plots import _normpath, infer_data_sources
from dvc.utils.plots import get_plot_id
from dvc.utils.plots import group_definitions_by_id

from .convert import _get_converter

Expand Down Expand Up @@ -36,14 +36,9 @@ def __init__(self, data: Dict):
def group_definitions(self):
groups = defaultdict(list)
for rev, rev_content in self.data.items():
for config_file, config_file_content in (
rev_content.get("definitions", {}).get("data", {}).items()
):
for plot_id, plot_definition in config_file_content.get(
"data", {}
).items():
full_id = get_plot_id(plot_id, config_file)
groups[full_id].append((rev, plot_id, plot_definition))
definitions = rev_content.get("definitions", {}).get("data", {})
for plot_id, definition in group_definitions_by_id(definitions).items():
groups[plot_id].append((rev, *definition))
return dict(groups)

def get_definition_data(self, target_files, rev):
Expand Down
32 changes: 32 additions & 0 deletions dvc/utils/plots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,36 @@
from collections import defaultdict
from typing import Dict, Tuple


def get_plot_id(config_plot_id: str, config_file_path: str = ""):
return (
f"{config_file_path}::{config_plot_id}" if config_file_path else config_plot_id
)


def group_definitions_by_id(
definitions: Dict[str, Dict]
) -> Dict[str, Tuple[str, Dict]]:
"""
Format ID and extracts plot_definition for each plot.

Arguments:
definitions: dict of {config_file: config_file_content}.

Returns:
Dict of {plot_id: (original_plot_id, plot_definition)}.
"""
groups_by_config: Dict = defaultdict(dict)
groups_by_id: Dict = {}
for config_file, config_file_content in definitions.items():
for plot_id, plot_definition in config_file_content.get("data", {}).items():
groups_by_config[plot_id][config_file] = (plot_id, plot_definition)
# only keep config_file if the same plot_id is in multiple config_files
for plot_id, configs in groups_by_config.items():
if len(configs) == 1:
groups_by_id[plot_id] = next(iter(configs.values()))
else:
for config_file, content in configs.items():
full_id = get_plot_id(plot_id, config_file)
groups_by_id[full_id] = content
return groups_by_id
2 changes: 1 addition & 1 deletion tests/func/plots/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,5 +426,5 @@ def test_show_plots_defined_with_native_os_path(tmp_dir, dvc, scm, capsys):
assert "errors" not in json_out

json_data = json_out["data"]
assert json_data[f"dvc.yaml::{top_level_plot}"]
assert json_data[f"{top_level_plot}"]
assert json_data[stage_plot]
55 changes: 55 additions & 0 deletions tests/integration/plots/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,58 @@ def make():
}

return make


@pytest.fixture
def repo_with_dvclive_plots(tmp_dir, scm, dvc, run_copy_metrics):
def make():
metrics = [
{"step": 1, "metric": 0.1},
{"step": 2, "metric": 0.2},
{"step": 3, "metric": 0.3},
]

metrics_path = tmp_dir / "dvclive" / "plots" / "metrics" / "metric.tsv"
metrics_path.parent.mkdir(parents=True)
metrics_path.dump_json(metrics)

plots_config_v1 = [
{
"plots/metrics": {
"x": "step",
}
}
]

from dvc.utils.serialize import modify_yaml

dvcyaml_path_v1 = tmp_dir / "dvclive" / "dvc.yaml"
with modify_yaml(dvcyaml_path_v1) as dvcfile_content:
dvcfile_content["plots"] = plots_config_v1
scm.add([metrics_path, dvcyaml_path_v1])
scm.commit("add dvclive 2.x plots")

yield {
"data": {metrics_path: metrics},
"configs": {dvcyaml_path_v1: plots_config_v1},
}

plots_config_v2 = [
{
"dvclive/plots/metrics": {
"x": "step",
}
}
]

dvcyaml_path_v1.unlink()
dvcyaml_path_v2 = tmp_dir / "dvc.yaml"
with modify_yaml(dvcyaml_path_v2) as dvcfile_content:
dvcfile_content["plots"] = plots_config_v2

yield {
"data": {metrics_path: metrics},
"configs": {dvcyaml_path_v2: plots_config_v2},
}

return make
21 changes: 18 additions & 3 deletions tests/integration/plots/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,8 @@ def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots):
html_result = extract_vega_specs(
html_path,
[
"dvc.yaml::linear_train_vs_test",
"dvc.yaml::confusion_train_vs_test",
"linear_train_vs_test",
"confusion_train_vs_test",
],
)
ble = _update_datapoints(
Expand All @@ -456,5 +456,20 @@ def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots):
},
)

assert html_result["dvc.yaml::linear_train_vs_test"]["data"]["values"] == ble
assert html_result["linear_train_vs_test"]["data"]["values"] == ble
# TODO check json results once vscode is able to handle flexible plots


@pytest.mark.vscode
def test_repo_with_dvclive_plots(tmp_dir, capsys, repo_with_dvclive_plots):
next(repo_with_dvclive_plots())

for s in ("show", "diff"):
_, json_result, split_json_result = call(capsys, subcommand=s)
expected_result = {
"data": {
"dvclive/plots/metrics/metric.tsv": [],
},
}
assert json_result == expected_result
assert split_json_result == expected_result
158 changes: 122 additions & 36 deletions tests/unit/render/test_match.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,121 @@
import pytest
from funcy import set_in

from dvc.render import VERSION_FIELD
from dvc.render.converter.vega import VegaConverter
from dvc.render.match import PlotsData, _squash_plots_properties, match_defs_renderers


def test_group_definitions():
error = FileNotFoundError()
data = {
"v1": {
"definitions": {
"data": {
"config_file_1": {"data": {"plot_id_1": {}, "plot_id_2": {}}},
"config_file_2": {"data": {"plot_id_3": {}}},
@pytest.mark.parametrize(
"data,expected",
[
pytest.param(
{
"v1": {
"definitions": {
"data": {"config_file_1": {"data": {"plot_id_1": {}}}}
}
}
}
},
"v2": {
"definitions": {
"data": {
"config_file_1": {"error": error},
"config_file_2": {"data": {"plot_id_3": {}}},
},
{"plot_id_1": [("v1", "plot_id_1", {})]},
id="simple",
),
pytest.param(
{
"v1": {
"definitions": {
"data": {
"config_file_1": {"data": {"plot_id_1": {}}},
"config_file_2": {"data": {"plot_id_1": {}}},
}
}
}
}
},
}

},
{
"config_file_1::plot_id_1": [("v1", "plot_id_1", {})],
"config_file_2::plot_id_1": [("v1", "plot_id_1", {})],
},
id="multi_config",
),
pytest.param(
{
"v1": {
"definitions": {
"data": {"config_file_1": {"data": {"plot_id_1": {}}}}
}
},
"v2": {
"definitions": {
"data": {"config_file_2": {"data": {"plot_id_1": {}}}}
}
},
},
{"plot_id_1": [("v1", "plot_id_1", {}), ("v2", "plot_id_1", {})]},
id="multi_rev",
),
pytest.param(
{
"v1": {
"definitions": {
"data": {
"config_file_1": {"data": {"plot_id_1": {}}},
"config_file_2": {"data": {"plot_id_1": {}}},
}
}
},
"v2": {
"definitions": {
"data": {"config_file_1": {"data": {"plot_id_1": {}}}}
}
},
},
{
"config_file_1::plot_id_1": [("v1", "plot_id_1", {})],
"config_file_2::plot_id_1": [("v1", "plot_id_1", {})],
"plot_id_1": [("v2", "plot_id_1", {})],
},
id="multi_rev_multi_config",
),
pytest.param(
{
"v1": {
"definitions": {
"data": {
"config_file_1": {
"data": {"plot_id_1": {}, "plot_id_2": {}}
},
"config_file_2": {"data": {"plot_id_3": {}}},
}
}
},
"v2": {
"definitions": {
"data": {
"config_file_2": {"data": {"plot_id_3": {}}},
}
},
"source": {
"data": {
"config_file_1": {"error": FileNotFoundError()},
}
},
},
},
{
"plot_id_1": [("v1", "plot_id_1", {})],
"plot_id_2": [("v1", "plot_id_2", {})],
"plot_id_3": [
("v1", "plot_id_3", {}),
("v2", "plot_id_3", {}),
],
},
id="all",
),
],
)
def test_group_definitions(data, expected):
grouped = PlotsData(data).group_definitions()

assert grouped == {
"config_file_1::plot_id_1": [("v1", "plot_id_1", {})],
"config_file_1::plot_id_2": [("v1", "plot_id_2", {})],
"config_file_2::plot_id_3": [
("v1", "plot_id_3", {}),
("v2", "plot_id_3", {}),
],
}
assert grouped == expected


def test_match_renderers(M):
Expand All @@ -57,12 +137,6 @@ def test_match_renderers(M):
"data": {"file.json": {"data": [{"x": 1, "y": 1}, {"x": 2, "y": 2}]}}
},
},
"errored_revision": {
"definitions": {
"data": {"config_file_1": {"error": FileNotFoundError()}},
},
"sources": {},
},
"revision_with_no_data": {
"definitions": {
"data": {
Expand Down Expand Up @@ -103,7 +177,7 @@ def test_match_renderers(M):
},
]
assert renderer.properties == {
"title": "config_file_1::plot_id_1",
"title": "plot_id_1",
"x": "x",
"y": "y",
"x_label": "x",
Expand All @@ -129,7 +203,7 @@ def test_flat_datapoints_errors_are_caught(M, mocker):
assert renderer_with_errors.definition_errors == {"v1": M.instance_of(ValueError)}


def test_squash_plots_properties():
def test_squash_plots_properties_revs():
group = [
("v3", "config_file", "plot_id", {"foo": 1}),
("v2", "config_file", "plot_id", {"foo": 2, "bar": 2}),
Expand All @@ -139,3 +213,15 @@ def test_squash_plots_properties():
plot_properties = _squash_plots_properties(group)

assert plot_properties == {"foo": 1, "bar": 2, "baz": 3}


def test_squash_plots_properties_config_files():
group = [
("v1", "config_file1", "plot_id", {"foo": 1}),
("v1", "config_file2", "plot_id", {"foo": 2, "bar": 2}),
("v1", "config_file3", "plot_id", {"baz": 3}),
]

plot_properties = _squash_plots_properties(group)

assert plot_properties == {"foo": 1, "bar": 2, "baz": 3}
18 changes: 18 additions & 0 deletions tests/unit/utils/test_plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from dvc.utils.plots import get_plot_id, group_definitions_by_id


def test_get_plot_id():
assert get_plot_id("plot_id", "config_path") == "config_path::plot_id"
assert get_plot_id("plot_id", "") == "plot_id"


def test_group_definitions_by_id():
definitions = {
"config1": {"data": {"plot1": "definition1", "plot2": "definition2"}},
"config2": {"data": {"plot1": "definition1"}},
}
assert group_definitions_by_id(definitions) == {
"config1::plot1": ("plot1", "definition1"),
"config2::plot1": ("plot1", "definition1"),
"plot2": ("plot2", "definition2"),
}