From 18574152613c2738830188a8c595a18d05de40ad Mon Sep 17 00:00:00 2001 From: Carson Sievert Date: Wed, 18 Sep 2024 13:43:57 -0500 Subject: [PATCH] `HTMLTextDocument()` now preserves the order of serialized dependencies (#95) * Update unit test to check for order of dependencies * Preserve order of dependencies when deduping * Update changelog --- CHANGELOG.md | 2 +- htmltools/_core.py | 13 +++++++++---- tests/test_html_document.py | 6 +++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e7af15..9c7f620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] - +* Fixed an issue with `HTMLTextDocument()` returning extracted `HTMLDependency()`s in a non-deterministic order. (#95) ## [0.5.3] 2024-07-18 diff --git a/htmltools/_core.py b/htmltools/_core.py index 2beee8d..68c5499 100644 --- a/htmltools/_core.py +++ b/htmltools/_core.py @@ -1215,19 +1215,24 @@ def _static_extract_serialized_html_deps( # HTMLdependency.get_tag_representation() pattern = r'' dep_strs = re.findall(pattern, html) - # Deduplicate dependencies. htmltools normally would dedupe dependencies, but - # with HTMLTextDocuments, the input HTML would usually have been generated by - # something else (like Quarto) and may not have the dependencies deduped. - dep_strs = list(set(dep_strs)) # Remove the serialized HTML dependencies from the HTML string html = re.sub(pattern, "", html) + # Reconstitute the HTMLDependency objects + # + # Note: htmltools normally would dedupe dependencies, but + # with HTMLTextDocuments, the input HTML would usually have been generated by + # something else (like Quarto) and may not have the dependencies deduped. 
+ seen_deps: set[str] = set() deps: list[HTMLDependency] = [] for dep_str in dep_strs: + if dep_str in seen_deps: + continue args = json.loads(dep_str) dep = HTMLDependency(**args) deps.append(dep) + seen_deps.add(dep_str) return (html, deps) diff --git a/tests/test_html_document.py b/tests/test_html_document.py index f4609f3..35ccb2f 100644 --- a/tests/test_html_document.py +++ b/tests/test_html_document.py @@ -322,9 +322,9 @@ def test_json_roundtrip(): x_str, deps_replace_pattern='' ).render() - # Make sure both deps are present. - assert "testdep" in [d.name for d in rendered["dependencies"]] - assert "testdep2" in [d.name for d in rendered["dependencies"]] + # Make sure both deps are present and in the order they appear in x_str. + assert "testdep2" == rendered["dependencies"][0].name + assert "testdep" == rendered["dependencies"][1].name # Make sure testdep was deduplicated by HTMLTextDocument().render(). assert rendered["dependencies"].count(testdep) == 1