From 18574152613c2738830188a8c595a18d05de40ad Mon Sep 17 00:00:00 2001
From: Carson Sievert <cpsievert1@gmail.com>
Date: Wed, 18 Sep 2024 13:43:57 -0500
Subject: [PATCH] `HTMLTextDocument()` now preserves the order of serialized
 dependencies (#95)

* Update unit test to check for order of dependencies

* Preserve order of dependencies when deduping

* Update changelog
---
 CHANGELOG.md                |  2 +-
 htmltools/_core.py          | 13 +++++++++----
 tests/test_html_document.py |  6 +++---
 3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e7af15..9c7f620 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED]
 
-
+* Fixed an issue with `HTMLTextDocument()` returning extracted `HTMLDependency()`s in a non-deterministic order. (#95)
 
 ## [0.5.3] 2024-07-18
 
diff --git a/htmltools/_core.py b/htmltools/_core.py
index 2beee8d..68c5499 100644
--- a/htmltools/_core.py
+++ b/htmltools/_core.py
@@ -1215,19 +1215,24 @@ def _static_extract_serialized_html_deps(
         # HTMLdependency.get_tag_representation()
         pattern = r'<script type="application/json" data-html-dependency="">((?:.|\r|\n)*?)</script>'
         dep_strs = re.findall(pattern, html)
-        # Deduplicate dependencies. htmltools normally would dedupe dependencies, but
-        # with HTMLTextDocuments, the input HTML would usually have been generated by
-        # something else (like Quarto) and may not have the dependencies deduped.
-        dep_strs = list(set(dep_strs))
 
         # Remove the serialized HTML dependencies from the HTML string
         html = re.sub(pattern, "", html)
 
+        # Reconstitute the HTMLDependency objects
+        #
+        # Note: htmltools normally would dedupe dependencies, but
+        # with HTMLTextDocuments, the input HTML would usually have been generated by
+        # something else (like Quarto) and may not have the dependencies deduped.
+        seen_deps: set[str] = set()
         deps: list[HTMLDependency] = []
         for dep_str in dep_strs:
+            if dep_str in seen_deps:
+                continue
             args = json.loads(dep_str)
             dep = HTMLDependency(**args)
             deps.append(dep)
+            seen_deps.add(dep_str)
 
         return (html, deps)
 
diff --git a/tests/test_html_document.py b/tests/test_html_document.py
index f4609f3..35ccb2f 100644
--- a/tests/test_html_document.py
+++ b/tests/test_html_document.py
@@ -322,9 +322,9 @@ def test_json_roundtrip():
             x_str, deps_replace_pattern='<meta data-foo="">'
         ).render()
 
-        # Make sure both deps are present.
-        assert "testdep" in [d.name for d in rendered["dependencies"]]
-        assert "testdep2" in [d.name for d in rendered["dependencies"]]
+        # Make sure both deps are present and in the order they appear in x_str.
+        assert "testdep2" == rendered["dependencies"][0].name
+        assert "testdep" == rendered["dependencies"][1].name
 
         # Make sure testdep was deduplicated by HTMLTextDocument().render().
         assert rendered["dependencies"].count(testdep) == 1