From 29231a10eb983e25b59c8edc5b6abcb12dbaaabe Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene <willkg@mozilla.com>
Date: Sat, 21 Jan 2023 17:51:59 -0500
Subject: [PATCH 1/5] Convert tags, skip_tags, recognized_tags to sets; fix
 doctests; f-strings

This converts the "tags" argument to BleachHTMLParser to be a set.

This converts the "skip_tags" and "recognized_tags" arguments of the
linkify classes to be sets.

This updates the documentation, fixing example code so that tags,
skip_tags, recognized_tags, and protocols are all sets.

This also converts some string interpolation from %s style to f-strings.
---
 bleach/html5lib_shim.py | 242 ++++++++++++++++++++--------------------
 bleach/linkifier.py     |  14 +--
 bleach/sanitizer.py     |  35 +++---
 docs/clean.rst          |  31 ++---
 docs/linkify.rst        |  10 +-
 5 files changed, 166 insertions(+), 166 deletions(-)

diff --git a/bleach/html5lib_shim.py b/bleach/html5lib_shim.py
index 022fe19e..aa5189b1 100644
--- a/bleach/html5lib_shim.py
+++ b/bleach/html5lib_shim.py
@@ -81,127 +81,129 @@
 
 #: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
 #: https://html.spec.whatwg.org/multipage/indices.html#elements-3
-HTML_TAGS = [
-    "a",
-    "abbr",
-    "address",
-    "area",
-    "article",
-    "aside",
-    "audio",
-    "b",
-    "base",
-    "bdi",
-    "bdo",
-    "blockquote",
-    "body",
-    "br",
-    "button",
-    "canvas",
-    "caption",
-    "cite",
-    "code",
-    "col",
-    "colgroup",
-    "data",
-    "datalist",
-    "dd",
-    "del",
-    "details",
-    "dfn",
-    "dialog",
-    "div",
-    "dl",
-    "dt",
-    "em",
-    "embed",
-    "fieldset",
-    "figcaption",
-    "figure",
-    "footer",
-    "form",
-    "h1",
-    "h2",
-    "h3",
-    "h4",
-    "h5",
-    "h6",
-    "head",
-    "header",
-    "hgroup",
-    "hr",
-    "html",
-    "i",
-    "iframe",
-    "img",
-    "input",
-    "ins",
-    "kbd",
-    "keygen",
-    "label",
-    "legend",
-    "li",
-    "link",
-    "map",
-    "mark",
-    "menu",
-    "meta",
-    "meter",
-    "nav",
-    "noscript",
-    "object",
-    "ol",
-    "optgroup",
-    "option",
-    "output",
-    "p",
-    "param",
-    "picture",
-    "pre",
-    "progress",
-    "q",
-    "rp",
-    "rt",
-    "ruby",
-    "s",
-    "samp",
-    "script",
-    "section",
-    "select",
-    "slot",
-    "small",
-    "source",
-    "span",
-    "strong",
-    "style",
-    "sub",
-    "summary",
-    "sup",
-    "table",
-    "tbody",
-    "td",
-    "template",
-    "textarea",
-    "tfoot",
-    "th",
-    "thead",
-    "time",
-    "title",
-    "tr",
-    "track",
-    "u",
-    "ul",
-    "var",
-    "video",
-    "wbr",
-]
+HTML_TAGS = frozenset(
+    (
+        "a",
+        "abbr",
+        "address",
+        "area",
+        "article",
+        "aside",
+        "audio",
+        "b",
+        "base",
+        "bdi",
+        "bdo",
+        "blockquote",
+        "body",
+        "br",
+        "button",
+        "canvas",
+        "caption",
+        "cite",
+        "code",
+        "col",
+        "colgroup",
+        "data",
+        "datalist",
+        "dd",
+        "del",
+        "details",
+        "dfn",
+        "dialog",
+        "div",
+        "dl",
+        "dt",
+        "em",
+        "embed",
+        "fieldset",
+        "figcaption",
+        "figure",
+        "footer",
+        "form",
+        "h1",
+        "h2",
+        "h3",
+        "h4",
+        "h5",
+        "h6",
+        "head",
+        "header",
+        "hgroup",
+        "hr",
+        "html",
+        "i",
+        "iframe",
+        "img",
+        "input",
+        "ins",
+        "kbd",
+        "keygen",
+        "label",
+        "legend",
+        "li",
+        "link",
+        "map",
+        "mark",
+        "menu",
+        "meta",
+        "meter",
+        "nav",
+        "noscript",
+        "object",
+        "ol",
+        "optgroup",
+        "option",
+        "output",
+        "p",
+        "param",
+        "picture",
+        "pre",
+        "progress",
+        "q",
+        "rp",
+        "rt",
+        "ruby",
+        "s",
+        "samp",
+        "script",
+        "section",
+        "select",
+        "slot",
+        "small",
+        "source",
+        "span",
+        "strong",
+        "style",
+        "sub",
+        "summary",
+        "sup",
+        "table",
+        "tbody",
+        "td",
+        "template",
+        "textarea",
+        "tfoot",
+        "th",
+        "thead",
+        "time",
+        "title",
+        "tr",
+        "track",
+        "u",
+        "ul",
+        "var",
+        "video",
+        "wbr",
+    )
+)
 
 
 #: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
 #: from mozilla on 2019.07.11
 #: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
 HTML_TAGS_BLOCK_LEVEL = frozenset(
-    [
+    (
         "address",
         "article",
         "aside",
@@ -235,7 +237,7 @@
         "section",
         "table",
         "ul",
-    ]
+    )
 )
 
 
@@ -476,7 +478,7 @@ class BleachHTMLParser(HTMLParser):
 
     def __init__(self, tags, strip, consume_entities, **kwargs):
         """
-        :arg tags: list of allowed tags--everything else is either stripped or
+        :arg tags: set of allowed tags--everything else is either stripped or
             escaped; if None, then this doesn't look at tags at all
         :arg strip: whether to strip disallowed tags (True) or escape them (False);
             if tags=None, then this doesn't have any effect
@@ -484,7 +486,9 @@ def __init__(self, tags, strip, consume_entities, **kwargs):
             leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
 
         """
-        self.tags = [tag.lower() for tag in tags] if tags is not None else None
+        self.tags = (
+            frozenset((tag.lower() for tag in tags)) if tags is not None else None
+        )
         self.strip = strip
         self.consume_entities = consume_entities
         super().__init__(**kwargs)
@@ -694,7 +698,7 @@ def escape_base_amp(self, stoken):
                 # Only leave entities in that are not ambiguous. If they're
                 # ambiguous, then we escape the ampersand.
                 if entity is not None and convert_entity(entity) is not None:
-                    yield "&" + entity + ";"
+                    yield f"&{entity};"
 
                     # Length of the entity plus 2--one for & at the beginning
                     # and one for ; at the end
diff --git a/bleach/linkifier.py b/bleach/linkifier.py
index 343f374f..183eb5ba 100644
--- a/bleach/linkifier.py
+++ b/bleach/linkifier.py
@@ -120,8 +120,8 @@ def __init__(
         :arg list callbacks: list of callbacks to run when adjusting tag attributes;
             defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
 
-        :arg list skip_tags: list of tags that you don't want to linkify the
-            contents of; for example, you could set this to ``['pre']`` to skip
+        :arg set skip_tags: set of tags that you don't want to linkify the
+            contents of; for example, you could set this to ``{'pre'}`` to skip
             linkifying contents of ``pre`` tags
 
         :arg bool parse_email: whether or not to linkify email addresses
@@ -130,7 +130,7 @@ def __init__(
 
         :arg email_re: email matching regex
 
-        :arg list recognized_tags: the list of tags that linkify knows about;
+        :arg set recognized_tags: the list of tags that linkify knows about;
             everything else gets escaped
 
         :returns: linkified text as unicode
@@ -145,7 +145,7 @@ def __init__(
         # Create a parser/tokenizer that allows all HTML tags and escapes
         # anything not in that list.
         self.parser = html5lib_shim.BleachHTMLParser(
-            tags=recognized_tags,
+            tags=frozenset(recognized_tags),
             strip=False,
             consume_entities=False,
             namespaceHTMLElements=False,
@@ -221,8 +221,8 @@ def __init__(
         :arg list callbacks: list of callbacks to run when adjusting tag attributes;
             defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
 
-        :arg list skip_tags: list of tags that you don't want to linkify the
-            contents of; for example, you could set this to ``['pre']`` to skip
+        :arg set skip_tags: set of tags that you don't want to linkify the
+            contents of; for example, you could set this to ``{'pre'}`` to skip
             linkifying contents of ``pre`` tags
 
         :arg bool parse_email: whether or not to linkify email addresses
@@ -235,7 +235,7 @@ def __init__(
         super().__init__(source)
 
         self.callbacks = callbacks or []
-        self.skip_tags = skip_tags or []
+        self.skip_tags = skip_tags or {}
         self.parse_email = parse_email
 
         self.url_re = url_re
diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py
index 35ccf71e..8662a879 100644
--- a/bleach/sanitizer.py
+++ b/bleach/sanitizer.py
@@ -35,7 +35,7 @@
 }
 
 #: List of allowed protocols
-ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
+ALLOWED_PROTOCOLS = frozenset(("http", "https", "mailto"))
 
 #: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
 INVISIBLE_CHARACTERS = "".join(
@@ -180,9 +180,8 @@ def clean(self, text):
         """
         if not isinstance(text, str):
             message = (
-                "argument cannot be of '{name}' type, must be of text type".format(
-                    name=text.__class__.__name__
-                )
+                f"argument cannot be of {text.__class__.__name__!r} type, "
+                + "must be of text type"
             )
             raise TypeError(message)
 
@@ -308,7 +307,7 @@ def __init__(
         html5lib_shim.Filter.__init__(self, source)
 
         self.allowed_tags = frozenset(allowed_tags)
-        self.allowed_protocols = allowed_protocols
+        self.allowed_protocols = frozenset(allowed_protocols)
 
         self.attr_filter = attribute_filter_factory(attributes)
         self.strip_disallowed_tags = strip_disallowed_tags
@@ -603,7 +602,7 @@ def allow_token(self, token):
     def disallowed_token(self, token):
         token_type = token["type"]
         if token_type == "EndTag":
-            token["data"] = "</%s>" % token["name"]
+            token["data"] = f"</{token['name']}>"
 
         elif token["data"]:
             assert token_type in ("StartTag", "EmptyTag")
@@ -619,25 +618,19 @@ def disallowed_token(self, token):
                 if ns is None or ns not in html5lib_shim.prefixes:
                     namespaced_name = name
                 else:
-                    namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
-
-                attrs.append(
-                    ' %s="%s"'
-                    % (
-                        namespaced_name,
-                        # NOTE(willkg): HTMLSerializer escapes attribute values
-                        # already, so if we do it here (like HTMLSerializer does),
-                        # then we end up double-escaping.
-                        v,
-                    )
-                )
-            token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
+                    namespaced_name = f"{html5lib_shim.prefixes[ns]}:{name}"
+
+                # NOTE(willkg): HTMLSerializer escapes attribute values
+                # already, so if we do it here (like HTMLSerializer does),
+                # then we end up double-escaping.
+                attrs.append(f' {namespaced_name}="{v}"')
+            token["data"] = f"<{token['name']}{''.join(attrs)}>"
 
         else:
-            token["data"] = "<%s>" % token["name"]
+            token["data"] = f"<{token['name']}>"
 
         if token.get("selfClosing"):
-            token["data"] = token["data"][:-1] + "/>"
+            token["data"] = f"{token['data'][:-1]}/>"
 
         token["type"] = "Characters"
 
diff --git a/docs/clean.rst b/docs/clean.rst
index 9ebf74bb..bcc98112 100644
--- a/docs/clean.rst
+++ b/docs/clean.rst
@@ -21,14 +21,18 @@ can be used in HTML as is.
    templates (mustache, handlebars, angular, jsx, etc), JSON, xhtml, SVG, or
    other contexts.
 
-   For example, this is a safe use of ``clean`` output in an HTML context::
+   For example, this is a safe use of ``clean`` output in an HTML context:
+
+   .. code-block:: html
 
      <p>
        {{ bleach.clean(user_bio) }}
      </p>
 
 
-   This is **not a safe** use of ``clean`` output in an HTML attribute::
+   This is **not a safe** use of ``clean`` output in an HTML attribute:
+
+   .. code-block:: html
 
      <body data-bio="{{ bleach.clean(user_bio) }}">
 
@@ -106,7 +110,7 @@ For example:
 
    >>> bleach.clean(
    ...     '<p class="foo" style="color: red; font-weight: bold;">blah blah blah</p>',
-   ...     tags=['p'],
+   ...     tags={'p'},
    ...     attributes=['class'],
    ... )
    '<p class="foo">blah blah blah</p>'
@@ -134,7 +138,7 @@ and "class" for any tag (including "a" and "img"):
 
    >>> bleach.clean(
    ...    '<img alt="an example" width=500>',
-   ...    tags=['img'],
+   ...    tags={'img'},
    ...    attributes=attrs
    ... )
    '<img alt="an example">'
@@ -160,7 +164,7 @@ For example:
 
    >>> bleach.clean(
    ...    '<a href="http://example.com" title="link">link</a>',
-   ...    tags=['a'],
+   ...    tags={'a'},
    ...    attributes=allow_h,
    ... )
    '<a href="http://example.com">link</a>'
@@ -184,7 +188,7 @@ attributes for specified tags:
 
    >>> bleach.clean(
    ...    '<img src="http://example.com" alt="an example">',
-   ...    tags=['img'],
+   ...    tags={'img'},
    ...    attributes={
    ...        'img': allow_src
    ...    }
@@ -213,7 +217,7 @@ For example, this sets allowed protocols to http, https and smb:
 
    >>> bleach.clean(
    ...     '<a href="smb://more_text">allowed protocol</a>',
-   ...     protocols=['http', 'https', 'smb']
+   ...     protocols={'http', 'https', 'smb'}
    ... )
    '<a href="smb://more_text">allowed protocol</a>'
 
@@ -224,9 +228,10 @@ This adds smb to the Bleach-specified set of allowed protocols:
 
    >>> import bleach
 
+   >>> my_protocols = bleach.ALLOWED_PROTOCOLS.union({'smb'})
    >>> bleach.clean(
    ...     '<a href="smb://more_text">allowed protocol</a>',
-   ...     protocols=bleach.ALLOWED_PROTOCOLS + ['smb']
+   ...     protocols=my_protocols
    ... )
    '<a href="smb://more_text">allowed protocol</a>'
 
@@ -249,7 +254,7 @@ and invalid markup. For example:
    >>> bleach.clean('<span>is not allowed</span>')
    '&lt;span&gt;is not allowed&lt;/span&gt;'
 
-   >>> bleach.clean('<b><span>is not allowed</span></b>', tags=['b'])
+   >>> bleach.clean('<b><span>is not allowed</span></b>', tags={'b'})
    '<b>&lt;span&gt;is not allowed&lt;/span&gt;</b>'
 
 
@@ -263,7 +268,7 @@ If you would rather Bleach stripped this markup entirely, you can pass
    >>> bleach.clean('<span>is not allowed</span>', strip=True)
    'is not allowed'
 
-   >>> bleach.clean('<b><span>is not allowed</span></b>', tags=['b'], strip=True)
+   >>> bleach.clean('<b><span>is not allowed</span></b>', tags={'b'}, strip=True)
    '<b>is not allowed</b>'
 
 
@@ -309,7 +314,7 @@ For example:
 
    >>> css_sanitizer = CSSSanitizer(allowed_css_properties=["color", "font-weight"])
 
-   >>> tags = ['p', 'em', 'strong']
+   >>> tags = {'p', 'em', 'strong'}
    >>> attrs = {
    ...     '*': ['style']
    ... }
@@ -386,7 +391,7 @@ Trivial Filter example:
    ...     'img': ['rel', 'src']
    ... }
    ...
-   >>> TAGS = ['img']
+   >>> TAGS = {'img'}
    >>> cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])
    >>> dirty = 'this is cute! <img src="http://example.com/puppy.jpg" rel="nofollow">'
    >>> cleaner.clean(dirty)
@@ -411,5 +416,3 @@ Using ``bleach.sanitizer.BleachSanitizerFilter``
 use an html5lib filter.
 
 .. autoclass:: bleach.sanitizer.BleachSanitizerFilter
-
-
diff --git a/docs/linkify.rst b/docs/linkify.rst
index 13a1ed1c..a65f9e99 100644
--- a/docs/linkify.rst
+++ b/docs/linkify.rst
@@ -319,7 +319,7 @@ instance.
 
    >>> from bleach.linkifier import Linker
 
-   >>> linker = Linker(skip_tags=['pre'])
+   >>> linker = Linker(skip_tags={'pre'})
    >>> linker.linkify('a b c http://example.com d e f')
    'a b c <a href="http://example.com" rel="nofollow">http://example.com</a> d e f'
 
@@ -410,11 +410,11 @@ For example, using all the defaults:
    >>> from bleach import Cleaner
    >>> from bleach.linkifier import LinkifyFilter
 
-   >>> cleaner = Cleaner(tags=['pre'])
+   >>> cleaner = Cleaner(tags={'pre'})
    >>> cleaner.clean('<pre>http://example.com</pre>')
    '<pre>http://example.com</pre>'
 
-   >>> cleaner = Cleaner(tags=['pre'], filters=[LinkifyFilter])
+   >>> cleaner = Cleaner(tags={'pre'}, filters=[LinkifyFilter])
    >>> cleaner.clean('<pre>http://example.com</pre>')
    '<pre><a href="http://example.com" rel="nofollow">http://example.com</a></pre>'
 
@@ -429,8 +429,8 @@ And passing parameters to ``LinkifyFilter``:
    >>> from bleach.linkifier import LinkifyFilter
 
    >>> cleaner = Cleaner(
-   ...     tags=['pre'],
-   ...     filters=[partial(LinkifyFilter, skip_tags=['pre'])]
+   ...     tags={'pre'},
+   ...     filters=[partial(LinkifyFilter, skip_tags={'pre'})]
    ... )
    ...
    >>> cleaner.clean('<pre>http://example.com</pre>')

From 3085abc6746951224a594493f5b261932b36beae Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene <willkg@mozilla.com>
Date: Sat, 21 Jan 2023 18:09:54 -0500
Subject: [PATCH 2/5] Fix test data to pass sets instead of lists

This fixes the test data to pass sets instead of lists for "tags",
"skip_tags", "recognized_tags", and "protocols".
---
 bleach/linkifier.py   |  3 +-
 tests/test_clean.py   | 94 +++++++++++++++++++++----------------------
 tests/test_css.py     |  4 +-
 tests/test_linkify.py | 32 +++++++--------
 4 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/bleach/linkifier.py b/bleach/linkifier.py
index 183eb5ba..b5a3041e 100644
--- a/bleach/linkifier.py
+++ b/bleach/linkifier.py
@@ -122,7 +122,8 @@ def __init__(
 
         :arg set skip_tags: set of tags that you don't want to linkify the
             contents of; for example, you could set this to ``{'pre'}`` to skip
-            linkifying contents of ``pre`` tags
+            linkifying contents of ``pre`` tags; ``None`` means you don't
+            want linkify to skip any tags
 
         :arg bool parse_email: whether or not to linkify email addresses
 
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 10a91fd0..73946a1f 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -29,7 +29,7 @@ def test_clean_idempotent(data):
 
 
 def test_clean_idempotent_img():
-    tags = ["img"]
+    tags = {"img"}
     dirty = '<imr src="http://example.com?foo=bar&bar=foo&amp;biz=bash">'
     assert clean(clean(dirty, tags=tags), tags=tags) == clean(dirty, tags=tags)
 
@@ -254,21 +254,21 @@ def test_character_entities_handling(text, expected):
         # a tag is disallowed, so it's stripped
         (
             '<p><a href="http://example.com/">link text</a></p>',
-            {"tags": ["p"]},
+            {"tags": {"p"}},
             "<p>link text</p>",
         ),
         # Test nested disallowed tag
         (
             "<p><span>multiply <span>nested <span>text</span></span></span></p>",
-            {"tags": ["p"]},
+            {"tags": {"p"}},
             "<p>multiply nested text</p>",
         ),
         # (#271)
-        ("<ul><li><script></li></ul>", {"tags": ["ul", "li"]}, "<ul><li></li></ul>"),
+        ("<ul><li><script></li></ul>", {"tags": {"ul", "li"}}, "<ul><li></li></ul>"),
         # Test disallowed tag that's deep in the tree
         (
             '<p><a href="http://example.com/"><img src="http://example.com/"></a></p>',
-            {"tags": ["a", "p"]},
+            {"tags": {"a", "p"}},
             '<p><a href="http://example.com/"></a></p>',
         ),
         # Test isindex -- the parser expands this to a prompt (#279)
@@ -342,9 +342,7 @@ def test_href_with_wrong_tag():
 
 
 def test_disallowed_attr():
-    IMG = [
-        "img",
-    ]
+    IMG = {"img"}
     IMG_ATTR = ["src"]
 
     assert clean('<a onclick="evil" href="test">test</a>') == '<a href="test">test</a>'
@@ -373,9 +371,7 @@ def test_unquoted_event_handler_attr_value():
 
 
 def test_invalid_filter_attr():
-    IMG = [
-        "img",
-    ]
+    IMG = {"img"}
     IMG_ATTR = {
         "img": lambda tag, name, val: name == "src" and val == "http://example.com/"
     }
@@ -400,7 +396,7 @@ def test_invalid_filter_attr():
 
 def test_poster_attribute():
     """Poster attributes should not allow javascript."""
-    tags = ["video"]
+    tags = {"video"}
     attrs = {"video": ["poster"]}
 
     test = '<video poster="javascript:alert(1)"></video>'
@@ -413,7 +409,7 @@ def test_poster_attribute():
 def test_attributes_callable():
     """Verify attributes can take a callable"""
     ATTRS = lambda tag, name, val: name == "title"
-    TAGS = ["a"]
+    TAGS = {"a"}
 
     text = '<a href="/foo" title="blah">example</a>'
     assert clean(text, tags=TAGS, attributes=ATTRS) == '<a title="blah">example</a>'
@@ -425,7 +421,7 @@ def test_attributes_wildcard():
         "*": ["id"],
         "img": ["src"],
     }
-    TAGS = ["img", "em"]
+    TAGS = {"img", "em"}
 
     text = (
         'both <em id="foo" style="color: black">can</em> have <img id="bar" src="foo"/>'
@@ -439,7 +435,7 @@ def test_attributes_wildcard():
 def test_attributes_wildcard_callable():
     """Verify attributes[*] callable works"""
     ATTRS = {"*": lambda tag, name, val: name == "title"}
-    TAGS = ["a"]
+    TAGS = {"a"}
 
     assert (
         clean('<a href="/foo" title="blah">example</a>', tags=TAGS, attributes=ATTRS)
@@ -456,7 +452,7 @@ def img_test(tag, name, val):
     ATTRS = {
         "img": img_test,
     }
-    TAGS = ["img"]
+    TAGS = {"img"}
 
     text = 'foo <img src="http://example.com" alt="blah"> baz'
     assert clean(text, tags=TAGS, attributes=ATTRS) == "foo <img> baz"
@@ -470,7 +466,7 @@ def img_test(tag, name, val):
 def test_attributes_tag_list():
     """Verify attributes[tag] list works"""
     ATTRS = {"a": ["title"]}
-    TAGS = ["a"]
+    TAGS = {"a"}
 
     assert (
         clean('<a href="/foo" title="blah">example</a>', tags=TAGS, attributes=ATTRS)
@@ -481,7 +477,7 @@ def test_attributes_tag_list():
 def test_attributes_list():
     """Verify attributes list works"""
     ATTRS = ["title"]
-    TAGS = ["a"]
+    TAGS = {"a"}
 
     text = '<a href="/foo" title="blah">example</a>'
     assert clean(text, tags=TAGS, attributes=ATTRS) == '<a title="blah">example</a>'
@@ -518,83 +514,83 @@ def test_attributes_list():
         # Specified protocols are allowed
         (
             '<a href="myprotocol://more_text">allowed href</a>',
-            {"protocols": ["myprotocol"]},
+            {"protocols": {"myprotocol"}},
             '<a href="myprotocol://more_text">allowed href</a>',
         ),
         # Unspecified protocols are not allowed
         (
             '<a href="http://example.com">invalid href</a>',
-            {"protocols": ["myprotocol"]},
+            {"protocols": {"myprotocol"}},
             "<a>invalid href</a>",
         ),
         # Anchors are ok
         (
             '<a href="#section-1">foo</a>',
-            {"protocols": []},
+            {"protocols": set()},
             '<a href="#section-1">foo</a>',
         ),
         # Anchor that looks like a domain is ok
         (
             '<a href="#example.com">foo</a>',
-            {"protocols": []},
+            {"protocols": set()},
             '<a href="#example.com">foo</a>',
         ),
         # Allow implicit http/https if allowed
         (
             '<a href="/path">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="/path">valid</a>',
         ),
         (
             '<a href="/path">valid</a>',
-            {"protocols": ["https"]},
+            {"protocols": {"https"}},
             '<a href="/path">valid</a>',
         ),
         (
             '<a href="example.com">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="example.com">valid</a>',
         ),
         (
             '<a href="example.com:8000">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="example.com:8000">valid</a>',
         ),
         (
             '<a href="localhost">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="localhost">valid</a>',
         ),
         (
             '<a href="localhost:8000">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="localhost:8000">valid</a>',
         ),
         (
             '<a href="192.168.100.100">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="192.168.100.100">valid</a>',
         ),
         (
             '<a href="192.168.100.100:8000">valid</a>',
-            {"protocols": ["http"]},
+            {"protocols": {"http"}},
             '<a href="192.168.100.100:8000">valid</a>',
         ),
         pytest.param(
             *(
                 '<a href="192.168.100.100:8000/foo#bar">valid</a>',
-                {"protocols": ["http"]},
+                {"protocols": {"http"}},
                 '<a href="192.168.100.100:8000/foo#bar">valid</a>',
             ),
             marks=pytest.mark.xfail,
         ),
         # Disallow implicit http/https if disallowed
-        ('<a href="example.com">foo</a>', {"protocols": []}, "<a>foo</a>"),
-        ('<a href="example.com:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
-        ('<a href="localhost">foo</a>', {"protocols": []}, "<a>foo</a>"),
-        ('<a href="localhost:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
-        ('<a href="192.168.100.100">foo</a>', {"protocols": []}, "<a>foo</a>"),
-        ('<a href="192.168.100.100:8000">foo</a>', {"protocols": []}, "<a>foo</a>"),
+        ('<a href="example.com">foo</a>', {"protocols": set()}, "<a>foo</a>"),
+        ('<a href="example.com:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
+        ('<a href="localhost">foo</a>', {"protocols": set()}, "<a>foo</a>"),
+        ('<a href="localhost:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
+        ('<a href="192.168.100.100">foo</a>', {"protocols": set()}, "<a>foo</a>"),
+        ('<a href="192.168.100.100:8000">foo</a>', {"protocols": set()}, "<a>foo</a>"),
         # Disallowed protocols with sneaky character entities
         ('<a href="javas&#x09;cript:alert(1)">alert</a>', {}, "<a>alert</a>"),
         ('<a href="&#14;javascript:alert(1)">alert</a>', {}, "<a>alert</a>"),
@@ -613,7 +609,7 @@ def test_uri_value_allowed_protocols(data, kwargs, expected):
 def test_svg_attr_val_allows_ref():
     """Unescape values in svg attrs that allow url references"""
     # Local IRI, so keep it
-    TAGS = ["svg", "rect"]
+    TAGS = {"svg", "rect"}
     ATTRS = {
         "rect": ["fill"],
     }
@@ -625,7 +621,7 @@ def test_svg_attr_val_allows_ref():
     )
 
     # Non-local IRI, so drop it
-    TAGS = ["svg", "rect"]
+    TAGS = {"svg", "rect"}
     ATTRS = {
         "rect": ["fill"],
     }
@@ -649,7 +645,7 @@ def test_svg_attr_val_allows_ref():
 )
 def test_svg_allow_local_href(text, expected):
     """Keep local hrefs for svg elements"""
-    TAGS = ["svg", "pattern"]
+    TAGS = {"svg", "pattern"}
     ATTRS = {
         "pattern": ["id", "href"],
     }
@@ -671,7 +667,7 @@ def test_svg_allow_local_href(text, expected):
 )
 def test_svg_allow_local_href_nonlocal(text, expected):
     """Drop non-local hrefs for svg elements"""
-    TAGS = ["svg", "pattern"]
+    TAGS = {"svg", "pattern"}
     ATTRS = {
         "pattern": ["id", "href"],
     }
@@ -741,7 +737,7 @@ def test_nonexistent_namespace():
     ],
 )
 def test_self_closing_tags_self_close(tag):
-    assert clean(f"<{tag}>", tags=[tag]) == f"<{tag}>"
+    assert clean(f"<{tag}>", tags={tag}) == f"<{tag}>"
 
 
 # tags that get content passed through (i.e. parsed with parseRCDataRawtext)
@@ -770,7 +766,7 @@ def test_self_closing_tags_self_close(tag):
 )
 def test_noscript_rawtag_(raw_tag, data, expected):
     # refs: bug 1615315 / GHSA-q65m-pv3f-wr5r
-    assert clean(data, tags=["noscript", raw_tag]) == expected
+    assert clean(data, tags={"noscript", raw_tag}) == expected
 
 
 @pytest.mark.parametrize(
@@ -803,7 +799,7 @@ def test_namespace_rc_data_element_strip_false(
     #
     # browsers will pull the img out of the namespace and rc data tag resulting in XSS
     assert (
-        clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
+        clean(data, tags={namespace_tag, rc_data_element_tag}, strip=False) == expected
     )
 
 
@@ -1087,7 +1083,7 @@ def test_html_comments_escaped(namespace_tag, end_tag, eject_tag, data, expected
     #
     # the ejected elements can trigger XSS
     assert (
-        clean(data, tags=[namespace_tag, end_tag, eject_tag], strip_comments=False)
+        clean(data, tags={namespace_tag, end_tag, eject_tag}, strip_comments=False)
         == expected
     )
 
@@ -1125,7 +1121,7 @@ def test_strip_respects_block_level_elements(text, expected):
     Insert a newline between block level elements
     https://github.com/mozilla/bleach/issues/369
     """
-    assert clean(text, tags=[], strip=True) == expected
+    assert clean(text, tags=set(), strip=True) == expected
 
 
 def get_ids_and_tests():
@@ -1171,7 +1167,7 @@ def test_regressions(test_case):
 
 def test_preserves_attributes_order():
     html = """<a target="_blank" href="https://example.com">Link</a>"""
-    cleaned_html = clean(html, tags=["a"], attributes={"a": ["href", "target"]})
+    cleaned_html = clean(html, tags={"a"}, attributes={"a": ["href", "target"]})
 
     assert cleaned_html == html
 
@@ -1192,7 +1188,7 @@ def test_css_sanitizer_warning(attr):
 
 class TestCleaner:
     def test_basics(self):
-        TAGS = ["span", "br"]
+        TAGS = {"span", "br"}
         ATTRS = {"span": ["style"]}
 
         cleaner = Cleaner(tags=TAGS, attributes=ATTRS)
@@ -1214,7 +1210,7 @@ def __iter__(self):
                     yield token
 
         ATTRS = {"img": ["rel", "src"]}
-        TAGS = ["img"]
+        TAGS = {"img"}
 
         cleaner = Cleaner(tags=TAGS, attributes=ATTRS, filters=[MooFilter])
 
diff --git a/tests/test_css.py b/tests/test_css.py
index d304dfca..7a90961e 100644
--- a/tests/test_css.py
+++ b/tests/test_css.py
@@ -8,7 +8,7 @@
 from bleach.css_sanitizer import CSSSanitizer  # noqa
 
 
-clean = partial(clean, tags=["p"], attributes=["style"])
+clean = partial(clean, tags={"p"}, attributes=["style"])
 
 
 @pytest.mark.parametrize(
@@ -250,7 +250,7 @@ def test_css_parsing_with_entities(data, styles, expected):
     css_sanitizer = CSSSanitizer(allowed_css_properties=styles)
     assert (
         clean(
-            data, tags=["p"], attributes={"p": ["style"]}, css_sanitizer=css_sanitizer
+            data, tags={"p"}, attributes={"p": ["style"]}, css_sanitizer=css_sanitizer
         )
         == expected
     )
diff --git a/tests/test_linkify.py b/tests/test_linkify.py
index a6d96e23..bd838133 100644
--- a/tests/test_linkify.py
+++ b/tests/test_linkify.py
@@ -287,18 +287,16 @@ def test_add_rel_nofollow():
 
 
 def test_url_with_path():
-    assert (
-        linkify("http://example.com/path/to/file")
-        == '<a href="http://example.com/path/to/file" rel="nofollow">'
-        "http://example.com/path/to/file</a>"
+    assert linkify("http://example.com/path/to/file") == (
+        '<a href="http://example.com/path/to/file" rel="nofollow">'
+        + "http://example.com/path/to/file</a>"
     )
 
 
 def test_link_ftp():
-    assert (
-        linkify("ftp://ftp.mozilla.org/some/file")
-        == '<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
-        "ftp://ftp.mozilla.org/some/file</a>"
+    assert linkify("ftp://ftp.mozilla.org/some/file") == (
+        '<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
+        + "ftp://ftp.mozilla.org/some/file</a>"
     )
 
 
@@ -342,10 +340,9 @@ def test_escaped_html():
 
 
 def test_link_http_complete():
-    assert (
-        linkify("https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f")
-        == '<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f" rel="nofollow">'
-        "https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>"
+    assert linkify("https://user:pass@ftp.mozilla.org/x/y.exe?a=b&c=d&e#f") == (
+        '<a href="https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f" rel="nofollow">'
+        + "https://user:pass@ftp.mozilla.org/x/y.exe?a=b&amp;c=d&amp;e#f</a>"
     )
 
 
@@ -381,17 +378,17 @@ def test_skip_tags():
         + '<pre><a href="http://xx.com" rel="nofollow">http://xx.com'
         + "</a></pre>"
     )
-    assert linkify(simple, skip_tags=["pre"]) == linked
+    assert linkify(simple, skip_tags={"pre"}) == linked
     assert linkify(simple) == all_linked
 
     already_linked = '<pre><a href="http://xx.com">xx</a></pre>'
     nofollowed = '<pre><a href="http://xx.com" rel="nofollow">xx</a></pre>'
     assert linkify(already_linked) == nofollowed
-    assert linkify(already_linked, skip_tags=["pre"]) == nofollowed
+    assert linkify(already_linked, skip_tags={"pre"}) == nofollowed
 
     assert linkify(
         "<pre><code>http://example.com</code></pre>http://example.com",
-        skip_tags=["pre"],
+        skip_tags={"pre"},
     ) == (
         "<pre><code>http://example.com</code></pre>"
         + '<a href="http://example.com" rel="nofollow">http://example.com</a>'
@@ -415,7 +412,6 @@ def test_libgl():
 )
 def test_end_of_sentence(url, periods):
     """example.com. should match."""
-
     assert (
         linkify(f"{url}{periods}")
         == f'<a href="http://{url}" rel="nofollow">{url}</a>{periods}'
@@ -723,14 +719,14 @@ def test_email_re_arg():
 def test_recognized_tags_arg():
     """Verifies that recognized_tags works"""
     # The html parser doesn't recognize "sarcasm" as a tag, so it escapes it
-    linker = Linker(recognized_tags=["p"])
+    linker = Linker(recognized_tags={"p"})
     assert (
         linker.linkify("<p>http://example.com/</p><sarcasm>")
         == '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p>&lt;sarcasm&gt;'  # noqa
     )
 
     # The html parser recognizes "sarcasm" as a tag and fixes it
-    linker = Linker(recognized_tags=["p", "sarcasm"])
+    linker = Linker(recognized_tags={"p", "sarcasm"})
     assert (
         linker.linkify("<p>http://example.com/</p><sarcasm>")
         == '<p><a href="http://example.com/" rel="nofollow">http://example.com/</a></p><sarcasm></sarcasm>'  # noqa

From b2d5c98a4150302e6c03a87653e8644c86542a74 Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene <willkg@mozilla.com>
Date: Sat, 21 Jan 2023 22:02:43 -0500
Subject: [PATCH 3/5] Fix one last "list" in the recognized_tags docstring

---
 bleach/linkifier.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bleach/linkifier.py b/bleach/linkifier.py
index b5a3041e..679d7ead 100644
--- a/bleach/linkifier.py
+++ b/bleach/linkifier.py
@@ -131,7 +131,7 @@ def __init__(
 
         :arg email_re: email matching regex
 
-        :arg set recognized_tags: the list of tags that linkify knows about;
+        :arg set recognized_tags: the set of tags that linkify knows about;
             everything else gets escaped
 
         :returns: linkified text as unicode

From 16795c57bddce4e9e2ef3b9899840d1f445b07b7 Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene <willkg@mozilla.com>
Date: Sat, 21 Jan 2023 22:07:52 -0500
Subject: [PATCH 4/5] Convert one last missed list to a set in docs/clean.rst

---
 docs/clean.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/clean.rst b/docs/clean.rst
index bcc98112..43c81bb8 100644
--- a/docs/clean.rst
+++ b/docs/clean.rst
@@ -61,7 +61,7 @@ For example:
 
    >>> bleach.clean(
    ...     '<b><i>an example</i></b>',
-   ...     tags=['b'],
+   ...     tags={'b'},
    ... )
    '<b>&lt;i&gt;an example&lt;/i&gt;</b>'
 

From aec2c0efc36c2a848abee3c9a60d3a68ff03a65c Mon Sep 17 00:00:00 2001
From: Will Kahn-Greene <willkg@mozilla.com>
Date: Sat, 21 Jan 2023 22:15:02 -0500
Subject: [PATCH 5/5] Switch set .union() to the | operator in docs/clean.rst

---
 docs/clean.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/clean.rst b/docs/clean.rst
index 43c81bb8..800cbdcf 100644
--- a/docs/clean.rst
+++ b/docs/clean.rst
@@ -228,10 +228,9 @@ This adds smb to the Bleach-specified set of allowed protocols:
 
    >>> import bleach
 
-   >>> my_protocols = bleach.ALLOWED_PROTOCOLS.union({'smb'})
    >>> bleach.clean(
    ...     '<a href="smb://more_text">allowed protocol</a>',
-   ...     protocols=my_protocols
+   ...     protocols=bleach.ALLOWED_PROTOCOLS | {'smb'}
    ... )
    '<a href="smb://more_text">allowed protocol</a>'