Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete stubs for bleach #9314

Merged
merged 28 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4375fb5
Mark `bleach` as complete
sobolevn Dec 1, 2022
49d8286
Fix CI
sobolevn Dec 1, 2022
b924759
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 1, 2022
146a810
Merge branch 'main' into mark-bleach
AlexWaygood Feb 19, 2023
18e31e5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 19, 2023
24e2ad3
Fix some obvious merge issues
AlexWaygood Feb 19, 2023
1b526fe
fix more mistakes when merging `main`
AlexWaygood Feb 19, 2023
198f6a6
Update linkifier.pyi
AlexWaygood Feb 19, 2023
22fd76e
Remove unused allowlist entries
AlexWaygood Feb 19, 2023
dc39112
Merge branch 'main' into mark-bleach
AlexWaygood Feb 21, 2023
3245722
Merge branch 'main' into mark-bleach
AlexWaygood Jul 22, 2023
c4767d5
Update stubs/bleach/METADATA.toml
AlexWaygood Jul 22, 2023
704cb27
Update stubs/bleach/bleach/html5lib_shim.pyi
AlexWaygood Jul 22, 2023
5950664
fix mypy
AlexWaygood Jul 22, 2023
c723045
Merge branch 'main' into mark-bleach
Avasam Nov 30, 2023
c9faed8
Post-merge remove bleach from pyrightconfig.stricter.json
Avasam Nov 30, 2023
7b72eef
Merge branch 'main' into mark-bleach
AlexWaygood Feb 5, 2024
96fc2c4
Merge branch 'main' of https://github.com/python/typeshed into mark-b…
Avasam Feb 20, 2024
c79dca1
Ran stubdefaulter
Avasam Feb 20, 2024
7723b65
Complete missing types
Avasam Feb 20, 2024
ea95d05
Address PR comments
Avasam Feb 20, 2024
a4ee82c
More PR comments
Avasam Feb 20, 2024
5a39614
Mark uppercase names as Final
Avasam Feb 20, 2024
3f865d3
Reduce allowlist entries and indicate possible runtime failure
Avasam Feb 21, 2024
5f48c83
typo
Avasam Feb 21, 2024
9c88154
Update stubs/html5lib/html5lib/_inputstream.pyi
Avasam Feb 21, 2024
e318888
Update stubs/bleach/bleach/html5lib_shim.pyi
Avasam Feb 21, 2024
c4aeaa5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pyrightconfig.stricter.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
"stubs/antlr4-python3-runtime",
"stubs/aws-xray-sdk",
"stubs/beautifulsoup4",
"stubs/bleach",
"stubs/boltons",
"stubs/boto",
"stubs/braintree",
Expand Down
8 changes: 6 additions & 2 deletions stubs/bleach/@tests/stubtest_allowlist.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
bleach.css_sanitizer # Requires tinycss2 to be installed
bleach.html5lib_shim.*
# Internal private stuff:
bleach._vendor.*

# Hacks:
bleach.html5lib_shim.InputStreamWithMemory.changeEncoding
Avasam marked this conversation as resolved.
Show resolved Hide resolved
bleach.html5lib_shim.InputStreamWithMemory.reset
4 changes: 2 additions & 2 deletions stubs/bleach/METADATA.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version = "6.1.*"
requires = ["types-html5lib"]
upstream_repository = "https://github.com/mozilla/bleach"
partial_stub = true

[tool.stubtest]
ignore_missing_stub = true
extras = ["css"]
5 changes: 3 additions & 2 deletions stubs/bleach/bleach/css_sanitizer.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from collections.abc import Container
from typing import Final

ALLOWED_CSS_PROPERTIES: frozenset[str]
ALLOWED_SVG_PROPERTIES: frozenset[str]
ALLOWED_CSS_PROPERTIES: Final[frozenset[str]]
ALLOWED_SVG_PROPERTIES: Final[frozenset[str]]

class CSSSanitizer:
allowed_css_properties: Container[str]
Expand Down
71 changes: 55 additions & 16 deletions stubs/bleach/bleach/html5lib_shim.pyi
Original file line number Diff line number Diff line change
@@ -1,30 +1,69 @@
from _typeshed import Incomplete
from collections.abc import Generator, Iterable, Iterator
import re
from codecs import CodecInfo
from collections.abc import Collection, Generator, Iterable, Iterator
from typing import Any, Final, Protocol

class HTMLParser: # actually html5lib.HTMLParser
def __getattr__(self, __name: str) -> Incomplete: ...
# We don't re-export any `html5lib` types / values here, because they are not
# really public and may change at any time. This is just a helper module,
# import things directly from `html5lib` instead!
from html5lib import HTMLParser
from html5lib._inputstream import HTMLUnicodeInputStream
from html5lib._tokenizer import HTMLTokenizer
from html5lib._trie import Trie
from html5lib.serializer import HTMLSerializer
from html5lib.treewalkers.base import TreeWalker

class Filter: # actually html5lib.filters.base.Filter
source: Incomplete
def __init__(self, source) -> None: ...
def __iter__(self) -> Iterator[Incomplete]: ...
def __getattr__(self, name: str) -> Incomplete: ... # copy attributes from source
# Is actually webencodings.Encoding
# Declared as a structural Protocol exposing the two attributes (`name`,
# `codec_info`) and the two-argument constructor listed below.
# NOTE(review): an identical `_Encoding` Protocol is also declared in
# stubs/html5lib/html5lib/_inputstream.pyi -- consider importing that one
# here instead of keeping two copies in sync.
class _Encoding(Protocol):
name: str
codec_info: CodecInfo
def __init__(self, name: str, codec_info: CodecInfo) -> None: ...

class SanitizerFilter: # actually html5lib.filters.sanitizer.Filter
def __getattr__(self, __name: str) -> Incomplete: ...
HTML_TAGS: Final[frozenset[str]]
HTML_TAGS_BLOCK_LEVEL: Final[frozenset[str]]
AMP_SPLIT_RE: Final[re.Pattern[str]]
ENTITIES: Final[dict[str, str]]
ENTITIES_TRIE: Final[Trie]
TAG_TOKEN_TYPES: Final[set[int]]
TAG_TOKEN_TYPE_CHARACTERS: Final[int]
TAG_TOKEN_TYPE_END: Final[int]
TAG_TOKEN_TYPE_PARSEERROR: Final[int]
TAG_TOKEN_TYPE_START: Final[int]

class HTMLSerializer: # actually html5lib.serializer.HTMLSerializer
def __getattr__(self, __name: str) -> Incomplete: ...
# Stub for a stream wrapper constructed from an html5lib
# HTMLUnicodeInputStream (`inner_stream`).
# `position` is taken directly from HTMLUnicodeInputStream.position, while
# `errors` and `charEncoding` are declared as properties with the same types
# as the HTMLUnicodeInputStream attributes of those names.
# NOTE(review): `changeEncoding` and `reset` are listed under "Hacks" in
# stubs/bleach/@tests/stubtest_allowlist.txt -- presumably because these
# simplified signatures differ from the runtime objects; confirm.
class InputStreamWithMemory:
position = HTMLUnicodeInputStream.position
def __init__(self, inner_stream: HTMLUnicodeInputStream) -> None: ...
def reset(self) -> None: ...
@property
def errors(self) -> list[str]: ...
@property
def charEncoding(self) -> tuple[_Encoding, str]: ...
# Is a property returning a method, simplified:
def changeEncoding(self, newEncoding: str) -> None: ...
def char(self) -> str: ...
AlexWaygood marked this conversation as resolved.
Show resolved Hide resolved
def charsUntil(self, characters: Collection[str], opposite: bool = False) -> str: ...
def unget(self, char: str | None) -> None: ...
Avasam marked this conversation as resolved.
Show resolved Hide resolved
def get_tag(self) -> str: ...
def start_tag(self) -> None: ...

# Stub for the HTMLTokenizer subclass declared in this module.
# `stream` is typed as InputStreamWithMemory and `emitted_last_token` as a
# token dict (`dict[str, Any]`) or None.
# `**kwargs` is typed Any; presumably forwarded to HTMLTokenizer -- confirm
# against the runtime constructor.
class BleachHTMLTokenizer(HTMLTokenizer):
consume_entities: bool
stream: InputStreamWithMemory
emitted_last_token: dict[str, Any] | None
def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...

# Stub for the HTMLParser subclass declared in this module; it records the
# `tags`, `strip` and `consume_entities` settings as attributes.
# NOTE(review): the two `__init__` lines below appear to be the before/after
# sides of the diff; they differ only in the `**kwargs: Any` annotation.
class BleachHTMLParser(HTMLParser):
tags: list[str] | None
strip: bool
consume_entities: bool
def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs) -> None: ...
def __init__(self, tags: Iterable[str] | None, strip: bool, consume_entities: bool, **kwargs: Any) -> None: ...

# Stub for the HTMLSerializer subclass declared in this module.
# Both `escape_base_amp` and `serialize` are typed as generators of `str`.
# NOTE(review): the two `serialize` lines below appear to be the before/after
# sides of the diff; the second annotates `treewalker` as TreeWalker and
# carries `# type: ignore[override]`, indicating its signature is reported as
# incompatible with the base-class `serialize` in the html5lib stubs.
class BleachHTMLSerializer(HTMLSerializer):
escape_rcdata: bool
def escape_base_amp(self, stoken: str) -> Generator[str, None, None]: ...
def serialize(self, treewalker, encoding: str | None = None) -> Generator[str, None, None]: ...
def serialize(self, treewalker: TreeWalker, encoding: str | None = None) -> Generator[str, None, None]: ... # type: ignore[override]

def __getattr__(__name: str) -> Incomplete: ...
# Module-level entity helpers; only their signatures are declared here.
# NOTE(review): `convert_entity` and `match_entity` may return None --
# presumably when no entity is recognised; confirm against the runtime module.
def convert_entity(value: str) -> str | None: ...
def convert_entities(text: str) -> str: ...
def match_entity(stream: str) -> str | None: ...
# Typed as an iterator of `str` pieces produced from `text`.
def next_possible_entity(text: str) -> Iterator[str]: ...
39 changes: 23 additions & 16 deletions stubs/bleach/bleach/linkifier.pyi
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
from _typeshed import Incomplete
from collections.abc import Container, Iterable, Iterator
from collections.abc import Container, Iterable, Iterator, Sequence
from re import Pattern
from typing import Any, Final
from typing_extensions import TypeAlias

from .callbacks import _Callback
from .html5lib_shim import Filter
from html5lib.filters.base import Filter
from html5lib.treewalkers.base import TreeWalker

DEFAULT_CALLBACKS: list[_Callback]
from .callbacks import _Callback, _HTMLAttrs

TLDS: list[str]
DEFAULT_CALLBACKS: Final[list[_Callback]]
TLDS: Final[list[str]]

def build_url_re(tlds: Iterable[str] = ..., protocols: Iterable[str] = ...) -> Pattern[str]: ...

URL_RE: Pattern[str]
PROTO_RE: Pattern[str]
URL_RE: Final[Pattern[str]]
PROTO_RE: Final[Pattern[str]]

def build_email_re(tlds: Iterable[str] = ...) -> Pattern[str]: ...

EMAIL_RE: Pattern[str]
EMAIL_RE: Final[Pattern[str]]

class Linker:
def __init__(
Expand All @@ -30,6 +33,10 @@ class Linker:
) -> None: ...
def linkify(self, text: str) -> str: ...

# TODO: `_Token` might be converted into a `TypedDict`,
# or the `html5lib` token type might be reused instead.
_Token: TypeAlias = dict[str, Any]

class LinkifyFilter(Filter):
callbacks: Iterable[_Callback]
skip_tags: Container[str]
Expand All @@ -38,18 +45,18 @@ class LinkifyFilter(Filter):
email_re: Pattern[str]
def __init__(
self,
source,
source: TreeWalker,
callbacks: Iterable[_Callback] | None = ...,
skip_tags: Container[str] | None = None,
parse_email: bool = False,
url_re: Pattern[str] = ...,
email_re: Pattern[str] = ...,
) -> None: ...
def apply_callbacks(self, attrs, is_new): ...
def extract_character_data(self, token_list): ...
def handle_email_addresses(self, src_iter): ...
def strip_non_url_bits(self, fragment): ...
def handle_links(self, src_iter): ...
def handle_a_tag(self, token_buffer): ...
def extract_entities(self, token): ...
def apply_callbacks(self, attrs: _HTMLAttrs, is_new: bool) -> _HTMLAttrs | None: ...
def extract_character_data(self, token_list: Iterable[_Token]) -> str: ...
def handle_email_addresses(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
def strip_non_url_bits(self, fragment: str) -> tuple[str, str, str]: ...
def handle_links(self, src_iter: Iterable[_Token]) -> Iterator[_Token]: ...
def handle_a_tag(self, token_buffer: Sequence[_Token]) -> Iterator[_Token]: ...
def extract_entities(self, token: _Token) -> Iterator[_Token]: ...
def __iter__(self) -> Iterator[Incomplete]: ...
1 change: 1 addition & 0 deletions stubs/bleach/bleach/parse_shim.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from urllib import parse as parse
50 changes: 27 additions & 23 deletions stubs/bleach/bleach/sanitizer.pyi
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
from _typeshed import Incomplete
from collections.abc import Callable, Iterable
from collections.abc import Callable, Container, Iterable, Iterator
from re import Pattern
from typing import Protocol
from typing import Final, Protocol
from typing_extensions import TypeAlias

from html5lib.filters.base import Filter
from html5lib.filters.sanitizer import Filter as SanitizerFilter
from html5lib.treewalkers.base import TreeWalker

from . import _HTMLAttrKey
from .css_sanitizer import CSSSanitizer
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer, SanitizerFilter
from .html5lib_shim import BleachHTMLParser, BleachHTMLSerializer
from .linkifier import _Token

ALLOWED_TAGS: Final[frozenset[str]]
ALLOWED_ATTRIBUTES: Final[dict[str, list[str]]]
ALLOWED_PROTOCOLS: Final[frozenset[str]]

ALLOWED_TAGS: frozenset[str]
ALLOWED_ATTRIBUTES: dict[str, list[str]]
ALLOWED_PROTOCOLS: frozenset[str]
INVISIBLE_CHARACTERS: Final[str]
INVISIBLE_CHARACTERS_RE: Final[Pattern[str]]
INVISIBLE_REPLACEMENT_CHAR: Final = "?"

INVISIBLE_CHARACTERS: str
INVISIBLE_CHARACTERS_RE: Pattern[str]
INVISIBLE_REPLACEMENT_CHAR: str
# Warning category derived from UserWarning.
# NOTE(review): presumably issued when CSS handling is requested without a
# `css_sanitizer` being configured -- confirm against bleach/sanitizer.py.
class NoCssSanitizerWarning(UserWarning): ...

# A html5lib Filter class
class _Filter(Protocol):
Expand All @@ -24,18 +31,16 @@ _AttributeFilter: TypeAlias = Callable[[str, str, str], bool]
_AttributeDict: TypeAlias = dict[str, list[str] | _AttributeFilter] | dict[str, list[str]] | dict[str, _AttributeFilter]
_Attributes: TypeAlias = _AttributeFilter | _AttributeDict | list[str]

_TreeWalker: TypeAlias = Callable[[Incomplete], Incomplete]

class Cleaner:
tags: Iterable[str]
attributes: _Attributes
protocols: Iterable[str]
strip: bool
strip_comments: bool
filters: Iterable[_Filter]
filters: Iterable[Filter]
css_sanitizer: CSSSanitizer | None
parser: BleachHTMLParser
walker: _TreeWalker
walker: TreeWalker
serializer: BleachHTMLSerializer
def __init__(
self,
Expand Down Expand Up @@ -63,7 +68,7 @@ class BleachSanitizerFilter(SanitizerFilter):
css_sanitizer: CSSSanitizer | None
def __init__(
self,
source,
source: TreeWalker,
allowed_tags: Iterable[str] = ...,
attributes: _Attributes = ...,
allowed_protocols: Iterable[str] = ...,
Expand All @@ -74,12 +79,11 @@ class BleachSanitizerFilter(SanitizerFilter):
strip_html_comments: bool = True,
css_sanitizer: CSSSanitizer | None = None,
) -> None: ...
def sanitize_stream(self, token_iterator): ...
def merge_characters(self, token_iterator): ...
def __iter__(self): ...
def sanitize_token(self, token): ...
def sanitize_characters(self, token): ...
def sanitize_uri_value(self, value, allowed_protocols): ...
def allow_token(self, token): ...
def disallowed_token(self, token): ...
def sanitize_css(self, style): ...
def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
def __iter__(self) -> Iterator[_Token]: ...
def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
def allow_token(self, token: _Token) -> _Token: ...
def disallowed_token(self, token: _Token) -> _Token: ...
15 changes: 11 additions & 4 deletions stubs/html5lib/html5lib/_inputstream.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from _typeshed import Incomplete, SupportsRead
from typing import Any, overload
from codecs import CodecInfo
from typing import Any, Protocol, overload
from typing_extensions import TypeAlias

# Is actually webencodings.Encoding
# Structural Protocol used in this file as the first element of the
# `charEncoding` tuples on HTMLUnicodeInputStream and HTMLBinaryInputStream.
class _Encoding(Protocol):
name: str
codec_info: CodecInfo
def __init__(self, name: str, codec_info: CodecInfo) -> None: ...

_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
_InputStream: TypeAlias = _UnicodeInputStream # noqa: Y047 # used in other files
Expand Down Expand Up @@ -42,13 +49,13 @@ def HTMLInputStream(
class HTMLUnicodeInputStream:
reportCharacterErrors: Any
newLines: Any
charEncoding: Any
charEncoding: tuple[_Encoding, str]
dataStream: Any
def __init__(self, source: _UnicodeInputStream) -> None: ...
chunk: str
chunkSize: int
chunkOffset: int
errors: Any
errors: list[str]
prevNumLines: int
prevNumCols: int
def reset(self) -> None: ...
Expand All @@ -70,7 +77,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
same_origin_parent_encoding: Any
likely_encoding: Any
default_encoding: Any
charEncoding: Any
charEncoding: tuple[_Encoding, str]
def __init__(
self,
source: _BinaryInputStream,
Expand Down
Loading