From 8970d002971dd048495a34e12541e2da91f46d16 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 3 May 2022 16:12:50 +0100 Subject: [PATCH 1/5] Improve annotation of `fromstring` `parser` seems like it can be an `HTMLParser. There's code in the wild that does this without obviously being wrong. As far as I can tell from the lxml Cython source, `parser` is a `_Baseparser`. I wasn't sure if the `iterparse` class ought to be allowed here too. I erred on the side of caution. It's possible for this function to return None, if the parser has `recover=True`. I couldn't find a good way to express this without affecting every other call to `fromstring`. (Perhaps one could make the Parser generic over some kind of `Recoverable` indicator type... but that seemed like overkill) Resolves #63. --- lxml-stubs/etree.pyi | 7 +++++-- test-data/test-etree.yml | 10 +++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lxml-stubs/etree.pyi b/lxml-stubs/etree.pyi index 78535e4..4cf8622 100644 --- a/lxml-stubs/etree.pyi +++ b/lxml-stubs/etree.pyi @@ -496,8 +496,11 @@ def parse( source: _FileSource, parser: XMLParser = ..., base_url: _AnyStr = ... ) -> _ElementTree: ... def fromstring( - text: _AnyStr, parser: XMLParser = ..., *, base_url: _AnyStr = ... -) -> _Element: ... + text: _AnyStr, + parser: Union[XMLParser, HTMLParser] = ..., + *, + base_url: _AnyStr = ..., +) -> Optional[_Element]: ... @overload def tostring( element_or_tree: _ElementOrTree, diff --git a/test-data/test-etree.yml b/test-data/test-etree.yml index f0e68ac..4959252 100644 --- a/test-data/test-etree.yml +++ b/test-data/test-etree.yml @@ -3,7 +3,15 @@ main: | from lxml import etree document = etree.fromstring("") - reveal_type(document) # N: Revealed type is "lxml.etree._Element" + reveal_type(document) # N: Revealed type is "Union[lxml.etree._Element, None]" + +- case: etree_from_empty_string_with_parser_recovery_returns_none + disable_cache: true + main: | + from lxml import etree + parser = etree.HTMLParser(recover=True) + document = etree.fromstring("", parser) + reveal_type(document) # N: Revealed type is "Union[lxml.etree._Element, None]" - case: etree_element_find disable_cache: true From fa254a2190a7d01a15b4adf87de974bdc5906576 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 3 May 2022 23:34:42 +0100 Subject: [PATCH 2/5] parse() accepts an HTML parser too --- lxml-stubs/etree.pyi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lxml-stubs/etree.pyi b/lxml-stubs/etree.pyi index 4cf8622..72d5ce3 100644 --- a/lxml-stubs/etree.pyi +++ b/lxml-stubs/etree.pyi @@ -493,7 +493,9 @@ def cleanup_namespaces( keep_ns_prefixes: Optional[Iterable[_AnyStr]] = ..., ) -> None: ... def parse( - source: _FileSource, parser: XMLParser = ..., base_url: _AnyStr = ... + source: _FileSource, + parser: Union[XMLParser, HTMLParser] = ..., + base_url: _AnyStr = ..., ) -> _ElementTree: ... def fromstring( text: _AnyStr, From 07ab5e7b0ec141e62f089e7c8dc20dbe17041203 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 3 May 2022 23:41:04 +0100 Subject: [PATCH 3/5] If no parse is specified, always return _Element This isn't strictly true: one can apparently change the default parser. But it's a nice refinement. --- lxml-stubs/etree.pyi | 8 ++++++++ test-data/test-etree.yml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lxml-stubs/etree.pyi b/lxml-stubs/etree.pyi index 72d5ce3..01fd0c8 100644 --- a/lxml-stubs/etree.pyi +++ b/lxml-stubs/etree.pyi @@ -497,6 +497,14 @@ def parse( parser: Union[XMLParser, HTMLParser] = ..., base_url: _AnyStr = ..., ) -> _ElementTree: ... +@overload +def fromstring( + text: _AnyStr, + parser: None = ..., + *, + base_url: _AnyStr = ..., +) -> _Element: ... +@overload def fromstring( text: _AnyStr, parser: Union[XMLParser, HTMLParser] = ..., diff --git a/test-data/test-etree.yml b/test-data/test-etree.yml index 4959252..c4a8b17 100644 --- a/test-data/test-etree.yml +++ b/test-data/test-etree.yml @@ -3,7 +3,7 @@ main: | from lxml import etree document = etree.fromstring("") - reveal_type(document) # N: Revealed type is "Union[lxml.etree._Element, None]" + reveal_type(document) # N: Revealed type is "lxml.etree._Element" - case: etree_from_empty_string_with_parser_recovery_returns_none disable_cache: true From 547e325ec72a6611a0fed3f43e95528fe176d3c1 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 3 May 2022 23:52:40 +0100 Subject: [PATCH 4/5] Use `_Element | Any` instead of `_Element | None` so as to not burden existing users with having to check the None-ness of `fromstring()` return values --- lxml-stubs/etree.pyi | 2 +- test-data/test-etree.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lxml-stubs/etree.pyi b/lxml-stubs/etree.pyi index 01fd0c8..844a42b 100644 --- a/lxml-stubs/etree.pyi +++ b/lxml-stubs/etree.pyi @@ -510,7 +510,7 @@ def fromstring( parser: Union[XMLParser, HTMLParser] = ..., *, base_url: _AnyStr = ..., -) -> Optional[_Element]: ... +) -> Union[_Element, Any]: ... @overload def tostring( element_or_tree: _ElementOrTree, diff --git a/test-data/test-etree.yml b/test-data/test-etree.yml index c4a8b17..6c9bb2a 100644 --- a/test-data/test-etree.yml +++ b/test-data/test-etree.yml @@ -11,7 +11,7 @@ from lxml import etree parser = etree.HTMLParser(recover=True) document = etree.fromstring("", parser) - reveal_type(document) # N: Revealed type is "Union[lxml.etree._Element, None]" + reveal_type(document) # N: Revealed type is "Union[lxml.etree._Element, Any]" - case: etree_element_find disable_cache: true From 3aca181f94a295ec5d5f6aa2f1534fdaa7d84f3c Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 6 May 2022 10:27:39 +0100 Subject: [PATCH 5/5] `parse` should return `_ElementTree | Any`. Co-authored-by: scoder --- lxml-stubs/etree.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxml-stubs/etree.pyi b/lxml-stubs/etree.pyi index 844a42b..eca3cc4 100644 --- a/lxml-stubs/etree.pyi +++ b/lxml-stubs/etree.pyi @@ -496,7 +496,7 @@ def parse( source: _FileSource, parser: Union[XMLParser, HTMLParser] = ..., base_url: _AnyStr = ..., -) -> _ElementTree: ... +) -> Union[_ElementTree, Any]: ... @overload def fromstring( text: _AnyStr,