-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* removed bleach, replaced w nh3 * added test * modified tests * move content back to original position * remove repeated declaration * remove django type casting * add a few more tests --------- Co-authored-by: Andy Shapiro <[email protected]>
- Loading branch information
1 parent
cc45b0d
commit 59b6c6b
Showing
12 changed files
with
277 additions
and
63 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
"""Sanitize CSS.""" | ||
|
||
import tinycss2 | ||
|
||
# Default CSS properties that may survive sanitization of a ``style``
# attribute. Immutable so the value can safely be used as a default argument.
ALLOWED_CSS_PROPERTIES = frozenset(
    (
        "azimuth",
        "background-color",
        "border-bottom-color",
        "border-collapse",
        "border-color",
        "border-left-color",
        "border-right-color",
        "border-top-color",
        "clear",
        "color",
        "cursor",
        "direction",
        "display",
        "elevation",
        "float",
        "font",
        "font-family",
        "font-size",
        "font-style",
        "font-variant",
        "font-weight",
        "height",
        "letter-spacing",
        "line-height",
        "overflow",
        "pause",
        "pause-after",
        "pause-before",
        "pitch",
        "pitch-range",
        "richness",
        "speak",
        "speak-header",
        "speak-numeral",
        "speak-punctuation",
        "speech-rate",
        "stress",
        "text-align",
        "text-decoration",
        "text-indent",
        "unicode-bidi",
        "vertical-align",
        "voice-family",
        "volume",
        "white-space",
        "width",
    )
)


# Default SVG presentation properties that may survive sanitization.
ALLOWED_SVG_PROPERTIES = frozenset(
    (
        "fill",
        "fill-opacity",
        "fill-rule",
        "stroke",
        "stroke-width",
        "stroke-linecap",
        "stroke-linejoin",
        "stroke-opacity",
    )
)
|
||
|
||
class CSSSanitizer:
    """
    Sanitize CSS declarations against an allow-list of property names.

    Adapted from Bleach.
    https://github.com/mozilla/bleach/blob/main/bleach/css_sanitizer.py
    """

    def __init__(
        self,
        allowed_css_properties=ALLOWED_CSS_PROPERTIES,
        allowed_svg_properties=ALLOWED_SVG_PROPERTIES,
    ):
        """Store the allowed property names.

        Args:
            allowed_css_properties: container of CSS property names to keep.
            allowed_svg_properties: container of SVG property names to keep.
        """
        self.allowed_css_properties = allowed_css_properties
        self.allowed_svg_properties = allowed_svg_properties

    def sanitize_css(self, style: str) -> str:
        """Return ``style`` with every disallowed declaration removed.

        Args:
            style: contents of a ``style`` attribute (a CSS declaration list).

        Returns:
            The serialized declarations whose lower-cased property name is in
            either allow-list, stripped of surrounding whitespace; an empty
            string when nothing is kept.
        """
        parsed = tinycss2.parse_declaration_list(style)

        if not parsed:
            return ""

        new_tokens = []
        for token in parsed:
            if token.type == "declaration":
                if (
                    token.lower_name in self.allowed_css_properties
                    or token.lower_name in self.allowed_svg_properties
                ):
                    new_tokens.append(token)
            elif (
                token.type in ("comment", "whitespace")
                # Keep separators only after a kept token, and never two of
                # the same kind in a row.
                and new_tokens
                and new_tokens[-1].type != token.type
            ):
                new_tokens.append(token)

            # NOTE(willkg): We currently don't handle AtRule or ParseError and
            # so both get silently thrown out

        if not new_tokens:
            return ""

        return tinycss2.serialize(new_tokens).strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Validate html.""" | ||
|
||
import nh3 | ||
|
||
from .sanitize_css import CSSSanitizer | ||
|
||
# HTML tags that survive cleaning; any other tag is removed.
valid_html_tags = {
    "a",
    "blockquote",
    "br",
    "div",
    "em",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "li",
    "mark",
    "ol",
    "p",
    "span",
    "strong",
    "sub",
    "sup",
    "s",
    "ul",
    "u",
}

# Allowed attributes per tag; "*" applies to every allowed tag.
valid_html_attrs = {
    "*": {"style"},
    "a": {"class", "href"},
    "span": {"class"},
    "mark": {"class"},
    "div": {"class"},
}

# CSS properties permitted inside a ``style`` attribute.
valid_css_properties = {"color", "background-color"}
# No SVG properties are permitted. ``set()`` rather than ``{}``, because ``{}``
# is an empty dict and the sibling allow-lists are all sets.
valid_svg_properties = set()
|
||
# Module-level sanitizer shared by every clean_html() call; restricts inline
# styles to the allow-lists declared above.
css_sanitizer = CSSSanitizer(
    allowed_css_properties=valid_css_properties,
    allowed_svg_properties=valid_svg_properties,
)
|
||
|
||
def clean_html(html: str) -> str:
    """Clean HTML by removing invalid HTML tags, attributes, and CSS properties.

    Note: inner text within invalid HTML tags will still be included.

    Args:
        html (str): HTML to clean

    Returns:
        str: cleaned HTML
    """

    def attribute_filter(element, attribute, value):
        """Send ``style`` values to the CSS sanitizer; pass others through."""
        if attribute == "style":
            return css_sanitizer.sanitize_css(value)
        return value

    return nh3.clean(
        html,
        tags=valid_html_tags,
        attributes=valid_html_attrs,
        attribute_filter=attribute_filter,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.