Skip to content

Commit

Permalink
refactor: adjust sanitizing and style rules (#22)
Browse files Browse the repository at this point in the history
* refactor: adjust styling by updating rules for sanitizing

* refactor: remove maple support

* fix: sanitize html even if allow_javascript is true

* chore: remove nutmeg workflow and bump version
  • Loading branch information
navinkarkera authored Feb 1, 2023
1 parent 4b7828b commit 0e653d7
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 134 deletions.
40 changes: 0 additions & 40 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,6 @@ jobs:
name: Running tests
command: make unit-coverage

test_nutmeg:
<<: *DEFAULT
steps:
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: Activate virtualenv
command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV
- run:
name: Install bleach 5
command: pip install bleach[css]==5.0.0
- run:
name: Running tests
command: make unit-coverage

quality:
<<: *DEFAULT
steps:
Expand All @@ -70,22 +54,6 @@ jobs:
name: Test Quality
command: make quality

quality_nutmeg:
<<: *DEFAULT
steps:
- checkout
- attach_workspace:
at: /tmp/workspace
- run:
name: Activate virtualenv
command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV
- run:
name: Install bleach 5
command: pip install bleach[css]==5.0.0
- run:
name: Test Quality
command: make quality

workflows:
version: 2
build-test:
Expand All @@ -97,11 +65,3 @@ workflows:
- quality:
requires:
- build
- test_nutmeg:
requires:
- test
- quality
- quality_nutmeg:
requires:
- test
- quality
171 changes: 94 additions & 77 deletions html_xblock/bleaching.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,7 @@
A new HTML XBlock that is designed with security and embedding in mind.
"""
import bleach

try:
from bleach.css_sanitizer import CSSSanitizer
except (ImportError, ModuleNotFoundError):
# NOTE:
# The bleach library changes the way CSS Styles are cleaned in
# version 5.0.0. Since the edx-platform uses version 4.1.0 in
# Maple, this import is handled within a try block.
# This try block CAN BE REMOVED for Nutmeg
CSSSanitizer = None
from bleach.css_sanitizer import CSSSanitizer


class SanitizedText: # pylint: disable=too-few-public-methods
Expand All @@ -20,19 +11,18 @@ class SanitizedText: # pylint: disable=too-few-public-methods
It returns a safe value of the passed text and an unsafe value if requested.
"""

def __init__(self, value, strict=True):
def __init__(self, value, allow_javascript=False):
"""
This initializer takes a raw value that may contain unsafe content and produce a cleaned version of it.
It's very helpful to maintain both versions of the content if we need to use it later as a Database field or so.
:param value: The original string value that came from DB.
:param strict: Whether to strictly process the given text or not.
:param allow_javascript: Whether to allow javascript via script tag.
"""
self.strict = strict
self.allow_javascript = allow_javascript
self.cleaner = self.get_cleaner()

self.adulterated_value = value
self.sanitized_value = self.cleaner.clean(value)
self.value = self.sanitized_value if self.strict else self.adulterated_value
self.value = self.cleaner.clean(value)

def get_cleaner(self):
"""
Expand All @@ -41,115 +31,142 @@ def get_cleaner(self):
It does so by redefining the safe values we're currently using and
considering safe in the platform.
"""
if CSSSanitizer:
# pylint: disable-next=unexpected-keyword-arg
cleaner = bleach.Cleaner(
tags=self._get_allowed_tags(),
attributes=self._get_allowed_attributes(),
css_sanitizer=CSSSanitizer(
allowed_css_properties=self._get_allowed_styles()
)
)
else:
# NOTE: This is maintaining backward compatibility with bleach 4.1.0
# used in Maple release of edx-platform. This can be removed
# for Nutmeg release which uses bleach 5.0.0
# pylint: disable-next=unexpected-keyword-arg
cleaner = bleach.Cleaner(
tags=self._get_allowed_tags(),
attributes=self._get_allowed_attributes(),
styles=self._get_allowed_styles()
# pylint: disable-next=unexpected-keyword-arg
cleaner = bleach.Cleaner(
tags=self._get_allowed_tags(),
attributes=self._get_allowed_attributes(),
css_sanitizer=CSSSanitizer(
allowed_css_properties=self._get_allowed_styles()
)

)
return cleaner

def _get_allowed_tags(self):
"""
This is an override to the original bleaching cleaner ALLOWED_TAGS.
It deals with two bleaching modes: the strict mode, and the trusted mode.
:return: Allowed tags depending on the bleaching mode
"""
tags = [
'a',
'b',
'blockquote',
'code',
'em',
tags = bleach.ALLOWED_TAGS + [
'br',
'caption',
'dd',
'del',
'div',
'dl',
'dt',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'i',
'hr',
'img',
'li',
'span',
'strong',
'pre',
'ol',
'ul',
'p',
'pre',
'caption',
's',
'strike',
'span',
'sub',
'sup',
'table',
'thead',
'tbody',
'td',
'tfoot',
'th',
'thead',
'tr',
'td'
'u',
'iframe',
]

if not self.strict:
tags += ['h1', 'h2', 'script', 'sub', 'sup', 'div', 'abbr', 'iframe']
if self.allow_javascript:
tags += ['script']

return tags

def _get_allowed_attributes(self):
"""
This is an override to the original bleaching cleaner ALLOWED_ATTRIBUTES.
It deals with two bleaching modes, the strict mode, and the trusted mode.
:return: Allowed attributes depending on the bleaching mode
"""
attributes = {
'*': ['class', 'style', 'id'],
'a': ['href', 'title', 'target', 'rel'],
'img': ['src', 'alt', 'width', 'height'],
'p': ['style'],
'pre': ['class'],
'span': ['style'],
'ul': [],
'abbr': ['title'],
'acronym': ['title'],
'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
'img': ['src', 'alt', 'title', 'width', 'height'],
'table': ['class', 'style', 'border', 'cellspacing', 'cellpadding'],
'tr': ['style'],
'td': ['style', 'scope'],
}

if not self.strict:
attributes.update({'abbr': ['title']})
attributes['ul'].append('style')
attributes['img'].append('style')

return attributes

def _get_allowed_styles(self):
"""
This is an override to the original bleaching cleaner ALLOWED_STYLES.
It deals with two bleaching modes, the strict mode, and the trusted mode.
:return: Allowed styles depending on the bleaching mode
"""
styles = [
'background-color', 'border', 'border-collapse', 'border-color', 'border-style', 'color', 'font-family',
'height', 'margin-left', 'margin-right', 'padding-left', 'padding-right', 'text-align', 'text-decoration',
'vertical-align', 'width',
"azimuth",
"background-color",
"border-bottom-color",
"border-collapse",
"border-color",
"border-left-color",
"border-right-color",
"border-top-color",
"clear",
"color",
"cursor",
"direction",
"display",
"elevation",
"float",
"font",
"font-family",
"font-size",
"font-style",
"font-variant",
"font-weight",
"height",
"letter-spacing",
"line-height",
"overflow",
"pause",
"pause-after",
"pause-before",
"pitch",
"pitch-range",
"richness",
"speak",
"speak-header",
"speak-numeral",
"speak-punctuation",
"speech-rate",
"stress",
"text-align",
"text-decoration",
"text-indent",
"unicode-bidi",
"vertical-align",
"voice-family",
"volume",
"white-space",
"width",
'border',
'border-style',
'margin-left',
'margin-right',
'padding-left',
'padding-right',
]

if not self.strict:
styles += ['list-style-type', 'font-size', 'border-width', 'margin']

return styles

def _determine_values(self, other):
Expand All @@ -163,7 +180,7 @@ def _determine_values(self, other):
:return: A tuple of values to be compared.
"""
if isinstance(other, str):
self_value = self.sanitized_value
self_value = self.value
other_value = other
elif isinstance(other, type(self)):
self_value = self.adulterated_value
Expand All @@ -177,13 +194,13 @@ def _determine_values(self, other):

def __str__(self):
"""
:return: The value of the text depending on the strictness level.
:return: The value of the text.
"""
return self.value

def __unicode__(self):
"""
:return: The value of the text depending on the strictness level.
:return: The value of the text.
"""
return self.value

Expand Down
16 changes: 3 additions & 13 deletions html_xblock/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,25 +228,15 @@ def substitute_keywords(self):

return data

@property
def sanitized_html(self):
"""
A property that returns a sanitized text field of the existing data object.
"""
data = self.substitute_keywords()
html = SanitizedText(data)
return html.value

@property
def html(self):
"""
A property that returns this module content data, according to `allow_javascript`.
I.E: Sanitized data if it's true or plain data if it's false.
"""
if self.allow_javascript:
data = self.substitute_keywords()
return data
return self.sanitized_html
data = self.substitute_keywords()
html = SanitizedText(data, allow_javascript=self.allow_javascript)
return html

def get_editable_fields(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion html_xblock/static/html/lms.html
Original file line number Diff line number Diff line change
@@ -1 +1 @@
<div class="html5_xblock">{{ self.html | safe }}</div>
<div class="html5_xblock xmodule_display xmodule_HtmlBlock">{{ self.html | safe }}</div>
3 changes: 1 addition & 2 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,5 @@

xblock-utils==2.2.0
edx-i18n-tools==0.9.1
bleach==4.1.0 # version pinned for Maple
# bleach[css]==5.0.0 # Use this for Nutmeg
bleach[css]==5.0.0
django==3.2.13
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def package_data(pkg, roots):

setup(
name='html-xblock',
version='1.2.2',
version='1.3.0',
description='HTML XBlock will help creating and using a secure and easy-to-use HTML blocks',
license='AGPL v3',
packages=[
Expand Down

0 comments on commit 0e653d7

Please sign in to comment.