diff --git a/.circleci/config.yml b/.circleci/config.yml index a080382..0b7f2b4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,22 +41,6 @@ jobs: name: Running tests command: make unit-coverage - test_nutmeg: - <<: *DEFAULT - steps: - - checkout - - attach_workspace: - at: /tmp/workspace - - run: - name: Activate virtualenv - command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV - - run: - name: Install bleach 5 - command: pip install bleach[css]==5.0.0 - - run: - name: Running tests - command: make unit-coverage - quality: <<: *DEFAULT steps: @@ -70,22 +54,6 @@ jobs: name: Test Quality command: make quality - quality_nutmeg: - <<: *DEFAULT - steps: - - checkout - - attach_workspace: - at: /tmp/workspace - - run: - name: Activate virtualenv - command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV - - run: - name: Install bleach 5 - command: pip install bleach[css]==5.0.0 - - run: - name: Test Quality - command: make quality - workflows: version: 2 build-test: @@ -97,11 +65,3 @@ workflows: - quality: requires: - build - - test_nutmeg: - requires: - - test - - quality - - quality_nutmeg: - requires: - - test - - quality diff --git a/html_xblock/bleaching.py b/html_xblock/bleaching.py index 520e04a..6ffc8c0 100644 --- a/html_xblock/bleaching.py +++ b/html_xblock/bleaching.py @@ -2,16 +2,7 @@ A new HTML XBlock that is designed with security and embedding in mind. """ import bleach - -try: - from bleach.css_sanitizer import CSSSanitizer -except (ImportError, ModuleNotFoundError): - # NOTE: - # The bleach library changes the way CSS Styles are cleaned in - # version 5.0.0. Since the edx-platform uses version 4.1.0 in - # Maple, this import is handled within a try block. - # This try block CAN BE REMOVED for Nutmeg - CSSSanitizer = None +from bleach.css_sanitizer import CSSSanitizer class SanitizedText: # pylint: disable=too-few-public-methods @@ -20,19 +11,18 @@ class SanitizedText: # pylint: disable=too-few-public-methods It returns a safe value of the passed text and an unsafe value if requested. """ - def __init__(self, value, strict=True): + def __init__(self, value, allow_javascript=False): """ This initializer takes a raw value that may contain unsafe content and produce a cleaned version of it. It's very helpful to maintain both versions of the content if we need to use it later as a Database field or so. :param value: The original string value that came from DB. - :param strict: Whether to strictly process the given text or not. + :param allow_javascript: Whether to allow javascript via script tag. """ - self.strict = strict + self.allow_javascript = allow_javascript self.cleaner = self.get_cleaner() self.adulterated_value = value - self.sanitized_value = self.cleaner.clean(value) - self.value = self.sanitized_value if self.strict else self.adulterated_value + self.value = self.cleaner.clean(value) def get_cleaner(self): """ @@ -41,68 +31,58 @@ def get_cleaner(self): It does so by redefining the safe values we're currently using and considering safe in the platform. """ - if CSSSanitizer: - # pylint: disable-next=unexpected-keyword-arg - cleaner = bleach.Cleaner( - tags=self._get_allowed_tags(), - attributes=self._get_allowed_attributes(), - css_sanitizer=CSSSanitizer( - allowed_css_properties=self._get_allowed_styles() - ) - ) - else: - # NOTE: This is maintaining backward compatibility with bleach 4.1.0 - # used in Maple release of edx-platform. This can be removed - # for Nutmeg release which uses bleach 5.0.0 - # pylint: disable-next=unexpected-keyword-arg - cleaner = bleach.Cleaner( - tags=self._get_allowed_tags(), - attributes=self._get_allowed_attributes(), - styles=self._get_allowed_styles() + # pylint: disable-next=unexpected-keyword-arg + cleaner = bleach.Cleaner( + tags=self._get_allowed_tags(), + attributes=self._get_allowed_attributes(), + css_sanitizer=CSSSanitizer( + allowed_css_properties=self._get_allowed_styles() ) - + ) return cleaner def _get_allowed_tags(self): """ This is an override to the original bleaching cleaner ALLOWED_TAGS. - It deals with two bleaching modes: the strict mode, and the trusted mode. - :return: Allowed tags depending on the bleaching mode """ - tags = [ - 'a', - 'b', - 'blockquote', - 'code', - 'em', + tags = bleach.ALLOWED_TAGS + [ + 'br', + 'caption', + 'dd', + 'del', + 'div', + 'dl', + 'dt', + 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', - 'i', + 'hr', 'img', - 'li', - 'span', - 'strong', - 'pre', - 'ol', - 'ul', 'p', 'pre', - 'caption', + 's', + 'strike', + 'span', + 'sub', + 'sup', 'table', - 'thead', 'tbody', + 'td', 'tfoot', 'th', + 'thead', 'tr', - 'td' + 'u', + 'iframe', ] - if not self.strict: - tags += ['h1', 'h2', 'script', 'sub', 'sup', 'div', 'abbr', 'iframe'] + if self.allow_javascript: + tags += ['script'] return tags @@ -110,46 +90,83 @@ def _get_allowed_attributes(self): """ This is an override to the original bleaching cleaner ALLOWED_ATTRIBUTES. - It deals with two bleaching modes, the strict mode, and the trusted mode. - :return: Allowed attributes depending on the bleaching mode """ attributes = { + '*': ['class', 'style', 'id'], 'a': ['href', 'title', 'target', 'rel'], - 'img': ['src', 'alt', 'width', 'height'], - 'p': ['style'], - 'pre': ['class'], - 'span': ['style'], - 'ul': [], + 'abbr': ['title'], + 'acronym': ['title'], + 'audio': ['controls', 'autobuffer', 'autoplay', 'src'], + 'img': ['src', 'alt', 'title', 'width', 'height'], 'table': ['class', 'style', 'border', 'cellspacing', 'cellpadding'], 'tr': ['style'], 'td': ['style', 'scope'], } - if not self.strict: - attributes.update({'abbr': ['title']}) - attributes['ul'].append('style') - attributes['img'].append('style') - return attributes def _get_allowed_styles(self): """ This is an override to the original bleaching cleaner ALLOWED_STYLES. - It deals with two bleaching modes, the strict mode, and the trusted mode. - :return: Allowed styles depending on the bleaching mode """ styles = [ - 'background-color', 'border', 'border-collapse', 'border-color', 'border-style', 'color', 'font-family', - 'height', 'margin-left', 'margin-right', 'padding-left', 'padding-right', 'text-align', 'text-decoration', - 'vertical-align', 'width', + "azimuth", + "background-color", + "border-bottom-color", + "border-collapse", + "border-color", + "border-left-color", + "border-right-color", + "border-top-color", + "clear", + "color", + "cursor", + "direction", + "display", + "elevation", + "float", + "font", + "font-family", + "font-size", + "font-style", + "font-variant", + "font-weight", + "height", + "letter-spacing", + "line-height", + "overflow", + "pause", + "pause-after", + "pause-before", + "pitch", + "pitch-range", + "richness", + "speak", + "speak-header", + "speak-numeral", + "speak-punctuation", + "speech-rate", + "stress", + "text-align", + "text-decoration", + "text-indent", + "unicode-bidi", + "vertical-align", + "voice-family", + "volume", + "white-space", + "width", + 'border', + 'border-style', + 'margin-left', + 'margin-right', + 'padding-left', + 'padding-right', ] - if not self.strict: - styles += ['list-style-type', 'font-size', 'border-width', 'margin'] - return styles def _determine_values(self, other): @@ -163,7 +180,7 @@ def _determine_values(self, other): :return: A tuple of values to be compared. """ if isinstance(other, str): - self_value = self.sanitized_value + self_value = self.value other_value = other elif isinstance(other, type(self)): self_value = self.adulterated_value @@ -177,13 +194,13 @@ def _determine_values(self, other): def __str__(self): """ - :return: The value of the text depending on the strictness level. + :return: The value of the text. """ return self.value def __unicode__(self): """ - :return: The value of the text depending on the strictness level. + :return: The value of the text. """ return self.value diff --git a/html_xblock/html.py b/html_xblock/html.py index e081bf2..bf6c8d3 100644 --- a/html_xblock/html.py +++ b/html_xblock/html.py @@ -228,25 +228,15 @@ def substitute_keywords(self): return data - @property - def sanitized_html(self): - """ - A property that returns a sanitized text field of the existing data object. - """ - data = self.substitute_keywords() - html = SanitizedText(data) - return html.value - @property def html(self): """ A property that returns this module content data, according to `allow_javascript`. I.E: Sanitized data if it's true or plain data if it's false. """ - if self.allow_javascript: - data = self.substitute_keywords() - return data - return self.sanitized_html + data = self.substitute_keywords() + html = SanitizedText(data, allow_javascript=self.allow_javascript) + return html def get_editable_fields(self): """ diff --git a/html_xblock/static/html/lms.html b/html_xblock/static/html/lms.html index abe8ee9..11d4c10 100644 --- a/html_xblock/static/html/lms.html +++ b/html_xblock/static/html/lms.html @@ -1 +1 @@ -