refactor: adjust sanitizing and style rules (#22)

* refactor: adjust styling by updating rules for sanitizing * refactor: remove maple support * fix: sanitize html even if allow_javascript is true * chore: remove nutmeg workflow and bump version
open-craft · Feb 1, 2023 · 0e653d7 · 0e653d7
1 parent 4b7828b
commit 0e653d7
Show file tree

Hide file tree

Showing 6 changed files with 100 additions and 134 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -41,22 +41,6 @@ jobs:
           name: Running tests
           command: make unit-coverage
 
-  test_nutmeg:
-    <<: *DEFAULT
-    steps:
-      - checkout
-      - attach_workspace:
-          at: /tmp/workspace
-      - run:
-          name: Activate virtualenv
-          command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV
-      - run:
-          name: Install bleach 5
-          command: pip install bleach[css]==5.0.0
-      - run:
-          name: Running tests
-          command: make unit-coverage
-
   quality:
     <<: *DEFAULT
     steps:
@@ -70,22 +54,6 @@ jobs:
           name: Test Quality
           command: make quality
 
-  quality_nutmeg:
-    <<: *DEFAULT
-    steps:
-      - checkout
-      - attach_workspace:
-          at: /tmp/workspace
-      - run:
-          name: Activate virtualenv
-          command: echo 'source /tmp/workspace/env/bin/activate' >> $BASH_ENV
-      - run:
-          name: Install bleach 5
-          command: pip install bleach[css]==5.0.0
-      - run:
-          name: Test Quality
-          command: make quality
-
 workflows:
   version: 2
   build-test:
@@ -97,11 +65,3 @@ workflows:
       - quality:
           requires:
             -  build
-      - test_nutmeg:
-          requires:
-            - test
-            - quality
-      - quality_nutmeg:
-          requires:
-            - test
-            - quality
diff --git a/html_xblock/bleaching.py b/html_xblock/bleaching.py
@@ -2,16 +2,7 @@
 A new HTML XBlock that is designed with security and embedding in mind.
 """
 import bleach
-
-try:
-    from bleach.css_sanitizer import CSSSanitizer
-except (ImportError, ModuleNotFoundError):
-    # NOTE:
-    # The bleach library changes the way CSS Styles are cleaned in
-    # version 5.0.0. Since the edx-platform uses version 4.1.0 in
-    # Maple, this import is handled within a try block.
-    # This try block CAN BE REMOVED for Nutmeg
-    CSSSanitizer = None
+from bleach.css_sanitizer import CSSSanitizer
 
 
 class SanitizedText:  # pylint: disable=too-few-public-methods
@@ -20,19 +11,18 @@ class SanitizedText:  # pylint: disable=too-few-public-methods
     It returns a safe value of the passed text and an unsafe value if requested.
     """
 
-    def __init__(self, value, strict=True):
+    def __init__(self, value, allow_javascript=False):
         """
         This initializer takes a raw value that may contain unsafe content and produce a cleaned version of it.
         It's very helpful to maintain both versions of the content if we need to use it later as a Database field or so.
         :param value: The original string value that came from DB.
-        :param strict: Whether to strictly process the given text or not.
+        :param allow_javascript: Whether to allow javascript via script tag.
         """
-        self.strict = strict
+        self.allow_javascript = allow_javascript
         self.cleaner = self.get_cleaner()
 
         self.adulterated_value = value
-        self.sanitized_value = self.cleaner.clean(value)
-        self.value = self.sanitized_value if self.strict else self.adulterated_value
+        self.value = self.cleaner.clean(value)
 
     def get_cleaner(self):
         """
@@ -41,115 +31,142 @@ def get_cleaner(self):
         It does so by redefining the safe values we're currently using and
         considering safe in the platform.
         """
-        if CSSSanitizer:
-            # pylint: disable-next=unexpected-keyword-arg
-            cleaner = bleach.Cleaner(
-                tags=self._get_allowed_tags(),
-                attributes=self._get_allowed_attributes(),
-                css_sanitizer=CSSSanitizer(
-                    allowed_css_properties=self._get_allowed_styles()
-                )
-            )
-        else:
-            # NOTE: This is maintaining backward compatibility with bleach 4.1.0
-            # used in Maple release of edx-platform. This can be removed
-            # for Nutmeg release which uses bleach 5.0.0
-            # pylint: disable-next=unexpected-keyword-arg
-            cleaner = bleach.Cleaner(
-                tags=self._get_allowed_tags(),
-                attributes=self._get_allowed_attributes(),
-                styles=self._get_allowed_styles()
+        # pylint: disable-next=unexpected-keyword-arg
+        cleaner = bleach.Cleaner(
+            tags=self._get_allowed_tags(),
+            attributes=self._get_allowed_attributes(),
+            css_sanitizer=CSSSanitizer(
+                allowed_css_properties=self._get_allowed_styles()
             )
-
+        )
         return cleaner
 
     def _get_allowed_tags(self):
         """
         This is an override to the original bleaching cleaner ALLOWED_TAGS.
 
-        It deals with two bleaching modes: the strict mode, and the trusted mode.
-
         :return: Allowed tags depending on the bleaching mode
         """
-        tags = [
-            'a',
-            'b',
-            'blockquote',
-            'code',
-            'em',
+        tags = bleach.ALLOWED_TAGS + [
+            'br',
+            'caption',
+            'dd',
+            'del',
+            'div',
+            'dl',
+            'dt',
+            'h1',
+            'h2',
             'h3',
             'h4',
             'h5',
             'h6',
-            'i',
+            'hr',
             'img',
-            'li',
-            'span',
-            'strong',
-            'pre',
-            'ol',
-            'ul',
             'p',
             'pre',
-            'caption',
+            's',
+            'strike',
+            'span',
+            'sub',
+            'sup',
             'table',
-            'thead',
             'tbody',
+            'td',
             'tfoot',
             'th',
+            'thead',
             'tr',
-            'td'
+            'u',
+            'iframe',
         ]
 
-        if not self.strict:
-            tags += ['h1', 'h2', 'script', 'sub', 'sup', 'div', 'abbr', 'iframe']
+        if self.allow_javascript:
+            tags += ['script']
 
         return tags
 
     def _get_allowed_attributes(self):
         """
         This is an override to the original bleaching cleaner ALLOWED_ATTRIBUTES.
 
-        It deals with two bleaching modes, the strict mode, and the trusted mode.
-
         :return: Allowed attributes depending on the bleaching mode
         """
         attributes = {
+            '*': ['class', 'style', 'id'],
             'a': ['href', 'title', 'target', 'rel'],
-            'img': ['src', 'alt', 'width', 'height'],
-            'p': ['style'],
-            'pre': ['class'],
-            'span': ['style'],
-            'ul': [],
+            'abbr': ['title'],
+            'acronym': ['title'],
+            'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
+            'img': ['src', 'alt', 'title', 'width', 'height'],
             'table': ['class', 'style', 'border', 'cellspacing', 'cellpadding'],
             'tr': ['style'],
             'td': ['style', 'scope'],
         }
 
-        if not self.strict:
-            attributes.update({'abbr': ['title']})
-            attributes['ul'].append('style')
-            attributes['img'].append('style')
-
         return attributes
 
     def _get_allowed_styles(self):
         """
         This is an override to the original bleaching cleaner ALLOWED_STYLES.
 
-        It deals with two bleaching modes, the strict mode, and the trusted mode.
-
         :return: Allowed styles depending on the bleaching mode
         """
         styles = [
-            'background-color', 'border', 'border-collapse', 'border-color', 'border-style', 'color', 'font-family',
-            'height', 'margin-left', 'margin-right', 'padding-left', 'padding-right', 'text-align', 'text-decoration',
-            'vertical-align', 'width',
+            "azimuth",
+            "background-color",
+            "border-bottom-color",
+            "border-collapse",
+            "border-color",
+            "border-left-color",
+            "border-right-color",
+            "border-top-color",
+            "clear",
+            "color",
+            "cursor",
+            "direction",
+            "display",
+            "elevation",
+            "float",
+            "font",
+            "font-family",
+            "font-size",
+            "font-style",
+            "font-variant",
+            "font-weight",
+            "height",
+            "letter-spacing",
+            "line-height",
+            "overflow",
+            "pause",
+            "pause-after",
+            "pause-before",
+            "pitch",
+            "pitch-range",
+            "richness",
+            "speak",
+            "speak-header",
+            "speak-numeral",
+            "speak-punctuation",
+            "speech-rate",
+            "stress",
+            "text-align",
+            "text-decoration",
+            "text-indent",
+            "unicode-bidi",
+            "vertical-align",
+            "voice-family",
+            "volume",
+            "white-space",
+            "width",
+            'border',
+            'border-style',
+            'margin-left',
+            'margin-right',
+            'padding-left',
+            'padding-right',
         ]
 
-        if not self.strict:
-            styles += ['list-style-type', 'font-size', 'border-width', 'margin']
-
         return styles
 
     def _determine_values(self, other):
@@ -163,7 +180,7 @@ def _determine_values(self, other):
         :return: A tuple of values to be compared.
         """
         if isinstance(other, str):
-            self_value = self.sanitized_value
+            self_value = self.value
             other_value = other
         elif isinstance(other, type(self)):
             self_value = self.adulterated_value
@@ -177,13 +194,13 @@ def _determine_values(self, other):
 
     def __str__(self):
         """
-        :return: The value of the text depending on the strictness level.
+        :return: The value of the text.
         """
         return self.value
 
     def __unicode__(self):
         """
-        :return: The value of the text depending on the strictness level.
+        :return: The value of the text.
         """
         return self.value
 

diff --git a/html_xblock/html.py b/html_xblock/html.py
@@ -228,25 +228,15 @@ def substitute_keywords(self):
 
         return data
 
-    @property
-    def sanitized_html(self):
-        """
-        A property that returns a sanitized text field of the existing data object.
-        """
-        data = self.substitute_keywords()
-        html = SanitizedText(data)
-        return html.value
-
     @property
     def html(self):
         """
         A property that returns this module content data, according to `allow_javascript`.
         I.E: Sanitized data if it's true or plain data if it's false.
         """
-        if self.allow_javascript:
-            data = self.substitute_keywords()
-            return data
-        return self.sanitized_html
+        data = self.substitute_keywords()
+        html = SanitizedText(data, allow_javascript=self.allow_javascript)
+        return html
 
     def get_editable_fields(self):
         """

diff --git a/html_xblock/static/html/lms.html b/html_xblock/static/html/lms.html
@@ -1 +1 @@
-<div class="html5_xblock">{{ self.html | safe }}</div>
+<div class="html5_xblock xmodule_display xmodule_HtmlBlock">{{ self.html | safe }}</div>
diff --git a/requirements/base.txt b/requirements/base.txt
@@ -2,6 +2,5 @@
 
 xblock-utils==2.2.0
 edx-i18n-tools==0.9.1
-bleach==4.1.0  # version pinned for Maple
-# bleach[css]==5.0.0  # Use this for Nutmeg
+bleach[css]==5.0.0
 django==3.2.13
diff --git a/setup.py b/setup.py
@@ -23,7 +23,7 @@ def package_data(pkg, roots):
 
 setup(
     name='html-xblock',
-    version='1.2.2',
+    version='1.3.0',
     description='HTML XBlock will help creating and using a secure and easy-to-use HTML blocks',
     license='AGPL v3',
     packages=[
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		<div class="html5_xblock">{{ self.html \| safe }}</div>
		<div class="html5_xblock xmodule_display xmodule_HtmlBlock">{{ self.html \| safe }}</div>