From 347d371a7f52832726bf3d6a09c615d087016ec2 Mon Sep 17 00:00:00 2001
From: Santos Gallegos <stsewd@protonmail.com>
Date: Thu, 4 Mar 2021 20:16:13 -0500
Subject: [PATCH] Embed: replace pyquery with selectolax

Use selectolax, which is already in our requirements, instead of
pyquery, so the embed code doesn't need an extra dep.

MkDocs doesn't work since it doesn't have fjson files,
so I'm deleting the test for MkDocs for now.
I'll add support for MkDocs once we have the parsing done in a more
general way (soon!).
---
 .../embed/tests/data/mkdocs/latest/index.json |   6 -
 readthedocs/embed/tests/test_api.py           |  63 ++--------
 readthedocs/embed/utils.py                    |  20 ++-
 readthedocs/embed/views.py                    | 114 ++++++------------
 requirements/pip.txt                          |   3 -
 5 files changed, 65 insertions(+), 141 deletions(-)
 delete mode 100644 readthedocs/embed/tests/data/mkdocs/latest/index.json
diff --git a/readthedocs/embed/tests/data/mkdocs/latest/index.json b/readthedocs/embed/tests/data/mkdocs/latest/index.json
deleted file mode 100644
index 829508f65b4..00000000000
--- a/readthedocs/embed/tests/data/mkdocs/latest/index.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
-    "content": "<h1 id=\"mkdocs\">MkDocs<a class=\"headerlink\" href=\"#mkdocs\" title=\"Permanent link\">\uf0c1</a></h1>\n<p>Project documentation with&nbsp;Markdown.</p>\n<hr />\n<h2 id=\"overview\">Overview<a class=\"headerlink\" href=\"#overview\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>MkDocs is a <strong>fast</strong>, <strong>simple</strong> and <strong>downright gorgeous</strong> static site\ngenerator that's geared towards building project documentation. Documentation\nsource files are written in Markdown, and configured with a single YAML\nconfiguration file.</p>\n<h3 id=\"host-anywhere\">Host anywhere<a class=\"headerlink\" href=\"#host-anywhere\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>MkDocs builds completely static HTML sites that you can host on GitHub pages,\nAmazon S3, or <a href=\"./user-guide/deploying-your-docs/\">anywhere</a> else you choose.</p>\n<h3 id=\"great-themes-available\">Great themes available<a class=\"headerlink\" href=\"#great-themes-available\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>There's a stack of good looking themes available for MkDocs. Choose between\nthe built in themes: <a href=\"./user-guide/styling-your-docs/#mkdocs\">mkdocs</a> and <a href=\"./user-guide/styling-your-docs/#readthedocs\">readthedocs</a>, select one of the 3rd\nparty themes in the <a href=\"https://github.com/mkdocs/mkdocs/wiki/MkDocs-Themes\">MkDocs wiki</a>, or <a href=\"./user-guide/custom-themes/\">build your own</a>.</p>\n<h3 id=\"preview-your-site-as-you-work\">Preview your site as you work<a class=\"headerlink\" href=\"#preview-your-site-as-you-work\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>The built-in dev-server allows you to preview your documentation as you're\nwriting it. 
It will even auto-reload and refresh your browser whenever you save\nyour changes.</p>\n<h3 id=\"easy-to-customize\">Easy to customize<a class=\"headerlink\" href=\"#easy-to-customize\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>Get your project documentation looking just the way you want it by customizing\nthe theme.</p>\n<hr />\n<h2 id=\"installation\">Installation<a class=\"headerlink\" href=\"#installation\" title=\"Permanent link\">\uf0c1</a></h2>\n<h3 id=\"install-with-a-package-manager\">Install with a Package Manager<a class=\"headerlink\" href=\"#install-with-a-package-manager\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>If you have and use a package manager (such as <a href=\"https://help.ubuntu.com/community/AptGet/Howto\">apt-get</a>, <a href=\"http://dnf.readthedocs.io/en/latest/index.html\">dnf</a>, <a href=\"http://brew.sh/\">homebrew</a>,\n<a href=\"http://yum.baseurl.org/\">yum</a>, <a href=\"https://chocolatey.org/\">chocolatey</a>, etc.) to install packages on your system, then you may\nwant to search for a \"MkDocs\" package and, if a recent version is available,\ninstall it with your package manager (check your system's documentation for\ndetails). That's it, you're done! Skip down to <a href=\"#getting-started\">Getting Started</a>.</p>\n<p>If your package manager does not have a recent \"MkDocs\" package, you can still\nuse your package manager to install \"Python\" and \"pip\". Then you can use pip to\n<a href=\"#installing-mkdocs\">install MkDocs</a>.</p>\n<h3 id=\"manual-installation\">Manual Installation<a class=\"headerlink\" href=\"#manual-installation\" title=\"Permanent link\">\uf0c1</a></h3>\n<p>In order to manually install MkDocs you'll need <a href=\"https://www.python.org/\">Python</a> installed on your\nsystem, as well as the Python package manager, <a href=\"http://pip.readthedocs.io/en/stable/installing/\">pip</a>. 
You can check if you have\nthese already installed from the command line:</p>\n<pre><code class=\"bash\">$ python --version\nPython 2.7.2\n$ pip --version\npip 1.5.2\n</code></pre>\n\n<p>MkDocs supports Python versions 2.7, 3.3, 3.4, 3.5 and pypy.</p>\n<h4 id=\"installing-python\">Installing Python<a class=\"headerlink\" href=\"#installing-python\" title=\"Permanent link\">\uf0c1</a></h4>\n<p>Install <a href=\"https://www.python.org/\">Python</a> by downloading an installer appropriate for your system from\n<a href=\"https://www.python.org/downloads/\">python.org</a> and running it.</p>\n<div class=\"admonition note\">\n<p class=\"admonition-title\">Note</p>\n<p>If you are installing Python on Windows, be sure to check the box to have\nPython added to your PATH if the installer offers such an option (it's\nnormally off by default).</p>\n<p><img alt=\"Add Python to PATH\" src=\"./img/win-py-install.png\" /></p>\n</div>\n<h4 id=\"installing-pip\">Installing pip<a class=\"headerlink\" href=\"#installing-pip\" title=\"Permanent link\">\uf0c1</a></h4>\n<p>If you're using a recent version of Python, the Python package manager, <a href=\"http://pip.readthedocs.io/en/stable/installing/\">pip</a>,\nis most likely installed by default. 
However, you may need to upgrade pip to the\nlasted version:</p>\n<pre><code class=\"bash\">pip install --upgrade pip\n</code></pre>\n\n<p>If you need to install <a href=\"http://pip.readthedocs.io/en/stable/installing/\">pip</a> for the first time, download <a href=\"https://bootstrap.pypa.io/get-pip.py\">get-pip.py</a>.\nThen run the following command to install it:</p>\n<pre><code class=\"bash\">python get-pip.py\n</code></pre>\n\n<h4 id=\"installing-mkdocs\">Installing MkDocs<a class=\"headerlink\" href=\"#installing-mkdocs\" title=\"Permanent link\">\uf0c1</a></h4>\n<p>Install the <code>mkdocs</code> package using pip:</p>\n<pre><code class=\"bash\">pip install mkdocs\n</code></pre>\n\n<p>You should now have the <code>mkdocs</code> command installed on your system. Run <code>mkdocs\n--version</code> to check that everything worked okay.</p>\n<pre><code class=\"bash\">$ mkdocs --version\nmkdocs, version 0.15.3\n</code></pre>\n\n<div class=\"admonition note\">\n<p class=\"admonition-title\">Note</p>\n<p>If you are using Windows, some of the above commands may not work\nout-of-the-box.</p>\n<p>A quick solution may be to preface every Python command with <code>python -m</code>\nlike this:</p>\n<pre><code>python -m pip install mkdocs\npython -m mkdocs\n</code></pre>\n<p>For a more permanent solution, you may need to edit your <code>PATH</code> environment\nvariable to include the <code>Scripts</code> directory of your Python installation.\nRecent versions of Python include a script to do this for you. Navigate to\nyour Python installation directory (for example <code>C:\\Python34\\</code>), open the\n<code>Tools</code>, then <code>Scripts</code> folder, and run the <code>win_add2path.py</code> file by double\nclicking on it. 
Alternatively, you can <a href=\"https://svn.python.org/projects/python/trunk/Tools/scripts/win_add2path.py\">download</a> the script and run it\n(<code>python win_add2path.py</code>).</p>\n</div>\n<hr />\n<h2 id=\"getting-started\">Getting Started<a class=\"headerlink\" href=\"#getting-started\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>Getting started is super easy.</p>\n<pre><code class=\"bash\">mkdocs new my-project\ncd my-project\n</code></pre>\n\n<p>Take a moment to review the initial project that has been created for you.</p>\n<p><img alt=\"The initial MkDocs layout\" src=\"./img/initial-layout.png\" /></p>\n<p>There's a single configuration file named <code>mkdocs.yml</code>, and a folder named\n<code>docs</code> that will contain your documentation source files. Right now the <code>docs</code>\nfolder just contains a single documentation page, named <code>index.md</code>.</p>\n<p>MkDocs comes with a built-in dev-server that lets you preview your documentation\nas you work on it. 
Make sure you're in the same directory as the <code>mkdocs.yml</code>\nconfiguration file, and then start the server by running the <code>mkdocs serve</code>\ncommand:</p>\n<pre><code class=\"bash\">$ mkdocs serve\nINFO    -  Building documentation...\nINFO    -  Cleaning site directory\n[I 160402 15:50:43 server:271] Serving on http://127.0.0.1:8000\n[I 160402 15:50:43 handlers:58] Start watching changes\n[I 160402 15:50:43 handlers:60] Start detecting changes\n</code></pre>\n\n<p>Open up <code>http://127.0.0.1:8000/</code> in your browser, and you'll see the default\nhome page being displayed:</p>\n<p><img alt=\"The MkDocs live server\" src=\"./img/screenshot.png\" /></p>\n<p>The dev-server also supports auto-reloading, and will rebuild your documentation\nwhenever anything in the configuration file, documentation directory, or theme\ndirectory changes.</p>\n<p>Open the <code>docs/index.md</code> document in your text editor of choice, change the\ninitial heading to <code>MkLorum</code>, and save your changes. Your browser will\nauto-reload and you should see your updated documentation immediately.</p>\n<p>Now try editing the configuration file: <code>mkdocs.yml</code>. 
Change the\n<a href=\"./user-guide/configuration/#site_name\"><code>site_name</code></a> setting to <code>MkLorum</code> and save the file.</p>\n<pre><code class=\"yaml\">site_name: MkLorum\n</code></pre>\n\n<p>Your browser should immediately reload, and you'll see your new site name take\neffect.</p>\n<p><img alt=\"The site_name setting\" src=\"./img/site-name.png\" /></p>\n<h2 id=\"adding-pages\">Adding pages<a class=\"headerlink\" href=\"#adding-pages\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>Now add a second page to your documentation:</p>\n<pre><code class=\"bash\">curl 'https://jaspervdj.be/lorem-markdownum/markdown.txt' &gt; docs/about.md\n</code></pre>\n\n<p>As our documentation site will include some navigation headers, you may want to\nedit the configuration file and add some information about the order, title, and\nnesting of each page in the navigation header by adding a <a href=\"./user-guide/configuration/#pages\"><code>pages</code></a>\nsetting:</p>\n<pre><code class=\"yaml\">site_name: MkLorum\npages:\n    - Home: index.md\n    - About: about.md\n</code></pre>\n\n<p>Save your changes and you'll now see a navigation bar with <code>Home</code> and <code>About</code>\nitems on the left as well as <code>Search</code>, <code>Previous</code>, and <code>Next</code> items on the\nright.</p>\n<p><img alt=\"Screenshot\" src=\"./img/multipage.png\" /></p>\n<p>Try the menu items and navigate back and forth between pages. Then click on\n<code>Search</code>. A search dialog will appear, allowing you to search for any text on\nany page. Notice that the search results include every occurrence of the search\nterm on the site and links directly to the section of the page in which the\nsearch term appears. 
You get of all that with no effort or configuration on your\npart!</p>\n<p><img alt=\"Screenshot\" src=\"./img/search.png\" /></p>\n<h2 id=\"theming-our-documentation\">Theming our documentation<a class=\"headerlink\" href=\"#theming-our-documentation\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>Now change the configuration file to alter how the documentation is displayed by\nchanging the theme. Edit the <code>mkdocs.yml</code> file and add a <a href=\"./user-guide/configuration/#theme\"><code>theme</code></a> setting:</p>\n<pre><code class=\"yaml\">site_name: MkLorum\npages:\n    - Home: index.md\n    - About: about.md\ntheme: readthedocs\n</code></pre>\n\n<p>Save your changes, and you'll see the ReadTheDocs theme being used.</p>\n<p><img alt=\"Screenshot\" src=\"./img/readthedocs.png\" /></p>\n<h2 id=\"changing-the-favicon-icon\">Changing the Favicon Icon<a class=\"headerlink\" href=\"#changing-the-favicon-icon\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>By default, MkDocs uses the <a href=\"./img/favicon.ico\">MkDocs favicon</a> icon. To use a different icon, create\nan <code>img</code> subdirectory in your <code>docs_dir</code> and copy your custom <code>favicon.ico</code> file\nto that directory. MkDocs will automatically detect and use that file as your\nfavicon icon.</p>\n<h2 id=\"building-the-site\">Building the site<a class=\"headerlink\" href=\"#building-the-site\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>That's looking good. You're ready to deploy the first pass of your <code>MkLorum</code>\ndocumentation. First build the documentation:</p>\n<pre><code class=\"bash\">mkdocs build\n</code></pre>\n\n<p>This will create a new directory, named <code>site</code>. 
Take a look inside the\ndirectory:</p>\n<pre><code class=\"bash\">$ ls site\nabout  fonts  index.html  license  search.html\ncss    img    js          mkdocs   sitemap.xml\n</code></pre>\n\n<p>Notice that your source documentation has been output as two HTML files named\n<code>index.html</code> and <code>about/index.html</code>. You also have various other media that's\nbeen copied into the <code>site</code> directory as part of the documentation theme. You\neven have a <code>sitemap.xml</code> file and <code>mkdocs/search_index.json</code>.</p>\n<p>If you're using source code control such as <code>git</code> you probably don't want to\ncheck your documentation builds into the repository. Add a line containing\n<code>site/</code> to your <code>.gitignore</code> file.</p>\n<pre><code class=\"bash\">echo &quot;site/&quot; &gt;&gt; .gitignore\n</code></pre>\n\n<p>If you're using another source code control tool you'll want to check its\ndocumentation on how to ignore specific directories.</p>\n<p>After some time, files may be removed from the documentation but they will still\nreside in the <code>site</code> directory. To remove those stale files, just run <code>mkdocs</code>\nwith the <code>--clean</code> switch.</p>\n<pre><code class=\"bash\">mkdocs build --clean\n</code></pre>\n\n<h2 id=\"other-commands-and-options\">Other Commands and Options<a class=\"headerlink\" href=\"#other-commands-and-options\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>There are various other commands and options available. For a complete list of\ncommands, use the <code>--help</code> flag:</p>\n<pre><code class=\"bash\">mkdocs --help\n</code></pre>\n\n<p>To view a list of options available on a given command, use the <code>--help</code> flag\nwith that command. 
For example, to get a list of all options available for the\n<code>build</code> command run the following:</p>\n<pre><code class=\"bash\">mkdocs build --help\n</code></pre>\n\n<h2 id=\"deploying\">Deploying<a class=\"headerlink\" href=\"#deploying\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>The documentation site that you just built only uses static files so you'll be\nable to host it from pretty much anywhere. <a href=\"https://help.github.com/articles/creating-project-pages-manually/\">GitHub project pages</a> and <a href=\"http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteHosting.html\">Amazon\nS3</a> may be good hosting options, depending upon your needs. Upload the contents\nof the entire <code>site</code> directory to wherever you're hosting your website from and\nyou're done. For specific instructions on a number of common hosts, see the\n<a href=\"./user-guide/deploying-your-docs/\">Deploying your Docs</a> page.</p>\n<h2 id=\"getting-help\">Getting help<a class=\"headerlink\" href=\"#getting-help\" title=\"Permanent link\">\uf0c1</a></h2>\n<p>To get help with MkDocs, please use the <a href=\"https://groups.google.com/forum/#!forum/mkdocs\">discussion group</a>, <a href=\"https://github.com/mkdocs/mkdocs/issues\">GitHub issues</a> or\nthe MkDocs IRC channel <code>#mkdocs</code> on freenode.</p>", 
-    "url": "/", 
-    "language": "en", 
-    "title": "Home"
-}
\ No newline at end of file
diff --git a/readthedocs/embed/tests/test_api.py b/readthedocs/embed/tests/test_api.py
index 83073a12852..e0d3bd80d2c 100644
--- a/readthedocs/embed/tests/test_api.py
+++ b/readthedocs/embed/tests/test_api.py
@@ -6,7 +6,7 @@
 
 import pytest
 from django_dynamic_fixture import get
-from pyquery import PyQuery
+from selectolax.parser import HTMLParser
 
 from readthedocs.builds.constants import LATEST
 from readthedocs.embed.views import do_embed
@@ -49,8 +49,15 @@ def _patch_sphinx_json_file(self, storage_mock, json_file, html_file):
         )
 
     def _get_html_content(self, html_file):
-        section_content = [PyQuery(html_file.open().read()).outerHtml()]
-        return section_content
+        content = HTMLParser(html_file.open().read())
+        # We override all links inside the embed,
+        # when doing so, the href attribute gets moved to the end.
+        # Do the same here.
+        for anchor in content.css('a'):
+            href = anchor.attributes.get('href')
+            if href and 'project.readthedocs.io' in href:
+                anchor.attrs['href'] = href
+        return content.body.child.html
 
     @mock.patch('readthedocs.embed.views.build_media_storage')
     def test_embed_unknown_section(self, storage_mock):
@@ -119,7 +126,7 @@ def test_embed_sphinx(self, storage_mock, section):
         )
 
         expected = {
-            'content': section_content,
+            'content': [section_content],
             'headers': [
                 # TODO: return the full id here
                 {'I Need Secrets (or Environment Variables) in my Build': '#'},
@@ -175,7 +182,7 @@ def test_embed_sphinx_bibtex(self, storage_mock, section):
         )
 
         expected = {
-            'content': section_content,
+            'content': [section_content],
             'headers': [
                 {'Getting Started': '#'},
                 {'Overview': '#overview'},
@@ -236,7 +243,7 @@ def test_embed_sphinx_glossary(self, storage_mock, section):
         )
 
         expected = {
-            'content': section_content,
+            'content': [section_content],
             'headers': [
                 {'Glossary': '#'},
             ],
@@ -250,47 +257,3 @@ def test_embed_sphinx_glossary(self, storage_mock, section):
         }
 
         assert response.data == expected
-
-    @mock.patch('readthedocs.embed.views.build_media_storage')
-    def test_embed_mkdocs(self, storage_mock):
-        json_file = data_path / 'mkdocs/latest/index.json'
-        storage_mock.exists.return_value = True
-        storage_mock.open.side_effect = self._mock_open(
-            json_file.open().read()
-        )
-
-        self.version.documentation_type = MKDOCS
-        self.version.save()
-
-        response = do_embed(
-            project=self.project,
-            version=self.version,
-            doc='index',
-            section='Installation',
-            path='index.html',
-        )
-
-        expected = {
-            'content': mock.ANY,  # too long to compare here
-            'headers': [
-                {'Overview': 'overview'},
-                {'Installation': 'installation'},
-                {'Getting Started': 'getting-started'},
-                {'Adding pages': 'adding-pages'},
-                {'Theming our documentation': 'theming-our-documentation'},
-                {'Changing the Favicon Icon': 'changing-the-favicon-icon'},
-                {'Building the site': 'building-the-site'},
-                {'Other Commands and Options': 'other-commands-and-options'},
-                {'Deploying': 'deploying'},
-                {'Getting help': 'getting-help'},
-            ],
-            'url': 'http://project.readthedocs.io/en/latest/index.html',
-            'meta': {
-                'project': 'project',
-                'version': 'latest',
-                'doc': 'index',
-                'section': 'Installation',
-            },
-        }
-
-        assert response.data == expected
diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py
index 95f8640749f..014fcfdf934 100644
--- a/readthedocs/embed/utils.py
+++ b/readthedocs/embed/utils.py
@@ -3,10 +3,20 @@
 
 def recurse_while_none(element):
     """Recursively find the leaf node with the ``href`` attribute."""
-    if element.text is None and element.getchildren():
-        return recurse_while_none(element.getchildren()[0])
+    children = list(element.iter())
+    if children:
+        return recurse_while_none(children[0])
 
-    href = element.attrib.get('href')
+    href = element.attributes.get('href')
     if not href:
-        href = element.attrib.get('id')
-    return {element.text: href}
+        href = element.attributes.get('id')
+    return {element.text(): href}
+
+
+def next_tag(element):
+    """Return the next non-text sibling of ``element``, or ``None``."""
+    while element:
+        element = element.next
+        if element and element.tag != '-text':
+            return element
+    return None
diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py
index f9ad80481bf..963c953fbfa 100644
--- a/readthedocs/embed/views.py
+++ b/readthedocs/embed/views.py
@@ -10,18 +10,18 @@
 from django.template.defaultfilters import slugify
 from django.utils.functional import cached_property
 from docutils.nodes import make_id
-from pyquery import PyQuery as PQ  # noqa
 from rest_framework import status
 from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer
 from rest_framework.response import Response
 from rest_framework.views import APIView
+from selectolax.parser import HTMLParser
 
 from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion
 from readthedocs.builds.constants import EXTERNAL
 from readthedocs.core.resolver import resolve
 from readthedocs.core.unresolver import unresolve
 from readthedocs.core.utils.extend import SettingsOverrideObject
-from readthedocs.embed.utils import recurse_while_none
+from readthedocs.embed.utils import next_tag, recurse_while_none
 from readthedocs.projects.models import Project
 from readthedocs.storage import build_media_storage
 
@@ -35,7 +35,7 @@ def escape_selector(selector):
     return ret
 
 
-def clean_links(obj, url):
+def clean_links(node, url):
     """
     Rewrite (internal) links to make them absolute.
 
@@ -44,24 +44,24 @@ def clean_links(obj, url):
     3. prepend URL (without filename) to internal relative links
     """
     if url is None:
-        return obj
+        return node
 
-    for link in obj.find('a'):
+    for link in node.css('a'):
         base_url = urlparse(url)
         # We need to make all internal links, to be absolute
-        href = link.attrib['href']
+        href = link.attributes.get('href')
+        if not href:
+            continue
+
         parsed_href = urlparse(href)
         if parsed_href.scheme or parsed_href.path.startswith('/'):
-            # don't change external links
+            # don't change absolute paths/URLs
             continue
 
         if not parsed_href.path and parsed_href.fragment:
             # href="#section-link"
-            new_href = base_url.geturl() + href
-            link.attrib['href'] = new_href
-            continue
-
-        if not base_url.path.endswith('/'):
+            link.attrs['href'] = base_url.geturl() + href
+        elif not base_url.path.endswith('/'):
             # internal relative link
             # href="../../another.html" and ``base_url`` is not HTMLDir
             # (e.g. /en/latest/deep/internal/section/page.html)
@@ -73,11 +73,7 @@ def clean_links(obj, url):
             path, _ = base_url.path.rsplit('/', 1)
             # append the value of href (../../another.html) to the base URL.
             base_url = base_url._replace(path=path + '/')
-
-        new_href = base_url.geturl() + href
-        link.attrib['href'] = new_href
-
-    return obj
+            link.attrs['href'] = base_url.geturl() + href
 
 
 class EmbedAPIBase(APIView):
@@ -276,7 +272,7 @@ def parse_sphinx(content, section, url):
 
     headers = [
         recurse_while_none(element)
-        for element in PQ(toc)('a')
+        for element in HTMLParser(toc).css('a')
     ]
 
     if not section and headers:
@@ -288,7 +284,7 @@ def parse_sphinx(content, section, url):
     if not section:
         return [], headers, None
 
-    body_obj = PQ(body)
+    body_obj = HTMLParser(body)
     escaped_section = escape_selector(section)
 
     elements_id = [
@@ -297,22 +293,24 @@ def parse_sphinx(content, section, url):
         make_id(escaped_section),
         f'module-{escaped_section}',
     ]
-    query_result = []
+    query_result = None
     for element_id in elements_id:
         if not element_id:
             continue
-        query_result = body_obj(f'#{element_id}')
+        query_result = body_obj.css_first(f'#{element_id}')
         if query_result:
             break
 
     if not query_result:
-        selector = f':header:contains("{escaped_section}")'
-        query_result = body_obj(selector).parent()
+        selector = f'[header~={escaped_section}]'
+        query_result = body_obj.css_first(selector)
+        if query_result:
+            query_result = query_result.parent
 
     # Handle ``dt`` special cases
-    if len(query_result) == 1 and query_result[0].tag == 'dt':
-        parent = query_result.parent()
-        if 'glossary' in parent.attr('class'):
+    if query_result and query_result.tag == 'dt':
+        parent = query_result.parent
+        if 'glossary' in parent.attributes.get('class'):
             # Sphinx HTML structure for term glossary puts the ``id`` in the
             # ``dt`` element with the title of the term. In this case, we
             # need to return the next sibling which contains the definition
@@ -324,8 +322,8 @@ def parse_sphinx(content, section, url):
             # <dd>Text definition for the term</dd>
             # ...
             # </dl>
-            query_result = query_result.next()
-        elif 'citation' in parent.attr('class'):
+            query_result = next_tag(query_result)
+        elif 'citation' in parent.attributes.get('class'):
             # Sphinx HTML structure for sphinxcontrib-bibtex puts the ``id`` in the
             # ``dt`` element with the title of the cite. In this case, we
             # need to return the next sibling which contains the cite itself.
@@ -336,7 +334,7 @@ def parse_sphinx(content, section, url):
             # <dd>Content of the cite</dd>
             # ...
             # </dl>
-            query_result = query_result.next()
+            query_result = next_tag(query_result)
         else:
             # Sphinx HTML structure for definition list puts the ``id``
             # the ``dt`` element, instead of the ``dl``. This makes
@@ -352,58 +350,20 @@ def parse_sphinx(content, section, url):
             # </dl>
             query_result = parent
 
-    def dump(obj):
-        """Handle API-based doc HTML."""
-        if obj[0].tag in ['span', 'h2']:
-            return obj.parent().outerHtml()
-        return obj.outerHtml()
+    # Return the outer html for these elements
+    if query_result and query_result.tag in ['span', 'h2'] and query_result.parent:
+        query_result = query_result.parent
 
-    ret = [
-        dump(clean_links(PQ(obj), url))
-        for obj in query_result
-    ]
-    return ret, headers, section
+    section_html = []
+    if query_result:
+        clean_links(query_result, url)
+        section_html = [query_result.html]
+
+    return section_html, headers, section
 
 
 def parse_mkdocs(content, section, url):  # pylint: disable=unused-argument
     """Get the embed content for the section."""
     ret = []
     headers = []
-
-    if not content or not content.get('content'):
-        return (None, None, section)
-
-    body = content['content']
-    for element in PQ(body)('h2'):
-        headers.append(recurse_while_none(element))
-
-    if not section and headers:
-        # If no section is sent, return the content of the first one
-        section = list(headers[0].keys())[0].lower()
-
-    if section:
-        body_obj = PQ(body)
-        escaped_section = escape_selector(section)
-        section_list = body_obj(
-            ':header:contains("{title}")'.format(title=str(escaped_section)))
-        for num in range(len(section_list)):
-            header2 = section_list.eq(num)
-            # h2_title = h2.text().strip()
-            # section_id = h2.attr('id')
-            h2_content = ""
-            next_p = header2.next()
-            while next_p:
-                if next_p[0].tag == 'h2':
-                    break
-                h2_html = next_p.outerHtml()
-                if h2_html:
-                    h2_content += "\n%s\n" % h2_html
-                next_p = next_p.next()
-            if h2_content:
-                ret.append(h2_content)
-                # ret.append({
-                #     'id': section_id,
-                #     'title': h2_title,
-                #     'content': h2_content,
-                # })
-    return (ret, headers, section)
+    return ret, headers, section
diff --git a/requirements/pip.txt b/requirements/pip.txt
index 539cf25e327..0d1d8fbb2d0 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -51,9 +51,6 @@ elasticsearch-dsl==7.3.0  # pyup: <8.0
 django-elasticsearch-dsl==7.1.4  # pyup: <8.0
 selectolax==0.2.10
 
-# embed
-pyquery==1.4.3
-
 # NOTE: this dep can be removed in python 3.7 in favor of ``date.fromisoformat``
 python-dateutil==2.8.1