Skip to content

Commit

Permalink
Embed: replace pyquery with selectolax
Browse files Browse the repository at this point in the history
Don't introduce a new dep.
MkDocs doesn't work since it doesn't have fjson files,
I'm deleting the test for mkdocs for now,
I'll add support for MkDocs once we have the parsing done in a more
general way (soon!).
  • Loading branch information
stsewd committed Mar 5, 2021
1 parent 9b8e506 commit 347d371
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 141 deletions.
6 changes: 0 additions & 6 deletions readthedocs/embed/tests/data/mkdocs/latest/index.json

This file was deleted.

63 changes: 13 additions & 50 deletions readthedocs/embed/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytest
from django_dynamic_fixture import get
from pyquery import PyQuery
from selectolax.parser import HTMLParser

from readthedocs.builds.constants import LATEST
from readthedocs.embed.views import do_embed
Expand Down Expand Up @@ -49,8 +49,15 @@ def _patch_sphinx_json_file(self, storage_mock, json_file, html_file):
)

def _get_html_content(self, html_file):
section_content = [PyQuery(html_file.open().read()).outerHtml()]
return section_content
content = HTMLParser(html_file.open().read())
# We override all links inside the embed,
# when doing so, the href attribute gets moved to the end.
# Do the same here.
for anchor in content.css('a'):
href = anchor.attributes.get('href')
if href and 'project.readthedocs.io' in href:
anchor.attrs['href'] = href
return content.body.child.html

@mock.patch('readthedocs.embed.views.build_media_storage')
def test_embed_unknown_section(self, storage_mock):
Expand Down Expand Up @@ -119,7 +126,7 @@ def test_embed_sphinx(self, storage_mock, section):
)

expected = {
'content': section_content,
'content': [section_content],
'headers': [
# TODO: return the full id here
{'I Need Secrets (or Environment Variables) in my Build': '#'},
Expand Down Expand Up @@ -175,7 +182,7 @@ def test_embed_sphinx_bibtex(self, storage_mock, section):
)

expected = {
'content': section_content,
'content': [section_content],
'headers': [
{'Getting Started': '#'},
{'Overview': '#overview'},
Expand Down Expand Up @@ -236,7 +243,7 @@ def test_embed_sphinx_glossary(self, storage_mock, section):
)

expected = {
'content': section_content,
'content': [section_content],
'headers': [
{'Glossary': '#'},
],
Expand All @@ -250,47 +257,3 @@ def test_embed_sphinx_glossary(self, storage_mock, section):
}

assert response.data == expected

@mock.patch('readthedocs.embed.views.build_media_storage')
def test_embed_mkdocs(self, storage_mock):
json_file = data_path / 'mkdocs/latest/index.json'
storage_mock.exists.return_value = True
storage_mock.open.side_effect = self._mock_open(
json_file.open().read()
)

self.version.documentation_type = MKDOCS
self.version.save()

response = do_embed(
project=self.project,
version=self.version,
doc='index',
section='Installation',
path='index.html',
)

expected = {
'content': mock.ANY, # too long to compare here
'headers': [
{'Overview': 'overview'},
{'Installation': 'installation'},
{'Getting Started': 'getting-started'},
{'Adding pages': 'adding-pages'},
{'Theming our documentation': 'theming-our-documentation'},
{'Changing the Favicon Icon': 'changing-the-favicon-icon'},
{'Building the site': 'building-the-site'},
{'Other Commands and Options': 'other-commands-and-options'},
{'Deploying': 'deploying'},
{'Getting help': 'getting-help'},
],
'url': 'http://project.readthedocs.io/en/latest/index.html',
'meta': {
'project': 'project',
'version': 'latest',
'doc': 'index',
'section': 'Installation',
},
}

assert response.data == expected
20 changes: 15 additions & 5 deletions readthedocs/embed/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,20 @@

def recurse_while_none(element):
"""Recursively find the leaf node with the ``href`` attribute."""
if element.text is None and element.getchildren():
return recurse_while_none(element.getchildren()[0])
children = list(element.iter())
if children:
return recurse_while_none(children[0])

href = element.attrib.get('href')
href = element.attributes.get('href')
if not href:
href = element.attrib.get('id')
return {element.text: href}
href = element.attributes.get('id')
return {element.text(): href}


def next_tag(element):
"""Return the next non-text sibling of element."""
while element:
element = element.next
if element.tag != '-text':
return element
return None
Loading

0 comments on commit 347d371

Please sign in to comment.