From 61554ac8b782b7885efd5f638b71281cba3631e3 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 5 Jul 2021 16:15:14 +0200 Subject: [PATCH 01/48] Move `clean_links` to embed/utils.py This function will be shared between APIv2 and APIv3. --- readthedocs/embed/utils.py | 45 ++++++++++++++++++++++++++++++++++++ readthedocs/embed/views.py | 47 +------------------------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py index 95f8640749f..77e6fc38b5c 100644 --- a/readthedocs/embed/utils.py +++ b/readthedocs/embed/utils.py @@ -10,3 +10,48 @@ def recurse_while_none(element): if not href: href = element.attrib.get('id') return {element.text: href} + + +def clean_links(obj, url): + """ + Rewrite (internal) links to make them absolute. + + 1. external links are not changed + 2. prepend URL to links that are just fragments (e.g. #section) + 3. prepend URL (without filename) to internal relative links + """ + if url is None: + return obj + + for link in obj.find('a'): + base_url = urlparse(url) + # We need to make all internal links, to be absolute + href = link.attrib['href'] + parsed_href = urlparse(href) + if parsed_href.scheme or parsed_href.path.startswith('/'): + # don't change external links + continue + + if not parsed_href.path and parsed_href.fragment: + # href="#section-link" + new_href = base_url.geturl() + href + link.attrib['href'] = new_href + continue + + if not base_url.path.endswith('/'): + # internal relative link + # href="../../another.html" and ``base_url`` is not HTMLDir + # (e.g. /en/latest/deep/internal/section/page.html) + # we want to remove the trailing filename (page.html) and use the rest as base URL + # The resulting absolute link should be + # https://slug.readthedocs.io/en/latest/deep/internal/section/../../another.html + + # remove the filename (page.html) from the original document URL (base_url) and, + path, _ = base_url.path.rsplit('/', 1) + # append the value of href (../../another.html) to the base URL. + base_url = base_url._replace(path=path + '/') + + new_href = base_url.geturl() + href + link.attrib['href'] = new_href + + return obj diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py index 7e85423adf5..5886bc2698d 100644 --- a/readthedocs/embed/views.py +++ b/readthedocs/embed/views.py @@ -22,7 +22,7 @@ from readthedocs.core.resolver import resolve from readthedocs.core.unresolver import unresolve from readthedocs.core.utils.extend import SettingsOverrideObject -from readthedocs.embed.utils import recurse_while_none +from readthedocs.embed.utils import recurse_while_none, clean_links from readthedocs.projects.models import Project from readthedocs.storage import build_media_storage @@ -36,51 +36,6 @@ def escape_selector(selector): return ret -def clean_links(obj, url): - """ - Rewrite (internal) links to make them absolute. - - 1. external links are not changed - 2. prepend URL to links that are just fragments (e.g. #section) - 3. prepend URL (without filename) to internal relative links - """ - if url is None: - return obj - - for link in obj.find('a'): - base_url = urlparse(url) - # We need to make all internal links, to be absolute - href = link.attrib['href'] - parsed_href = urlparse(href) - if parsed_href.scheme or parsed_href.path.startswith('/'): - # don't change external links - continue - - if not parsed_href.path and parsed_href.fragment: - # href="#section-link" - new_href = base_url.geturl() + href - link.attrib['href'] = new_href - continue - - if not base_url.path.endswith('/'): - # internal relative link - # href="../../another.html" and ``base_url`` is not HTMLDir - # (e.g. /en/latest/deep/internal/section/page.html) - # we want to remove the trailing filename (page.html) and use the rest as base URL - # The resulting absolute link should be - # https://slug.readthedocs.io/en/latest/deep/internal/section/../../another.html - - # remove the filename (page.html) from the original document URL (base_url) and, - path, _ = base_url.path.rsplit('/', 1) - # append the value of href (../../another.html) to the base URL. - base_url = base_url._replace(path=path + '/') - - new_href = base_url.geturl() + href - link.attrib['href'] = new_href - - return obj - - class EmbedAPIBase(CachedResponseMixin, APIView): # pylint: disable=line-too-long From 311ada4f5b816b60a28bc1d0856c728540058e69 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 13:18:12 +0200 Subject: [PATCH 02/48] `clean_links` response HTML in raw instead of a PyQuery object Making `clean_links` to response HTML in raw allows us to reuse this code in APIv2 and APIv3. --- readthedocs/embed/utils.py | 12 ++++++++++-- readthedocs/embed/views.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py index 77e6fc38b5c..613956249ce 100644 --- a/readthedocs/embed/utils.py +++ b/readthedocs/embed/utils.py @@ -1,5 +1,6 @@ """Embed utils.""" - +from urllib.parse import urlparse +from pyquery import PyQuery as PQ # noqa def recurse_while_none(element): """Recursively find the leaf node with the ``href`` attribute.""" @@ -12,7 +13,7 @@ def recurse_while_none(element): return {element.text: href} -def clean_links(obj, url): +def clean_links(obj, url, html_raw_response=False): """ Rewrite (internal) links to make them absolute. @@ -20,6 +21,10 @@ def clean_links(obj, url): 2. prepend URL to links that are just fragments (e.g. #section) 3. prepend URL (without filename) to internal relative links """ + + # TODO: do not depend on PyQuery + obj = PQ(obj) + if url is None: return obj @@ -54,4 +59,7 @@ def clean_links(obj, url): new_href = base_url.geturl() + href link.attrib['href'] = new_href + if html_raw_response: + return obj.outerHtml() + return obj diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py index 5886bc2698d..7dc2ef24701 100644 --- a/readthedocs/embed/views.py +++ b/readthedocs/embed/views.py @@ -326,7 +326,7 @@ def dump(obj): return obj.outerHtml() ret = [ - dump(clean_links(PQ(obj), url)) + dump(clean_links(obj, url)) for obj in query_result ] return ret, headers, section From 76d0bf5dc35afeec97a49707eeaa2468ebf8e017 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 13:28:07 +0200 Subject: [PATCH 03/48] Implement the minimal version of the contract - support external sites for allowed domains (via setting) - given a URL and a fragment, it returns the exact HTML portion if found - cache external request for some time - rate-limit external requests to 50/m per domain - uses `requests.get` for external sites (only public documents) - uses Django storage for local hosted sites (allows only PUBLIC versions) - reuses `clean_links` util function (depends on PyQuery) from EmbedAPI v2 --- readthedocs/embed/v3/__init__.py | 0 readthedocs/embed/v3/urls.py | 8 + readthedocs/embed/v3/views.py | 209 +++++++++++++++++++++++++ readthedocs/settings/base.py | 6 + readthedocs/settings/docker_compose.py | 6 + readthedocs/urls.py | 1 + 6 files changed, 230 insertions(+) create mode 100644 readthedocs/embed/v3/__init__.py create mode 100644 readthedocs/embed/v3/urls.py create mode 100644 readthedocs/embed/v3/views.py diff --git a/readthedocs/embed/v3/__init__.py b/readthedocs/embed/v3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/embed/v3/urls.py b/readthedocs/embed/v3/urls.py new file mode 100644 index 00000000000..5d7c51852d5 --- /dev/null +++ b/readthedocs/embed/v3/urls.py @@ -0,0 +1,8 @@ +from django.conf.urls import url + +from .views import EmbedAPI + + +urlpatterns = [ + url(r'', EmbedAPI.as_view(), name='embed_api_v3'), +] diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py new file mode 100644 index 00000000000..6c7237e9900 --- /dev/null +++ b/readthedocs/embed/v3/views.py @@ -0,0 +1,209 @@ +"""Views for the EmbedAPI v3 app.""" + +import logging +import re +from urllib.parse import urlparse +import requests + +from selectolax.parser import HTMLParser +from pyquery import PyQuery as PQ # noqa + +from django.conf import settings +from django.core.cache import cache +from django.shortcuts import get_object_or_404 +from django.utils.functional import cached_property +from rest_framework import status +from rest_framework.permissions import AllowAny +from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer +from rest_framework.response import Response +from rest_framework.views import APIView + +from readthedocs.api.v2.mixins import CachedResponseMixin +from readthedocs.core.unresolver import unresolve +from readthedocs.core.utils.extend import SettingsOverrideObject +from readthedocs.embed.utils import clean_links +from readthedocs.projects.constants import PUBLIC +from readthedocs.storage import build_media_storage + +log = logging.getLogger(__name__) + + + +class EmbedAPIBase(CachedResponseMixin, APIView): + + # pylint: disable=line-too-long + + """ + Embed a section of content from any Read the Docs page. + + ### Arguments + + * url (with fragment) (required) + * doctool + * doctoolversion + + ### Example + + GET https://readthedocs.org/api/v3/embed/?url=https://docs.readthedocs.io/en/latest/features.html%23#full-text-search + + """ # noqa + + permission_classes = [AllowAny] + renderer_classes = [JSONRenderer, BrowsableAPIRenderer] + + @cached_property + def unresolved_url(self): + url = self.request.GET.get('url') + if not url: + return None + return unresolve(url) + + def _download_page_content(self, url): + cached_response = cache.get(url) + if cached_response: + log.debug('Cached response. url=%s', url) + return cached_response + + response = requests.get(url) + if response.ok: + cache.set(url, response.text, timeout=60 * 5) + return response.text + + def _get_page_content_from_storage(self): + project = self.unresolved_url.project + version = get_object_or_404( + project.versions, + slug=self.unresolved_url.version_slug, + # Only allow PUBLIC versions when getting the content from our storage + privacy_level=PUBLIC, + ) + storage_path = project.get_storage_path( + 'html', + version_slug=version.slug, + include_file=False, + version_type=version.type, + ) + file_path = build_media_storage.join( + storage_path, + self.unresolved_url.filename, + ) + try: + with build_media_storage.open(file_path) as fd: + return fd.read() + except Exception: # noqa + log.warning('Unable to read file. file_path=%s', file_path) + + return None + + def _get_content_by_fragment(self, url, fragment, external): + if external: + url_without_fragment = urlparse(url)._replace(fragment='').geturl() + page_content = self._download_page_content(url_without_fragment) + else: + page_content = self._get_page_content_from_storage() + node = HTMLParser(page_content).css_first(f'#{fragment}') + if node: + return node.html + + def get(self, request): + url = request.GET.get('url') + doctool = request.GET.get('doctool') + doctoolversion = request.GET.get('doctoolversion') + if not url: + return Response( + { + 'error': ( + 'Invalid arguments. ' + 'Please provide "url".' + ) + }, + status=status.HTTP_400_BAD_REQUEST + ) + + if not all([doctool, doctoolversion]) and any([doctool, doctoolversion]): + return Response( + { + 'error': ( + 'Invalid arguments. ' + 'Please provide "doctool" and "doctoolversion" or none of them.' + ) + }, + status=status.HTTP_400_BAD_REQUEST + ) + + # Note that ``readthedocs.core.unresolver.unresolve`` returns ``None`` + # when it can find the project in our database + external = self.unresolved_url is None + + parsed_url = urlparse(url) + external_domain = parsed_url.netloc + if external and external_domain: + allowed_domain = False + for domain in settings.RTD_EMBED_API_EXTERNAL_DOMAINS: + if re.match(domain, external_domain): + allowed_domain = True + break + + if not allowed_domain: + return Response( + { + 'error': ( + 'External domain not allowed. ' + f'domain={external_domain}' + ) + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + # Check rate-limit for this particular domain + cache_key = f'embed-api-{external_domain}' + cache.get_or_set(cache_key, 0, timeout=60) + cache.incr(cache_key) + if cache.get(cache_key) > settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT: + log.info('Too many requests for this domain. domain=%s', external_domain) + return Response( + { + 'error': ( + 'Too many requests for this domain. ' + f'domain={external_domain}' + ) + }, + status=status.HTTP_429_TOO_MANY_REQUESTS, + ) + + fragment = parsed_url.fragment + content_requested = self._get_content_by_fragment(url, fragment, external) + if not content_requested: + return Response( + { + 'error': ( + "Can't find content for section: " + f"url={url} fragment={fragment}" + ) + }, + status=status.HTTP_404_NOT_FOUND + ) + + response = { + 'url': url, + 'fragment': fragment, + 'content': clean_links( + content_requested, + url, + html_raw_response=True, + ), + 'external': external, + } + + if not external: + response.update({ + 'project': self.unresolved_url.project.slug, + 'version': self.unresolved_url.version_slug, + 'language': self.unresolved_url.language_slug, + 'path': self.unresolved_url.filename, + }) + return Response(response) + + +class EmbedAPI(SettingsOverrideObject): + _default_class = EmbedAPIBase diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 887d81ef670..60fa4e7c10c 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -763,3 +763,9 @@ def DOCKER_LIMITS(self): }, }, } + + RTD_EMBED_API_EXTERNAL_DOMAINS = [ + r'docs\.python\.org', + r'docs\.scipy\.org', + ] + RTD_EMBED_API_DOMAIN_RATE_LIMIT = 50 diff --git a/readthedocs/settings/docker_compose.py b/readthedocs/settings/docker_compose.py index 4f598a91b7f..496c5bf7439 100644 --- a/readthedocs/settings/docker_compose.py +++ b/readthedocs/settings/docker_compose.py @@ -63,6 +63,12 @@ def RTD_EXT_THEME_DEV_SERVER(self): RTD_CLEAN_AFTER_BUILD = True + @property + def RTD_EMBED_API_EXTERNAL_DOMAINS(self): + domains = super().RTD_EMBED_API_EXTERNAL_DOMAINS + domains.append(r'.*\.readthedocs\.io') + return domains + @property def LOGGING(self): logging = super().LOGGING diff --git a/readthedocs/urls.py b/readthedocs/urls.py index 37f791e79cb..51ba6bb8b02 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -66,6 +66,7 @@ include('rest_framework.urls', namespace='rest_framework') ), url(r'^api/v3/', include('readthedocs.api.v3.urls')), + url(r'^api/v3/embed/', include('readthedocs.embed.v3.urls')), ] i18n_urls = [ From e841b989d2d7b44a1af19b91354166c6a9b070d5 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 13:56:38 +0200 Subject: [PATCH 04/48] Add docs.sympy.org to the allowed domains This project has already tried our extension but since they are not hosted on Read the Docs it didn't work for them. See https://github.com/sympy/sympy/issues/17456 --- readthedocs/settings/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 60fa4e7c10c..d6aa99b55f4 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -767,5 +767,6 @@ def DOCKER_LIMITS(self): RTD_EMBED_API_EXTERNAL_DOMAINS = [ r'docs\.python\.org', r'docs\.scipy\.org', + r'docs\.sympy\.org', ] RTD_EMBED_API_DOMAIN_RATE_LIMIT = 50 From 78f6656af99dc8b8fd174ac177461b59bc7aeeab Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 17:05:48 +0200 Subject: [PATCH 05/48] Implement doctool= usage for Sphinx `dt` known cases --- readthedocs/embed/v3/views.py | 97 +++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 6c7237e9900..1f3646cea8d 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -95,14 +95,97 @@ def _get_page_content_from_storage(self): return None - def _get_content_by_fragment(self, url, fragment, external): + def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion): if external: url_without_fragment = urlparse(url)._replace(fragment='').geturl() page_content = self._download_page_content(url_without_fragment) else: page_content = self._get_page_content_from_storage() - node = HTMLParser(page_content).css_first(f'#{fragment}') - if node: + + return self._parse_based_on_doctool(page_content, fragment, doctool, doctoolversion) + + def _find_main_node(self, html): + main_node = html.css_first('[role=main]') + if main_node: + return main_node + + main_node = html.css_first('main') + if main_node: + return main_node + + first_header = html.body.css_first('h1') + if first_header: + return first_header.parent + + def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): + node = None + if fragment: + selector = f'#{fragment}' + node = HTMLParser(page_content).css_first(selector) + else: + html = HTMLParser(page_content) + node = self._find_main_node(html) + + if not node: + return + + if not doctool: + return node.html + + if doctool == 'sphinx': + # Handle ``dt`` special cases + if node.tag == 'dt': + if 'glossary' in node.parent.attributes.get('class'): + # Sphinx HTML structure for term glossary puts the ``id`` in the + # ``dt`` element with the title of the term. In this case, we + # return the parent node which contains the definition list + # and remove all ``dt/dd`` that are not the requested one + + # Structure: + #
+ #
definition
+ #
Text definition for the term
+ # ... + #
+ parent = node.parent + for n in parent.traverse(): + if n not in (node, node.next): + n.remove() + node = node.parent + + elif 'citation' in node.parent.attributes.get('class'): + # Sphinx HTML structure for sphinxcontrib-bibtex puts the ``id`` in the + # ``dt`` element with the title of the cite. In this case, we + # return the parent node which contains the definition list + # and remove all ``dt/dd`` that are not the requested one + + # Structure: + #
+ #
Title of the cite
+ #
Content of the cite
+ # ... + #
+ parent = node.parent + for n in parent.traverse(): + if n not in (node, node.next): + n.remove() + node = node.parent + + else: + # Sphinx HTML structure for definition list puts the ``id`` + # the ``dt`` element, instead of the ``dl``. This makes + # the backend to return just the title of the definition. If we + # detect this case, we return the parent with the whole ``dl`` tag + + # Structure: + #
+ #
+ # config + #
+ #

Text with a description

+ #
+ node = node.parent + return node.html def get(self, request): @@ -172,7 +255,13 @@ def get(self, request): ) fragment = parsed_url.fragment - content_requested = self._get_content_by_fragment(url, fragment, external) + content_requested = self._get_content_by_fragment( + url, + fragment, + external, + doctool, + doctoolversion, + ) if not content_requested: return Response( { From fe3a50757abb71d8aff2e1dd808c1b0aee11a16b Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 17:13:56 +0200 Subject: [PATCH 06/48] Use timeout= when requesting external pages --- readthedocs/embed/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 1f3646cea8d..2370b86f531 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -64,7 +64,7 @@ def _download_page_content(self, url): log.debug('Cached response. url=%s', url) return cached_response - response = requests.get(url) + response = requests.get(url, timeout=1) if response.ok: cache.set(url, response.text, timeout=60 * 5) return response.text From 28e4e83f51230286118172b587763956f8788649 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 6 Jul 2021 17:28:27 +0200 Subject: [PATCH 07/48] Handle requests TooManyRedirects and other errors --- readthedocs/embed/v3/views.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 2370b86f531..62b46cc5afb 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -64,7 +64,15 @@ def _download_page_content(self, url): log.debug('Cached response. url=%s', url) return cached_response - response = requests.get(url, timeout=1) + try: + response = requests.get(url, timeout=1) + except requests.exceptions.TooManyRedirects: + log.warning('Too many redirects. url=%s', url) + return + except Exception: # noqa + log.warning('There was an error reading the URL requested. url=%s', url) + return + if response.ok: cache.set(url, response.text, timeout=60 * 5) return response.text From 71b9e2d0998a0e223deb0681c9b955c7ddcac63a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 7 Jul 2021 15:10:29 +0200 Subject: [PATCH 08/48] Use cache-keys and djangos settings for timeouts --- readthedocs/embed/v3/views.py | 11 ++++++++--- readthedocs/settings/base.py | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 62b46cc5afb..bd2c76aa8b7 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -59,7 +59,8 @@ def unresolved_url(self): return unresolve(url) def _download_page_content(self, url): - cached_response = cache.get(url) + cache_key = f'embed-api-{url}' + cached_response = cache.get(cache_key) if cached_response: log.debug('Cached response. url=%s', url) return cached_response @@ -74,7 +75,11 @@ def _download_page_content(self, url): return if response.ok: - cache.set(url, response.text, timeout=60 * 5) + cache.set( + cache_key, + response.text, + timeout=settings.RTD_EMBED_API_PAGE_CACHE_TIMEOUT, + ) return response.text def _get_page_content_from_storage(self): @@ -248,7 +253,7 @@ def get(self, request): # Check rate-limit for this particular domain cache_key = f'embed-api-{external_domain}' - cache.get_or_set(cache_key, 0, timeout=60) + cache.get_or_set(cache_key, 0, timeout=settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT) cache.incr(cache_key) if cache.get(cache_key) > settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT: log.info('Too many requests for this domain. domain=%s', external_domain) diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index d6aa99b55f4..bcb3f457669 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -769,4 +769,6 @@ def DOCKER_LIMITS(self): r'docs\.scipy\.org', r'docs\.sympy\.org', ] + RTD_EMBED_API_PAGE_CACHE_TIMEOUT = 5 * 10 RTD_EMBED_API_DOMAIN_RATE_LIMIT = 50 + RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT = 60 From 1aa70e35a8775d26750b8de994e9abea9dcb3cb5 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 7 Jul 2021 15:11:05 +0200 Subject: [PATCH 09/48] More logs and comments --- readthedocs/embed/v3/views.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index bd2c76aa8b7..5db8ad238bb 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -87,7 +87,8 @@ def _get_page_content_from_storage(self): version = get_object_or_404( project.versions, slug=self.unresolved_url.version_slug, - # Only allow PUBLIC versions when getting the content from our storage + # Only allow PUBLIC versions when getting the content from our + # storage for privacy/security reasons privacy_level=PUBLIC, ) storage_path = project.get_storage_path( @@ -120,14 +121,17 @@ def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolvers def _find_main_node(self, html): main_node = html.css_first('[role=main]') if main_node: + log.info('Main node found. selector=[role=main]') return main_node main_node = html.css_first('main') if main_node: + log.info('Main node found. selector=main') return main_node first_header = html.body.css_first('h1') if first_header: + log.info('Main node found. selector=h1') return first_header.parent def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): @@ -227,8 +231,8 @@ def get(self, request): status=status.HTTP_400_BAD_REQUEST ) - # Note that ``readthedocs.core.unresolver.unresolve`` returns ``None`` - # when it can find the project in our database + # NOTE: ``readthedocs.core.unresolver.unresolve`` returns ``None`` when + # it can find the project in our database external = self.unresolved_url is None parsed_url = urlparse(url) @@ -241,6 +245,7 @@ def get(self, request): break if not allowed_domain: + log.info('Domain not allowed. domain=%s url=%s', external_domain, url) return Response( { 'error': ( @@ -256,7 +261,7 @@ def get(self, request): cache.get_or_set(cache_key, 0, timeout=settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT) cache.incr(cache_key) if cache.get(cache_key) > settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT: - log.info('Too many requests for this domain. domain=%s', external_domain) + log.warning('Too many requests for this domain. domain=%s', external_domain) return Response( { 'error': ( @@ -267,7 +272,11 @@ def get(self, request): status=status.HTTP_429_TOO_MANY_REQUESTS, ) + # NOTE: we could validate the fragment if we want. It must contain at + # least one character, cannot start with a number, and must not contain + # whitespaces (spaces, tabs, etc.). fragment = parsed_url.fragment + content_requested = self._get_content_by_fragment( url, fragment, @@ -276,6 +285,7 @@ def get(self, request): doctoolversion, ) if not content_requested: + log.warning('Identifier not found. url=%s', url) return Response( { 'error': ( From 5bab78052abb4e18f7a4f35a1271cfb70e4ff7fc Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 7 Jul 2021 15:11:14 +0200 Subject: [PATCH 10/48] Remove one unneeded conditional --- readthedocs/embed/v3/views.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 5db8ad238bb..581f6b407a7 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -146,9 +146,6 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio if not node: return - if not doctool: - return node.html - if doctool == 'sphinx': # Handle ``dt`` special cases if node.tag == 'dt': @@ -203,7 +200,7 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio # node = node.parent - return node.html + return node.html def get(self, request): url = request.GET.get('url') From 59fe0f13c18104e49c52b7634bd37326f67e3381 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 7 Jul 2021 17:10:28 +0200 Subject: [PATCH 11/48] Return fragment=null if there is no fragment --- readthedocs/embed/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 581f6b407a7..c5dee844f05 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -295,7 +295,7 @@ def get(self, request): response = { 'url': url, - 'fragment': fragment, + 'fragment': fragment if fragment else None, 'content': clean_links( content_requested, url, From 517d3e82dd8277af8e22f1cb58c5a50d18b12004 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:10:26 +0200 Subject: [PATCH 12/48] Use setting for request.get `timeout=` argument --- readthedocs/embed/v3/views.py | 2 +- readthedocs/settings/base.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index c5dee844f05..4da1791f53d 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -66,7 +66,7 @@ def _download_page_content(self, url): return cached_response try: - response = requests.get(url, timeout=1) + response = requests.get(url, timeout=settings.RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT) except requests.exceptions.TooManyRedirects: log.warning('Too many redirects. url=%s', url) return diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index bcb3f457669..a7fa2a35a13 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -770,5 +770,6 @@ def DOCKER_LIMITS(self): r'docs\.sympy\.org', ] RTD_EMBED_API_PAGE_CACHE_TIMEOUT = 5 * 10 + RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT = 1 RTD_EMBED_API_DOMAIN_RATE_LIMIT = 50 RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT = 60 From 562c2602dbfa685009e8efb9e70b16d61ef67e7a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:13:17 +0200 Subject: [PATCH 13/48] Log exception to track wrong URLs in Sentry --- readthedocs/embed/v3/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 4da1791f53d..76356cdb7ba 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -68,10 +68,10 @@ def _download_page_content(self, url): try: response = requests.get(url, timeout=settings.RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT) except requests.exceptions.TooManyRedirects: - log.warning('Too many redirects. url=%s', url) + log.exception('Too many redirects. url=%s', url) return except Exception: # noqa - log.warning('There was an error reading the URL requested. url=%s', url) + log.exception('There was an error reading the URL requested. url=%s', url) return if response.ok: From 73e2ee377e8fc6970567457a701828b7984aaadd Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:17:04 +0200 Subject: [PATCH 14/48] Sanitize the URL inside `_download_page_content` Remove `query` and `fragment` from it before requesting it. --- readthedocs/embed/v3/views.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 76356cdb7ba..2a130bc7cd5 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -59,6 +59,9 @@ def unresolved_url(self): return unresolve(url) def _download_page_content(self, url): + # Sanitize the URL before requesting it + url = urlparse(url)._replace(fragment='', query='').geturl() + cache_key = f'embed-api-{url}' cached_response = cache.get(cache_key) if cached_response: @@ -111,8 +114,7 @@ def _get_page_content_from_storage(self): def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion): if external: - url_without_fragment = urlparse(url)._replace(fragment='').geturl() - page_content = self._download_page_content(url_without_fragment) + page_content = self._download_page_content(url) else: page_content = self._get_page_content_from_storage() From 854a44b7db636cb0bf5a50e7b25f662b46f928d9 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:21:44 +0200 Subject: [PATCH 15/48] Handle malformed URLs (not netloc or scheme) --- readthedocs/embed/v3/views.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 2a130bc7cd5..62842a0fe40 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -236,7 +236,18 @@ def get(self, request): parsed_url = urlparse(url) external_domain = parsed_url.netloc - if external and external_domain: + if not external_domain or not parsed_url.scheme: + return Response( + { + 'error': ( + 'The URL requested is malformed. ' + f'url={url}' + ) + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + if external: allowed_domain = False for domain in settings.RTD_EMBED_API_EXTERNAL_DOMAINS: if re.match(domain, external_domain): From 49e02d5148851435e367659ea486fc94ac936cdb Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:27:14 +0200 Subject: [PATCH 16/48] Use for/else syntax sugar instead of a temporary variable --- readthedocs/embed/v3/views.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 62842a0fe40..031c84ec110 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -248,13 +248,10 @@ def get(self, request): ) if external: - allowed_domain = False for domain in settings.RTD_EMBED_API_EXTERNAL_DOMAINS: if re.match(domain, external_domain): - allowed_domain = True break - - if not allowed_domain: + else: log.info('Domain not allowed. domain=%s url=%s', external_domain, url) return Response( { From be32bd355a3b91242810c0657a44e9528c0536d0 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:29:39 +0200 Subject: [PATCH 17/48] Call `clean_links` before creating the response --- readthedocs/embed/v3/views.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 031c84ec110..c7239aa2ba7 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -303,14 +303,17 @@ def get(self, request): status=status.HTTP_404_NOT_FOUND ) + # Make links from the content to be absolute + content = clean_links( + content_requested, + url, + html_raw_response=True, + ) + response = { 'url': url, 'fragment': fragment if fragment else None, - 'content': clean_links( - content_requested, - url, - html_raw_response=True, - ), + 'content': content, 'external': external, } From cc2227c531368f2a90b7f57c20d527a1f41a1fb1 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:32:12 +0200 Subject: [PATCH 18/48] Do not depend on impicit state: pass the required arguments --- readthedocs/embed/v3/views.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index c7239aa2ba7..cccdd6669d2 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -85,11 +85,10 @@ def _download_page_content(self, url): ) return response.text - def _get_page_content_from_storage(self): - project = self.unresolved_url.project + def _get_page_content_from_storage(self, project, version_slug, filename): version = get_object_or_404( project.versions, - slug=self.unresolved_url.version_slug, + slug=version_slug, # Only allow PUBLIC versions when getting the content from our # storage for privacy/security reasons privacy_level=PUBLIC, @@ -102,7 +101,7 @@ def _get_page_content_from_storage(self): ) file_path = build_media_storage.join( storage_path, - self.unresolved_url.filename, + filename, ) try: with build_media_storage.open(file_path) as fd: @@ -116,7 +115,10 @@ def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolvers if external: page_content = self._download_page_content(url) else: - page_content = self._get_page_content_from_storage() + project = self.unresolved_url.project + version_slug = self.unresolved_url.version_slug + filename = self.unresolved_url.filename + page_content = self._get_page_content_from_storage(project, version_slug, filename) return self._parse_based_on_doctool(page_content, fragment, doctool, doctoolversion) From 1a72c076fa91735476fa2a9ebd40a0d30e9d5317 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 12 Jul 2021 16:44:08 +0200 Subject: [PATCH 19/48] Don't return metadata (project, version, language, path) Returning this metadata makes the internal/external document requests being different. Let's avoid this for now since we are not using this metadata yet. We can add it when we needed in the future. --- readthedocs/embed/v3/views.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index cccdd6669d2..190c6d5202e 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -318,14 +318,6 @@ def get(self, request): 'content': content, 'external': external, } - - if not external: - response.update({ - 'project': self.unresolved_url.project.slug, - 'version': self.unresolved_url.version_slug, - 'language': self.unresolved_url.language_slug, - 'path': self.unresolved_url.filename, - }) return Response(response) From 7b6b4939b175e0687ea8216bc415358c59135071 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 15 Jul 2021 10:29:16 +0200 Subject: [PATCH 20/48] Update readthedocs/embed/v3/views.py Co-authored-by: Eric Holscher <25510+ericholscher@users.noreply.github.com> --- readthedocs/embed/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 190c6d5202e..5ace78a962f 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -233,7 +233,7 @@ def get(self, request): ) # NOTE: ``readthedocs.core.unresolver.unresolve`` returns ``None`` when - # it can find the project in our database + # it can't find the project in our database external = self.unresolved_url is None parsed_url = urlparse(url) From 418d9ac9215fdf29c82b096bbcecec5de95cfdbf Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 15:38:53 +0200 Subject: [PATCH 21/48] Improve the response http status codes - too many redirects - error reading the external URL requested --- readthedocs/embed/v3/views.py | 52 ++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 5ace78a962f..3a1b56ed496 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -68,15 +68,7 @@ def _download_page_content(self, url): log.debug('Cached response. url=%s', url) return cached_response - try: - response = requests.get(url, timeout=settings.RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT) - except requests.exceptions.TooManyRedirects: - log.exception('Too many redirects. url=%s', url) - return - except Exception: # noqa - log.exception('There was an error reading the URL requested. url=%s', url) - return - + response = requests.get(url, timeout=settings.RTD_EMBED_API_DEFAULT_REQUEST_TIMEOUT) if response.ok: cache.set( cache_key, @@ -286,13 +278,41 @@ def get(self, request): # whitespaces (spaces, tabs, etc.). fragment = parsed_url.fragment - content_requested = self._get_content_by_fragment( - url, - fragment, - external, - doctool, - doctoolversion, - ) + try: + content_requested = self._get_content_by_fragment( + url, + fragment, + external, + doctool, + doctoolversion, + ) + except requests.exceptions.TooManyRedirects: + log.exception('Too many redirects. url=%s', url) + return Response( + { + 'error': ( + 'The URL requested generates too many redirects. ' + f'url={url}' + ) + }, + # TODO: review these status codes to find out which on is better here + # 400 Bad Request + # 502 Bad Gateway + # 503 Service Unavailable + status=status.HTTP_400_BAD_REQUEST, + ) + except Exception: # noqa + log.exception('There was an error reading the URL requested. url=%s', url) + return Response( + { + 'error': ( + 'There was an error reading the URL requested. ' + f'url={url}' + ) + }, + status=status.HTTP_400_BAD_REQUEST, + ) + if not content_requested: log.warning('Identifier not found. url=%s', url) return Response( From 8a77043363c2a9361f84461cd9b414b6f0a10be9 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 15:39:48 +0200 Subject: [PATCH 22/48] Sanitize URL before passing it to `clean_liniks` --- readthedocs/embed/v3/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 3a1b56ed496..d2f4e389429 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -325,10 +325,12 @@ def get(self, request): status=status.HTTP_404_NOT_FOUND ) + # Sanitize the URL before requesting it + sanitized_url = urlparse(url)._replace(fragment='', query='').geturl() # Make links from the content to be absolute content = clean_links( content_requested, - url, + sanitized_url, html_raw_response=True, ) From d9ca50e2db5d1f101ad6a6e0fcf39dd9e37e4bab Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 15:40:04 +0200 Subject: [PATCH 23/48] Comment to sanitize `cache_key` by URL --- readthedocs/embed/v3/views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index d2f4e389429..fa48b7b2714 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -62,6 +62,7 @@ def _download_page_content(self, url): # Sanitize the URL before requesting it url = urlparse(url)._replace(fragment='', query='').geturl() + # TODO: sanitize the cache key just in case, maybe by hashing it cache_key = f'embed-api-{url}' cached_response = cache.get(cache_key) if cached_response: From 3be9b8e3c9d05c0554bda72d54eec7b9f8f370ea Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 15:40:31 +0200 Subject: [PATCH 24/48] Update import for `clean_links` in tests --- readthedocs/embed/tests/test_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/tests/test_links.py b/readthedocs/embed/tests/test_links.py index 8650a361282..aab2f81b230 100644 --- a/readthedocs/embed/tests/test_links.py +++ b/readthedocs/embed/tests/test_links.py @@ -3,7 +3,7 @@ import pytest from pyquery import PyQuery -from readthedocs.embed.views import clean_links +from readthedocs.embed.utils import clean_links URLData = namedtuple('URLData', ['docurl', 'href', 'expected']) From a27122ac1db8e3ef98006341d135a0f511b3e2e6 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 16:35:27 +0200 Subject: [PATCH 25/48] Do not call selectolax if there is no content --- readthedocs/embed/v3/views.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index fa48b7b2714..09ee03fc3f7 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -132,6 +132,9 @@ def _find_main_node(self, html): return first_header.parent def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): + if not page_content: + return + node = None if fragment: selector = f'#{fragment}' From 84c3d18430c3e940282afea0e4ea435e3e905ebb Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 17:28:07 +0200 Subject: [PATCH 26/48] Check if the domain is valid before calling `unresolver` --- readthedocs/embed/v3/views.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 09ee03fc3f7..4cf5a3fcac4 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -228,13 +228,9 @@ def get(self, request): status=status.HTTP_400_BAD_REQUEST ) - # NOTE: ``readthedocs.core.unresolver.unresolve`` returns ``None`` when - # it can't find the project in our database - external = self.unresolved_url is None - parsed_url = urlparse(url) - external_domain = parsed_url.netloc - if not external_domain or not parsed_url.scheme: + domain = parsed_url.netloc + if not domain or not parsed_url.scheme: return Response( { 'error': ( @@ -245,33 +241,36 @@ def get(self, request): status=status.HTTP_400_BAD_REQUEST, ) + # NOTE: ``readthedocs.core.unresolver.unresolve`` returns ``None`` when + # it can't find the project in our database + external = self.unresolved_url is None if external: - for domain in settings.RTD_EMBED_API_EXTERNAL_DOMAINS: - if re.match(domain, external_domain): + for allowed_domain in settings.RTD_EMBED_API_EXTERNAL_DOMAINS: + if re.match(allowed_domain, domain): break else: - log.info('Domain not allowed. domain=%s url=%s', external_domain, url) + log.info('Domain not allowed. domain=%s url=%s', domain, url) return Response( { 'error': ( 'External domain not allowed. ' - f'domain={external_domain}' + f'domain={domain}' ) }, status=status.HTTP_400_BAD_REQUEST, ) # Check rate-limit for this particular domain - cache_key = f'embed-api-{external_domain}' + cache_key = f'embed-api-{domain}' cache.get_or_set(cache_key, 0, timeout=settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT_TIMEOUT) cache.incr(cache_key) if cache.get(cache_key) > settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT: - log.warning('Too many requests for this domain. domain=%s', external_domain) + log.warning('Too many requests for this domain. domain=%s', domain) return Response( { 'error': ( 'Too many requests for this domain. ' - f'domain={external_domain}' + f'domain={domain}' ) }, status=status.HTTP_429_TOO_MANY_REQUESTS, From 7adccb23e26ca0222de91d0ac66a93ef0be1c692 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 17:30:12 +0200 Subject: [PATCH 27/48] Remove tedius warnings from pytest --- pytest.ini | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pytest.ini b/pytest.ini index 15e67f8a2d3..c1ebbb0f85b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -13,3 +13,9 @@ filterwarnings = ignore:Pagination may yield inconsistent results with an unordered object_list.*:django.core.paginator.UnorderedObjectListWarning # docutils ignore:'U' mode is deprecated:DeprecationWarning + # slumber + ignore:Using 'method_whitelist' with Retry is deprecated and will be removed in v2.0.*:DeprecationWarning + # kombu + ignore:SelectableGroups dict interface is deprecated.*:DeprecationWarning + # django + ignore:Remove the context parameter from JSONField.*:django.utils.deprecation.RemovedInDjango30Warning \ No newline at end of file From a1cf45a009261259cc107eaee1ce17527ddea20e Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 19 Jul 2021 18:24:15 +0200 Subject: [PATCH 28/48] Initial test suite for EmbedAPI v3 --- pytest.ini | 4 +- readthedocs/conftest.py | 6 + readthedocs/embed/v3/tests/__init__.py | 0 .../v3/tests/examples/default/bibtex-cite.rst | 9 + .../v3/tests/examples/default/chapter-i.rst | 11 + .../embed/v3/tests/examples/default/conf.py | 21 ++ .../tests/examples/default/configuration.rst | 12 ++ .../v3/tests/examples/default/glossary.rst | 9 + .../embed/v3/tests/examples/default/index.rst | 9 + .../embed/v3/tests/examples/default/refs.bib | 6 + readthedocs/embed/v3/tests/test_basics.py | 89 ++++++++ .../embed/v3/tests/test_external_pages.py | 194 ++++++++++++++++++ .../embed/v3/tests/test_internal_pages.py | 68 ++++++ readthedocs/embed/v3/tests/utils.py | 8 + readthedocs/embed/v3/views.py | 20 +- tox.ini | 10 + 16 files changed, 467 insertions(+), 9 deletions(-) create mode 100644 readthedocs/embed/v3/tests/__init__.py create mode 100644 readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst create mode 100644 readthedocs/embed/v3/tests/examples/default/chapter-i.rst create mode 100644 readthedocs/embed/v3/tests/examples/default/conf.py create mode 100644 readthedocs/embed/v3/tests/examples/default/configuration.rst create mode 100644 readthedocs/embed/v3/tests/examples/default/glossary.rst create mode 100644 readthedocs/embed/v3/tests/examples/default/index.rst create mode 100644 readthedocs/embed/v3/tests/examples/default/refs.bib create mode 100644 readthedocs/embed/v3/tests/test_basics.py create mode 100644 readthedocs/embed/v3/tests/test_external_pages.py create mode 100644 readthedocs/embed/v3/tests/test_internal_pages.py create mode 100644 readthedocs/embed/v3/tests/utils.py diff --git a/pytest.ini b/pytest.ini index c1ebbb0f85b..d06623bde72 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,9 +1,11 @@ [pytest] -addopts = --reuse-db --strict-markers +addopts = --strict-markers markers = search serve proxito + embed_api + sphinx python_files = tests.py test_*.py *_tests.py filterwarnings = # Ignore external dependencies warning deprecations diff --git a/readthedocs/conftest.py b/readthedocs/conftest.py index 0dc0b840141..de296516abf 100644 --- a/readthedocs/conftest.py +++ b/readthedocs/conftest.py @@ -1,6 +1,12 @@ import pytest from rest_framework.test import APIClient + +pytest_plugins = ( + 'sphinx.testing.fixtures', +) + + @pytest.fixture def api_client(): return APIClient() diff --git a/readthedocs/embed/v3/tests/__init__.py b/readthedocs/embed/v3/tests/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst b/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst new file mode 100644 index 00000000000..bac1deac36c --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/bibtex-cite.rst @@ -0,0 +1,9 @@ +sphinxcontrib-bibtex +==================== + +See https://sphinxcontrib-bibtex.readthedocs.io/en/latest/ for more information about how to use ``sphinxcontrib-bibtex``. + +See :cite:t:`1987:nelson` for an introduction to non-standard analysis. +Non-standard analysis is fun :cite:p:`1987:nelson`. + +.. bibliography:: diff --git a/readthedocs/embed/v3/tests/examples/default/chapter-i.rst b/readthedocs/embed/v3/tests/examples/default/chapter-i.rst new file mode 100644 index 00000000000..6bf55dad0f6 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/chapter-i.rst @@ -0,0 +1,11 @@ +:orphan: + +Chapter I +========= + +This is Chapter I. + +Section I +--------- + +This the Section I inside Chapter I. diff --git a/readthedocs/embed/v3/tests/examples/default/conf.py b/readthedocs/embed/v3/tests/examples/default/conf.py new file mode 100644 index 00000000000..3c07223a704 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/conf.py @@ -0,0 +1,21 @@ +# conf.py to run tests + +master_doc = 'index' +extensions = [ + 'sphinx.ext.autosectionlabel', +] + +try: + import sphinxcontrib.bibtex + extensions.append('sphinxcontrib.bibtex') + bibtex_bibfiles = ['refs.bib'] +except ImportError: + pass + + +def setup(app): + app.add_object_type( + 'confval', # directivename + 'confval', # rolename + 'pair: %s; configuration value', # indextemplate + ) diff --git a/readthedocs/embed/v3/tests/examples/default/configuration.rst b/readthedocs/embed/v3/tests/examples/default/configuration.rst new file mode 100644 index 00000000000..7ac6465b9f0 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/configuration.rst @@ -0,0 +1,12 @@ +Configuration +============= + +Examples of configurations. + +.. confval:: config1 + + Description: This the description for config1 + + Default: ``'Default value for config'`` + + Type: bool diff --git a/readthedocs/embed/v3/tests/examples/default/glossary.rst b/readthedocs/embed/v3/tests/examples/default/glossary.rst new file mode 100644 index 00000000000..f8f50705e4d --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/glossary.rst @@ -0,0 +1,9 @@ +Glossary +-------- + +Example using a ``:term:`` role :term:`Read the Docs`. + +.. glossary:: + + Read the Docs + Best company ever. diff --git a/readthedocs/embed/v3/tests/examples/default/index.rst b/readthedocs/embed/v3/tests/examples/default/index.rst new file mode 100644 index 00000000000..540bed0984c --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/index.rst @@ -0,0 +1,9 @@ +Title +===== + +This is an example page used to test EmbedAPI parsing features. + +Sub-title +--------- + +This is a reference to :ref:`sub-title`. diff --git a/readthedocs/embed/v3/tests/examples/default/refs.bib b/readthedocs/embed/v3/tests/examples/default/refs.bib new file mode 100644 index 00000000000..8be9d662d21 --- /dev/null +++ b/readthedocs/embed/v3/tests/examples/default/refs.bib @@ -0,0 +1,6 @@ +@Book{1987:nelson, + author = {Edward Nelson}, + title = {Radically Elementary Probability Theory}, + publisher = {Princeton University Press}, + year = {1987} +} diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py new file mode 100644 index 00000000000..79aa1f1ca09 --- /dev/null +++ b/readthedocs/embed/v3/tests/test_basics.py @@ -0,0 +1,89 @@ +import pytest + +from django.conf import settings +from django.core.cache import cache +from django.urls import reverse + +from .utils import srcdir + + +@pytest.mark.django_db +@pytest.mark.embed_api +class TestEmbedAPIv3Basics: + + @pytest.fixture(autouse=True) + def setup_method(self, settings): + settings.USE_SUBDOMAIN = True + settings.PUBLIC_DOMAIN = 'readthedocs.io' + settings.RTD_EMBED_API_EXTERNAL_DOMAINS = ['docs.project.com'] + + self.api_url = reverse('embed_api_v3') + + yield + cache.clear() + + def test_not_url_query_argument(self, client): + params = {} + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'Invalid arguments. Please provide "url".'} + + def test_doctool_and_not_doctoolversion_query_argument(self, client): + params = { + 'url': 'https://docs.project.com#title', + 'doctool': 'sphinx', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} + + def test_not_doctool_and_doctoolversion_query_argument(self, client): + params = { + 'url': 'https://docs.project.com#title', + 'doctoolversion': '4.1.0', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} + + def test_not_allowed_domain(self, client): + params = { + 'url': 'https://docs.notalloweddomain.com#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': 'External domain not allowed. domain=docs.notalloweddomain.com'} + + def test_malformed_url(self, client): + params = { + 'url': 'https:///page.html#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': f'The URL requested is malformed. url={params["url"]}'} + + def test_rate_limit_domain(self, client): + params = { + 'url': 'https://docs.project.com#title', + } + cache_key = 'embed-api-docs.project.com' + cache.set(cache_key, settings.RTD_EMBED_API_DOMAIN_RATE_LIMIT) + + response = client.get(self.api_url, params) + assert response.status_code == 429 + assert response.json() == {'error': 'Too many requests for this domain. domain=docs.project.com'} + + def test_infinite_redirect(self, client, requests_mock): + requests_mock.get( + 'https://docs.project.com', + status_code=302, + headers={ + 'Location': 'https://docs.project.com', + }, + ) + params = { + 'url': 'https://docs.project.com#title', + } + response = client.get(self.api_url, params) + assert response.status_code == 400 + assert response.json() == {'error': f'The URL requested generates too many redirects. url={params["url"]}'} diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py new file mode 100644 index 00000000000..07c17600522 --- /dev/null +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -0,0 +1,194 @@ +import os + +import pytest + +from django.conf import settings +from django.core.cache import cache +from django.urls import reverse + +from .utils import srcdir + + +@pytest.mark.django_db +@pytest.mark.embed_api +class TestEmbedAPIv3ExternalPages: + + @pytest.fixture(autouse=True) + def setup_method(self, settings): + settings.USE_SUBDOMAIN = True + settings.PUBLIC_DOMAIN = 'readthedocs.io' + settings.RTD_EMBED_API_EXTERNAL_DOMAINS = ['docs.project.com'] + + self.api_url = reverse('embed_api_v3') + + yield + cache.clear() + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_default_main_section(self, app, client, requests_mock): + app.build() + path = app.outdir / 'index.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com', text=content) + + params = { + 'url': 'https://docs.project.com', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com', + 'fragment': None, + 'content': '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
', + 'external': True, + } + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_specific_identifier(self, app, client, requests_mock): + app.build() + path = app.outdir / 'index.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com', text=content) + + params = { + 'url': 'https://docs.project.com#sub-title', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com#sub-title', + 'fragment': 'sub-title', + 'content': '
\n

Sub-title

\n

This is a reference to Sub-title.

\n
', + 'external': True, + } + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_dl_identifier(self, app, client, requests_mock): + app.build() + path = app.outdir / 'configuration.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com/configuration.html', text=content) + + params = { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + 'fragment': 'confval-config1', + 'content': '
\nconfig1
', + 'external': True, + } + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): + app.build() + path = app.outdir / 'configuration.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com/configuration.html', text=content) + + # Calling the API without doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + 'fragment': 'confval-config1', + 'content': '
\nconfig1
', + 'external': True, + } + + # Calling the API with doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + 'doctool': 'sphinx', + 'doctoolversion': '4.1.0', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/configuration.html#confval-config1', + 'fragment': 'confval-config1', + 'content': '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
', + 'external': True, + } + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + @pytest.mark.skipif(os.environ.get('TOX_ENV_NAME') != 'embed-api', reason='sphinxcontrib-bibtex is not installed') + def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): + app.build() + path = app.outdir / 'bibtex-cite.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com/bibtex-cite.html', text=content) + + # Calling the API without doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/bibtex-cite.html#id4', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/bibtex-cite.html#id4', + 'fragment': 'id4', + 'content': '
Nel87(1,2)
', + 'external': True, + } + + # Calling the API with doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/bibtex-cite.html#id4', + 'doctool': 'sphinx', + 'doctoolversion': '4.1.0', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/bibtex-cite.html#id4', + 'fragment': 'id4', + 'content': '
\n
Nel87(1,2)
\n

Edward Nelson. Radically Elementary Probability Theory. Princeton University Press, 1987.

\n
\n
', + 'external': True, + } + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): + app.build() + path = app.outdir / 'glossary.html' + assert path.exists() is True + content = open(path).read() + requests_mock.get('https://docs.project.com/glossary.html', text=content) + + # Calling the API without doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + 'fragment': 'term-Read-the-Docs', + 'content': '
Read the Docs
', + 'external': True, + } + + # Calling the API with doctool/doctoolversion + params = { + 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + 'doctool': 'sphinx', + 'doctoolversion': '4.1.0', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + 'fragment': 'term-Read-the-Docs', + 'content': '
\n
Read the Docs

Best company ever.

\n
\n
', + 'external': True, + } diff --git a/readthedocs/embed/v3/tests/test_internal_pages.py b/readthedocs/embed/v3/tests/test_internal_pages.py new file mode 100644 index 00000000000..b5a0d6ff6ab --- /dev/null +++ b/readthedocs/embed/v3/tests/test_internal_pages.py @@ -0,0 +1,68 @@ +from contextlib import contextmanager +from unittest import mock +import pytest + +import django_dynamic_fixture as fixture + +from django.conf import settings +from django.core.cache import cache +from django.urls import reverse + +from readthedocs.projects.models import Project +from readthedocs.builds.models import Version + +from .utils import srcdir + + +@pytest.mark.django_db +@pytest.mark.embed_api +class TestEmbedAPIv3InternalPages: + + @pytest.fixture(autouse=True) + def setup_method(self, settings): + settings.USE_SUBDOMAIN = True + settings.PUBLIC_DOMAIN = 'readthedocs.io' + settings.RTD_EMBED_API_EXTERNAL_DOMAINS = [] + + self.api_url = reverse('embed_api_v3') + + self.project = fixture.get( + Project, + slug='project' + ) + + yield + cache.clear() + + def _mock_open(self, content): + @contextmanager + def f(*args, **kwargs): + read_mock = mock.MagicMock() + read_mock.read.return_value = content + yield read_mock + return f + + def _patch_storage_open(self, storage_mock, content): + storage_mock.exists.return_value = True + storage_mock.open.side_effect = self._mock_open(content) + + @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) + @mock.patch('readthedocs.embed.v3.views.build_media_storage') + def test_default_main_section(self, build_media_storage, app, client): + app.build() + path = app.outdir / 'index.html' + assert path.exists() is True + content = open(path).read() + self._patch_storage_open(build_media_storage, content) + + params = { + 'url': 'https://project.readthedocs.io/en/latest/', + } + response = client.get(self.api_url, params) + assert response.status_code == 200 + assert response.json() == { + 'url': 'https://project.readthedocs.io/en/latest/', + 'fragment': None, + 'content': '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
', + 'external': False, + } diff --git a/readthedocs/embed/v3/tests/utils.py b/readthedocs/embed/v3/tests/utils.py new file mode 100644 index 00000000000..9dffe8f058f --- /dev/null +++ b/readthedocs/embed/v3/tests/utils.py @@ -0,0 +1,8 @@ +import os + + +srcdir = os.path.join( + os.path.dirname(os.path.abspath(__file__)), + 'examples', + 'default', +) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 4cf5a3fcac4..b7ce611bdfc 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -161,10 +161,12 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio #
Text definition for the term
# ... # - parent = node.parent - for n in parent.traverse(): - if n not in (node, node.next): - n.remove() + + # TODO: figure it out if it's needed to remove the siblings here + # parent = node.parent + # for n in parent.traverse(): + # if n not in (node, node.next): + # n.remove() node = node.parent elif 'citation' in node.parent.attributes.get('class'): @@ -179,10 +181,12 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio #
Content of the cite
# ... # - parent = node.parent - for n in parent.traverse(): - if n not in (node, node.next): - n.remove() + + # TODO: figure it out if it's needed to remove the siblings here + # parent = node.parent + # for n in parent.traverse(): + # if n not in (node, node.next): + # n.remove() node = node.parent else: diff --git a/tox.ini b/tox.ini index 8752e5cc8c7..dd9e171ed2b 100644 --- a/tox.ini +++ b/tox.ini @@ -25,6 +25,16 @@ commands = export DJANGO_SETTINGS_MODULE=readthedocs.settings.proxito.test; \ pytest --cov-report= --cov-config {toxinidir}/.coveragerc --cov=. --cov-append -m proxito --suppress-no-test-exit-code {posargs}' +[testenv:embed-api] +description = run test suite for the EmbedAPIv3 with {basepython} +deps = + -r{toxinidir}/requirements/testing.txt + sphinxcontrib-bibtex + # TODO: find a way to pin multiple versions of Sphinx here +commands = + /bin/sh -c '\ + export DJANGO_SETTINGS_MODULE=readthedocs.settings.test; \ + pytest -m embed_api {posargs:{env:TOX_POSARGS:}}' [testenv:docs] description = build readthedocs documentation From ec2fd5f5d0b0ba2ca9ad912f818371cb0f1cbb4c Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 16:16:53 +0200 Subject: [PATCH 29/48] Add `doctoolwriter` to allow `html4` and `html5` on Sphinx --- readthedocs/embed/v3/tests/test_external_pages.py | 15 +++++++++------ readthedocs/embed/v3/views.py | 9 ++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 07c17600522..9aef4c251e4 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -92,7 +92,7 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/configuration.html', text=content) - # Calling the API without doctool/doctoolversion + # Calling the API without doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', } @@ -105,11 +105,12 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', + 'doctoolwriter': 'html4', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -129,7 +130,7 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/bibtex-cite.html', text=content) - # Calling the API without doctool/doctoolversion + # Calling the API without doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', } @@ -142,11 +143,12 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', + 'doctoolwriter': 'html5', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -165,7 +167,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/glossary.html', text=content) - # Calling the API without doctool/doctoolversion + # Calling the API without doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', } @@ -178,11 +180,12 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool/doctoolversion/doctoolwriter params = { 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', + 'doctoolwriter': 'html4', } response = client.get(self.api_url, params) assert response.status_code == 200 diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index b7ce611bdfc..3312cde9347 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -41,6 +41,7 @@ class EmbedAPIBase(CachedResponseMixin, APIView): * url (with fragment) (required) * doctool * doctoolversion + * doctoolwriter ### Example @@ -104,7 +105,7 @@ def _get_page_content_from_storage(self, project, version_slug, filename): return None - def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion): + def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion, doctoolwriter): if external: page_content = self._download_page_content(url) else: @@ -210,6 +211,7 @@ def get(self, request): url = request.GET.get('url') doctool = request.GET.get('doctool') doctoolversion = request.GET.get('doctoolversion') + doctoolwriter = request.GET.get('doctoolwriter') if not url: return Response( { @@ -221,12 +223,12 @@ def get(self, request): status=status.HTTP_400_BAD_REQUEST ) - if not all([doctool, doctoolversion]) and any([doctool, doctoolversion]): + if not all([doctool, doctoolversion, doctoolwriter]) and any([doctool, doctoolversion, doctoolwriter]): return Response( { 'error': ( 'Invalid arguments. ' - 'Please provide "doctool" and "doctoolversion" or none of them.' + 'Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.' ) }, status=status.HTTP_400_BAD_REQUEST @@ -292,6 +294,7 @@ def get(self, request): external, doctool, doctoolversion, + doctoolwriter, ) except requests.exceptions.TooManyRedirects: log.exception('Too many redirects. url=%s', url) From f05da3f25df9bd356d3cab7e8d87400f72b3a1dc Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 16:17:39 +0200 Subject: [PATCH 30/48] Run EmbedAPIv3 test on a different tox environment We need to install different versions of Sphinx to make sure that we support them when querying HTML via EmbedAPIv3. Splitting this into a different `tox.embedapi.ini` allows us to re-use all the requirements but install a different Sphinx version per environment. --- .circleci/config.yml | 10 ++++++++++ tox.embedapi.ini | 33 +++++++++++++++++++++++++++++++++ tox.ini | 11 ----------- 3 files changed, 43 insertions(+), 11 deletions(-) create mode 100644 tox.embedapi.ini diff --git a/.circleci/config.yml b/.circleci/config.yml index 7516f9c8d3a..4d32b560be7 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -17,6 +17,16 @@ jobs: - run: pip install --user tox - run: tox -e py36,codecov + tests-embedapi: + docker: + - image: 'cimg/python:3.6' + steps: + - checkout + - run: git submodule sync + - run: git submodule update --init + - run: pip install --user tox + - run: tox -c tox.embedapi.ini + checks: docker: - image: 'cimg/python:3.6' diff --git a/tox.embedapi.ini b/tox.embedapi.ini new file mode 100644 index 00000000000..1358f2ea9a8 --- /dev/null +++ b/tox.embedapi.ini @@ -0,0 +1,33 @@ +[tox] +envlist = sphinx-{18,20,21,22,23,24,30,31,32,33,34,35,40,41,latest} + +[testenv] +description = run test suite for the EmbedAPIv3 +install_command = + # Install requirements in multiple steps because we don't want to install + # Sphinx from `requirements/pip.txt` but from the `deps=` field. + /bin/sh -c ' \ + cat {toxinidir}/requirements/pip.txt | grep -v "Sphinx" > {toxinidir}/requirements/embedapi.txt; \ + sed {toxinidir}/requirements/testing.txt -e "s|pip.txt|embedapi.txt|g" > {toxinidir}/requirements/testing.embedapi.txt; \ + pip install -r {toxinidir}/requirements/testing.embedapi.txt; \ + pip install $*;' -- {opts} {packages} +deps = + sphinx-18: Sphinx~=1.8.0 + sphinx-20: Sphinx~=2.0.0 + sphinx-21: Sphinx~=2.1.0 + sphinx-22: Sphinx~=2.2.0 + sphinx-23: Sphinx~=2.3.0 + sphinx-24: Sphinx~=2.4.0 + sphinx-30: Sphinx~=3.0.0 + sphinx-31: Sphinx~=3.1.0 + sphinx-32: Sphinx~=3.2.0 + sphinx-33: Sphinx~=3.3.0 + sphinx-34: Sphinx~=3.4.0 + sphinx-35: Sphinx~=3.5.0 + sphinx-40: Sphinx~=4.0.0 + sphinx-41: Sphinx~=4.1.0 + sphinx-latest: Sphinx +setenv = + DJANGO_SETTINGS_MODULE=readthedocs.settings.test +changedir = {toxinidir}/readthedocs +commands = pytest -m embed_api {posargs} \ No newline at end of file diff --git a/tox.ini b/tox.ini index dd9e171ed2b..085a5b2e0e8 100644 --- a/tox.ini +++ b/tox.ini @@ -25,17 +25,6 @@ commands = export DJANGO_SETTINGS_MODULE=readthedocs.settings.proxito.test; \ pytest --cov-report= --cov-config {toxinidir}/.coveragerc --cov=. --cov-append -m proxito --suppress-no-test-exit-code {posargs}' -[testenv:embed-api] -description = run test suite for the EmbedAPIv3 with {basepython} -deps = - -r{toxinidir}/requirements/testing.txt - sphinxcontrib-bibtex - # TODO: find a way to pin multiple versions of Sphinx here -commands = - /bin/sh -c '\ - export DJANGO_SETTINGS_MODULE=readthedocs.settings.test; \ - pytest -m embed_api {posargs:{env:TOX_POSARGS:}}' - [testenv:docs] description = build readthedocs documentation changedir = {toxinidir}/docs From 7f3e3a9bf67ffc92b19a49b0f2854aa4e38c8bb9 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 16:23:58 +0200 Subject: [PATCH 31/48] Fix tests with proper error message --- readthedocs/embed/v3/tests/test_basics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py index 79aa1f1ca09..f45c22d6ae0 100644 --- a/readthedocs/embed/v3/tests/test_basics.py +++ b/readthedocs/embed/v3/tests/test_basics.py @@ -35,7 +35,7 @@ def test_doctool_and_not_doctoolversion_query_argument(self, client): } response = client.get(self.api_url, params) assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.'} def test_not_doctool_and_doctoolversion_query_argument(self, client): params = { @@ -44,7 +44,7 @@ def test_not_doctool_and_doctoolversion_query_argument(self, client): } response = client.get(self.api_url, params) assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.'} def test_not_allowed_domain(self, client): params = { From bcfba371e58c9481cab2493cfa94309fa305bc9c Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 16:24:42 +0200 Subject: [PATCH 32/48] Run tests-embedapi in CircleCI --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 4d32b560be7..2dae075a6ab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,3 +55,4 @@ workflows: jobs: - checks - tests + - tests-embedapi From 905cbcf5c635515736feb9922394a1ec25b96519 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 18:14:45 +0200 Subject: [PATCH 33/48] Consider docutils 0.16 and 0.17 when checking HTML output --- .../embed/v3/tests/test_external_pages.py | 43 ++++++++++++++++--- .../embed/v3/tests/test_internal_pages.py | 15 +++++-- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 9aef4c251e4..06936abe45e 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -1,7 +1,10 @@ +import docutils import os import pytest +from packaging.version import Version + from django.conf import settings from django.core.cache import cache from django.urls import reverse @@ -37,10 +40,16 @@ def test_default_main_section(self, app, client, requests_mock): } response = client.get(self.api_url, params) assert response.status_code == 200 + + if Version(docutils.__version__) >= Version('0.17'): + content = '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
' + else: + content = '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
' + assert response.json() == { 'url': 'https://docs.project.com', 'fragment': None, - 'content': '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
', + 'content': content, 'external': True, } @@ -57,10 +66,16 @@ def test_specific_identifier(self, app, client, requests_mock): } response = client.get(self.api_url, params) assert response.status_code == 200 + + if Version(docutils.__version__) >= Version('0.17'): + content = '
\n

Sub-title

\n

This is a reference to Sub-title.

\n
' + else: + content = '
\n

Sub-title

\n

This is a reference to Sub-title.

\n
' + assert response.json() == { 'url': 'https://docs.project.com#sub-title', 'fragment': 'sub-title', - 'content': '
\n

Sub-title

\n

This is a reference to Sub-title.

\n
', + 'content': content, 'external': True, } @@ -77,10 +92,16 @@ def test_dl_identifier(self, app, client, requests_mock): } response = client.get(self.api_url, params) assert response.status_code == 200 + + if Version(docutils.__version__) >= Version('0.17'): + content = '
\nconfig1
' + else: + content = '
\nconfig1
' + assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'fragment': 'confval-config1', - 'content': '
\nconfig1
', + 'content': content, 'external': True, } @@ -98,10 +119,16 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): } response = client.get(self.api_url, params) assert response.status_code == 200 + + if Version(docutils.__version__) >= Version('0.17'): + content = '
\nconfig1
' + else: + content = '
\nconfig1
' + assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'fragment': 'confval-config1', - 'content': '
\nconfig1
', + 'content': content, 'external': True, } @@ -114,10 +141,16 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): } response = client.get(self.api_url, params) assert response.status_code == 200 + + if Version(docutils.__version__) >= Version('0.17'): + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + else: + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'fragment': 'confval-config1', - 'content': '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
', + 'content': content, 'external': True, } diff --git a/readthedocs/embed/v3/tests/test_internal_pages.py b/readthedocs/embed/v3/tests/test_internal_pages.py index b5a0d6ff6ab..dbf47435568 100644 --- a/readthedocs/embed/v3/tests/test_internal_pages.py +++ b/readthedocs/embed/v3/tests/test_internal_pages.py @@ -1,6 +1,9 @@ +import docutils +import pytest + from contextlib import contextmanager +from packaging.version import Version from unittest import mock -import pytest import django_dynamic_fixture as fixture @@ -9,7 +12,6 @@ from django.urls import reverse from readthedocs.projects.models import Project -from readthedocs.builds.models import Version from .utils import srcdir @@ -60,9 +62,16 @@ def test_default_main_section(self, build_media_storage, app, client): } response = client.get(self.api_url, params) assert response.status_code == 200 + + # Note the difference between `
` and `
` + if Version(docutils.__version__) >= Version('0.17'): + content = '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
' + else: + content = '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
' + assert response.json() == { 'url': 'https://project.readthedocs.io/en/latest/', 'fragment': None, - 'content': '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
', + 'content': content, 'external': False, } From b0dc81f37adf87bd39bed92519e6e3ab283f4d27 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 18:25:44 +0200 Subject: [PATCH 34/48] Revert "Fix tests with proper error message" This reverts commit 7f3e3a9bf67ffc92b19a49b0f2854aa4e38c8bb9. --- readthedocs/embed/v3/tests/test_basics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py index f45c22d6ae0..79aa1f1ca09 100644 --- a/readthedocs/embed/v3/tests/test_basics.py +++ b/readthedocs/embed/v3/tests/test_basics.py @@ -35,7 +35,7 @@ def test_doctool_and_not_doctoolversion_query_argument(self, client): } response = client.get(self.api_url, params) assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.'} + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} def test_not_doctool_and_doctoolversion_query_argument(self, client): params = { @@ -44,7 +44,7 @@ def test_not_doctool_and_doctoolversion_query_argument(self, client): } response = client.get(self.api_url, params) assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.'} + assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} def test_not_allowed_domain(self, client): params = { From 9810f89c490588f156d9f4834fbbed10381d75c4 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 18:26:22 +0200 Subject: [PATCH 35/48] Revert "Add `doctoolwriter` to allow `html4` and `html5` on Sphinx" This reverts commit ec2fd5f5d0b0ba2ca9ad912f818371cb0f1cbb4c. --- readthedocs/embed/v3/tests/test_external_pages.py | 15 ++++++--------- readthedocs/embed/v3/views.py | 9 +++------ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 06936abe45e..02bc8118aec 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -113,7 +113,7 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/configuration.html', text=content) - # Calling the API without doctool/doctoolversion/doctoolwriter + # Calling the API without doctool/doctoolversion params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', } @@ -132,12 +132,11 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion/doctoolwriter + # Calling the API with doctool/doctoolversion params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', - 'doctoolwriter': 'html4', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -163,7 +162,7 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/bibtex-cite.html', text=content) - # Calling the API without doctool/doctoolversion/doctoolwriter + # Calling the API without doctool/doctoolversion params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', } @@ -176,12 +175,11 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion/doctoolwriter + # Calling the API with doctool/doctoolversion params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', - 'doctoolwriter': 'html5', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -200,7 +198,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/glossary.html', text=content) - # Calling the API without doctool/doctoolversion/doctoolwriter + # Calling the API without doctool/doctoolversion params = { 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', } @@ -213,12 +211,11 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion/doctoolwriter + # Calling the API with doctool/doctoolversion params = { 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', 'doctool': 'sphinx', 'doctoolversion': '4.1.0', - 'doctoolwriter': 'html4', } response = client.get(self.api_url, params) assert response.status_code == 200 diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 3312cde9347..b7ce611bdfc 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -41,7 +41,6 @@ class EmbedAPIBase(CachedResponseMixin, APIView): * url (with fragment) (required) * doctool * doctoolversion - * doctoolwriter ### Example @@ -105,7 +104,7 @@ def _get_page_content_from_storage(self, project, version_slug, filename): return None - def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion, doctoolwriter): + def _get_content_by_fragment(self, url, fragment, external, doctool, doctoolversion): if external: page_content = self._download_page_content(url) else: @@ -211,7 +210,6 @@ def get(self, request): url = request.GET.get('url') doctool = request.GET.get('doctool') doctoolversion = request.GET.get('doctoolversion') - doctoolwriter = request.GET.get('doctoolwriter') if not url: return Response( { @@ -223,12 +221,12 @@ def get(self, request): status=status.HTTP_400_BAD_REQUEST ) - if not all([doctool, doctoolversion, doctoolwriter]) and any([doctool, doctoolversion, doctoolwriter]): + if not all([doctool, doctoolversion]) and any([doctool, doctoolversion]): return Response( { 'error': ( 'Invalid arguments. ' - 'Please provide "doctool", "doctoolversion" and "doctoolwriter" or none of them.' + 'Please provide "doctool" and "doctoolversion" or none of them.' ) }, status=status.HTTP_400_BAD_REQUEST @@ -294,7 +292,6 @@ def get(self, request): external, doctool, doctoolversion, - doctoolwriter, ) except requests.exceptions.TooManyRedirects: log.exception('Too many redirects. url=%s', url) From 05936d3f2c748f92914ca71f3268a815f8c2c65e Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 18:28:44 +0200 Subject: [PATCH 36/48] Lint --- readthedocs/embed/utils.py | 2 ++ readthedocs/embed/v3/views.py | 25 +++++++++++++------------ readthedocs/embed/views.py | 1 - 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/readthedocs/embed/utils.py b/readthedocs/embed/utils.py index 613956249ce..94ad78bfe59 100644 --- a/readthedocs/embed/utils.py +++ b/readthedocs/embed/utils.py @@ -1,7 +1,9 @@ """Embed utils.""" + from urllib.parse import urlparse from pyquery import PyQuery as PQ # noqa + def recurse_while_none(element): """Recursively find the leaf node with the ``href`` attribute.""" if element.text is None and element.getchildren(): diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index b7ce611bdfc..1592781dcee 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -28,10 +28,10 @@ log = logging.getLogger(__name__) - class EmbedAPIBase(CachedResponseMixin, APIView): # pylint: disable=line-too-long + # pylint: disable=no-self-use """ Embed a section of content from any Read the Docs page. @@ -97,7 +97,7 @@ def _get_page_content_from_storage(self, project, version_slug, filename): filename, ) try: - with build_media_storage.open(file_path) as fd: + with build_media_storage.open(file_path) as fd: # pylint: disable=invalid-name return fd.read() except Exception: # noqa log.warning('Unable to read file. file_path=%s', file_path) @@ -206,10 +206,11 @@ def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversio return node.html - def get(self, request): + def get(self, request): # noqa url = request.GET.get('url') doctool = request.GET.get('doctool') doctoolversion = request.GET.get('doctoolversion') + if not url: return Response( { @@ -235,15 +236,15 @@ def get(self, request): parsed_url = urlparse(url) domain = parsed_url.netloc if not domain or not parsed_url.scheme: - return Response( - { - 'error': ( - 'The URL requested is malformed. ' - f'url={url}' - ) - }, - status=status.HTTP_400_BAD_REQUEST, - ) + return Response( + { + 'error': ( + 'The URL requested is malformed. ' + f'url={url}' + ) + }, + status=status.HTTP_400_BAD_REQUEST, + ) # NOTE: ``readthedocs.core.unresolver.unresolve`` returns ``None`` when # it can't find the project in our database diff --git a/readthedocs/embed/views.py b/readthedocs/embed/views.py index 7dc2ef24701..315fe4dde0f 100644 --- a/readthedocs/embed/views.py +++ b/readthedocs/embed/views.py @@ -4,7 +4,6 @@ import json import logging import re -from urllib.parse import urlparse from django.shortcuts import get_object_or_404 from django.template.defaultfilters import slugify From 333c8926709bdee5f88ab10a5c31979670113cc4 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 16 Aug 2021 18:37:44 +0200 Subject: [PATCH 37/48] Disable unused-argument for now --- readthedocs/embed/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 1592781dcee..5d5a1d2e265 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -131,7 +131,7 @@ def _find_main_node(self, html): log.info('Main node found. selector=h1') return first_header.parent - def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): + def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): # pylint: disable=unused-argument if not page_content: return From c4751c79b555b9b7f487cba7e857e5289356d1b0 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 15:16:46 +0200 Subject: [PATCH 38/48] Make test for sphinxcontrib-bibtex to pass --- readthedocs/embed/v3/tests/examples/default/conf.py | 10 +++------- readthedocs/embed/v3/tests/test_external_pages.py | 1 - tox.embedapi.ini | 1 + 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/readthedocs/embed/v3/tests/examples/default/conf.py b/readthedocs/embed/v3/tests/examples/default/conf.py index 3c07223a704..b8fe3483942 100644 --- a/readthedocs/embed/v3/tests/examples/default/conf.py +++ b/readthedocs/embed/v3/tests/examples/default/conf.py @@ -1,17 +1,13 @@ # conf.py to run tests +import sphinxcontrib.bibtex master_doc = 'index' extensions = [ 'sphinx.ext.autosectionlabel', + 'sphinxcontrib.bibtex', ] -try: - import sphinxcontrib.bibtex - extensions.append('sphinxcontrib.bibtex') - bibtex_bibfiles = ['refs.bib'] -except ImportError: - pass - +bibtex_bibfiles = ['refs.bib'] def setup(app): app.add_object_type( diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 02bc8118aec..3ba6b74e91b 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -154,7 +154,6 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): } @pytest.mark.sphinx('html', srcdir=srcdir, freshenv=True) - @pytest.mark.skipif(os.environ.get('TOX_ENV_NAME') != 'embed-api', reason='sphinxcontrib-bibtex is not installed') def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): app.build() path = app.outdir / 'bibtex-cite.html' diff --git a/tox.embedapi.ini b/tox.embedapi.ini index 1358f2ea9a8..baa1ae8f814 100644 --- a/tox.embedapi.ini +++ b/tox.embedapi.ini @@ -10,6 +10,7 @@ install_command = cat {toxinidir}/requirements/pip.txt | grep -v "Sphinx" > {toxinidir}/requirements/embedapi.txt; \ sed {toxinidir}/requirements/testing.txt -e "s|pip.txt|embedapi.txt|g" > {toxinidir}/requirements/testing.embedapi.txt; \ pip install -r {toxinidir}/requirements/testing.embedapi.txt; \ + pip install sphinxcontrib-bibtex; \ pip install $*;' -- {opts} {packages} deps = sphinx-18: Sphinx~=1.8.0 From 7dd4b4f8540de474ef935688fc1e7ff2893c4b23 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 15:29:00 +0200 Subject: [PATCH 39/48] Auto-delete _build directory after test run --- readthedocs/embed/v3/tests/conftest.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 readthedocs/embed/v3/tests/conftest.py diff --git a/readthedocs/embed/v3/tests/conftest.py b/readthedocs/embed/v3/tests/conftest.py new file mode 100644 index 00000000000..0efc9948632 --- /dev/null +++ b/readthedocs/embed/v3/tests/conftest.py @@ -0,0 +1,14 @@ +import os +import shutil +import pytest + +from .utils import srcdir + + +@pytest.fixture(autouse=True, scope='module') +def remove_sphinx_build_output(): + """Remove _build/ folder, if exist.""" + for path in (srcdir,): + build_path = os.path.join(path, '_build') + if os.path.exists(build_path): + shutil.rmtree(build_path) From d134ab985de6d945a5e4c390ee92896f9d8c943f Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 16:08:19 +0200 Subject: [PATCH 40/48] Checks that depend on Sphinx version (3.5) --- .../embed/v3/tests/test_external_pages.py | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 3ba6b74e91b..40432b0d8fe 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -2,6 +2,7 @@ import os import pytest +import sphinx from packaging.version import Version @@ -93,10 +94,10 @@ def test_dl_identifier(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if Version(docutils.__version__) >= Version('0.17'): + if sphinx.version_info >= (3, 5, 0): content = '
\nconfig1
' else: - content = '
\nconfig1
' + content = '
\nconfig1
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', @@ -120,10 +121,10 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if Version(docutils.__version__) >= Version('0.17'): + if sphinx.version_info >= (3, 5, 0): content = '
\nconfig1
' else: - content = '
\nconfig1
' + content = '
\nconfig1
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', @@ -141,10 +142,10 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if Version(docutils.__version__) >= Version('0.17'): + if sphinx.version_info >= (3, 5, 0): content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' else: - content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', @@ -197,30 +198,48 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/glossary.html', text=content) + if sphinx.version_info >= (3, 5, 0): + fragment = 'term-Read-the-Docs' + else: + fragment = 'term-read-the-docs' + # Calling the API without doctool/doctoolversion + url = f'https://docs.project.com/glossary.html#{fragment}' params = { - 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + 'url': url, } response = client.get(self.api_url, params) assert response.status_code == 200 + + if sphinx.version_info >= (3, 5, 0): + content = f'
Read the Docs
' + else: + content = f'
Read the Docs
' + assert response.json() == { - 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', - 'fragment': 'term-Read-the-Docs', - 'content': '
Read the Docs
', + 'url': url, + 'fragment': fragment, + 'content': content, 'external': True, } # Calling the API with doctool/doctoolversion params = { - 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', + 'url': url, 'doctool': 'sphinx', 'doctoolversion': '4.1.0', } response = client.get(self.api_url, params) assert response.status_code == 200 + + if sphinx.version_info >= (3, 5, 0): + content = f'
\n
Read the Docs

Best company ever.

\n
\n
' + else: + content = '
\n
Read the Docs

Best company ever.

\n
\n
' + assert response.json() == { - 'url': 'https://docs.project.com/glossary.html#term-Read-the-Docs', - 'fragment': 'term-Read-the-Docs', - 'content': '
\n
Read the Docs

Best company ever.

\n
\n
', + 'url': url, + 'content': content, + 'fragment': fragment, 'external': True, } From 4b5f2d69332d5c607ad8ef6c2551463dcf9e8fbe Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 16:21:07 +0200 Subject: [PATCH 41/48] Don't make doctoolversion= attribute mandatory when passing doctool= --- readthedocs/embed/v3/tests/test_basics.py | 18 ------------------ .../embed/v3/tests/test_external_pages.py | 15 ++++++--------- readthedocs/embed/v3/views.py | 11 ----------- 3 files changed, 6 insertions(+), 38 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_basics.py b/readthedocs/embed/v3/tests/test_basics.py index 79aa1f1ca09..56bf7e2b965 100644 --- a/readthedocs/embed/v3/tests/test_basics.py +++ b/readthedocs/embed/v3/tests/test_basics.py @@ -28,24 +28,6 @@ def test_not_url_query_argument(self, client): assert response.status_code == 400 assert response.json() == {'error': 'Invalid arguments. Please provide "url".'} - def test_doctool_and_not_doctoolversion_query_argument(self, client): - params = { - 'url': 'https://docs.project.com#title', - 'doctool': 'sphinx', - } - response = client.get(self.api_url, params) - assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} - - def test_not_doctool_and_doctoolversion_query_argument(self, client): - params = { - 'url': 'https://docs.project.com#title', - 'doctoolversion': '4.1.0', - } - response = client.get(self.api_url, params) - assert response.status_code == 400 - assert response.json() == {'error': 'Invalid arguments. Please provide "doctool" and "doctoolversion" or none of them.'} - def test_not_allowed_domain(self, client): params = { 'url': 'https://docs.notalloweddomain.com#title', diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 40432b0d8fe..cd5778655a4 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -114,7 +114,7 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/configuration.html', text=content) - # Calling the API without doctool/doctoolversion + # Calling the API without doctool params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', } @@ -133,11 +133,10 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool params = { 'url': 'https://docs.project.com/configuration.html#confval-config1', 'doctool': 'sphinx', - 'doctoolversion': '4.1.0', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -162,7 +161,7 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/bibtex-cite.html', text=content) - # Calling the API without doctool/doctoolversion + # Calling the API without doctool params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', } @@ -175,11 +174,10 @@ def test_citation_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool params = { 'url': 'https://docs.project.com/bibtex-cite.html#id4', 'doctool': 'sphinx', - 'doctoolversion': '4.1.0', } response = client.get(self.api_url, params) assert response.status_code == 200 @@ -203,7 +201,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): else: fragment = 'term-read-the-docs' - # Calling the API without doctool/doctoolversion + # Calling the API without doctool url = f'https://docs.project.com/glossary.html#{fragment}' params = { 'url': url, @@ -223,11 +221,10 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): 'external': True, } - # Calling the API with doctool/doctoolversion + # Calling the API with doctool params = { 'url': url, 'doctool': 'sphinx', - 'doctoolversion': '4.1.0', } response = client.get(self.api_url, params) assert response.status_code == 200 diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 5d5a1d2e265..6668222367e 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -222,17 +222,6 @@ def get(self, request): # noqa status=status.HTTP_400_BAD_REQUEST ) - if not all([doctool, doctoolversion]) and any([doctool, doctoolversion]): - return Response( - { - 'error': ( - 'Invalid arguments. ' - 'Please provide "doctool" and "doctoolversion" or none of them.' - ) - }, - status=status.HTTP_400_BAD_REQUEST - ) - parsed_url = urlparse(url) domain = parsed_url.netloc if not domain or not parsed_url.scheme: From 0056e90746e59fb63553229cd97592895b7d42b8 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 17:25:09 +0200 Subject: [PATCH 42/48] Sphinx 3.5 seems to be different on its HTML --- .../embed/v3/tests/test_external_pages.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index cd5778655a4..7ae0629d1f1 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -121,10 +121,12 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if sphinx.version_info >= (3, 5, 0): - content = '
\nconfig1
' - else: + if sphinx.version_info < (3, 5, 0): content = '
\nconfig1
' + elif sphinx.version_info[:2] == (3, 5): + content = '
\nconfig1
' + else: + content = '
\nconfig1
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', @@ -141,10 +143,14 @@ def test_dl_identifier_doctool_sphinx(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if sphinx.version_info >= (3, 5, 0): - content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' - else: + if sphinx.version_info < (3, 0, 0): # <3.0 content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + elif sphinx.version_info[:2] == (3, 5): + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + elif sphinx.version_info < (4, 0, 0): # >3.0,=!3.5.x,<4.0 + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' + else: # >=4.0 + content = '
\n
\nconfig1
\n

Description: This the description for config1

\n

Default: \'Default value for config\'

\n

Type: bool

\n
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', @@ -196,6 +202,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): content = open(path).read() requests_mock.get('https://docs.project.com/glossary.html', text=content) + # Note there are differences on the case of the fragment if sphinx.version_info >= (3, 5, 0): fragment = 'term-Read-the-Docs' else: @@ -232,7 +239,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): if sphinx.version_info >= (3, 5, 0): content = f'
\n
Read the Docs

Best company ever.

\n
\n
' else: - content = '
\n
Read the Docs

Best company ever.

\n
\n
' + content = f'
\n
Read the Docs

Best company ever.

\n
\n
' assert response.json() == { 'url': url, From cf3da8abaa09af8f0b88bb8b6f9600f41dc43233 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 17:38:07 +0200 Subject: [PATCH 43/48] Lint --- readthedocs/embed/v3/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index 6668222367e..dc856c6201e 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -131,7 +131,8 @@ def _find_main_node(self, html): log.info('Main node found. selector=h1') return first_header.parent - def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): # pylint: disable=unused-argument + def _parse_based_on_doctool(self, page_content, fragment, doctool, doctoolversion): + # pylint: disable=unused-argument if not page_content: return From aeaad766d17010d70aa47985fef47138644d2756 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 17:38:45 +0200 Subject: [PATCH 44/48] Fragment case changed on 3.0.0 --- readthedocs/embed/v3/tests/test_external_pages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 7ae0629d1f1..32ba7222d2f 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -203,7 +203,7 @@ def test_glossary_identifier_doctool_sphinx(self, app, client, requests_mock): requests_mock.get('https://docs.project.com/glossary.html', text=content) # Note there are differences on the case of the fragment - if sphinx.version_info >= (3, 5, 0): + if sphinx.version_info >= (3, 0, 0): fragment = 'term-Read-the-Docs' else: fragment = 'term-read-the-docs' From ef5a14cff07a1713ff463ca95ed193c7597564e6 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 17:40:57 +0200 Subject: [PATCH 45/48] Don't run EmbedAPIv3 tests by default --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 085a5b2e0e8..494a54c2e34 100644 --- a/tox.ini +++ b/tox.ini @@ -19,7 +19,7 @@ basepython = commands = /bin/sh -c '\ export DJANGO_SETTINGS_MODULE=readthedocs.settings.test; \ - pytest --cov-report= --cov-config {toxinidir}/.coveragerc --cov=. --suppress-no-test-exit-code -m "not proxito" {posargs:{env:TOX_POSARGS:-m "not search and not proxito"}}' + pytest --cov-report= --cov-config {toxinidir}/.coveragerc --cov=. --suppress-no-test-exit-code -m "not proxito and not embed_api" {posargs:{env:TOX_POSARGS:-m "not search and not proxito and not embed_api"}}' /bin/sh -c '\ export DJANGO_SETTINGS_MODULE=readthedocs.settings.proxito.test; \ From 160e51b60ee018622099cebee1b604e16f69c7e0 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 18 Aug 2021 17:44:35 +0200 Subject: [PATCH 46/48] Sphinx 3.5 adds an on
--- readthedocs/embed/v3/tests/test_external_pages.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index 32ba7222d2f..db1c1e22436 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -94,10 +94,13 @@ def test_dl_identifier(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 - if sphinx.version_info >= (3, 5, 0): - content = '
\nconfig1
' - else: + + if sphinx.version_info < (3, 5, 0): content = '
\nconfig1
' + elif sphinx.version_info[:2] == (3, 5): + content = '
\nconfig1
' + else: + content = '
\nconfig1
' assert response.json() == { 'url': 'https://docs.project.com/configuration.html#confval-config1', From cc768938374f921a0463a72f43530d73900acaef Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Thu, 9 Sep 2021 11:41:33 +0200 Subject: [PATCH 47/48] Update readthedocs/embed/v3/tests/test_external_pages.py Co-authored-by: Eric Holscher <25510+ericholscher@users.noreply.github.com> --- readthedocs/embed/v3/tests/test_external_pages.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readthedocs/embed/v3/tests/test_external_pages.py b/readthedocs/embed/v3/tests/test_external_pages.py index db1c1e22436..e1377da5f4c 100644 --- a/readthedocs/embed/v3/tests/test_external_pages.py +++ b/readthedocs/embed/v3/tests/test_external_pages.py @@ -42,6 +42,8 @@ def test_default_main_section(self, app, client, requests_mock): response = client.get(self.api_url, params) assert response.status_code == 200 + # The output is different because docutils is outputting this, + # and we're not sanitizing it, but just passing it through. if Version(docutils.__version__) >= Version('0.17'): content = '
\n \n
\n

Title

\n

This is an example page used to test EmbedAPI parsing features.

\n
\n

Sub-title

\n

This is a reference to Sub-title.

\n
\n
\n\n\n
' else: From 5e25c471073d641ed72b4c9ae029d5f3682a7b62 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Tue, 21 Sep 2021 10:48:51 +0200 Subject: [PATCH 48/48] Log url= together with fragment= --- readthedocs/embed/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/embed/v3/views.py b/readthedocs/embed/v3/views.py index dc856c6201e..c4256fa0237 100644 --- a/readthedocs/embed/v3/views.py +++ b/readthedocs/embed/v3/views.py @@ -312,7 +312,7 @@ def get(self, request): # noqa ) if not content_requested: - log.warning('Identifier not found. url=%s', url) + log.warning('Identifier not found. url=%s fragment=%s', url, fragment) return Response( { 'error': (