Skip to content

Commit

Permalink
Check missing links for engage pages
Browse files Browse the repository at this point in the history
  • Loading branch information
carkod committed Nov 24, 2021
1 parent 7056fd0 commit cf99d3b
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 19 deletions.
53 changes: 45 additions & 8 deletions canonicalwebteam/discourse/parsers/base_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Standard library
import os
import re
import flask
from urllib.parse import urlparse, urlunparse

# Packages
Expand All @@ -21,6 +22,19 @@
)


class ParsingError(Exception):
    """
    Base exception for errors while parsing Discourse topics.

    On creation, best-effort report the exception to the Flask app's
    Sentry extension (if one is configured).

    NOTE: the previous implementation invoked ``captureException`` in the
    class *body*, which ran exactly once at import time (passing the
    ``Exception`` class itself rather than an instance) and crashed when
    imported outside a Flask app context. Reporting now happens per
    instance, when an error actually occurs.
    """

    def __init__(self, *args):
        super().__init__(*args)

        # Best-effort Sentry reporting: never let the reporting step
        # mask or replace the original parsing error (e.g. when there is
        # no active Flask app context or no "sentry" extension).
        try:
            flask.current_app.extensions["sentry"].captureException(self)
        except Exception:
            pass


class MissingContentError(ParsingError):
    """
    Raised/recorded when an index-topic row lacks required content
    (e.g. a link, path or type cell in the metadata table).

    On creation, best-effort report the message to Sentry so that
    missing-content occurrences are tracked even when the error object is
    only recorded (appended to ``metadata_errors``) rather than raised.
    """

    def __init__(self, error):
        super().__init__(error)

        # Same best-effort policy as the base class: a missing Flask app
        # context or Sentry extension must not turn a skipped-row warning
        # into a hard failure.
        try:
            flask.current_app.extensions["sentry"].captureMessage(error)
        except Exception:
            pass


class BaseParser:
"""
Parsers used commonly by Tutorials and Engage pages
Expand All @@ -35,6 +49,7 @@ def __init__(self, api, index_topic_id, url_prefix):
self.warnings = []
self.url_map = {}
self.redirect_map = {}
self.metadata_errors = []

def parse_topic(self, topic):
"""
Expand Down Expand Up @@ -286,18 +301,40 @@ def _parse_metadata(self, index_soup, section_name):
if value.find("a"):
row_dict["topic_name"] = value.find("a").text

# Beautiful soup renders URLs as anchors
# Avoid that default behaviour
if value.find("a") and (
value.find("a")["href"] == value.find("a").text
):
value.contents[0] = value.find("a").text
# Only engage pages need a link
if value.findAll("a", href=True):
if value.find("a")["href"] == value.find("a").text:
value.contents[0] = value.find("a").text

else:
error_message = f"Warning: Link not found when parsing row {index + 1} \
\"{row_dict['topic_name']}\" {titles[index]}. This row has been skipped."
self.metadata_errors.append(error_message)
row_dict = None
MissingContentError(error_message)
break

# Missing path will cause the engage item in index to not
# link to the corresponding page
# Missing type will cause resource_name to be empty in
# thank-you pages
# This error does not need breaking, because it does not
# break the page
if (
(titles[index] == "path") or (titles[index] == "type")
) and ((value.text == "") or (value.text is None)):
error_message = f"Warning: Link not found when parsing row {index + 1} \
\"{row_dict['topic_name']}\" {titles[index]}. This row has been skipped."
self.metadata_errors.append(error_message)
row_dict = None
MissingContentError(error_message)
break

row_dict[titles[index]] = "".join(
str(content) for content in value.contents
)

topics_metadata.append(row_dict)
if row_dict:
topics_metadata.append(row_dict)

return topics_metadata

Expand Down
12 changes: 2 additions & 10 deletions canonicalwebteam/discourse/parsers/engage.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,8 @@ def parse(self):
raw_index_soup, self.url_prefix, self.index_topic_id, "Metadata"
)

# Avoid markdown error to break site
try:
# Parse list of topics
self.metadata = self._parse_metadata(raw_index_soup, "Metadata")
self.takeovers = self._parse_metadata(raw_index_soup, "Takeovers")
except IndexError:
self.metadata = []
self.takeovers = []
self.warnings.append("Failed to parse metadata correctly")
self.metadata = self._parse_metadata(raw_index_soup, "Metadata")
self.takeovers = self._parse_metadata(raw_index_soup, "Takeovers")

if index_topic["id"] != self.index_topic_id:
# Get body and navigation HTML
Expand Down Expand Up @@ -120,7 +113,6 @@ def parse_topic(self, topic):
),
"related": current_topic_related,
"topic_path": topic_path,
"errors": warnings,
}

def resolve_path(self, relative_path):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name="canonicalwebteam.discourse",
version="4.0.5",
version="4.0.6",
author="Canonical webteam",
author_email="[email protected]",
url="https://github.com/canonical-webteam/canonicalwebteam.docs",
Expand Down

0 comments on commit cf99d3b

Please sign in to comment.