Skip to content

Commit

Permalink
Fix tutorials key errors (canonical#117)
Browse files Browse the repository at this point in the history
  • Loading branch information
carkod committed Dec 8, 2021
1 parent 02f26a5 commit 8c79609
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 15 deletions.
26 changes: 17 additions & 9 deletions canonicalwebteam/discourse/parsers/base_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,11 +306,14 @@ def _parse_metadata(self, index_soup, section_name):
value.contents[0] = value.find("a").text

else:
error_message = "Warning: Link not found when "
f"parsing row {index + 1}"
f"\"{row_dict['topic_name']}\" {titles[index]}."
"This row has been skipped."
self.metadata_errors.append(error_message)
error_message = (
f"Warning: Link not found when parsing row"
f' {index + 1} "{row_dict["topic_name"]}"'
f" {titles[index]}. This row has been skipped."
)
if error_message not in self.metadata_errors:
self.metadata_errors.append(error_message)

row_dict = None
MissingContentError(error_message)
break
Expand All @@ -324,10 +327,15 @@ def _parse_metadata(self, index_soup, section_name):
if (
(titles[index] == "path") or (titles[index] == "type")
) and ((value.text == "") or (value.text is None)):
error_message = "Warning: Link not found when"
f" parsing row {index + 1} {row_dict['topic_name']}\""
f"{titles[index]}. This row has been skipped."
self.metadata_errors.append(error_message)
error_message = (
f"Warning: Title not found when parsing row"
f' {index + 1} "{row_dict["topic_name"]}"'
f" {titles[index]}."
)

if error_message not in self.metadata_errors:
self.metadata_errors.append(error_message)

row_dict = None
MissingContentError(error_message)
break
Expand Down
31 changes: 26 additions & 5 deletions canonicalwebteam/discourse/parsers/tutorials.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
import flask

# Packages
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

# Local
from canonicalwebteam.discourse.parsers.base_parser import BaseParser

allowed_tutorial_keys = ["summary", "categories", "difficulty", "author"]


class TutorialParser(BaseParser):
# Set up the tutorial parser's own state, then defer to BaseParser for the
# rest of the initialisation.
#
# Parameters (all forwarded unchanged to BaseParser.__init__):
#   api            -- NOTE(review): presumably the Discourse API client; it is
#                     used elsewhere in this class as `self.api.get_topics(...)`
#                     -- confirm that BaseParser stores it.
#   index_topic_id -- passed through to the base class; semantics not visible
#                     in this view.
#   url_prefix     -- passed through to the base class; semantics not visible
#                     in this view.
def __init__(self, api, index_topic_id, url_prefix):
# Starts unset. NOTE(review): presumably populated later by parse() -- the
# assignment is not visible in this view, confirm.
self.tutorials = None

# Human-readable error messages collected while reading tutorial metadata
# tables; the topic-fetching code resets this list and appends to it.
self.errors = []
# Forward the same three arguments to BaseParser.__init__.
# NOTE(review): `__init__` is expected to produce None, so the `return`
# looks redundant -- confirm before removing.
return super().__init__(api, index_topic_id, url_prefix)

def parse(self):
Expand Down Expand Up @@ -111,6 +115,7 @@ def _get_tutorials_topics(self):

response = self.api.get_topics(topics)
tutorial_data = []
self.errors = []

for topic in response:
topic_soup = BeautifulSoup(
Expand All @@ -132,12 +137,28 @@ def _get_tutorials_topics(self):
)

metadata = {"id": topic[0], "title": topic[1], "link": link}
error_message = None
for row in rows:
key = row.select_one("td:first-child").text.lower()
value = row.select_one("td:last-child").text
metadata[key] = value

tutorial_data.append(metadata)
# Guard against Markdown mistakes made by Discourse users: reject any
# metadata key that is not in allowed_tutorial_keys
if key not in allowed_tutorial_keys:
error_message = (
f'The tutorial "{topic[1]}" contains an incorrect'
f' key error "{key}", only'
f' {", ".join(allowed_tutorial_keys)} are allowed.'
f" This tutorial has been skipped"
)
flask.current_app.extensions["sentry"].captureMessage(
error_message
)
if error_message not in self.errors:
self.errors.append(error_message)
break
else:
value = row.select_one("td:last-child").text
metadata[key] = value
if not error_message:
tutorial_data.append(metadata)

# Tutorials are returned in the same order as they appear in the URLs table
return sorted(tutorial_data, key=lambda x: topics.index(x["id"]))
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
version="4.0.6",
author="Canonical webteam",
author_email="[email protected]",
url="https://github.com/canonical-webteam/canonicalwebteam.docs",
url=(
"https://github.com/canonical-web-and-design/"
"canonicalwebteam.discourse"
),
description=(
"Flask extension to integrate discourse content generated "
"to docs to your website."
Expand Down

0 comments on commit 8c79609

Please sign in to comment.