From 8c79609f1646fce6c1a8748c24dbe3a2cde670a0 Mon Sep 17 00:00:00 2001 From: Carlos Wu Date: Mon, 6 Dec 2021 10:13:53 +0000 Subject: [PATCH] Fix tutorials key errors (#117) --- .../discourse/parsers/base_parser.py | 26 ++++++++++------ .../discourse/parsers/tutorials.py | 31 ++++++++++++++++--- setup.py | 5 ++- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/canonicalwebteam/discourse/parsers/base_parser.py b/canonicalwebteam/discourse/parsers/base_parser.py index 7edeaa0..f1b0256 100644 --- a/canonicalwebteam/discourse/parsers/base_parser.py +++ b/canonicalwebteam/discourse/parsers/base_parser.py @@ -306,11 +306,14 @@ def _parse_metadata(self, index_soup, section_name): value.contents[0] = value.find("a").text else: - error_message = "Warning: Link not found when " - f"parsing row {index + 1}" - f"\"{row_dict['topic_name']}\" {titles[index]}." - "This row has been skipped." - self.metadata_errors.append(error_message) + error_message = ( + f"Warning: Link not found when parsing row" + f' {index + 1} "{row_dict["topic_name"]}"' + f" {titles[index]}. This row has been skipped." + ) + if error_message not in self.metadata_errors: + self.metadata_errors.append(error_message) + row_dict = None MissingContentError(error_message) break @@ -324,10 +327,15 @@ def _parse_metadata(self, index_soup, section_name): if ( (titles[index] == "path") or (titles[index] == "type") ) and ((value.text == "") or (value.text is None)): - error_message = "Warning: Link not found when" - f" parsing row {index + 1} {row_dict['topic_name']}\"" - f"{titles[index]}. This row has been skipped." - self.metadata_errors.append(error_message) + error_message = ( + f"Warning: Title not found when parsing row" + f' {index + 1} "{row_dict["topic_name"]}"' + f" {titles[index]}." + ) + + if error_message not in self.metadata_errors: + self.metadata_errors.append(error_message) + row_dict = None MissingContentError(error_message) break diff --git a/canonicalwebteam/discourse/parsers/tutorials.py b/canonicalwebteam/discourse/parsers/tutorials.py index c63b5bb..1b382e1 100644 --- a/canonicalwebteam/discourse/parsers/tutorials.py +++ b/canonicalwebteam/discourse/parsers/tutorials.py @@ -1,3 +1,5 @@ +import flask + # Packages from bs4 import BeautifulSoup from datetime import datetime, timedelta @@ -5,11 +7,13 @@ # Local from canonicalwebteam.discourse.parsers.base_parser import BaseParser +allowed_tutorial_keys = ["summary", "categories", "difficulty", "author"] + class TutorialParser(BaseParser): def __init__(self, api, index_topic_id, url_prefix): self.tutorials = None - + self.errors = [] return super().__init__(api, index_topic_id, url_prefix) def parse(self): @@ -111,6 +115,7 @@ def _get_tutorials_topics(self): response = self.api.get_topics(topics) tutorial_data = [] + self.errors = [] for topic in response: topic_soup = BeautifulSoup( @@ -132,12 +137,28 @@ def _get_tutorials_topics(self): ) metadata = {"id": topic[0], "title": topic[1], "link": link} + error_message = None for row in rows: key = row.select_one("td:first-child").text.lower() - value = row.select_one("td:last-child").text - metadata[key] = value - - tutorial_data.append(metadata) + # Markdown errors made by discourse users + if key not in allowed_tutorial_keys: + error_message = ( + f'The tutorial "{topic[1]}" contains an incorrect' + f' key error "{key}", only' + f' {", ".join(allowed_tutorial_keys)} are allowed.' + f" This tutorial has been skipped" + ) + flask.current_app.extensions["sentry"].captureMessage( + error_message + ) + if error_message not in self.errors: + self.errors.append(error_message) + break + else: + value = row.select_one("td:last-child").text + metadata[key] = value + if not error_message: + tutorial_data.append(metadata) # Tutorial will be in the same order as in the URLs table return sorted(tutorial_data, key=lambda x: topics.index(x["id"])) diff --git a/setup.py b/setup.py index 83ac85e..b1f0c1d 100755 --- a/setup.py +++ b/setup.py @@ -7,7 +7,10 @@ version="4.0.6", author="Canonical webteam", author_email="webteam@canonical.com", - url="https://github.com/canonical-webteam/canonicalwebteam.docs", + url=( + "https://github.com/canonical-web-and-design/" + "canonicalwebteam.discourse" + ), description=( "Flask extension to integrate discourse content generated " "to docs to your website."