From 5f43567d58a9c65e2fb69c1a430346e31f8ce575 Mon Sep 17 00:00:00 2001 From: Marcin Badurowicz Date: Tue, 11 Feb 2020 18:01:02 +0100 Subject: [PATCH] Add removal of lists in posts so they won't be taken part into analytics --- src/HtmlCleaner.cs | 5 +++-- src/PostAnalyzer.cs | 2 +- test/HtmlCleanerTests.cs | 14 ++++++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/HtmlCleaner.cs b/src/HtmlCleaner.cs index b506fcf..2d237dd 100644 --- a/src/HtmlCleaner.cs +++ b/src/HtmlCleaner.cs @@ -51,13 +51,14 @@ public static string RemoveProperCode(string postText) } /// - /// Removes quotations, which should be ignored + /// Removes quotations, lists and other HTML tags which content should be ignored /// /// Post content to be cleaned (in HTML) /// Post content with Blockquote elements removed - public static string RemoveQuotes(string postText) + public static string RemoveHTMLContent(string postText) { postText = Regex.Replace(postText, "
(.|\n)*?
", "", RegexOptions.Multiline); + postText = Regex.Replace(postText, "", "", RegexOptions.Multiline); return postText; } diff --git a/src/PostAnalyzer.cs b/src/PostAnalyzer.cs index ed29f4c..2ce6556 100644 --- a/src/PostAnalyzer.cs +++ b/src/PostAnalyzer.cs @@ -78,7 +78,7 @@ private NotFormattedCodeFound CheckForUnformattedCode(CoyoteApi.Post post) { var text = HtmlCleaner.RemoveProperCode(post.html); text = HtmlCleaner.RemoveDownloadLinks(text); - text = HtmlCleaner.RemoveQuotes(text); + text = HtmlCleaner.RemoveHTMLContent(text); foreach (var para in text.Split("

").Select(CleanParagraph)) { diff --git a/test/HtmlCleanerTests.cs b/test/HtmlCleanerTests.cs index cb4017c..af7de67 100644 --- a/test/HtmlCleanerTests.cs +++ b/test/HtmlCleanerTests.cs @@ -27,15 +27,25 @@ public void RemoveDownloadLinks_PostWithDownloadLinks_LinksRemoved() } [Fact] - public void RemoveQuotes_PostWithQuotes_QuotesRemoved() + public void RemoveHtmlContent_PostWithQuotes_QuotesRemoved() { string postText = "
cytat
nie cytat
cytat
"; - var result = HtmlCleaner.RemoveQuotes(postText); + var result = HtmlCleaner.RemoveHTMLContent(postText); Assert.Equal("nie cytat", result); } + [Fact] + public void RemoveHtmlContent_PostWithList_ListRemoved() + { + string postText = ""; + + var result = HtmlCleaner.RemoveHTMLContent(postText); + + Assert.Equal(string.Empty, result); + } + [Fact] public void StripTags_TextWithTags_TagsRemoved() {