From 5f43567d58a9c65e2fb69c1a430346e31f8ce575 Mon Sep 17 00:00:00 2001
From: Marcin Badurowicz
Date: Tue, 11 Feb 2020 18:01:02 +0100
Subject: [PATCH] Remove lists from posts so they are excluded from analytics
---
src/HtmlCleaner.cs | 5 +++--
src/PostAnalyzer.cs | 2 +-
test/HtmlCleanerTests.cs | 14 ++++++++++++--
3 files changed, 16 insertions(+), 5 deletions(-)
diff --git a/src/HtmlCleaner.cs b/src/HtmlCleaner.cs
index b506fcf..2d237dd 100644
--- a/src/HtmlCleaner.cs
+++ b/src/HtmlCleaner.cs
@@ -51,13 +51,14 @@ public static string RemoveProperCode(string postText)
}
///
- /// Removes quotations, which should be ignored
+ /// Removes quotations, lists and other HTML elements whose content should be ignored
///
/// Post content to be cleaned (in HTML)
/// Post content with Blockquote elements removed
- public static string RemoveQuotes(string postText)
+ public static string RemoveHTMLContent(string postText)
{
postText = Regex.Replace(postText, "(.|\n)*?
", "", RegexOptions.Multiline);
+ postText = Regex.Replace(postText, "", "", RegexOptions.Multiline);
return postText;
}
diff --git a/src/PostAnalyzer.cs b/src/PostAnalyzer.cs
index ed29f4c..2ce6556 100644
--- a/src/PostAnalyzer.cs
+++ b/src/PostAnalyzer.cs
@@ -78,7 +78,7 @@ private NotFormattedCodeFound CheckForUnformattedCode(CoyoteApi.Post post)
{
var text = HtmlCleaner.RemoveProperCode(post.html);
text = HtmlCleaner.RemoveDownloadLinks(text);
- text = HtmlCleaner.RemoveQuotes(text);
+ text = HtmlCleaner.RemoveHTMLContent(text);
foreach (var para in text.Split("
").Select(CleanParagraph))
{
diff --git a/test/HtmlCleanerTests.cs b/test/HtmlCleanerTests.cs
index cb4017c..af7de67 100644
--- a/test/HtmlCleanerTests.cs
+++ b/test/HtmlCleanerTests.cs
@@ -27,15 +27,25 @@ public void RemoveDownloadLinks_PostWithDownloadLinks_LinksRemoved()
}
[Fact]
- public void RemoveQuotes_PostWithQuotes_QuotesRemoved()
+ public void RemoveHtmlContent_PostWithQuotes_QuotesRemoved()
{
string postText = "cytat
nie cytatcytat
";
- var result = HtmlCleaner.RemoveQuotes(postText);
+ var result = HtmlCleaner.RemoveHTMLContent(postText);
Assert.Equal("nie cytat", result);
}
+ [Fact]
+ public void RemoveHtmlContent_PostWithList_ListRemoved()
+ {
+ string postText = "\n- \n
libGDX
\n \n- \n
jMonkeyEngine
\n \n- \n
lwjgl
\n \n
";
+
+ var result = HtmlCleaner.RemoveHTMLContent(postText);
+
+ Assert.Equal(string.Empty, result);
+ }
+
[Fact]
public void StripTags_TextWithTags_TagsRemoved()
{