Skip to content

Commit

Permalink
Add removal of lists in posts so they won't be taken into account during analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
ktos committed Feb 11, 2020
1 parent 5bfc4f5 commit 5f43567
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/HtmlCleaner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,14 @@ public static string RemoveProperCode(string postText)
}

/// <summary>
/// Removes quotations, which should be ignored
/// Removes quotations, lists and other HTML tags whose content should be ignored
/// </summary>
/// <param name="postText">Post content to be cleaned (in HTML)</param>
/// <returns>Post content with blockquote and unordered list (ul) elements removed</returns>
public static string RemoveQuotes(string postText)
public static string RemoveHTMLContent(string postText)
{
    // Quotations are stripped first, then unordered lists; in both cases the
    // element is removed together with everything it contains.
    postText = RemoveElementsWithContent(postText, "blockquote");
    postText = RemoveElementsWithContent(postText, "ul");
    return postText;
}

/// <summary>
/// Removes every <c>&lt;tagName&gt;...&lt;/tagName&gt;</c> element, including its content,
/// from the given HTML. Nested elements of the same tag are removed completely.
/// </summary>
/// <param name="html">HTML text to be cleaned</param>
/// <param name="tagName">Name of the tag to remove, without angle brackets (matched exactly, no attributes)</param>
/// <returns>HTML text without the matched elements</returns>
private static string RemoveElementsWithContent(string html, string tagName)
{
    string open = Regex.Escape("<" + tagName + ">");
    string close = Regex.Escape("</" + tagName + ">");

    // Matches only an innermost element: between the opening and the closing tag
    // no further opening tag of the same kind may appear. A plain lazy
    // "<tag>.*?</tag>" would stop at the first closing tag and leave the tail of
    // a nested element (e.g. "</li></ul>") behind.
    string innermost = open + "(?:(?!" + open + ").)*?" + close;

    string previous;
    do
    {
        previous = html;
        // Singleline lets '.' match '\n', so elements spanning several lines are removed.
        html = Regex.Replace(html, innermost, "", RegexOptions.Singleline);
    }
    while (html != previous); // each pass peels one nesting level; stop once nothing changes

    // Note: an opening tag without a matching closing tag is left in place.
    return html;
}

Expand Down
2 changes: 1 addition & 1 deletion src/PostAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ private NotFormattedCodeFound CheckForUnformattedCode(CoyoteApi.Post post)
{
var text = HtmlCleaner.RemoveProperCode(post.html);
text = HtmlCleaner.RemoveDownloadLinks(text);
text = HtmlCleaner.RemoveQuotes(text);
text = HtmlCleaner.RemoveHTMLContent(text);

foreach (var para in text.Split("</p>").Select(CleanParagraph))
{
Expand Down
14 changes: 12 additions & 2 deletions test/HtmlCleanerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,25 @@ public void RemoveDownloadLinks_PostWithDownloadLinks_LinksRemoved()
}

[Fact]
public void RemoveQuotes_PostWithQuotes_QuotesRemoved()
public void RemoveHtmlContent_PostWithQuotes_QuotesRemoved()
{
string postText = "<blockquote>cytat</blockquote>nie cytat<blockquote>cytat</blockquote>";

var result = HtmlCleaner.RemoveQuotes(postText);
var result = HtmlCleaner.RemoveHTMLContent(postText);

Assert.Equal("nie cytat", result);
}

[Fact]
public void RemoveHtmlContent_PostWithList_ListRemoved()
{
    // Arrange: a post that consists solely of a multi-line unordered list.
    const string listOnlyHtml = "<ul>\n<li>\n<p>libGDX</p>\n</li>\n<li>\n<p>jMonkeyEngine</p>\n</li>\n<li>\n<p>lwjgl</p>\n</li>\n</ul>";

    // Act
    string cleaned = HtmlCleaner.RemoveHTMLContent(listOnlyHtml);

    // Assert: the whole list, line breaks included, must be gone.
    Assert.Equal(string.Empty, cleaned);
}

[Fact]
public void StripTags_TextWithTags_TagsRemoved()
{
Expand Down

0 comments on commit 5f43567

Please sign in to comment.