Skip to content

Commit

Permalink
Unified methods used for parsing the HTML across the different export…
Browse files (browse the repository at this point in the history)
… versions (#94)

Co-authored-by: Marcin Przywóski <[email protected]>
  • Loading branch information
marcin-przywoski and Marcin Przywóski authored Oct 20, 2024
1 parent 4cb114c commit 9752130
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions ExportViewer.Core/Services/HtmlParsingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,22 @@ public async Task<IEnumerable<Message>> GetMessages (string filePath , CultureIn
{
Parallel.ForEach(divs , node =>
{
if ((node.Descendents().OfType<IHtmlDivElement>().First(x => x.ClassList.Contains("_3-94") && x.ClassList.Contains("_2lem")).TextContent != "") && node.Descendents().OfType<IHtmlAnchorElement>().Any() && (node.Descendents().OfType<IHtmlAnchorElement>().First(x => x.HasAttribute("href"))).GetAttribute("href") != "")
{
string href = node.Descendents().OfType<IHtmlAnchorElement>().First(x => x.HasAttribute("href")).GetAttribute("href");
var divImage = node.QuerySelector("img._2yuc._3-96");
var divVideo = node.QuerySelector("video._2yuc._3-96");
var divDate = node.QuerySelector("div._3-94._2lem");

if (((divImage != null && divDate != null) || (divVideo != null && divDate != null)) && !string.IsNullOrEmpty(divDate.TextContent))
{
string href = divImage != null ? divImage.GetAttribute("src") : divVideo.GetAttribute("src");
if ((!href.StartsWith("http") || !href.StartsWith("https")) && (href.EndsWith(".jpg") || href.EndsWith(".png") || href.EndsWith(".gif") || href.EndsWith(".mp4")))
{
string divDate = node.Descendents().OfType<IHtmlDivElement>().First(x => x.ClassList.Contains("_3-94") && x.ClassList.Contains("_2lem")).TextContent;
DateTime parsedDate = Convert.ToDateTime(divDate.TextContent , locale);

DateTime date = Convert.ToDateTime(divDate , locale);

if (File.Exists(Path.Combine(exportLocation, href)))
if (File.Exists(Path.Combine(exportLocation , href)))
{
messages.Add(new Message { Link = href , Date = date });
messages.Add(new Message { Link = href , Date = parsedDate });
}
}

}
});

Expand Down

0 comments on commit 9752130

Please sign in to comment.