Skip to content

Commit

Permalink
Fixed issue with media parsing
Browse files Browse the repository at this point in the history
* Added the missing "e2ee_cutover/" folder string to the list of message subdirectories scanned in the GetExportFiles method

* Modified logic in GetMessages method to process every image and video link found in a message instead of only the first one per message
  • Loading branch information
marcin-przywoski authored Feb 20, 2024
1 parent 4c4e699 commit 11ec95a
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 25 deletions.
2 changes: 1 addition & 1 deletion ExportViewer.Core/Services/DataParsingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public Task<IEnumerable<string>> GetExportFiles (string exportLocation , ExportT
Console.WriteLine($"Directory {subDirectoryLocation} does not exist.");
}
}
foreach (var subDirectory in new[] { "archived_threads/" , "filtered_threads/" , "inbox/" })
foreach (var subDirectory in new[] { "archived_threads/" , "filtered_threads/" , "inbox/", "e2ee_cutover/" })
{
var subDirectoryLocation = Path.Combine(exportLocation , "your_activity_across_facebook" , "messages" , subDirectory);
var fileExtension = fileExtensions[type];
Expand Down
80 changes: 56 additions & 24 deletions ExportViewer.Core/Services/HtmlParsingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,39 +104,71 @@ public async Task<IEnumerable<Message>> GetMessages (string filePath , CultureIn
Parallel.ForEach(divs , node =>
{

var divImage = node.QuerySelector("img._a6_o._3-96");
var divVideo = node.QuerySelector("video._a6_o._3-96");
var divImages = node.QuerySelectorAll("img._a6_o._3-96");
var divVideos = node.QuerySelectorAll("video._a6_o._3-96");
var divDate = node.QuerySelector("div._3-94._a6-o")?.QuerySelector("div._a72d");

if (((divImage != null && divDate != null) || (divVideo != null && divDate != null)) && !string.IsNullOrEmpty(divDate.TextContent))
if (((divImages != null && divDate != null) || (divVideos != null && divDate != null)) && !string.IsNullOrEmpty(divDate.TextContent))
{
string href = divImage != null ? divImage.GetAttribute("src") : divVideo.GetAttribute("src");
if ((!href.StartsWith("http") || !href.StartsWith("https")) && (href.EndsWith(".jpg") || href.EndsWith(".png") || href.EndsWith(".gif") || href.EndsWith(".mp4")))
Parallel.ForEach(divImages , divImage =>
{
if (Thread.CurrentThread.CurrentCulture.IsReadOnly || locale.IsReadOnly)
{
var clone = Thread.CurrentThread.CurrentCulture.Clone() as CultureInfo;
clone.DateTimeFormat.PMDesignator = "po południu";
clone.DateTimeFormat.AMDesignator = "rano";
Thread.CurrentThread.CurrentCulture = clone;
Thread.CurrentThread.CurrentUICulture = clone;
locale = clone;
}
else
string href = divImage.GetAttribute("src");
if ((!href.StartsWith("http") || !href.StartsWith("https")) && (href.EndsWith(".jpg") || href.EndsWith(".png") || href.EndsWith(".gif")))
{
locale.DateTimeFormat.PMDesignator = "po południu";
locale.DateTimeFormat.AMDesignator = "rano";
if (Thread.CurrentThread.CurrentCulture.IsReadOnly || locale.IsReadOnly)
{
var clone = Thread.CurrentThread.CurrentCulture.Clone() as CultureInfo;
clone.DateTimeFormat.PMDesignator = "po południu";
clone.DateTimeFormat.AMDesignator = "rano";
Thread.CurrentThread.CurrentCulture = clone;
Thread.CurrentThread.CurrentUICulture = clone;
locale = clone;
}
else
{
locale.DateTimeFormat.PMDesignator = "po południu";
locale.DateTimeFormat.AMDesignator = "rano";
}


DateTime parsedDate = DateTime.ParseExact(divDate.TextContent , "MMM dd, yyyy h:mm:sstt" , locale);

if (File.Exists(exportLocation + href))
{
messages.Add(new Message { Link = href , Date = parsedDate });
}
}
});



DateTime parsedDate = DateTime.ParseExact(divDate.TextContent , "MMM dd, yyyy h:mm:sstt" , locale);

if (File.Exists(exportLocation + href))
Parallel.ForEach(divVideos , divVideo =>
{
string href = divVideo.GetAttribute("src");
if ((!href.StartsWith("http") || !href.StartsWith("https")) && href.EndsWith(".mp4"))
{
messages.Add(new Message { Link = href , Date = parsedDate });
if (Thread.CurrentThread.CurrentCulture.IsReadOnly || locale.IsReadOnly)
{
var clone = Thread.CurrentThread.CurrentCulture.Clone() as CultureInfo;
clone.DateTimeFormat.PMDesignator = "po południu";
clone.DateTimeFormat.AMDesignator = "rano";
Thread.CurrentThread.CurrentCulture = clone;
Thread.CurrentThread.CurrentUICulture = clone;
locale = clone;
}
else
{
locale.DateTimeFormat.PMDesignator = "po południu";
locale.DateTimeFormat.AMDesignator = "rano";
}


DateTime parsedDate = DateTime.ParseExact(divDate.TextContent , "MMM dd, yyyy h:mm:sstt" , locale);

if (File.Exists(exportLocation + href))
{
messages.Add(new Message { Link = href , Date = parsedDate });
}
}
}
});
}

});
Expand Down

0 comments on commit 11ec95a

Please sign in to comment.