Skip to content

Commit

Permalink
Check onenote URLs (#1733)
Browse files Browse the repository at this point in the history
* Check onenote URLs
#1046

* translations

* notebook vs section
  • Loading branch information
stevencohn authored Dec 27, 2024
1 parent a5601f7 commit 6a970f1
Show file tree
Hide file tree
Showing 13 changed files with 361 additions and 45 deletions.
247 changes: 202 additions & 45 deletions OneMore/Commands/References/CheckUrlsCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ namespace River.OneMoreAddIn.Commands
{
using River.OneMoreAddIn.Models;
using River.OneMoreAddIn.Styles;
using River.OneMoreAddIn.UI;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
Expand All @@ -14,10 +15,19 @@ namespace River.OneMoreAddIn.Commands
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;
using Resx = Properties.Resources;


internal class CheckUrlsCommand : Command
{
private OneNote one;
private Page page;
private List<XElement> candidates;
private Dictionary<string, OneNote.HyperlinkInfo> map;

private int badCount;
private Exception exception;


public CheckUrlsCommand()
{
Expand All @@ -28,45 +38,21 @@ public override async Task Execute(params object[] args)
{
if (!HttpClientFactory.IsNetworkAvailable())
{
ShowInfo(Properties.Resources.NetwordConnectionUnavailable);
ShowInfo(Resx.NetwordConnectionUnavailable);
return;
}

await using var one = new OneNote(out var page, out _);

var count = await HasInvalidUrls(page);
if (count > 0)
{
await one.Update(page);

UI.MoreMessageBox.ShowWarning(owner, $"Found {count} invalid URLs on this page");
}
}


private async Task<int> HasInvalidUrls(Page page)
{
var elements = GetCandiateElements(page);

// parallelize internet access for all chosen hyperlinks on the page...

var count = 0;
if (elements.Any())
await using (one = new OneNote(out page, out _))
{
// must use a thread-safe collection here
var tasks = new ConcurrentBag<Task<int>>();

foreach (var element in elements)
candidates = GetCandiateElements(page);
if (candidates.Count > 0)
{
// do not use await in the body loop; just build list of tasks
tasks.Add(ValidateUrls(element));
}
var progressDialog = new ProgressDialog(Execute);

await Task.WhenAll(tasks.ToArray());
count = tasks.Sum(t => t.IsFaulted ? 0 : t.Result);
// report results on UI thread after execution
progressDialog.RunModeless(ReportResult);
}
}

return count;
}


Expand Down Expand Up @@ -105,16 +91,127 @@ private static List<XElement> GetCandiateElements(Page page)
}


private async Task<int> ValidateUrls(XElement element)
/// <summary>
/// Invoked by the ProgressDialog OnShown callback. Optionally builds the hyperlink
/// map, validates every candidate element, and saves the page when at least one
/// bad URL was counted.
/// </summary>
/// <param name="progress">The dialog used to report progress; always closed on exit.</param>
/// <param name="token">Cancellation token forwarded to the hyperlink map builder.</param>
private async Task Execute(ProgressDialog progress, CancellationToken token)
{
    logger.Start();
    logger.StartClock();

    try
    {
        // the map is only needed when the page contains onenote: links
        var scope = GetOneNoteScope();
        if (scope != OneNote.Scope.Self)
        {
            await BuildHyperlinkMap(scope, progress, token);

            // NOTE(review): assumes a cancelled mapping pass may leave the map
            // incomplete; validating against it could flag good links as broken
            // and then save the page, so stop here instead
            if (token.IsCancellationRequested)
            {
                return;
            }
        }

        progress.SetMaximum(candidates.Count);
        progress.SetMessage(string.Format(Resx.CheckUrlsCommand_checkingMsg, candidates.Count));

        await ValidateUrls(progress);
        if (badCount > 0)
        {
            await one.Update(page);
        }
    }
    catch (OperationCanceledException exc)
    {
        // cancellation is a user choice, not a failure to report
        logger.WriteLine("URL check cancelled", exc);
    }
    catch (Exception exc)
    {
        // remembered so ReportResult can surface it
        logger.WriteLine("error validating URLs", exc);
        exception = exc;
    }
    finally
    {
        // always release the dialog and close the logger scope, even when the
        // scope detection or mapping phase fails
        progress.Close();

        logger.WriteTime("check complete");
        logger.End();
    }
}


/// <summary>
/// Scans the CDATA of the candidate elements to decide how much of the OneNote
/// hierarchy must be mapped in order to verify onenote: links.
/// </summary>
/// <returns>
/// Notebooks as soon as any anchor href starts with "onenote:http"; otherwise
/// Sections if any anchor href starts with "onenote:"; otherwise Self.
/// </returns>
private OneNote.Scope GetOneNoteScope()
{
    /*
     * Notebook reference will start with "onenote:https://...."
     * onenote:https://d.docs.live.net/6925.../&amp;section-id={...}&amp;page-id={...}&amp;end
     *
     * Any pages within this notebook will have a base-path=https...
     * onenote:...&amp;section-id={...}&amp;page-id={...}&amp;end&amp;base-path=https://d...
     *
     * Possible future optimization: collect all named notebooks/sections since the
     * notebook URI contains the exact names, here "OneMore Wiki" and "Get Started"
     * https://d.docs.live.net/.../Documents/OneMore%20Wiki/Get%20Started.one
     */

    var scope = OneNote.Scope.Self;

    foreach (var candidate in candidates)
    {
        // inspect only this candidate's CDATA, one pass per node; scanning the
        // whole candidates list here would repeat the same work on every iteration
        foreach (var cdata in candidate.DescendantNodes().OfType<XCData>())
        {
            var text = cdata.Value;

            // widest possible scope, so no need to look any further
            if (text.Contains("<a\nhref=\"onenote:http"))
            {
                return OneNote.Scope.Notebooks;
            }

            if (text.Contains("<a\nhref=\"onenote:"))
            {
                scope = OneNote.Scope.Sections;
            }
        }
    }

    return scope;
}


/// <summary>
/// Populates the hyperlink map by delegating to a HyperlinkProvider, wiring its
/// two callbacks to the progress dialog.
/// </summary>
/// <param name="scope">The OneNote scope handed to the provider.</param>
/// <param name="progress">The dialog updated by the provider callbacks.</param>
/// <param name="token">Cancellation token handed to the provider.</param>
private async Task BuildHyperlinkMap(
    OneNote.Scope scope, ProgressDialog progress, CancellationToken token)
{
    var provider = new HyperlinkProvider(one);

    map = await provider.BuildHyperlinkMap(
        scope,
        token,
        // receives a count; used as the progress maximum and shown in the message
        async (total) =>
        {
            progress.SetMaximum(total);
            progress.SetMessage(string.Format(Resx.CheckUrlsCommand_mappingMsg, total));
            await Task.Yield();
        },
        // presumably raised once per mapped item - confirm against HyperlinkProvider
        async () =>
        {
            progress.Increment();
            await Task.Yield();
        });
}


/// <summary>
/// Starts validation of every candidate element and waits for all of them to
/// complete, so that internet access for the page's hyperlinks can overlap.
/// </summary>
/// <param name="progress">The dialog passed through to each ValidateUrl call.</param>
private async Task ValidateUrls(ProgressDialog progress)
{
    // Start each validation without awaiting it; ToArray forces the calls to
    // happen now, in candidate order. The tasks are collected on this one
    // sequential pass, so a thread-safe collection is not needed.
    var pending = candidates
        .Select(candidate => ValidateUrl(candidate, progress))
        .ToArray();

    await Task.WhenAll(pending);
}


private async Task ValidateUrl(XElement element, ProgressDialog progress)
{
var cdata = element.GetCData();
var wrapper = cdata.GetWrapper();

var count = 0;
var count = badCount;
foreach (var anchor in wrapper.Elements("a"))
{
progress.Increment();

var href = anchor.Attribute("href")?.Value;
if (ValidWebAddress(href))
if (ValidAddress(href))
{
if (await InvalidUrl(href))
{
Expand Down Expand Up @@ -144,34 +241,68 @@ private async Task<int> ValidateUrls(XElement element)
);
}

count++;
Interlocked.Increment(ref badCount);
}
}
}

if (count > 0)
if (badCount > count)
{
cdata.ReplaceWith(wrapper.GetInnerXml());
}

return count;
}


private static bool ValidWebAddress(string href)
private static bool ValidAddress(string href)
{
return
!string.IsNullOrWhiteSpace(href) &&
href.StartsWith("http") &&
if (string.IsNullOrWhiteSpace(href))
{
return false;
}

if (href.StartsWith("http") &&
!(
href.StartsWith("https://onedrive.live.com/view.aspx") &&
href.Contains("onedrive.live.com/view.aspx") &&
href.Contains("&id=documents") &&
href.Contains(".one")
);
))
{
return true;
}

return
href.StartsWith("onenote:") &&
href.Contains("section-id=") &&
href.Contains("page-id=");
}


/// <summary>
/// Routes a URL to the matching checker: onenote: links that carry a page-id go
/// to InvalidOneNoteUrl, everything else goes to InvalidWebUrl.
/// </summary>
/// <param name="url">The href value to check.</param>
/// <returns>The result of whichever checker was chosen.</returns>
private async Task<bool> InvalidUrl(string url)
{
    var isPageLink = url.StartsWith("onenote:") && url.Contains("page-id=");
    if (!isPageLink)
    {
        return await InvalidWebUrl(url);
    }

    return InvalidOneNoteUrl(url);
}


/// <summary>
/// Checks a onenote: URL against the hyperlink map using the page-id captured
/// from its "section-id={...}&amp;page-id={...}" portion.
/// </summary>
/// <param name="url">The onenote: href value to check.</param>
/// <returns>
/// True only when the URL matches the expected pattern and its page-id is not a
/// key in the map. False when the pattern does not match or no map was built.
/// </returns>
private bool InvalidOneNoteUrl(string url)
{
    // the map is only built when the scope scan detected onenote: links; without
    // it the link cannot be judged, so do not flag it and do not fail the whole run
    if (map is null)
    {
        return false;
    }

    var match = Regex.Match(url, @"section-id=({[^}]+})&page-id=({[^}]+})");

    // group 2 is the page-id, braces included
    return match.Success && !map.ContainsKey(match.Groups[2].Value);
}


private async Task<bool> InvalidWebUrl(string url)
{
var invalid = false;

Expand Down Expand Up @@ -212,5 +343,31 @@ private async Task<bool> InvalidUrl(string url)

return invalid;
}


/// <summary>
/// Completion handler passed to ProgressDialog.RunModeless. Shows either the
/// recorded error or, when bad URLs were counted, a warning with that count.
/// </summary>
/// <param name="sender">Hidden first when it is the ProgressDialog.</param>
/// <param name="e">Unused.</param>
private void ReportResult(object sender, EventArgs e)
{
    // report results back on the main UI thread...

    if (sender is ProgressDialog dialog)
    {
        // hide it first, otherwise the message window appears behind the dialog
        dialog.Visible = false;
    }

    if (exception is Exception failure)
    {
        MoreMessageBox.ShowErrorWithLogLink(owner, failure.Message);
        return;
    }

    if (badCount > 0)
    {
        var message = string.Format(Resx.CheckUrlsCommand_invaldiMsg, badCount);
        MoreMessageBox.ShowWarning(owner, message);
    }
}

}
}
27 changes: 27 additions & 0 deletions OneMore/Properties/Resources.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions OneMore/Properties/Resources.ar-SA.resx
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,18 @@
<value>متغير غير محدد "{0}"</value>
<comment>error</comment>
</data>
<data name="CheckUrlsCommand_checkingMsg" xml:space="preserve">
<value>التحقق من {0} عناوين URL</value>
<comment>message</comment>
</data>
<data name="CheckUrlsCommand_invaldiMsg" xml:space="preserve">
<value>تم العثور على {0} عنوان URL غير صالح في هذه الصفحة</value>
<comment>message</comment>
</data>
<data name="CheckUrlsCommand_mappingMsg" xml:space="preserve">
<value>تعيين مراجع الصفحة {0}.</value>
<comment>message</comment>
</data>
<data name="CleanRemindersCommand_count" xml:space="preserve">
<value>تم تنظيف {0} من التذكيرات المعزولة</value>
<comment>message box</comment>
Expand Down
12 changes: 12 additions & 0 deletions OneMore/Properties/Resources.de-DE.resx
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,18 @@ Umkehren</value>
<value>Undefinierte Variable „{0}“</value>
<comment>error</comment>
</data>
<data name="CheckUrlsCommand_checkingMsg" xml:space="preserve">
<value>{0} URLs werden überprüft</value>
<comment>message</comment>
</data>
<data name="CheckUrlsCommand_invaldiMsg" xml:space="preserve">
<value>Auf dieser Seite wurden {0} ungültige URLs gefunden</value>
<comment>message</comment>
</data>
<data name="CheckUrlsCommand_mappingMsg" xml:space="preserve">
<value>Zuordnung von {0} Seitenverweisen</value>
<comment>message</comment>
</data>
<data name="CleanRemindersCommand_count" xml:space="preserve">
<value>{0} verwaiste Erinnerungen wurden bereinigt</value>
<comment>message box</comment>
Expand Down
Loading

0 comments on commit 6a970f1

Please sign in to comment.