Skip to content

Commit

Permalink
fix a whitespace handling bug
Browse files (browse the repository at this point in the history)
fixes #74
  • Loading branch information
Christian Kruse committed Jan 13, 2024
1 parent 7538400 commit 65c2524
Show file tree
Hide file tree
Showing 4 changed files with 552 additions and 12 deletions.
11 changes: 11 additions & 0 deletions lib/microformats2.ex
Original file line number Diff line number Diff line change
Expand Up @@ -125,5 +125,16 @@ defmodule Microformats2 do
|> String.replace(~r/\012/, "
")
|> String.replace(~r/\013/, "")
|> Floki.parse_document()
|> normalize_tag_names()
end

# Strips leading/trailing whitespace from element tag names in a parsed tree.
#
# Receives the result of `Floki.parse_document/1`. For `{:ok, tree}` every
# element node `{tag, attrs, children}` has its tag passed through
# `String.trim/1` and the updated tree is returned as `{:ok, tree}`. Any other
# value (for example an error tuple) is returned unchanged.
defp normalize_tag_names({:ok, tree}) do
  normalized =
    Floki.traverse_and_update(tree, fn
      # Only real elements carry a binary tag name. Processing instructions
      # are 3-tuples as well (`{:pi, name, attrs}`), so without the guard the
      # atom `:pi` would be handed to String.trim/1 and raise
      # FunctionClauseError on documents containing e.g. `<?xml ...?>`.
      {tag, attrs, children} when is_binary(tag) ->
        {String.trim(tag), attrs, children}

      # Comments, doctypes and processing instructions pass through untouched.
      other ->
        other
    end)

  {:ok, normalized}
end

defp normalize_tag_names(other), do: other
end
339 changes: 339 additions & 0 deletions test/documents/real_world_nesting.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
<!DOCTYPE html>
<html lang="en-US">
<head prefix="og: http://ogp.me/ns#">
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="msapplication-TileColor" content="#434279" />
<meta name="description" content="Web developer, video gamer, tabletop gamer, and comic reader" />

<title>A boost of &#39;gilest.org: Make the indie web easier&#39;</title>
<!-- <link rel="favicon" href="/favicon.ico" /> -->
<link rel="icon favicon" href="/images/pint.svg" />
<link rel="stylesheet" href="/css/base.css" />
<link rel="stylesheet" href="/css/pub.css" />
<link rel="stylesheet" href="/css/utils.css" />
<link rel="stylesheet" href="/css/scroll.css" />
<link rel="stylesheet" href="/css/print.css" media="print" />

<link rel="stylesheet" href="/css/note.css" />


<link rel="manifest" href="/manifest.json" />
<!-- <link rel="search" href="/opensearch.xml" title="Adhoc Systems" type="application/opensearchdescription+xml" /> -->
<link rel="index" href="/" />
<link rel="micropub" href="/micropub" />
<link rel="microsub" href="https://aperture.p3k.io/microsub/150" />
<!-- <link rel="webmention" href="https://webmention.io/adhoc.systems/webmention" /> -->
<link rel="webmention" href="https://adhoc.systems/webmention" />
<link rel="pingback" href="https://webmention.io/adhoc.systems/xmlrpc" />

<link rel="me" href="https://twitter.com/0x1C3B00DA" />

<link rel="me" href="https://toot.cafe/@zack" />

<link rel="me" href="https://huffduffer.com/0x1C3B00DA" />

<!--
<link rel="authorization_endpoint" href="https://indieauth.com/auth" />
<link rel="token_endpoint" href="https://tokens.indieauth.com/token" />
-->
<link rel="authorization_endpoint" href="https://indieauth.com/auth" />
<link rel="token_endpoint" href="https://tokens.indieauth.com/token" />

<link rel="alternate" href="https://adhoc.systems/boosts/51052d4f-a968-4a72-9150-225134e90423" type="application/activity+json" />

<link
rel="alternate"
type="application/rss+xml"
title="Index (RSS)"
href="https://adhoc.systems/feed.xml"
/>


<link
rel="alternate"
type="application/rss+xml"
title="Notes (RSS)"
href="https://adhoc.systems/notes/feed.xml"
/>


<link
rel="alternate"
type="application/rss+xml"
title="Articles (RSS)"
href="https://adhoc.systems/articles/feed.xml"
/>


<link
rel="alternate"
type="application/rss+xml"
title="Boosts (RSS)"
href="https://adhoc.systems/boosts/feed.xml"
/>


<link
rel="alternate"
type="application/rss+xml"
title="Likes (RSS)"
href="https://adhoc.systems/likes/feed.xml"
/>


<link
rel="alternate"
type="application/rss+xml"
title="Bookmarks (RSS)"
href="https://adhoc.systems/bookmarks/feed.xml"
/>


<link
rel="feed"
type="text/html"
title="Index"
href="/"
/>


<link
rel="feed"
type="text/html"
title="Notes - Adhoc Systems"
href="/notes"
/>


<link
rel="feed"
type="text/html"
title="Articles - Adhoc Systems"
href="/articles"
/>


<link
rel="feed"
type="text/html"
title="Photos - Adhoc Systems"
href="/photos"
/>


<link
rel="feed"
type="text/html"
title="Boosts - Adhoc Systems"
href="/boosts"
/>


<link
rel="feed"
type="text/html"
title="Likes - Adhoc Systems"
href="/likes"
/>


<link
rel="feed"
type="text/html"
title="Bookmarks - Adhoc Systems"
href="/bookmarks"
/>


<meta property="og:site_name" content="Adhoc Systems" />
<meta property="og:locale" content="en-US" />
<meta property="og:url" content="adhoc.systems" />

<meta property="og:description" content="" />

</head>
<body class="" >
<a class="skip-link" href="#main-content">Skip to main content</a>
<div id="top"></div>
<div class="scroll-container scroll-container--intersecting"></div>
<header class="main-header">
<h1 class="p-name" itemprop="name">
<a class="u-url" href="/" itemprop="url" rel="me" title="Home">Adhoc Systems</a>
</h1>
<nav class="main-nav flex jc-between">
<div>
<a href="/notes" title="Zachary Dunn&#39;s Notes">Notes</a>
<span class="current-color mar-0 pad-0">|</span>
<a href="/recipes" title="Zachary Dunn&#39;s Recipes">Recipes</a>
<span class="current-color mar-0 pad-0">|</span>
<a href="/about" title="About Zachary Dunn">About Me</a>
</div>

<div class="right">

</div>
</nav>
</header>
<main class="h-entry" itemscope itemtype="https://schema.org/BlogPosting" >


<article class="entry card entry--boost" id="main-content">
<header class="note__header">
<div class="entry__badge">


<svg class="svg-icon svg-icon--feather">
<use xlink:href="/images/lucide-sprite.svg#repeat" />
</svg>



</div>
<address class="p-author h-card h-card--mini" itemprop="author" itemscope itemtype="https://schema.org/Person">
<a class="u-url" href="/" rel="me" itemprop="url">
<img
class="u-photo avatar"
src="/images/avatar.jpg"
alt="Zachary Dunn's avatar."
itemprop="image"
loading="lazy"
/>
</a>
<div class="col">
<h1 class="p-name" itemprop="name">Zachary Dunn</h1>
<h4 class="m-0">
<a href="https://matrix.to/#/@zack:mozilla.org?via=mozilla.org&via=matrix.org">
@<span class="p-nickname" itemprop="alternateName">zack</span>:mozilla.org
</a>
</h4>
</div>
</address>

<h4 class="entry__timestamp timestamp">
Published on
<a href="https://adhoc.systems/boosts/51052d4f-a968-4a72-9150-225134e90423" class="u-url" itemprop="url">
<time class="dt-published" datetime="2024-01-09 17:11:10Z" itemprop="datePublished">
2024-01-09 17:11:10Z
</time>
</a>
</h4>


</header>


<section
class="block__inner u-repost-of h-cite"
itemprop="citation "
itemscope
itemtype="https://schema.org/CreativeWork"
>

<div class="entry__remote-authorship">
<a
href="https://gilest.org/indie-easy.html"
class="u-url entry__link"
itemprop="url"
>

<span class="p-name" itemprop="headline">gilest.org: Make the indie web easier</span>

</a>

</div>
</section>



<section class="tags mar-1">

<a href="/tags/indieweb" class="p-category hashtag" title="IndieWeb Tag" itemprop="keywords">
<svg class="svg-icon svg-icon--feather svg-icon--tag">
<use xlink:href="/images/lucide-sprite.svg#tag" />
</svg>
<span>IndieWeb</span>
</a>

<a href="/tags/tech" class="p-category hashtag" title="tech Tag" itemprop="keywords">
<svg class="svg-icon svg-icon--feather svg-icon--tag">
<use xlink:href="/images/lucide-sprite.svg#tag" />
</svg>
<span>tech</span>
</a>

<a href="/tags/decentralization" class="p-category hashtag" title="decentralization Tag" itemprop="keywords">
<svg class="svg-icon svg-icon--feather svg-icon--tag">
<use xlink:href="/images/lucide-sprite.svg#tag" />
</svg>
<span>decentralization</span>
</a>

</section>



</article>

<section class="comments mar-1">
<h2 class="border-bottom">Comments</h2>

<p>
There aren't any comments on this post yet. To comment, publish your reply online and send
a <a href="https://indieweb.org/Webmention">webmention.</a>
<p>


</section>

</main>

<a
href="#top"
id="scrolltop"
class="back-to-top fab"
role="button"
aria-label="Back to Top"
x-data="scrollProgress"
>
<svg viewBox="0 0 100 100" width="50px" height="50px" tabindex="-1" role="img" aria-labelledby="title">
<title id="title">Back To Top</title>
<circle
class="progress-ring highlight-boost"
fill="transparent"
r="47"
cx="50"
cy="50"
x-ref="progressCircle"
:stroke-dasharray="circumference"
:stroke-dashoffset="circumference - percent / 100 * circumference"
/>
<path d="M 50 30 L 25 55 L 30 60 L 50 40 L 70 60 L 75 55 L 50 30 Z" fill="white" stroke="white" />
</svg>
</a>

<footer class="main-footer">
<span>&copy; Copyright 2018 by <a href="/" title="Home">Zachary Dunn</a></span>
<div class="current-color mar-0 pad-0"></div>

<div class="rss">
<a href="https://adhoc.systems/boosts/feed.xml" rel="alternate" class="rss__link" title="Boosts Feed (RSS)">
<span class="visually-hidden">Boosts Feed (RSS)</span>
<img
src="/images/feed.svg"
class="rss__icon"
loading="lazy"
/>
</a>
<a href="https://aboutfeeds.com/" class="rss__explainer">Help! What is this?</a>
</div>

</footer>
<script type="module" src="/js/app.js"></script>
<script type="module" src="/js/scroll.js"></script>
<script defer src="https://unpkg.com/alpinejs@3.x.x/dist/cdn.min.js"></script>
</body>
</html>
Loading

0 comments on commit 65c2524

Please sign in to comment.