Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: write at-example outputs over threshold to a file #2247

Merged
merged 19 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
MarkdownAST = "d0879d2d-cac2-40c8-9cee-1863dc0c7391"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

Expand Down
139 changes: 132 additions & 7 deletions src/html/HTMLWriter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import Markdown
using MarkdownAST: MarkdownAST, Node
import JSON
import Base64
import SHA

import ..Documenter
using Documenter: NavNode
Expand Down Expand Up @@ -360,6 +362,12 @@
**`warn_outdated`** inserts a warning if the current page is not the newest version of the
documentation.

**`example_size_threshold`** specifies the size threshold above which the `@example` and other block
outputs get written to files, rather than being included in the HTML page. This mechanism is present
to reduce the size of the generated HTML files that contain a lot of figures etc.
Setting it to `nothing` will disable writing to files, and setting it to `0` means that all outputs
mortenpi marked this conversation as resolved.
Show resolved Hide resolved
will be written to files. Defaults to `1 KiB`.

**`size_threshold`** sets the maximum allowed HTML file size (in bytes) that Documenter is allowed to
generate for a page. If the generated HTML file is larger than this, Documenter will throw an error and
the build will fail. If set to `nothing`, the file sizes are not checked. Defaults to `200 KiB` (but
Expand Down Expand Up @@ -444,6 +452,7 @@
highlightjs :: Union{String,Nothing}
size_threshold :: Int
size_threshold_warn :: Int
example_size_threshold :: Int

function HTML(;
prettyurls :: Bool = true,
Expand All @@ -465,8 +474,9 @@
prerender :: Bool = false,
node :: Union{Cmd,String,Nothing} = nothing,
highlightjs :: Union{String,Nothing} = nothing,
size_threshold :: Union{Integer, Nothing} = 200 * 2^10,
size_threshold_warn :: Union{Integer, Nothing} = 100 * 2^10,
size_threshold :: Union{Integer, Nothing} = 200 * 2^10, # 200 KiB
size_threshold_warn :: Union{Integer, Nothing} = 100 * 2^10, # 100 KiB
example_size_threshold :: Union{Integer, Nothing} = 2^10, # 1 KiB

# deprecated keywords
edit_branch :: Union{String, Nothing, Default} = Default(nothing),
Expand Down Expand Up @@ -512,11 +522,16 @@
elseif size_threshold_warn > size_threshold
throw(ArgumentError("size_threshold_warn ($size_threshold_warn) must be smaller than size_threshold ($size_threshold)"))
end
if isnothing(example_size_threshold)
example_size_threshold = typemax(Int)

Check warning on line 526 in src/html/HTMLWriter.jl

View check run for this annotation

Codecov / codecov/patch

src/html/HTMLWriter.jl#L526

Added line #L526 was not covered by tests
elseif example_size_threshold <= 0
mortenpi marked this conversation as resolved.
Show resolved Hide resolved
throw(ArgumentError("example_size_threshold must be non-negative, got $(example_size_threshold)"))

Check warning on line 528 in src/html/HTMLWriter.jl

View check run for this annotation

Codecov / codecov/patch

src/html/HTMLWriter.jl#L528

Added line #L528 was not covered by tests
end
isa(edit_link, Default) && (edit_link = edit_link[])
new(prettyurls, disable_git, edit_link, repolink, canonical, assets, analytics,
collapselevel, sidebar_sitename, highlights, mathengine, description, footer,
ansicolor, lang, warn_outdated, prerender, node, highlightjs,
size_threshold, size_threshold_warn,
size_threshold, size_threshold_warn, example_size_threshold,
)
end
end
Expand Down Expand Up @@ -1756,6 +1771,93 @@
end
end

"""
Generates a unique file for the output of an at-example block if it goes over the configured
size threshold, and returns the filename (that should be in the same directory as the
corresponding HTML file). If the data is under the threshold, no file is created, and the
function returns `nothing`.
"""
function write_data_file(dctx::DCtx, data::Union{Vector{UInt8},AbstractString}; suffix::AbstractString)
    # `example_size_threshold` is a size in bytes, so the payload is measured with
    # `sizeof` rather than `length`: for strings `length` counts characters, which
    # undercounts any output that contains multi-byte (non-ASCII) characters.
    # Being under the threshold returns `nothing`, indicating to the caller that
    # they should inline the data instead.
    if sizeof(data) < dctx.ctx.settings.example_size_threshold
        return nothing
    end
    # The file name is derived from a hash of the contents, plus the page it belongs to.
    slug = dataslug(data)
    datafile = data_filename(dctx, slug, suffix)
    # Generally, the directory for the HTML page will not exist yet.
    mkpath(dirname(datafile.path))
    write(datafile.path, data)
    # In all cases the file should be placed in the same directory as the HTML file,
    # so we only need the filename to generate a valid relative href.
    return datafile.filename
end

function data_filename(dctx::DCtx, slug::AbstractString, suffix::AbstractString)
    ctx = dctx.ctx
    # Split the page's source path into its directory and its file name.
    pagedir, page = splitdir(dctx.navnode.page)
    # Normalize the page name by dropping a trailing `.md` extension; any other
    # extension is kept as part of the name.
    stem = endswith(page, ".md") ? first(splitext(page)) : page

    if !ctx.settings.prettyurls
        # Without pretty URLs foo/bar.md becomes foo/bar.html, so the data file always
        # carries the page name: foo/bar-$(slug).png
        prefix = string(stem, "-", slug)
    elseif stem == "index"
        # With pretty URLs foo/index.md still becomes foo/index.html, so the data files
        # are differentiated by an explicit prefix: foo/index-$(slug).png
        prefix = string("index-", slug)
    else
        # With pretty URLs foo/bar.md becomes foo/bar/index.html, so the data file goes
        # into the page's own directory: foo/bar/$(slug).png
        pagedir = joinpath(pagedir, stem)
        prefix = slug
    end

    # Pick a name that does not clash with a file already present in the target directory.
    outdir = joinpath(ctx.doc.user.build, pagedir)
    filename = find_valid_data_file(outdir, prefix, suffix)
    return (; filename, path = joinpath(outdir, filename))
end

"""
    find_valid_data_file(directory, prefix, suffix) -> String

Return a file name (not a full path) of the form `prefix * suffix` that does not yet exist
in `directory`. If that name is taken, a zero-padded counter is inserted between the prefix
and the suffix (`prefix-001suffix`, `prefix-002suffix`, ...) until an unused name is found.

Throws an error if none of the candidate names is free.
"""
function find_valid_data_file(directory::AbstractString, prefix::AbstractString, suffix::AbstractString)
    # Candidate 0 is the plain name; candidates 1 to 10_000 carry a numeric suffix. If all
    # of them are taken, then something is probably really badly wrong, and so we just crash.
    for i in 0:10_000
        filename = if i == 0
            string(prefix, suffix)
        else
            string(prefix, '-', lpad(string(i), 3, '0'), suffix)
        end
        # `ispath` is true for files and directories alike, so any existing entry
        # counts as a clash.
        ispath(joinpath(directory, filename)) || return filename
    end
    error("""
    Unable to find valid file name for an at-example output:
    directory = $(directory)
    prefix = $(prefix)
    suffix = $(suffix)""")
end

"""
Returns the first `limit` characters of the hex SHA1 of the data `bytes`.
"""
function dataslug(bytes::Union{Vector{UInt8},AbstractString}; limit=8)::String
    # Hex-encode the SHA1 digest, then keep only its leading `limit` characters.
    hexdigest = SHA.sha1(bytes) |> bytes2hex
    return first(hexdigest, limit)
end

"""
Returns the full path of a [`Documenter.NavNode`](@ref) relative to `src/`.
"""
Expand Down Expand Up @@ -2137,13 +2239,18 @@
return if haskey(d, MIME"text/html"())
rawhtml(d[MIME"text/html"()])
elseif haskey(d, MIME"image/svg+xml"())
@tags img
svg = d[MIME"image/svg+xml"()]
svg_tag_match = match(r"<svg[^>]*>", svg)
if svg_tag_match === nothing
# There is no svg tag so we don't do any more advanced
# processing and just return the svg as HTML.
# The svg string should be invalid but that's not our concern here.
rawhtml(svg)
elseif length(svg) > dctx.ctx.settings.example_size_threshold
mortenpi marked this conversation as resolved.
Show resolved Hide resolved
filename = write_data_file(dctx, svg; suffix=".svg")
@assert !isnothing(filename)
img[:src => filename, :alt => "Example block output"]
else
# The xmlns attribute has to be present for data:image/svg+xml
# to work (https://stackoverflow.com/questions/18467982).
Expand Down Expand Up @@ -2181,13 +2288,13 @@
end

elseif haskey(d, MIME"image/png"())
rawhtml(string("<img src=\"data:image/png;base64,", d[MIME"image/png"()], "\" />"))
domify_show_image_binary(dctx, "png", d)
elseif haskey(d, MIME"image/webp"())
rawhtml(string("<img src=\"data:image/webp;base64,", d[MIME"image/webp"()], "\" />"))
domify_show_image_binary(dctx, "webp", d)
elseif haskey(d, MIME"image/gif"())
rawhtml(string("<img src=\"data:image/gif;base64,", d[MIME"image/gif"()], "\" />"))
domify_show_image_binary(dctx, "gif", d)
elseif haskey(d, MIME"image/jpeg"())
rawhtml(string("<img src=\"data:image/jpeg;base64,", d[MIME"image/jpeg"()], "\" />"))
domify_show_image_binary(dctx, "jpeg", d)
elseif haskey(d, MIME"text/latex"())
# If the show(io, ::MIME"text/latex", x) output is already wrapped in \[ ... \] or $$ ... $$, we
# unwrap it first, since when we output Markdown.LaTeX objects we put the correct
Expand Down Expand Up @@ -2215,6 +2322,24 @@
end
end

function domify_show_image_binary(dctx::DCtx, filetype::AbstractString, d::Dict{MIME,Any})
    @tags img
    mime_name = "image/$filetype"
    # The values stored in `d` are base64-encoded (the expander pipeline calls
    # `stringmime` when building the dictionary), so the payload has to be decoded
    # before it can be written to a file.
    encoded = d[MIME{Symbol(mime_name)}()]
    datafile = write_data_file(dctx, Base64.base64decode(encoded); suffix=".$filetype")
    # `nothing` means that no file was written, in which case the still-encoded
    # payload is inlined as a data URI; otherwise the image links to the written file.
    src = isnothing(datafile) ? string("data:$(mime_name);base64,", encoded) : datafile
    return img[:src => src, :alt => "Example block output"]
end

# filehrefs
# ------------------------------------------------------------------------------

Expand Down
Binary file added test/examples/images/big.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/examples/images/big.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Loading