From bd5ff7031c8bd0ef17f9a46d2a9d6b59a2657407 Mon Sep 17 00:00:00 2001 From: Sebastian Pech Date: Thu, 5 Sep 2019 10:13:49 +0200 Subject: [PATCH 1/4] Add argument for disabling unicode escaping --- src/Weave.jl | 5 ++++- src/chunks.jl | 3 ++- src/format.jl | 41 +++++++++++++++++++++++------------------ test/formatter_test.jl | 17 +++++++++++++++++ 4 files changed, 46 insertions(+), 20 deletions(-) diff --git a/src/Weave.jl b/src/Weave.jl index db141d8f..a9513190 100644 --- a/src/Weave.jl +++ b/src/Weave.jl @@ -95,12 +95,15 @@ function weave(source ; doctype = :auto, throw_errors = false, template = nothing, highlight_theme = nothing, css = nothing, pandoc_options = String[]::Array{String}, - latex_cmd = "xelatex") + latex_cmd = "xelatex",escape_unicode=true) doc = read_doc(source, informat) doctype == :auto && (doctype = detect_doctype(doc.source)) doc.doctype = doctype + # Set unicode escape variable + doc.escape_unicode = escape_unicode + # Read args from document header, overrides command line args if haskey(doc.header, "options") (doctype, informat, out_path, args, mod, fig_path, fig_ext, diff --git a/src/chunks.jl b/src/chunks.jl index 717c9d43..89b9e6f9 100644 --- a/src/chunks.jl +++ b/src/chunks.jl @@ -19,11 +19,12 @@ mutable struct WeaveDoc highlight_theme fig_path::AbstractString chunk_defaults::Dict{Symbol,Any} + escape_unicode::Bool function WeaveDoc(source, chunks, header) path, fname = splitdir(abspath(source)) basename = splitext(fname)[1] new(source, basename, path, chunks, "", nothing, "", "", header, - "", "", Highlights.Themes.DefaultTheme, "", deepcopy(rcParams[:chunk_defaults])) + "", "", Highlights.Themes.DefaultTheme, "", deepcopy(rcParams[:chunk_defaults]),true) end end diff --git a/src/format.jl b/src/format.jl index ee9b5c5c..33686113 100644 --- a/src/format.jl +++ b/src/format.jl @@ -30,7 +30,7 @@ function format(doc::WeaveDoc) end for chunk in copy(doc.chunks) - result = format_chunk(chunk, formatdict, docformat) + result = 
format_chunk(chunk, formatdict, docformat; escape_unicode=doc.escape_unicode) push!(formatted, result) end @@ -114,7 +114,7 @@ function strip_header(chunk::DocChunk) return chunk end -function format_chunk(chunk::DocChunk, formatdict, docformat) +function format_chunk(chunk::DocChunk, formatdict, docformat; escape_unicode=true) return join([format_inline(c) for c in chunk.content], "") end @@ -141,7 +141,7 @@ function addspace(op, inline) return op end -function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex) +function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex; escape_unicode=true) out = IOBuffer() io = IOBuffer() for inline in chunk.content @@ -157,10 +157,11 @@ function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex) end end ioformat!(io, out) - return uc2tex(String(take!(out))) + escape_unicode && return uc2tex(String(take!(out))) + return String(take!(out)) end -function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML) +function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML; escape_unicode=true) out = IOBuffer() io = IOBuffer() fun = WeaveMarkdown.html @@ -180,7 +181,7 @@ function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML) return String(take!(out)) end -function format_chunk(chunk::CodeChunk, formatdict, docformat) +function format_chunk(chunk::CodeChunk, formatdict, docformat; escape_unicode=true) #Fill undefined options with format specific defaults chunk.options[:out_width] == nothing && (chunk.options[:out_width] = formatdict[:out_width]) @@ -196,7 +197,7 @@ function format_chunk(chunk::CodeChunk, formatdict, docformat) chunk.content = indent(chunk.content, formatdict[:indent]) end - chunk.content = format_code(chunk.content, docformat) + chunk.content = format_code(chunk.content, docformat; escape_unicode=escape_unicode) if !chunk.options[:eval] if chunk.options[:echo] @@ -226,10 +227,10 @@ function 
format_chunk(chunk::CodeChunk, formatdict, docformat) else if chunk.options[:wrap] chunk.output = "\n" * wraplines(chunk.output, chunk.options[:line_width]) - chunk.output = format_output(chunk.output, docformat) + chunk.output = format_output(chunk.output, docformat, escape_unicode=escape_unicode) else chunk.output = "\n" * rstrip(chunk.output) - chunk.output = format_output(chunk.output, docformat) + chunk.output = format_output(chunk.output, docformat, escape_unicode=escape_unicode) end if haskey(formatdict, :indent) @@ -253,26 +254,30 @@ function format_chunk(chunk::CodeChunk, formatdict, docformat) return result end -function format_output(result::AbstractString, docformat) +function format_output(result::AbstractString, docformat;escape_unicode=true) return result end -function format_output(result::AbstractString, docformat::JMarkdown2HTML) +function format_output(result::AbstractString, docformat::JMarkdown2HTML;escape_unicode=true) return Markdown.htmlesc(result) end -function format_output(result::AbstractString, docformat::JMarkdown2tex) - return uc2tex(result, true) +function format_output(result::AbstractString, docformat::JMarkdown2tex;escape_unicode=true) + # Highligts has some extra escaping defined, eg of $, ", ... 
+ result_escaped = sprint( (io, x) -> Highlights.Format.escape(io, MIME("text/latex"), x, charescape=true), result) + escape_unicode && return uc2tex(result_escaped, true) + return result_escaped end -function format_code(result::AbstractString, docformat) +function format_code(result::AbstractString, docformat;escape_unicode=true) return result end -function format_code(result::AbstractString, docformat::JMarkdown2tex) +function format_code(result::AbstractString, docformat::JMarkdown2tex;escape_unicode=true) highlighted = highlight(MIME("text/latex"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) - return uc2tex(highlighted) + escape_unicode && return uc2tex(highlighted) + return highlighted #return "\\begin{minted}[mathescape, fontsize=\\small, xleftmargin=0.5em]{julia}\n$result\n\\end{minted}\n" end @@ -305,12 +310,12 @@ function texify(s) return ts end -function format_code(result::AbstractString, docformat::JMarkdown2HTML) +function format_code(result::AbstractString, docformat::JMarkdown2HTML;escape_unicode=true) return highlight(MIME("text/html"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) end -function format_code(result::AbstractString, docformat::Pandoc2HTML) +function format_code(result::AbstractString, docformat::Pandoc2HTML;escape_unicode=true) return highlight(MIME("text/html"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) end diff --git a/test/formatter_test.jl b/test/formatter_test.jl index e074df41..cd2d91d3 100644 --- a/test/formatter_test.jl +++ b/test/formatter_test.jl @@ -112,3 +112,20 @@ ldoc = Weave.run(parsed, doctype = "md2tex") mdoc = Weave.run(parsed, doctype = "github") @test mdoc.chunks[1].rich_output == "\n\n### Small markdown sample\n\n**Hello** from `code` block.\n\n" @test mdoc.chunks[2].rich_output == "\n\n* one\n* two\n* three\n\n" + + +# Test disable escaping of unicode +content = """ +# Test chunk +α +""" + +dchunk = 
Weave.DocChunk(content, 1, 1) + +pformat = Weave.formats["md2tex"] + +f = Weave.format_chunk(dchunk, pformat.formatdict, pformat) +@test f == "\\section{Test chunk}\n\\ensuremath{\\alpha}\n\n" + +f = Weave.format_chunk(dchunk, pformat.formatdict, pformat,escape_unicode=false) +@test f == "\\section{Test chunk}\nα\n\n" From da66a8f272e350fecb6383fe47f23506429b0d3f Mon Sep 17 00:00:00 2001 From: Sebastian Pech Date: Thu, 5 Sep 2019 10:19:13 +0200 Subject: [PATCH 2/4] Add escape_unicode to docstring --- src/Weave.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Weave.jl b/src/Weave.jl index a9513190..34c15321 100644 --- a/src/Weave.jl +++ b/src/Weave.jl @@ -76,14 +76,15 @@ Weave an input document to output file. * `cache_path`: where of cached output will be saved. * `cache`: controls caching of code: `:off` = no caching, `:all` = cache everything, `:user` = cache based on chunk options, `:refresh`, run all code chunks and save new cache. -* `throw_errors` if `false` errors are included in output document and the whole document is +* `throw_errors`: if `false` errors are included in output document and the whole document is executed. if `true` errors are thrown when they occur. -* `template` : Template (file path) or MustacheTokens for md2html or md2tex formats. -* `highlight_theme` : Theme (Highlights.AbstractTheme) for used syntax highlighting -* `css` : CSS (file path) used for md2html format -* `pandoc_options` = String array of options to pass to pandoc for `pandoc2html` and +* `template`: Template (file path) or MustacheTokens for md2html or md2tex formats. +* `highlight_theme`: Theme (Highlights.AbstractTheme) for used syntax highlighting +* `css`: CSS (file path) used for md2html format +* `pandoc_options`: String array of options to pass to pandoc for `pandoc2html` and `pandoc2pdf` formats e.g. 
["--toc", "-N"] -* `latex_cmd` the command used to make pdf from .tex +* `latex_cmd`: the command used to make pdf from .tex +* `escape_unicode`: if set to true (default), try to convert unicode characters to respective LaTeX command **Note:** Run Weave from terminal and not using IJulia, Juno or ESS, they tend to mess with capturing output. """ From 0018f0e24c8a1c2723e6df062b53cb17b86136b4 Mon Sep 17 00:00:00 2001 From: Sebastian Pech Date: Tue, 15 Oct 2019 17:18:50 +0200 Subject: [PATCH 3/4] Fix new expected output in ref for new escape char --- test/documents/inline/markdown_beamer.tex.ref | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/documents/inline/markdown_beamer.tex.ref b/test/documents/inline/markdown_beamer.tex.ref index 7db8c273..1645e0f9 100644 --- a/test/documents/inline/markdown_beamer.tex.ref +++ b/test/documents/inline/markdown_beamer.tex.ref @@ -9,7 +9,7 @@ Some inline output \begin{lstlisting} -(*@\HLJLnf{println}@*)(*@\HLJLp{(}@*)(*@\HLJLs{"Testing output"}@*)(*@\HLJLp{)}@*) +(*@\HLJLnf{println}@*)(*@\HLJLp{(}@*)(*@\HLJLs{"{}Testing{\mbox{\space}}output"{}}@*)(*@\HLJLp{)}@*) \end{lstlisting} \begin{lstlisting} From c9e26d5638c7570574885b2cf80ba2042d8626df Mon Sep 17 00:00:00 2001 From: Sebastian Pech Date: Tue, 15 Oct 2019 21:48:05 +0200 Subject: [PATCH 4/4] Rename escape_unicode and move to tex based formatdicts --- src/Weave.jl | 10 +++++----- src/chunks.jl | 3 +-- src/format.jl | 36 ++++++++++++++++++------------------ src/formatters.jl | 9 ++++++--- src/run.jl | 7 ++++++- test/formatter_test.jl | 26 ++++++++++++++++++++++++-- 6 files changed, 60 insertions(+), 31 deletions(-) diff --git a/src/Weave.jl b/src/Weave.jl index 34c15321..204027d1 100644 --- a/src/Weave.jl +++ b/src/Weave.jl @@ -84,7 +84,9 @@ Weave an input document to output file. * `pandoc_options`: String array of options to pass to pandoc for `pandoc2html` and `pandoc2pdf` formats e.g. 
["--toc", "-N"] * `latex_cmd`: the command used to make pdf from .tex -* `escape_unicode`: if set to true (default), try to convert unicode characters to respective LaTeX command +* `latex_keep_unicode`: if set to true (default is false), do not convert Unicode characters to their +respective LaTeX representation. This is especially useful if a font and TeX engine with support for Unicode +characters are used. **Note:** Run Weave from terminal and not using IJulia, Juno or ESS, they tend to mess with capturing output. """ @@ -96,14 +98,12 @@ function weave(source ; doctype = :auto, throw_errors = false, template = nothing, highlight_theme = nothing, css = nothing, pandoc_options = String[]::Array{String}, - latex_cmd = "xelatex",escape_unicode=true) + latex_cmd = "xelatex",latex_keep_unicode=false) doc = read_doc(source, informat) doctype == :auto && (doctype = detect_doctype(doc.source)) doc.doctype = doctype - # Set unicode escape variable - doc.escape_unicode = escape_unicode # Read args from document header, overrides command line args if haskey(doc.header, "options") @@ -126,7 +126,7 @@ function weave(source ; doctype = :auto, mod = mod, out_path=out_path, args = args, fig_path = fig_path, fig_ext = fig_ext, cache_path = cache_path, cache=cache, - throw_errors = throw_errors) + throw_errors = throw_errors,latex_keep_unicode=latex_keep_unicode) formatted = format(doc) outname = get_outname(out_path, doc) diff --git a/src/chunks.jl b/src/chunks.jl index 89b9e6f9..717c9d43 100644 --- a/src/chunks.jl +++ b/src/chunks.jl @@ -19,12 +19,11 @@ mutable struct WeaveDoc highlight_theme fig_path::AbstractString chunk_defaults::Dict{Symbol,Any} - escape_unicode::Bool function WeaveDoc(source, chunks, header) path, fname = splitdir(abspath(source)) basename = splitext(fname)[1] new(source, basename, path, chunks, "", nothing, "", "", header, - "", "", Highlights.Themes.DefaultTheme, "", deepcopy(rcParams[:chunk_defaults]),true) + "", "", Highlights.Themes.DefaultTheme, "", 
deepcopy(rcParams[:chunk_defaults])) end end diff --git a/src/format.jl b/src/format.jl index 33686113..2c000313 100644 --- a/src/format.jl +++ b/src/format.jl @@ -30,7 +30,7 @@ function format(doc::WeaveDoc) end for chunk in copy(doc.chunks) - result = format_chunk(chunk, formatdict, docformat; escape_unicode=doc.escape_unicode) + result = format_chunk(chunk, formatdict, docformat) push!(formatted, result) end @@ -114,7 +114,7 @@ function strip_header(chunk::DocChunk) return chunk end -function format_chunk(chunk::DocChunk, formatdict, docformat; escape_unicode=true) +function format_chunk(chunk::DocChunk, formatdict, docformat) return join([format_inline(c) for c in chunk.content], "") end @@ -141,7 +141,7 @@ function addspace(op, inline) return op end -function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex; escape_unicode=true) +function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex) out = IOBuffer() io = IOBuffer() for inline in chunk.content @@ -157,11 +157,11 @@ function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2tex; esc end end ioformat!(io, out) - escape_unicode && return uc2tex(String(take!(out))) + formatdict[:keep_unicode] || return uc2tex(String(take!(out))) return String(take!(out)) end -function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML; escape_unicode=true) +function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML) out = IOBuffer() io = IOBuffer() fun = WeaveMarkdown.html @@ -181,7 +181,7 @@ function format_chunk(chunk::DocChunk, formatdict, docformat::JMarkdown2HTML; es return String(take!(out)) end -function format_chunk(chunk::CodeChunk, formatdict, docformat; escape_unicode=true) +function format_chunk(chunk::CodeChunk, formatdict, docformat) #Fill undefined options with format specific defaults chunk.options[:out_width] == nothing && (chunk.options[:out_width] = formatdict[:out_width]) @@ -197,7 +197,7 @@ function 
format_chunk(chunk::CodeChunk, formatdict, docformat; escape_unicode=tr chunk.content = indent(chunk.content, formatdict[:indent]) end - chunk.content = format_code(chunk.content, docformat; escape_unicode=escape_unicode) + chunk.content = format_code(chunk.content, docformat) if !chunk.options[:eval] if chunk.options[:echo] @@ -227,10 +227,10 @@ function format_chunk(chunk::CodeChunk, formatdict, docformat; escape_unicode=tr else if chunk.options[:wrap] chunk.output = "\n" * wraplines(chunk.output, chunk.options[:line_width]) - chunk.output = format_output(chunk.output, docformat, escape_unicode=escape_unicode) + chunk.output = format_output(chunk.output, docformat) else chunk.output = "\n" * rstrip(chunk.output) - chunk.output = format_output(chunk.output, docformat, escape_unicode=escape_unicode) + chunk.output = format_output(chunk.output, docformat) end if haskey(formatdict, :indent) @@ -254,29 +254,29 @@ function format_chunk(chunk::CodeChunk, formatdict, docformat; escape_unicode=tr return result end -function format_output(result::AbstractString, docformat;escape_unicode=true) +function format_output(result::AbstractString, docformat) return result end -function format_output(result::AbstractString, docformat::JMarkdown2HTML;escape_unicode=true) +function format_output(result::AbstractString, docformat::JMarkdown2HTML) return Markdown.htmlesc(result) end -function format_output(result::AbstractString, docformat::JMarkdown2tex;escape_unicode=true) +function format_output(result::AbstractString, docformat::JMarkdown2tex) # Highligts has some extra escaping defined, eg of $, ", ... 
result_escaped = sprint( (io, x) -> Highlights.Format.escape(io, MIME("text/latex"), x, charescape=true), result) - escape_unicode && return uc2tex(result_escaped, true) + docformat.formatdict[:keep_unicode] || return uc2tex(result_escaped, true) return result_escaped end -function format_code(result::AbstractString, docformat;escape_unicode=true) +function format_code(result::AbstractString, docformat) return result end -function format_code(result::AbstractString, docformat::JMarkdown2tex;escape_unicode=true) +function format_code(result::AbstractString, docformat::JMarkdown2tex) highlighted = highlight(MIME("text/latex"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) - escape_unicode && return uc2tex(highlighted) + docformat.formatdict[:keep_unicode] || return uc2tex(highlighted) return highlighted #return "\\begin{minted}[mathescape, fontsize=\\small, xleftmargin=0.5em]{julia}\n$result\n\\end{minted}\n" end @@ -310,12 +310,12 @@ function texify(s) return ts end -function format_code(result::AbstractString, docformat::JMarkdown2HTML;escape_unicode=true) +function format_code(result::AbstractString, docformat::JMarkdown2HTML) return highlight(MIME("text/html"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) end -function format_code(result::AbstractString, docformat::Pandoc2HTML;escape_unicode=true) +function format_code(result::AbstractString, docformat::Pandoc2HTML) return highlight(MIME("text/html"), strip(result), Highlights.Lexers.JuliaLexer, docformat.formatdict[:theme]) end diff --git a/src/formatters.jl b/src/formatters.jl index eb175785..2ddabb56 100644 --- a/src/formatters.jl +++ b/src/formatters.jl @@ -18,7 +18,8 @@ const tex = Tex("Latex with custom code environments", :fig_env=> "figure", :fig_pos => "htpb", :doctype => "tex", - :mimetypes => ["application/pdf", "image/png", "text/latex", "text/plain"] + :mimetypes => ["application/pdf", "image/png", "text/latex", "text/plain"], + :keep_unicode 
=> false, )) const texminted = Tex("Latex using minted for highlighting", @@ -35,7 +36,8 @@ const texminted = Tex("Latex using minted for highlighting", :fig_env=> "figure", :fig_pos => "htpb", :doctype => "texminted", - :mimetypes => ["application/pdf", "image/png", "text/latex", "text/plain"] + :mimetypes => ["application/pdf", "image/png", "text/latex", "text/plain"], + :keep_unicode => false, )) struct Pandoc @@ -147,7 +149,8 @@ const md2tex = JMarkdown2tex("Julia markdown to latex", Dict{Symbol,Any}( :out_width => "\\linewidth", :mimetypes => ["application/pdf", "image/png", "image/jpg", "text/latex", "text/markdown", "text/plain"], - :doctype=> "md2tex")) + :doctype=> "md2tex", + :keep_unicode=>false)) struct MultiMarkdown diff --git a/src/run.jl b/src/run.jl index 77bc177d..9828aa1a 100644 --- a/src/run.jl +++ b/src/run.jl @@ -26,13 +26,18 @@ Run code chunks and capture output from parsed document. function Base.run(doc::WeaveDoc; doctype = :auto, mod::Union{Module, Symbol} = :sandbox, out_path=:doc, args=Dict(), fig_path = "figures", fig_ext = nothing, - cache_path = "cache", cache = :off, throw_errors=false) + cache_path = "cache", cache = :off, throw_errors=false, latex_keep_unicode=false) #cache :all, :user, :off, :refresh doc.cwd = get_cwd(doc, out_path) doctype == :auto && (doctype = detect_doctype(doc.source)) doc.doctype = doctype doc.format = formats[doctype] + + if (haskey(doc.format.formatdict, :keep_unicode)) + doc.format.formatdict[:keep_unicode] = latex_keep_unicode + end + isdir(doc.cwd) || mkpath(doc.cwd) if occursin("2pdf", doctype) && cache == :off diff --git a/test/formatter_test.jl b/test/formatter_test.jl index cd2d91d3..f79df2b2 100644 --- a/test/formatter_test.jl +++ b/test/formatter_test.jl @@ -126,6 +126,28 @@ pformat = Weave.formats["md2tex"] f = Weave.format_chunk(dchunk, pformat.formatdict, pformat) @test f == "\\section{Test chunk}\n\\ensuremath{\\alpha}\n\n" - -f = Weave.format_chunk(dchunk, pformat.formatdict, 
pformat,escape_unicode=false) +pformat.formatdict[:keep_unicode] = true +f = Weave.format_chunk(dchunk, pformat.formatdict, pformat) @test f == "\\section{Test chunk}\nα\n\n" + +function doc_from_string(str) + parsed = Weave.parse_doc(str,"markdown") + header = Weave.parse_header(parsed[1]) + Weave.WeaveDoc("",parsed,header) +end + +doc_content = """ +```julia +α = 10 +``` +""" + +parsed = doc_from_string(doc_content) +ldoc = Weave.run(parsed, doctype = "md2tex") +@test occursin(Weave.uc2tex("α"),Weave.format(ldoc)) +@test !occursin("α",Weave.format(ldoc)) + +parsed = doc_from_string(doc_content) +ldoc = Weave.run(parsed, doctype = "md2tex",latex_keep_unicode=true) +@test occursin("α",Weave.format(ldoc)) +@test !occursin(Weave.uc2tex("α"),Weave.format(ldoc))