From 6e4e7e2e8ca182c9c39d5211debcbeec5ab9653f Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Mon, 26 Jun 2023 07:50:34 +1000 Subject: [PATCH] Fix diagnostics covering trailing text Diagnostics may sometimes cover trailing text which wasn't consumed. For example, `parseatom(")")` doesn't consume the errant trailing ')', but the diagnostic refers to this character. In this case, `SourceFile` needs to cover the location of the diagnostic in addition to any text which is consumed. As part of this, mark `sourcetext(::ParseStream)` as deprecated, in favour of constructing a `SourceFile` to wrap things up more neatly. Also fix a bug in `highlight()` for empty `SourceFile`s. --- src/parse_stream.jl | 27 +++++++++++++++------------ src/parser.jl | 3 ++- src/source_files.jl | 4 +++- test/diagnostics.jl | 15 ++++++++++++--- test/source_files.jl | 3 +++ 5 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 2e99adaa..3a27edce 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -331,7 +331,7 @@ function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) end function show_diagnostics(io::IO, stream::ParseStream) - show_diagnostics(io, stream.diagnostics, sourcetext(stream)) + show_diagnostics(io, stream.diagnostics, SourceFile(stream)) end # We manage a pool of stream positions as parser working space @@ -1078,17 +1078,8 @@ function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream; end end -""" - sourcetext(stream::ParseStream; steal_textbuf=true) - -Return the source text being parsed by this `ParseStream` as a UTF-8 encoded -string. - -If `steal_textbuf==true`, this is permitted to steal the content of the -stream's text buffer. Note that this leaves the `ParseStream` in an invalid -state for further parsing. -""" function sourcetext(stream::ParseStream; steal_textbuf=false) + Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext) root = stream.text_root # The following kinda works but makes the return type of this method type # unstable. (Also codeunit(root) == UInt8 doesn't imply UTF-8 encoding?) @@ -1109,7 +1100,19 @@ function sourcetext(stream::ParseStream; steal_textbuf=false) end function SourceFile(stream::ParseStream; kws...) - return SourceFile(sourcetext(stream); first_index=first_byte(stream), kws...) + fbyte = first_byte(stream) + lbyte = maximum((last_byte(d) for d in stream.diagnostics); + init=last_byte(stream)) + # See also sourcetext() + srcroot = stream.text_root + str = if srcroot isa String + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + elseif srcroot isa SubString{String} + SubString(srcroot, fbyte, thisind(srcroot, lbyte)) + else + SubString(String(stream.textbuf[fbyte:lbyte])) + end + return SourceFile(str; first_index=first_byte(stream), kws...) end """ diff --git a/src/parser.jl b/src/parser.jl index a59839ca..9ca609d8 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -3561,7 +3561,8 @@ function parse_atom(ps::ParseState, check_identifiers=true) msg = leading_kind == K"EndMarker" ? "premature end of input" : "unexpected `$(untokenize(leading_kind))`" - bump_invisible(ps, K"error", error=msg) + emit_diagnostic(ps, error=msg) + bump_invisible(ps, K"error") else bump(ps, error="invalid syntax atom") end diff --git a/src/source_files.jl b/src/source_files.jl index 2ba33e1d..72811077 100644 --- a/src/source_files.jl +++ b/src/source_files.jl @@ -219,7 +219,9 @@ function highlight(io::IO, source::SourceFile, range::UnitRange; print(io, source[x:p-1]) _printstyled(io, hitext; bgcolor=color) print(io, source[q+1:d]) - source[thisind(source, d)] == '\n' || print(io, "\n") + if d >= firstindex(source) && source[thisind(source, d)] != '\n' + print(io, "\n") + end _print_marker_line(io, source[a:p-1], hitext, true, true, marker_line_color, note, notecolor) else # x -------------- diff --git a/test/diagnostics.jl b/test/diagnostics.jl index 9bba7bb3..423bb882 100644 --- a/test/diagnostics.jl +++ b/test/diagnostics.jl @@ -1,6 +1,6 @@ -function diagnostic(str; only_first=false, allow_multiple=false) +function diagnostic(str; only_first=false, allow_multiple=false, rule=:all) stream = ParseStream(str) - parse!(stream) + parse!(stream, rule=rule) if allow_multiple stream.diagnostics else @@ -52,7 +52,11 @@ end Diagnostic(6, 5, :error, "invalid macro name") @test diagnostic("a, , b") == - Diagnostic(4, 3, :error, "unexpected `,`") + Diagnostic(4, 4, :error, "unexpected `,`") + @test diagnostic(")", allow_multiple=true) == [ + Diagnostic(1, 1, :error, "unexpected `)`") + Diagnostic(1, 1, :error, "extra tokens after end of expression") + ] @test diagnostic("if\nfalse\nend") == Diagnostic(3, 3, :error, "missing condition in `if`") @@ -99,6 +103,11 @@ end @test diagnostic("\"\$(x,y)\"") == Diagnostic(3, 7, :error, "invalid interpolation syntax") + + @test diagnostic("", rule=:statement) == + Diagnostic(1, 0, :error, "premature end of input") + @test diagnostic("", rule=:atom) == + Diagnostic(1, 0, :error, "premature end of input") end @testset "parser warnings" begin diff --git a/test/source_files.jl b/test/source_files.jl index 374a858b..9a1548f1 100644 --- a/test/source_files.jl +++ b/test/source_files.jl @@ -115,6 +115,9 @@ end @test sprint(highlight, SourceFile("a\nα"), 1:4) == "┌\na\nα\n┘" @test sprint(highlight, SourceFile("a\nb\nα"), 3:3) == "a\nb\n╙\nα" + # empty files + @test sprint(highlight, SourceFile(""), 1:0) == "└" + # Multi-line ranges @test sprint(highlight, src, 1:7) == """ ┌───