Skip to content

Commit

Permalink
Prototype for conversion to CSTParser.EXPR
Browse files Browse the repository at this point in the history
This demonstrates the basic approach working, but various things don't
work here yet due to at least two things:
* Mismatch between the way the two packages tokenize the input, eg, for
  the delimiters in strings.
* Missing "trivia nodes" in JuliaSyntax (eg, a brackets node). These
  should probably be added.
  • Loading branch information
c42f committed Mar 10, 2022
1 parent 98bd80c commit 9c21de0
Showing 1 changed file with 268 additions and 0 deletions.
268 changes: 268 additions & 0 deletions prototypes/cst_conversion.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR.

using CSTParser

using JuliaSyntax
using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange,
@K_str, @KSet_cmd,
haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span,
is_infix, is_trivia, untokenize, TzTokens, children

# See CSTParser.tokenkindtoheadmap
function tokenkindtoheadmap(k::TzTokens.Kind)
k === TzTokens.COMMA ? :COMMA :
k === TzTokens.LPAREN ? :LPAREN :
k === TzTokens.RPAREN ? :RPAREN :
k === TzTokens.LSQUARE ? :LSQUARE :
k === TzTokens.RSQUARE ? :RSQUARE :
k === TzTokens.LBRACE ? :LBRACE :
k === TzTokens.RBRACE ? :RBRACE :
k === TzTokens.AT_SIGN ? :ATSIGN :
k === TzTokens.DOT ? :DOT :
k === TzTokens.ABSTRACT ? :ABSTRACT :
k === TzTokens.BAREMODULE ? :BAREMODULE :
k === TzTokens.BEGIN ? :BEGIN :
k === TzTokens.BREAK ? :BREAK :
k === TzTokens.CATCH ? :CATCH :
k === TzTokens.CONST ? :CONST :
k === TzTokens.CONTINUE ? :CONTINUE :
k === TzTokens.DO ? :DO :
k === TzTokens.ELSE ? :ELSE :
k === TzTokens.ELSEIF ? :ELSEIF :
k === TzTokens.END ? :END :
k === TzTokens.EXPORT ? :EXPORT :
k === TzTokens.FINALLY ? :FINALLY :
k === TzTokens.FOR ? :FOR :
k === TzTokens.FUNCTION ? :FUNCTION :
k === TzTokens.GLOBAL ? :GLOBAL :
k === TzTokens.IF ? :IF :
k === TzTokens.IMPORT ? :IMPORT :
k === TzTokens.LET ? :LET :
k === TzTokens.LOCAL ? :LOCAL :
k === TzTokens.MACRO ? :MACRO :
k === TzTokens.MODULE ? :MODULE :
k === TzTokens.MUTABLE ? :MUTABLE :
k === TzTokens.OUTER ? :OUTER :
k === TzTokens.PRIMITIVE ? :PRIMITIVE :
k === TzTokens.QUOTE ? :QUOTE :
k === TzTokens.RETURN ? :RETURN :
k === TzTokens.STRUCT ? :STRUCT :
k === TzTokens.TRY ? :TRY :
k === TzTokens.TYPE ? :TYPE :
k === TzTokens.USING ? :USING :
k === TzTokens.WHILE ? :WHILE :
k === TzTokens.INTEGER ? :INTEGER :
k === TzTokens.BIN_INT ? :BININT :
k === TzTokens.HEX_INT ? :HEXINT :
k === TzTokens.OCT_INT ? :OCTINT :
k === TzTokens.FLOAT ? :FLOAT :
k === TzTokens.STRING ? :STRING :
# k === TzTokens.TRIPLE_STRING ? :TRIPLESTRING :
k === TzTokens.CHAR ? :CHAR :
k === TzTokens.CMD ? :CMD :
# k === TzTokens.TRIPLE_CMD ? :TRIPLECMD :
k === TzTokens.TRUE ? :TRUE :
k === TzTokens.FALSE ? :FALSE :
k === TzTokens.ENDMARKER ? :errortoken :
error("Unknown token $k")
end

# Things which are "trailing trivia" according to CSTParser
#
# "Trailing trivia" is trivia which will be attached to the end of a node.
is_cst_trailing_trivia(x) = kind(x) in KSet`Whitespace NewlineWs Comment ;`

# Convert GreenNode into CSTParser.EXPR
function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)
node_start = position
cs = children(raw_node)
i = 1
args = CSTParser.EXPR[]
trivia = CSTParser.EXPR[]
last_trivia_span = 0
while i <= length(cs)
raw = cs[i]
if haschildren(raw)
c = cst(source, raw, position)
push!(args, c)
last_trivia_span = c.fullspan - c.span
position += span(raw)
else
start_pos = position
token_start = i
inner_span = span(raw)
position += span(raw)
# Here we append any trailing trivia tokens to the node.
while i < length(cs) && is_cst_trailing_trivia(cs[i+1])
position += span(cs[i+1])
i += 1
end
full_span = position - start_pos
last_trivia_span = full_span - inner_span

# Leaf node
k = kind(raw)
val_range = start_pos:(start_pos + inner_span - 1)
val = source[val_range]

if kind(raw) == K"nothing"
# First `nothing` token in file seems to require this. Why I don't know.
inner_span = full_span
end

# See CSTParser.literalmap. Which we can't use directly because we've
# customized Tokenize.jl :-(
cst_head = k === TzTokens.NOTHING ? :NOTHING :
# FIXME: Following probably need special handling
k === TzTokens.MACRO_NAME ? :IDENTIFIER :
k === TzTokens.CMD_MACRO_NAME ? :IDENTIFIER :
k === TzTokens.STRING_MACRO_NAME ? :IDENTIFIER :
k === TzTokens.DQUOTE ? :DQUOTE :
k === TzTokens.BACKTICK ? :BACKTICK :
is_operator(k) ? :OPERATOR :
is_identifier(k) ? :IDENTIFIER :
tokenkindtoheadmap(k)
# FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD, need special handling:
# * STRING doesn't incude delimiters (DQUOTE tokens)
# * CMD doesn't include delimiters (BACKTICK tokens)
# * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE
# * TRIPLE_CMD is a composite of CMD and TRIPLE_BACKTICK
# They don't exist anymore as individual tokens

push!(is_trivia(raw) ? trivia : args,
CSTParser.EXPR(cst_head, nothing, nothing, full_span, inner_span, val,
nothing, nothing))
end
i += 1
end

if is_infix(raw_node)
args[1], args[2] = args[2], args[1]
# TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions
end

full_span = position - node_start
inner_span = full_span - last_trivia_span
k = kind(raw_node)
cst_head = k == K"toplevel" ? :file :
is_operator(k) ? popfirst!(trivia) :
Symbol(lowercase(string(kind(raw_node))))
x = CSTParser.EXPR(cst_head, args,
isempty(trivia) ? nothing : trivia,
full_span, inner_span, nothing, nothing, nothing)
for a in args
a.parent = x
end
for a in trivia
a.parent = x
end
return x
end


# Some steps of conversion to CSTParser.EXPR is most conveniently done on the
# raw ParseStream representation. In particular, CSTParser.EXPR attaches
# some types of trivia to the end of nontrivia or trivia tokens.
#
# This function reassociates trivia with nonterminal nodes to make converting
# to CSTParser.EXPR a *local* operation on green tree nodes.
function parse_for_cst(text)
stream = JuliaSyntax.ParseStream(text)

# Insert initial nothing node if necessary to anchor trailing whitespace.
if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))
JuliaSyntax.bump_invisible(stream, K"nothing")
end
JuliaSyntax.parse(stream, rule=:toplevel)

# Fix up start of stream
ranges = stream.ranges
@assert kind(ranges[end]) == K"toplevel"
ranges[end] = let r = ranges[end]
TaggedRange(r.head, 1, r.last_token)
end

# Rearrange whitespace trivia tokens so that they're always *trailing*
# siblings of non-whitespace trivia tokens.
#
# This is required for later conversion to CSTParser.EXPR
tokens = stream.tokens
for (i,range) in enumerate(ranges)
first_token = range.first_token
while first_token < length(tokens) &&
is_cst_trailing_trivia(tokens[first_token])
first_token += 1
end
last_token = range.last_token
while last_token < length(tokens) &&
is_cst_trailing_trivia(tokens[last_token+1])
last_token += 1
end
ranges[i] = TaggedRange(head(range), first_token, last_token)
end

return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)
end

# CSTParser.EXPR equality; should be in CSTParser...
function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)
# Debugging hacks:
if x.head != y.head
@info "Trivia mismatch" x.head y.head
end
if x.trivia != y.trivia
@info "Trivia mismatch" x.trivia y.trivia
end
if x.fullspan != y.fullspan
@info "Fullspan mismatch" x y x.fullspan y.fullspan
end
if x.span != y.span
@info "Span mismatch" x y x.span y.span
end
if x.val != y.val
@info "Trivia mismatch" x.val y.val
end

return x.head == y.head &&
x.args == y.args &&
x.trivia == y.trivia &&
x.fullspan == y.fullspan &&
x.span == y.span &&
x.val == y.val &&
x.meta == y.meta
end

# Some things which work
#text = " 1 + 2 * 3 "
#text = "[ 1 ; 2 ;]"
#text = "for i=1:10\nx\ny\nend"
#text = "100.00"
text = """
function f(x,y)
s = 0
for i = 1:10
s += x - i^y
end
end
"""

# Some things which don't yet work
#
# Macro names
# text = "@A.asdf x y"
#
# Bracket nodes don't exist yet in JuliaSyntax
# text = "(a + b)"
#
# Strings have separate delimiters. Will need to put them back together.
# text = "\"str\""

source = SourceFile(text)

ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)

y = CSTParser.parse(text, true)
x = cst(source, ex)
x == y

0 comments on commit 9c21de0

Please sign in to comment.