Skip to content

Commit

Permalink
get rid of the startpos field
Browse files Browse the repository at this point in the history
this makes untokenization slightly more annoying
  • Loading branch information
KristofferC committed Aug 18, 2022
1 parent e40e911 commit 52bc042
Show file tree
Hide file tree
Showing 2 changed files with 231 additions and 193 deletions.
36 changes: 6 additions & 30 deletions src/tokenize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,23 @@ TOKEN_ERROR_DESCRIPTION = Dict{Kind, String}(
# A single lexed token: its kind, its byte span, and two boolean flags.
struct Token
kind::Kind
# Offsets into a string or buffer. `untokenize` adds 1 to both before indexing
# the code units, so they are zero-based and `endbyte` is inclusive.
startbyte::Int # The byte where the token starts in the buffer
endbyte::Int # The byte where the token ends in the buffer
dotop::Bool # NOTE(review): presumably set for dotted operators - confirm in the lexer
suffix::Bool # NOTE(review): presumably set when an operator carries a suffix - confirm
end
function Token(kind::Kind, startbyte::Int, endbyte::Int)
Token(kind, startbyte, endbyte, false, false)
function Token(kind::Kind, endbyte::Int)
Token(kind, endbyte, false, false)
end
Token() = Token(K"error", 0, 0, false, false)
Token() = Token(K"error", 0, false, false)

const EMPTY_TOKEN = Token()

"""
    kind(t::Token)

Return the `Kind` stored in `t`.
"""
function kind(t::Token)
    return t.kind
end

"""
    startbyte(t::Token)

Return the stored offset of the first byte of `t` in the buffer.
"""
function startbyte(t::Token)
    return t.startbyte
end
"""
    endbyte(t::Token)

Return the stored offset of the last byte of `t` in the buffer.
"""
function endbyte(t::Token)
    return t.endbyte
end


"""
    untokenize(t::Token, str::String)

Return the text of `t` as a new `String`, copied from the code units of `str`.
"""
function untokenize(t::Token, str::String)
    # Token offsets are shifted by one to index the one-based code unit array.
    first_unit = t.startbyte + 1
    last_unit = t.endbyte + 1
    units = codeunits(str)
    return String(units[first_unit:last_unit])
end

# Print `t` as two fixed-width columns: "start-end" padded to 11 characters,
# followed by the token kind padded to 15 characters.
function Base.show(io::IO, t::Token)
    byte_span = string(startbyte(t), "-", endbyte(t))
    print(io, rpad(byte_span, 11, " "))
    kind_column = rpad(kind(t), 15, " ")
    print(io, kind_column)
    return nothing
end

#-------------------------------------------------------------------------------
# Lexer
Expand All @@ -77,9 +68,7 @@ Ideally a lexer is stateless but some state is needed here for:
"""
mutable struct Lexer{IO_t <: IO}
io::IO_t

token_startpos::Int

last_token::Kind
string_states::Vector{StringState}
chars::Tuple{Char,Char,Char,Char}
Expand Down Expand Up @@ -156,13 +145,6 @@ Return the latest `Token`'s starting position.
"""
function startpos(l::Lexer)
    # Plain read of the start position stored on the lexer.
    return l.token_startpos
end

"""
startpos!(l::Lexer, i::Integer)
Set a new starting position.
"""
function startpos!(l::Lexer, i::Integer)
    # Store the new start position; the assigned value `i` is the result.
    l.token_startpos = i
    return i
end

"""
peekchar(l::Lexer)
Expand All @@ -171,14 +153,14 @@ Returns the next character without changing the lexer's state.
function peekchar(l::Lexer)
    # The second slot of the character tuple is the next character.
    lookahead = l.chars
    return lookahead[2]
end

"""
dpeekchar(l::Lexer)
dpeekchar(l::Lexer)
Returns the next two characters without changing the lexer's state.
"""
function dpeekchar(l::Lexer)
    # Slots 2 and 3 of the character tuple are the next two characters.
    lookahead = l.chars
    return (lookahead[2], lookahead[3])
end

"""
peekchar3(l::Lexer)
peekchar3(l::Lexer)
Returns the next three characters without changing the lexer's state.
"""
Expand All @@ -198,8 +180,6 @@ Determine whether the end of the lexer's underlying buffer has been reached.
"""
function Base.eof(l::Lexer)
    # Delegate to the wrapped IO object.
    return eof(l.io)
end

function Base.seek(l::Lexer, pos)
    # Delegate to the wrapped IO object; only the stream is repositioned here.
    return seek(l.io, pos)
end

"""
start_token!(l::Lexer)
Expand All @@ -215,9 +195,6 @@ end
Returns the next character and increments the current position.
"""
function readchar end


function readchar(l::Lexer)
c = readchar(l.io)
l.chars = (l.chars[2], l.chars[3], l.chars[4], c)
Expand Down Expand Up @@ -271,8 +248,7 @@ function emit(l::Lexer, kind::Kind)
suffix = true
end
end

tok = Token(kind, startpos(l), position(l) - 1, l.dotop, suffix)
tok = Token(kind, position(l) - 1, l.dotop, suffix)

l.dotop = false
l.last_token = kind
Expand Down
Loading

0 comments on commit 52bc042

Please sign in to comment.