Skip to content

Commit

Permalink
Add character APIs for locations
Browse files Browse the repository at this point in the history
  • Loading branch information
kddnewton committed Nov 20, 2023
1 parent 9fb276e commit b6f37ae
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 23 deletions.
2 changes: 1 addition & 1 deletion lib/prism/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def parse_comments(code, **options)
loader = Serialize::Loader.new(source, buffer.read)

loader.load_header
loader.load_force_encoding
loader.load_encoding
loader.load_start_line
loader.load_comments
end
Expand Down
58 changes: 46 additions & 12 deletions lib/prism/parse_result.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,40 +25,50 @@ def initialize(source, start_line = 1, offsets = compute_offsets(source))

# Perform a byteslice on the source code using the given byte offset and
# byte length.
def slice(offset, length)
source.byteslice(offset, length)
def slice(byte_offset, length)
source.byteslice(byte_offset, length)
end

# Binary search through the offsets to find the line number for the given
# byte offset.
def line(value)
start_line + find_line(value)
def line(byte_offset)
start_line + find_line(byte_offset)
end

# Return the byte offset of the start of the line corresponding to the given
# byte offset.
def line_offset(value)
offsets[find_line(value)]
def line_start(byte_offset)
offsets[find_line(byte_offset)]
end

# Return the column number for the given byte offset.
def column(value)
value - offsets[find_line(value)]
def column(byte_offset)
byte_offset - line_start(byte_offset)
end

# Return the character offset for the given byte offset, computed as the
# length in characters of the first byte_offset bytes of the source.
def character_offset(byte_offset)
  leading = source.byteslice(0, byte_offset)
  leading.length
end

# Return the column number in characters for the given byte offset: the
# character offset of that byte minus the character offset of the start of
# the line it falls on.
def character_column(byte_offset)
  line_begin = character_offset(line_start(byte_offset))
  character_offset(byte_offset) - line_begin
end

private

# Binary search through the offsets to find the line number for the given
# byte offset.
def find_line(value)
def find_line(byte_offset)
left = 0
right = offsets.length - 1

while left <= right
mid = left + (right - left) / 2
return mid if offsets[mid] == value
return mid if offsets[mid] == byte_offset

if offsets[mid] < value
if offsets[mid] < byte_offset
left = mid + 1
else
right = mid - 1
Expand Down Expand Up @@ -121,19 +131,31 @@ def slice
source.slice(start_offset, length)
end

# The offset, counted in characters from the beginning of the source, at
# which this location starts.
def start_character_offset
  first_byte = start_offset
  source.character_offset(first_byte)
end

# The byte offset, measured from the beginning of the source, at which this
# location ends (its starting byte offset plus its length).
def end_offset
  first_byte = start_offset
  first_byte + length
end

# The offset, counted in characters from the beginning of the source, at
# which this location ends.
def end_character_offset
  last_byte = end_offset
  source.character_offset(last_byte)
end

# The line number on which this location starts, as reported by the source
# for this location's starting byte offset.
def start_line
  first_byte = start_offset
  source.line(first_byte)
end

# The content of the line where this location starts before this location.
def start_line_slice
offset = source.line_offset(start_offset)
offset = source.line_start(start_offset)
source.slice(offset, start_offset - offset)
end

Expand All @@ -148,12 +170,24 @@ def start_column
source.column(start_offset)
end

# The column number in characters where this location starts from the start
# of the line.
def start_character_column
  first_byte = start_offset
  source.character_column(first_byte)
end

# The column number, in bytes from the start of the line, at which this
# location ends.
def end_column
  last_byte = end_offset
  source.column(last_byte)
end

# The column number, in characters from the start of the line, at which this
# location ends.
def end_character_column
  last_byte = end_offset
  source.character_column(last_byte)
end

# Implement the hash pattern matching interface for Location.
def deconstruct_keys(keys)
{ start_offset: start_offset, end_offset: end_offset }
Expand Down
14 changes: 4 additions & 10 deletions templates/lib/prism/serialize.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,9 @@ module Prism
end

def load_encoding
Encoding.find(io.read(load_varint))
end

def load_force_encoding
@encoding = load_encoding
@encoding = Encoding.find(io.read(load_varint))
@input = input.force_encoding(@encoding).freeze
@encoding
end

def load_start_line
Expand Down Expand Up @@ -121,18 +118,15 @@ module Prism
encoding = load_encoding
load_start_line
comments, magic_comments, errors, warnings = load_metadata

if encoding != @encoding
tokens.each { |token,| token.value.force_encoding(encoding) }
end
tokens.each { |token,| token.value.force_encoding(encoding) }

raise "Expected to consume all bytes while deserializing" unless @io.eof?
Prism::ParseResult.new(tokens, comments, magic_comments, errors, warnings, @source)
end

def load_nodes
load_header
load_force_encoding
load_encoding
load_start_line

comments, magic_comments, errors, warnings = load_metadata
Expand Down
32 changes: 32 additions & 0 deletions test/prism/ruby_api_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,38 @@ def test_location_join
end
end

# Checks that the character-based location APIs count characters rather than
# bytes. Every operand in the parsed source is a single multibyte emoji, so
# each one must span exactly one character even though it occupies several
# bytes.
def test_location_character_offsets
  program = Prism.parse("😀 + 😀\n😍 ||= 😍").value

  # first 😀
  location = program.statements.body.first.receiver.location
  assert_equal 0, location.start_character_offset
  assert_equal 1, location.end_character_offset
  assert_equal 0, location.start_character_column
  assert_equal 1, location.end_character_column

  # second 😀
  location = program.statements.body.first.arguments.arguments.first.location
  assert_equal 4, location.start_character_offset
  assert_equal 5, location.end_character_offset
  assert_equal 4, location.start_character_column
  assert_equal 5, location.end_character_column

  # first 😍
  location = program.statements.body.last.name_loc
  assert_equal 6, location.start_character_offset
  assert_equal 7, location.end_character_offset
  assert_equal 0, location.start_character_column
  assert_equal 1, location.end_character_column

  # second 😍
  location = program.statements.body.last.value.location
  assert_equal 12, location.start_character_offset
  assert_equal 13, location.end_character_offset
  assert_equal 6, location.start_character_column
  assert_equal 7, location.end_character_column
end

private

def parse_expression(source)
Expand Down

0 comments on commit b6f37ae

Please sign in to comment.