Skip to content

Commit

Permalink
Overhaul unicode parsing
Browse files Browse the repository at this point in the history
It now instead iterates over the actual unicode code points. This is better
than what it was previously doing, but it is still not entirely correct
w.r.t. unicode sequences.

This handling of unicode code points does, however, make it slightly
easier to handle UTF-16 if needed in the future.

This also adds some long-needed tests for buffer methods.
  • Loading branch information
abbec committed Aug 28, 2024
1 parent 991283f commit 121fe22
Show file tree
Hide file tree
Showing 17 changed files with 761 additions and 662 deletions.
473 changes: 288 additions & 185 deletions src/dged/buffer.c

Large diffs are not rendered by default.

44 changes: 30 additions & 14 deletions src/dged/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,13 @@ struct location buffer_end(struct buffer *buffer);
uint32_t buffer_num_lines(struct buffer *buffer);

/**
* Get the number of chars in a given line in buffer.
* Get the line length in number of column positions.
*
* @param [in] buffer The buffer to use.
* @param [in] line The line to get number of chars for.
* @returns The number of chars in @ref line.
* @param [in] line The line to get number of columns for.
* @returns The number of column positions in @p line.
*/
uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line);
uint32_t buffer_line_length(struct buffer *buffer, uint32_t line);

/**
* Insert a newline in the buffer.
Expand Down Expand Up @@ -555,6 +555,13 @@ uint32_t buffer_add_reload_hook(struct buffer *buffer, reload_hook_cb callback,
void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
remove_hook_cb callback);

/**
* Describes where an edit (insertion or removal) took place in a buffer.
*
* Passed by value to insert and delete hook callbacks
* (@ref insert_hook_cb, @ref delete_hook_cb).
*/
struct edit_location {
/* Region of the edit; presumably in line/column coordinates, as opposed
* to @ref bytes -- TODO confirm against buffer.c. */
struct region coordinates;
/* Region of the edit where the syntax highlighter uses `line` as the row
* and `col` as the column of a tree-sitter point (presumably a byte
* offset within the line -- TODO confirm against buffer.c). */
struct region bytes;
/* Global byte offset to the beginning of the edited text. */
uint64_t global_byte_begin;
/* Global byte offset to the end of the edited text. */
uint64_t global_byte_end;
};

/**
* Buffer insert hook callback function.
*
Expand All @@ -565,9 +572,8 @@ void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of where text was inserted.
* @param userdata The userdata as sent in to @ref buffer_add_insert_hook.
*/
typedef void (*insert_hook_cb)(struct buffer *buffer, struct region inserted,
uint32_t begin_idx, uint32_t end_idx,
void *userdata);
typedef void (*insert_hook_cb)(struct buffer *buffer,
struct edit_location inserted, void *userdata);

/**
* Add an insert hook, called when text is inserted into the @p buffer.
Expand Down Expand Up @@ -600,9 +606,8 @@ void buffer_remove_insert_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of the removed text.
* @param userdata The userdata as sent in to @ref buffer_add_delete_hook.
*/
typedef void (*delete_hook_cb)(struct buffer *buffer, struct region removed,
uint32_t begin_idx, uint32_t end_idx,
void *userdata);
typedef void (*delete_hook_cb)(struct buffer *buffer,
struct edit_location removed, void *userdata);

/**
* Add a delete hook, called when text is removed from the @p buffer.
Expand Down Expand Up @@ -724,10 +729,6 @@ void buffer_update(struct buffer *buffer, struct buffer_update_params *params);
*/
void buffer_render(struct buffer *buffer, struct buffer_render_params *params);

// TODO: move this to where it makes sense
uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
uint32_t end_col);

/**
* Sort lines in a buffer alphabetically.
*
Expand All @@ -738,4 +739,19 @@ uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
uint32_t end_line);

struct location buffer_location_to_byte_coords(struct buffer *buffer,
struct location coords);

/**
* Result type returned by @ref buffer_find_prev_in_line and
* @ref buffer_find_next_in_line.
*/
struct match_result {
/* Location associated with the result; presumably where the predicate
* matched -- only meaningful when `found` is set (TODO confirm). */
struct location at;
/* Presumably true when a code point satisfying the predicate was found
* (TODO confirm against the implementation in buffer.c). */
bool found;
};

struct match_result
buffer_find_prev_in_line(struct buffer *buffer, struct location start,
bool (*predicate)(const struct codepoint *c));
struct match_result
buffer_find_next_in_line(struct buffer *buffer, struct location start,
bool (*predicate)(const struct codepoint *c));

#endif
40 changes: 17 additions & 23 deletions src/dged/buffer_view.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ void buffer_view_backward_nlines(struct buffer_view *view, uint32_t nlines) {
}

void buffer_view_goto_end_of_line(struct buffer_view *view) {
view->dot.col = buffer_num_chars(view->buffer, view->dot.line);
view->dot.col = buffer_line_length(view->buffer, view->dot.line);
}

void buffer_view_goto_beginning_of_line(struct buffer_view *view) {
Expand Down Expand Up @@ -224,15 +224,22 @@ void buffer_view_delete_word(struct buffer_view *view) {
}

void buffer_view_kill_line(struct buffer_view *view) {
uint32_t nchars =
buffer_num_chars(view->buffer, view->dot.line) - view->dot.col;
if (nchars == 0) {
nchars = 1;
uint32_t ncols =
buffer_line_length(view->buffer, view->dot.line) - view->dot.col;

uint32_t line = view->dot.line;
uint32_t col = view->dot.col + ncols;

// kill the newline if we are at the end of the line
if (ncols == 0) {
struct location loc = buffer_next_char(view->buffer, view->dot);
line = loc.line;
col = loc.col;
}

struct region reg = region_new(view->dot, (struct location){
.line = view->dot.line,
.col = view->dot.col + nchars,
.line = line,
.col = col,
});

buffer_cut(view->buffer, reg);
Expand All @@ -241,7 +248,8 @@ void buffer_view_kill_line(struct buffer_view *view) {
void buffer_view_sort_lines(struct buffer_view *view) {
struct region reg = region_new(view->dot, view->mark);
if (view->mark_set && region_has_size(reg)) {
if (reg.end.line > 0 && buffer_num_chars(view->buffer, reg.end.line) == 0) {
if (reg.end.line > 0 &&
buffer_line_length(view->buffer, reg.end.line) == 0) {
reg.end.line -= 1;
}

Expand Down Expand Up @@ -271,21 +279,7 @@ struct location buffer_view_dot_to_relative(struct buffer_view *view) {
}

struct location buffer_view_dot_to_visual(struct buffer_view *view) {
// calculate visual column index for dot column
struct text_chunk c = buffer_line(view->buffer, view->dot.line);
uint32_t width = visual_string_width(c.text, c.nbytes, 0, view->dot.col);
if (view->scroll.col > 0) {
width -= visual_string_width(c.text, c.nbytes, 0, view->scroll.col);
}

struct location l = buffer_view_dot_to_relative(view);
l.col = width + view->fringe_width;

if (c.allocated) {
free(c.text);
}

return l;
return buffer_view_dot_to_relative(view);
}

void buffer_view_undo(struct buffer_view *view) {
Expand Down
33 changes: 12 additions & 21 deletions src/dged/display.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct push_fmt_cmd {
struct repeat_cmd {
uint32_t col;
uint32_t row;
int32_t c;
uint32_t c;
uint32_t nrepeat;
};

Expand Down Expand Up @@ -135,21 +135,7 @@ void display_destroy(struct display *display) {
uint32_t display_width(struct display *display) { return display->width; }
uint32_t display_height(struct display *display) { return display->height; }

void putch(uint8_t c) {
// TODO: move this to buffer rendering
if (c < ' ') {
fprintf(stdout, "^%c", c + 0x40);
} else if (c == 0x7f) {
fprintf(stdout, "^?");
} else if (utf8_byte_is_unicode_start(c) ||
utf8_byte_is_unicode_continuation(c)) {
putc(c, stdout);
} else if (c >= ' ' && c < 0x7f) {
putc(c, stdout);
} else {
fprintf(stdout, "|0x%02x|", c);
}
}
void putch(uint8_t c) { putc(c, stdout); }

static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {
if (fmt_stack == NULL || fmt_stack_len == 0) {
Expand All @@ -164,6 +150,7 @@ static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {

void putch_ws(uint8_t c, bool show_whitespace, uint8_t *fmt_stack,
uint32_t fmt_stack_len) {
// TODO: tab width needs to be sent here
if (show_whitespace && c == '\t') {
fputs("\x1b[90m → \x1b[39m", stdout);
apply_fmt(fmt_stack, fmt_stack_len);
Expand Down Expand Up @@ -295,7 +282,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
}

void command_list_draw_repeated(struct command_list *list, uint32_t col,
uint32_t row, int32_t c, uint32_t nrepeat) {
uint32_t row, uint32_t c, uint32_t nrepeat) {
struct repeat_cmd *cmd = add_command(list, RenderCommand_Repeat)->repeat;
cmd->col = col;
cmd->row = row;
Expand Down Expand Up @@ -401,10 +388,14 @@ void display_render(struct display *display,
display_move_cursor(display, repeat_cmd->row + cl->yoffset,
repeat_cmd->col + cl->xoffset);
apply_fmt(fmt_stack, fmt_stack_len);
uint32_t nbytes = utf8_nbytes((uint8_t *)&repeat_cmd->c, 4, 1);
for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
putbytes((uint8_t *)&repeat_cmd->c, nbytes, show_whitespace_state,
fmt_stack, fmt_stack_len);
struct utf8_codepoint_iterator iter =
create_utf8_codepoint_iterator((uint8_t *)&repeat_cmd->c, 4, 0);
struct codepoint *codepoint = utf8_next_codepoint(&iter);
if (codepoint != NULL) {
for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
putbytes((uint8_t *)&repeat_cmd->c, codepoint->nbytes,
show_whitespace_state, fmt_stack, fmt_stack_len);
}
}
break;
}
Expand Down
2 changes: 1 addition & 1 deletion src/dged/display.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
* @param nrepeat Number of times to repeat byte.
*/
void command_list_draw_repeated(struct command_list *list, uint32_t col,
uint32_t row, int32_t c, uint32_t nrepeat);
uint32_t row, uint32_t c, uint32_t nrepeat);

void command_list_draw_command_list(struct command_list *list,
struct command_list *to_draw);
32 changes: 18 additions & 14 deletions src/dged/keyboard.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys,
} else if (utf8_byte_is_unicode_continuation(b)) {
// do nothing for these
} else { // ascii char or unicode start byte (self-inserting)
uint32_t nb = utf8_byte_is_unicode_start(b)
? utf8_nbytes(bytes + bytei, nbytes - bytei, 1)
: 1;

// "compress" number of keys if previous key was also a
// "simple" key
if (prev_kp != NULL && prev_kp->mod == None) {
prev_kp->end += nb;
} else {
kp->mod = None;
kp->key = b;
kp->start = bytei;
kp->end = bytei + nb;
++nkps;
// TODO: do this better
struct utf8_codepoint_iterator iter =
create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0);
struct codepoint *codepoint = utf8_next_codepoint(&iter);
if (codepoint != NULL) {
uint32_t nb = codepoint->nbytes;

// "compress" number of keys if previous key was also a
// "simple" key
if (prev_kp != NULL && prev_kp->mod == None) {
prev_kp->end += nb;
} else {
kp->mod = None;
kp->key = b;
kp->start = bytei;
kp->end = bytei + nb;
++nkps;
}
}
}
}
Expand Down
68 changes: 29 additions & 39 deletions src/dged/syntax.c
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,8 @@ static void update_parser(struct buffer *buffer, void *userdata,
: origin.line + height;
ts_query_cursor_set_point_range(
cursor, (TSPoint){.row = origin.line, .column = origin.col},
(TSPoint){.row = end_line, .column = buffer_num_chars(buffer, end_line)});
(TSPoint){.row = end_line,
.column = buffer_line_length(buffer, end_line)});
ts_query_cursor_exec(cursor, h->query, ts_tree_root_node(h->tree));

TSQueryMatch match;
Expand Down Expand Up @@ -406,47 +407,39 @@ static void update_parser(struct buffer *buffer, void *userdata,
continue;
}

buffer_add_text_property(
buffer,
(struct location){.line = start.row,
.col = text_byteindex_to_col(
buffer->text, start.row, start.column)},
(struct location){.line = end.row,
.col = text_byteindex_to_col(buffer->text, end.row,
end.column - 1)},
(struct text_property){
.type = TextProperty_Colors,
.colors =
(struct text_property_colors){
.set_fg = true,
.fg = color,
},
});
text_add_property(buffer->text, start.row, start.column, end.row,
end.column > 0 ? end.column - 1 : 0,
(struct text_property){
.type = TextProperty_Colors,
.colors =
(struct text_property_colors){
.set_fg = true,
.fg = color,
},
});
}
}

ts_query_cursor_delete(cursor);
}

static void text_removed(struct buffer *buffer, struct region removed,
uint32_t begin_idx, uint32_t end_idx, void *userdata) {
static void text_removed(struct buffer *buffer, struct edit_location removed,
void *userdata) {
struct highlight *h = (struct highlight *)userdata;

TSPoint begin = {.row = removed.begin.line,
.column = text_col_to_byteindex(
buffer->text, removed.begin.line, removed.begin.col)};
TSPoint begin = {.row = removed.bytes.begin.line,
.column = removed.bytes.begin.col};
TSPoint new_end = begin;
TSPoint old_end = {.row = removed.end.line,
.column = text_col_to_byteindex(
buffer->text, removed.end.line, removed.end.col)};
TSPoint old_end = {.row = removed.bytes.end.line,
.column = removed.bytes.end.col};

TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
.start_byte = begin_idx,
.old_end_byte = end_idx,
.new_end_byte = begin_idx,
.start_byte = removed.global_byte_begin,
.old_end_byte = removed.global_byte_end,
.new_end_byte = removed.global_byte_begin,
};

ts_tree_edit(h->tree, &edit);
Expand Down Expand Up @@ -479,27 +472,24 @@ static void buffer_reloaded(struct buffer *buffer, void *userdata) {
}
}

static void text_inserted(struct buffer *buffer, struct region inserted,
uint32_t begin_idx, uint32_t end_idx,
static void text_inserted(struct buffer *buffer, struct edit_location inserted,
void *userdata) {
struct timer *text_inserted = timer_start("syntax.txt-inserted");
struct highlight *h = (struct highlight *)userdata;

TSPoint begin = {.row = inserted.begin.line,
.column = text_col_to_byteindex(
buffer->text, inserted.begin.line, inserted.begin.col)};
TSPoint begin = {.row = inserted.bytes.begin.line,
.column = inserted.bytes.begin.col};
TSPoint old_end = begin;
TSPoint new_end = {.row = inserted.end.line,
.column = text_col_to_byteindex(
buffer->text, inserted.end.line, inserted.end.col)};
TSPoint new_end = {.row = inserted.bytes.end.line,
.column = inserted.bytes.end.col};

TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
.start_byte = begin_idx,
.old_end_byte = begin_idx,
.new_end_byte = end_idx,
.start_byte = inserted.global_byte_begin,
.old_end_byte = inserted.global_byte_begin,
.new_end_byte = inserted.global_byte_end,
};

ts_tree_edit(h->tree, &edit);
Expand Down
Loading

0 comments on commit 121fe22

Please sign in to comment.