From dec5963c26dd17d8ac99180eda0e07bfe6508bd6 Mon Sep 17 00:00:00 2001 From: Albert Cervin Date: Fri, 23 Aug 2024 17:07:27 +0200 Subject: [PATCH] WIP explod unicode world --- src/dged/buffer.c | 439 +++++++++++++++++++++++++---------------- src/dged/buffer.h | 44 +++-- src/dged/buffer_view.c | 31 +-- src/dged/display.c | 33 ++-- src/dged/display.h | 2 +- src/dged/keyboard.c | 32 +-- src/dged/minibuffer.c | 5 +- src/dged/syntax.c | 68 +++---- src/dged/text.c | 233 +++++++--------------- src/dged/text.h | 54 +++-- src/dged/utf8.c | 152 +++++++++----- src/dged/utf8.h | 28 ++- src/main/cmds.c | 2 +- src/main/completion.c | 108 +++++----- 14 files changed, 637 insertions(+), 594 deletions(-) diff --git a/src/dged/buffer.c b/src/dged/buffer.c index 6051f69..1d92326 100644 --- a/src/dged/buffer.c +++ b/src/dged/buffer.c @@ -157,6 +157,42 @@ void buffer_static_teardown() { } } +static uint32_t get_tab_width(struct buffer *buffer) { + struct setting *tw = lang_setting(&buffer->lang, "tab-width"); + if (tw == NULL) { + tw = settings_get("editor.tab-width"); + } + + uint32_t tab_width = 4; + if (tw != NULL && tw->value.type == Setting_Number) { + tab_width = tw->value.number_value; + } + return tab_width; +} + +static bool use_tabs(struct buffer *buffer) { + struct setting *ut = lang_setting(&buffer->lang, "use-tabs"); + if (ut == NULL) { + ut = settings_get("editor.use-tabs"); + } + + bool use_tabs = false; + if (ut != NULL && ut->value.type == Setting_Bool) { + use_tabs = ut->value.bool_value; + } + + return use_tabs; +} + +static uint32_t visual_char_width(struct codepoint *codepoint, + uint32_t tab_width) { + if (codepoint->codepoint == '\t') { + return tab_width; + } else { + return unicode_visual_char_width(codepoint); + } +} + static struct buffer create_internal(const char *name, char *filename) { struct buffer b = (struct buffer){ .filename = filename, @@ -185,7 +221,7 @@ static struct buffer create_internal(const char *name, char *filename) { static void strip_final_newline(struct buffer *b) { uint32_t nlines = text_num_lines(b->text); - if (nlines > 0 && text_line_length(b->text, nlines - 1) == 0) { + if (nlines > 0 && buffer_line_length(b, nlines - 1) == 0) { text_delete(b->text, nlines - 1, 0, nlines - 1, 1); } } @@ -207,7 +243,7 @@ static void buffer_read_from_file(struct buffer *b) { int bytes = fread(buff, 1, 4096, file); if (bytes > 0) { uint32_t ignore; - text_append(b->text, buff, bytes, &ignore, &ignore); + text_append(b->text, buff, bytes, &ignore); } else if (bytes == 0) { break; // EOF } else { @@ -239,71 +275,66 @@ static void write_line(struct text_chunk *chunk, void *userdata) { fputc('\n', file); } -static bool is_word_break(uint8_t c) { +static bool is_word_break(const struct codepoint *codepoint) { + uint32_t c = codepoint->codepoint; return c == ' ' || c == '.' || c == '(' || c == ')' || c == '[' || c == ']' || - c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':'; + c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':' || + c == '"'; } -static bool is_word_char(uint8_t c) { return !is_word_break(c); } - -struct match_result { - struct location at; - bool found; -}; - -static struct match_result find_next_in_line(struct buffer *buffer, - struct location start, - bool (*predicate)(uint8_t c)) { - struct text_chunk line = text_get_line(buffer->text, start.line); - bool found = false; +static bool is_word_char(const struct codepoint *c) { + return !is_word_break(c); +} - if (line.nbytes == 0) { +static struct match_result +find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + if (text_line_size(buffer->text, start.line) == 0) { return (struct match_result){.at = start, .found = false}; } - uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col); - while (bytei < line.nbytes) { - if (predicate(line.text[bytei])) { + bool found = false; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, start.line); + uint32_t coli = 0, tab_width = get_tab_width(buffer); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL) { + if (coli >= start.col && predicate(codepoint)) { found = true; break; } - ++bytei; + + coli += visual_char_width(codepoint, tab_width); } - uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei); return (struct match_result){ - .at = (struct location){.line = start.line, .col = target_col}, - .found = found}; + .at = (struct location){.line = start.line, .col = coli}, .found = found}; } -static struct match_result find_prev_in_line(struct buffer *buffer, - struct location start, - bool (*predicate)(uint8_t c)) { - struct text_chunk line = text_get_line(buffer->text, start.line); - bool found = false; +static struct match_result +find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { - if (line.nbytes == 0) { + if (text_line_size(buffer->text, start.line) == 0) { return (struct match_result){.at = start, .found = false}; } - uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col); - while (bytei > 0) { - if (predicate(line.text[bytei])) { + bool found = false; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, start.line); + uint32_t coli = 0, tab_width = get_tab_width(buffer); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && coli < start.col) { + if (predicate(codepoint)) { found = true; break; } - --bytei; - } - // first byte on line can also be a match - if (predicate(line.text[bytei])) { - found = true; + coli += visual_char_width(codepoint, tab_width); } - uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei); return (struct match_result){ - .at = (struct location){.line = start.line, .col = target_col}, - .found = found}; + .at = (struct location){.line = start.line, .col = coli}, .found = found}; } static struct text_chunk *copy_region(struct buffer *buffer, @@ -315,13 +346,52 @@ static struct text_chunk *copy_region(struct buffer *buffer, free(curr->text); } + struct location begin_bytes = + buffer_location_to_byte_coords(buffer, region.begin); + struct location end_bytes = + buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_get_region(buffer->text, begin_bytes.line, begin_bytes.col, + end_bytes.line, end_bytes.col); *curr = txt; return curr; } +static struct location do_indent(struct buffer *buffer, struct location at, + uint32_t tab_width, bool use_tabs) { + if (use_tabs) { + return buffer_add(buffer, at, (uint8_t *)"\t", 1); + } else { + return buffer_add(buffer, at, (uint8_t *)" ", + tab_width > 16 ? 16 : tab_width); + } +} + +static uint64_t to_global_offset(struct buffer *buffer, + struct location bytecoords) { + uint32_t line = bytecoords.line; + uint32_t col = bytecoords.col; + uint32_t byteoff = 0; + uint32_t nlines = buffer_num_lines(buffer); + + if (nlines == 0) { + return 0; + } + + for (uint32_t l = 0; l < line && l < nlines; ++l) { + // +1 for newline + byteoff += text_line_size(buffer->text, l) + 1; + } + + // handle last line + uint32_t l = line < nlines ? line : nlines - 1; + uint32_t nbytes = text_line_size(buffer->text, l); + byteoff += col <= nbytes ? col : nbytes + 1; + + return byteoff; +} + /* --------------------- buffer methods -------------------- */ struct buffer buffer_create(const char *name) { @@ -452,18 +522,29 @@ struct location buffer_add(struct buffer *buffer, struct location at, struct location initial = at; struct location final = at; - uint32_t lines_added, cols_added; - text_insert_at(buffer->text, initial.line, initial.col, text, nbytes, - &lines_added, &cols_added); + struct location at_bytes = buffer_location_to_byte_coords(buffer, at); + + uint32_t lines_added; + text_insert_at(buffer->text, at_bytes.line, at_bytes.col, text, nbytes, + &lines_added); // move to after inserted text if (lines_added > 0) { final = buffer_clamp(buffer, (int64_t)at.line + lines_added, 0); } else { + uint32_t cols_added = 0, tab_width = get_tab_width(buffer); + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator(text, nbytes, 0); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL) { + cols_added += visual_char_width(codepoint, tab_width); + } final = buffer_clamp(buffer, (int64_t)at.line, (int64_t)at.col + cols_added); } + struct location final_bytes = buffer_location_to_byte_coords(buffer, final); + undo_push_add( &buffer->undo, (struct undo_add){.begin = {.row = initial.line, .col = initial.col}, @@ -474,11 +555,17 @@ struct location buffer_add(struct buffer *buffer, struct location at, (struct undo_boundary){.save_point = false}); } - uint32_t begin_idx = text_global_idx(buffer->text, initial.line, initial.col); - uint32_t end_idx = text_global_idx(buffer->text, final.line, final.col); + uint32_t begin_idx = to_global_offset(buffer, at_bytes); + uint32_t end_idx = to_global_offset(buffer, final_bytes); VEC_FOR_EACH(&buffer->hooks->insert_hooks, struct insert_hook * h) { - h->callback(buffer, region_new(initial, final), begin_idx, end_idx, + h->callback(buffer, + (struct edit_location){ + .coordinates = region_new(initial, final), + .bytes = region_new(at_bytes, final_bytes), + .global_byte_begin = begin_idx, + .global_byte_end = end_idx, + }, h->userdata); } @@ -488,15 +575,16 @@ struct location buffer_add(struct buffer *buffer, struct location at, struct location buffer_set_text(struct buffer *buffer, uint8_t *text, uint32_t nbytes) { - uint32_t lines, cols; + uint32_t lines_added; text_clear(buffer->text); - text_append(buffer->text, text, nbytes, &lines, &cols); + text_append(buffer->text, text, nbytes, &lines_added); // if last line is empty, remove it strip_final_newline(buffer); - return buffer_clamp(buffer, lines, cols); + return buffer_clamp(buffer, lines_added, + buffer_line_length(buffer, lines_added)); } void buffer_clear(struct buffer *buffer) { text_clear(buffer->text); } @@ -524,9 +612,17 @@ struct location buffer_previous_char(struct buffer *buffer, } --dot.line; - dot.col = buffer_num_chars(buffer, dot.line); + dot.col = buffer_line_length(buffer, dot.line); } else { - --dot.col; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, dot.line); + struct codepoint *codepoint; + uint32_t coli = 0; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && coli < dot.col) { + coli += visual_char_width(codepoint, get_tab_width(buffer)); + } + + dot.col = coli; } return dot; @@ -571,14 +667,14 @@ struct location buffer_previous_line(struct buffer *buffer, } --dot.line; - uint32_t nchars = buffer_num_chars(buffer, dot.line); + uint32_t nchars = buffer_line_length(buffer, dot.line); uint32_t new_col = dot.col > nchars ? nchars : dot.col; return dot; } struct location buffer_next_char(struct buffer *buffer, struct location dot) { - if (dot.col == buffer_num_chars(buffer, dot.line)) { + if (dot.col == buffer_line_length(buffer, dot.line)) { uint32_t lastline = buffer->lazy_row_add ? buffer_num_lines(buffer) : buffer_num_lines(buffer) - 1; if (dot.line == lastline) { @@ -588,7 +684,16 @@ struct location buffer_next_char(struct buffer *buffer, struct location dot) { dot.col = 0; ++dot.line; } else { - ++dot.col; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, dot.line); + struct codepoint *codepoint; + uint32_t coli = 0; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && + coli <= dot.col) { + coli += visual_char_width(codepoint, get_tab_width(buffer)); + } + + dot.col = coli; } return dot; @@ -635,7 +740,7 @@ struct location buffer_next_line(struct buffer *buffer, struct location dot) { ++dot.line; uint32_t new_col = dot.col; - uint32_t nchars = buffer_num_chars(buffer, dot.line); + uint32_t nchars = buffer_line_length(buffer, dot.line); new_col = new_col > nchars ? nchars : new_col; return dot; @@ -664,8 +769,8 @@ struct location buffer_clamp(struct buffer *buffer, int64_t line, int64_t col) { // clamp col if (col < 0) { col = 0; - } else if (col > buffer_num_chars(buffer, line)) { - col = buffer_num_chars(buffer, line); + } else if (col > buffer_line_length(buffer, line)) { + col = buffer_line_length(buffer, line); } location.col = col; @@ -681,7 +786,7 @@ struct location buffer_end(struct buffer *buffer) { return (struct location){.line = nlines, .col = 0}; } else { return (struct location){.line = nlines - 1, - .col = buffer_num_chars(buffer, nlines - 1)}; + .col = buffer_line_length(buffer, nlines - 1)}; } } @@ -689,55 +794,22 @@ uint32_t buffer_num_lines(struct buffer *buffer) { return text_num_lines(buffer->text); } -uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line) { - if (line >= buffer_num_lines(buffer)) { - return 0; +uint32_t buffer_line_length(struct buffer *buffer, uint32_t line) { + uint32_t tab_size = get_tab_width(buffer), len = 0; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, line); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL) { + len += visual_char_width(codepoint, tab_size); } - return text_line_length(buffer->text, line); + return len; } struct location buffer_newline(struct buffer *buffer, struct location at) { return buffer_add(buffer, at, (uint8_t *)"\n", 1); } -static uint32_t get_tab_width(struct buffer *buffer) { - struct setting *tw = lang_setting(&buffer->lang, "tab-width"); - if (tw == NULL) { - tw = settings_get("editor.tab-width"); - } - - uint32_t tab_width = 4; - if (tw != NULL && tw->value.type == Setting_Number) { - tab_width = tw->value.number_value; - } - return tab_width; -} - -static bool use_tabs(struct buffer *buffer) { - struct setting *ut = lang_setting(&buffer->lang, "use-tabs"); - if (ut == NULL) { - ut = settings_get("editor.use-tabs"); - } - - bool use_tabs = false; - if (ut != NULL && ut->value.type == Setting_Bool) { - use_tabs = ut->value.bool_value; - } - - return use_tabs; -} - -static struct location do_indent(struct buffer *buffer, struct location at, - uint32_t tab_width, bool use_tabs) { - if (use_tabs) { - return buffer_add(buffer, at, (uint8_t *)"\t", 1); - } else { - return buffer_add(buffer, at, (uint8_t *)" ", - tab_width > 16 ? 16 : tab_width); - } -} - struct location buffer_indent(struct buffer *buffer, struct location at) { return do_indent(buffer, at, get_tab_width(buffer), use_tabs(buffer)); } @@ -888,9 +960,14 @@ struct location buffer_delete(struct buffer *buffer, struct region region) { return region.begin; } + struct location begin_bytes = + buffer_location_to_byte_coords(buffer, region.begin); + struct location end_bytes = + buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_get_region(buffer->text, begin_bytes.line, begin_bytes.col, + end_bytes.line, end_bytes.col); undo_push_boundary(&buffer->undo, (struct undo_boundary){.save_point = false}); @@ -903,17 +980,22 @@ struct location buffer_delete(struct buffer *buffer, struct region region) { undo_push_boundary(&buffer->undo, (struct undo_boundary){.save_point = false}); - uint32_t begin_idx = - text_global_idx(buffer->text, region.begin.line, region.begin.col); - uint32_t end_idx = - text_global_idx(buffer->text, region.end.line, region.end.col); + uint64_t begin_idx = to_global_offset(buffer, begin_bytes); + uint64_t end_idx = to_global_offset(buffer, end_bytes); - text_delete(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_delete(buffer->text, begin_bytes.line, begin_bytes.col, end_bytes.line, + end_bytes.col); buffer->modified = true; VEC_FOR_EACH(&buffer->hooks->delete_hooks, struct delete_hook * h) { - h->callback(buffer, region, begin_idx, end_idx, h->userdata); + h->callback(buffer, + (struct edit_location){ + .coordinates = region, + .bytes = region_new(begin_bytes, end_bytes), + .global_byte_begin = begin_idx, + .global_byte_end = end_idx, + }, + h->userdata); } return region.begin; @@ -1035,27 +1117,6 @@ struct cmdbuf { struct buffer *buffer; }; -static uint32_t visual_char_width(uint8_t *byte, uint32_t maxlen) { - if (*byte == '\t') { - return 4; - } else { - return utf8_visual_char_width(byte, maxlen); - } -} - -uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, - uint32_t end_col) { - uint32_t start_byte = utf8_nbytes(txt, len, start_col); - uint32_t end_byte = utf8_nbytes(txt, len, end_col); - - uint32_t width = 0; - for (uint32_t bytei = start_byte; bytei < end_byte; ++bytei) { - width += visual_char_width(&txt[bytei], len - bytei); - } - - return width; -} - static void apply_properties(struct command_list *cmds, struct text_property *properties[], uint32_t nproperties) { @@ -1097,65 +1158,66 @@ void render_line(struct text_chunk *line, void *userdata) { command_list_set_show_whitespace(cmdbuf->cmds, cmdbuf->show_ws); // calculate scroll offsets - uint32_t scroll_bytes = - utf8_nbytes(line->text, line->nbytes, cmdbuf->origin.col); - uint32_t text_nbytes_scroll = - scroll_bytes > line->nbytes ? 0 : line->nbytes - scroll_bytes; - uint8_t *text = line->text + scroll_bytes; - - uint32_t visual_col_start = 0; - uint32_t cur_visual_col = 0; - uint32_t start_byte = 0, text_nbytes = 0; struct text_property *properties[32] = {0}; uint64_t prev_properties_hash = 0; - for (uint32_t cur_byte = start_byte, coli = 0; - cur_byte < text_nbytes_scroll && cur_visual_col < cmdbuf->width && - coli < line->nchars - cmdbuf->origin.col; - ++coli) { + uint32_t tab_width = get_tab_width(cmdbuf->buffer); - uint32_t bytes_remaining = text_nbytes_scroll - cur_byte; - uint32_t char_nbytes = utf8_nbytes(text + cur_byte, bytes_remaining, 1); - uint32_t char_vwidth = visual_char_width(text + cur_byte, bytes_remaining); + // handle scroll column offset + uint32_t coli = 0, bytei = 0; + struct utf8_codepoint_iterator iter = text_chunk_codepoint_iterator(line); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && + coli < cmdbuf->origin.col) { + coli += visual_char_width(codepoint, tab_width); + bytei += codepoint->nbytes; + } + + // coli is the visual column + coli = 0; + uint32_t drawn_bytei = bytei; + uint32_t drawn_coli = coli; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && + coli < cmdbuf->width) { // calculate character properties uint32_t nproperties = 0; - text_get_properties( - cmdbuf->buffer->text, - (struct location){.line = line->line, .col = coli + cmdbuf->origin.col}, - properties, 32, &nproperties); + text_get_properties(cmdbuf->buffer->text, line->line, bytei, properties, 32, + &nproperties); // if we have any new or lost props, flush text up until now, reset // and re-apply current properties uint64_t new_properties_hash = properties_hash(properties, nproperties); if (new_properties_hash != prev_properties_hash) { - command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line, - text + start_byte, cur_byte - start_byte); + command_list_draw_text(cmdbuf->cmds, cmdbuf->origin.col + drawn_coli, + visual_line, line->text + drawn_bytei, + bytei - drawn_bytei); command_list_reset_color(cmdbuf->cmds); - visual_col_start = cur_visual_col; - start_byte = cur_byte; + drawn_coli = coli; + drawn_bytei = bytei; // apply new properties apply_properties(cmdbuf->cmds, properties, nproperties); } prev_properties_hash = new_properties_hash; - cur_byte += char_nbytes; - text_nbytes += char_nbytes; - cur_visual_col += char_vwidth; + bytei += codepoint->nbytes; + coli += visual_char_width(codepoint, tab_width); } // flush remaining - command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line, - text + start_byte, text_nbytes - start_byte); + command_list_draw_text(cmdbuf->cmds, cmdbuf->origin.col + drawn_coli, + visual_line, line->text + drawn_bytei, + bytei - drawn_bytei); command_list_reset_color(cmdbuf->cmds); command_list_set_show_whitespace(cmdbuf->cmds, false); - if (cur_visual_col < cmdbuf->width) { - command_list_draw_repeated(cmdbuf->cmds, cur_visual_col, visual_line, ' ', - cmdbuf->width - cur_visual_col); + // TODO: considering the whole screen is cleared, is this really needed? + if (coli < cmdbuf->width) { + command_list_draw_repeated(cmdbuf->cmds, coli, visual_line, ' ', + cmdbuf->width - coli); } } @@ -1200,19 +1262,19 @@ void buffer_render(struct buffer *buffer, struct buffer_render_params *params) { void buffer_add_text_property(struct buffer *buffer, struct location start, struct location end, struct text_property property) { - text_add_property( - buffer->text, (struct location){.line = start.line, .col = start.col}, - (struct location){.line = end.line, .col = end.col}, property); + struct location bytestart = buffer_location_to_byte_coords(buffer, start); + struct location byteend = buffer_location_to_byte_coords(buffer, end); + text_add_property(buffer->text, bytestart.line, bytestart.col, byteend.line, + byteend.col, property); } void buffer_get_text_properties(struct buffer *buffer, struct location location, struct text_property **properties, uint32_t max_nproperties, uint32_t *nproperties) { - text_get_properties( - buffer->text, - (struct location){.line = location.line, .col = location.col}, properties, - max_nproperties, nproperties); + struct location bytecoords = buffer_location_to_byte_coords(buffer, location); + text_get_properties(buffer->text, bytecoords.line, bytecoords.col, properties, + max_nproperties, nproperties); } void buffer_clear_text_properties(struct buffer *buffer) { @@ -1244,9 +1306,12 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, (struct location){.line = end + 1, .col = 0}); struct s8 *lines = (struct s8 *)malloc(sizeof(struct s8) * ntosort); - struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + + struct location bytebeg = + buffer_location_to_byte_coords(buffer, region.begin); + struct location byteend = buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = text_get_region( + buffer->text, bytebeg.line, bytebeg.col, byteend.line, byteend.col); uint32_t line_start = 0; uint32_t curr_line = 0; @@ -1278,3 +1343,29 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, free(txt.text); } } + +struct location buffer_location_to_byte_coords(struct buffer *buffer, + struct location coords) { + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, coords.line); + uint32_t byteoffset = 0, col = 0, tab_width = get_tab_width(buffer); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL && col < coords.col) { + byteoffset += codepoint->nbytes; + col += visual_char_width(codepoint, tab_width); + } + + return (struct location){.line = coords.line, .col = byteoffset}; +} + +struct match_result +buffer_find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + return find_prev_in_line(buffer, start, predicate); +} + +struct match_result +buffer_find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + return find_next_in_line(buffer, start, predicate); +} diff --git a/src/dged/buffer.h b/src/dged/buffer.h index cd5bd95..c9fe2ca 100644 --- a/src/dged/buffer.h +++ b/src/dged/buffer.h @@ -295,13 +295,13 @@ struct location buffer_end(struct buffer *buffer); uint32_t buffer_num_lines(struct buffer *buffer); /** - * Get the number of chars in a given line in buffer. + * Get the line length in number of column positions. * * @param [in] buffer The buffer to use. - * @param [in] line The line to get number of chars for. - * @returns The number of chars in @ref line. + * @param [in] line The line to get number of columns for. + * @returns The number of column positions in the current line. */ -uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line); +uint32_t buffer_line_length(struct buffer *buffer, uint32_t line); /** * Insert a newline in the buffer. @@ -555,6 +555,13 @@ uint32_t buffer_add_reload_hook(struct buffer *buffer, reload_hook_cb callback, void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id, remove_hook_cb callback); +struct edit_location { + struct region coordinates; + struct region bytes; + uint64_t global_byte_begin; + uint64_t global_byte_end; +}; + /** * Buffer insert hook callback function. * @@ -565,9 +572,8 @@ void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id, * @param end_idx The global byte offset to the end of where text was inserted. * @param userdata The userdata as sent in to @ref buffer_add_insert_hook. */ -typedef void (*insert_hook_cb)(struct buffer *buffer, struct region inserted, - uint32_t begin_idx, uint32_t end_idx, - void *userdata); +typedef void (*insert_hook_cb)(struct buffer *buffer, + struct edit_location inserted, void *userdata); /** * Add an insert hook, called when text is inserted into the @p buffer. @@ -600,9 +606,8 @@ void buffer_remove_insert_hook(struct buffer *buffer, uint32_t hook_id, * @param end_idx The global byte offset to the end of the removed text. * @param userdata The userdata as sent in to @ref buffer_add_delete_hook. */ -typedef void (*delete_hook_cb)(struct buffer *buffer, struct region removed, - uint32_t begin_idx, uint32_t end_idx, - void *userdata); +typedef void (*delete_hook_cb)(struct buffer *buffer, + struct edit_location removed, void *userdata); /** * Add a delete hook, called when text is removed from the @p buffer. @@ -724,10 +729,6 @@ void buffer_update(struct buffer *buffer, struct buffer_update_params *params); */ void buffer_render(struct buffer *buffer, struct buffer_render_params *params); -// TODO: move this to where it makes sense -uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, - uint32_t end_col); - /** * Sort lines in a buffer alphabetically. * @@ -738,4 +739,19 @@ uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, uint32_t end_line); +struct location buffer_location_to_byte_coords(struct buffer *buffer, + struct location coords); + +struct match_result { + struct location at; + bool found; +}; + +struct match_result +buffer_find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)); +struct match_result +buffer_find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)); + #endif diff --git a/src/dged/buffer_view.c b/src/dged/buffer_view.c index 4e67d78..f10c5e7 100644 --- a/src/dged/buffer_view.c +++ b/src/dged/buffer_view.c @@ -128,7 +128,7 @@ void buffer_view_backward_nlines(struct buffer_view *view, uint32_t nlines) { } void buffer_view_goto_end_of_line(struct buffer_view *view) { - view->dot.col = buffer_num_chars(view->buffer, view->dot.line); + view->dot.col = buffer_line_length(view->buffer, view->dot.line); } void buffer_view_goto_beginning_of_line(struct buffer_view *view) { @@ -224,15 +224,15 @@ void buffer_view_delete_word(struct buffer_view *view) { } void buffer_view_kill_line(struct buffer_view *view) { - uint32_t nchars = - buffer_num_chars(view->buffer, view->dot.line) - view->dot.col; - if (nchars == 0) { - nchars = 1; + uint32_t ncols = + buffer_line_length(view->buffer, view->dot.line) - view->dot.col; + if (ncols == 0) { + ncols = 1; } struct region reg = region_new(view->dot, (struct location){ .line = view->dot.line, - .col = view->dot.col + nchars, + .col = view->dot.col + ncols, }); buffer_cut(view->buffer, reg); @@ -241,7 +241,8 @@ void buffer_view_kill_line(struct buffer_view *view) { void buffer_view_sort_lines(struct buffer_view *view) { struct region reg = region_new(view->dot, view->mark); if (view->mark_set && region_has_size(reg)) { - if (reg.end.line > 0 && buffer_num_chars(view->buffer, reg.end.line) == 0) { + if (reg.end.line > 0 && + buffer_line_length(view->buffer, reg.end.line) == 0) { reg.end.line -= 1; } @@ -271,21 +272,7 @@ struct location buffer_view_dot_to_relative(struct buffer_view *view) { } struct location buffer_view_dot_to_visual(struct buffer_view *view) { - // calculate visual column index for dot column - struct text_chunk c = buffer_line(view->buffer, view->dot.line); - uint32_t width = visual_string_width(c.text, c.nbytes, 0, view->dot.col); - if (view->scroll.col > 0) { - width -= visual_string_width(c.text, c.nbytes, 0, view->scroll.col); - } - - struct location l = buffer_view_dot_to_relative(view); - l.col = width + view->fringe_width; - - if (c.allocated) { - free(c.text); - } - - return l; + return buffer_view_dot_to_relative(view); } void buffer_view_undo(struct buffer_view *view) { diff --git a/src/dged/display.c b/src/dged/display.c index bc604f0..ea3f459 100644 --- a/src/dged/display.c +++ b/src/dged/display.c @@ -60,7 +60,7 @@ struct push_fmt_cmd { struct repeat_cmd { uint32_t col; uint32_t row; - int32_t c; + uint32_t c; uint32_t nrepeat; }; @@ -135,21 +135,7 @@ void display_destroy(struct display *display) { uint32_t display_width(struct display *display) { return display->width; } uint32_t display_height(struct display *display) { return display->height; } -void putch(uint8_t c) { - // TODO: move this to buffer rendering - if (c < ' ') { - fprintf(stdout, "^%c", c + 0x40); - } else if (c == 0x7f) { - fprintf(stdout, "^?"); - } else if (utf8_byte_is_unicode_start(c) || - utf8_byte_is_unicode_continuation(c)) { - putc(c, stdout); - } else if (c >= ' ' && c < 0x7f) { - putc(c, stdout); - } else { - fprintf(stdout, "|0x%02x|", c); - } -} +void putch(uint8_t c) { putc(c, stdout); } static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) { if (fmt_stack == NULL || fmt_stack_len == 0) { @@ -164,6 +150,7 @@ static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) { void putch_ws(uint8_t c, bool show_whitespace, uint8_t *fmt_stack, uint32_t fmt_stack_len) { + // TODO: tab width needs to be sent here if (show_whitespace && c == '\t') { fputs("\x1b[90m → \x1b[39m", stdout); apply_fmt(fmt_stack, fmt_stack_len); @@ -295,7 +282,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col, } void command_list_draw_repeated(struct command_list *list, uint32_t col, - uint32_t row, int32_t c, uint32_t nrepeat) { + uint32_t row, uint32_t c, uint32_t nrepeat) { struct repeat_cmd *cmd = add_command(list, RenderCommand_Repeat)->repeat; cmd->col = col; cmd->row = row; @@ -401,10 +388,14 @@ void display_render(struct display *display, display_move_cursor(display, repeat_cmd->row + cl->yoffset, repeat_cmd->col + cl->xoffset); apply_fmt(fmt_stack, fmt_stack_len); - uint32_t nbytes = utf8_nbytes((uint8_t *)&repeat_cmd->c, 4, 1); - for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) { - putbytes((uint8_t *)&repeat_cmd->c, nbytes, show_whitespace_state, - fmt_stack, fmt_stack_len); + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator((uint8_t *)&repeat_cmd->c, 4, 0); + struct codepoint *codepoint = utf8_next_codepoint(&iter); + if (codepoint != NULL) { + for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) { + putbytes((uint8_t *)&repeat_cmd->c, codepoint->nbytes, + show_whitespace_state, fmt_stack, fmt_stack_len); + } } break; } diff --git a/src/dged/display.h b/src/dged/display.h index 0fda30d..f9c7ef8 100644 --- a/src/dged/display.h +++ b/src/dged/display.h @@ -238,7 +238,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col, * @param nrepeat Number of times to repeat byte. */ void command_list_draw_repeated(struct command_list *list, uint32_t col, - uint32_t row, int32_t c, uint32_t nrepeat); + uint32_t row, uint32_t c, uint32_t nrepeat); void command_list_draw_command_list(struct command_list *list, struct command_list *to_draw); diff --git a/src/dged/keyboard.c b/src/dged/keyboard.c index 26eb308..04565e0 100644 --- a/src/dged/keyboard.c +++ b/src/dged/keyboard.c @@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys, } else if (utf8_byte_is_unicode_continuation(b)) { // do nothing for these } else { // ascii char or unicode start byte (self-inserting) - uint32_t nb = utf8_byte_is_unicode_start(b) - ? utf8_nbytes(bytes + bytei, nbytes - bytei, 1) - : 1; - - // "compress" number of keys if previous key was also a - // "simple" key - if (prev_kp != NULL && prev_kp->mod == None) { - prev_kp->end += nb; - } else { - kp->mod = None; - kp->key = b; - kp->start = bytei; - kp->end = bytei + nb; - ++nkps; + // TODO: do this better + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0); + struct codepoint *codepoint = utf8_next_codepoint(&iter); + if (codepoint != NULL) { + uint32_t nb = codepoint->nbytes; + + // "compress" number of keys if previous key was also a + // "simple" key + if (prev_kp != NULL && prev_kp->mod == None) { + prev_kp->end += nb; + } else { + kp->mod = None; + kp->key = b; + kp->start = bytei; + kp->end = bytei + nb; + ++nkps; + } } } } diff --git a/src/dged/minibuffer.c b/src/dged/minibuffer.c index 64b0a98..d31850b 100644 --- a/src/dged/minibuffer.c +++ b/src/dged/minibuffer.c @@ -237,11 +237,12 @@ static void minibuffer_abort_prompt_internal(bool clear) { if (clear) { minibuffer_clear(); } - g_minibuffer.prompt_active = false; - if (g_minibuffer.prev_window != NULL) { + if (g_minibuffer.prompt_active && g_minibuffer.prev_window != NULL) { windows_set_active(g_minibuffer.prev_window); } + + g_minibuffer.prompt_active = false; } void minibuffer_abort_prompt() { minibuffer_abort_prompt_internal(true); } diff --git a/src/dged/syntax.c b/src/dged/syntax.c index 8d0fd1a..569dc70 100644 --- a/src/dged/syntax.c +++ b/src/dged/syntax.c @@ -342,7 +342,8 @@ static void update_parser(struct buffer *buffer, void *userdata, : origin.line + height; ts_query_cursor_set_point_range( cursor, (TSPoint){.row = origin.line, .column = origin.col}, - (TSPoint){.row = end_line, .column = buffer_num_chars(buffer, end_line)}); + (TSPoint){.row = end_line, + .column = buffer_line_length(buffer, end_line)}); ts_query_cursor_exec(cursor, h->query, ts_tree_root_node(h->tree)); TSQueryMatch match; @@ -406,47 +407,39 @@ static void update_parser(struct buffer *buffer, void *userdata, continue; } - buffer_add_text_property( - buffer, - (struct location){.line = start.row, - .col = text_byteindex_to_col( - buffer->text, start.row, start.column)}, - (struct location){.line = end.row, - .col = text_byteindex_to_col(buffer->text, end.row, - end.column - 1)}, - (struct text_property){ - .type = TextProperty_Colors, - .colors = - (struct text_property_colors){ - .set_fg = true, - .fg = color, - }, - }); + text_add_property(buffer->text, start.row, start.column, end.row, + end.column > 0 ? end.column - 1 : 0, + (struct text_property){ + .type = TextProperty_Colors, + .colors = + (struct text_property_colors){ + .set_fg = true, + .fg = color, + }, + }); } } ts_query_cursor_delete(cursor); } -static void text_removed(struct buffer *buffer, struct region removed, - uint32_t begin_idx, uint32_t end_idx, void *userdata) { +static void text_removed(struct buffer *buffer, struct edit_location removed, + void *userdata) { struct highlight *h = (struct highlight *)userdata; - TSPoint begin = {.row = removed.begin.line, - .column = text_col_to_byteindex( - buffer->text, removed.begin.line, removed.begin.col)}; + TSPoint begin = {.row = removed.bytes.begin.line, + .column = removed.bytes.begin.col}; TSPoint new_end = begin; - TSPoint old_end = {.row = removed.end.line, - .column = text_col_to_byteindex( - buffer->text, removed.end.line, removed.end.col)}; + TSPoint old_end = {.row = removed.bytes.end.line, + .column = removed.bytes.end.col}; TSInputEdit edit = { .start_point = begin, .old_end_point = old_end, .new_end_point = new_end, - .start_byte = begin_idx, - .old_end_byte = end_idx, - .new_end_byte = begin_idx, + .start_byte = removed.global_byte_begin, + .old_end_byte = removed.global_byte_end, + .new_end_byte = removed.global_byte_begin, }; ts_tree_edit(h->tree, &edit); @@ -479,27 +472,24 @@ static void buffer_reloaded(struct buffer *buffer, void *userdata) { } } -static void text_inserted(struct buffer *buffer, struct region inserted, - uint32_t begin_idx, uint32_t end_idx, +static void text_inserted(struct buffer *buffer, struct edit_location inserted, void *userdata) { struct timer *text_inserted = timer_start("syntax.txt-inserted"); struct highlight *h = (struct highlight *)userdata; - TSPoint begin = {.row = inserted.begin.line, - .column = text_col_to_byteindex( - buffer->text, inserted.begin.line, inserted.begin.col)}; + TSPoint begin = {.row = inserted.bytes.begin.line, + .column = inserted.bytes.begin.col}; TSPoint old_end = begin; - TSPoint new_end = {.row = inserted.end.line, - .column = text_col_to_byteindex( - buffer->text, inserted.end.line, inserted.end.col)}; + TSPoint new_end = {.row = inserted.bytes.end.line, + .column = inserted.bytes.end.col}; TSInputEdit edit = { .start_point = begin, .old_end_point = old_end, .new_end_point = new_end, - .start_byte = begin_idx, - .old_end_byte = begin_idx, - .new_end_byte = end_idx, + .start_byte = inserted.global_byte_begin, + .old_end_byte = inserted.global_byte_begin, + .new_end_byte = inserted.global_byte_end, }; ts_tree_edit(h->tree, &edit); diff --git a/src/dged/text.c b/src/dged/text.c index 3d1078f..18ab04f 100644 --- a/src/dged/text.c +++ b/src/dged/text.c @@ -18,7 +18,6 @@ struct line { uint8_t *data; uint8_t flags; uint32_t nbytes; - uint32_t nchars; }; struct text_property_entry { @@ -54,11 +53,9 @@ void text_destroy(struct text *text) { text->lines[li].data = NULL; text->lines[li].flags = 0; text->lines[li].nbytes = 0; - text->lines[li].nchars = 0; } free(text->lines); - free(text); } @@ -68,68 +65,25 @@ void text_clear(struct text *text) { text->lines[li].data = NULL; text->lines[li].flags = 0; text->lines[li].nbytes = 0; - text->lines[li].nchars = 0; } text->nlines = 0; text_clear_properties(text); } -// given `char_idx` as a character index, return the byte index -uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) { - if (line->nchars == 0) { - return 0; - } - - if (char_idx > line->nchars) { - return line->nbytes - 1; - } - - return utf8_nbytes(line->data, line->nbytes, char_idx); -} - -uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) { - return charidx_to_byteidx(&text->lines[line], col); -} - -// given `byte_idx` as a byte index, return the character index -uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) { - if (byte_idx > line->nbytes) { - return line->nchars; +struct utf8_codepoint_iterator +text_line_codepoint_iterator(const struct text *text, uint32_t lineidx) { + if (lineidx >= text_num_lines(text)) { + return create_utf8_codepoint_iterator(NULL, 0, 0); } - return utf8_nchars(line->data, byte_idx); + return create_utf8_codepoint_iterator(text->lines[lineidx].data, + text->lines[lineidx].nbytes, 0); } -uint32_t text_byteindex_to_col(struct text *text, uint32_t line, - uint32_t byteindex) { - return byteidx_to_charidx(&text->lines[line], byteindex); -} - -uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col) { - uint32_t byteoff = 0; - uint32_t nlines = text_num_lines(text); - - if (nlines == 0) { - return 0; - } - - for (uint32_t l = 0; l < line && l < nlines; ++l) { - // +1 for newline - byteoff += text_line_size(text, l) + 1; - } - - uint32_t l = line < nlines ? line : nlines - 1; - uint32_t nchars = text_line_length(text, l); - uint32_t c = col < nchars ? col : nchars; - byteoff += text_col_to_byteindex(text, l, c); - - if (col > nchars) { - // account for newline - ++byteoff; - } - - return byteoff; +struct utf8_codepoint_iterator +text_chunk_codepoint_iterator(const struct text_chunk *chunk) { + return create_utf8_codepoint_iterator(chunk->text, chunk->nbytes, 0); } void append_empty_lines(struct text *text, uint32_t numlines) { @@ -145,17 +99,10 @@ void append_empty_lines(struct text *text, uint32_t numlines) { struct line *nline = &text->lines[text->nlines]; nline->data = NULL; nline->nbytes = 0; - nline->nchars = 0; nline->flags = 0; ++text->nlines; } - - if (text->nlines > text->capacity) { - printf("text->nlines: %d, text->capacity: %d\n", text->nlines, - text->capacity); - raise(SIGTRAP); - } } void ensure_line(struct text *text, uint32_t line) { @@ -166,8 +113,8 @@ void ensure_line(struct text *text, uint32_t line) { // It is assumed that `data` does not contain any \n, that is handled by // higher-level functions -void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, - uint32_t len, uint32_t nchars) { +static void insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *data, uint32_t len) { if (len == 0) { return; @@ -178,11 +125,10 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, struct line *l = &text->lines[line]; l->nbytes += len; - l->nchars += nchars; l->flags = LineChanged; l->data = realloc(l->data, l->nbytes); - uint32_t bytei = charidx_to_byteidx(l, col); + uint32_t bytei = offset; // move following bytes out of the way if (bytei + len < l->nbytes) { @@ -194,15 +140,7 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, memcpy(l->data + bytei, data, len); } -uint32_t text_line_length(struct text *text, uint32_t lineidx) { - if (lineidx >= text_num_lines(text)) { - return 0; - } - - return text->lines[lineidx].nchars; -} - -uint32_t text_line_size(struct text *text, uint32_t lineidx) { +uint32_t text_line_size(const struct text *text, uint32_t lineidx) { if (lineidx >= text_num_lines(text)) { return 0; } @@ -210,20 +148,19 @@ uint32_t text_line_size(struct text *text, uint32_t lineidx) { return text->lines[lineidx].nbytes; } -uint32_t text_num_lines(struct text *text) { return text->nlines; } +uint32_t text_num_lines(const struct text *text) { return text->nlines; } + +static void split_line(struct text *text, uint32_t offset, uint32_t lineidx, + uint32_t newlineidx) { + struct line *line = &text->lines[lineidx]; + struct line *next = &text->lines[newlineidx]; -void split_line(uint32_t col, struct line *line, struct line *next) { uint8_t *data = line->data; uint32_t nbytes = line->nbytes; - uint32_t nchars = line->nchars; - - uint32_t chari = col; - uint32_t bytei = charidx_to_byteidx(line, chari); + uint32_t bytei = offset; line->nbytes = bytei; - line->nchars = chari; next->nbytes = nbytes - bytei; - next->nchars = nchars - chari; line->flags = next->flags = line->flags; next->data = NULL; @@ -260,7 +197,7 @@ void shift_lines(struct text *text, uint32_t start, int32_t direction) { memmove(dest, src, nlines * sizeof(struct line)); } -void new_line_at(struct text *text, uint32_t line, uint32_t col) { +void new_line_at(struct text *text, uint32_t line, uint32_t offset) { ensure_line(text, line); uint32_t newline = line + 1; @@ -274,7 +211,7 @@ void new_line_at(struct text *text, uint32_t line, uint32_t col) { } // split line if needed - split_line(col, &text->lines[line], &text->lines[newline]); + split_line(text, offset, line, newline); } void delete_line(struct text *text, uint32_t line) { @@ -294,29 +231,25 @@ void delete_line(struct text *text, uint32_t line) { --text->nlines; text->lines[text->nlines].data = NULL; text->lines[text->nlines].nbytes = 0; - text->lines[text->nlines].nchars = 0; } -void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added) { +static void text_insert_at_inner(struct text *text, uint32_t line, + uint32_t offset, uint8_t *bytes, + uint32_t nbytes, uint32_t *lines_added) { uint32_t linelen = 0, start_line = line; - *cols_added = 0; for (uint32_t bytei = 0; bytei < nbytes; ++bytei) { uint8_t byte = bytes[bytei]; if (byte == '\n') { uint8_t *line_data = bytes + (bytei - linelen); - uint32_t nchars = utf8_nchars(line_data, linelen); + insert_at(text, line, offset, line_data, linelen); - insert_at(text, line, col, line_data, linelen, nchars); - - col += nchars; - new_line_at(text, line, col); + offset += linelen; + new_line_at(text, line, offset); ++line; linelen = 0; - col = 0; + offset = 0; } else { ++linelen; } @@ -325,30 +258,26 @@ void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col, // handle remaining if (linelen > 0) { uint8_t *line_data = bytes + (nbytes - linelen); - uint32_t nchars = utf8_nchars(line_data, linelen); - insert_at(text, line, col, line_data, linelen, nchars); - *cols_added = nchars; + insert_at(text, line, offset, line_data, linelen); } *lines_added = line - start_line; } void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added) { + uint32_t *lines_added) { uint32_t line = text->nlines > 0 ? text->nlines - 1 : 0; - uint32_t col = text_line_length(text, line); - - text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added); + uint32_t offset = text_line_size(text, line); + text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added); } -void text_insert_at(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added, - uint32_t *cols_added) { - text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added); +void text_insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added) { + text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added); } -void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, - uint32_t end_line, uint32_t end_col) { +void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset, + uint32_t end_line, uint32_t end_offset) { if (text->nlines == 0) { return; @@ -362,45 +291,44 @@ void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, if (end_line > maxline) { end_line = maxline; - end_col = text->lines[end_line].nchars; + end_offset = text_line_size(text, end_line); } struct line *firstline = &text->lines[start_line]; struct line *lastline = &text->lines[end_line]; // clamp column - if (start_col > firstline->nchars) { - start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0; + uint32_t firstline_len = text_line_size(text, start_line); + if (start_offset > firstline_len) { + start_offset = firstline_len > 0 ? firstline_len - 1 : 0; } // handle deletion of newlines - if (end_col > lastline->nchars) { + uint32_t lastline_len = text_line_size(text, end_line); + if (end_offset > lastline_len) { if (end_line + 1 < text->nlines) { - end_col = 0; + end_offset = 0; ++end_line; lastline = &text->lines[end_line]; } else { - end_col = lastline->nchars; + end_offset = lastline_len; } } - uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col); + uint32_t srcbytei = end_offset; + uint32_t dstbytei = start_offset; + uint32_t ncopy = lastline->nbytes - srcbytei; if (lastline == firstline) { // in this case we can "overwrite" - uint32_t dstbytei = - utf8_nbytes(firstline->data, firstline->nbytes, start_col); - memmove(firstline->data + dstbytei, lastline->data + bytei, - lastline->nbytes - bytei); + memmove(firstline->data + dstbytei, lastline->data + srcbytei, ncopy); } else { // otherwise we actually have to copy from the last line - insert_at(text, start_line, start_col, lastline->data + bytei, - lastline->nbytes - bytei, lastline->nchars - end_col); + insert_at(text, start_line, start_offset, lastline->data + srcbytei, ncopy); } - firstline->nchars = start_col + (lastline->nchars - end_col); - firstline->nbytes = - utf8_nbytes(firstline->data, firstline->nbytes, start_col) + - (lastline->nbytes - bytei); + // new byte count is whatever we had before (left of dstbytei) + // plus what we copied + firstline->nbytes = dstbytei + ncopy; // delete full lines, backwards to not shift old, crappy data upwards for (uint32_t linei = end_line >= text->nlines ? end_line - 1 : end_line; @@ -429,7 +357,6 @@ void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines, .allocated = false, .text = src_line->data, .nbytes = src_line->nbytes, - .nchars = src_line->nchars, .line = li, }; callback(&line, userdata); @@ -441,8 +368,8 @@ struct text_chunk text_get_line(struct text *text, uint32_t line) { return (struct text_chunk){ .text = src_line->data, .nbytes = src_line->nbytes, - .nchars = src_line->nchars, .line = line, + .allocated = false, }; } @@ -453,33 +380,34 @@ struct copy_cmd { }; struct text_chunk text_get_region(struct text *text, uint32_t start_line, - uint32_t start_col, uint32_t end_line, - uint32_t end_col) { - if (start_line == end_line && start_col == end_col) { + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset) { + if (start_line == end_line && start_offset == end_offset) { return (struct text_chunk){0}; } struct line *first_line = &text->lines[start_line]; struct line *last_line = &text->lines[end_line]; + uint32_t first_line_len = first_line->nbytes; + uint32_t last_line_len = last_line->nbytes; - if (start_col > first_line->nchars) { + if (start_offset > first_line_len) { return (struct text_chunk){0}; } // handle copying of newlines - if (end_col > last_line->nchars) { + if (end_offset > last_line_len) { ++end_line; - end_col = 0; + end_offset = 0; last_line = &text->lines[end_line]; } uint32_t nlines = end_line - start_line + 1; struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd)); - uint32_t total_chars = 0, total_bytes = 0; + uint32_t total_bytes = 0; for (uint32_t line = start_line; line <= end_line; ++line) { struct line *l = &text->lines[line]; - total_chars += l->nchars; total_bytes += l->nbytes; struct copy_cmd *cmd = ©_cmds[line - start_line]; @@ -490,19 +418,14 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, // correct first line struct copy_cmd *cmd_first = ©_cmds[0]; - uint32_t byteoff = - utf8_nbytes(first_line->data, first_line->nbytes, start_col); - cmd_first->byteoffset += byteoff; - cmd_first->nbytes -= byteoff; - total_bytes -= byteoff; - total_chars -= start_col; + cmd_first->byteoffset += start_offset; + cmd_first->nbytes -= start_offset; + total_bytes -= start_offset; // correct last line struct copy_cmd *cmd_last = ©_cmds[nlines - 1]; - uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col); - cmd_last->nbytes -= (last_line->nbytes - byteindex); - total_bytes -= (last_line->nbytes - byteindex); - total_chars -= (last_line->nchars - end_col); + cmd_last->nbytes -= (last_line->nbytes - end_offset); + total_bytes -= (last_line->nbytes - end_offset); uint8_t *data = (uint8_t *)malloc( total_bytes + /* nr of newline chars */ (end_line - start_line)); @@ -518,7 +441,6 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, data[curr] = '\n'; ++curr; ++total_bytes; - ++total_chars; } } @@ -527,28 +449,25 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, .text = data, .line = 0, .nbytes = total_bytes, - .nchars = total_chars, .allocated = true, }; } -bool text_line_contains_unicode(struct text *text, uint32_t line) { - return text->lines[line].nbytes != text->lines[line].nchars; -} - -void text_add_property(struct text *text, struct location start, - struct location end, struct text_property property) { +void text_add_property(struct text *text, uint32_t start_line, + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset, struct text_property property) { struct text_property_entry entry = { - .start = start, - .end = end, + .start = (struct location){.line = start_line, .col = start_offset}, + .end = (struct location){.line = end_line, .col = end_offset}, .property = property, }; VEC_PUSH(&text->properties, entry); } -void text_get_properties(struct text *text, struct location location, +void text_get_properties(struct text *text, uint32_t line, uint32_t offset, struct text_property **properties, uint32_t max_nproperties, uint32_t *nproperties) { + struct location location = {.line = line, .col = offset}; uint32_t nres = 0; VEC_FOR_EACH(&text->properties, struct text_property_entry * prop) { if (location_is_between(location, prop->start, prop->end)) { diff --git a/src/dged/text.h b/src/dged/text.h index 8b49ef4..28bd325 100644 --- a/src/dged/text.h +++ b/src/dged/text.h @@ -6,9 +6,16 @@ #include #include "location.h" +#include "utf8.h" struct text; -struct render_command; + +struct text_chunk { + uint8_t *text; + uint32_t nbytes; + uint32_t line; + bool allocated; +}; struct text *text_create(uint32_t initial_capacity); void text_destroy(struct text *text); @@ -18,31 +25,21 @@ void text_destroy(struct text *text); */ void text_clear(struct text *text); -void text_insert_at(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added, - uint32_t *cols_added); +void text_insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added); void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added); + uint32_t *lines_added); -void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, - uint32_t end_line, uint32_t end_col); +void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset, + uint32_t end_line, uint32_t end_offset); -uint32_t text_num_lines(struct text *text); -uint32_t text_line_length(struct text *text, uint32_t lineidx); -uint32_t text_line_size(struct text *text, uint32_t lineidx); -uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col); -uint32_t text_byteindex_to_col(struct text *text, uint32_t line, - uint32_t byteindex); -uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col); - -struct text_chunk { - uint8_t *text; - uint32_t nbytes; - uint32_t nchars; - uint32_t line; - bool allocated; -}; +uint32_t text_num_lines(const struct text *text); +uint32_t text_line_size(const struct text *text, uint32_t lineidx); +struct utf8_codepoint_iterator +text_line_codepoint_iterator(const struct text *text, uint32_t lineidx); +struct utf8_codepoint_iterator +text_chunk_codepoint_iterator(const struct text_chunk *chunk); typedef void (*chunk_cb)(struct text_chunk *chunk, void *userdata); void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines, @@ -52,10 +49,8 @@ void text_for_each_chunk(struct text *text, chunk_cb callback, void *userdata); struct text_chunk text_get_line(struct text *text, uint32_t line); struct text_chunk text_get_region(struct text *text, uint32_t start_line, - uint32_t start_col, uint32_t end_line, - uint32_t end_col); - -bool text_line_contains_unicode(struct text *text, uint32_t line); + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset); enum text_property_type { TextProperty_Colors, @@ -77,10 +72,11 @@ struct text_property { }; }; -void text_add_property(struct text *text, struct location start, - struct location end, struct text_property property); +void text_add_property(struct text *text, uint32_t start_line, + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset, struct text_property property); -void text_get_properties(struct text *text, struct location location, +void text_get_properties(struct text *text, uint32_t line, uint32_t offset, struct text_property **properties, uint32_t max_nproperties, uint32_t *nproperties); diff --git a/src/dged/utf8.c b/src/dged/utf8.c index 52de2da..ede4fb1 100644 --- a/src/dged/utf8.c +++ b/src/dged/utf8.c @@ -1,5 +1,6 @@ #include "utf8.h" +#include #include #include @@ -10,76 +11,125 @@ bool utf8_byte_is_unicode_continuation(uint8_t byte) { bool utf8_byte_is_unicode(uint8_t byte) { return (byte & 0x80) != 0x0; } bool utf8_byte_is_ascii(uint8_t byte) { return !utf8_byte_is_unicode(byte); } -uint32_t utf8_nbytes_in_char(uint8_t byte) { - // length of char is the number of leading ones - // flip it and count number of leading zeros - uint8_t invb = ~byte; - return __builtin_clz((uint32_t)invb) - 24; +enum utf8_state { + Utf8_Accept = 0, + Utf8_Reject = 1, +}; + +// clang-format off +static const uint8_t utf8d[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 +}; +// clang-format on + +/* + * emoji decoding algorithm from + * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + */ +static enum utf8_state decode(enum utf8_state *state, uint32_t *codep, + uint32_t byte) { + uint32_t type = utf8d[byte]; + + *codep = (*state != Utf8_Accept) ? (byte & 0x3fu) | (*codep << 6) + : (0xff >> type) & (byte); + + *state = utf8d[256 + *state * 16 + type]; + return *state; +} + +static struct codepoint next_utf8_codepoint(uint8_t *bytes, uint64_t nbytes) { + uint32_t codepoint = 0; + enum utf8_state state = Utf8_Accept; + uint32_t bi = 0; + while (bi < nbytes) { + enum utf8_state res = decode(&state, &codepoint, bytes[bi]); + ++bi; + + if (res == Utf8_Accept || res == Utf8_Reject) { + break; + } + } + + if (state == Utf8_Reject) { + codepoint = 0xfffd; + } + + return (struct codepoint){.codepoint = codepoint, .nbytes = bi}; } -// TODO: grapheme clusters, this returns the number of unicode code points +struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter) { + if (iter->offset >= iter->nbytes) { + return NULL; + } + + iter->current = next_utf8_codepoint(iter->data + iter->offset, + iter->nbytes - iter->offset); + iter->offset += iter->current.nbytes; + return &iter->current; +} + +struct utf8_codepoint_iterator +create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, + uint64_t initial_offset) { + return (struct utf8_codepoint_iterator){ + .data = data, + .nbytes = len, + .offset = initial_offset, + }; +} + +/* TODO: grapheme clusters and other classification, this + * returns the number of unicode code points + */ uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes) { + uint32_t bi = 0; uint32_t nchars = 0; - uint32_t expected = 0; - for (uint32_t bi = 0; bi < nbytes; ++bi) { - uint8_t byte = bytes[bi]; - if (utf8_byte_is_unicode(byte)) { - if (utf8_byte_is_unicode_start(byte)) { - expected = utf8_nbytes_in_char(byte) - 1; - } else { // continuation byte - --expected; - if (expected == 0) { - ++nchars; - } - } - } else { // ascii - ++nchars; - } + while (bi < nbytes) { + struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi); + ++nchars; + bi += codepoint.nbytes; } + return nchars; } -// TODO: grapheme clusters, this uses the number of unicode code points +/* TODO: grapheme clusters and other classification, this + * returns the number of unicode code points + */ uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars) { - uint32_t bi = 0; uint32_t chars = 0; uint32_t expected = 0; while (chars < nchars && bi < nbytes) { - uint8_t byte = bytes[bi]; - if (utf8_byte_is_unicode(byte)) { - if (utf8_byte_is_unicode_start(byte)) { - expected = utf8_nbytes_in_char(byte) - 1; - } else { // continuation char - --expected; - if (expected == 0) { - ++chars; - } - } - } else { // ascii - ++chars; - } - - ++bi; + struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi); + bi += codepoint.nbytes; + ++chars; } + // TODO: reject invalid? return bi; } -uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len) { - if (utf8_byte_is_unicode_start(*bytes)) { - wchar_t wc; - size_t nbytes = 0; - if ((nbytes = mbrtowc(&wc, (char *)bytes, len, NULL)) > 0) { - size_t w = wcwidth(wc); - return w > 0 ? w : 2; - } else { - return 1; - } - } else if (utf8_byte_is_unicode_continuation(*bytes)) { - return 0; +uint32_t unicode_visual_char_width(const struct codepoint *codepoint) { + if (codepoint->nbytes > 0) { + // TODO: use unicode classification instead + size_t w = wcwidth(codepoint->codepoint); + return w >= 0 ? w : 2; } else { - return 1; + return 0; } } diff --git a/src/dged/utf8.h b/src/dged/utf8.h index 04aa242..22ce22d 100644 --- a/src/dged/utf8.h +++ b/src/dged/utf8.h @@ -1,19 +1,37 @@ +#ifndef _UTF8_H +#define _UTF8_H + #include #include +struct codepoint { + uint32_t codepoint; + uint32_t nbytes; +}; + +struct utf8_codepoint_iterator { + uint8_t *data; + uint64_t nbytes; + uint64_t offset; + struct codepoint current; +}; + +struct utf8_codepoint_iterator +create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, + uint64_t initial_offset); +struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter); + /*! * \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of * length `nbytes`, represents */ uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes); -/* Return the number of bytes used to make up the next `nchars` characters */ -uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars); +uint32_t unicode_visual_char_width(const struct codepoint *codepoint); -/* true if `byte` is a unicode byte sequence start byte */ bool utf8_byte_is_unicode_start(uint8_t byte); bool utf8_byte_is_unicode_continuation(uint8_t byte); -bool utf8_byte_is_ascii(uint8_t byte); bool utf8_byte_is_unicode(uint8_t byte); +bool utf8_byte_is_ascii(uint8_t byte); -uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len); +#endif diff --git a/src/main/cmds.c b/src/main/cmds.c index 4da8346..18f333d 100644 --- a/src/main/cmds.c +++ b/src/main/cmds.c @@ -258,7 +258,7 @@ void buffer_to_list_line(struct buffer *buffer, void *userdata) { buffer_add_text_property( listbuf, (struct location){.line = begin.line, .col = 0}, (struct location){.line = begin.line, - .col = buffer_num_chars(listbuf, begin.line)}, + .col = buffer_line_length(listbuf, begin.line)}, (struct text_property){.type = TextProperty_Data, .userdata = buffer}); } } diff --git a/src/main/completion.c b/src/main/completion.c index 52bf6f8..4ffbc46 100644 --- a/src/main/completion.c +++ b/src/main/completion.c @@ -40,6 +40,11 @@ static struct buffer *g_target_buffer = NULL; static void hide_completion(); +static bool is_space(const struct codepoint *c) { + // TODO: utf8 whitespace and other whitespace + return c->codepoint == ' '; +} + static uint32_t complete_path(struct completion_context ctx, void *userdata); static struct completion_provider g_path_provider = { .name = "path", @@ -214,32 +219,30 @@ static void update_completions(struct buffer *buffer, } } -static void on_buffer_delete(struct buffer *buffer, struct region deleted, - uint32_t start_idx, uint32_t end_idx, - void *userdata) { +static void on_buffer_delete(struct buffer *buffer, + struct edit_location deleted, void *userdata) { struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata; if (g_state.active) { - update_completions(buffer, ctx, deleted.begin); + update_completions(buffer, ctx, deleted.coordinates.begin); } } -static void on_buffer_insert(struct buffer *buffer, struct region inserted, - uint32_t start_idx, uint32_t end_idx, - void *userdata) { +static void on_buffer_insert(struct buffer *buffer, + struct edit_location inserted, void *userdata) { struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata; if (!g_state.active) { uint32_t nchars = 0; switch (ctx->trigger.kind) { case CompletionTrigger_Input: - for (uint32_t line = inserted.begin.line; line <= inserted.end.line; - ++line) { - nchars += buffer_num_chars(buffer, line); + for (uint32_t line = inserted.coordinates.begin.line; + line <= inserted.coordinates.end.line; ++line) { + nchars += buffer_line_length(buffer, line); } - nchars -= - inserted.begin.col + - (buffer_num_chars(buffer, inserted.end.line) - inserted.end.col); + nchars -= inserted.coordinates.begin.col + + (buffer_line_length(buffer, inserted.coordinates.end.line) - + inserted.coordinates.end.col); ctx->trigger_current_nchars += nchars; @@ -260,16 +263,16 @@ static void on_buffer_insert(struct buffer *buffer, struct region inserted, g_state.ctx = ctx; } - update_completions(buffer, ctx, inserted.end); + update_completions(buffer, ctx, inserted.coordinates.end); } static void update_completion_buffer(struct buffer *buffer, void *userdata) { buffer_add_text_property( g_target_buffer, (struct location){.line = g_state.current_completion, .col = 0}, - (struct location){ - .line = g_state.current_completion, - .col = buffer_num_chars(g_target_buffer, g_state.current_completion)}, + (struct location){.line = g_state.current_completion, + .col = buffer_line_length(g_target_buffer, + g_state.current_completion)}, (struct text_property){.type = TextProperty_Colors, .colors = (struct text_property_colors){ .set_bg = false, @@ -433,26 +436,18 @@ static uint32_t complete_path(struct completion_context ctx, void *userdata) { if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *path = calloc(txt.nbytes + 1, sizeof(uint8_t)); - memcpy(path, txt.text + start_idx, txt.nbytes); + char *path = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(path, txt.text, txt.nbytes); + path[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text); @@ -562,25 +557,18 @@ static uint32_t complete_buffers(struct completion_context ctx, if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *needle = calloc(txt.nbytes + 1, sizeof(uint8_t)); - memcpy(needle, txt.text + start_idx, txt.nbytes); + char *needle = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(needle, txt.text, txt.nbytes); + needle[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text); @@ -619,31 +607,23 @@ static uint32_t complete_commands(struct completion_context ctx, if (commands == NULL) { return 0; } - struct text_chunk txt = {0}; uint32_t start_idx = 0; if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *needle = calloc(txt.nbytes + 1, sizeof(uint8_t)); - memcpy(needle, txt.text + start_idx, txt.nbytes); + char *needle = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(needle, txt.text, txt.nbytes); + needle[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text);