diff --git a/dged.nix b/dged.nix index a8f1a8f..2c0d994 100644 --- a/dged.nix +++ b/dged.nix @@ -10,6 +10,8 @@ , valgrind , linkFarm , fetchFromGitHub +, glibcLocalesUtf8 +, strace }: stdenv.mkDerivation { name = "dged"; @@ -32,6 +34,10 @@ stdenv.mkDerivation { bmake docs ''; + # needed for tests to work in sandboxed builds + LANG = "en_US.UTF-8"; + LOCALE_ARCHIVE = "${glibcLocalesUtf8}/lib/locale/locale-archive"; + TREESITTER_GRAMMARS = with tree-sitter-grammars; linkFarm "tree-sitter-grammars" rec { "bash" = tree-sitter-bash; diff --git a/src/dged/buffer.c b/src/dged/buffer.c index 6051f69..1062a47 100644 --- a/src/dged/buffer.c +++ b/src/dged/buffer.c @@ -157,6 +157,42 @@ void buffer_static_teardown() { } } +static uint32_t get_tab_width(struct buffer *buffer) { + struct setting *tw = lang_setting(&buffer->lang, "tab-width"); + if (tw == NULL) { + tw = settings_get("editor.tab-width"); + } + + uint32_t tab_width = 4; + if (tw != NULL && tw->value.type == Setting_Number) { + tab_width = tw->value.number_value; + } + return tab_width; +} + +static bool use_tabs(struct buffer *buffer) { + struct setting *ut = lang_setting(&buffer->lang, "use-tabs"); + if (ut == NULL) { + ut = settings_get("editor.use-tabs"); + } + + bool use_tabs = false; + if (ut != NULL && ut->value.type == Setting_Bool) { + use_tabs = ut->value.bool_value; + } + + return use_tabs; +} + +static uint32_t visual_char_width(struct codepoint *codepoint, + uint32_t tab_width) { + if (codepoint->codepoint == '\t') { + return tab_width; + } else { + return unicode_visual_char_width(codepoint); + } +} + static struct buffer create_internal(const char *name, char *filename) { struct buffer b = (struct buffer){ .filename = filename, @@ -185,7 +221,7 @@ static struct buffer create_internal(const char *name, char *filename) { static void strip_final_newline(struct buffer *b) { uint32_t nlines = text_num_lines(b->text); - if (nlines > 0 && text_line_length(b->text, nlines - 1) == 0) { + if (nlines > 0 && 
buffer_line_length(b, nlines - 1) == 0) { text_delete(b->text, nlines - 1, 0, nlines - 1, 1); } } @@ -207,7 +243,7 @@ static void buffer_read_from_file(struct buffer *b) { int bytes = fread(buff, 1, 4096, file); if (bytes > 0) { uint32_t ignore; - text_append(b->text, buff, bytes, &ignore, &ignore); + text_append(b->text, buff, bytes, &ignore); } else if (bytes == 0) { break; // EOF } else { @@ -239,70 +275,66 @@ static void write_line(struct text_chunk *chunk, void *userdata) { fputc('\n', file); } -static bool is_word_break(uint8_t c) { +static bool is_word_break(const struct codepoint *codepoint) { + uint32_t c = codepoint->codepoint; return c == ' ' || c == '.' || c == '(' || c == ')' || c == '[' || c == ']' || - c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':'; + c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':' || + c == '"'; } -static bool is_word_char(uint8_t c) { return !is_word_break(c); } - -struct match_result { - struct location at; - bool found; -}; - -static struct match_result find_next_in_line(struct buffer *buffer, - struct location start, - bool (*predicate)(uint8_t c)) { - struct text_chunk line = text_get_line(buffer->text, start.line); - bool found = false; +static bool is_word_char(const struct codepoint *c) { + return !is_word_break(c); +} - if (line.nbytes == 0) { +static struct match_result +find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + if (text_line_size(buffer->text, start.line) == 0) { return (struct match_result){.at = start, .found = false}; } - uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col); - while (bytei < line.nbytes) { - if (predicate(line.text[bytei])) { + bool found = false; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, start.line); + uint32_t coli = 0, tab_width = get_tab_width(buffer); + struct codepoint *codepoint; + while ((codepoint = 
utf8_next_codepoint(&iter)) != NULL) { + if (coli >= start.col && predicate(codepoint)) { found = true; break; } - ++bytei; + + coli += visual_char_width(codepoint, tab_width); } - uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei); return (struct match_result){ - .at = (struct location){.line = start.line, .col = target_col}, - .found = found}; + .at = (struct location){.line = start.line, .col = coli}, .found = found}; } -static struct match_result find_prev_in_line(struct buffer *buffer, - struct location start, - bool (*predicate)(uint8_t c)) { - struct text_chunk line = text_get_line(buffer->text, start.line); - bool found = false; +static struct match_result +find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { - if (line.nbytes == 0) { + if (text_line_size(buffer->text, start.line) == 0) { return (struct match_result){.at = start, .found = false}; } - uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col); - while (bytei > 0) { - if (predicate(line.text[bytei])) { + bool found = false; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, start.line); + uint32_t coli = 0, tab_width = get_tab_width(buffer), found_at; + struct codepoint *codepoint; + while (coli < start.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) { + if (predicate(codepoint)) { found = true; - break; + found_at = coli; } - --bytei; - } - - // first byte on line can also be a match - if (predicate(line.text[bytei])) { - found = true; + coli += visual_char_width(codepoint, tab_width); } - uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei); return (struct match_result){ - .at = (struct location){.line = start.line, .col = target_col}, + .at = + (struct location){.line = start.line, .col = found ? 
found_at : coli}, .found = found}; } @@ -315,13 +347,52 @@ static struct text_chunk *copy_region(struct buffer *buffer, free(curr->text); } + struct location begin_bytes = + buffer_location_to_byte_coords(buffer, region.begin); + struct location end_bytes = + buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_get_region(buffer->text, begin_bytes.line, begin_bytes.col, + end_bytes.line, end_bytes.col); *curr = txt; return curr; } +static struct location do_indent(struct buffer *buffer, struct location at, + uint32_t tab_width, bool use_tabs) { + if (use_tabs) { + return buffer_add(buffer, at, (uint8_t *)"\t", 1); + } else { + return buffer_add(buffer, at, (uint8_t *)" ", + tab_width > 16 ? 16 : tab_width); + } +} + +static uint64_t to_global_offset(struct buffer *buffer, + struct location bytecoords) { + uint32_t line = bytecoords.line; + uint32_t col = bytecoords.col; + uint32_t byteoff = 0; + uint32_t nlines = buffer_num_lines(buffer); + + if (nlines == 0) { + return 0; + } + + for (uint32_t l = 0; l < line && l < nlines; ++l) { + // +1 for newline + byteoff += text_line_size(buffer->text, l) + 1; + } + + // handle last line + uint32_t l = line < nlines ? line : nlines - 1; + uint32_t nbytes = text_line_size(buffer->text, l); + byteoff += col <= nbytes ? 
col : nbytes + 1; + + return byteoff; +} + /* --------------------- buffer methods -------------------- */ struct buffer buffer_create(const char *name) { @@ -452,18 +523,29 @@ struct location buffer_add(struct buffer *buffer, struct location at, struct location initial = at; struct location final = at; - uint32_t lines_added, cols_added; - text_insert_at(buffer->text, initial.line, initial.col, text, nbytes, - &lines_added, &cols_added); + struct location at_bytes = buffer_location_to_byte_coords(buffer, at); + + uint32_t lines_added; + text_insert_at(buffer->text, at_bytes.line, at_bytes.col, text, nbytes, + &lines_added); // move to after inserted text if (lines_added > 0) { final = buffer_clamp(buffer, (int64_t)at.line + lines_added, 0); } else { + uint32_t cols_added = 0, tab_width = get_tab_width(buffer); + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator(text, nbytes, 0); + struct codepoint *codepoint; + while ((codepoint = utf8_next_codepoint(&iter)) != NULL) { + cols_added += visual_char_width(codepoint, tab_width); + } final = buffer_clamp(buffer, (int64_t)at.line, (int64_t)at.col + cols_added); } + struct location final_bytes = buffer_location_to_byte_coords(buffer, final); + undo_push_add( &buffer->undo, (struct undo_add){.begin = {.row = initial.line, .col = initial.col}, @@ -474,11 +556,17 @@ struct location buffer_add(struct buffer *buffer, struct location at, (struct undo_boundary){.save_point = false}); } - uint32_t begin_idx = text_global_idx(buffer->text, initial.line, initial.col); - uint32_t end_idx = text_global_idx(buffer->text, final.line, final.col); + uint32_t begin_idx = to_global_offset(buffer, at_bytes); + uint32_t end_idx = to_global_offset(buffer, final_bytes); VEC_FOR_EACH(&buffer->hooks->insert_hooks, struct insert_hook * h) { - h->callback(buffer, region_new(initial, final), begin_idx, end_idx, + h->callback(buffer, + (struct edit_location){ + .coordinates = region_new(initial, final), + .bytes = 
region_new(at_bytes, final_bytes), + .global_byte_begin = begin_idx, + .global_byte_end = end_idx, + }, h->userdata); } @@ -488,15 +576,16 @@ struct location buffer_add(struct buffer *buffer, struct location at, struct location buffer_set_text(struct buffer *buffer, uint8_t *text, uint32_t nbytes) { - uint32_t lines, cols; + uint32_t lines_added; text_clear(buffer->text); - text_append(buffer->text, text, nbytes, &lines, &cols); + text_append(buffer->text, text, nbytes, &lines_added); // if last line is empty, remove it strip_final_newline(buffer); - return buffer_clamp(buffer, lines, cols); + return buffer_clamp(buffer, lines_added, + buffer_line_length(buffer, lines_added)); } void buffer_clear(struct buffer *buffer) { text_clear(buffer->text); } @@ -524,9 +613,18 @@ struct location buffer_previous_char(struct buffer *buffer, } --dot.line; - dot.col = buffer_num_chars(buffer, dot.line); + dot.col = buffer_line_length(buffer, dot.line); } else { - --dot.col; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, dot.line); + struct codepoint *codepoint; + uint32_t coli = 0, tab_width = get_tab_width(buffer), last_width = 0; + while (coli < dot.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) { + last_width = visual_char_width(codepoint, tab_width); + coli += last_width; + } + + dot.col = coli - last_width; } return dot; @@ -571,14 +669,14 @@ struct location buffer_previous_line(struct buffer *buffer, } --dot.line; - uint32_t nchars = buffer_num_chars(buffer, dot.line); + uint32_t nchars = buffer_line_length(buffer, dot.line); uint32_t new_col = dot.col > nchars ? nchars : dot.col; return dot; } struct location buffer_next_char(struct buffer *buffer, struct location dot) { - if (dot.col == buffer_num_chars(buffer, dot.line)) { + if (dot.col == buffer_line_length(buffer, dot.line)) { uint32_t lastline = buffer->lazy_row_add ? 
buffer_num_lines(buffer) : buffer_num_lines(buffer) - 1; if (dot.line == lastline) { @@ -588,7 +686,16 @@ struct location buffer_next_char(struct buffer *buffer, struct location dot) { dot.col = 0; ++dot.line; } else { - ++dot.col; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, dot.line); + struct codepoint *codepoint; + uint32_t coli = 0; + while (coli <= dot.col && + (codepoint = utf8_next_codepoint(&iter)) != NULL) { + coli += visual_char_width(codepoint, get_tab_width(buffer)); + } + + dot.col = coli; } return dot; @@ -635,7 +742,7 @@ struct location buffer_next_line(struct buffer *buffer, struct location dot) { ++dot.line; uint32_t new_col = dot.col; - uint32_t nchars = buffer_num_chars(buffer, dot.line); + uint32_t nchars = buffer_line_length(buffer, dot.line); new_col = new_col > nchars ? nchars : new_col; return dot; @@ -664,8 +771,8 @@ struct location buffer_clamp(struct buffer *buffer, int64_t line, int64_t col) { // clamp col if (col < 0) { col = 0; - } else if (col > buffer_num_chars(buffer, line)) { - col = buffer_num_chars(buffer, line); + } else if (col > buffer_line_length(buffer, line)) { + col = buffer_line_length(buffer, line); } location.col = col; @@ -681,7 +788,7 @@ struct location buffer_end(struct buffer *buffer) { return (struct location){.line = nlines, .col = 0}; } else { return (struct location){.line = nlines - 1, - .col = buffer_num_chars(buffer, nlines - 1)}; + .col = buffer_line_length(buffer, nlines - 1)}; } } @@ -689,55 +796,22 @@ uint32_t buffer_num_lines(struct buffer *buffer) { return text_num_lines(buffer->text); } -uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line) { - if (line >= buffer_num_lines(buffer)) { - return 0; +uint32_t buffer_line_length(struct buffer *buffer, uint32_t line) { + uint32_t tab_size = get_tab_width(buffer), len = 0; + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, line); + struct codepoint *codepoint; + while 
((codepoint = utf8_next_codepoint(&iter)) != NULL) { + len += visual_char_width(codepoint, tab_size); } - return text_line_length(buffer->text, line); + return len; } struct location buffer_newline(struct buffer *buffer, struct location at) { return buffer_add(buffer, at, (uint8_t *)"\n", 1); } -static uint32_t get_tab_width(struct buffer *buffer) { - struct setting *tw = lang_setting(&buffer->lang, "tab-width"); - if (tw == NULL) { - tw = settings_get("editor.tab-width"); - } - - uint32_t tab_width = 4; - if (tw != NULL && tw->value.type == Setting_Number) { - tab_width = tw->value.number_value; - } - return tab_width; -} - -static bool use_tabs(struct buffer *buffer) { - struct setting *ut = lang_setting(&buffer->lang, "use-tabs"); - if (ut == NULL) { - ut = settings_get("editor.use-tabs"); - } - - bool use_tabs = false; - if (ut != NULL && ut->value.type == Setting_Bool) { - use_tabs = ut->value.bool_value; - } - - return use_tabs; -} - -static struct location do_indent(struct buffer *buffer, struct location at, - uint32_t tab_width, bool use_tabs) { - if (use_tabs) { - return buffer_add(buffer, at, (uint8_t *)"\t", 1); - } else { - return buffer_add(buffer, at, (uint8_t *)" ", - tab_width > 16 ? 
16 : tab_width); - } -} - struct location buffer_indent(struct buffer *buffer, struct location at) { return do_indent(buffer, at, get_tab_width(buffer), use_tabs(buffer)); } @@ -778,16 +852,13 @@ struct location buffer_undo(struct buffer *buffer, struct location dot) { case Undo_Add: { struct undo_add *add = &rec->add; - pos = - buffer_delete(buffer, (struct region){.begin = - (struct location){ - .line = add->begin.row, - .col = add->begin.col, - }, - .end = (struct location){ - .line = add->end.row, - .col = add->end.col, - }}); + pos = buffer_delete(buffer, + (struct region){ + .begin = (struct location){.line = add->begin.row, + .col = add->begin.col}, + .end = (struct location){.line = add->end.row, + .col = add->end.col}, + }); break; } @@ -888,9 +959,14 @@ struct location buffer_delete(struct buffer *buffer, struct region region) { return region.begin; } + struct location begin_bytes = + buffer_location_to_byte_coords(buffer, region.begin); + struct location end_bytes = + buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_get_region(buffer->text, begin_bytes.line, begin_bytes.col, + end_bytes.line, end_bytes.col); undo_push_boundary(&buffer->undo, (struct undo_boundary){.save_point = false}); @@ -903,17 +979,22 @@ struct location buffer_delete(struct buffer *buffer, struct region region) { undo_push_boundary(&buffer->undo, (struct undo_boundary){.save_point = false}); - uint32_t begin_idx = - text_global_idx(buffer->text, region.begin.line, region.begin.col); - uint32_t end_idx = - text_global_idx(buffer->text, region.end.line, region.end.col); + uint64_t begin_idx = to_global_offset(buffer, begin_bytes); + uint64_t end_idx = to_global_offset(buffer, end_bytes); - text_delete(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + text_delete(buffer->text, begin_bytes.line, 
begin_bytes.col, end_bytes.line, + end_bytes.col); buffer->modified = true; VEC_FOR_EACH(&buffer->hooks->delete_hooks, struct delete_hook * h) { - h->callback(buffer, region, begin_idx, end_idx, h->userdata); + h->callback(buffer, + (struct edit_location){ + .coordinates = region, + .bytes = region_new(begin_bytes, end_bytes), + .global_byte_begin = begin_idx, + .global_byte_end = end_idx, + }, + h->userdata); } return region.begin; @@ -1035,27 +1116,6 @@ struct cmdbuf { struct buffer *buffer; }; -static uint32_t visual_char_width(uint8_t *byte, uint32_t maxlen) { - if (*byte == '\t') { - return 4; - } else { - return utf8_visual_char_width(byte, maxlen); - } -} - -uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, - uint32_t end_col) { - uint32_t start_byte = utf8_nbytes(txt, len, start_col); - uint32_t end_byte = utf8_nbytes(txt, len, end_col); - - uint32_t width = 0; - for (uint32_t bytei = start_byte; bytei < end_byte; ++bytei) { - width += visual_char_width(&txt[bytei], len - bytei); - } - - return width; -} - static void apply_properties(struct command_list *cmds, struct text_property *properties[], uint32_t nproperties) { @@ -1097,65 +1157,67 @@ void render_line(struct text_chunk *line, void *userdata) { command_list_set_show_whitespace(cmdbuf->cmds, cmdbuf->show_ws); // calculate scroll offsets - uint32_t scroll_bytes = - utf8_nbytes(line->text, line->nbytes, cmdbuf->origin.col); - uint32_t text_nbytes_scroll = - scroll_bytes > line->nbytes ? 
0 : line->nbytes - scroll_bytes; - uint8_t *text = line->text + scroll_bytes; - - uint32_t visual_col_start = 0; - uint32_t cur_visual_col = 0; - uint32_t start_byte = 0, text_nbytes = 0; struct text_property *properties[32] = {0}; uint64_t prev_properties_hash = 0; - for (uint32_t cur_byte = start_byte, coli = 0; - cur_byte < text_nbytes_scroll && cur_visual_col < cmdbuf->width && - coli < line->nchars - cmdbuf->origin.col; - ++coli) { + uint32_t tab_width = get_tab_width(cmdbuf->buffer); + + // handle scroll column offset + uint32_t coli = 0, bytei = 0; + struct utf8_codepoint_iterator iter = text_chunk_codepoint_iterator(line); + struct codepoint *codepoint; + while (coli < cmdbuf->origin.col && + (codepoint = utf8_next_codepoint(&iter)) != NULL) { + coli += visual_char_width(codepoint, tab_width); + bytei += codepoint->nbytes; + } - uint32_t bytes_remaining = text_nbytes_scroll - cur_byte; - uint32_t char_nbytes = utf8_nbytes(text + cur_byte, bytes_remaining, 1); - uint32_t char_vwidth = visual_char_width(text + cur_byte, bytes_remaining); + // coli is the visual column [0..width-1] + coli = 0; + uint32_t drawn_bytei = bytei; + uint32_t drawn_coli = coli; + while (coli < cmdbuf->width && + (codepoint = utf8_next_codepoint(&iter)) != NULL) { // calculate character properties uint32_t nproperties = 0; - text_get_properties( - cmdbuf->buffer->text, - (struct location){.line = line->line, .col = coli + cmdbuf->origin.col}, - properties, 32, &nproperties); + text_get_properties(cmdbuf->buffer->text, line->line, bytei, properties, 32, + &nproperties); // if we have any new or lost props, flush text up until now, reset // and re-apply current properties uint64_t new_properties_hash = properties_hash(properties, nproperties); if (new_properties_hash != prev_properties_hash) { - command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line, - text + start_byte, cur_byte - start_byte); + command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line, + line->text + 
drawn_bytei, bytei - drawn_bytei); command_list_reset_color(cmdbuf->cmds); - visual_col_start = cur_visual_col; - start_byte = cur_byte; + drawn_coli = coli; + drawn_bytei = bytei; // apply new properties apply_properties(cmdbuf->cmds, properties, nproperties); } prev_properties_hash = new_properties_hash; - cur_byte += char_nbytes; - text_nbytes += char_nbytes; - cur_visual_col += char_vwidth; + bytei += codepoint->nbytes; + coli += visual_char_width(codepoint, tab_width); } // flush remaining - command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line, - text + start_byte, text_nbytes - start_byte); + command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line, + line->text + drawn_bytei, bytei - drawn_bytei); + + drawn_coli = coli; + drawn_bytei = bytei; command_list_reset_color(cmdbuf->cmds); command_list_set_show_whitespace(cmdbuf->cmds, false); - if (cur_visual_col < cmdbuf->width) { - command_list_draw_repeated(cmdbuf->cmds, cur_visual_col, visual_line, ' ', - cmdbuf->width - cur_visual_col); + // TODO: considering the whole screen is cleared, is this really needed? 
+ if (drawn_coli < cmdbuf->width) { + command_list_draw_repeated(cmdbuf->cmds, drawn_coli, visual_line, ' ', + cmdbuf->width - drawn_coli); } } @@ -1200,19 +1262,19 @@ void buffer_render(struct buffer *buffer, struct buffer_render_params *params) { void buffer_add_text_property(struct buffer *buffer, struct location start, struct location end, struct text_property property) { - text_add_property( - buffer->text, (struct location){.line = start.line, .col = start.col}, - (struct location){.line = end.line, .col = end.col}, property); + struct location bytestart = buffer_location_to_byte_coords(buffer, start); + struct location byteend = buffer_location_to_byte_coords(buffer, end); + text_add_property(buffer->text, bytestart.line, bytestart.col, byteend.line, + byteend.col, property); } void buffer_get_text_properties(struct buffer *buffer, struct location location, struct text_property **properties, uint32_t max_nproperties, uint32_t *nproperties) { - text_get_properties( - buffer->text, - (struct location){.line = location.line, .col = location.col}, properties, - max_nproperties, nproperties); + struct location bytecoords = buffer_location_to_byte_coords(buffer, location); + text_get_properties(buffer->text, bytecoords.line, bytecoords.col, properties, + max_nproperties, nproperties); } void buffer_clear_text_properties(struct buffer *buffer) { @@ -1244,9 +1306,12 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, (struct location){.line = end + 1, .col = 0}); struct s8 *lines = (struct s8 *)malloc(sizeof(struct s8) * ntosort); - struct text_chunk txt = - text_get_region(buffer->text, region.begin.line, region.begin.col, - region.end.line, region.end.col); + + struct location bytebeg = + buffer_location_to_byte_coords(buffer, region.begin); + struct location byteend = buffer_location_to_byte_coords(buffer, region.end); + struct text_chunk txt = text_get_region( + buffer->text, bytebeg.line, bytebeg.col, byteend.line, byteend.col); uint32_t 
line_start = 0; uint32_t curr_line = 0; @@ -1278,3 +1343,41 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, free(txt.text); } } + +struct location buffer_location_to_byte_coords(struct buffer *buffer, + struct location coords) { + struct utf8_codepoint_iterator iter = + text_line_codepoint_iterator(buffer->text, coords.line); + uint32_t byteoffset = 0, col = 0, tab_width = get_tab_width(buffer); + struct codepoint *codepoint; + + /* Let this walk up to (and including the target column) to + * make sure we account for zero-width characters when calculating the + * byte offset. + */ + while (col <= coords.col && + (codepoint = utf8_next_codepoint(&iter)) != NULL) { + byteoffset += codepoint->nbytes; + col += visual_char_width(codepoint, tab_width); + } + + /* Remove the byte-width of the last char again since it gives us the + * position right before it while still taking zero-width codepoints + * into account. + */ + return (struct location){.line = coords.line, + .col = byteoffset - + (codepoint != NULL ? codepoint->nbytes : 0)}; +} + +struct match_result +buffer_find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + return find_prev_in_line(buffer, start, predicate); +} + +struct match_result +buffer_find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)) { + return find_next_in_line(buffer, start, predicate); +} diff --git a/src/dged/buffer.h b/src/dged/buffer.h index cd5bd95..c9fe2ca 100644 --- a/src/dged/buffer.h +++ b/src/dged/buffer.h @@ -295,13 +295,13 @@ struct location buffer_end(struct buffer *buffer); uint32_t buffer_num_lines(struct buffer *buffer); /** - * Get the number of chars in a given line in buffer. + * Get the line length in number of column positions. * * @param [in] buffer The buffer to use. - * @param [in] line The line to get number of chars for. - * @returns The number of chars in @ref line. 
+ * @param [in] line The line to get number of columns for. + * @returns The number of column positions in the current line. */ -uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line); +uint32_t buffer_line_length(struct buffer *buffer, uint32_t line); /** * Insert a newline in the buffer. @@ -555,6 +555,13 @@ uint32_t buffer_add_reload_hook(struct buffer *buffer, reload_hook_cb callback, void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id, remove_hook_cb callback); +struct edit_location { + struct region coordinates; + struct region bytes; + uint64_t global_byte_begin; + uint64_t global_byte_end; +}; + /** * Buffer insert hook callback function. * @@ -565,9 +572,8 @@ void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id, * @param end_idx The global byte offset to the end of where text was inserted. * @param userdata The userdata as sent in to @ref buffer_add_insert_hook. */ -typedef void (*insert_hook_cb)(struct buffer *buffer, struct region inserted, - uint32_t begin_idx, uint32_t end_idx, - void *userdata); +typedef void (*insert_hook_cb)(struct buffer *buffer, + struct edit_location inserted, void *userdata); /** * Add an insert hook, called when text is inserted into the @p buffer. @@ -600,9 +606,8 @@ void buffer_remove_insert_hook(struct buffer *buffer, uint32_t hook_id, * @param end_idx The global byte offset to the end of the removed text. * @param userdata The userdata as sent in to @ref buffer_add_delete_hook. */ -typedef void (*delete_hook_cb)(struct buffer *buffer, struct region removed, - uint32_t begin_idx, uint32_t end_idx, - void *userdata); +typedef void (*delete_hook_cb)(struct buffer *buffer, + struct edit_location removed, void *userdata); /** * Add a delete hook, called when text is removed from the @p buffer. 
@@ -724,10 +729,6 @@ void buffer_update(struct buffer *buffer, struct buffer_update_params *params); */ void buffer_render(struct buffer *buffer, struct buffer_render_params *params); -// TODO: move this to where it makes sense -uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, - uint32_t end_col); - /** * Sort lines in a buffer alphabetically. * @@ -738,4 +739,19 @@ uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col, void buffer_sort_lines(struct buffer *buffer, uint32_t start_line, uint32_t end_line); +struct location buffer_location_to_byte_coords(struct buffer *buffer, + struct location coords); + +struct match_result { + struct location at; + bool found; +}; + +struct match_result +buffer_find_prev_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)); +struct match_result +buffer_find_next_in_line(struct buffer *buffer, struct location start, + bool (*predicate)(const struct codepoint *c)); + #endif diff --git a/src/dged/buffer_view.c b/src/dged/buffer_view.c index 4e67d78..f3dd2b9 100644 --- a/src/dged/buffer_view.c +++ b/src/dged/buffer_view.c @@ -128,7 +128,7 @@ void buffer_view_backward_nlines(struct buffer_view *view, uint32_t nlines) { } void buffer_view_goto_end_of_line(struct buffer_view *view) { - view->dot.col = buffer_num_chars(view->buffer, view->dot.line); + view->dot.col = buffer_line_length(view->buffer, view->dot.line); } void buffer_view_goto_beginning_of_line(struct buffer_view *view) { @@ -224,15 +224,22 @@ void buffer_view_delete_word(struct buffer_view *view) { } void buffer_view_kill_line(struct buffer_view *view) { - uint32_t nchars = - buffer_num_chars(view->buffer, view->dot.line) - view->dot.col; - if (nchars == 0) { - nchars = 1; + uint32_t ncols = + buffer_line_length(view->buffer, view->dot.line) - view->dot.col; + + uint32_t line = view->dot.line; + uint32_t col = view->dot.col + ncols; + + // kill the newline if we are at the end 
of the line + if (ncols == 0) { + struct location loc = buffer_next_char(view->buffer, view->dot); + line = loc.line; + col = loc.col; } struct region reg = region_new(view->dot, (struct location){ - .line = view->dot.line, - .col = view->dot.col + nchars, + .line = line, + .col = col, }); buffer_cut(view->buffer, reg); @@ -241,7 +248,8 @@ void buffer_view_kill_line(struct buffer_view *view) { void buffer_view_sort_lines(struct buffer_view *view) { struct region reg = region_new(view->dot, view->mark); if (view->mark_set && region_has_size(reg)) { - if (reg.end.line > 0 && buffer_num_chars(view->buffer, reg.end.line) == 0) { + if (reg.end.line > 0 && + buffer_line_length(view->buffer, reg.end.line) == 0) { reg.end.line -= 1; } @@ -271,21 +279,7 @@ struct location buffer_view_dot_to_relative(struct buffer_view *view) { } struct location buffer_view_dot_to_visual(struct buffer_view *view) { - // calculate visual column index for dot column - struct text_chunk c = buffer_line(view->buffer, view->dot.line); - uint32_t width = visual_string_width(c.text, c.nbytes, 0, view->dot.col); - if (view->scroll.col > 0) { - width -= visual_string_width(c.text, c.nbytes, 0, view->scroll.col); - } - - struct location l = buffer_view_dot_to_relative(view); - l.col = width + view->fringe_width; - - if (c.allocated) { - free(c.text); - } - - return l; + return buffer_view_dot_to_relative(view); } void buffer_view_undo(struct buffer_view *view) { diff --git a/src/dged/display.c b/src/dged/display.c index bc604f0..ea3f459 100644 --- a/src/dged/display.c +++ b/src/dged/display.c @@ -60,7 +60,7 @@ struct push_fmt_cmd { struct repeat_cmd { uint32_t col; uint32_t row; - int32_t c; + uint32_t c; uint32_t nrepeat; }; @@ -135,21 +135,7 @@ void display_destroy(struct display *display) { uint32_t display_width(struct display *display) { return display->width; } uint32_t display_height(struct display *display) { return display->height; } -void putch(uint8_t c) { - // TODO: move this to buffer 
rendering - if (c < ' ') { - fprintf(stdout, "^%c", c + 0x40); - } else if (c == 0x7f) { - fprintf(stdout, "^?"); - } else if (utf8_byte_is_unicode_start(c) || - utf8_byte_is_unicode_continuation(c)) { - putc(c, stdout); - } else if (c >= ' ' && c < 0x7f) { - putc(c, stdout); - } else { - fprintf(stdout, "|0x%02x|", c); - } -} +void putch(uint8_t c) { putc(c, stdout); } static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) { if (fmt_stack == NULL || fmt_stack_len == 0) { @@ -164,6 +150,7 @@ static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) { void putch_ws(uint8_t c, bool show_whitespace, uint8_t *fmt_stack, uint32_t fmt_stack_len) { + // TODO: tab width needs to be sent here if (show_whitespace && c == '\t') { fputs("\x1b[90m โ†’ \x1b[39m", stdout); apply_fmt(fmt_stack, fmt_stack_len); @@ -295,7 +282,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col, } void command_list_draw_repeated(struct command_list *list, uint32_t col, - uint32_t row, int32_t c, uint32_t nrepeat) { + uint32_t row, uint32_t c, uint32_t nrepeat) { struct repeat_cmd *cmd = add_command(list, RenderCommand_Repeat)->repeat; cmd->col = col; cmd->row = row; @@ -401,10 +388,14 @@ void display_render(struct display *display, display_move_cursor(display, repeat_cmd->row + cl->yoffset, repeat_cmd->col + cl->xoffset); apply_fmt(fmt_stack, fmt_stack_len); - uint32_t nbytes = utf8_nbytes((uint8_t *)&repeat_cmd->c, 4, 1); - for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) { - putbytes((uint8_t *)&repeat_cmd->c, nbytes, show_whitespace_state, - fmt_stack, fmt_stack_len); + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator((uint8_t *)&repeat_cmd->c, 4, 0); + struct codepoint *codepoint = utf8_next_codepoint(&iter); + if (codepoint != NULL) { + for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) { + putbytes((uint8_t *)&repeat_cmd->c, codepoint->nbytes, + show_whitespace_state, fmt_stack, fmt_stack_len); + } } break; } diff --git 
a/src/dged/display.h b/src/dged/display.h index 0fda30d..f9c7ef8 100644 --- a/src/dged/display.h +++ b/src/dged/display.h @@ -238,7 +238,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col, * @param nrepeat Number of times to repeat byte. */ void command_list_draw_repeated(struct command_list *list, uint32_t col, - uint32_t row, int32_t c, uint32_t nrepeat); + uint32_t row, uint32_t c, uint32_t nrepeat); void command_list_draw_command_list(struct command_list *list, struct command_list *to_draw); diff --git a/src/dged/keyboard.c b/src/dged/keyboard.c index 26eb308..04565e0 100644 --- a/src/dged/keyboard.c +++ b/src/dged/keyboard.c @@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys, } else if (utf8_byte_is_unicode_continuation(b)) { // do nothing for these } else { // ascii char or unicode start byte (self-inserting) - uint32_t nb = utf8_byte_is_unicode_start(b) - ? utf8_nbytes(bytes + bytei, nbytes - bytei, 1) - : 1; - - // "compress" number of keys if previous key was also a - // "simple" key - if (prev_kp != NULL && prev_kp->mod == None) { - prev_kp->end += nb; - } else { - kp->mod = None; - kp->key = b; - kp->start = bytei; - kp->end = bytei + nb; - ++nkps; + // TODO: do this better + struct utf8_codepoint_iterator iter = + create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0); + struct codepoint *codepoint = utf8_next_codepoint(&iter); + if (codepoint != NULL) { + uint32_t nb = codepoint->nbytes; + + // "compress" number of keys if previous key was also a + // "simple" key + if (prev_kp != NULL && prev_kp->mod == None) { + prev_kp->end += nb; + } else { + kp->mod = None; + kp->key = b; + kp->start = bytei; + kp->end = bytei + nb; + ++nkps; + } } } } diff --git a/src/dged/syntax.c b/src/dged/syntax.c index 8d0fd1a..569dc70 100644 --- a/src/dged/syntax.c +++ b/src/dged/syntax.c @@ -342,7 +342,8 @@ static void update_parser(struct buffer *buffer, void *userdata, : origin.line + height; 
ts_query_cursor_set_point_range( cursor, (TSPoint){.row = origin.line, .column = origin.col}, - (TSPoint){.row = end_line, .column = buffer_num_chars(buffer, end_line)}); + (TSPoint){.row = end_line, + .column = buffer_line_length(buffer, end_line)}); ts_query_cursor_exec(cursor, h->query, ts_tree_root_node(h->tree)); TSQueryMatch match; @@ -406,47 +407,39 @@ static void update_parser(struct buffer *buffer, void *userdata, continue; } - buffer_add_text_property( - buffer, - (struct location){.line = start.row, - .col = text_byteindex_to_col( - buffer->text, start.row, start.column)}, - (struct location){.line = end.row, - .col = text_byteindex_to_col(buffer->text, end.row, - end.column - 1)}, - (struct text_property){ - .type = TextProperty_Colors, - .colors = - (struct text_property_colors){ - .set_fg = true, - .fg = color, - }, - }); + text_add_property(buffer->text, start.row, start.column, end.row, + end.column > 0 ? end.column - 1 : 0, + (struct text_property){ + .type = TextProperty_Colors, + .colors = + (struct text_property_colors){ + .set_fg = true, + .fg = color, + }, + }); } } ts_query_cursor_delete(cursor); } -static void text_removed(struct buffer *buffer, struct region removed, - uint32_t begin_idx, uint32_t end_idx, void *userdata) { +static void text_removed(struct buffer *buffer, struct edit_location removed, + void *userdata) { struct highlight *h = (struct highlight *)userdata; - TSPoint begin = {.row = removed.begin.line, - .column = text_col_to_byteindex( - buffer->text, removed.begin.line, removed.begin.col)}; + TSPoint begin = {.row = removed.bytes.begin.line, + .column = removed.bytes.begin.col}; TSPoint new_end = begin; - TSPoint old_end = {.row = removed.end.line, - .column = text_col_to_byteindex( - buffer->text, removed.end.line, removed.end.col)}; + TSPoint old_end = {.row = removed.bytes.end.line, + .column = removed.bytes.end.col}; TSInputEdit edit = { .start_point = begin, .old_end_point = old_end, .new_end_point = new_end, - 
.start_byte = begin_idx, - .old_end_byte = end_idx, - .new_end_byte = begin_idx, + .start_byte = removed.global_byte_begin, + .old_end_byte = removed.global_byte_end, + .new_end_byte = removed.global_byte_begin, }; ts_tree_edit(h->tree, &edit); @@ -479,27 +472,24 @@ static void buffer_reloaded(struct buffer *buffer, void *userdata) { } } -static void text_inserted(struct buffer *buffer, struct region inserted, - uint32_t begin_idx, uint32_t end_idx, +static void text_inserted(struct buffer *buffer, struct edit_location inserted, void *userdata) { struct timer *text_inserted = timer_start("syntax.txt-inserted"); struct highlight *h = (struct highlight *)userdata; - TSPoint begin = {.row = inserted.begin.line, - .column = text_col_to_byteindex( - buffer->text, inserted.begin.line, inserted.begin.col)}; + TSPoint begin = {.row = inserted.bytes.begin.line, + .column = inserted.bytes.begin.col}; TSPoint old_end = begin; - TSPoint new_end = {.row = inserted.end.line, - .column = text_col_to_byteindex( - buffer->text, inserted.end.line, inserted.end.col)}; + TSPoint new_end = {.row = inserted.bytes.end.line, + .column = inserted.bytes.end.col}; TSInputEdit edit = { .start_point = begin, .old_end_point = old_end, .new_end_point = new_end, - .start_byte = begin_idx, - .old_end_byte = begin_idx, - .new_end_byte = end_idx, + .start_byte = inserted.global_byte_begin, + .old_end_byte = inserted.global_byte_begin, + .new_end_byte = inserted.global_byte_end, }; ts_tree_edit(h->tree, &edit); diff --git a/src/dged/text.c b/src/dged/text.c index 3d1078f..18ab04f 100644 --- a/src/dged/text.c +++ b/src/dged/text.c @@ -18,7 +18,6 @@ struct line { uint8_t *data; uint8_t flags; uint32_t nbytes; - uint32_t nchars; }; struct text_property_entry { @@ -54,11 +53,9 @@ void text_destroy(struct text *text) { text->lines[li].data = NULL; text->lines[li].flags = 0; text->lines[li].nbytes = 0; - text->lines[li].nchars = 0; } free(text->lines); - free(text); } @@ -68,68 +65,25 @@ void 
text_clear(struct text *text) { text->lines[li].data = NULL; text->lines[li].flags = 0; text->lines[li].nbytes = 0; - text->lines[li].nchars = 0; } text->nlines = 0; text_clear_properties(text); } -// given `char_idx` as a character index, return the byte index -uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) { - if (line->nchars == 0) { - return 0; - } - - if (char_idx > line->nchars) { - return line->nbytes - 1; - } - - return utf8_nbytes(line->data, line->nbytes, char_idx); -} - -uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) { - return charidx_to_byteidx(&text->lines[line], col); -} - -// given `byte_idx` as a byte index, return the character index -uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) { - if (byte_idx > line->nbytes) { - return line->nchars; +struct utf8_codepoint_iterator +text_line_codepoint_iterator(const struct text *text, uint32_t lineidx) { + if (lineidx >= text_num_lines(text)) { + return create_utf8_codepoint_iterator(NULL, 0, 0); } - return utf8_nchars(line->data, byte_idx); + return create_utf8_codepoint_iterator(text->lines[lineidx].data, + text->lines[lineidx].nbytes, 0); } -uint32_t text_byteindex_to_col(struct text *text, uint32_t line, - uint32_t byteindex) { - return byteidx_to_charidx(&text->lines[line], byteindex); -} - -uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col) { - uint32_t byteoff = 0; - uint32_t nlines = text_num_lines(text); - - if (nlines == 0) { - return 0; - } - - for (uint32_t l = 0; l < line && l < nlines; ++l) { - // +1 for newline - byteoff += text_line_size(text, l) + 1; - } - - uint32_t l = line < nlines ? line : nlines - 1; - uint32_t nchars = text_line_length(text, l); - uint32_t c = col < nchars ? 
col : nchars; - byteoff += text_col_to_byteindex(text, l, c); - - if (col > nchars) { - // account for newline - ++byteoff; - } - - return byteoff; +struct utf8_codepoint_iterator +text_chunk_codepoint_iterator(const struct text_chunk *chunk) { + return create_utf8_codepoint_iterator(chunk->text, chunk->nbytes, 0); } void append_empty_lines(struct text *text, uint32_t numlines) { @@ -145,17 +99,10 @@ void append_empty_lines(struct text *text, uint32_t numlines) { struct line *nline = &text->lines[text->nlines]; nline->data = NULL; nline->nbytes = 0; - nline->nchars = 0; nline->flags = 0; ++text->nlines; } - - if (text->nlines > text->capacity) { - printf("text->nlines: %d, text->capacity: %d\n", text->nlines, - text->capacity); - raise(SIGTRAP); - } } void ensure_line(struct text *text, uint32_t line) { @@ -166,8 +113,8 @@ void ensure_line(struct text *text, uint32_t line) { // It is assumed that `data` does not contain any \n, that is handled by // higher-level functions -void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, - uint32_t len, uint32_t nchars) { +static void insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *data, uint32_t len) { if (len == 0) { return; @@ -178,11 +125,10 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, struct line *l = &text->lines[line]; l->nbytes += len; - l->nchars += nchars; l->flags = LineChanged; l->data = realloc(l->data, l->nbytes); - uint32_t bytei = charidx_to_byteidx(l, col); + uint32_t bytei = offset; // move following bytes out of the way if (bytei + len < l->nbytes) { @@ -194,15 +140,7 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, memcpy(l->data + bytei, data, len); } -uint32_t text_line_length(struct text *text, uint32_t lineidx) { - if (lineidx >= text_num_lines(text)) { - return 0; - } - - return text->lines[lineidx].nchars; -} - -uint32_t text_line_size(struct text *text, uint32_t lineidx) { +uint32_t 
text_line_size(const struct text *text, uint32_t lineidx) { if (lineidx >= text_num_lines(text)) { return 0; } @@ -210,20 +148,19 @@ uint32_t text_line_size(struct text *text, uint32_t lineidx) { return text->lines[lineidx].nbytes; } -uint32_t text_num_lines(struct text *text) { return text->nlines; } +uint32_t text_num_lines(const struct text *text) { return text->nlines; } + +static void split_line(struct text *text, uint32_t offset, uint32_t lineidx, + uint32_t newlineidx) { + struct line *line = &text->lines[lineidx]; + struct line *next = &text->lines[newlineidx]; -void split_line(uint32_t col, struct line *line, struct line *next) { uint8_t *data = line->data; uint32_t nbytes = line->nbytes; - uint32_t nchars = line->nchars; - - uint32_t chari = col; - uint32_t bytei = charidx_to_byteidx(line, chari); + uint32_t bytei = offset; line->nbytes = bytei; - line->nchars = chari; next->nbytes = nbytes - bytei; - next->nchars = nchars - chari; line->flags = next->flags = line->flags; next->data = NULL; @@ -260,7 +197,7 @@ void shift_lines(struct text *text, uint32_t start, int32_t direction) { memmove(dest, src, nlines * sizeof(struct line)); } -void new_line_at(struct text *text, uint32_t line, uint32_t col) { +void new_line_at(struct text *text, uint32_t line, uint32_t offset) { ensure_line(text, line); uint32_t newline = line + 1; @@ -274,7 +211,7 @@ void new_line_at(struct text *text, uint32_t line, uint32_t col) { } // split line if needed - split_line(col, &text->lines[line], &text->lines[newline]); + split_line(text, offset, line, newline); } void delete_line(struct text *text, uint32_t line) { @@ -294,29 +231,25 @@ void delete_line(struct text *text, uint32_t line) { --text->nlines; text->lines[text->nlines].data = NULL; text->lines[text->nlines].nbytes = 0; - text->lines[text->nlines].nchars = 0; } -void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added) { 
+static void text_insert_at_inner(struct text *text, uint32_t line, + uint32_t offset, uint8_t *bytes, + uint32_t nbytes, uint32_t *lines_added) { uint32_t linelen = 0, start_line = line; - *cols_added = 0; for (uint32_t bytei = 0; bytei < nbytes; ++bytei) { uint8_t byte = bytes[bytei]; if (byte == '\n') { uint8_t *line_data = bytes + (bytei - linelen); - uint32_t nchars = utf8_nchars(line_data, linelen); + insert_at(text, line, offset, line_data, linelen); - insert_at(text, line, col, line_data, linelen, nchars); - - col += nchars; - new_line_at(text, line, col); + offset += linelen; + new_line_at(text, line, offset); ++line; linelen = 0; - col = 0; + offset = 0; } else { ++linelen; } @@ -325,30 +258,26 @@ void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col, // handle remaining if (linelen > 0) { uint8_t *line_data = bytes + (nbytes - linelen); - uint32_t nchars = utf8_nchars(line_data, linelen); - insert_at(text, line, col, line_data, linelen, nchars); - *cols_added = nchars; + insert_at(text, line, offset, line_data, linelen); } *lines_added = line - start_line; } void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added) { + uint32_t *lines_added) { uint32_t line = text->nlines > 0 ? 
text->nlines - 1 : 0; - uint32_t col = text_line_length(text, line); - - text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added); + uint32_t offset = text_line_size(text, line); + text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added); } -void text_insert_at(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added, - uint32_t *cols_added) { - text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added); +void text_insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added) { + text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added); } -void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, - uint32_t end_line, uint32_t end_col) { +void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset, + uint32_t end_line, uint32_t end_offset) { if (text->nlines == 0) { return; @@ -362,45 +291,44 @@ void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, if (end_line > maxline) { end_line = maxline; - end_col = text->lines[end_line].nchars; + end_offset = text_line_size(text, end_line); } struct line *firstline = &text->lines[start_line]; struct line *lastline = &text->lines[end_line]; // clamp column - if (start_col > firstline->nchars) { - start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0; + uint32_t firstline_len = text_line_size(text, start_line); + if (start_offset > firstline_len) { + start_offset = firstline_len > 0 ? 
firstline_len - 1 : 0; } // handle deletion of newlines - if (end_col > lastline->nchars) { + uint32_t lastline_len = text_line_size(text, end_line); + if (end_offset > lastline_len) { if (end_line + 1 < text->nlines) { - end_col = 0; + end_offset = 0; ++end_line; lastline = &text->lines[end_line]; } else { - end_col = lastline->nchars; + end_offset = lastline_len; } } - uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col); + uint32_t srcbytei = end_offset; + uint32_t dstbytei = start_offset; + uint32_t ncopy = lastline->nbytes - srcbytei; if (lastline == firstline) { // in this case we can "overwrite" - uint32_t dstbytei = - utf8_nbytes(firstline->data, firstline->nbytes, start_col); - memmove(firstline->data + dstbytei, lastline->data + bytei, - lastline->nbytes - bytei); + memmove(firstline->data + dstbytei, lastline->data + srcbytei, ncopy); } else { // otherwise we actually have to copy from the last line - insert_at(text, start_line, start_col, lastline->data + bytei, - lastline->nbytes - bytei, lastline->nchars - end_col); + insert_at(text, start_line, start_offset, lastline->data + srcbytei, ncopy); } - firstline->nchars = start_col + (lastline->nchars - end_col); - firstline->nbytes = - utf8_nbytes(firstline->data, firstline->nbytes, start_col) + - (lastline->nbytes - bytei); + // new byte count is whatever we had before (left of dstbytei) + // plus what we copied + firstline->nbytes = dstbytei + ncopy; // delete full lines, backwards to not shift old, crappy data upwards for (uint32_t linei = end_line >= text->nlines ? 
end_line - 1 : end_line; @@ -429,7 +357,6 @@ void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines, .allocated = false, .text = src_line->data, .nbytes = src_line->nbytes, - .nchars = src_line->nchars, .line = li, }; callback(&line, userdata); @@ -441,8 +368,8 @@ struct text_chunk text_get_line(struct text *text, uint32_t line) { return (struct text_chunk){ .text = src_line->data, .nbytes = src_line->nbytes, - .nchars = src_line->nchars, .line = line, + .allocated = false, }; } @@ -453,33 +380,34 @@ struct copy_cmd { }; struct text_chunk text_get_region(struct text *text, uint32_t start_line, - uint32_t start_col, uint32_t end_line, - uint32_t end_col) { - if (start_line == end_line && start_col == end_col) { + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset) { + if (start_line == end_line && start_offset == end_offset) { return (struct text_chunk){0}; } struct line *first_line = &text->lines[start_line]; struct line *last_line = &text->lines[end_line]; + uint32_t first_line_len = first_line->nbytes; + uint32_t last_line_len = last_line->nbytes; - if (start_col > first_line->nchars) { + if (start_offset > first_line_len) { return (struct text_chunk){0}; } // handle copying of newlines - if (end_col > last_line->nchars) { + if (end_offset > last_line_len) { ++end_line; - end_col = 0; + end_offset = 0; last_line = &text->lines[end_line]; } uint32_t nlines = end_line - start_line + 1; struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd)); - uint32_t total_chars = 0, total_bytes = 0; + uint32_t total_bytes = 0; for (uint32_t line = start_line; line <= end_line; ++line) { struct line *l = &text->lines[line]; - total_chars += l->nchars; total_bytes += l->nbytes; struct copy_cmd *cmd = ©_cmds[line - start_line]; @@ -490,19 +418,14 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, // correct first line struct copy_cmd *cmd_first = ©_cmds[0]; - uint32_t byteoff = - utf8_nbytes(first_line->data, 
first_line->nbytes, start_col); - cmd_first->byteoffset += byteoff; - cmd_first->nbytes -= byteoff; - total_bytes -= byteoff; - total_chars -= start_col; + cmd_first->byteoffset += start_offset; + cmd_first->nbytes -= start_offset; + total_bytes -= start_offset; // correct last line struct copy_cmd *cmd_last = ©_cmds[nlines - 1]; - uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col); - cmd_last->nbytes -= (last_line->nbytes - byteindex); - total_bytes -= (last_line->nbytes - byteindex); - total_chars -= (last_line->nchars - end_col); + cmd_last->nbytes -= (last_line->nbytes - end_offset); + total_bytes -= (last_line->nbytes - end_offset); uint8_t *data = (uint8_t *)malloc( total_bytes + /* nr of newline chars */ (end_line - start_line)); @@ -518,7 +441,6 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, data[curr] = '\n'; ++curr; ++total_bytes; - ++total_chars; } } @@ -527,28 +449,25 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line, .text = data, .line = 0, .nbytes = total_bytes, - .nchars = total_chars, .allocated = true, }; } -bool text_line_contains_unicode(struct text *text, uint32_t line) { - return text->lines[line].nbytes != text->lines[line].nchars; -} - -void text_add_property(struct text *text, struct location start, - struct location end, struct text_property property) { +void text_add_property(struct text *text, uint32_t start_line, + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset, struct text_property property) { struct text_property_entry entry = { - .start = start, - .end = end, + .start = (struct location){.line = start_line, .col = start_offset}, + .end = (struct location){.line = end_line, .col = end_offset}, .property = property, }; VEC_PUSH(&text->properties, entry); } -void text_get_properties(struct text *text, struct location location, +void text_get_properties(struct text *text, uint32_t line, uint32_t offset, struct text_property **properties, 
uint32_t max_nproperties, uint32_t *nproperties) { + struct location location = {.line = line, .col = offset}; uint32_t nres = 0; VEC_FOR_EACH(&text->properties, struct text_property_entry * prop) { if (location_is_between(location, prop->start, prop->end)) { diff --git a/src/dged/text.h b/src/dged/text.h index 8b49ef4..28bd325 100644 --- a/src/dged/text.h +++ b/src/dged/text.h @@ -6,9 +6,16 @@ #include #include "location.h" +#include "utf8.h" struct text; -struct render_command; + +struct text_chunk { + uint8_t *text; + uint32_t nbytes; + uint32_t line; + bool allocated; +}; struct text *text_create(uint32_t initial_capacity); void text_destroy(struct text *text); @@ -18,31 +25,21 @@ void text_destroy(struct text *text); */ void text_clear(struct text *text); -void text_insert_at(struct text *text, uint32_t line, uint32_t col, - uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added, - uint32_t *cols_added); +void text_insert_at(struct text *text, uint32_t line, uint32_t offset, + uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added); void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes, - uint32_t *lines_added, uint32_t *cols_added); + uint32_t *lines_added); -void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, - uint32_t end_line, uint32_t end_col); +void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset, + uint32_t end_line, uint32_t end_offset); -uint32_t text_num_lines(struct text *text); -uint32_t text_line_length(struct text *text, uint32_t lineidx); -uint32_t text_line_size(struct text *text, uint32_t lineidx); -uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col); -uint32_t text_byteindex_to_col(struct text *text, uint32_t line, - uint32_t byteindex); -uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col); - -struct text_chunk { - uint8_t *text; - uint32_t nbytes; - uint32_t nchars; - uint32_t line; - bool allocated; -}; +uint32_t 
text_num_lines(const struct text *text); +uint32_t text_line_size(const struct text *text, uint32_t lineidx); +struct utf8_codepoint_iterator +text_line_codepoint_iterator(const struct text *text, uint32_t lineidx); +struct utf8_codepoint_iterator +text_chunk_codepoint_iterator(const struct text_chunk *chunk); typedef void (*chunk_cb)(struct text_chunk *chunk, void *userdata); void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines, @@ -52,10 +49,8 @@ void text_for_each_chunk(struct text *text, chunk_cb callback, void *userdata); struct text_chunk text_get_line(struct text *text, uint32_t line); struct text_chunk text_get_region(struct text *text, uint32_t start_line, - uint32_t start_col, uint32_t end_line, - uint32_t end_col); - -bool text_line_contains_unicode(struct text *text, uint32_t line); + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset); enum text_property_type { TextProperty_Colors, @@ -77,10 +72,11 @@ struct text_property { }; }; -void text_add_property(struct text *text, struct location start, - struct location end, struct text_property property); +void text_add_property(struct text *text, uint32_t start_line, + uint32_t start_offset, uint32_t end_line, + uint32_t end_offset, struct text_property property); -void text_get_properties(struct text *text, struct location location, +void text_get_properties(struct text *text, uint32_t line, uint32_t offset, struct text_property **properties, uint32_t max_nproperties, uint32_t *nproperties); diff --git a/src/dged/utf8.c b/src/dged/utf8.c index 52de2da..ede4fb1 100644 --- a/src/dged/utf8.c +++ b/src/dged/utf8.c @@ -1,5 +1,6 @@ #include "utf8.h" +#include #include #include @@ -10,76 +11,125 @@ bool utf8_byte_is_unicode_continuation(uint8_t byte) { bool utf8_byte_is_unicode(uint8_t byte) { return (byte & 0x80) != 0x0; } bool utf8_byte_is_ascii(uint8_t byte) { return !utf8_byte_is_unicode(byte); } -uint32_t utf8_nbytes_in_char(uint8_t byte) { - // length of char is the number 
of leading ones - // flip it and count number of leading zeros - uint8_t invb = ~byte; - return __builtin_clz((uint32_t)invb) - 24; +enum utf8_state { + Utf8_Accept = 0, + Utf8_Reject = 1, +}; + +// clang-format off +static const uint8_t utf8d[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df + 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef + 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff + 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0 + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2 + 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4 + 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6 + 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8 +}; +// clang-format on + +/* + * emoji decoding algorithm from + * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + */ +static enum utf8_state decode(enum utf8_state *state, uint32_t *codep, + uint32_t byte) { + uint32_t type = utf8d[byte]; + + *codep = (*state != Utf8_Accept) ? 
(byte & 0x3fu) | (*codep << 6) + : (0xff >> type) & (byte); + + *state = utf8d[256 + *state * 16 + type]; + return *state; +} + +static struct codepoint next_utf8_codepoint(uint8_t *bytes, uint64_t nbytes) { + uint32_t codepoint = 0; + enum utf8_state state = Utf8_Accept; + uint32_t bi = 0; + while (bi < nbytes) { + enum utf8_state res = decode(&state, &codepoint, bytes[bi]); + ++bi; + + if (res == Utf8_Accept || res == Utf8_Reject) { + break; + } + } + + if (state == Utf8_Reject) { + codepoint = 0xfffd; + } + + return (struct codepoint){.codepoint = codepoint, .nbytes = bi}; } -// TODO: grapheme clusters, this returns the number of unicode code points +struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter) { + if (iter->offset >= iter->nbytes) { + return NULL; + } + + iter->current = next_utf8_codepoint(iter->data + iter->offset, + iter->nbytes - iter->offset); + iter->offset += iter->current.nbytes; + return &iter->current; +} + +struct utf8_codepoint_iterator +create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, + uint64_t initial_offset) { + return (struct utf8_codepoint_iterator){ + .data = data, + .nbytes = len, + .offset = initial_offset, + }; +} + +/* TODO: grapheme clusters and other classification, this + * returns the number of unicode code points + */ uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes) { + uint32_t bi = 0; uint32_t nchars = 0; - uint32_t expected = 0; - for (uint32_t bi = 0; bi < nbytes; ++bi) { - uint8_t byte = bytes[bi]; - if (utf8_byte_is_unicode(byte)) { - if (utf8_byte_is_unicode_start(byte)) { - expected = utf8_nbytes_in_char(byte) - 1; - } else { // continuation byte - --expected; - if (expected == 0) { - ++nchars; - } - } - } else { // ascii - ++nchars; - } + while (bi < nbytes) { + struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi); + ++nchars; + bi += codepoint.nbytes; } + return nchars; } -// TODO: grapheme clusters, this uses the number of unicode code points +/* TODO: 
grapheme clusters and other classification, this + * returns the number of unicode code points + */ uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars) { - uint32_t bi = 0; uint32_t chars = 0; uint32_t expected = 0; while (chars < nchars && bi < nbytes) { - uint8_t byte = bytes[bi]; - if (utf8_byte_is_unicode(byte)) { - if (utf8_byte_is_unicode_start(byte)) { - expected = utf8_nbytes_in_char(byte) - 1; - } else { // continuation char - --expected; - if (expected == 0) { - ++chars; - } - } - } else { // ascii - ++chars; - } - - ++bi; + struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi); + bi += codepoint.nbytes; + ++chars; } + // TODO: reject invalid? return bi; } -uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len) { - if (utf8_byte_is_unicode_start(*bytes)) { - wchar_t wc; - size_t nbytes = 0; - if ((nbytes = mbrtowc(&wc, (char *)bytes, len, NULL)) > 0) { - size_t w = wcwidth(wc); - return w > 0 ? w : 2; - } else { - return 1; - } - } else if (utf8_byte_is_unicode_continuation(*bytes)) { - return 0; +uint32_t unicode_visual_char_width(const struct codepoint *codepoint) { + if (codepoint->nbytes > 0) { + // TODO: use unicode classification instead + size_t w = wcwidth(codepoint->codepoint); + return w >= 0 ? w : 2; } else { - return 1; + return 0; } } diff --git a/src/dged/utf8.h b/src/dged/utf8.h index 04aa242..22ce22d 100644 --- a/src/dged/utf8.h +++ b/src/dged/utf8.h @@ -1,19 +1,37 @@ +#ifndef _UTF8_H +#define _UTF8_H + #include #include +struct codepoint { + uint32_t codepoint; + uint32_t nbytes; +}; + +struct utf8_codepoint_iterator { + uint8_t *data; + uint64_t nbytes; + uint64_t offset; + struct codepoint current; +}; + +struct utf8_codepoint_iterator +create_utf8_codepoint_iterator(uint8_t *data, uint64_t len, + uint64_t initial_offset); +struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter); + /*! 
* \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of * length `nbytes`, represents */ uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes); -/* Return the number of bytes used to make up the next `nchars` characters */ -uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars); +uint32_t unicode_visual_char_width(const struct codepoint *codepoint); -/* true if `byte` is a unicode byte sequence start byte */ bool utf8_byte_is_unicode_start(uint8_t byte); bool utf8_byte_is_unicode_continuation(uint8_t byte); -bool utf8_byte_is_ascii(uint8_t byte); bool utf8_byte_is_unicode(uint8_t byte); +bool utf8_byte_is_ascii(uint8_t byte); -uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len); +#endif diff --git a/src/main/cmds.c b/src/main/cmds.c index 4da8346..18f333d 100644 --- a/src/main/cmds.c +++ b/src/main/cmds.c @@ -258,7 +258,7 @@ void buffer_to_list_line(struct buffer *buffer, void *userdata) { buffer_add_text_property( listbuf, (struct location){.line = begin.line, .col = 0}, (struct location){.line = begin.line, - .col = buffer_num_chars(listbuf, begin.line)}, + .col = buffer_line_length(listbuf, begin.line)}, (struct text_property){.type = TextProperty_Data, .userdata = buffer}); } } diff --git a/src/main/completion.c b/src/main/completion.c index 52bf6f8..4ffbc46 100644 --- a/src/main/completion.c +++ b/src/main/completion.c @@ -40,6 +40,11 @@ static struct buffer *g_target_buffer = NULL; static void hide_completion(); +static bool is_space(const struct codepoint *c) { + // TODO: utf8 whitespace and other whitespace + return c->codepoint == ' '; +} + static uint32_t complete_path(struct completion_context ctx, void *userdata); static struct completion_provider g_path_provider = { .name = "path", @@ -214,32 +219,30 @@ static void update_completions(struct buffer *buffer, } } -static void on_buffer_delete(struct buffer *buffer, struct region deleted, - uint32_t start_idx, uint32_t end_idx, - void *userdata) { +static 
void on_buffer_delete(struct buffer *buffer, + struct edit_location deleted, void *userdata) { struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata; if (g_state.active) { - update_completions(buffer, ctx, deleted.begin); + update_completions(buffer, ctx, deleted.coordinates.begin); } } -static void on_buffer_insert(struct buffer *buffer, struct region inserted, - uint32_t start_idx, uint32_t end_idx, - void *userdata) { +static void on_buffer_insert(struct buffer *buffer, + struct edit_location inserted, void *userdata) { struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata; if (!g_state.active) { uint32_t nchars = 0; switch (ctx->trigger.kind) { case CompletionTrigger_Input: - for (uint32_t line = inserted.begin.line; line <= inserted.end.line; - ++line) { - nchars += buffer_num_chars(buffer, line); + for (uint32_t line = inserted.coordinates.begin.line; + line <= inserted.coordinates.end.line; ++line) { + nchars += buffer_line_length(buffer, line); } - nchars -= - inserted.begin.col + - (buffer_num_chars(buffer, inserted.end.line) - inserted.end.col); + nchars -= inserted.coordinates.begin.col + + (buffer_line_length(buffer, inserted.coordinates.end.line) - + inserted.coordinates.end.col); ctx->trigger_current_nchars += nchars; @@ -260,16 +263,16 @@ static void on_buffer_insert(struct buffer *buffer, struct region inserted, g_state.ctx = ctx; } - update_completions(buffer, ctx, inserted.end); + update_completions(buffer, ctx, inserted.coordinates.end); } static void update_completion_buffer(struct buffer *buffer, void *userdata) { buffer_add_text_property( g_target_buffer, (struct location){.line = g_state.current_completion, .col = 0}, - (struct location){ - .line = g_state.current_completion, - .col = buffer_num_chars(g_target_buffer, g_state.current_completion)}, + (struct location){.line = g_state.current_completion, + .col = buffer_line_length(g_target_buffer, + g_state.current_completion)}, (struct 
text_property){.type = TextProperty_Colors, .colors = (struct text_property_colors){ .set_bg = false, @@ -433,26 +436,18 @@ static uint32_t complete_path(struct completion_context ctx, void *userdata) { if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *path = calloc(txt.nbytes + 1, sizeof(uint8_t)); - memcpy(path, txt.text + start_idx, txt.nbytes); + char *path = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(path, txt.text, txt.nbytes); + path[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text); @@ -562,25 +557,18 @@ static uint32_t complete_buffers(struct completion_context ctx, if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *needle = calloc(txt.nbytes + 1, 
sizeof(uint8_t)); - memcpy(needle, txt.text + start_idx, txt.nbytes); + char *needle = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(needle, txt.text, txt.nbytes); + needle[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text); @@ -619,31 +607,23 @@ static uint32_t complete_commands(struct completion_context ctx, if (commands == NULL) { return 0; } - struct text_chunk txt = {0}; uint32_t start_idx = 0; if (ctx.buffer == minibuffer_buffer()) { txt = minibuffer_content(); } else { - txt = buffer_line(ctx.buffer, ctx.location.line); - uint32_t end_idx = text_col_to_byteindex( - ctx.buffer->text, ctx.location.line, ctx.location.col); - for (uint32_t bytei = end_idx; bytei > 0; --bytei) { - if (txt.text[bytei] == ' ') { - start_idx = bytei + 1; - break; - } - } - - if (start_idx >= end_idx) { + struct match_result start = + buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space); + if (!start.found) { + start.at = (struct location){.line = ctx.location.line, .col = 0}; return 0; } - - txt.nbytes = end_idx - start_idx; + txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location)); } - char *needle = calloc(txt.nbytes + 1, sizeof(uint8_t)); - memcpy(needle, txt.text + start_idx, txt.nbytes); + char *needle = calloc(txt.nbytes + 1, sizeof(char)); + memcpy(needle, txt.text, txt.nbytes); + needle[txt.nbytes] = '\0'; if (txt.allocated) { free(txt.text); diff --git a/test/buffer.c b/test/buffer.c index a4b318e..6624e95 100644 --- a/test/buffer.c +++ b/test/buffer.c @@ -1,11 +1,12 @@ #include #include "dged/buffer.h" +#include "dged/settings.h" #include "assert.h" #include "test.h" -void test_add() { +static void test_add() { struct buffer b = buffer_create("test-buffer"); ASSERT(buffer_num_lines(&b) == 0, "Expected buffer to have zero lines"); @@ -19,7 +20,7 @@ void test_add() { buffer_destroy(&b); } -void test_word_at() { +static void test_word_at() { struct buffer b = buffer_create("test-word-at-buffer"); const char *txt = "word1 (word2). 
Another"; buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)txt, @@ -40,8 +41,7 @@ void test_word_at() { "Expected word to span cols 7..12"); // test that clamping works correctly - struct region word3 = - buffer_word_at(&b, (struct location){.line = 0, .col = 100}); + struct region word3 = buffer_word_at(&b, buffer_clamp(&b, 0, 100)); ASSERT(region_has_size(word3), "expected 0,100 to be in the last word"); ASSERT(word3.begin.col == 15 && word3.end.col == 22, "Expected word to span cols 15..22"); @@ -49,7 +49,87 @@ void test_word_at() { buffer_destroy(&b); } +static void test_line_len(void) { + struct buffer b = buffer_create("test-line-length-buffer"); + const char *txt = "Look! Banana 🍌"; + buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)txt, + strlen(txt)); + ASSERT(buffer_line_length(&b, 0) == 15, + "Expected banana line to be 15 chars wide"); +} + +static void test_char_movement(void) { + struct buffer b = buffer_create("test-char-movement-buffer"); + const char *txt = "abcdefgh 🎯jklmn\tab"; + buffer_add(&b, buffer_end(&b), (uint8_t *)txt, strlen(txt)); + struct location next = + buffer_next_char(&b, (struct location){.line = 0, .col = 0}); + ASSERT(next.col == 1, "Expected next char to be next char"); + + next = buffer_next_char(&b, (struct location){.line = 0, .col = 9}); + ASSERT(next.col == 11, + "Expected a double width char to result in a 2 column move"); + + next = buffer_next_char(&b, (struct location){.line = 0, .col = 16}); + uint64_t tab_width = settings_get("editor.tab-width")->value.number_value; + ASSERT(next.col == 16 + tab_width, + "Expected a tab to result in a move the width of a tab"); + + struct location prev = + buffer_previous_char(&b, (struct location){.line = 0, .col = 0}); + ASSERT(prev.col == 0 && prev.line == 0, + "Expected backwards motion from 0,0 not to be possible"); + + prev = buffer_previous_char(&b, (struct location){.line = 0, .col = 11}); + ASSERT(prev.col == 9, + "Expected a double
width char to result in a 2 column move"); + + prev = buffer_previous_char( + &b, (struct location){.line = 0, .col = 16 + tab_width}); + ASSERT(prev.col == 16, + "Expected a tab move backwards to step over the width of a tab"); +} + +static void test_word_movement(void) { + struct buffer b = buffer_create("test-word-movement-buffer"); + + const char *txt = " word1, word2 \"word3\" word4"; + buffer_add(&b, buffer_end(&b), (uint8_t *)txt, strlen(txt)); + struct location next = + buffer_next_word(&b, (struct location){.line = 0, .col = 0}); + ASSERT(next.col == 1, "Expected next word to start at col 1"); + + next = buffer_next_word(&b, (struct location){.line = 0, .col = 1}); + ASSERT(next.col == 8, "Expected next word to start at col 8"); + + next = buffer_next_word(&b, (struct location){.line = 0, .col = 8}); + ASSERT(next.col == 15, "Expected next word to start at col 15"); + + next = buffer_next_word(&b, (struct location){.line = 0, .col = 15}); + ASSERT(next.col == 22, "Expected next word to start at col 22"); + + struct location prev = + buffer_previous_word(&b, (struct location){.line = 0, .col = 26}); + ASSERT(prev.col == 22, "Expected previous word to start at col 22"); + + prev = buffer_previous_word(&b, (struct location){.line = 0, .col = 22}); + ASSERT(prev.col == 15, "Expected previous word to start at col 15"); + + prev = buffer_previous_word(&b, (struct location){.line = 0, .col = 0}); + ASSERT(prev.col == 0 && prev.line == 0, + "Expected previous word to not go before beginning of buffer"); +} + void run_buffer_tests() { + settings_init(10); + settings_set_default( + "editor.tab-width", + (struct setting_value){.type = Setting_Number, .number_value = 4}); + run_test(test_add); run_test(test_word_at); + run_test(test_line_len); + run_test(test_char_movement); + run_test(test_word_movement); + settings_destroy(); } diff --git a/test/main.c b/test/main.c index 4c241b3..e6c5306 100644 --- a/test/main.c +++ b/test/main.c @@ -52,5 +52,6 @@ int main() { 
((uint64_t)test_begin.tv_sec * 1e9 + (uint64_t)test_begin.tv_nsec); printf("\n🎉 \x1b[1;32mDone! All tests successful in %.2f ms!\x1b[0m\n", (double)elapsed_nanos / 1e6); + return 0; } diff --git a/test/text.c b/test/text.c index 9faa663..f890e7b 100644 --- a/test/text.c +++ b/test/text.c @@ -15,22 +15,19 @@ void assert_line_eq(struct text_chunk line, const char *txt, const char *msg) { void assert_line_equal(struct text_chunk *line) {} void test_add_text() { - uint32_t lines_added, cols_added; + uint32_t lines_added; /* use a silly small initial capacity to test re-alloc */ struct text *t = text_create(1); const char *txt = "This is line 1\n"; - text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added, - &cols_added); + text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added); - ASSERT(text_line_size(t, 0) == 14 && text_line_length(t, 0) == 14, - "Expected line 1 to have 14 chars and 14 bytes"); + ASSERT(text_line_size(t, 0) == 14, "Expected line 1 to be 14 bytes"); assert_line_eq(text_get_line(t, 0), "This is line 1", "Expected line 1 to be line 1"); const char *txt2 = "This is line 2\n"; - text_insert_at(t, 1, 0, (uint8_t *)txt2, strlen(txt2), &lines_added, - &cols_added); + text_insert_at(t, 1, 0, (uint8_t *)txt2, strlen(txt2), &lines_added); ASSERT(text_num_lines(t) == 3, "Expected text to have three lines after second insertion"); assert_line_eq(text_get_line(t, 1), "This is line 2", @@ -38,8 +35,7 @@ void test_add_text() { // simulate indentation const char *txt3 = " "; - text_insert_at(t, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added, - &cols_added); + text_insert_at(t, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added); ASSERT(text_num_lines(t) == 3, "Expected text to have three lines after second insertion"); assert_line_eq(text_get_line(t, 0), " This is line 1", @@ -48,7 +44,7 @@ void test_add_text() { "Expected line 2 to be line 2 still"); // insert newline in middle of line - text_insert_at(t, 1, 4, (uint8_t *)"\n", 1,
&lines_added, &cols_added); + text_insert_at(t, 1, 4, (uint8_t *)"\n", 1, &lines_added); ASSERT(text_num_lines(t) == 4, "Expected text to have four lines after inserting a new line"); assert_line_eq(text_get_line(t, 1), "This", "Expected line 2 to be split"); @@ -56,11 +52,11 @@ void test_add_text() { "Expected line 2 to be split"); // insert newline before line 1 - text_insert_at(t, 1, 0, (uint8_t *)"\n", 1, &lines_added, &cols_added); + text_insert_at(t, 1, 0, (uint8_t *)"\n", 1, &lines_added); ASSERT( text_num_lines(t) == 5, "Expected to have five lines after adding an empty line in the middle"); - ASSERT(text_line_length(t, 1) == 0, "Expected line 2 to be empty"); + ASSERT(text_line_size(t, 1) == 0, "Expected line 2 to be empty"); assert_line_eq(text_get_line(t, 2), "This", "Expected line 3 to be previous line 2"); assert_line_eq(text_get_line(t, 3), " is line 2", @@ -70,37 +66,35 @@ void test_add_text() { } void test_delete_text() { - uint32_t lines_added, cols_added; + uint32_t lines_added; struct text *t = text_create(10); const char *txt = "This is line 1"; - text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added, - &cols_added); + text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added); text_delete(t, 0, 12, 0, 14); - ASSERT(text_line_length(t, 0) == 12, - "Expected line to be 12 chars after deleting two"); + ASSERT(text_line_size(t, 0) == 12, + "Expected line to be 12 bytes after deleting two"); ASSERT(strncmp((const char *)text_get_line(t, 0).text, "This is line", text_line_size(t, 0)) == 0, - "Expected two chars to be deleted"); + "Expected two bytes to be deleted"); text_delete(t, 0, 0, 10, 10); ASSERT(text_get_line(t, 0).nbytes == 0, - "Expected line to be empty after many chars removed"); + "Expected line to be empty after many bytes removed"); const char *txt2 = "This is line 1\nThis is line 2\nThis is line 3"; - text_insert_at(t, 0, 0, (uint8_t *)txt2, strlen(txt2), &lines_added, - &cols_added); + text_insert_at(t, 0, 0, 
(uint8_t *)txt2, strlen(txt2), &lines_added); ASSERT(text_num_lines(t) == 3, "Expected to have three lines after inserting as many"); text_delete(t, 1, 11, 1, 14); - ASSERT(text_line_length(t, 1) == 11, - "Expected line to contain 11 chars after deletion"); + ASSERT(text_line_size(t, 1) == 11, + "Expected line to contain 11 bytes after deletion"); struct text_chunk line = text_get_line(t, 1); ASSERT(strncmp((const char *)line.text, "This is lin", line.nbytes) == 0, "Expected deleted characters to be gone in the second line"); - text_delete(t, 1, 0, 1, text_line_length(t, 1) + 1); + text_delete(t, 1, 0, 1, text_line_size(t, 1) + 1); ASSERT(text_num_lines(t) == 2, "Expected to have two lines after deleting one"); struct text_chunk line2 = text_get_line(t, 1); @@ -110,8 +104,8 @@ void test_delete_text() { struct text *t3 = text_create(10); const char *delete_me = "This is line🎙\nQ"; text_insert_at(t3, 0, 0, (uint8_t *)delete_me, strlen(delete_me), - &lines_added, &cols_added); - text_delete(t3, 0, 13, 0, 14); + &lines_added); + text_delete(t3, 0, 16, 1, 0); struct text_chunk top_line = text_get_line(t3, 0); ASSERT(strncmp((const char *)top_line.text, "This is line🎙Q", top_line.nbytes) == 0, @@ -123,33 +117,13 @@ void test_delete_text() { struct text *t4 = text_create(10); const char *deletable_text = "Only one line kinda"; text_append(t4, (uint8_t *)deletable_text, strlen(deletable_text), - &lines_added, &cols_added); + &lines_added); text_delete(t4, 0, 19, 0, 20); ASSERT(text_num_lines(t4) == 1, "Expected the line to still be there"); - ASSERT(text_line_length(t4, 0) == 19, + ASSERT(text_line_size(t4, 0) == 19, "Expected nothing to have happened to the line"); - // test utf-8 - struct text *t2 = text_create(10); - const char *txt3 = "Emojis: 🇫🇮 🐮\n"; - text_insert_at(t2, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added, - &cols_added); - - // TODO: Fix when graphemes are implemented, should be 11, right now it counts - // the two unicode code points
🇫 and 🇮 as two chars. - ASSERT(text_line_length(t2, 0) == 12, - "Line length should be 12 (even though there " - "are more bytes in the line)."); - - text_delete(t2, 0, 10, 0, 12); - ASSERT(text_line_length(t2, 0) == 10, - "Line length should be 10 after deleting the cow emoji and a space"); - struct text_chunk line3 = text_get_line(t2, 0); - ASSERT(strncmp((const char *)line3.text, "Emojis: 🇫🇮", line3.nbytes) == 0, - "Expected cow emoji plus space to be deleted"); - text_destroy(t); - text_destroy(t2); text_destroy(t3); text_destroy(t4); } diff --git a/test/utf8.c b/test/utf8.c index d67c409..c5094c7 100644 --- a/test/utf8.c +++ b/test/utf8.c @@ -6,11 +6,6 @@ #include "assert.h" #include "test.h" -void test_nchars_nbytes() { - ASSERT(utf8_nchars((uint8_t *)"👴", strlen("👴")) == 1, - "Expected old man emoji to be 1 char"); - ASSERT(utf8_nbytes((uint8_t *)"👴", strlen("👴"), 1) == 4, - "Expected old man emoji to be 4 bytes"); -} +void test_nchars_nbytes() {} void run_utf8_tests() { run_test(test_nchars_nbytes); }