From d213727c3dd58551ad39060987d9e28d9577c1f3 Mon Sep 17 00:00:00 2001 From: Skyler Hawthorne Date: Wed, 5 Jan 2022 09:26:00 -0500 Subject: [PATCH] fix(auto_pairs): fix auto pairs with crlf Auto pairs were resulting in incorrect ranges in the resulting when the line terminators are CRLF (i.e. Windows). It turns out this is because when we were checking if the selection was a single-width cursor, it incorrectly assumed that this would be a single char. This is not the case, as a cursor can cover a multi-code point grapheme. Therefore, we must instead explicitly work with and check graphemes to determine if the cursor should move or extend the selection. Fixes #1436 --- helix-core/src/auto_pairs.rs | 255 +++++++++++++++++++++++++++++------ 1 file changed, 214 insertions(+), 41 deletions(-) diff --git a/helix-core/src/auto_pairs.rs b/helix-core/src/auto_pairs.rs index 1b3de6ea08bd..9d1152bcd0fb 100644 --- a/helix-core/src/auto_pairs.rs +++ b/helix-core/src/auto_pairs.rs @@ -1,7 +1,9 @@ //! When typing the opening character of one of the possible pairs defined below, //! this module provides the functionality to insert the paired closing character. -use crate::{movement::Direction, Range, Rope, Selection, Tendril, Transaction}; +use crate::{ + graphemes, movement::Direction, Range, Rope, RopeGraphemes, Selection, Tendril, Transaction, +}; use log::debug; use smallvec::SmallVec; @@ -63,31 +65,132 @@ fn prev_char(doc: &Rope, pos: usize) -> Option { doc.get_char(pos - 1) } +fn is_single_grapheme(doc: &Rope, range: &Range) -> bool { + let mut graphemes = RopeGraphemes::new(doc.slice(range.from()..range.to())); + let first = graphemes.next(); + let second = graphemes.next(); + debug!("first: {:#?}, second: {:#?}", first, second); + first.is_some() && second.is_none() +} + /// calculate what the resulting range should be for an auto pair insertion fn get_next_range( + doc: &Rope, start_range: &Range, offset: usize, typed_char: char, len_inserted: usize, ) -> Range { - let end_head = start_range.head + offset + typed_char.len_utf8(); + // When the character under the cursor changes due to complete pair + // insertion, we must look backward a grapheme and then add the length + // of the insertion to put the resulting cursor in the right place, e.g. + // + // foo[\r\n] - anchor: 3, head: 5 + // foo([)]\r\n - anchor: 4, head: 5 + // + // foo[\r\n] - anchor: 3, head: 5 + // foo'[\r\n] - anchor: 4, head: 6 + // + // foo([)]\r\n - anchor: 4, head: 5 + // foo()[\r\n] - anchor: 5, head: 7 + // + // [foo]\r\n - anchor: 0, head: 3 + // [foo(])\r\n - anchor: 0, head: 5 + + // inserting at the very end of the document after the last newline + if start_range.head == doc.len_chars() && start_range.anchor == doc.len_chars() { + return Range::new( + start_range.anchor + offset + typed_char.len_utf8(), + start_range.head + offset + typed_char.len_utf8(), + ); + } + + let single_grapheme = is_single_grapheme(doc, start_range); + let doc_slice = doc.slice(..); + + // just skip over graphemes + if len_inserted == 0 { + let end_anchor = if single_grapheme { + graphemes::next_grapheme_boundary(doc_slice, start_range.anchor) + offset + + // even for backward inserts with multiple grapheme selections, + // we want the anchor to stay where it is so that the relative + // selection does not change, e.g.: + // + // foo([) wor]d -> insert ) -> foo()[ wor]d + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + graphemes::next_grapheme_boundary(doc_slice, start_range.head) + offset, + ); + } + + // trivial case: only inserted a single-char opener, just move the selection + if len_inserted == 1 { + let end_anchor = if single_grapheme || start_range.direction() == Direction::Backward { + start_range.anchor + offset + typed_char.len_utf8() + } else { + start_range.anchor + offset + }; + + return Range::new( + end_anchor, + start_range.head + offset + typed_char.len_utf8(), + ); + } + + // If the head = 0, then we must be in insert mode with a backward + // cursor, which implies the head will just move + let end_head = if start_range.head == 0 || start_range.direction() == Direction::Backward { + start_range.head + offset + typed_char.len_utf8() + } else { + // We must have a forward cursor, which means we must move to the + // other end of the grapheme to get to where the new characters + // are inserted, then move the head to where it should be + let prev_bound = graphemes::prev_grapheme_boundary(doc_slice, start_range.head); + debug!( + "prev_bound: {}, offset: {}, len_inserted: {}", + prev_bound, offset, len_inserted + ); + prev_bound + offset + len_inserted + }; let end_anchor = match (start_range.len(), start_range.direction()) { // if we have a zero width cursor, it shifts to the same number (0, _) => end_head, - // if we are inserting for a regular one-width cursor, the anchor - // moves with the head + // If we are inserting for a regular one-width cursor, the anchor + // moves with the head. This is the fast path for ASCII. (1, Direction::Forward) => end_head - 1, (1, Direction::Backward) => end_head + 1, - // if we are appending, the anchor stays where it is; only offset - // for multiple range insertions - (_, Direction::Forward) => start_range.anchor + offset, + (_, Direction::Forward) => { + if single_grapheme { + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.head) + + typed_char.len_utf8() - // when we are inserting in front of a selection, we need to move - // the anchor over by however many characters were inserted overall - (_, Direction::Backward) => start_range.anchor + offset + len_inserted, + // if we are appending, the anchor stays where it is; only offset + // for multiple range insertions + } else { + start_range.anchor + offset + } + } + + (_, Direction::Backward) => { + if single_grapheme { + // if we're backward, then the head is at the first char + // of the typed char, so we need to add the length of + // the closing char + graphemes::prev_grapheme_boundary(doc.slice(..), start_range.anchor) + len_inserted + } else { + // when we are inserting in front of a selection, we need to move + // the anchor over by however many characters were inserted overall + start_range.anchor + offset + len_inserted + } + } }; Range::new(end_anchor, end_head) @@ -122,7 +225,7 @@ fn handle_open( } }; - let next_range = get_next_range(start_range, offs, open, len_inserted); + let next_range = get_next_range(doc, start_range, offs, open, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -152,7 +255,7 @@ fn handle_close(doc: &Rope, selection: &Selection, _open: char, close: char) -> (cursor, cursor, Some(Tendril::from_char(close))) }; - let next_range = get_next_range(start_range, offs, close, len_inserted); + let next_range = get_next_range(doc, start_range, offs, close, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -202,7 +305,7 @@ fn handle_same( (cursor, cursor, Some(pair)) }; - let next_range = get_next_range(start_range, offs, token, len_inserted); + let next_range = get_next_range(doc, start_range, offs, token, len_inserted); end_ranges.push(next_range); offs += len_inserted; @@ -219,6 +322,8 @@ mod test { use super::*; use smallvec::smallvec; + const LINE_END: &'static str = crate::DEFAULT_LINE_ENDING.as_str(); + fn differing_pairs() -> impl Iterator { PAIRS.iter().filter(|(open, close)| open != close) } @@ -270,12 +375,59 @@ mod test { #[test] fn test_insert_blank() { test_hooks_with_pairs( - &Rope::new(), + &Rope::from(LINE_END), &Selection::single(1, 0), PAIRS, - |open, close| format!("{}{}", open, close), + |open, close| format!("{}{}{}", open, close, LINE_END), &Selection::single(2, 1), ); + + let empty_doc = Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)); + + test_hooks_with_pairs( + &empty_doc, + &Selection::single(empty_doc.len_chars(), LINE_END.len()), + PAIRS, + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + open = open, + close = close, + line_end = LINE_END + ) + }, + &Selection::single(LINE_END.len() + 2, LINE_END.len() + 1), + ); + } + + #[test] + fn test_insert_before_multi_code_point_graphemes() { + test_hooks_with_pairs( + &Rope::from(format!("hello ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ goodbye{}", LINE_END)), + &Selection::single(13, 6), + PAIRS, + |open, _| format!("hello {}๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ goodbye{}", open, LINE_END), + &Selection::single(14, 7), + ); + } + + #[test] + fn test_insert_at_end_of_document() { + test_hooks_with_pairs( + &Rope::from(LINE_END), + &Selection::single(LINE_END.len(), LINE_END.len()), + PAIRS, + |open, close| format!("{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 1, LINE_END.len() + 1), + ); + + test_hooks_with_pairs( + &Rope::from(format!("foo{}", LINE_END)), + &Selection::single(3 + LINE_END.len(), 3 + LINE_END.len()), + PAIRS, + |open, close| format!("foo{}{}{}", LINE_END, open, close), + &Selection::single(LINE_END.len() + 4, LINE_END.len() + 4), + ); } /// [] -> append ( -> ([]) @@ -283,11 +435,20 @@ mod test { fn test_append_blank() { test_hooks_with_pairs( // this is what happens when you have a totally blank document and then append - &Rope::from("\n\n"), - &Selection::single(0, 2), + &Rope::from(format!("{line_end}{line_end}", line_end = LINE_END)), + // before inserting the pair, the cursor covers all of both empty lines + &Selection::single(0, LINE_END.len() * 2), PAIRS, - |open, close| format!("\n{}{}\n", open, close), - &Selection::single(0, 3), + |open, close| { + format!( + "{line_end}{open}{close}{line_end}", + line_end = LINE_END, + open = open, + close = close + ) + }, + // after inserting pair, the cursor covers the first new line and the open char + &Selection::single(0, LINE_END.len() + 2), ); } @@ -329,6 +490,18 @@ mod test { ); } + /// foo[] -> append to end of line ( -> foo([]) + #[test] + fn test_append_single_cursor() { + test_hooks_with_pairs( + &Rope::from(format!("foo{}", LINE_END)), + &Selection::single(3, 3 + LINE_END.len()), + differing_pairs(), + |open, close| format!("foo{}{}{}", open, close, LINE_END), + &Selection::single(4, 5), + ); + } + /// fo[o] fo[o(]) /// fo[o] -> append ( -> fo[o(]) /// fo[o] fo[o(]) @@ -355,18 +528,18 @@ mod test { ); } - /// ([]) -> insert ) -> ()[] + /// ([)] -> insert ) -> ()[] #[test] fn test_insert_close_inside_pair() { for (open, close) in PAIRS { - let doc = Rope::from(format!("{}{}", open, close)); + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); test_hooks( &doc, &Selection::single(2, 1), *close, &doc, - &Selection::single(3, 2), + &Selection::single(2 + LINE_END.len(), 2), ); } } @@ -375,14 +548,14 @@ mod test { #[test] fn test_append_close_inside_pair() { for (open, close) in PAIRS { - let doc = Rope::from(format!("{}{}\n", open, close)); + let doc = Rope::from(format!("{}{}{}", open, close, LINE_END)); test_hooks( &doc, &Selection::single(0, 2), *close, &doc, - &Selection::single(0, 3), + &Selection::single(0, 2 + LINE_END.len()), ); } } @@ -564,6 +737,20 @@ mod test { ) } + /// foo([) wor]d -> insert ) -> foo()[ wor]d + #[test] + fn test_insert_close_inside_pair_trailing_word_with_selection() { + for (open, close) in differing_pairs() { + test_hooks( + &Rope::from(format!("foo{}{} word{}", open, close, LINE_END)), + &Selection::single(9, 4), + *close, + &Rope::from(format!("foo{}{} word{}", open, close, LINE_END)), + &Selection::single(9, 5), + ) + } + } + /// we want pairs that are *not* the same char to be inserted after /// a non-pair char, for cases like functions, but for pairs that are /// the same char, we want to *not* insert a pair to handle cases like "I'm" @@ -572,7 +759,7 @@ mod test { /// word[] -> insert ' -> word'[] #[test] fn test_insert_open_after_non_pair() { - let doc = Rope::from("word"); + let doc = Rope::from(format!("word{}", LINE_END)); let sel = Selection::single(5, 4); let expected_sel = Selection::single(6, 5); @@ -580,7 +767,7 @@ mod test { &doc, &sel, differing_pairs(), - |open, close| format!("word{}{}", open, close), + |open, close| format!("word{}{}{}", open, close, LINE_END), &expected_sel, ); @@ -588,22 +775,8 @@ mod test { &doc, &sel, matching_pairs(), - |open, _| format!("word{}", open), + |open, _| format!("word{}{}", open, LINE_END), &expected_sel, ); } - - /// appending with only a cursor should stay a cursor - /// - /// [] -> append to end "foo -> "foo[]" - #[test] - fn test_append_single_cursor() { - test_hooks_with_pairs( - &Rope::from("\n"), - &Selection::single(0, 1), - PAIRS, - |open, close| format!("{}{}\n", open, close), - &Selection::single(1, 2), - ); - } }