From c14af066bbdc4d81dee391376dec46dbd0a4e835 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 10 Jan 2024 13:06:57 -0500 Subject: [PATCH 1/3] syntax: Track parent LanguageLayer IDs This commit adds a `parent` field to the `LanguageLayer`. This information is conveniently already available when we parse injections. This will be used in the child commit to create a type that can traverse injection layers using this information. --- helix-core/src/syntax.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 0e8917191c91..6414f3ce9841 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1090,6 +1090,7 @@ impl Syntax { start_point: Point::new(0, 0), end_point: Point::new(usize::MAX, usize::MAX), }], + parent: None, }; // track scope_descriptor: a Vec of scopes for item in tree @@ -1360,6 +1361,7 @@ impl Syntax { depth, ranges, flags: LayerUpdateFlags::empty(), + parent: Some(layer_id), }; // Find an identical existing layer @@ -1525,6 +1527,7 @@ pub struct LanguageLayer { pub ranges: Vec, pub depth: u32, flags: LayerUpdateFlags, + parent: Option, } /// This PartialEq implementation only checks if that From 1459f570c0c81e5162b1beffc5dcd8db6d518aba Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 10 Jan 2024 15:58:44 -0500 Subject: [PATCH 2/3] Add a TreeCursor type that travels over injection layers This uses the layer parentage information from the parent commit to traverse the layers. It's a similar API to `tree_sitter::TreeCursor` but internally it does not use a `tree_sitter::TreeCursor` currently because that interface is behaving very unexpectedly. Using the `next_sibling`/`prev_sibling`/`parent` API on `tree_sitter::Node` reflects the previous code's behavior so this should result in no surprising changes. 
--- helix-core/src/syntax.rs | 16 ++- helix-core/src/syntax/tree_cursor.rs | 160 +++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 3 deletions(-) create mode 100644 helix-core/src/syntax/tree_cursor.rs diff --git a/helix-core/src/syntax.rs b/helix-core/src/syntax.rs index 6414f3ce9841..78abc0b0a740 100644 --- a/helix-core/src/syntax.rs +++ b/helix-core/src/syntax.rs @@ -1,3 +1,5 @@ +mod tree_cursor; + use crate::{ auto_pairs::AutoPairs, chars::char_is_line_ending, @@ -32,6 +34,8 @@ use serde::{ser::SerializeSeq, Deserialize, Serialize}; use helix_loader::grammar::{get_language, load_runtime_file}; +pub use tree_cursor::TreeCursor; + fn deserialize_regex<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, @@ -1495,6 +1499,12 @@ impl Syntax { .descendant_for_byte_range(start, end) } + pub fn walk(&self) -> TreeCursor<'_> { + // data structure to find the smallest range that contains a point + // when some of the ranges in the structure can overlap. + TreeCursor::new(&self.layers, self.root) + } + // Commenting // comment_strings_for_pos // is_commented @@ -1723,7 +1733,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use tree_sitter::{ Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError, - QueryMatch, Range, TextProvider, Tree, TreeCursor, + QueryMatch, Range, TextProvider, Tree, }; const CANCELLATION_CHECK_INTERVAL: usize = 100; @@ -2657,7 +2667,7 @@ pub fn pretty_print_tree(fmt: &mut W, node: Node) -> fmt::Result fn pretty_print_tree_impl( fmt: &mut W, - cursor: &mut TreeCursor, + cursor: &mut tree_sitter::TreeCursor, depth: usize, ) -> fmt::Result { let node = cursor.node(); @@ -2967,7 +2977,7 @@ mod test { // rule but `name` and `body` belong to an unnamed helper `_method_rest`. 
// This can cause a bug with a pretty-printing implementation that // uses `Node::field_name_for_child` to determine field names but is - // fixed when using `TreeCursor::field_name`. + // fixed when using `tree_sitter::TreeCursor::field_name`. let source = "def self.method_name true end"; diff --git a/helix-core/src/syntax/tree_cursor.rs b/helix-core/src/syntax/tree_cursor.rs new file mode 100644 index 000000000000..d9d140c9f747 --- /dev/null +++ b/helix-core/src/syntax/tree_cursor.rs @@ -0,0 +1,160 @@ +use std::{cmp::Reverse, ops::Range}; + +use super::{LanguageLayer, LayerId}; + +use slotmap::HopSlotMap; +use tree_sitter::Node; + +/// The byte range of an injection layer. +/// +/// Injection ranges may overlap, but all overlapping parts are subsets of their parent ranges. +/// This allows us to sort the ranges ahead of time in order to efficiently find a range that +/// contains a point with maximum depth. +#[derive(Debug)] +struct InjectionRange { + start: usize, + end: usize, + layer_id: LayerId, + depth: u32, +} + +pub struct TreeCursor<'a> { + layers: &'a HopSlotMap, + root: LayerId, + current: LayerId, + injection_ranges: Vec, + // TODO: Ideally this would be a `tree_sitter::TreeCursor<'a>` but + // that returns very surprising results in testing. 
+ cursor: Node<'a>, +} + +impl<'a> TreeCursor<'a> { + pub(super) fn new(layers: &'a HopSlotMap, root: LayerId) -> Self { + let mut injection_ranges = Vec::new(); + + for (layer_id, layer) in layers.iter() { + // Skip the root layer + if layer.parent.is_none() { + continue; + } + for byte_range in layer.ranges.iter() { + let range = InjectionRange { + start: byte_range.start_byte, + end: byte_range.end_byte, + layer_id, + depth: layer.depth, + }; + injection_ranges.push(range); + } + } + + injection_ranges.sort_unstable_by_key(|range| (range.end, Reverse(range.depth))); + + let cursor = layers[root].tree().root_node(); + + Self { + layers, + root, + current: root, + injection_ranges, + cursor, + } + } + + pub fn node(&self) -> Node<'a> { + self.cursor + } + + pub fn goto_parent(&mut self) -> bool { + if let Some(parent) = self.node().parent() { + self.cursor = parent; + return true; + } + + // If we are already on the root layer, we cannot ascend. + if self.current == self.root { + return false; + } + + // Ascend to the parent layer. + let range = self.node().byte_range(); + let parent_id = self.layers[self.current] + .parent + .expect("non-root layers have a parent"); + self.current = parent_id; + let root = self.layers[self.current].tree().root_node(); + self.cursor = root + .descendant_for_byte_range(range.start, range.end) + .unwrap_or(root); + + true + } + + /// Finds the injection layer that has exactly the same range as the given `range`. + fn layer_id_of_byte_range(&self, search_range: Range) -> Option { + let start_idx = self + .injection_ranges + .partition_point(|range| range.end < search_range.end); + + self.injection_ranges[start_idx..] + .iter() + .take_while(|range| range.end == search_range.end) + .find_map(|range| (range.start == search_range.start).then_some(range.layer_id)) + } + + pub fn goto_first_child(&mut self) -> bool { + // Check if the current node's range is an exact injection layer range. 
+ if let Some(layer_id) = self + .layer_id_of_byte_range(self.node().byte_range()) + .filter(|&layer_id| layer_id != self.current) + { + // Switch to the child layer. + self.current = layer_id; + self.cursor = self.layers[self.current].tree().root_node(); + true + } else if let Some(child) = self.cursor.child(0) { + // Otherwise descend in the current tree. + self.cursor = child; + true + } else { + false + } + } + + pub fn goto_next_sibling(&mut self) -> bool { + if let Some(sibling) = self.cursor.next_sibling() { + self.cursor = sibling; + true + } else { + false + } + } + + pub fn goto_prev_sibling(&mut self) -> bool { + if let Some(sibling) = self.cursor.prev_sibling() { + self.cursor = sibling; + true + } else { + false + } + } + + /// Finds the injection layer that contains the given start-end range. + fn layer_id_containing_byte_range(&self, start: usize, end: usize) -> LayerId { + let start_idx = self + .injection_ranges + .partition_point(|range| range.end < end); + + self.injection_ranges[start_idx..] + .iter() + .take_while(|range| range.start < end) + .find_map(|range| (range.start <= start).then_some(range.layer_id)) + .unwrap_or(self.root) + } + + pub fn reset_to_byte_range(&mut self, start: usize, end: usize) { + self.current = self.layer_id_containing_byte_range(start, end); + let root = self.layers[self.current].tree().root_node(); + self.cursor = root.descendant_for_byte_range(start, end).unwrap_or(root); + } +} From 6f47990789837f3e6b55e3278b11cfea011a332c Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 10 Jan 2024 14:31:05 -0500 Subject: [PATCH 3/3] Reimplement tree motions in terms of syntax::TreeCursor This uses the new TreeCursor type from the parent commit to reimplement the tree-sitter motions (`A-p/o/i/n`). Other tree-sitter related features like textobjects are not touched with this change and will need a different, unrelated approach to solve. 
--- helix-core/src/object.rs | 86 ++++++++++++++++--------------- helix-term/src/commands.rs | 14 +++-- helix-term/tests/test/movement.rs | 57 ++++++++++++++++++++ 3 files changed, 107 insertions(+), 50 deletions(-) diff --git a/helix-core/src/object.rs b/helix-core/src/object.rs index d2d4fe70ac02..0df105f1a517 100644 --- a/helix-core/src/object.rs +++ b/helix-core/src/object.rs @@ -1,42 +1,52 @@ -use crate::{Range, RopeSlice, Selection, Syntax}; -use tree_sitter::Node; +use crate::{syntax::TreeCursor, Range, RopeSlice, Selection, Syntax}; pub fn expand_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - select_node_impl(syntax, text, selection, |mut node, from, to| { - while node.start_byte() == from && node.end_byte() == to { - node = node.parent()?; + let cursor = &mut syntax.walk(); + + selection.transform(|range| { + let from = text.char_to_byte(range.from()); + let to = text.char_to_byte(range.to()); + + let byte_range = from..to; + cursor.reset_to_byte_range(from, to); + + while cursor.node().byte_range() == byte_range { + if !cursor.goto_parent() { + break; + } } - Some(node) + + let node = cursor.node(); + let from = text.byte_to_char(node.start_byte()); + let to = text.byte_to_char(node.end_byte()); + + Range::new(to, from).with_direction(range.direction()) }) } pub fn shrink_selection(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { - select_node_impl(syntax, text, selection, |descendant, _from, _to| { - descendant.child(0).or(Some(descendant)) + select_node_impl(syntax, text, selection, |cursor| { + cursor.goto_first_child(); }) } -pub fn select_sibling( - syntax: &Syntax, - text: RopeSlice, - selection: Selection, - sibling_fn: &F, -) -> Selection -where - F: Fn(Node) -> Option, -{ - select_node_impl(syntax, text, selection, |descendant, _from, _to| { - find_sibling_recursive(descendant, sibling_fn) +pub fn select_next_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + 
select_node_impl(syntax, text, selection, |cursor| { + while !cursor.goto_next_sibling() { + if !cursor.goto_parent() { + break; + } + } }) } -fn find_sibling_recursive(node: Node, sibling_fn: F) -> Option -where - F: Fn(Node) -> Option, -{ - sibling_fn(node).or_else(|| { - node.parent() - .and_then(|node| find_sibling_recursive(node, sibling_fn)) +pub fn select_prev_sibling(syntax: &Syntax, text: RopeSlice, selection: Selection) -> Selection { + select_node_impl(syntax, text, selection, |cursor| { + while !cursor.goto_prev_sibling() { + if !cursor.goto_parent() { + break; + } + } }) } @@ -44,33 +54,25 @@ fn select_node_impl( syntax: &Syntax, text: RopeSlice, selection: Selection, - select_fn: F, + motion: F, ) -> Selection where - F: Fn(Node, usize, usize) -> Option, + F: Fn(&mut TreeCursor), { - let tree = syntax.tree(); + let cursor = &mut syntax.walk(); selection.transform(|range| { let from = text.char_to_byte(range.from()); let to = text.char_to_byte(range.to()); - let node = match tree - .root_node() - .descendant_for_byte_range(from, to) - .and_then(|node| select_fn(node, from, to)) - { - Some(node) => node, - None => return range, - }; + cursor.reset_to_byte_range(from, to); + motion(cursor); + + let node = cursor.node(); let from = text.byte_to_char(node.start_byte()); let to = text.byte_to_char(node.end_byte()); - if range.head < range.anchor { - Range::new(to, from) - } else { - Range::new(from, to) - } + Range::new(from, to).with_direction(range.direction()) }) } diff --git a/helix-term/src/commands.rs b/helix-term/src/commands.rs index a5e79a53992f..7de0ed63aaaf 100644 --- a/helix-term/src/commands.rs +++ b/helix-term/src/commands.rs @@ -26,7 +26,6 @@ use helix_core::{ syntax::{BlockCommentToken, LanguageServerFeature}, text_annotations::TextAnnotations, textobject, - tree_sitter::Node, unicode::width::UnicodeWidthChar, visual_offset_from_block, Deletion, LineEnding, Position, Range, Rope, RopeGraphemes, RopeReader, RopeSlice, Selection, SmallVec, 
Tendril, Transaction, @@ -4768,18 +4767,17 @@ fn shrink_selection(cx: &mut Context) { cx.editor.apply_motion(motion); } -fn select_sibling_impl(cx: &mut Context, sibling_fn: &'static F) +fn select_sibling_impl(cx: &mut Context, sibling_fn: F) where - F: Fn(Node) -> Option, + F: Fn(&helix_core::Syntax, RopeSlice, Selection) -> Selection + 'static, { - let motion = |editor: &mut Editor| { + let motion = move |editor: &mut Editor| { let (view, doc) = current!(editor); if let Some(syntax) = doc.syntax() { let text = doc.text().slice(..); let current_selection = doc.selection(view.id); - let selection = - object::select_sibling(syntax, text, current_selection.clone(), sibling_fn); + let selection = sibling_fn(syntax, text, current_selection.clone()); doc.set_selection(view.id, selection); } }; @@ -4787,11 +4785,11 @@ where } fn select_next_sibling(cx: &mut Context) { - select_sibling_impl(cx, &|node| Node::next_sibling(&node)) + select_sibling_impl(cx, object::select_next_sibling) } fn select_prev_sibling(cx: &mut Context) { - select_sibling_impl(cx, &|node| Node::prev_sibling(&node)) + select_sibling_impl(cx, object::select_prev_sibling) } fn move_node_bound_impl(cx: &mut Context, dir: Direction, movement: Movement) { diff --git a/helix-term/tests/test/movement.rs b/helix-term/tests/test/movement.rs index 4ebaae854fdc..1c25032c2f1b 100644 --- a/helix-term/tests/test/movement.rs +++ b/helix-term/tests/test/movement.rs @@ -635,3 +635,60 @@ async fn test_surround_delete() -> anyhow::Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread")] +async fn tree_sitter_motions_work_across_injections() -> anyhow::Result<()> { + test_with_config( + AppBuilder::new().with_file("foo.html", None), + ( + "", + "", + "", + ), + ) + .await?; + + // When the full injected layer is selected, expand_selection jumps to + // a more shallow layer. 
+ test_with_config( + AppBuilder::new().with_file("foo.html", None), + ( + "", + "", + "#[|]#", + ), + ) + .await?; + + test_with_config( + AppBuilder::new().with_file("foo.html", None), + ( + "", + "", + "", + ), + ) + .await?; + + test_with_config( + AppBuilder::new().with_file("foo.html", None), + ( + "", + "", + "", + ), + ) + .await?; + + test_with_config( + AppBuilder::new().with_file("foo.html", None), + ( + "", + "", + "", + ), + ) + .await?; + + Ok(()) +}