Minor doc fixes in nucleo_matcher

helix-editor · Dec 13, 2024 · f16469b · f16469b
1 parent d9d707d
commit f16469b
Show file tree

Hide file tree

Showing 5 changed files with 30 additions and 18 deletions.
diff --git a/matcher/src/chars.rs b/matcher/src/chars.rs
@@ -1,12 +1,12 @@
-//! Utilities for working with (unicode) characters/codepoints
+//! Utilities for working with (Unicode) characters and codepoints.
 
 use std::fmt::{self, Debug, Display};
 
 #[cfg(feature = "unicode-casefold")]
 use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
 use crate::Config;
 
-//autogenerated by generate-ucd
+// autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
 #[cfg(feature = "unicode-casefold")]
@@ -82,6 +82,7 @@ impl Char for AsciiChar {
         self
     }
 }
+
 fn char_class_non_ascii(c: char) -> CharClass {
     if c.is_lowercase() {
         CharClass::Lower
@@ -97,6 +98,7 @@ fn char_class_non_ascii(c: char) -> CharClass {
         CharClass::NonWord
     }
 }
+
 impl Char for char {
     const ASCII: bool = false;
     #[inline(always)]
@@ -149,7 +151,7 @@ pub use normalize::normalize;
 #[cfg(feature = "unicode-segmentation")]
 use unicode_segmentation::UnicodeSegmentation;
 
-/// Converts a character to lower case using simple unicode case folding
+/// Converts a character to lower case using simple Unicode case folding.
 #[cfg(feature = "unicode-casefold")]
 #[inline(always)]
 pub fn to_lower_case(c: char) -> char {
@@ -158,8 +160,9 @@ pub fn to_lower_case(c: char) -> char {
         .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
 }
 
-/// Checks if a character is upper case according to simple unicode case folding.
-/// if the `unicode-casefold` feature is disable the equivalent std function is used
+/// Checks if a character is upper case according to simple Unicode case folding.
+///
+/// If the `unicode-casefold` feature is disabled, the equivalent std function is used instead.
 #[inline(always)]
 pub fn is_upper_case(c: char) -> bool {
     #[cfg(feature = "unicode-casefold")]
@@ -182,10 +185,15 @@ pub(crate) enum CharClass {
     Number,
 }
 
-/// Nucleo cannot match graphemes as single units. To work around
-/// that we only use the first codepoint of each grapheme. This
-/// iterator returns the first character of each unicode grapheme
-/// in a string and is used for constructing `Utf32Str(ing)`.
+/// Returns an iterator over single-codepoint representations of each grapheme in the provided
+/// text.
+///
+/// For the most part, this is simply the first `char` of a grapheme. The main exception is the
+/// Windows-style newline `\r\n`, which is normalized to the char `'\n'`.
+///
+/// This workaround mainly exists because Nucleo cannot match graphemes as single units, so we
+/// must internally map each grapheme to a simpler in-memory representation. This function is
+/// used when constructing `Utf32Str(ing)`.
 pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
     #[cfg(feature = "unicode-segmentation")]
     let res = text.graphemes(true).map(|grapheme| {

diff --git a/matcher/src/chars/normalize.rs b/matcher/src/chars/normalize.rs
@@ -1,9 +1,9 @@
 /// Normalize a Unicode character by converting Latin characters which are variants
-/// of ASCII characters to their latin equivalent.
+/// of ASCII characters to their Latin equivalents.
 ///
 /// Note that this method acts on single `char`s: if you want to perform full normalization, you
 /// should first split on graphemes, and then normalize each grapheme by normalizing the first
-/// `char` in the grapheme.
+/// `char` in each grapheme. See the [`graphemes`](super::graphemes) function for more detail.
 ///
 /// If a character does not normalize to a single ASCII character, no normalization is performed.
 ///
@@ -15,7 +15,7 @@
 /// - [Latin Extended Additional](https://en.wikipedia.org/wiki/Latin_Extended_Additional)
 /// - [Superscripts and Subscripts](https://en.wikipedia.org/wiki/Superscripts_and_Subscripts)
 ///
-/// If the character does not fall in this block, it is not normalized.
+/// If the character does not fall in any of these blocks, it is not normalized.
 ///
 /// # Example
 /// ```

diff --git a/matcher/src/config.rs b/matcher/src/config.rs
@@ -29,8 +29,8 @@ pub struct Config {
 }
 
 impl Config {
-    /// The default config for nucleo, implemented as a constant since
-    /// Default::default can not be called in a const context
+    /// The default configuration for nucleo, implemented as a constant since
+    /// [`Default::default`] cannot be called in a `const` context.
     pub const DEFAULT: Self = {
         Config {
             delimiter_chars: b"/,:;|",

diff --git a/matcher/src/lib.rs b/matcher/src/lib.rs
@@ -183,9 +183,11 @@ impl Default for Matcher {
 }
 
 impl Matcher {
-    /// Creates a new matcher instance, note that this will eagerly allocate a
-    /// fairly large chunk of heap memory (around 135KB currently but subject to
-    /// change) so matchers should be reused if called often (like in a loop).
+    /// Creates a new matcher instance.
+    ///
+    /// This will eagerly allocate a fairly large chunk of heap memory (around 135KB
+    /// currently, but subject to change) so matchers should be reused if called often,
+    /// such as in a loop.
     pub fn new(config: Config) -> Self {
         Self {
             config,

diff --git a/matcher/src/utf32_str.rs b/matcher/src/utf32_str.rs
@@ -95,6 +95,7 @@ fn has_ascii_graphemes(string: &str) -> bool {
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub enum Utf32Str<'a> {
     /// A string represented as ASCII encoded bytes.
+    ///
     /// Correctness invariant: must only contain valid ASCII (`<= 127`)
     Ascii(&'a [u8]),
     /// A string represented as an array of unicode codepoints (basically UTF-32).
@@ -301,7 +302,8 @@ impl DoubleEndedIterator for Chars<'_> {
 /// See the API documentation for [`Utf32Str`] for more detail.
 pub enum Utf32String {
     /// A string represented as ASCII encoded bytes.
-    /// Correctness invariant: must only contain valid ASCII (<=127)
+    ///
+    /// Correctness invariant: must only contain valid ASCII (`<= 127`)
     Ascii(Box<str>),
     /// A string represented as an array of unicode codepoints (basically UTF-32).
     Unicode(Box<[char]>),