reduce some string allocation in Vocabulary (#1355)

Humanizr · Feb 16, 2024 · 8970d74 · 8970d74
1 parent 62941ba
commit 8970d74
Show file tree

Hide file tree

Showing 2 changed files with 153 additions and 156 deletions.
diff --git a/src/Humanizer/GlobalUsings.cs b/src/Humanizer/GlobalUsings.cs
@@ -1 +1,2 @@
 global using System.Globalization;
+global using System.Text.RegularExpressions;
diff --git a/src/Humanizer/Inflections/Vocabulary.cs b/src/Humanizer/Inflections/Vocabulary.cs
@@ -1,202 +1,198 @@
-using System.Text.RegularExpressions;
-
-namespace Humanizer
+namespace Humanizer;
+
+/// <summary>
+/// A container for exceptions to simple pluralization/singularization rules.
+/// Vocabularies.Default contains an extensive list of rules for US English.
+/// At this time, multiple vocabularies and removing existing rules are not supported.
+/// </summary>
+public class Vocabulary
 {
+    internal Vocabulary()
+    {
+    }
+
+    readonly List<Rule> plurals = [];
+    readonly List<Rule> singulars = [];
+    readonly HashSet<string> uncountables = new(StringComparer.CurrentCultureIgnoreCase);
+    readonly Regex letterS = new("^([sS])[sS]*$");
+
     /// <summary>
-    /// A container for exceptions to simple pluralization/singularization rules.
-    /// Vocabularies.Default contains an extensive list of rules for US English.
-    /// At this time, multiple vocabularies and removing existing rules are not supported.
+    /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people".
     /// </summary>
-    public class Vocabulary
+    /// <param name="singular">The singular form of the irregular word, e.g. "person".</param>
+    /// <param name="plural">The plural form of the irregular word, e.g. "people".</param>
+    /// <param name="matchEnding">True to match these words on their own as well as at the end of longer words; false to match only the whole word.</param>
+    public void AddIrregular(string singular, string plural, bool matchEnding = true)
     {
-        internal Vocabulary()
+        if (matchEnding)
         {
+            var singularSubstring = singular.Substring(1);
+            var pluralSubString = plural.Substring(1);
+            AddPlural($"({singular[0]}){singularSubstring}$", $"$1{pluralSubString}");
+            AddSingular($"({plural[0]}){pluralSubString}$", $"$1{singularSubstring}");
         }
-
-        private readonly List<Rule> _plurals = new List<Rule>();
-        private readonly List<Rule> _singulars = new List<Rule>();
-        private readonly HashSet<string> _uncountables = new(StringComparer.CurrentCultureIgnoreCase);
-        private readonly Regex _letterS = new Regex("^([sS])[sS]*$");
-
-        /// <summary>
-        /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people".
-        /// </summary>
-        /// <param name="singular">The singular form of the irregular word, e.g. "person".</param>
-        /// <param name="plural">The plural form of the irregular word, e.g. "people".</param>
-        /// <param name="matchEnding">True to match these words on their own as well as at the end of longer words. False, otherwise.</param>
-        public void AddIrregular(string singular, string plural, bool matchEnding = true)
+        else
         {
-            if (matchEnding)
-            {
-                AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1));
-                AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1));
-            }
-            else
-            {
-                AddPlural($"^{singular}$", plural);
-                AddSingular($"^{plural}$", singular);
-            }
+            AddPlural($"^{singular}$", plural);
+            AddSingular($"^{plural}$", singular);
         }
+    }
 
-        /// <summary>
-        /// Adds an uncountable word to the vocabulary, e.g. "fish".  Will be ignored when plurality is changed.
-        /// </summary>
-        /// <param name="word">Word to be added to the list of uncountables.</param>
-        public void AddUncountable(string word)
-        {
-            _uncountables.Add(word);
-        }
+    /// <summary>
+    /// Adds an uncountable word to the vocabulary, e.g. "fish". Uncountable words are returned unchanged when pluralizing or singularizing.
+    /// </summary>
+    /// <param name="word">Word to be added to the list of uncountables.</param>
+    public void AddUncountable(string word) =>
+        uncountables.Add(word);
 
-        /// <summary>
-        /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses"
-        /// </summary>
-        /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)es$"</param>
-        /// <param name="replacement">RegEx replacement  e.g. "$1"</param>
-        public void AddPlural(string rule, string replacement)
-        {
-            _plurals.Add(new Rule(rule, replacement));
-        }
+    /// <summary>
+    /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses"
+    /// </summary>
+    /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(bus)$"</param>
+    /// <param name="replacement">RegEx replacement, e.g. "$1es"</param>
+    public void AddPlural(string rule, string replacement) =>
+        plurals.Add(new(rule, replacement));
 
-        /// <summary>
-        /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index"
-        /// </summary>
-        /// <param name="rule">RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$""</param>
-        /// <param name="replacement">RegEx replacement  e.g. "$1ex"</param>
-        public void AddSingular(string rule, string replacement)
-        {
-            _singulars.Add(new Rule(rule, replacement));
-        }
+    /// <summary>
+    /// Adds a rule to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices" -> "vertex/index"
+    /// </summary>
+    /// <param name="rule">RegEx to be matched, case insensitive, e.g. "(vert|ind)ices$"</param>
+    /// <param name="replacement">RegEx replacement, e.g. "$1ex"</param>
+    public void AddSingular(string rule, string replacement) =>
+        singulars.Add(new(rule, replacement));
 
-        /// <summary>
-        /// Pluralizes the provided input considering irregular words
-        /// </summary>
-        /// <param name="word">Word to be pluralized</param>
-        /// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param>
-        public string Pluralize(string word, bool inputIsKnownToBeSingular = true)
+    /// <summary>
+    /// Pluralizes the provided input considering irregular words
+    /// </summary>
+    /// <param name="word">Word to be pluralized</param>
+    /// <param name="inputIsKnownToBeSingular">Normally you call Pluralize on singular words; but if you're unsure call it with false</param>
+    public string Pluralize(string word, bool inputIsKnownToBeSingular = true)
+    {
+        var s = LetterS(word);
+        if (s != null)
         {
-            var s = LetterS(word);
-            if (s != null)
-            {
-                return s + "s";
-            }
+            return s + "s";
+        }
 
-            var result = ApplyRules(_plurals, word, false);
+        var result = ApplyRules(plurals, word, false);
 
-            if (inputIsKnownToBeSingular)
-            {
-                return result ?? word;
-            }
-
-            var asSingular = ApplyRules(_singulars, word, false);
-            var asSingularAsPlural = ApplyRules(_plurals, asSingular, false);
-            if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word)
-            {
-                return word;
-            }
-
-            return result;
+        if (inputIsKnownToBeSingular)
+        {
+            return result ?? word;
         }
 
-        /// <summary>
-        /// Singularizes the provided input considering irregular words
-        /// </summary>
-        /// <param name="word">Word to be singularized</param>
-        /// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param>
-        /// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param>
-        public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false)
+        var asSingular = ApplyRules(singulars, word, false);
+        var asSingularAsPlural = ApplyRules(plurals, asSingular, false);
+        if (asSingular != null &&
+            asSingular != word &&
+            asSingular + "s" != word &&
+            asSingularAsPlural == word &&
+            result != word)
         {
-            var s = LetterS(word);
-            if (s != null)
-            {
-                return s;
-            }
+            return word;
+        }
 
-            var result = ApplyRules(_singulars, word, skipSimpleWords);
+        return result;
+    }
 
-            if (inputIsKnownToBePlural)
-            {
-                return result ?? word;
-            }
+    /// <summary>
+    /// Singularizes the provided input considering irregular words
+    /// </summary>
+    /// <param name="word">Word to be singularized</param>
+    /// <param name="inputIsKnownToBePlural">Normally you call Singularize on plural words; but if you're unsure call it with false</param>
+    /// <param name="skipSimpleWords">Skip singularizing single words that have an 's' on the end</param>
+    public string Singularize(string word, bool inputIsKnownToBePlural = true, bool skipSimpleWords = false)
+    {
+        var s = LetterS(word);
+        if (s != null)
+        {
+            return s;
+        }
 
-            // the Plurality is unknown so we should check all possibilities
-            var asPlural = ApplyRules(_plurals, word, false);
-            var asPluralAsSingular = ApplyRules(_singulars, asPlural, false);
-            if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word)
-            {
-                return word;
-            }
+        var result = ApplyRules(singulars, word, skipSimpleWords);
 
+        if (inputIsKnownToBePlural)
+        {
             return result ?? word;
         }
 
-        private string ApplyRules(IList<Rule> rules, string word, bool skipFirstRule)
+        // the Plurality is unknown so we should check all possibilities
+        var asPlural = ApplyRules(plurals, word, false);
+        var asPluralAsSingular = ApplyRules(singulars, asPlural, false);
+        if (asPlural == word ||
+            word + "s" == asPlural ||
+            asPluralAsSingular != word ||
+            result == word)
         {
-            if (word == null)
-            {
-                return null;
-            }
-
-            if (word.Length < 1)
-            {
-                return word;
-            }
+            return result ?? word;
+        }
 
-            if (IsUncountable(word))
-            {
-                return word;
-            }
+        return word;
+    }
 
-            var result = word;
-            var end = skipFirstRule ? 1 : 0;
-            for (var i = rules.Count - 1; i >= end; i--)
-            {
-                if ((result = rules[i].Apply(word)) != null)
-                {
-                    break;
-                }
-            }
-            return result != null ? MatchUpperCase(word, result) : result;
+    string ApplyRules(IList<Rule> rules, string word, bool skipFirstRule)
+    {
+        if (word == null)
+        {
+            return null;
         }
 
-        private bool IsUncountable(string word)
+        if (word.Length < 1)
         {
-            return _uncountables.Contains(word);
+            return word;
         }
 
-        private string MatchUpperCase(string word, string replacement)
+        if (IsUncountable(word))
         {
-            return char.IsUpper(word[0]) && char.IsLower(replacement[0]) ? char.ToUpper(replacement[0]) + replacement.Substring(1) : replacement;
+            return word;
         }
 
-        /// <summary>
-        /// If the word is the letter s, singular or plural, return the letter s singular
-        /// </summary>
-        private string LetterS(string word)
+        var result = word;
+        var end = skipFirstRule ? 1 : 0;
+        for (var i = rules.Count - 1; i >= end; i--)
         {
-            var s = _letterS.Match(word);
-            return s.Groups.Count > 1 ? s.Groups[1].Value : null;
+            if ((result = rules[i].Apply(word)) != null)
+            {
+                break;
+            }
         }
 
-        private class Rule
+        if (result == null)
         {
-            private readonly Regex _regex;
-            private readonly string _replacement;
+            return null;
+        }
 
-            public Rule(string pattern, string replacement)
-            {
-                _regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptionsUtil.Compiled);
-                _replacement = replacement;
-            }
+        return MatchUpperCase(word, result);
+    }
 
-            public string Apply(string word)
-            {
-                if (!_regex.IsMatch(word))
-                {
-                    return null;
-                }
+    bool IsUncountable(string word) =>
+        uncountables.Contains(word);
 
-                return _regex.Replace(word, _replacement);
+    static string MatchUpperCase(string word, string replacement) =>
+        char.IsUpper(word[0]) &&
+        char.IsLower(replacement[0]) ? char.ToUpper(replacement[0]) + replacement.Substring(1) : replacement;
+
+    /// <summary>
+    /// If the word consists only of the letter s (one or more, upper or lower case), returns its first letter; otherwise returns null
+    /// </summary>
+    string LetterS(string word)
+    {
+        var s = letterS.Match(word);
+        return s.Groups.Count > 1 ? s.Groups[1].Value : null;
+    }
+
+    class Rule(string pattern, string replacement)
+    {
+        private readonly Regex regex = new(pattern, RegexOptions.IgnoreCase | RegexOptionsUtil.Compiled);
+
+        public string Apply(string word)
+        {
+            if (!regex.IsMatch(word))
+            {
+                return null;
             }
+
+            return regex.Replace(word, replacement);
         }
     }
-}
+}
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		global using System.Globalization;
		global using System.Text.RegularExpressions;