From b10e7f06dcc2322c2ebab7f069deed8ea52af5a8 Mon Sep 17 00:00:00 2001 From: Roberto T Date: Thu, 4 Jan 2024 13:43:14 -0600 Subject: [PATCH 1/4] DYN-6535 DynamoRevit Improve Search In a search criteria like "list replace" I've implemented a fix that will check how many nodes we have when the terms are split by empty space (in this case "list" and "replace"), so if it reaches the limit then we discard the split search and execute a normal search. --- .../Utilities/LuceneSearchUtility.cs | 46 +++++++++++++------ 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/DynamoCore/Utilities/LuceneSearchUtility.cs b/src/DynamoCore/Utilities/LuceneSearchUtility.cs index 94e8b518f79..d35af764ca0 100644 --- a/src/DynamoCore/Utilities/LuceneSearchUtility.cs +++ b/src/DynamoCore/Utilities/LuceneSearchUtility.cs @@ -82,6 +82,13 @@ public enum LuceneStorage FILE_SYSTEM } + public enum SearchType + { + Normal, + ByCategory, + ByEmptySpace + } + // Used for creating the StandardAnalyzer internal Analyzer Analyzer; @@ -264,7 +271,9 @@ internal void SetDocumentFieldValue(Document doc, string field, string value, bo /// internal string CreateSearchQuery(string[] fields, string SearchTerm) { + SearchType searchType = SearchType.Normal; int fuzzyLogicMaxEdits = LuceneConfig.FuzzySearchMinEdits; + const int MaxNodeNamesRepeated = 20; // Use a larger max edit value - more tolerant with typo when search term is longer than threshold if (SearchTerm.Length > LuceneConfig.FuzzySearchMaxEditsThreshold) { @@ -273,13 +282,21 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) var booleanQuery = new BooleanQuery(); string searchTerm = QueryParser.Escape(SearchTerm); - var bCategoryBasedSearch = searchTerm.Contains('.') ? 
true : false; + + if (searchTerm.Contains('.')) + searchType = SearchType.ByCategory; + else if (searchTerm.Contains(' ')) + searchType = SearchType.ByEmptySpace; + else + searchType = SearchType.Normal; + + var trimmedSearchTerm = searchType == SearchType.ByEmptySpace ? searchTerm.Replace(" ", "") : searchTerm; foreach (string f in fields) { //Needs to be again due that now a query can contain different values per field (e.g. CategorySplitted:list, Name:tr) searchTerm = QueryParser.Escape(SearchTerm); - if (bCategoryBasedSearch == true) + if (searchType == SearchType.ByCategory) { //This code section should be only executed if the search criteria is CategoryBased like "category.nodename" if (f != nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) && @@ -297,26 +314,26 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) } } - FuzzyQuery fuzzyQuery; - if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength) - { - fuzzyQuery = new FuzzyQuery(new Term(f, searchTerm), fuzzyLogicMaxEdits); - booleanQuery.Add(fuzzyQuery, Occur.SHOULD); - } - //For normal search we don't consider the fields NameSplitted and CategorySplitted if ((f == nameof(LuceneConfig.NodeFieldsEnum.NameSplitted) || - f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && bCategoryBasedSearch == false) + f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) && searchType != SearchType.ByCategory) continue; //This case is for when the user type something like "list.", I mean, not specifying the node name or part of it if (string.IsNullOrEmpty(searchTerm)) continue; - var fieldQuery = CalculateFieldWeight(f, searchTerm); - var wildcardQuery = CalculateFieldWeight(f, searchTerm, true); + FuzzyQuery fuzzyQuery; + if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength) + { + fuzzyQuery = new FuzzyQuery(new Term(f, searchType == SearchType.ByEmptySpace ? 
trimmedSearchTerm : searchTerm), fuzzyLogicMaxEdits); + booleanQuery.Add(fuzzyQuery, Occur.SHOULD); + } - if (bCategoryBasedSearch && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) + var fieldQuery = CalculateFieldWeight(f, searchType == SearchType.ByEmptySpace ? trimmedSearchTerm : searchTerm); + var wildcardQuery = CalculateFieldWeight(f, searchType == SearchType.ByEmptySpace ? trimmedSearchTerm : searchTerm, true); + + if (searchType == SearchType.ByCategory && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) { booleanQuery.Add(fieldQuery, Occur.MUST); booleanQuery.Add(wildcardQuery, Occur.MUST); @@ -331,6 +348,9 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) { foreach (string s in searchTerm.Split(' ', '.')) { + int nodesFrequency = dynamoModel.SearchModel.Entries.Where(entry => entry.Name.ToLower().Contains(s) && !string.IsNullOrEmpty(s)).Count(); + if (nodesFrequency > MaxNodeNamesRepeated) continue; + if (string.IsNullOrEmpty(s)) continue; if (s.Length > LuceneConfig.FuzzySearchMinimalTermLength) From 906e11fdc009e7d78b997e930f5b143c18200815 Mon Sep 17 00:00:00 2001 From: Roberto T Date: Mon, 22 Jan 2024 17:17:55 -0600 Subject: [PATCH 2/4] DYN-6535 DynamoRevit Improve Search Adding extra comments --- src/DynamoCore/Utilities/LuceneSearchUtility.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/DynamoCore/Utilities/LuceneSearchUtility.cs b/src/DynamoCore/Utilities/LuceneSearchUtility.cs index d35af764ca0..1d729099213 100644 --- a/src/DynamoCore/Utilities/LuceneSearchUtility.cs +++ b/src/DynamoCore/Utilities/LuceneSearchUtility.cs @@ -84,8 +84,13 @@ public enum LuceneStorage public enum SearchType { + //Normal search using just one word matching a specific node name Normal, + + //Search by category using the "." 
character for example "list.re" ByCategory, + + //Search using an empty space as separator like "get parameters" or "set parameters" ByEmptySpace } @@ -271,9 +276,13 @@ internal void SetDocumentFieldValue(Document doc, string field, string value, bo /// internal string CreateSearchQuery(string[] fields, string SearchTerm) { + //By Default the search will be normal SearchType searchType = SearchType.Normal; int fuzzyLogicMaxEdits = LuceneConfig.FuzzySearchMinEdits; + + //Max number of nodes allowed in the search when is a ByEmptySpace search const int MaxNodeNamesRepeated = 20; + // Use a larger max edit value - more tolerant with typo when search term is longer than threshold if (SearchTerm.Length > LuceneConfig.FuzzySearchMaxEditsThreshold) { @@ -348,6 +357,7 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) { foreach (string s in searchTerm.Split(' ', '.')) { + //If this is a ByEmptySpace search and a split word matches more than MaxNodeNamesRepeated nodes, then the word is skipped int nodesFrequency = dynamoModel.SearchModel.Entries.Where(entry => entry.Name.ToLower().Contains(s) && !string.IsNullOrEmpty(s)).Count(); if (nodesFrequency > MaxNodeNamesRepeated) continue; From ff1b100724613ce899caaa4ff62097b28f0bdfaf Mon Sep 17 00:00:00 2001 From: Roberto T Date: Tue, 23 Jan 2024 16:02:38 -0600 Subject: [PATCH 3/4] DYN-6535 DynamoRevit Improve Search Code Review Convert the ByEmptySpace to be a property and remove it from the enum. 
--- .../Utilities/LuceneSearchUtility.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/DynamoCore/Utilities/LuceneSearchUtility.cs b/src/DynamoCore/Utilities/LuceneSearchUtility.cs index 1d729099213..047337b86b7 100644 --- a/src/DynamoCore/Utilities/LuceneSearchUtility.cs +++ b/src/DynamoCore/Utilities/LuceneSearchUtility.cs @@ -73,6 +73,8 @@ internal class LuceneSearchUtility /// internal static readonly LuceneStartConfig DefaultPkgIndexStartConfig = new LuceneStartConfig(LuceneSearchUtility.LuceneStorage.FILE_SYSTEM, LuceneConfig.PackagesIndexingDirectory); + private bool hasEmptySpaces { get; set; } + public enum LuceneStorage { //Lucene Storage will be located in RAM and all the info indexed will be lost when Dynamo app is closed @@ -88,10 +90,7 @@ public enum SearchType Normal, //Search by category using the "." character for example "list.re" - ByCategory, - - //Search using an empty space as separator like "get parameters" or "set parameters" - ByEmptySpace + ByCategory } // Used for creating the StandardAnalyzer @@ -279,6 +278,7 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) //By Default the search will be normal SearchType searchType = SearchType.Normal; int fuzzyLogicMaxEdits = LuceneConfig.FuzzySearchMinEdits; + hasEmptySpaces = false; //Max number of nodes allowed in the search when is a ByEmptySpace search const int MaxNodeNamesRepeated = 20; @@ -295,11 +295,11 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) if (searchTerm.Contains('.')) searchType = SearchType.ByCategory; else if (searchTerm.Contains(' ')) - searchType = SearchType.ByEmptySpace; + hasEmptySpaces = true; else searchType = SearchType.Normal; - var trimmedSearchTerm = searchType == SearchType.ByEmptySpace ? searchTerm.Replace(" ", "") : searchTerm; + var trimmedSearchTerm = hasEmptySpaces == true ? 
searchTerm.Replace(" ", "") : searchTerm; foreach (string f in fields) { @@ -335,12 +335,12 @@ internal string CreateSearchQuery(string[] fields, string SearchTerm) FuzzyQuery fuzzyQuery; if (searchTerm.Length > LuceneConfig.FuzzySearchMinimalTermLength) { - fuzzyQuery = new FuzzyQuery(new Term(f, searchType == SearchType.ByEmptySpace ? trimmedSearchTerm : searchTerm), fuzzyLogicMaxEdits); + fuzzyQuery = new FuzzyQuery(new Term(f, hasEmptySpaces == true ? trimmedSearchTerm : searchTerm), fuzzyLogicMaxEdits); booleanQuery.Add(fuzzyQuery, Occur.SHOULD); } - var fieldQuery = CalculateFieldWeight(f, searchType == SearchType.ByEmptySpace ? trimmedSearchTerm : searchTerm); - var wildcardQuery = CalculateFieldWeight(f, searchType == SearchType.ByEmptySpace ? trimmedSearchTerm : searchTerm, true); + var fieldQuery = CalculateFieldWeight(f, hasEmptySpaces == true ? trimmedSearchTerm : searchTerm); + var wildcardQuery = CalculateFieldWeight(f, hasEmptySpaces == true ? trimmedSearchTerm : searchTerm, true); if (searchType == SearchType.ByCategory && f == nameof(LuceneConfig.NodeFieldsEnum.CategorySplitted)) { From 584f79405e3c0092a6f2a6cbbfcd8b5694b59f11 Mon Sep 17 00:00:00 2001 From: Roberto T Date: Wed, 24 Jan 2024 12:42:25 -0600 Subject: [PATCH 4/4] DYN-6535 DynamoRevit Improve Search Code Review Adding extra comments --- src/DynamoCore/Utilities/LuceneSearchUtility.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/DynamoCore/Utilities/LuceneSearchUtility.cs b/src/DynamoCore/Utilities/LuceneSearchUtility.cs index 047337b86b7..579fac5fb12 100644 --- a/src/DynamoCore/Utilities/LuceneSearchUtility.cs +++ b/src/DynamoCore/Utilities/LuceneSearchUtility.cs @@ -84,6 +84,9 @@ public enum LuceneStorage FILE_SYSTEM } + /// + /// This enum will be used to identify which kind of search should be executed based on the user search criteria + /// public enum SearchType { //Normal search using just one word matching a specific node name