Skip to content

Commit

Permalink
Lucene search in package manager (#14098)
Browse files Browse the repository at this point in the history
* Lucene search in package manager

* Move to LuceneSearchViewModel

* LuceneSearchUtility

* Obsolete the search method in package manager view model.

* Update PackageManagerClientViewModel.cs

* Move Lucene methods into a dynamo core utility

* Update LuceneSearchUtility.cs

* Update LuceneSearchUtility.cs

* comments

* Update SearchViewModel.cs
  • Loading branch information
reddyashish authored Jun 27, 2023
1 parent 893106e commit 452a2d3
Show file tree
Hide file tree
Showing 7 changed files with 475 additions and 212 deletions.
45 changes: 37 additions & 8 deletions src/DynamoCore/Configuration/LuceneConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ internal class LuceneConfig
/// </summary>
internal static int FuzzySearchWeight = 2;

/// <summary>
/// Parent directory where information is indexed.
/// </summary>
internal static string Index = "Index";

/// <summary>
/// Directory where node information is indexed.
/// </summary>
internal static string NodesIndexingDirectory = "Nodes";

/// <summary>
/// Directory where package information is indexed.
/// </summary>
internal static string PackagesIndexingDirectory = "Packages";

/// <summary>
/// This represents the fields that will be indexed when initializing Lucene Search
/// </summary>
Expand Down Expand Up @@ -107,17 +122,31 @@ public enum IndexFieldsEnum
/// <summary>
/// Documentation - Documentation of the node
/// </summary>
Documentation
Documentation,

/// <summary>
/// Hosts - Package hosts
/// </summary>
Hosts
}

/// <summary>
/// Fields to be indexed by Lucene Search
/// Nodes Fields to be indexed by Lucene Search
/// </summary>
public static string[] NodeIndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.FullCategoryName),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.DocName),
nameof(IndexFieldsEnum.Documentation)};


/// <summary>
/// Package Fields to be indexed by Lucene Search
/// </summary>
public static string[] IndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.FullCategoryName),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.DocName),
nameof(IndexFieldsEnum.Documentation)};
public static string[] PackageIndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.Hosts)};
}
}
125 changes: 21 additions & 104 deletions src/DynamoCore/Models/DynamoModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
using DynamoServices;
using Greg;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
Expand Down Expand Up @@ -133,17 +132,16 @@ public partial class DynamoModel : IDynamoModel, IDisposable, IEngineControllerM
private Timer backupFilesTimer;
private Dictionary<Guid, string> backupFilesDict = new Dictionary<Guid, string>();
internal readonly Stopwatch stopwatch = Stopwatch.StartNew();
internal IndexWriter writer;
internal Lucene.Net.Store.Directory indexDir;
internal DirectoryReader dirReader;
internal List<string> addedFields;

/// <summary>
/// Indicating if ASM is loaded correctly, defaulting to true because integrators most likely have code for ASM preloading
/// During sandbox initializing, Dynamo checks specifically if ASM loading was correct
/// </summary>
internal bool IsASMLoaded = true;

// Lucene search utility to perform indexing operations.
internal LuceneSearchUtility LuceneSearchUtility { get; set; }

#endregion

#region static properties
Expand Down Expand Up @@ -618,34 +616,6 @@ public static DynamoModel Start(IStartConfiguration configuration)
// Token representing the standard library directory
internal static readonly string StandardLibraryToken = @"%StandardLibrary%";


/// <summary>
/// Prepares the Lucene state used for node search: resets the list of fields set on the
/// current document, opens the on-disk index directory, creates the language-specific
/// analyzer and, outside of test mode, creates the index writer.
/// </summary>
private void InitializeLuceneConfig()
{
    addedFields = new List<string>();

    // The previous implementation replaced the DirectoryInfo with null when the user data
    // directory did not exist and then dereferenced it, which always ended in a
    // NullReferenceException. Build the path from the configured location instead and make
    // sure the folder is present (CreateDirectory is a no-op when it already exists).
    var userDataDir = new DirectoryInfo(pathManager.UserDataDirectory);
    string indexPath = Path.Combine(userDataDir.FullName, "Index");
    Directory.CreateDirectory(indexPath);
    indexDir = Lucene.Net.Store.FSDirectory.Open(indexPath);

    // Create an analyzer to process the text
    SearchModel.Analyzer = SearchModel.CreateAnalyzerByLanguage(PreferenceSettings.Locale);

    // When parallel tests run, several of them try to write to the AppData folder at the same
    // time and the job fails or ends up in a bad state, so the index writer is not created in
    // test mode.
    if (!IsTestMode)
    {
        // OpenMode.CREATE: start a new index, overwriting any index already in the directory.
        IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneConfig.LuceneNetVersion, SearchModel.Analyzer)
        {
            OpenMode = OpenMode.CREATE
        };
        writer = new IndexWriter(indexDir, indexConfig);
    }
}

/// <summary>
/// Default constructor for DynamoModel
/// </summary>
Expand Down Expand Up @@ -958,7 +928,8 @@ protected DynamoModel(IStartConfiguration config)

CustomNodeManager = new CustomNodeManager(NodeFactory, MigrationManager, LibraryServices);

InitializeLuceneConfig();
LuceneSearchUtility = new LuceneSearchUtility(this);
LuceneSearchUtility.InitializeLuceneConfig(LuceneConfig.NodesIndexingDirectory);

InitializeCustomNodeManager();

Expand Down Expand Up @@ -1389,8 +1360,8 @@ public void Dispose()
}

// Lucene disposals (just if LuceneNET was initialized)
indexDir?.Dispose();
dirReader?.Dispose();
LuceneSearchUtility.indexDir?.Dispose();
LuceneSearchUtility.dirReader?.Dispose();

#if DEBUG
CurrentWorkspace.NodeAdded -= CrashOnDemand.CurrentWorkspace_NodeAdded;
Expand Down Expand Up @@ -1474,7 +1445,7 @@ private void InitializeCustomNodeManager()

private void InitializeIncludedNodes()
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();

var customNodeData = new TypeLoadData(typeof(Function));
NodeFactory.AddLoader(new CustomNodeLoader(CustomNodeManager, IsTestMode));
Expand Down Expand Up @@ -1643,13 +1614,12 @@ private void InitializeNodeLibrary()
// Without the index files on disk, the dirReader can't be initialized correctly, and neither can the searcher.
if (!IsTestMode)
{
dirReader = writer?.GetReader(applyAllDeletes: true);
IndexSearcher searcher = new IndexSearcher(dirReader);
SearchModel.Searcher = searcher;
LuceneSearchUtility.dirReader = LuceneSearchUtility.writer?.GetReader(applyAllDeletes: true);
LuceneSearchUtility.Searcher = new IndexSearcher(LuceneSearchUtility.dirReader);

writer?.Commit();
writer?.Dispose();
writer = null;
LuceneSearchUtility.writer?.Commit();
LuceneSearchUtility.writer?.Dispose();
LuceneSearchUtility.writer = null;
}
}

Expand Down Expand Up @@ -1702,7 +1672,7 @@ internal void LoadNodeLibrary(Assembly assem, bool suppressZeroTouchLibraryLoad

private void LoadNodeModels(List<TypeLoadData> nodes, bool isPackageMember)
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();
foreach (var type in nodes)
{
// Protect ourselves from exceptions thrown by malformed third party nodes.
Expand Down Expand Up @@ -3268,33 +3238,6 @@ private NodeModelSearchElement AddNodeTypeToSearch(TypeLoadData typeLoadData)
return node;
}

/// <summary>
/// Creates the Lucene document that is reused for every node added to the index.
/// All fields are created empty and stored; their values are filled in per node later.
/// </summary>
/// <returns>The reusable document, or null when running in test mode.</returns>
private Document InitializeIndexDocument()
{
    if (IsTestMode)
    {
        return null;
    }

    var document = new Document();

    // Fields are appended in the same order the document has always used.
    document.Add(new TextField(nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), string.Empty, Field.Store.YES));
    document.Add(new TextField(nameof(LuceneConfig.IndexFieldsEnum.Name), string.Empty, Field.Store.YES));
    document.Add(new TextField(nameof(LuceneConfig.IndexFieldsEnum.Description), string.Empty, Field.Store.YES));
    document.Add(new TextField(nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), string.Empty, Field.Store.YES));
    document.Add(new TextField(nameof(LuceneConfig.IndexFieldsEnum.Documentation), string.Empty, Field.Store.YES));

    // DocName is the only StringField; every other field is a TextField.
    document.Add(new StringField(nameof(LuceneConfig.IndexFieldsEnum.DocName), string.Empty, Field.Store.YES));

    return document;
}

/// <summary>
/// Add node information to Lucene index
/// </summary>
Expand All @@ -3303,40 +3246,14 @@ private Document InitializeIndexDocument()
private void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
{
if (IsTestMode) return;
if (addedFields == null) return;
if (LuceneSearchUtility.addedFields == null) return;

SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), node.FullCategoryName);
SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Name), node.Name);
SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0) SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), node.SearchKeywords.Aggregate((x, y) => x + " " + y), true, true);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), node.FullCategoryName);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Name), node.Name);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0) LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), node.SearchKeywords.Aggregate((x, y) => x + " " + y), true, true);

writer?.AddDocument(doc);
}

/// <summary>
/// Sets the value of an existing field on the reusable index document and records the field
/// name in addedFields.
/// </summary>
/// <param name="doc">Document whose field is updated. The field must already be present with the
/// matching type, because the result of GetField is cast and used without a null check.</param>
/// <param name="field">Name of the field to set.</param>
/// <param name="value">New string value for the field.</param>
/// <param name="isTextField">True when the field is a TextField (its value is tokenized, i.e. broken
/// down into terms); false when it is a StringField (indexed verbatim, not tokenized).
/// "DocName" is always handled as a StringField regardless of this flag.</param>
/// <param name="isLast">True for the last value set on the document: every field in
/// LuceneConfig.IndexFields that has not been set yet is then set to an empty string, and
/// addedFields is cleared for the next document.</param>
//TODO: The SetDocumentFieldValue method should be optimized later
private void SetDocumentFieldValue(Document doc, string field, string value, bool isTextField = true, bool isLast = false)
{
// Track which fields already received a value for the current document.
addedFields.Add(field);
if (isTextField && !field.Equals("DocName"))
{
((TextField)doc.GetField(field)).SetStringValue(value);
}
else
{
((StringField)doc.GetField(field)).SetStringValue(value);
}
if (isLast)
{
// Blank out the fields that were not set, so no value from an earlier use of the
// reused document is left in them. The list is materialized first because the
// recursive calls below append to addedFields.
List<string> diff = LuceneConfig.IndexFields.Except(addedFields).ToList();
foreach (var d in diff)
{
SetDocumentFieldValue(doc, d, "");
}
addedFields.Clear();
}
}
LuceneSearchUtility.writer?.AddDocument(doc);
}

/// <summary>
Expand Down Expand Up @@ -3367,7 +3284,7 @@ internal void HideUnhideNamespace(bool hide, string library, string namespc)

internal void AddZeroTouchNodesToSearch(IEnumerable<FunctionGroup> functionGroups)
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();
foreach (var funcGroup in functionGroups)
AddZeroTouchNodeToSearch(funcGroup, iDoc);
}
Expand Down
12 changes: 2 additions & 10 deletions src/DynamoCore/Search/NodeSearchModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,19 @@
using System.Xml;
using Dynamo.Configuration;
using Dynamo.Graph.Nodes;
using Dynamo.Logging;
using Dynamo.Search.SearchElements;
using DynamoUtilities;
using Dynamo.Logging;
using Lucene.Net.Search;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Br;
using Lucene.Net.Analysis.Cjk;
using Lucene.Net.Analysis.Cz;
using Lucene.Net.Analysis.De;
using Lucene.Net.Analysis.En;
using Lucene.Net.Analysis.Es;
using Lucene.Net.Analysis.Fr;
using Lucene.Net.Analysis.It;
using Lucene.Net.Analysis.Ru;
using Lucene.Net.Analysis.En;
using Lucene.Net.Analysis.Standard;

namespace Dynamo.Search
Expand All @@ -27,13 +26,6 @@ namespace Dynamo.Search
/// </summary>
public class NodeSearchModel : SearchLibrary<NodeSearchElement, NodeModel>
{

// Holds the instance for the IndexSearcher
internal IndexSearcher Searcher;

// Used in DynamoModel for creating the StandardAnalyzer
internal Analyzer Analyzer;

/// <summary>
/// Construct a NodeSearchModel object
/// </summary>
Expand Down
Loading

0 comments on commit 452a2d3

Please sign in to comment.