Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lucene search in package manager #14098

Merged
merged 11 commits into from
Jun 27, 2023
45 changes: 37 additions & 8 deletions src/DynamoCore/Configuration/LuceneConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ internal class LuceneConfig
/// </summary>
internal static int FuzzySearchWeight = 2;

/// <summary>
/// Parent directory where information is indexed.
/// </summary>
internal static string Index = "Index";

/// <summary>
/// Directory where node info is indexed
/// </summary>
internal static string NodesIndexingDirectory = "Nodes";

/// <summary>
/// Directory where package info is indexed
/// </summary>
internal static string PackagesIndexingDirectory = "Packages";

/// <summary>
/// This represents the fields that will be indexed when initializing Lucene Search
/// </summary>
Expand Down Expand Up @@ -107,17 +122,31 @@ public enum IndexFieldsEnum
/// <summary>
/// Documentation - Documentation of the node
/// </summary>
Documentation
Documentation,

/// <summary>
/// Hosts - Package hosts
/// </summary>
Hosts
}

/// <summary>
/// Fields to be indexed by Lucene Search
/// Nodes Fields to be indexed by Lucene Search
/// </summary>
public static string[] NodeIndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.FullCategoryName),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.DocName),
nameof(IndexFieldsEnum.Documentation)};


/// <summary>
/// Package Fields to be indexed by Lucene Search
/// </summary>
public static string[] IndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.FullCategoryName),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.DocName),
nameof(IndexFieldsEnum.Documentation)};
public static string[] PackageIndexFields = { nameof(IndexFieldsEnum.Name),
nameof(IndexFieldsEnum.Description),
nameof(IndexFieldsEnum.SearchKeywords),
nameof(IndexFieldsEnum.Hosts)};
}
}
125 changes: 21 additions & 104 deletions src/DynamoCore/Models/DynamoModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
using DynamoServices;
using Greg;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
Expand Down Expand Up @@ -133,17 +132,16 @@ public partial class DynamoModel : IDynamoModel, IDisposable, IEngineControllerM
private Timer backupFilesTimer;
private Dictionary<Guid, string> backupFilesDict = new Dictionary<Guid, string>();
internal readonly Stopwatch stopwatch = Stopwatch.StartNew();
internal IndexWriter writer;
internal Lucene.Net.Store.Directory indexDir;
internal DirectoryReader dirReader;
internal List<string> addedFields;

/// <summary>
/// Indicating if ASM is loaded correctly, defaulting to true because integrators most likely have code for ASM preloading
/// During sandbox initializing, Dynamo checks specifically if ASM loading was correct
/// </summary>
internal bool IsASMLoaded = true;

// Lucene search utility to perform indexing operations.
internal LuceneSearchUtility LuceneSearchUtility { get; set; }

#endregion

#region static properties
Expand Down Expand Up @@ -618,34 +616,6 @@ public static DynamoModel Start(IStartConfiguration configuration)
// Token representing the standard library directory
internal static readonly string StandardLibraryToken = @"%StandardLibrary%";


private void InitializeLuceneConfig()
{
addedFields = new List<string>();

DirectoryInfo webBrowserUserDataFolder;
var userDataDir = new DirectoryInfo(pathManager.UserDataDirectory);
webBrowserUserDataFolder = userDataDir.Exists ? userDataDir : null;

string indexPath = Path.Combine(webBrowserUserDataFolder.FullName, "Index");
indexDir = Lucene.Net.Store.FSDirectory.Open(indexPath);

// Create an analyzer to process the text
SearchModel.Analyzer = SearchModel.CreateAnalyzerByLanguage(PreferenceSettings.Locale);

// When tests run in parallel, several of them try to write to the AppData folder at once, which makes the job
// fail and leaves it in a bad state, so we skip initializing the Lucene index writer in test mode.
if (!IsTestMode)
{
// Create an index writer
IndexWriterConfig indexConfig = new IndexWriterConfig(LuceneConfig.LuceneNetVersion, SearchModel.Analyzer)
{
OpenMode = OpenMode.CREATE
};
writer = new IndexWriter(indexDir, indexConfig);
}
}

/// <summary>
/// Default constructor for DynamoModel
/// </summary>
Expand Down Expand Up @@ -958,7 +928,8 @@ protected DynamoModel(IStartConfiguration config)

CustomNodeManager = new CustomNodeManager(NodeFactory, MigrationManager, LibraryServices);

InitializeLuceneConfig();
LuceneSearchUtility = new LuceneSearchUtility(this);
LuceneSearchUtility.InitializeLuceneConfig(LuceneConfig.NodesIndexingDirectory);

InitializeCustomNodeManager();

Expand Down Expand Up @@ -1389,8 +1360,8 @@ public void Dispose()
}

// Lucene disposals (just if LuceneNET was initialized)
indexDir?.Dispose();
dirReader?.Dispose();
LuceneSearchUtility.indexDir?.Dispose();
LuceneSearchUtility.dirReader?.Dispose();

#if DEBUG
CurrentWorkspace.NodeAdded -= CrashOnDemand.CurrentWorkspace_NodeAdded;
Expand Down Expand Up @@ -1474,7 +1445,7 @@ private void InitializeCustomNodeManager()

private void InitializeIncludedNodes()
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();

var customNodeData = new TypeLoadData(typeof(Function));
NodeFactory.AddLoader(new CustomNodeLoader(CustomNodeManager, IsTestMode));
Expand Down Expand Up @@ -1643,13 +1614,12 @@ private void InitializeNodeLibrary()
// Without the index files on disk, the dirReader can't be initialized correctly, and neither can the searcher.
if (!IsTestMode)
{
dirReader = writer?.GetReader(applyAllDeletes: true);
IndexSearcher searcher = new IndexSearcher(dirReader);
SearchModel.Searcher = searcher;
LuceneSearchUtility.dirReader = LuceneSearchUtility.writer?.GetReader(applyAllDeletes: true);
LuceneSearchUtility.Searcher = new IndexSearcher(LuceneSearchUtility.dirReader);

writer?.Commit();
writer?.Dispose();
writer = null;
LuceneSearchUtility.writer?.Commit();
LuceneSearchUtility.writer?.Dispose();
LuceneSearchUtility.writer = null;
}
}

Expand Down Expand Up @@ -1702,7 +1672,7 @@ internal void LoadNodeLibrary(Assembly assem, bool suppressZeroTouchLibraryLoad

private void LoadNodeModels(List<TypeLoadData> nodes, bool isPackageMember)
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();
foreach (var type in nodes)
{
// Protect ourselves from exceptions thrown by malformed third party nodes.
Expand Down Expand Up @@ -3268,33 +3238,6 @@ private NodeModelSearchElement AddNodeTypeToSearch(TypeLoadData typeLoadData)
return node;
}

/// <summary>
/// Initialize Lucene index document object for reuse
/// </summary>
/// <returns></returns>
private Document InitializeIndexDocument()
{
if (IsTestMode) return null;

var name = new TextField(nameof(LuceneConfig.IndexFieldsEnum.Name), string.Empty, Field.Store.YES);
var fullCategory = new TextField(nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), string.Empty, Field.Store.YES);
var description = new TextField(nameof(LuceneConfig.IndexFieldsEnum.Description), string.Empty, Field.Store.YES);
var keywords = new TextField(nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), string.Empty, Field.Store.YES);
var docName = new StringField(nameof(LuceneConfig.IndexFieldsEnum.DocName), string.Empty, Field.Store.YES);
var fullDoc = new TextField(nameof(LuceneConfig.IndexFieldsEnum.Documentation), string.Empty, Field.Store.YES);

var d = new Document()
{
fullCategory,
name,
description,
keywords,
fullDoc,
docName
};
return d;
}

/// <summary>
/// Add node information to Lucene index
/// </summary>
Expand All @@ -3303,40 +3246,14 @@ private Document InitializeIndexDocument()
private void AddNodeTypeToSearchIndex(NodeSearchElement node, Document doc)
{
if (IsTestMode) return;
if (addedFields == null) return;
if (LuceneSearchUtility.addedFields == null) return;

SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), node.FullCategoryName);
SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Name), node.Name);
SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0) SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), node.SearchKeywords.Aggregate((x, y) => x + " " + y), true, true);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.FullCategoryName), node.FullCategoryName);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Name), node.Name);
LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.Description), node.Description);
if (node.SearchKeywords.Count > 0) LuceneSearchUtility.SetDocumentFieldValue(doc, nameof(LuceneConfig.IndexFieldsEnum.SearchKeywords), node.SearchKeywords.Aggregate((x, y) => x + " " + y), true, true);

writer?.AddDocument(doc);
}

//TODO:
//isLast option is used for the last value set in the document, and it will fetch all the other fields not set for the document and add them with an empty string.
//isTextField is used when the value needs to be tokenized (broken down into pieces), whereas StringFields are not tokenized (the whole value is indexed as a single token).
//The SetDocumentFieldValue method should be optimized later
private void SetDocumentFieldValue(Document doc, string field, string value, bool isTextField = true, bool isLast = false)
{
addedFields.Add(field);
if (isTextField && !field.Equals("DocName"))
{
((TextField)doc.GetField(field)).SetStringValue(value);
}
else
{
((StringField)doc.GetField(field)).SetStringValue(value);
}
if (isLast)
{
List<string> diff = LuceneConfig.IndexFields.Except(addedFields).ToList();
foreach (var d in diff)
{
SetDocumentFieldValue(doc, d, "");
}
addedFields.Clear();
}
LuceneSearchUtility.writer?.AddDocument(doc);
}

/// <summary>
Expand Down Expand Up @@ -3367,7 +3284,7 @@ internal void HideUnhideNamespace(bool hide, string library, string namespc)

internal void AddZeroTouchNodesToSearch(IEnumerable<FunctionGroup> functionGroups)
{
var iDoc = InitializeIndexDocument();
var iDoc = LuceneSearchUtility.InitializeIndexDocumentForNodes();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RobertGlobant20 Not related to this task, but maybe to yours: can we explore a way to append search info to existing index files? This would work for either new nodes or packages.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@QilongTang yes, right now I'm exploring how to append search info for the task of showing node-packages installed in the package search results.

foreach (var funcGroup in functionGroups)
AddZeroTouchNodeToSearch(funcGroup, iDoc);
}
Expand Down
12 changes: 2 additions & 10 deletions src/DynamoCore/Search/NodeSearchModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,19 @@
using System.Xml;
using Dynamo.Configuration;
using Dynamo.Graph.Nodes;
using Dynamo.Logging;
using Dynamo.Search.SearchElements;
using DynamoUtilities;
using Dynamo.Logging;
using Lucene.Net.Search;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Br;
using Lucene.Net.Analysis.Cjk;
using Lucene.Net.Analysis.Cz;
using Lucene.Net.Analysis.De;
using Lucene.Net.Analysis.En;
using Lucene.Net.Analysis.Es;
using Lucene.Net.Analysis.Fr;
using Lucene.Net.Analysis.It;
using Lucene.Net.Analysis.Ru;
using Lucene.Net.Analysis.En;
using Lucene.Net.Analysis.Standard;

namespace Dynamo.Search
Expand All @@ -27,13 +26,6 @@ namespace Dynamo.Search
/// </summary>
public class NodeSearchModel : SearchLibrary<NodeSearchElement, NodeModel>
{

// Holds the instance for the IndexSearcher
internal IndexSearcher Searcher;

// Used in DynamoModel for creating the StandardAnalyzer
internal Analyzer Analyzer;

/// <summary>
/// Construct a NodeSearchModel object
/// </summary>
Expand Down
Loading