From f2447e322259ada1ae81d737f69ba97964d0ea5b Mon Sep 17 00:00:00 2001 From: Shad Storhaug Date: Sun, 22 May 2022 21:21:13 +0700 Subject: [PATCH] Lucene.Net.Analysis.Fa/PersianAnalyzer: Reverted changes from #571 as was done in apache/lucene#904. Changed TestPersianStemFilter to use mocks. --- .../Analysis/Fa/PersianAnalyzer.cs | 31 ++----------------- .../Analysis/Fa/TestPersianStemFilter.cs | 23 ++++++++------ 2 files changed, 16 insertions(+), 38 deletions(-) diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs index c4fcef0c69..5fc4b38659 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs @@ -1,7 +1,6 @@ // Lucene version compatibility level 4.8.1 using Lucene.Net.Analysis.Ar; using Lucene.Net.Analysis.Core; -using Lucene.Net.Analysis.Miscellaneous; using Lucene.Net.Analysis.Standard; using Lucene.Net.Analysis.Util; using Lucene.Net.Util; @@ -81,14 +80,12 @@ private static CharArraySet LoadDefaultStopSet() // LUCENENET: Avoid static cons } } - private readonly CharArraySet stemExclusionSet; - /// /// Builds an analyzer with the default stop words: /// . /// public PersianAnalyzer(LuceneVersion matchVersion) - : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET) + : this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET) { } @@ -100,25 +97,8 @@ public PersianAnalyzer(LuceneVersion matchVersion) /// /// a stopword set public PersianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords) - : this(matchVersion, stopwords, CharArraySet.EMPTY_SET) - { - } - - /// - /// Builds an analyzer with the given stop word. If a none-empty stem exclusion set is - /// provided this analyzer will add a before - /// . - /// - /// - /// lucene compatibility version - /// - /// a stopword set - /// - /// a set of terms not to be stemmed - public PersianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet) - : base(matchVersion, stopwords) + : base(matchVersion, stopwords) { - this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet)); } /// @@ -153,12 +133,7 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN * the order here is important: the stopword list is normalized with the * above! */ - result = new StopFilter(m_matchVersion, result, m_stopwords); - if (stemExclusionSet.Count > 0) - { - result = new SetKeywordMarkerFilter(result, stemExclusionSet); - } - return new TokenStreamComponents(source, new PersianStemFilter(result)); + return new TokenStreamComponents(source, new StopFilter(m_matchVersion, result, m_stopwords)); } /// diff --git a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianStemFilter.cs b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianStemFilter.cs index f5916327e3..e4e5d7fa84 100644 --- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianStemFilter.cs +++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Fa/TestPersianStemFilter.cs @@ -1,7 +1,6 @@ // Lucene version compatibility level 9.2 using Lucene.Net.Analysis.Core; using Lucene.Net.Analysis.Miscellaneous; -using Lucene.Net.Analysis.Standard; using Lucene.Net.Analysis.Util; using NUnit.Framework; using System.IO; @@ -25,14 +24,20 @@ namespace Lucene.Net.Analysis.Fa * limitations under the License. */ - /// - /// Test the Persian Normalization Filter - /// - /// - + /// Test the Persian Normalization Filter public class TestPersianStemFilter : BaseTokenStreamTestCase { - internal PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT); + private Analyzer a; + + public override void SetUp() + { + base.SetUp(); + a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) => + { + Tokenizer source = new MockTokenizer(reader); + return new TokenStreamComponents(source, new PersianStemFilter(source)); + }); + } [Test] public virtual void TestAnSuffix() @@ -94,9 +99,7 @@ public virtual void TestWithKeywordAttribute() { CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true); set.Add("ساهدهات"); -#pragma warning disable 612, 618 - StandardTokenizer tokenStream = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات")); -#pragma warning restore 612, 618 + MockTokenizer tokenStream = new MockTokenizer(new StringReader("ساهدهات")); PersianStemFilter filter = new PersianStemFilter(new SetKeywordMarkerFilter(tokenStream, set)); AssertTokenStreamContents(filter, new string[] { "ساهدهات" });