Lucene.Net.Analysis.Fa/PersianAnalyzer: Reverted changes from apache#571 as was done in apache/lucene#904. Changed TestPersianStemFilter to use mocks.
NightOwl888 committed May 22, 2022
1 parent 5a89b32 commit f2447e3
Showing 2 changed files with 16 additions and 38 deletions.
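
With the three-argument constructor and its stem-exclusion handling reverted out of PersianAnalyzer, callers that still need to protect certain terms from stemming can mark them as keywords in front of PersianStemFilter themselves. Below is a minimal sketch, not part of this commit, built from types that appear in the diff; the WhitespaceTokenizer and the variable names are illustrative choices.

```csharp
// Sketch only: keep selected terms out of PersianStemFilter now that the
// stem-exclusion constructor has been removed from PersianAnalyzer.
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Fa;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;

// Terms added to this set are marked as keywords and skipped by the stemmer.
CharArraySet exclusions = new CharArraySet(LuceneVersion.LUCENE_48, 1, true); // true = ignore case
exclusions.Add("ساهدهات"); // the term used by TestWithKeywordAttribute below

Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
    // WhitespaceTokenizer is an illustrative stand-in; the tests below use MockTokenizer.
    Tokenizer source = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
    TokenStream stream = new SetKeywordMarkerFilter(source, exclusions);
    return new TokenStreamComponents(source, new PersianStemFilter(stream));
});
```

The same chain without SetKeywordMarkerFilter is what the new SetUp method in the test file builds with a MockTokenizer.
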
31 changes: 3 additions & 28 deletions src/Lucene.Net.Analysis.Common/Analysis/Fa/PersianAnalyzer.cs
@@ -1,7 +1,6 @@
// Lucene version compatibility level 4.8.1
using Lucene.Net.Analysis.Ar;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
@@ -81,14 +80,12 @@ private static CharArraySet LoadDefaultStopSet() // LUCENENET: Avoid static cons
}
}

private readonly CharArraySet stemExclusionSet;

/// <summary>
/// Builds an analyzer with the default stop words:
/// <see cref="DEFAULT_STOPWORD_FILE"/>.
/// </summary>
public PersianAnalyzer(LuceneVersion matchVersion)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
: this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET)
{
}

@@ -100,25 +97,8 @@ public PersianAnalyzer(LuceneVersion matchVersion)
/// <param name="stopwords">
/// a stopword set </param>
public PersianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords)
: this(matchVersion, stopwords, CharArraySet.EMPTY_SET)
{
}

/// <summary>
/// Builds an analyzer with the given stop word. If a none-empty stem exclusion set is
/// provided this analyzer will add a <see cref="SetKeywordMarkerFilter"/> before
/// <see cref="PersianStemFilter"/>.
/// </summary>
/// <param name="matchVersion">
/// lucene compatibility version </param>
/// <param name="stopwords">
/// a stopword set </param>
/// <param name="stemExclusionSet">
/// a set of terms not to be stemmed </param>
public PersianAnalyzer(LuceneVersion matchVersion, CharArraySet stopwords, CharArraySet stemExclusionSet)
: base(matchVersion, stopwords)
: base(matchVersion, stopwords)
{
this.stemExclusionSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(matchVersion, stemExclusionSet));
}

/// <summary>
@@ -153,12 +133,7 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN
* the order here is important: the stopword list is normalized with the
* above!
*/
result = new StopFilter(m_matchVersion, result, m_stopwords);
if (stemExclusionSet.Count > 0)
{
result = new SetKeywordMarkerFilter(result, stemExclusionSet);
}
return new TokenStreamComponents(source, new PersianStemFilter(result));
return new TokenStreamComponents(source, new StopFilter(m_matchVersion, result, m_stopwords));
}

/// <summary>
23 changes: 13 additions & 10 deletions TestPersianStemFilter.cs
@@ -1,7 +1,6 @@
// Lucene version compatibility level 9.2
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Miscellaneous;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
using NUnit.Framework;
using System.IO;
@@ -25,14 +24,20 @@ namespace Lucene.Net.Analysis.Fa
* limitations under the License.
*/

/// <summary>
/// Test the Persian Normalization Filter
///
/// </summary>

/// <summary>Test the Persian Normalization Filter</summary>
public class TestPersianStemFilter : BaseTokenStreamTestCase
{
internal PersianAnalyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
private Analyzer a;

public override void SetUp()
{
base.SetUp();
a = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
{
Tokenizer source = new MockTokenizer(reader);
return new TokenStreamComponents(source, new PersianStemFilter(source));
});
}

[Test]
public virtual void TestAnSuffix()
@@ -94,9 +99,7 @@ public virtual void TestWithKeywordAttribute()
{
CharArraySet set = new CharArraySet(TEST_VERSION_CURRENT, 1, true);
set.Add("ساهدهات");
#pragma warning disable 612, 618
StandardTokenizer tokenStream = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader("ساهدهات"));
#pragma warning restore 612, 618
MockTokenizer tokenStream = new MockTokenizer(new StringReader("ساهدهات"));

PersianStemFilter filter = new PersianStemFilter(new SetKeywordMarkerFilter(tokenStream, set));
AssertTokenStreamContents(filter, new string[] { "ساهدهات" });
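
The collapsed test bodies above (TestAnSuffix and the rest) run single terms through the analyzer built in SetUp. Here is a sketch of the shape such a test takes, assuming BaseTokenStreamTestCase.CheckOneTerm and using placeholder strings rather than the commit's actual Persian test data.

```csharp
[Test]
public virtual void TestStemSketch() // hypothetical name, not one of the commit's tests
{
    // CheckOneTerm analyzes the input with the analyzer built in SetUp and asserts
    // that exactly one token with the expected text is produced. Both strings here
    // are placeholders, not the real Persian inputs used by the tests above.
    CheckOneTerm(a, "inputWord", "expectedStem");
}
```
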