Skip to content

Commit

Permalink
TO REVERT: Lucene.Net.ICU: Added locking to ICUTokenizer to only allow a single thread to manipulate the BreakIterator at a time. This can be reverted when the BreakIterator issue is fixed.
Browse files Browse the repository at this point in the history
  • Loading branch information
NightOwl888 committed Aug 24, 2020
1 parent 08018b2 commit ffc8f2a
Showing 1 changed file with 18 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ public sealed class ICUTokenizer : Tokenizer
private readonly ITypeAttribute typeAtt;
private readonly IScriptAttribute scriptAtt;

// Lock object taken around calls that manipulate the break iterator (`breaker`).
// It is static, so it is shared by every ICUTokenizer instance: only one thread
// at a time can be inside any section guarded by it, regardless of which
// tokenizer instance that thread is using.
private static readonly object syncLock = new object(); // LUCENENET specific - workaround until BreakIterator is made thread safe (LUCENENET TODO: TO REVERT)

/// <summary>
/// Construct a new <see cref="ICUTokenizer"/> that breaks text into words from the given
/// <see cref="TextReader"/>.
Expand Down Expand Up @@ -109,23 +111,27 @@ public ICUTokenizer(AttributeFactory factory, TextReader input, ICUTokenizerConf

/// <summary>
/// Advances to the next token, refilling the internal buffer as needed.
/// </summary>
/// <returns>
/// <c>true</c> if a token was produced; <c>false</c> once a refill yields no
/// more data (<c>length &lt;= 0</c>).
/// </returns>
public override bool IncrementToken()
{
    // LUCENENET specific - the whole step runs under syncLock so that no other
    // thread can manipulate the break iterator while this one is reading from it.
    // Refill() takes the same lock around its SetText call; C# locks are
    // re-entrant on the owning thread, so the nested acquisition does not deadlock.
    lock (syncLock)
    {
        ClearAttributes();
        if (length == 0)
        {
            Refill();
        }

        while (!IncrementTokenBuffer())
        {
            // Current buffer is exhausted: pull in more text and try again.
            Refill();
            if (length <= 0) // no more bytes to read;
            {
                return false;
            }
        }

        return true;
    }
}


/// <summary>
/// Resets the tokenizer: calls the base reset, points the break iterator at an
/// empty range of the buffer, and zeroes the buffer bookkeeping
/// (<c>length</c>, <c>usableLength</c>, <c>offset</c>).
/// </summary>
public override void Reset()
{
    base.Reset();

    // LUCENENET specific - only touch the break iterator while holding syncLock,
    // so no other thread can manipulate it at the same time.
    lock (syncLock)
    {
        breaker.SetText(buffer, 0, 0);
    }

    length = usableLength = offset = 0;
}

Expand Down Expand Up @@ -187,7 +193,8 @@ private void Refill()
*/
}

breaker.SetText(buffer, 0, Math.Max(0, usableLength));
lock (syncLock)
breaker.SetText(buffer, 0, Math.Max(0, usableLength));
}

// TODO: refactor to a shared readFully somewhere
Expand Down Expand Up @@ -236,7 +243,7 @@ private bool IncrementTokenBuffer()
offsetAtt.SetOffset(CorrectOffset(offset + start), CorrectOffset(offset + end));
typeAtt.Type = config.GetType(breaker.ScriptCode, breaker.RuleStatus);
scriptAtt.Code = breaker.ScriptCode;

return true;
}
}
Expand Down

0 comments on commit ffc8f2a

Please sign in to comment.