From 4ed70f2c3cb183679b39565786508d95922ef4de Mon Sep 17 00:00:00 2001
From: elliVM <126466762+elliVM@users.noreply.github.com>
Date: Fri, 19 Apr 2024 12:24:56 +0300
Subject: [PATCH] set blf_01 Tokenizer max token count to 0 so entangled tokens are not generated (#31)

---
 src/main/scala/com/teragrep/functions/dpf_03/TokenizerUDF.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/com/teragrep/functions/dpf_03/TokenizerUDF.java b/src/main/scala/com/teragrep/functions/dpf_03/TokenizerUDF.java
index 9299f71..aa12570 100644
--- a/src/main/scala/com/teragrep/functions/dpf_03/TokenizerUDF.java
+++ b/src/main/scala/com/teragrep/functions/dpf_03/TokenizerUDF.java
@@ -64,7 +64,7 @@ public class TokenizerUDF implements UDF1> {
     public List call(String s) throws Exception {
         if (tokenizer == null) {
             // "lazy" init
-            tokenizer = new Tokenizer(32);
+            tokenizer = new Tokenizer(0);
         }
 
         // create empty Scala immutable List