From 85ae9142eee0594405d25ba38efd1e866a099f7e Mon Sep 17 00:00:00 2001 From: Ketan Verma <9292653+ketanv3@users.noreply.github.com> Date: Tue, 28 Nov 2023 02:18:32 +0530 Subject: [PATCH] Improve boolean parsing performance (#11308) Signed-off-by: Ketan Verma Signed-off-by: Ketan Verma <9292653+ketanv3@users.noreply.github.com> Signed-off-by: Shivansh Arora --- CHANGELOG.md | 1 + .../java/org/opensearch/common/Booleans.java | 183 +++++++----------- .../org/opensearch/common/BooleansTests.java | 76 +++----- .../index/mapper/BooleanFieldMapper.java | 2 +- 4 files changed, 94 insertions(+), 168 deletions(-) rename {server => libs/common}/src/test/java/org/opensearch/common/BooleansTests.java (58%) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd7686fb02938..4b18d64f3d8a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -148,6 +148,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) +- Improve boolean parsing performance ([#11308](https://github.com/opensearch-project/OpenSearch/pull/11308)) ### Deprecated diff --git a/libs/common/src/main/java/org/opensearch/common/Booleans.java b/libs/common/src/main/java/org/opensearch/common/Booleans.java index 2ca061820b2eb..ab7ad37e92612 100644 --- a/libs/common/src/main/java/org/opensearch/common/Booleans.java +++ b/libs/common/src/main/java/org/opensearch/common/Booleans.java @@ -45,30 +45,72 @@ private Booleans() { /** * Parses a char[] representation of a boolean value to boolean. * - * @return true iff the sequence of chars is "true", false iff the sequence of chars is "false" or the - * provided default value iff either text is null or length == 0. + * @return true iff the sequence of chars is "true", false iff the sequence of + * chars is "false" or the provided default value iff either text is null or length == 0. * @throws IllegalArgumentException if the string cannot be parsed to boolean. */ public static boolean parseBoolean(char[] text, int offset, int length, boolean defaultValue) { - if (text == null || length == 0) { + if (text == null) { return defaultValue; - } else { - return parseBoolean(new String(text, offset, length)); } + + switch (length) { + case 0: + return defaultValue; + case 1: + case 2: + case 3: + default: + break; + case 4: + if (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e') { + return true; + } + break; + case 5: + if (text[offset] == 'f' + && text[offset + 1] == 'a' + && text[offset + 2] == 'l' + && text[offset + 3] == 's' + && text[offset + 4] == 'e') { + return false; + } + break; + } + + throw new IllegalArgumentException( + "Failed to parse value [" + new String(text, offset, length) + "] as only [true] or [false] are allowed." + ); } /** - * returns true iff the sequence of chars is one of "true","false". + * Returns true iff the sequence of chars is one of "true", "false". * * @param text sequence to check * @param offset offset to start * @param length length to check */ public static boolean isBoolean(char[] text, int offset, int length) { - if (text == null || length == 0) { + if (text == null) { return false; } - return isBoolean(new String(text, offset, length)); + + switch (length) { + case 0: + case 1: + case 2: + case 3: + default: + return false; + case 4: + return text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e'; + case 5: + return text[offset] == 'f' + && text[offset + 1] == 'a' + && text[offset + 2] == 'l' + && text[offset + 3] == 's' + && text[offset + 4] == 'e'; + } } public static boolean isBoolean(String value) { @@ -91,63 +133,45 @@ public static boolean parseBoolean(String value) { throw new IllegalArgumentException("Failed to parse value [" + value + "] as only [true] or [false] are allowed."); } - private static boolean hasText(CharSequence str) { - if (str == null || str.length() == 0) { - return false; - } - int strLen = str.length(); - for (int i = 0; i < strLen; i++) { - if (!Character.isWhitespace(str.charAt(i))) { - return true; - } - } - return false; - } - /** + * Parses a string representation of a boolean value to boolean. + * Note the subtle difference between this and {@link #parseBoolean(char[], int, int, boolean)}; this returns the + * default value even when the value is non-zero length containing all whitespaces (possibly overlooked, but + * preserving this behavior for compatibility reasons). Use {@link #parseBooleanStrict(String, boolean)} instead. * * @param value text to parse. - * @param defaultValue The default value to return if the provided value is null. + * @param defaultValue The default value to return if the provided value is null or blank. * @return see {@link #parseBoolean(String)} */ + @Deprecated public static boolean parseBoolean(String value, boolean defaultValue) { - if (hasText(value)) { - return parseBoolean(value); - } - return defaultValue; - } - - public static Boolean parseBoolean(String value, Boolean defaultValue) { - if (hasText(value)) { - return parseBoolean(value); + if (value == null || value.isBlank()) { + return defaultValue; } - return defaultValue; + return parseBoolean(value); } - /** - * Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}. - * - * @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, Boolean)} instead. - */ @Deprecated - public static Boolean parseBooleanLenient(String value, Boolean defaultValue) { - if (value == null) { // only for the null case we do that here! + public static Boolean parseBoolean(String value, Boolean defaultValue) { + if (value == null || value.isBlank()) { return defaultValue; } - return parseBooleanLenient(value, false); + return parseBoolean(value); } /** - * Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}. + * Parses a string representation of a boolean value to boolean. + * Analogous to {@link #parseBoolean(char[], int, int, boolean)}. * - * @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(String, boolean)} instead. + * @return true iff the sequence of chars is "true", false iff the sequence of + * chars is "false", or the provided default value iff either text is null or length == 0. + * @throws IllegalArgumentException if the string cannot be parsed to boolean. */ - @Deprecated - public static boolean parseBooleanLenient(String value, boolean defaultValue) { - if (value == null) { + public static boolean parseBooleanStrict(String value, boolean defaultValue) { + if (value == null || value.length() == 0) { return defaultValue; } - return !(value.equals("false") || value.equals("0") || value.equals("off") || value.equals("no")); + return parseBoolean(value); } /** @@ -163,71 +187,4 @@ public static boolean isFalse(String value) { public static boolean isTrue(String value) { return "true".equals(value); } - - /** - * Returns {@code false} if text is in "false", "0", "off", "no"; else, {@code true}. - * - * @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #parseBoolean(char[], int, int, boolean)} instead - */ - @Deprecated - public static boolean parseBooleanLenient(char[] text, int offset, int length, boolean defaultValue) { - if (text == null || length == 0) { - return defaultValue; - } - if (length == 1) { - return text[offset] != '0'; - } - if (length == 2) { - return !(text[offset] == 'n' && text[offset + 1] == 'o'); - } - if (length == 3) { - return !(text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f'); - } - if (length == 5) { - return !(text[offset] == 'f' - && text[offset + 1] == 'a' - && text[offset + 2] == 'l' - && text[offset + 3] == 's' - && text[offset + 4] == 'e'); - } - return true; - } - - /** - * returns true if the a sequence of chars is one of "true","false","on","off","yes","no","0","1" - * - * @param text sequence to check - * @param offset offset to start - * @param length length to check - * - * @deprecated Only kept to provide automatic upgrades for pre 6.0 indices. Use {@link #isBoolean(char[], int, int)} instead. - */ - @Deprecated - public static boolean isBooleanLenient(char[] text, int offset, int length) { - if (text == null || length == 0) { - return false; - } - if (length == 1) { - return text[offset] == '0' || text[offset] == '1'; - } - if (length == 2) { - return (text[offset] == 'n' && text[offset + 1] == 'o') || (text[offset] == 'o' && text[offset + 1] == 'n'); - } - if (length == 3) { - return (text[offset] == 'o' && text[offset + 1] == 'f' && text[offset + 2] == 'f') - || (text[offset] == 'y' && text[offset + 1] == 'e' && text[offset + 2] == 's'); - } - if (length == 4) { - return (text[offset] == 't' && text[offset + 1] == 'r' && text[offset + 2] == 'u' && text[offset + 3] == 'e'); - } - if (length == 5) { - return (text[offset] == 'f' - && text[offset + 1] == 'a' - && text[offset + 2] == 'l' - && text[offset + 3] == 's' - && text[offset + 4] == 'e'); - } - return false; - } - } diff --git a/server/src/test/java/org/opensearch/common/BooleansTests.java b/libs/common/src/test/java/org/opensearch/common/BooleansTests.java similarity index 58% rename from server/src/test/java/org/opensearch/common/BooleansTests.java rename to libs/common/src/test/java/org/opensearch/common/BooleansTests.java index 7e4a0ad8e456b..578ec742d126d 100644 --- a/server/src/test/java/org/opensearch/common/BooleansTests.java +++ b/libs/common/src/test/java/org/opensearch/common/BooleansTests.java @@ -34,10 +34,7 @@ import org.opensearch.test.OpenSearchTestCase; -import java.util.Locale; - import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.nullValue; public class BooleansTests extends OpenSearchTestCase { private static final String[] NON_BOOLEANS = new String[] { @@ -81,8 +78,23 @@ public void testParseBooleanWithFallback() { assertFalse(Booleans.parseBoolean(null, Boolean.FALSE)); assertTrue(Booleans.parseBoolean(null, Boolean.TRUE)); + assertFalse(Booleans.parseBoolean("", false)); + assertTrue(Booleans.parseBoolean("", true)); + assertNull(Booleans.parseBoolean("", null)); + assertFalse(Booleans.parseBoolean("", Boolean.FALSE)); + assertTrue(Booleans.parseBoolean("", Boolean.TRUE)); + + assertFalse(Booleans.parseBoolean(" \t\n", false)); + assertTrue(Booleans.parseBoolean(" \t\n", true)); + assertNull(Booleans.parseBoolean(" \t\n", null)); + assertFalse(Booleans.parseBoolean(" \t\n", Boolean.FALSE)); + assertTrue(Booleans.parseBoolean(" \t\n", Boolean.TRUE)); + assertTrue(Booleans.parseBoolean("true", randomFrom(Boolean.TRUE, Boolean.FALSE, null))); assertFalse(Booleans.parseBoolean("false", randomFrom(Boolean.TRUE, Boolean.FALSE, null))); + + assertTrue(Booleans.parseBoolean(new char[0], 0, 0, true)); + assertFalse(Booleans.parseBoolean(new char[0], 0, 0, false)); } public void testParseNonBooleanWithFallback() { @@ -109,56 +121,12 @@ public void testParseNonBoolean() { } } - public void testIsBooleanLenient() { - String[] booleans = new String[] { "true", "false", "on", "off", "yes", "no", "0", "1" }; - String[] notBooleans = new String[] { "11", "00", "sdfsdfsf", "F", "T" }; - assertThat(Booleans.isBooleanLenient(null, 0, 1), is(false)); - - for (String b : booleans) { - String t = "prefix" + b + "suffix"; - assertTrue( - "failed to recognize [" + b + "] as boolean", - Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), b.length()) - ); - } - - for (String nb : notBooleans) { - String t = "prefix" + nb + "suffix"; - assertFalse("recognized [" + nb + "] as boolean", Booleans.isBooleanLenient(t.toCharArray(), "prefix".length(), nb.length())); - } - } - - public void testParseBooleanLenient() { - assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomBoolean()), is(true)); - assertThat(Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomBoolean()), is(false)); - assertThat(Booleans.parseBooleanLenient(randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT), randomBoolean()), is(true)); - assertThat(Booleans.parseBooleanLenient(null, false), is(false)); - assertThat(Booleans.parseBooleanLenient(null, true), is(true)); - - assertThat( - Booleans.parseBooleanLenient(randomFrom("true", "on", "yes", "1"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)), - is(true) - ); - assertThat( - Booleans.parseBooleanLenient(randomFrom("false", "off", "no", "0"), randomFrom(Boolean.TRUE, Boolean.FALSE, null)), - is(false) - ); - assertThat( - Booleans.parseBooleanLenient( - randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT), - randomFrom(Boolean.TRUE, Boolean.FALSE, null) - ), - is(true) - ); - assertThat(Booleans.parseBooleanLenient(null, Boolean.FALSE), is(false)); - assertThat(Booleans.parseBooleanLenient(null, Boolean.TRUE), is(true)); - assertThat(Booleans.parseBooleanLenient(null, null), nullValue()); - - char[] chars = randomFrom("true", "on", "yes", "1").toCharArray(); - assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true)); - chars = randomFrom("false", "off", "no", "0").toCharArray(); - assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(false)); - chars = randomFrom("true", "on", "yes").toUpperCase(Locale.ROOT).toCharArray(); - assertThat(Booleans.parseBooleanLenient(chars, 0, chars.length, randomBoolean()), is(true)); + public void testParseBooleanStrict() { + assertTrue(Booleans.parseBooleanStrict("true", false)); + assertFalse(Booleans.parseBooleanStrict("false", true)); + assertTrue(Booleans.parseBooleanStrict(null, true)); + assertFalse(Booleans.parseBooleanStrict("", false)); + expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict("foobar", false)); + expectThrows(IllegalArgumentException.class, () -> Booleans.parseBooleanStrict(" \t\n", false)); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java index 22eeed205e2c8..3c7925809415a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java @@ -193,7 +193,7 @@ protected Boolean parseSourceValue(Object value) { return (Boolean) value; } else { String textValue = value.toString(); - return Booleans.parseBoolean(textValue.toCharArray(), 0, textValue.length(), false); + return Booleans.parseBooleanStrict(textValue, false); } } };