From a235643ae81fb87ba6949bea5b4843c551476ea5 Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Thu, 29 Jun 2023 10:44:02 +0530 Subject: [PATCH] Check UTF16 string size before converting to String to avoid OOME (#7963) * Check UTF16 string size before converting to string to avoid OOME Signed-off-by: Rishab Nahata Signed-off-by: Shivansh Arora --- CHANGELOG.md | 2 + .../common/bytes/AbstractBytesReference.java | 14 ++++++- .../bytes/AbstractBytesReferenceTestCase.java | 39 +++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93b2296475fd1..240c016524752 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,8 +139,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Snapshot Interop] Add Changes in Create Snapshot Flow for remote store interoperability. ([#7118](https://github.com/opensearch-project/OpenSearch/pull/7118)) - Allow insecure string settings to warn-log usage and advise to migration of a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496)) - Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652)) +- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963)) - Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908)) + ### Deprecated ### Removed diff --git a/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java b/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java index 7b3c71321e4f0..043d45223498e 100644 --- a/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java +++ b/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java @@ -33,6 +33,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefIterator; +import org.apache.lucene.util.UnicodeUtil; import org.opensearch.common.io.stream.StreamInput; import org.opensearch.core.xcontent.XContentBuilder; @@ -49,6 +50,7 @@ public abstract class AbstractBytesReference implements BytesReference { private Integer hash = null; // we cache the hash of this reference since it can be quite costly to re-calculated it + private static final int MAX_UTF16_LENGTH = Integer.MAX_VALUE >> 1; @Override public int getInt(int index) { @@ -80,9 +82,19 @@ public void writeTo(OutputStream os) throws IOException { } } + protected int getMaxUTF16Length() { + return MAX_UTF16_LENGTH; + } + @Override public String utf8ToString() { - return toBytesRef().utf8ToString(); + BytesRef bytesRef = toBytesRef(); + final char[] ref = new char[bytesRef.length]; + final int len = UnicodeUtil.UTF8toUTF16(bytesRef, ref); + if (len > getMaxUTF16Length()) { + throw new IllegalArgumentException("UTF16 String size is " + len + ", should be less than " + getMaxUTF16Length()); + } + return new String(ref, 0, len); } @Override diff --git a/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java b/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java index dca46c37ca7d2..dd71711f9154c 100644 --- a/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java +++ b/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java @@ -478,6 +478,45 @@ public void testToUtf8() throws IOException { // TODO: good way to test? } + public void testUTF8toString_ExceedsMaxLength() { + AbstractBytesReference abr = new TestAbstractBytesReference(); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, abr::utf8ToString); + assertTrue(e.getMessage().contains("UTF16 String size is")); + assertTrue(e.getMessage().contains("should be less than")); + } + + static class TestAbstractBytesReference extends AbstractBytesReference { + @Override + public byte get(int index) { + return 0; + } + + @Override + public int length() { + return 0; + } + + @Override + public BytesReference slice(int from, int length) { + return null; + } + + @Override + public long ramBytesUsed() { + return 0; + } + + @Override + public BytesRef toBytesRef() { + return new BytesRef("UTF16 length exceed test"); + } + + @Override + public int getMaxUTF16Length() { + return 1; + } + } + public void testToBytesRef() throws IOException { int length = randomIntBetween(0, PAGE_SIZE); BytesReference pbr = newBytesReference(length);