Skip to content

Commit

Permalink
Check UTF16 string size before converting to String to avoid OOME (op…
Browse files Browse the repository at this point in the history
…ensearch-project#7963)

* Check UTF16 string size before converting to string to avoid OOME

Signed-off-by: Rishab Nahata <[email protected]>
Signed-off-by: Shivansh Arora <[email protected]>
  • Loading branch information
imRishN authored and shiv0408 committed Apr 25, 2024
1 parent 8f938b7 commit a235643
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Snapshot Interop] Add Changes in Create Snapshot Flow for remote store interoperability. ([#7118](https://github.com/opensearch-project/OpenSearch/pull/7118))
- Allow insecure string settings to warn-log usage and advise to migration of a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496))
- Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652))
- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))
- Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908))


### Deprecated

### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.UnicodeUtil;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.core.xcontent.XContentBuilder;

Expand All @@ -49,6 +50,7 @@
public abstract class AbstractBytesReference implements BytesReference {

private Integer hash = null; // we cache the hash of this reference since it can be quite costly to re-calculated it
private static final int MAX_UTF16_LENGTH = Integer.MAX_VALUE >> 1;

@Override
public int getInt(int index) {
Expand Down Expand Up @@ -80,9 +82,19 @@ public void writeTo(OutputStream os) throws IOException {
}
}

protected int getMaxUTF16Length() {
return MAX_UTF16_LENGTH;
}

@Override
public String utf8ToString() {
return toBytesRef().utf8ToString();
BytesRef bytesRef = toBytesRef();
final char[] ref = new char[bytesRef.length];
final int len = UnicodeUtil.UTF8toUTF16(bytesRef, ref);
if (len > getMaxUTF16Length()) {
throw new IllegalArgumentException("UTF16 String size is " + len + ", should be less than " + getMaxUTF16Length());
}
return new String(ref, 0, len);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,45 @@ public void testToUtf8() throws IOException {
// TODO: good way to test?
}

public void testUTF8toString_ExceedsMaxLength() {
AbstractBytesReference abr = new TestAbstractBytesReference();
IllegalArgumentException e = assertThrows(IllegalArgumentException.class, abr::utf8ToString);
assertTrue(e.getMessage().contains("UTF16 String size is"));
assertTrue(e.getMessage().contains("should be less than"));
}

static class TestAbstractBytesReference extends AbstractBytesReference {
@Override
public byte get(int index) {
return 0;
}

@Override
public int length() {
return 0;
}

@Override
public BytesReference slice(int from, int length) {
return null;
}

@Override
public long ramBytesUsed() {
return 0;
}

@Override
public BytesRef toBytesRef() {
return new BytesRef("UTF16 length exceed test");
}

@Override
public int getMaxUTF16Length() {
return 1;
}
}

public void testToBytesRef() throws IOException {
int length = randomIntBetween(0, PAGE_SIZE);
BytesReference pbr = newBytesReference(length);
Expand Down

0 comments on commit a235643

Please sign in to comment.