Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport -2.x] Check UTF16 string size before converting to String to avoid OOME #8344

Merged
merged 1 commit into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Refactor] Metadata members from ImmutableOpenMap to j.u.Map ([#7165](https://github.com/opensearch-project/OpenSearch/pull/7165))
- [Refactor] more ImmutableOpenMap to jdk Map in cluster package ([#7301](https://github.com/opensearch-project/OpenSearch/pull/7301))
- [Refactor] ImmutableOpenMap to j.u.Map in IndexMetadata ([#7306](https://github.com/opensearch-project/OpenSearch/pull/7306))
- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))

### Deprecated

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.UnicodeUtil;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.core.xcontent.XContentBuilder;

Expand All @@ -49,6 +50,7 @@
public abstract class AbstractBytesReference implements BytesReference {

private Integer hash = null; // we cache the hash of this reference since it can be quite costly to re-calculate it
// Default limit returned by getMaxUTF16Length(): utf8ToString() compares the decoded UTF-16 length
// against that limit. The value is Integer.MAX_VALUE shifted right by one bit, i.e. 1073741823.
private static final int MAX_UTF16_LENGTH = Integer.MAX_VALUE >> 1;

@Override
public int getInt(int index) {
Expand Down Expand Up @@ -80,9 +82,19 @@ public void writeTo(OutputStream os) throws IOException {
}
}

/**
 * Returns the largest UTF-16 length (in {@code char}s) that {@link #utf8ToString()} accepts
 * before it refuses to build a {@link String}.
 * <p>
 * The method is {@code protected} and non-final, so a subclass may override it to supply a
 * different limit.
 *
 * @return the maximum permitted UTF-16 length
 */
protected int getMaxUTF16Length() {
    return MAX_UTF16_LENGTH;
}

/**
 * Decodes the bytes of this reference as UTF-8 and returns the result as a {@link String}.
 * <p>
 * The bytes are first decoded into a scratch {@code char[]}; the decoded length is then
 * checked against {@link #getMaxUTF16Length()} before the {@link String} is created, so an
 * over-long value is rejected with an exception instead of being copied into a new String.
 *
 * @return the decoded string
 * @throws IllegalArgumentException if the decoded UTF-16 length is greater than
 *                                  {@link #getMaxUTF16Length()}
 */
@Override
public String utf8ToString() {
    final BytesRef bytesRef = toBytesRef();
    // Scratch buffer of one char per input byte: a UTF-8 sequence never decodes to more
    // UTF-16 code units than it has bytes, so this is always large enough.
    final char[] ref = new char[bytesRef.length];
    final int len = UnicodeUtil.UTF8toUTF16(bytesRef, ref);
    // Read the (overridable) limit once so the check and the message agree.
    final int maxLength = getMaxUTF16Length();
    if (len > maxLength) {
        throw new IllegalArgumentException("UTF16 String size is " + len + ", should be less than " + maxLength);
    }
    return new String(ref, 0, len);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,45 @@ public void testToUtf8() throws IOException {
// TODO: good way to test?
}

/**
 * Verifies that {@code utf8ToString()} rejects a value whose decoded UTF-16 length is above
 * the configured maximum, and that the exception message reports both the size and the limit.
 */
public void testUTF8toString_ExceedsMaxLength() {
    final AbstractBytesReference reference = new TestAbstractBytesReference();

    final IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class, reference::utf8ToString);

    assertTrue(thrown.getMessage().contains("UTF16 String size is"));
    assertTrue(thrown.getMessage().contains("should be less than"));
}

/**
 * Minimal {@link AbstractBytesReference} stub for the max-length test: it reports a maximum
 * UTF-16 length of 1 while {@link #toBytesRef()} supplies a multi-character payload. All
 * other abstract members are inert placeholders.
 */
static class TestAbstractBytesReference extends AbstractBytesReference {

    // --- members the max-length test relies on ---

    /** Lowers the limit to a single char so any longer payload exceeds it. */
    @Override
    public int getMaxUTF16Length() {
        return 1;
    }

    /** Supplies a fixed payload that is longer than the limit above. */
    @Override
    public BytesRef toBytesRef() {
        return new BytesRef("UTF16 length exceed test");
    }

    // --- placeholder implementations of the remaining abstract members ---

    @Override
    public int length() {
        return 0;
    }

    @Override
    public byte get(int index) {
        return 0;
    }

    @Override
    public long ramBytesUsed() {
        return 0;
    }

    @Override
    public BytesReference slice(int from, int length) {
        return null;
    }
}

public void testToBytesRef() throws IOException {
int length = randomIntBetween(0, PAGE_SIZE);
BytesReference pbr = newBytesReference(length);
Expand Down