Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17338: [Java] The maximum request memory of BaseVariableWidthVector should limit to Integer.MAX_VALUE #13815

Merged
merged 4 commits into from
Aug 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector
implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE);
private int lastValueCapacity;
private long lastValueAllocationSizeInBytes;

Expand Down Expand Up @@ -430,9 +431,10 @@ public void allocateNew(int valueCount) {

/* Check if the data buffer size is within bounds. */
private void checkDataBufferSize(long size) {
if (size > MAX_ALLOCATION_SIZE || size < 0) {
if (size > MAX_BUFFER_SIZE || size < 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I assume the check for negative size values is for overflows, but even so, the appearance of a negative in the error message text below could be misleading as a literal reading would say that a negative number is more than the max allowed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made a small update to the error message to cover the overflow case.

throw new OversizedAllocationException("Memory required for vector " +
" is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
"is (" + size + "), which is overflow or more than max allowed (" + MAX_BUFFER_SIZE + "). " +
"You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types");
}
}

Expand All @@ -445,10 +447,10 @@ private long computeAndCheckOffsetsBufferSize(int valueCount) {
* an additional slot in offset buffer.
*/
final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH);
if (size > MAX_ALLOCATION_SIZE) {
if (size > MAX_BUFFER_SIZE) {
throw new OversizedAllocationException("Memory required for vector capacity " +
valueCount +
" is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
" is (" + size + "), which is more than max allowed (" + MAX_BUFFER_SIZE + ")");
}
return size;
}
Expand Down Expand Up @@ -514,13 +516,33 @@ public void reallocDataBuffer() {
newAllocationSize = INITIAL_BYTE_COUNT * 2L;
}
}
newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);

reallocDataBuffer(newAllocationSize);
}

/**
* Reallocate the data buffer to given size. Data Buffer stores the actual data for
* VARCHAR or VARBINARY elements in the vector. The actual allocated size may be larger
* than the requested one because the provided value is rounded up to the next
* power of two.
*
* @param desiredAllocSize the desired new allocation size
* @throws OversizedAllocationException if the desired new size is more than
* max allowed
* @throws OutOfMemoryException if the internal memory allocation fails
*/
public void reallocDataBuffer(long desiredAllocSize) {
if (desiredAllocSize == 0) {
return;
}

final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
assert newAllocationSize >= 1;

checkDataBufferSize(newAllocationSize);

final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity());
valueBuffer.getReferenceManager().release();
valueBuffer = newBuf;
lastValueAllocationSizeInBytes = valueBuffer.capacity();
Expand Down Expand Up @@ -1250,9 +1272,10 @@ protected final void handleSafe(int index, int dataLength) {
while (index >= getValueCapacity()) {
reallocValidityAndOffsetBuffers();
}
final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1);
while (valueBuffer.capacity() < (startOffset + dataLength)) {
reallocDataBuffer();
final long startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1);
final long targetCapacity = startOffset + dataLength;
if (valueBuffer.capacity() < targetCapacity) {
reallocDataBuffer(targetCapacity);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,25 @@ public void testNullableVarType2() {
}
}

/**
 * Verifies that growing the data buffer past the maximum allowed size is rejected.
 * Despite the "Success" in its name, this test passes only when an
 * {@link OversizedAllocationException} is thrown (see the {@code expected} attribute).
 */
@Test(expected = OversizedAllocationException.class)
public void testReallocateCheckSuccess() {

// Create a new variable-width binary vector. The allocateNew arguments are presumably
// 10 KiB of data space for 1024 values -- TODO confirm against allocateNew's signature.
try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) {
vector.allocateNew(1024 * 10, 1024);

vector.set(0, STR1);
// Sanity check: the value just written reads back unchanged.
assertArrayEquals(STR1, vector.get(0));

// Overwrite the int stored at byte position OFFSET_WIDTH of the offset buffer
// (presumably the offset slot for index 1 -- verify) with a value just below
// Integer.MAX_VALUE, simulating a vector whose data is almost at the size limit.
ArrowBuf offsetBuf = vector.getOffsetBuffer();
offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5);

// (Integer.MAX_VALUE - 5) + 6 exceeds Integer.MAX_VALUE, so this call is expected to
// throw OversizedAllocationException rather than reallocate the data buffer.
vector.setValueLengthSafe(1, 6);
}
}


/*
* generic tests
Expand Down