diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index 866dd9e218fc1..2a89590bf8440 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -46,6 +46,7 @@ public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthVector, FieldVector, VectorDefinitionSetter { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; + private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); private int lastValueCapacity; private long lastValueAllocationSizeInBytes; @@ -430,9 +431,10 @@ public void allocateNew(int valueCount) { /* Check if the data buffer size is within bounds. */ private void checkDataBufferSize(long size) { - if (size > MAX_ALLOCATION_SIZE || size < 0) { + if (size > MAX_BUFFER_SIZE || size < 0) { throw new OversizedAllocationException("Memory required for vector " + - " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + "is (" + size + "), which is overflow or more than max allowed (" + MAX_BUFFER_SIZE + "). " + + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); } } @@ -445,10 +447,10 @@ private long computeAndCheckOffsetsBufferSize(int valueCount) { * an additional slot in offset buffer. 
*/ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH); - if (size > MAX_ALLOCATION_SIZE) { + if (size > MAX_BUFFER_SIZE) { throw new OversizedAllocationException("Memory required for vector capacity " + valueCount + - " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")"); + " is (" + size + "), which is more than max allowed (" + MAX_BUFFER_SIZE + ")"); } return size; } @@ -514,13 +516,33 @@ public void reallocDataBuffer() { newAllocationSize = INITIAL_BYTE_COUNT * 2L; } } - newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + + reallocDataBuffer(newAllocationSize); + } + + /** + * Reallocate the data buffer to the given size. The data buffer stores the actual data for + * VARCHAR or VARBINARY elements in the vector. The actual allocated size may be larger than + * the requested one because the provided value is rounded up to the nearest power of two. + * A desired size of 0 is a no-op. + * + * @param desiredAllocSize the desired new allocation size + * @throws OversizedAllocationException if the rounded-up size is more than + * the max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocDataBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + + final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); assert newAllocationSize >= 1; checkDataBufferSize(newAllocationSize); final ArrowBuf newBuf = allocator.buffer(newAllocationSize); - newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity); + newBuf.setBytes(0, valueBuffer, 0, valueBuffer.capacity()); valueBuffer.getReferenceManager().release(); valueBuffer = newBuf; lastValueAllocationSizeInBytes = valueBuffer.capacity(); @@ -1250,9 +1272,10 @@ protected final void handleSafe(int index, int dataLength) { while (index >= getValueCapacity()) { reallocValidityAndOffsetBuffers(); } - final int startOffset = lastSet < 0 ? 
0 : getStartOffset(lastSet + 1); - while (valueBuffer.capacity() < (startOffset + dataLength)) { - reallocDataBuffer(); + final long startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1); + final long targetCapacity = startOffset + dataLength; + if (valueBuffer.capacity() < targetCapacity) { + reallocDataBuffer(targetCapacity); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 516daa2362280..0928d3eb03082 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -1137,6 +1137,25 @@ public void testNullableVarType2() { } } + @Test(expected = OversizedAllocationException.class) + public void testReallocateCheckSuccess() { + + // Create a new variable-width binary vector. + try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); + + vector.set(0, STR1); + // Check the sample strings. + assertArrayEquals(STR1, vector.get(0)); + + // Overwrite the offset at byte position OFFSET_WIDTH with a value close to Integer.MAX_VALUE; the call below is then expected to throw + ArrowBuf offsetBuf = vector.getOffsetBuffer(); + offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); + + vector.setValueLengthSafe(1, 6); + } + } + /* * generic tests