Skip to content

Commit

Permalink
feat: initial functional C Data interface
Browse files Browse the repository at this point in the history
  • Loading branch information
vibhatha committed Jun 4, 2024
1 parent 00e6eb7 commit ae2a169
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,16 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.util.VisibleForTesting;
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DurationVector;
Expand Down Expand Up @@ -117,10 +121,19 @@ private ArrowBuf importOffsets(ArrowType type, long bytesPerSlot) {
return importBuffer(type, 1, capacity);
}

private ArrowBuf importView(ArrowType type) {
final long capacity = (long) fieldNode.getLength() * BaseVariableWidthViewVector.ELEMENT_SIZE;
return importBuffer(type, 1, capacity);
}

private ArrowBuf importData(ArrowType type, long capacity) {
return importBuffer(type, 2, capacity);
}

private ArrowBuf importViewData(ArrowType type, int index, long capacity) {
return importBuffer(type, index, capacity);
}

private ArrowBuf maybeImportBitmap(ArrowType type) {
checkState(
buffers.length > 0,
Expand Down Expand Up @@ -210,9 +223,46 @@ public List<ArrowBuf> visit(ArrowType.Utf8 type) {
}
}

private List<ArrowBuf> visitVariableWidthView(ArrowType type) {
try (ArrowBuf view = importView(type)) {
List<ArrowBuf> buffers = new ArrayList<>();
view.getReferenceManager().retain();
ArrowBuf maybeValidityBuffer = maybeImportBitmap(type);
buffers.add(maybeValidityBuffer);
buffers.add(view);
final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
// Map to store the data buffer index and the total length of data in that buffer
Map<Integer, Long> dataBufferInfo = new HashMap<>();
for (int i = 0; i < fieldNode.getLength(); i++) {
final int length = view.getInt((long) i * elementSize);
if (length > BaseVariableWidthViewVector.INLINE_SIZE) {
assert maybeValidityBuffer != null;
if (BitVectorHelper.get(maybeValidityBuffer, i) == 1) {
final int bufferIndex =
view.getInt(((long) i * elementSize) + lengthWidth + prefixWidth);
if (dataBufferInfo.containsKey(bufferIndex)) {
dataBufferInfo.compute(bufferIndex, (key, value) -> value != null ? value + (long) length : 0);
} else {
dataBufferInfo.put(bufferIndex, (long) length);
}
}
}
}
// fixed buffers for Utf8View or BinaryView are validity and view buffers
final int fixedBufferCount = 2;
// import data buffers
for (Map.Entry<Integer, Long> entry : dataBufferInfo.entrySet()) {
buffers.add(importViewData(type, entry.getKey() + fixedBufferCount, entry.getValue()));
}
return buffers;
}
}

@Override
public List<ArrowBuf> visit(ArrowType.Utf8View type) {
throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported");
return visitVariableWidthView(type);
}

@Override
Expand Down Expand Up @@ -245,7 +295,7 @@ public List<ArrowBuf> visit(ArrowType.Binary type) {

@Override
public List<ArrowBuf> visit(ArrowType.BinaryView type) {
throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported");
return visitVariableWidthView(type);
}

@Override
Expand Down
8 changes: 8 additions & 0 deletions java/c/src/main/java/org/apache/arrow/c/Format.java
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ static String asString(ArrowType arrowType) {
}
case Utf8:
return "u";
case Utf8View:
return "vu";
case BinaryView:
return "VZ";
case NONE:
throw new IllegalArgumentException("Arrow type ID is NONE");
default:
Expand Down Expand Up @@ -290,6 +294,10 @@ static ArrowType asType(String format, long flags)
case "+m":
boolean keysSorted = (flags & Flags.ARROW_FLAG_MAP_KEYS_SORTED) != 0;
return new ArrowType.Map(keysSorted);
case "vu":
return new ArrowType.Utf8View();
case "VZ":
return new ArrowType.BinaryView();
default:
String[] parts = format.split(":", 2);
if (parts.length == 2) {
Expand Down
40 changes: 25 additions & 15 deletions java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ViewVarBinaryVector;
import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.compare.VectorEqualsVisitor;
import org.apache.arrow.vector.complex.FixedSizeListVector;
Expand Down Expand Up @@ -518,21 +519,30 @@ public void testVarBinaryVector() {
}

@Test
public void testViewVarBinaryVector() {
// TODO: fix the required methods
/*
* 1. `getFieldBuffers()` in `BaseVariableWidthViewVector`
* 2. `getChildrenFromFields()` in `BaseVariableWidthViewVector`
* 3. `getFieldBuffers()` in `BaseVariableWidthViewVector`
* 4. `exportCDataBuffers` in `BaseVariableWidthViewVector`
* 5. `initializeChildrenFromFields` in `BaseVariableWidthViewVector`
* 6. `ArrayImporter` -> `BufferImportTypeVisitor` -> and usual steps to get buffers and populate them (update `visit(ArrowType.Utf8View type)` method
* 7. `loadFieldBuffers` in `BaseVariableWidthViewVector`
* 8. `transferPair` in `BaseVariableWidthViewVector`
* 9. `RangeEqualVisitor` -> visit(`BaseVariableWidthVector` left, Range range) -> `compareBaseVariableWidthVectors(Range range)`
*/
try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v", allocator)) {
setVector(vector, "abc".getBytes(), "def".getBytes(), null);
public void testViewVector() {
// VarCharViewVector with short strings
try (final ViewVarCharVector vector = new ViewVarCharVector("v1", allocator)) {
setVector(vector, "abc".getBytes(StandardCharsets.UTF_8), "def".getBytes(StandardCharsets.UTF_8), null);
assertTrue(roundtrip(vector, ViewVarCharVector.class));
}

// VarCharViewVector with long strings
try (final ViewVarCharVector vector = new ViewVarCharVector("v2", allocator)) {
setVector(vector, "01234567890123".getBytes(StandardCharsets.UTF_8),
"01234567890123567".getBytes(StandardCharsets.UTF_8), null);
assertTrue(roundtrip(vector, ViewVarCharVector.class));
}

// VarBinaryViewVector with short values
try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v3", allocator)) {
setVector(vector, "abc".getBytes(StandardCharsets.UTF_8), "def".getBytes(StandardCharsets.UTF_8), null);
assertTrue(roundtrip(vector, ViewVarBinaryVector.class));
}

// VarBinaryViewVector with long values
try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v4", allocator)) {
setVector(vector, "01234567890123".getBytes(StandardCharsets.UTF_8),
"01234567890123567".getBytes(StandardCharsets.UTF_8), null);
assertTrue(roundtrip(vector, ViewVarBinaryVector.class));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1670,8 +1670,20 @@ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {

@Override
public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, long nullValue) {
// TODO: Implement this method
throw new UnsupportedOperationException(
"exportCDataBuffers is not supported for VariableWidthVector");
exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);

if (viewBuffer.capacity() == 0) {
// Empty view buffer is allowed here.
// We set `retain = false` to explicitly not increase the ref count for the exported buffer.
// The ref count of the newly created buffer (i.e., 1) already represents the usage
// of the imported side.
exportBuffer(allocator.buffer(INITIAL_BYTE_COUNT), buffers, buffersPtr, nullValue, false);
} else {
exportBuffer(viewBuffer, buffers, buffersPtr, nullValue, true);
}

for (int i = 0; i < dataBuffers.size(); i++) {
exportBuffer(dataBuffers.get(i), buffers, buffersPtr, nullValue, true);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -205,14 +205,12 @@ public void setSafe(int index, NullableViewVarBinaryHolder holder) {
*/
@Override
public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
// TODO: https://github.com/apache/arrow/issues/40932
throw new UnsupportedOperationException("Unsupported operation");
return new TransferImpl(ref, allocator);
}

@Override
public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
// TODO: https://github.com/apache/arrow/issues/40932
throw new UnsupportedOperationException("Unsupported operation");
return new TransferImpl(field, allocator);
}

/**
Expand All @@ -223,7 +221,42 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
*/
@Override
public TransferPair makeTransferPair(ValueVector to) {
// TODO: https://github.com/apache/arrow/issues/40932
throw new UnsupportedOperationException("Unsupported operation");
return new TransferImpl((ViewVarBinaryVector) to);
}

private class TransferImpl implements TransferPair {
ViewVarBinaryVector to;

public TransferImpl(String ref, BufferAllocator allocator) {
to = new ViewVarBinaryVector(ref, field.getFieldType(), allocator);
}

public TransferImpl(Field field, BufferAllocator allocator) {
to = new ViewVarBinaryVector(field, allocator);
}

public TransferImpl(ViewVarBinaryVector to) {
this.to = to;
}

@Override
public ViewVarBinaryVector getTo() {
return to;
}

@Override
public void transfer() {
transferTo(to);
}

@Override
public void splitAndTransfer(int startIndex, int length) {
splitAndTransferTo(startIndex, length, to);
}

@Override
public void copyValueSafe(int fromIndex, int toIndex) {
to.copyFromSafe(fromIndex, toIndex, ViewVarBinaryVector.this);
}
}
}

0 comments on commit ae2a169

Please sign in to comment.