From fb0277e2adf7550cbedd8941df23cd1c24f57888 Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Thu, 17 Aug 2023 20:15:23 -0500 Subject: [PATCH] [JNI] Adds HostColumnVector.EventHandler for spillability checks (#13898) This change adds the exact same API for event handling that was added to the device-side `ColumnVector` here https://github.com/rapidsai/cudf/pull/13279. We are going to need this to make `HostColumnVector`, or a batch of them, spillable in this spark-rapids feature: https://github.com/NVIDIA/spark-rapids/issues/8882. Authors: - Alessandro Bellina (https://github.com/abellina) Approvers: - Gera Shegalov (https://github.com/gerashegalov) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/13898 --- .../java/ai/rapids/cudf/ColumnVector.java | 4 +- .../java/ai/rapids/cudf/HostColumnVector.java | 45 ++++++++++++++++++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 29 ++++++++++++ 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index 0595d58c7cc..30e92d2367f 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -50,8 +50,8 @@ public interface EventHandler { * * @note the callback is invoked with this `ColumnVector`'s lock held. * - * @param cv - a reference to the ColumnVector we are closing - * @param refCount - the updated ref count for this ColumnVector at the time + * @param cv reference to the ColumnVector we are closing + * @param refCount the updated ref count for this ColumnVector at the time * of invocation */ void onClosed(ColumnVector cv, int refCount); diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java index 6cb7767784a..7993989825d 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -39,12 +39,31 @@ * and call incRefCount to increment the reference count. */ public final class HostColumnVector extends HostColumnVectorCore { + /** + * Interface to handle events for this HostColumnVector. Only invoked during + * close, hence `onClosed` is the only event. + */ + public interface EventHandler { + /** + * `onClosed` is invoked with the updated `refCount` during `close`. + * The last invocation of `onClosed` will be with `refCount=0`. + * + * @note the callback is invoked with this `HostColumnVector`'s lock held. + * + * @param cv reference to the HostColumnVector we are closing + * @param refCount the updated ref count for this HostColumnVector at + * the time of invocation + */ + void onClosed(HostColumnVector cv, int refCount); + } + /** * The size in bytes of an offset entry */ static final int OFFSET_SIZE = DType.INT32.getSizeInBytes(); private int refCount; + private EventHandler eventHandler; /** * Create a new column vector with data populated on the host. @@ -93,6 +112,27 @@ public HostColumnVector(DType type, long rows, Optional nullCount, incRefCountInternal(true); } + /** + * Set an event handler for this host vector. This method can be invoked with + * null to unset the handler. + * + * @param newHandler - the EventHandler to use from this point forward + * @return the prior event handler, or null if not set. + */ + public synchronized EventHandler setEventHandler(EventHandler newHandler) { + EventHandler prev = this.eventHandler; + this.eventHandler = newHandler; + return prev; + } + + /** + * Returns the current event handler for this HostColumnVector or null if no + * handler is associated. + */ + public synchronized EventHandler getEventHandler() { + return this.eventHandler; + } + /** * This is a really ugly API, but it is possible that the lifecycle of a column of * data may not have a clear lifecycle thanks to java and GC. This API informs the leak @@ -110,6 +150,9 @@ public void noWarnLeakExpected() { public synchronized void close() { refCount--; offHeap.delRef(); + if (eventHandler != null) { + eventHandler.onClosed(this, refCount); + } if (refCount == 0) { offHeap.clean(false); for( HostColumnVectorCore child : children) { diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index b462d70ccd2..0e1fbad6129 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -6791,6 +6791,18 @@ public void testEventHandlerIsCalledForEachClose() { assertEquals(1, onClosedWasCalled.get()); } + @Test + public void testHostEventHandlerIsCalledForEachClose() { + final AtomicInteger onClosedWasCalled = new AtomicInteger(0); + try (HostColumnVector cv = HostColumnVector.fromInts(1,2,3,4)) { + cv.setEventHandler((col, refCount) -> { + assertEquals(cv, col); + onClosedWasCalled.incrementAndGet(); + }); + } + assertEquals(1, onClosedWasCalled.get()); + } + @Test public void testEventHandlerIsNotCalledIfNotSet() { final AtomicInteger onClosedWasCalled = new AtomicInteger(0); @@ -6808,6 +6820,23 @@ public void testEventHandlerIsNotCalledIfNotSet() { assertEquals(0, onClosedWasCalled.get()); } + @Test + public void testHostEventHandlerIsNotCalledIfNotSet() { + final AtomicInteger onClosedWasCalled = new AtomicInteger(0); + try (HostColumnVector cv = HostColumnVector.fromInts(1,2,3,4)) { + assertNull(cv.getEventHandler()); + } + assertEquals(0, onClosedWasCalled.get()); + + try (HostColumnVector cv = HostColumnVector.fromInts(1,2,3,4)) { + cv.setEventHandler((col, refCount) -> { + onClosedWasCalled.incrementAndGet(); + }); + cv.setEventHandler(null); + } + assertEquals(0, onClosedWasCalled.get()); + } + /** * Test that the ColumnView with unknown null-counts still returns * the correct null-count when queried.