From 6e500d1c4187bb54ce296022e4005005cbc22253 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 31 Jan 2022 09:01:14 -0500
Subject: [PATCH 01/14] Add timing chart for libcudf build metrics report page
(#10038)
Adds a build-time chart, similar to ninjatracing output, to the "Build Metrics Report" in a cudf build.
Other changes include
- showing times in minutes, seconds, or milliseconds as appropriate
- showing sizes in MB, KB, or bytes as appropriate
- adding a 4th color (white) for compile times less than 1s
- changing the logic to identify entries uniquely by hash instead of by name (both are recorded in .ninja_log; see the sketch below)
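For context, here is a minimal sketch (not part of this patch) of the .ninja_log v5 layout and of
de-duplicating entries by command hash; the function and variable names are illustrative only:

    # Each .ninja_log v5 line: start_ms  end_ms  mtime  output_path  cmd_hash
    # Keying on cmd_hash keeps one entry per compile command even when the same
    # output is rebuilt; a backwards end timestamp marks the start of a newer build.
    def parse_ninja_log(path):
        files = {}
        last = 0
        with open(path, "r") as log:
            for line in log:
                fields = line.split()
                if len(fields) < 5:  # skip the "# ninja log v5" header line
                    continue
                start, end = int(fields[0]), int(fields[1])
                if end < last:  # timestamps restarted: a newer build began
                    files = {}
                last = end
                files.setdefault(fields[4], (fields[3], start, end))
        return files

    for name, start, end in parse_ninja_log(".ninja_log").values():
        print(end - start, "ms", name)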
Authors:
- David Wendt (https://github.com/davidwendt)
Approvers:
- Vukasin Milovanovic (https://github.com/vuule)
- AJ Schmidt (https://github.com/ajschmidt8)
- Karthikeyan (https://github.com/karthikeyann)
URL: https://github.com/rapidsai/cudf/pull/10038
---
build.sh | 1 +
ci/cpu/build.sh | 1 +
cpp/scripts/sort_ninja_log.py | 281 +++++++++++++++++++++++++++-------
3 files changed, 228 insertions(+), 55 deletions(-)
diff --git a/build.sh b/build.sh
index 45074a6645f..c2eba134c35 100755
--- a/build.sh
+++ b/build.sh
@@ -230,6 +230,7 @@ if buildAll || hasArg libcudf; then
fi
echo "$MSG"
python ${REPODIR}/cpp/scripts/sort_ninja_log.py ${LIB_BUILD_DIR}/.ninja_log --fmt html --msg "$MSG" > ${LIB_BUILD_DIR}/ninja_log.html
+ cp ${LIB_BUILD_DIR}/.ninja_log ${LIB_BUILD_DIR}/ninja.log
fi
if [[ ${INSTALL_TARGET} != "" ]]; then
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index f23296038f2..6f19f174da0 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -85,6 +85,7 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
gpuci_logger "Copying build metrics results"
mkdir -p "$WORKSPACE/build-metrics"
cp "$LIBCUDF_BUILD_DIR/ninja_log.html" "$WORKSPACE/build-metrics/BuildMetrics.html"
+ cp "$LIBCUDF_BUILD_DIR/ninja.log" "$WORKSPACE/build-metrics/ninja.log"
fi
gpuci_logger "Build conda pkg for libcudf_kafka"
diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py
index bac6697da82..33c369b254f 100755
--- a/cpp/scripts/sort_ninja_log.py
+++ b/cpp/scripts/sort_ninja_log.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
#
import argparse
import os
@@ -34,49 +34,63 @@
# build a map of the log entries
entries = {}
with open(log_file, "r") as log:
+ last = 0
+ files = {}
for line in log:
entry = line.split()
if len(entry) > 4:
- elapsed = int(entry[1]) - int(entry[0])
obj_file = entry[3]
file_size = (
os.path.getsize(os.path.join(log_path, obj_file))
if os.path.exists(obj_file)
else 0
)
- entries[entry[3]] = (elapsed, file_size)
+ start = int(entry[0])
+ end = int(entry[1])
+ # logic based on ninjatracing
+ if end < last:
+ files = {}
+ last = end
+ files.setdefault(entry[4], (entry[3], start, end, file_size))
+
+ # build entries from files dict
+ for entry in files.values():
+ entries[entry[0]] = (entry[1], entry[2], entry[3])
+
-# check file could be loaded
+# check file could be loaded and we have entries to report
if len(entries) == 0:
print("Could not parse", log_file)
exit()
-# sort the keys by build time (descending order)
-keys = list(entries.keys())
-sl = sorted(keys, key=lambda k: entries[k][0], reverse=True)
+# sort the entries by build-time (descending order)
+sorted_list = sorted(
+ list(entries.keys()),
+ key=lambda k: entries[k][1] - entries[k][0],
+ reverse=True,
+)
-if output_fmt == "xml":
- # output results in XML format
+# output results in XML format
+def output_xml(entries, sorted_list, args):
root = ET.Element("testsuites")
testsuite = ET.Element(
"testsuite",
attrib={
"name": "build-time",
- "tests": str(len(keys)),
+ "tests": str(len(sorted_list)),
"failures": str(0),
"errors": str(0),
},
)
root.append(testsuite)
- for key in sl:
- entry = entries[key]
- elapsed = float(entry[0]) / 1000
+ for name in sorted_list:
+ entry = entries[name]
+ build_time = float(entry[1] - entry[0]) / 1000
item = ET.Element(
"testcase",
attrib={
"classname": "BuildTime",
- "name": key,
- "time": str(elapsed),
+ "name": name,
+ "time": str(build_time),
},
)
testsuite.append(item)
@@ -85,62 +99,219 @@
xmlstr = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" ")
print(xmlstr)
-elif output_fmt == "html":
- # output results in HTML format
- print("
Sorted Ninja Build Times")
- # Note: Jenkins does not support style defined in the html
+
+# utility converts a millisecond value to a column width in pixels
+def time_to_width(value, end):
+ # map a value from (0,end) to (0,1000)
+ r = (float(value) / float(end)) * 1000.0
+ return int(r)
+
+
+# assign each entry to a thread by analyzing the start/end times and
+# slotting them into thread buckets where they fit
+def assign_entries_to_threads(entries):
+ # first sort the entries' keys by end timestamp
+ sorted_keys = sorted(
+ list(entries.keys()), key=lambda k: entries[k][1], reverse=True
+ )
+
+ # build the chart data by assigning entries to threads
+ results = {}
+ threads = []
+ for name in sorted_keys:
+ entry = entries[name]
+
+ # assign this entry by finding the first available thread identified
+ # by the thread's current start time greater than the entry's end time
+ tid = -1
+ for t in range(len(threads)):
+ if threads[t] >= entry[1]:
+ threads[t] = entry[0]
+ tid = t
+ break
+
+ # if no current thread found, create a new one with this entry
+ if tid < 0:
+ threads.append(entry[0])
+ tid = len(threads) - 1
+
+ # add entry name to the array associated with this tid
+ if tid not in results.keys():
+ results[tid] = []
+ results[tid].append(name)
+
+ # first entry has the last end time
+ end_time = entries[sorted_keys[0]][1]
+
+ # return the threaded entries and the last end time
+ return (results, end_time)
+
+
+# output chart results in HTML format
+def output_html(entries, sorted_list, args):
+ print("Build Metrics Report")
+ # Note: Jenkins does not support javascript nor style defined in the html
# https://www.jenkins.io/doc/book/security/configuring-content-security-policy/
print("")
if args.msg is not None:
print("", args.msg, "
")
- print("")
- print(
- "File | ",
- "Compile time (ms) | ",
- "Size (bytes) |
---|
",
- sep="",
- )
- summary = {"red": 0, "yellow": 0, "green": 0}
+
+ # map entries to threads
+ # the end_time is used to scale all the entries to a fixed output width
+ threads, end_time = assign_entries_to_threads(entries)
+
+ # color ranges for build times
+ summary = {"red": 0, "yellow": 0, "green": 0, "white": 0}
red = "bgcolor='#FFBBD0'"
yellow = "bgcolor='#FFFF80'"
green = "bgcolor='#AAFFBD'"
- for key in sl:
- result = entries[key]
- elapsed = result[0]
- color = green
- if elapsed > 300000: # 5 minutes
- color = red
- summary["red"] += 1
- elif elapsed > 120000: # 2 minutes
- color = yellow
- summary["yellow"] += 1
- else:
- summary["green"] += 1
+ white = "bgcolor='#FFFFFF'"
+
+ # create the build-time chart
+ print("")
+ for tid in range(len(threads)):
+ names = threads[tid]
+ # sort the names for this thread by start time
+ names = sorted(names, key=lambda k: entries[k][0])
+
+ # use the last entry's end time as the total row size
+ # (this is an estimate and does not have to be exact)
+ last_entry = entries[names[len(names) - 1]]
+ last_time = time_to_width(last_entry[1], end_time)
print(
- "<tr><td",
- color,
- ">",
- key,
- "</td><td align='right'>",
- result[0],
- "</td><td align='right'>",
- result[1],
- "</td></tr>",
+ "<tr><td><table width='",
+ last_time,
+ "px' border='0' cellspacing='1' cellpadding='0'><tr>",
sep="",
)
+
+ prev_end = 0 # used for spacing between entries
+
+ # write out each entry for this thread as a column for a single row
+ for name in names:
+ entry = entries[name]
+ start = entry[0]
+ end = entry[1]
+
+ # this handles minor gaps between end of the
+ # previous entry and the start of the next
+ if prev_end > 0 and start > prev_end:
+ size = time_to_width(start - prev_end, end_time)
+ print(" | ")
+ # adjust for the cellspacing
+ prev_end = end + int(end_time / 500)
+
+ # format the build-time
+ build_time = end - start
+ build_time_str = str(build_time) + " ms"
+ if build_time > 120000: # 2 minutes
+ minutes = int(build_time / 60000)
+ seconds = int(((build_time / 60000) - minutes) * 60)
+ build_time_str = "{:d}:{:02d} min".format(minutes, seconds)
+ elif build_time > 1000:
+ build_time_str = "{:.3f} s".format(build_time / 1000)
+
+ # assign color and accumulate legend values
+ color = white
+ if build_time > 300000: # 5 minutes
+ color = red
+ summary["red"] += 1
+ elif build_time > 120000: # 2 minutes
+ color = yellow
+ summary["yellow"] += 1
+ elif build_time > 1000: # 1 second
+ color = green
+ summary["green"] += 1
+ else:
+ summary["white"] += 1
+
+ # compute the pixel width based on build-time
+ size = max(time_to_width(build_time, end_time), 2)
+ # output the column for this entry
+ print("", end="")
+ # use a slightly smaller, fixed-width font
+ print("", end="")
+
+ # add the file-name if it fits, otherwise, truncate the name
+ file_name = os.path.basename(name)
+ if len(file_name) + 3 > size / 7:
+ abbr_size = int(size / 7) - 3
+ if abbr_size > 1:
+ print(file_name[:abbr_size], "...", sep="", end="")
+ else:
+ print(file_name, end="")
+ # done with this entry
+ print(" | ")
+ # update the entry with just the computed output info
+ entries[name] = (build_time_str, color, entry[2])
+
+ # add a filler column at the end of each row
+ print(" | |
")
+
+ # done with the chart
+ print("
")
+
+ # output detail table in build-time descending order
+ print("")
+ print(
+ "<tr><th>File</th>",
+ "<th>Compile time</th>",
+ "<th>Size</th></tr>",
+ sep="",
+ )
+ for name in sorted_list:
+ entry = entries[name]
+ build_time_str = entry[0]
+ color = entry[1]
+ file_size = entry[2]
+
+ # format file size
+ file_size_str = ""
+ if file_size > 1000000:
+ file_size_str = "{:.3f} MB".format(file_size / 1000000)
+ elif file_size > 1000:
+ file_size_str = "{:.3f} KB".format(file_size / 1000)
+ elif file_size > 0:
+ file_size_str = str(file_size) + " bytes"
+
+ # output entry row
+ print("
", name, " | ", sep="", end="")
+ print("", build_time_str, " | ", sep="", end="")
+ print("", file_size_str, " |
", sep="")
+
+ print("
")
+
# include summary table with color legend
+ print("")
print("time > 5 minutes | ")
print("", summary["red"], " |
")
print("2 minutes < time < 5 minutes | ")
print("", summary["yellow"], " |
")
- print("time < 2 minutes | ")
+ print("
1 second < time < 2 minutes | ")
print("", summary["green"], " |
")
+ print("time < 1 second | ")
+ print("", summary["white"], " |
")
print("
")
-else:
- # output results in CSV format
+
+# output results in CSV format
+def output_csv(entries, sorted_list, args):
print("time,size,file")
- for key in sl:
- result = entries[key]
- print(result[0], result[1], key, sep=",")
+ for name in sorted_list:
+ entry = entries[name]
+ build_time = entry[1] - entry[0]
+ file_size = entry[2]
+ print(build_time, file_size, name, sep=",")
+
+
+if output_fmt == "xml":
+ output_xml(entries, sorted_list, args)
+elif output_fmt == "html":
+ output_html(entries, sorted_list, args)
+else:
+ output_csv(entries, sorted_list, args)
From b217d7ea9fc77d4ff8eee41460f4aa657046268a Mon Sep 17 00:00:00 2001
From: Alfred Xu
Date: Mon, 31 Jan 2022 23:06:55 +0800
Subject: [PATCH 02/14] JNI: Rewrite growBuffersAndRows to accelerate the
HostColumnBuilder (#10025)
According to https://github.com/NVIDIA/spark-rapids/issues/4393, this PR takes several measures to speed up buffer growth during the build of `HostColumnVector`:
1. Introduce `rowCapacity` to cache the maximum number of rows/bytes (see the sizing sketch below)
2. Introduce the pure Java method `byteSizeOfNullMask` to get the size of the validity buffer
3. Reorganize the code structure to reduce the number of method calls
I have tested this PR with the spark-rapids tests locally.
BTW, shall we clean up the `HostColumnVector.Builder` and replace all the usages of `Builder` with `ColumnBuilder`?
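As a rough illustration (not part of this patch), the two sizing rules the builder relies on can be
sketched in Python; the names here are illustrative, and the 64-byte padding mirrors what
cudf::bitmask_allocation_size_bytes computes:

    # One validity bit per row, rounded up to whole bytes, then padded
    # to a 64-byte boundary (cudf's bitmask allocation granularity).
    def validity_buffer_size(num_rows):
        actual_bytes = (num_rows + 7) >> 3      # ceil(bits / 8)
        return ((actual_bytes + 63) >> 6) << 6  # pad up to a 64-byte multiple

    # Capacity doubles until the requested row count fits, so repeated
    # appends amortize to O(1) rather than reallocating on every call.
    def grown_capacity(row_capacity, rows):
        while rows > row_capacity:
            row_capacity *= 2
        return row_capacity

    assert validity_buffer_size(1) == 64
    assert validity_buffer_size(513) == 128
    assert grown_capacity(1024, 3000) == 4096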
Authors:
- Alfred Xu (https://github.com/sperlingxx)
Approvers:
- Robert (Bobby) Evans (https://github.com/revans2)
URL: https://github.com/rapidsai/cudf/pull/10025
---
.../main/java/ai/rapids/cudf/ColumnView.java | 24 +-
java/src/main/java/ai/rapids/cudf/DType.java | 4 +-
.../java/ai/rapids/cudf/HostColumnVector.java | 322 +++++++++++-------
java/src/main/native/src/ColumnViewJni.cpp | 10 -
.../ai/rapids/cudf/ByteColumnVectorTest.java | 106 ++++--
.../ai/rapids/cudf/ColumnBuilderHelper.java | 158 +++++++++
.../java/ai/rapids/cudf/ColumnVectorTest.java | 1 -
.../rapids/cudf/DecimalColumnVectorTest.java | 64 +++-
.../rapids/cudf/DoubleColumnVectorTest.java | 54 ++-
.../ai/rapids/cudf/IntColumnVectorTest.java | 82 +++--
.../ai/rapids/cudf/LongColumnVectorTest.java | 82 +++--
11 files changed, 650 insertions(+), 257 deletions(-)
create mode 100644 java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 422311fc8e0..cc1fa46becb 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -3248,7 +3248,7 @@ public final ColumnVector listIndexOf(Scalar key, FindOptions findOption) {
* The index is set to null if one of the following is true:
* 1. The search key row is null.
* 2. The list row is null.
- * @param key ColumnView of search keys.
+ * @param keys ColumnView of search keys.
* @param findOption Whether to find the first index of the key, or the last.
* @return The resultant column of int32 indices
*/
@@ -3284,6 +3284,17 @@ public final Scalar getScalarElement(int index) {
return new Scalar(getType(), getElement(getNativeView(), index));
}
+ /**
+ * Get the number of bytes needed to allocate a validity buffer for the given number of rows.
+ * According to cudf::bitmask_allocation_size_bytes, the padding boundary for null mask is 64 bytes.
+ */
+ static long getValidityBufferSize(int numRows) {
+ // number of bytes required = Math.ceil(number of bits / 8)
+ long actualBytes = ((long) numRows + 7) >> 3;
+ // pad to a multiple of the padding boundary (64 bytes)
+ return ((actualBytes + 63) >> 6) << 6;
+ }
+
/////////////////////////////////////////////////////////////////////////////
// INTERNAL/NATIVE ACCESS
/////////////////////////////////////////////////////////////////////////////
@@ -3701,7 +3712,7 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat
* Native method to find the first (or last) index of each search key in the specified column,
* in each row of a list column.
* @param nativeView the column view handle of the list
- * @param scalarColumnHandle handle to the search key column
+ * @param keyColumnHandle handle to the search key column
* @param isFindFirst Whether to find the first index of the key, or the last.
* @return column handle of the resultant column of int32 indices
*/
@@ -3881,11 +3892,6 @@ private static native long bitwiseMergeAndSetValidity(long baseHandle, long[] vi
private static native long copyWithBooleanColumnAsValidity(long exemplarViewHandle,
long boolColumnViewHandle) throws CudfException;
- /**
- * Get the number of bytes needed to allocate a validity buffer for the given number of rows.
- */
- static native long getNativeValidPointerSize(int size);
-
////////
// Native cudf::column_view life cycle and metadata access methods. Life cycle methods
// should typically only be called from the OffHeap inner class.
@@ -3975,7 +3981,7 @@ static ColumnVector createColumnVector(DType type, int rows, HostMemoryBuffer da
DeviceMemoryBuffer mainValidDevBuff = null;
DeviceMemoryBuffer mainOffsetsDevBuff = null;
if (mainColValid != null) {
- long validLen = getNativeValidPointerSize(mainColRows);
+ long validLen = getValidityBufferSize(mainColRows);
mainValidDevBuff = DeviceMemoryBuffer.allocate(validLen);
mainValidDevBuff.copyFromHostBuffer(mainColValid, 0, validLen);
}
@@ -4084,7 +4090,7 @@ private static NestedColumnVector createNestedColumnVector(DType type, long rows
data.copyFromHostBuffer(dataBuffer, 0, dataLen);
}
if (validityBuffer != null) {
- long validLen = getNativeValidPointerSize((int)rows);
+ long validLen = getValidityBufferSize((int)rows);
valid = DeviceMemoryBuffer.allocate(validLen);
valid.copyFromHostBuffer(validityBuffer, 0, validLen);
}
diff --git a/java/src/main/java/ai/rapids/cudf/DType.java b/java/src/main/java/ai/rapids/cudf/DType.java
index 742501be375..2e5b0202dc5 100644
--- a/java/src/main/java/ai/rapids/cudf/DType.java
+++ b/java/src/main/java/ai/rapids/cudf/DType.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -307,7 +307,7 @@ public static DType fromJavaBigDecimal(BigDecimal dec) {
return new DType(DTypeEnum.DECIMAL128, -dec.scale());
}
throw new IllegalArgumentException("Precision " + dec.precision() +
- " exceeds max precision cuDF can support " + DECIMAL64_MAX_PRECISION);
+ " exceeds max precision cuDF can support " + DECIMAL128_MAX_PRECISION);
}
/**
diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java
index 0fe7d7a5df8..3abc6db385d 100644
--- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java
+++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java
@@ -199,7 +199,7 @@ public ColumnVector copyToDevice() {
}
HostMemoryBuffer hvalid = this.offHeap.valid;
if (hvalid != null) {
- long validLen = ColumnView.getNativeValidPointerSize((int) rows);
+ long validLen = ColumnView.getValidityBufferSize((int) rows);
valid = DeviceMemoryBuffer.allocate(validLen);
valid.copyFromHostBuffer(hvalid, 0, validLen);
}
@@ -858,7 +858,7 @@ public static HostColumnVector timestampNanoSecondsFromBoxedLongs(Long... values
* Build
*/
- public static final class ColumnBuilder implements AutoCloseable {
+ public static final class ColumnBuilder implements AutoCloseable {
private DType type;
private HostMemoryBuffer data;
@@ -869,28 +869,78 @@ public static final class ColumnBuilder implements AutoCloseable {
private boolean nullable;
private long rows;
private long estimatedRows;
+ private long rowCapacity = 0L;
+ private long validCapacity = 0L;
private boolean built = false;
private List<ColumnBuilder> childBuilders = new ArrayList<>();
+ private Runnable nullHandler;
- private int currentIndex = 0;
- private int currentByteIndex = 0;
-
+ // The value of currentIndex can't exceed Int32.Max. Storing currentIndex as a long is to
+ // adapt HostMemoryBuffer.setXXX, which requires a long offset.
+ private long currentIndex = 0;
+ // Only for Strings: pointer of the byte (data) buffer
+ private int currentStringByteIndex = 0;
+ // Use bit shift instead of multiply to transform row offset to byte offset
+ private int bitShiftBySize = 0;
+ private static final int bitShiftByOffset = (int)(Math.log(OFFSET_SIZE) / Math.log(2));
public ColumnBuilder(HostColumnVector.DataType type, long estimatedRows) {
this.type = type.getType();
this.nullable = type.isNullable();
this.rows = 0;
- this.estimatedRows = estimatedRows;
+ this.estimatedRows = Math.max(estimatedRows, 1L);
+ this.bitShiftBySize = (int)(Math.log(this.type.getSizeInBytes()) / Math.log(2));
+
+ // initialize the null handler according to the data type
+ this.setupNullHandler();
+
for (int i = 0; i < type.getNumChildren(); i++) {
childBuilders.add(new ColumnBuilder(type.getChild(i), estimatedRows));
}
}
+ private void setupNullHandler() {
+ if (this.type == DType.LIST) {
+ this.nullHandler = () -> {
+ this.growListBuffersAndRows();
+ this.growValidBuffer();
+ setNullAt(currentIndex++);
+ offsets.setInt(currentIndex << bitShiftByOffset, childBuilders.get(0).getCurrentIndex());
+ };
+ } else if (this.type == DType.STRING) {
+ this.nullHandler = () -> {
+ this.growStringBuffersAndRows(0);
+ this.growValidBuffer();
+ setNullAt(currentIndex++);
+ offsets.setInt(currentIndex << bitShiftByOffset, currentStringByteIndex);
+ };
+ } else if (this.type == DType.STRUCT) {
+ this.nullHandler = () -> {
+ this.growStructBuffersAndRows();
+ this.growValidBuffer();
+ setNullAt(currentIndex++);
+ for (ColumnBuilder childBuilder : childBuilders) {
+ childBuilder.appendNull();
+ }
+ };
+ } else {
+ this.nullHandler = () -> {
+ this.growFixedWidthBuffersAndRows();
+ this.growValidBuffer();
+ setNullAt(currentIndex++);
+ };
+ }
+ }
+
public HostColumnVector build() {
List<HostColumnVectorCore> hostColumnVectorCoreList = new ArrayList<>();
for (ColumnBuilder childBuilder : childBuilders) {
hostColumnVectorCoreList.add(childBuilder.buildNestedInternal());
}
+ // Aligns the valid buffer size with other buffers in terms of row size, because it grows lazily.
+ if (valid != null) {
+ growValidBuffer();
+ }
HostColumnVector hostColumnVector = new HostColumnVector(type, rows, Optional.of(nullCount), data, valid, offsets,
hostColumnVectorCoreList);
built = true;
@@ -902,6 +952,10 @@ private HostColumnVectorCore buildNestedInternal() {
for (ColumnBuilder childBuilder : childBuilders) {
hostColumnVectorCoreList.add(childBuilder.buildNestedInternal());
}
+ // Aligns the valid buffer size with other buffers in terms of row size, because it grows lazily.
+ if (valid != null) {
+ growValidBuffer();
+ }
return new HostColumnVectorCore(type, rows, Optional.of(nullCount), data, valid, offsets, hostColumnVectorCoreList);
}
@@ -929,71 +983,113 @@ public ColumnBuilder appendStructValues(StructData... inputList) {
}
/**
- * A method that is responsible for growing the buffers as needed
- * and incrementing the row counts when we append values or nulls.
- * @param hasNull indicates whether the validity buffer needs to be considered, as the
- * nullcount may not have been fully calculated yet
- * @param length used for strings
+ * Grows valid buffer lazily. The valid buffer won't be materialized until the first null
+ * value appended. This method reuses the rowCapacity to track the sizes of column.
+ * Therefore, please call specific growBuffer method to update rowCapacity before calling
+ * this method.
+ */
+ private void growValidBuffer() {
+ if (valid == null) {
+ long maskBytes = ColumnView.getValidityBufferSize((int) rowCapacity);
+ valid = HostMemoryBuffer.allocate(maskBytes);
+ valid.setMemory(0, valid.length, (byte) 0xFF);
+ validCapacity = rowCapacity;
+ return;
+ }
+ if (validCapacity < rowCapacity) {
+ long maskBytes = ColumnView.getValidityBufferSize((int) rowCapacity);
+ HostMemoryBuffer newValid = HostMemoryBuffer.allocate(maskBytes);
+ newValid.setMemory(0, newValid.length, (byte) 0xFF);
+ valid = copyBuffer(newValid, valid);
+ validCapacity = rowCapacity;
+ }
+ }
+
+ /**
+ * A method automatically grows data buffer for fixed-width columns as needed along with
+ * incrementing the row counts. Please call this method before appending any value or null.
*/
- private void growBuffersAndRows(boolean hasNull, int length) {
+ private void growFixedWidthBuffersAndRows() {
assert rows + 1 <= Integer.MAX_VALUE : "Row count cannot go over Integer.MAX_VALUE";
rows++;
- long targetDataSize = 0;
- if (!type.isNestedType()) {
- if (type.equals(DType.STRING)) {
- targetDataSize = data == null ? length : currentByteIndex + length;
- } else {
- targetDataSize = data == null ? estimatedRows * type.getSizeInBytes() : rows * type.getSizeInBytes();
- }
+ if (data == null) {
+ data = HostMemoryBuffer.allocate(estimatedRows << bitShiftBySize);
+ rowCapacity = estimatedRows;
+ } else if (rows > rowCapacity) {
+ long newCap = Math.min(rowCapacity * 2, Integer.MAX_VALUE - 1);
+ data = copyBuffer(HostMemoryBuffer.allocate(newCap << bitShiftBySize), data);
+ rowCapacity = newCap;
}
+ }
- if (targetDataSize > 0) {
- if (data == null) {
- data = HostMemoryBuffer.allocate(targetDataSize);
- } else {
- long maxLen;
- if (type.equals(DType.STRING)) {
- maxLen = Integer.MAX_VALUE;
- } else {
- maxLen = Integer.MAX_VALUE * (long) type.getSizeInBytes();
- }
- long oldLen = data.getLength();
- long newDataLen = Math.max(1, oldLen);
- while (targetDataSize > newDataLen) {
- newDataLen = newDataLen * 2;
- }
- if (newDataLen != oldLen) {
- newDataLen = Math.min(newDataLen, maxLen);
- if (newDataLen < targetDataSize) {
- throw new IllegalStateException("A data buffer for strings is not supported over 2GB in size");
- }
- HostMemoryBuffer newData = HostMemoryBuffer.allocate(newDataLen);
- data = copyBuffer(newData, data);
- }
- }
+ /**
+ * A method automatically grows offsets buffer for list columns as needed along with
+ * incrementing the row counts. Please call this method before appending any value or null.
+ */
+ private void growListBuffersAndRows() {
+ assert rows + 2 <= Integer.MAX_VALUE : "Row count cannot go over Integer.MAX_VALUE";
+ rows++;
+
+ if (offsets == null) {
+ offsets = HostMemoryBuffer.allocate((estimatedRows + 1) << bitShiftByOffset);
+ offsets.setInt(0, 0);
+ rowCapacity = estimatedRows;
+ } else if (rows > rowCapacity) {
+ long newCap = Math.min(rowCapacity * 2, Integer.MAX_VALUE - 2);
+ offsets = copyBuffer(HostMemoryBuffer.allocate((newCap + 1) << bitShiftByOffset), offsets);
+ rowCapacity = newCap;
}
- if (type.equals(DType.LIST) || type.equals(DType.STRING)) {
- if (offsets == null) {
- offsets = HostMemoryBuffer.allocate((estimatedRows + 1) * OFFSET_SIZE);
- offsets.setInt(0, 0);
- } else if ((rows +1) * OFFSET_SIZE > offsets.length) {
- long newOffsetLen = offsets.length * 2;
- HostMemoryBuffer newOffsets = HostMemoryBuffer.allocate(newOffsetLen);
- offsets = copyBuffer(newOffsets, offsets);
- }
+ }
+
+ /**
+ * A method automatically grows offsets and data buffer for string columns as needed along with
+ * incrementing the row counts. Please call this method before appending any value or null.
+ *
+ * @param stringLength number of bytes required by the next row
+ */
+ private void growStringBuffersAndRows(int stringLength) {
+ assert rows + 2 <= Integer.MAX_VALUE : "Row count cannot go over Integer.MAX_VALUE";
+ rows++;
+
+ if (offsets == null) {
+ // Initialize data buffer with at least 1 byte in case the first appended value is null.
+ data = HostMemoryBuffer.allocate(Math.max(1, stringLength));
+ offsets = HostMemoryBuffer.allocate((estimatedRows + 1) << bitShiftByOffset);
+ offsets.setInt(0, 0);
+ rowCapacity = estimatedRows;
+ return;
}
- if (hasNull || nullCount > 0) {
- if (valid == null) {
- long targetValidSize = ColumnView.getNativeValidPointerSize((int)estimatedRows);
- valid = HostMemoryBuffer.allocate(targetValidSize);
- valid.setMemory(0, targetValidSize, (byte) 0xFF);
- } else if (valid.length < ColumnView.getNativeValidPointerSize((int)rows)) {
- long newValidLen = valid.length * 2;
- HostMemoryBuffer newValid = HostMemoryBuffer.allocate(newValidLen);
- newValid.setMemory(0, newValidLen, (byte) 0xFF);
- valid = copyBuffer(newValid, valid);
- }
+
+ if (rows > rowCapacity) {
+ long newCap = Math.min(rowCapacity * 2, Integer.MAX_VALUE - 2);
+ offsets = copyBuffer(HostMemoryBuffer.allocate((newCap + 1) << bitShiftByOffset), offsets);
+ rowCapacity = newCap;
+ }
+
+ long currentLength = currentStringByteIndex + stringLength;
+ if (currentLength > data.length) {
+ long requiredLength = data.length;
+ do {
+ requiredLength = requiredLength * 2;
+ } while (currentLength > requiredLength);
+ data = copyBuffer(HostMemoryBuffer.allocate(requiredLength), data);
+ }
+ }
+
+ /**
+ * For struct columns, we only need to update rows and rowCapacity (for the growth of
+ * the valid buffer), because struct columns hold no buffers themselves.
+ * Please call this method before appending any value or null.
+ */
+ private void growStructBuffersAndRows() {
+ assert rows + 1 <= Integer.MAX_VALUE : "Row count cannot go over Integer.MAX_VALUE";
+ rows++;
+
+ if (rowCapacity == 0) {
+ rowCapacity = estimatedRows;
+ } else if (rows > rowCapacity) {
+ rowCapacity = Math.min(rowCapacity * 2, Integer.MAX_VALUE - 1);
}
}
@@ -1015,29 +1111,13 @@ private HostMemoryBuffer copyBuffer(HostMemoryBuffer targetBuffer, HostMemoryBuf
* Method that sets the null bit in the validity vector
* @param index the row index at which the null is marked
*/
- private void setNullAt(int index) {
+ private void setNullAt(long index) {
assert index < rows : "Index for null value should fit the column with " + rows + " rows";
nullCount += BitVectorHelper.setNullAt(valid, index);
}
public final ColumnBuilder appendNull() {
- growBuffersAndRows(true, 0);
- setNullAt(currentIndex);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
- if (type.hasOffsets()) {
- if (type.equals(DType.LIST)) {
- offsets.setInt(currentIndex * OFFSET_SIZE, childBuilders.get(0).getCurrentIndex());
- } else {
- // It is a String
- offsets.setInt(currentIndex * OFFSET_SIZE, currentByteIndex);
- }
- } else if (type.equals(DType.STRUCT)) {
- // structs propagate nulls to children and even further down if needed
- for (ColumnBuilder childBuilder : childBuilders) {
- childBuilder.appendNull();
- }
- }
+ nullHandler.run();
return this;
}
@@ -1081,7 +1161,7 @@ public ColumnBuilder endStruct() {
assert type.equals(DType.STRUCT) : "This only works for structs";
assert allChildrenHaveSameIndex() : "Appending structs data appears to be off " +
childBuilders + " should all have the same currentIndex " + type;
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growStructBuffersAndRows();
currentIndex++;
return this;
}
@@ -1095,9 +1175,8 @@ assert allChildrenHaveSameIndex() : "Appending structs data appears to be off "
*/
public ColumnBuilder endList() {
assert type.equals(DType.LIST);
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
- currentIndex++;
- offsets.setInt(currentIndex * OFFSET_SIZE, childBuilders.get(0).getCurrentIndex());
+ growListBuffersAndRows();
+ offsets.setInt(++currentIndex << bitShiftByOffset, childBuilders.get(0).getCurrentIndex());
return this;
}
@@ -1155,80 +1234,67 @@ public void incrCurrentIndex() {
}
public int getCurrentIndex() {
- return currentIndex;
+ return (int) currentIndex;
}
+ @Deprecated
public int getCurrentByteIndex() {
- return currentByteIndex;
+ return currentStringByteIndex;
}
public final ColumnBuilder append(byte value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.isBackedByByte();
assert currentIndex < rows;
- data.setByte(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setByte(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(short value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.isBackedByShort();
assert currentIndex < rows;
- data.setShort(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setShort(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(int value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.isBackedByInt();
assert currentIndex < rows;
- data.setInt(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setInt(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(long value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.isBackedByLong();
assert currentIndex < rows;
- data.setLong(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setLong(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(float value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.equals(DType.FLOAT32);
assert currentIndex < rows;
- data.setFloat(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setFloat(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(double value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.equals(DType.FLOAT64);
assert currentIndex < rows;
- data.setDouble(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setDouble(currentIndex++ << bitShiftBySize, value);
return this;
}
public final ColumnBuilder append(boolean value) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert type.equals(DType.BOOL8);
assert currentIndex < rows;
- data.setBoolean(currentIndex * type.getSizeInBytes(), value);
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
+ data.setBoolean(currentIndex++ << bitShiftBySize, value);
return this;
}
@@ -1237,22 +1303,19 @@ public ColumnBuilder append(BigDecimal value) {
}
public ColumnBuilder append(BigInteger unscaledVal) {
- growBuffersAndRows(false, currentIndex * type.getSizeInBytes() + type.getSizeInBytes());
+ growFixedWidthBuffersAndRows();
assert currentIndex < rows;
if (type.typeId == DType.DTypeEnum.DECIMAL32) {
- data.setInt(currentIndex * type.getSizeInBytes(), unscaledVal.intValueExact());
+ data.setInt(currentIndex++ << bitShiftBySize, unscaledVal.intValueExact());
} else if (type.typeId == DType.DTypeEnum.DECIMAL64) {
- data.setLong(currentIndex * type.getSizeInBytes(), unscaledVal.longValueExact());
+ data.setLong(currentIndex++ << bitShiftBySize, unscaledVal.longValueExact());
} else if (type.typeId == DType.DTypeEnum.DECIMAL128) {
- assert currentIndex < rows;
byte[] unscaledValueBytes = unscaledVal.toByteArray();
byte[] result = convertDecimal128FromJavaToCudf(unscaledValueBytes);
- data.setBytes(currentIndex*DType.DTypeEnum.DECIMAL128.sizeInBytes, result, 0, result.length);
- } else {
+ data.setBytes(currentIndex++ << bitShiftBySize, result, 0, result.length);
+ } else {
throw new IllegalStateException(type + " is not a supported decimal type.");
}
- currentIndex++;
- currentByteIndex += type.getSizeInBytes();
return this;
}
@@ -1271,14 +1334,13 @@ public ColumnBuilder appendUTF8String(byte[] value, int srcOffset, int length) {
assert length >= 0;
assert value.length + srcOffset <= length;
assert type.equals(DType.STRING) : " type " + type + " is not String";
- currentIndex++;
- growBuffersAndRows(false, length);
- assert currentIndex < rows + 1;
+ growStringBuffersAndRows(length);
+ assert currentIndex < rows;
if (length > 0) {
- data.setBytes(currentByteIndex, value, srcOffset, length);
+ data.setBytes(currentStringByteIndex, value, srcOffset, length);
}
- currentByteIndex += length;
- offsets.setInt(currentIndex * OFFSET_SIZE, currentByteIndex);
+ currentStringByteIndex += length;
+ offsets.setInt(++currentIndex << bitShiftByOffset, currentStringByteIndex);
return this;
}
@@ -1822,7 +1884,7 @@ public final Builder append(HostColumnVector columnVector) {
}
private void allocateBitmaskAndSetDefaultValues() {
- long bitmaskSize = ColumnView.getNativeValidPointerSize((int) rows);
+ long bitmaskSize = ColumnView.getValidityBufferSize((int) rows);
valid = HostMemoryBuffer.allocate(bitmaskSize);
valid.setMemory(0, bitmaskSize, (byte) 0xFF);
}
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 0fce27bc130..63247eb0066 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -1790,16 +1790,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getNativeValidityLength(J
CATCH_STD(env, 0);
}
-JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getNativeValidPointerSize(JNIEnv *env,
- jobject j_object,
- jint size) {
- try {
- cudf::jni::auto_set_device(env);
- return static_cast<jlong>(cudf::bitmask_allocation_size_bytes(size));
- }
- CATCH_STD(env, 0);
-}
-
JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getDeviceMemorySize(JNIEnv *env, jclass,
jlong handle) {
JNI_NULL_CHECK(env, handle, "native handle is null", 0);
diff --git a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java
index a26dbec4907..7b476c31b95 100644
--- a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
import org.junit.jupiter.api.Test;
import java.util.Random;
+import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -39,21 +40,34 @@ public void testCreateColumnVectorBuilder() {
@Test
public void testArrayAllocation() {
- try (HostColumnVector byteColumnVector = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
- assertFalse(byteColumnVector.hasNulls());
- assertEquals(byteColumnVector.getByte(0), 2);
- assertEquals(byteColumnVector.getByte(1), 3);
- assertEquals(byteColumnVector.getByte(2), 5);
+ Consumer verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEquals(cv.getByte(0), 2);
+ assertEquals(cv.getByte(1), 3);
+ assertEquals(cv.getByte(2), 5);
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBytes(true, new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
}
}
@Test
public void testUnsignedArrayAllocation() {
- try (HostColumnVector v = HostColumnVector.fromUnsignedBytes(new byte[]{(byte)0xff, (byte)128, 5})) {
- assertFalse(v.hasNulls());
- assertEquals(0xff, Byte.toUnsignedInt(v.getByte(0)), 0xff);
- assertEquals(128, Byte.toUnsignedInt(v.getByte(1)), 128);
- assertEquals(5, Byte.toUnsignedInt(v.getByte(2)), 5);
+ Consumer verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEquals(0xff, Byte.toUnsignedInt(cv.getByte(0)), 0xff);
+ assertEquals(128, Byte.toUnsignedInt(cv.getByte(1)), 128);
+ assertEquals(5, Byte.toUnsignedInt(cv.getByte(2)), 5);
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromUnsignedBytes(new byte[]{(byte)0xff, (byte)128, 5})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBytes(false,
+ new byte[]{(byte)0xff, (byte)128, 5})) {
+ verify.accept(bcv);
}
}
@@ -70,47 +84,73 @@ public void testAppendRepeatingValues() {
@Test
public void testUpperIndexOutOfBoundsException() {
- try (HostColumnVector byteColumnVector = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
- assertThrows(AssertionError.class, () -> byteColumnVector.getByte(3));
- assertFalse(byteColumnVector.hasNulls());
+ Consumer verify = (cv) -> {
+ assertThrows(AssertionError.class, () -> cv.getByte(3));
+ assertFalse(cv.hasNulls());
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBytes(true, new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
}
}
@Test
public void testLowerIndexOutOfBoundsException() {
- try (HostColumnVector byteColumnVector = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
- assertFalse(byteColumnVector.hasNulls());
- assertThrows(AssertionError.class, () -> byteColumnVector.getByte(-1));
+ Consumer verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertThrows(AssertionError.class, () -> cv.getByte(-1));
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromBytes(new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBytes(true, new byte[]{2, 3, 5})) {
+ verify.accept(bcv);
}
}
@Test
public void testAddingNullValues() {
- try (HostColumnVector byteColumnVector = HostColumnVector.fromBoxedBytes(
- new Byte[]{2, 3, 4, 5, 6, 7, null, null})) {
- assertTrue(byteColumnVector.hasNulls());
- assertEquals(2, byteColumnVector.getNullCount());
+ Consumer verify = (cv) -> {
+ assertTrue(cv.hasNulls());
+ assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
- assertFalse(byteColumnVector.isNull(i));
+ assertFalse(cv.isNull(i));
}
- assertTrue(byteColumnVector.isNull(6));
- assertTrue(byteColumnVector.isNull(7));
+ assertTrue(cv.isNull(6));
+ assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromBoxedBytes(
+ new Byte[]{2, 3, 4, 5, 6, 7, null, null})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBoxedBytes(true,
+ new Byte[]{2, 3, 4, 5, 6, 7, null, null})) {
+ verify.accept(bcv);
}
}
@Test
public void testAddingUnsignedNullValues() {
- try (HostColumnVector byteColumnVector = HostColumnVector.fromBoxedUnsignedBytes(
- new Byte[]{2, 3, 4, 5, (byte)128, (byte)254, null, null})) {
- assertTrue(byteColumnVector.hasNulls());
- assertEquals(2, byteColumnVector.getNullCount());
+ Consumer verify = (cv) -> {
+ assertTrue(cv.hasNulls());
+ assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
- assertFalse(byteColumnVector.isNull(i));
+ assertFalse(cv.isNull(i));
}
- assertEquals(128, Byte.toUnsignedInt(byteColumnVector.getByte(4)));
- assertEquals(254, Byte.toUnsignedInt(byteColumnVector.getByte(5)));
- assertTrue(byteColumnVector.isNull(6));
- assertTrue(byteColumnVector.isNull(7));
+ assertEquals(128, Byte.toUnsignedInt(cv.getByte(4)));
+ assertEquals(254, Byte.toUnsignedInt(cv.getByte(5)));
+ assertTrue(cv.isNull(6));
+ assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector bcv = HostColumnVector.fromBoxedUnsignedBytes(
+ new Byte[]{2, 3, 4, 5, (byte)128, (byte)254, null, null})) {
+ verify.accept(bcv);
+ }
+ try (HostColumnVector bcv = ColumnBuilderHelper.fromBoxedBytes(false,
+ new Byte[]{2, 3, 4, 5, (byte)128, (byte)254, null, null})) {
+ verify.accept(bcv);
}
}
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java b/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java
new file mode 100644
index 00000000000..263244b2413
--- /dev/null
+++ b/java/src/test/java/ai/rapids/cudf/ColumnBuilderHelper.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package ai.rapids.cudf;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.Objects;
+import java.util.function.Consumer;
+
+/**
+ * ColumnBuilderHelper helps test ColumnBuilder against the existing ColumnVector tests.
+ */
+public class ColumnBuilderHelper {
+
+ public static HostColumnVector build(
+ HostColumnVector.DataType type,
+ int rows,
+ Consumer<HostColumnVector.ColumnBuilder> init) {
+ try (HostColumnVector.ColumnBuilder b = new HostColumnVector.ColumnBuilder(type, rows)) {
+ init.accept(b);
+ return b.build();
+ }
+ }
+
+ public static ColumnVector buildOnDevice(
+ HostColumnVector.DataType type,
+ int rows,
+ Consumer<HostColumnVector.ColumnBuilder> init) {
+ try (HostColumnVector.ColumnBuilder b = new HostColumnVector.ColumnBuilder(type, rows)) {
+ init.accept(b);
+ return b.buildAndPutOnDevice();
+ }
+ }
+
+ public static HostColumnVector fromBoxedBytes(boolean signed, Byte... values) {
+ DType dt = signed ? DType.INT8 : DType.UINT8;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(true, dt),
+ values.length,
+ (b) -> {
+ for (Byte v : values)
+ if (v == null) b.appendNull();
+ else b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromBoxedDoubles(Double... values) {
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(true, DType.FLOAT64),
+ values.length,
+ (b) -> {
+ for (Double v : values)
+ if (v == null) b.appendNull();
+ else b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromBoxedInts(boolean signed, Integer... values) {
+ DType dt = signed ? DType.INT32 : DType.UINT32;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(true, dt),
+ values.length,
+ (b) -> {
+ for (Integer v : values)
+ if (v == null) b.appendNull();
+ else b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromBoxedLongs(boolean signed, Long... values) {
+ DType dt = signed ? DType.INT64 : DType.UINT64;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(true, dt),
+ values.length,
+ (b) -> {
+ for (Long v : values)
+ if (v == null) b.appendNull();
+ else b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromBytes(boolean signed, byte... values) {
+ DType dt = signed ? DType.INT8 : DType.UINT8;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(false, dt),
+ values.length,
+ (b) -> {
+ for (byte v : values) b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromDecimals(BigDecimal... values) {
+ // Simply copy from HostColumnVector.fromDecimals
+ BigDecimal maxDec = Arrays.stream(values).filter(Objects::nonNull)
+ .max(Comparator.comparingInt(BigDecimal::precision))
+ .orElse(BigDecimal.ZERO);
+ int maxScale = Arrays.stream(values).filter(Objects::nonNull)
+ .map(decimal -> decimal.scale())
+ .max(Comparator.naturalOrder())
+ .orElse(0);
+ maxDec = maxDec.setScale(maxScale, RoundingMode.UNNECESSARY);
+
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(true, DType.fromJavaBigDecimal(maxDec)),
+ values.length,
+ (b) -> {
+ for (BigDecimal v : values)
+ if (v == null) b.appendNull();
+ else b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromDoubles(double... values) {
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(false, DType.FLOAT64),
+ values.length,
+ (b) -> {
+ for (double v : values) b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromInts(boolean signed, int... values) {
+ DType dt = signed ? DType.INT32 : DType.UINT32;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(false, dt),
+ values.length,
+ (b) -> {
+ for (int v : values) b.append(v);
+ });
+ }
+
+ public static HostColumnVector fromLongs(boolean signed, long... values) {
+ DType dt = signed ? DType.INT64 : DType.UINT64;
+ return ColumnBuilderHelper.build(
+ new HostColumnVector.BasicType(false, dt),
+ values.length,
+ (b) -> {
+ for (long v : values) b.append(v);
+ });
+ }
+}
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 2dbec454eb2..8f39c3c51ce 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -3559,7 +3559,6 @@ void testCastDecimal64ToString() {
for (int scale : new int[]{-5, -2, -1, 0, 1, 2, 5}) {
for (int i = 0; i < strDecimalValues.length; i++) {
strDecimalValues[i] = dumpDecimal(unScaledValues[i], scale);
- System.out.println(strDecimalValues[i]);
}
testCastFixedWidthToStringsAndBack(DType.create(DType.DTypeEnum.DECIMAL64, scale),
diff --git a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java
index c2772520f57..994066c5df0 100644
--- a/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/DecimalColumnVectorTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,10 +22,12 @@
import org.junit.jupiter.api.Test;
import java.math.BigDecimal;
+import java.math.BigInteger;
import java.math.RoundingMode;
import java.util.Arrays;
import java.util.Objects;
import java.util.Random;
+import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.*;
@@ -33,9 +35,11 @@ public class DecimalColumnVectorTest extends CudfTestBase {
private static final Random rdSeed = new Random(1234);
private static final int dec32Scale = 4;
private static final int dec64Scale = 10;
+ private static final int dec128Scale = 30;
private static final BigDecimal[] decimal32Zoo = new BigDecimal[20];
private static final BigDecimal[] decimal64Zoo = new BigDecimal[20];
+ private static final BigDecimal[] decimal128Zoo = new BigDecimal[20];
private static final int[] unscaledDec32Zoo = new int[decimal32Zoo.length];
private static final long[] unscaledDec64Zoo = new long[decimal64Zoo.length];
@@ -45,6 +49,9 @@ public class DecimalColumnVectorTest extends CudfTestBase {
private final BigDecimal[] boundaryDecimal64 = new BigDecimal[]{
new BigDecimal("999999999999999999"), new BigDecimal("-999999999999999999")};
+ private final BigDecimal[] boundaryDecimal128 = new BigDecimal[]{
+ new BigDecimal("99999999999999999999999999999999999999"), new BigDecimal("-99999999999999999999999999999999999999")};
+
private final BigDecimal[] overflowDecimal32 = new BigDecimal[]{
BigDecimal.valueOf(Integer.MAX_VALUE), BigDecimal.valueOf(Integer.MIN_VALUE)};
@@ -72,6 +79,12 @@ public static void setup() {
} else {
decimal64Zoo[i] = null;
}
+ if (rdSeed.nextBoolean()) {
+ BigInteger unscaledVal = BigInteger.valueOf(rdSeed.nextLong()).multiply(BigInteger.valueOf(rdSeed.nextLong()));
+ decimal128Zoo[i] = new BigDecimal(unscaledVal, dec128Scale);
+ } else {
+ decimal128Zoo[i] = null;
+ }
}
}
@@ -190,27 +203,44 @@ public void testDecimalGeneral() {
@Test
public void testDecimalFromDecimals() {
- DecimalColumnVectorTest.testDecimalImpl(false, dec32Scale, decimal32Zoo);
- DecimalColumnVectorTest.testDecimalImpl(true, dec64Scale, decimal64Zoo);
- DecimalColumnVectorTest.testDecimalImpl(false, 0, boundaryDecimal32);
- DecimalColumnVectorTest.testDecimalImpl(true, 0, boundaryDecimal64);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL32, dec32Scale, decimal32Zoo);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL64, dec64Scale, decimal64Zoo);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL128, dec128Scale, decimal128Zoo);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL32, 0, boundaryDecimal32);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL64, 0, boundaryDecimal64);
+ DecimalColumnVectorTest.testDecimalImpl(DType.DTypeEnum.DECIMAL128, 0, boundaryDecimal128);
}
- private static void testDecimalImpl(boolean isInt64, int scale, BigDecimal[] decimalZoo) {
- try (ColumnVector cv = ColumnVector.fromDecimals(decimalZoo)) {
- try (HostColumnVector hcv = cv.copyToHost()) {
- assertEquals(-scale, hcv.getType().getScale());
- assertEquals(isInt64, hcv.getType().typeId == DType.DTypeEnum.DECIMAL64);
- assertEquals(decimalZoo.length, hcv.rows);
- for (int i = 0; i < decimalZoo.length; i++) {
- assertEquals(decimalZoo[i] == null, hcv.isNull(i));
- if (decimalZoo[i] != null) {
- assertEquals(decimalZoo[i].floatValue(), hcv.getBigDecimal(i).floatValue());
- long backValue = isInt64 ? hcv.getLong(i) : hcv.getInt(i);
- assertEquals(decimalZoo[i].setScale(scale, RoundingMode.UNNECESSARY), BigDecimal.valueOf(backValue, scale));
+ private static void testDecimalImpl(DType.DTypeEnum decimalType, int scale, BigDecimal[] decimalZoo) {
+ Consumer assertions = (hcv) -> {
+ assertEquals(-scale, hcv.getType().getScale());
+ assertEquals(hcv.getType().typeId, decimalType);
+ assertEquals(decimalZoo.length, hcv.rows);
+ for (int i = 0; i < decimalZoo.length; i++) {
+ assertEquals(decimalZoo[i] == null, hcv.isNull(i));
+ if (decimalZoo[i] != null) {
+ BigDecimal actual;
+ switch (decimalType) {
+ case DECIMAL32:
+ actual = BigDecimal.valueOf(hcv.getInt(i), scale);
+ break;
+ case DECIMAL64:
+ actual = BigDecimal.valueOf(hcv.getLong(i), scale);
+ break;
+ default:
+ actual = hcv.getBigDecimal(i);
}
+ assertEquals(decimalZoo[i].subtract(actual).longValueExact(), 0L);
}
}
+ };
+ try (ColumnVector cv = ColumnVector.fromDecimals(decimalZoo)) {
+ try (HostColumnVector hcv = cv.copyToHost()) {
+ assertions.accept(hcv);
+ }
+ }
+ try (HostColumnVector hcv = ColumnBuilderHelper.fromDecimals(decimalZoo)) {
+ assertions.accept(hcv);
}
}
diff --git a/java/src/test/java/ai/rapids/cudf/DoubleColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/DoubleColumnVectorTest.java
index d82565e1d2d..fa34429685e 100644
--- a/java/src/test/java/ai/rapids/cudf/DoubleColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/DoubleColumnVectorTest.java
@@ -1,6 +1,6 @@
/*
*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
import org.junit.jupiter.api.Test;
import java.util.Random;
+import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -40,34 +41,51 @@ public void testCreateColumnVectorBuilder() {
@Test
public void testArrayAllocation() {
- try (HostColumnVector doubleColumnVector = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
- assertFalse(doubleColumnVector.hasNulls());
- assertEqualsWithinPercentage(doubleColumnVector.getDouble(0), 2.1, 0.01);
- assertEqualsWithinPercentage(doubleColumnVector.getDouble(1), 3.02, 0.01);
- assertEqualsWithinPercentage(doubleColumnVector.getDouble(2), 5.003, 0.001);
+ Consumer verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEqualsWithinPercentage(cv.getDouble(0), 2.1, 0.01);
+ assertEqualsWithinPercentage(cv.getDouble(1), 3.02, 0.01);
+ assertEqualsWithinPercentage(cv.getDouble(2), 5.003, 0.001);
+ };
+ try (HostColumnVector dcv = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
+ }
+ try (HostColumnVector dcv = ColumnBuilderHelper.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
}
}
@Test
public void testUpperIndexOutOfBoundsException() {
- try (HostColumnVector doubleColumnVector = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
- assertThrows(AssertionError.class, () -> doubleColumnVector.getDouble(3));
- assertFalse(doubleColumnVector.hasNulls());
+ Consumer verify = (cv) -> {
+ assertThrows(AssertionError.class, () -> cv.getDouble(3));
+ assertFalse(cv.hasNulls());
+ };
+ try (HostColumnVector dcv = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
+ }
+ try (HostColumnVector dcv = ColumnBuilderHelper.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
}
}
@Test
public void testLowerIndexOutOfBoundsException() {
- try (HostColumnVector doubleColumnVector = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
- assertFalse(doubleColumnVector.hasNulls());
- assertThrows(AssertionError.class, () -> doubleColumnVector.getDouble(-1));
+ Consumer verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertThrows(AssertionError.class, () -> cv.getDouble(-1));
+ };
+ try (HostColumnVector dcv = HostColumnVector.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
+ }
+ try (HostColumnVector dcv = ColumnBuilderHelper.fromDoubles(2.1, 3.02, 5.003)) {
+ verify.accept(dcv);
}
}
@Test
public void testAddingNullValues() {
- try (HostColumnVector cv =
- HostColumnVector.fromBoxedDoubles(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, null, null)) {
+ Consumer verify = (cv) -> {
assertTrue(cv.hasNulls());
assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
@@ -75,6 +93,14 @@ public void testAddingNullValues() {
}
assertTrue(cv.isNull(6));
assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector dcv =
+ HostColumnVector.fromBoxedDoubles(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, null, null)) {
+ verify.accept(dcv);
+ }
+ try (HostColumnVector dcv = ColumnBuilderHelper.fromBoxedDoubles(
+ 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, null, null)) {
+ verify.accept(dcv);
}
}
diff --git a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java
index 2fb8164534b..7d6311fb24c 100644
--- a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
import org.junit.jupiter.api.Test;
import java.util.Random;
+import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -34,47 +35,75 @@ public void testCreateColumnVectorBuilder() {
try (ColumnVector intColumnVector = ColumnVector.build(DType.INT32, 3, (b) -> b.append(1))) {
assertFalse(intColumnVector.hasNulls());
}
+ try (ColumnVector intColumnVector = ColumnBuilderHelper.buildOnDevice(
+ new HostColumnVector.BasicType(true, DType.INT32), 3, (b) -> b.append(1))) {
+ assertFalse(intColumnVector.hasNulls());
+ }
}
@Test
public void testArrayAllocation() {
- try (HostColumnVector intColumnVector = HostColumnVector.fromInts(2, 3, 5)) {
- assertFalse(intColumnVector.hasNulls());
- assertEquals(intColumnVector.getInt(0), 2);
- assertEquals(intColumnVector.getInt(1), 3);
- assertEquals(intColumnVector.getInt(2), 5);
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEquals(cv.getInt(0), 2);
+ assertEquals(cv.getInt(1), 3);
+ assertEquals(cv.getInt(2), 5);
+ };
+ try (HostColumnVector cv = HostColumnVector.fromInts(2, 3, 5)) {
+ verify.accept(cv);
+ }
+ try (HostColumnVector cv = ColumnBuilderHelper.fromInts(true, 2, 3, 5)) {
+ verify.accept(cv);
}
}
@Test
public void testUnsignedArrayAllocation() {
- try (HostColumnVector v = HostColumnVector.fromUnsignedInts(0xfedcba98, 0x80000000, 5)) {
- assertFalse(v.hasNulls());
- assertEquals(0xfedcba98L, Integer.toUnsignedLong(v.getInt(0)));
- assertEquals(0x80000000L, Integer.toUnsignedLong(v.getInt(1)));
- assertEquals(5, Integer.toUnsignedLong(v.getInt(2)));
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEquals(0xfedcba98L, Integer.toUnsignedLong(cv.getInt(0)));
+ assertEquals(0x80000000L, Integer.toUnsignedLong(cv.getInt(1)));
+ assertEquals(5, Integer.toUnsignedLong(cv.getInt(2)));
+ };
+ try (HostColumnVector cv = HostColumnVector.fromUnsignedInts(0xfedcba98, 0x80000000, 5)) {
+ verify.accept(cv);
+ }
+ try (HostColumnVector cv = ColumnBuilderHelper.fromInts(false, 0xfedcba98, 0x80000000, 5)) {
+ verify.accept(cv);
}
}
@Test
public void testUpperIndexOutOfBoundsException() {
- try (HostColumnVector intColumnVector = HostColumnVector.fromInts(2, 3, 5)) {
- assertThrows(AssertionError.class, () -> intColumnVector.getInt(3));
- assertFalse(intColumnVector.hasNulls());
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertThrows(AssertionError.class, () -> cv.getInt(3));
+ assertFalse(cv.hasNulls());
+ };
+ try (HostColumnVector icv = HostColumnVector.fromInts(2, 3, 5)) {
+ verify.accept(icv);
+ }
+ try (HostColumnVector icv = ColumnBuilderHelper.fromInts(true, 2, 3, 5)) {
+ verify.accept(icv);
}
}
@Test
public void testLowerIndexOutOfBoundsException() {
- try (HostColumnVector intColumnVector = HostColumnVector.fromInts(2, 3, 5)) {
- assertFalse(intColumnVector.hasNulls());
- assertThrows(AssertionError.class, () -> intColumnVector.getInt(-1));
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertThrows(AssertionError.class, () -> cv.getInt(-1));
+ };
+ try (HostColumnVector icv = HostColumnVector.fromInts(2, 3, 5)) {
+ verify.accept(icv);
+ }
+ try (HostColumnVector icv = ColumnBuilderHelper.fromInts(true, 2, 3, 5)) {
+ verify.accept(icv);
}
}
@Test
public void testAddingNullValues() {
- try (HostColumnVector cv = HostColumnVector.fromBoxedInts(2, 3, 4, 5, 6, 7, null, null)) {
+ Consumer<HostColumnVector> verify = (cv) -> {
assertTrue(cv.hasNulls());
assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
@@ -82,13 +111,18 @@ public void testAddingNullValues() {
}
assertTrue(cv.isNull(6));
assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector cv = HostColumnVector.fromBoxedInts(2, 3, 4, 5, 6, 7, null, null)) {
+ verify.accept(cv);
+ }
+ try (HostColumnVector cv = ColumnBuilderHelper.fromBoxedInts(true, 2, 3, 4, 5, 6, 7, null, null)) {
+ verify.accept(cv);
}
}
@Test
public void testAddingUnsignedNullValues() {
- try (HostColumnVector cv = HostColumnVector.fromBoxedUnsignedInts(
- 2, 3, 4, 5, 0xfedbca98, 0x80000000, null, null)) {
+ Consumer<HostColumnVector> verify = (cv) -> {
assertTrue(cv.hasNulls());
assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
@@ -98,6 +132,14 @@ public void testAddingUnsignedNullValues() {
assertEquals(0x80000000L, Integer.toUnsignedLong(cv.getInt(5)));
assertTrue(cv.isNull(6));
assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector cv = HostColumnVector.fromBoxedUnsignedInts(
+ 2, 3, 4, 5, 0xfedbca98, 0x80000000, null, null)) {
+ verify.accept(cv);
+ }
+ try (HostColumnVector cv = ColumnBuilderHelper.fromBoxedInts(false,
+ 2, 3, 4, 5, 0xfedbca98, 0x80000000, null, null)) {
+ verify.accept(cv);
}
}
diff --git a/java/src/test/java/ai/rapids/cudf/LongColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/LongColumnVectorTest.java
index 43c2b5a99c2..193992f5304 100644
--- a/java/src/test/java/ai/rapids/cudf/LongColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/LongColumnVectorTest.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@
import org.junit.jupiter.api.Test;
import java.util.Random;
+import java.util.function.Consumer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -38,46 +39,71 @@ public void testCreateColumnVectorBuilder() {
@Test
public void testArrayAllocation() {
- try (HostColumnVector longColumnVector = HostColumnVector.fromLongs(2L, 3L, 5L)) {
- assertFalse(longColumnVector.hasNulls());
- assertEquals(longColumnVector.getLong(0), 2);
- assertEquals(longColumnVector.getLong(1), 3);
- assertEquals(longColumnVector.getLong(2), 5);
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertEquals(cv.getLong(0), 2);
+ assertEquals(cv.getLong(1), 3);
+ assertEquals(cv.getLong(2), 5);
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromLongs(2L, 3L, 5L)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromLongs(true, 2L, 3L, 5L)) {
+ verify.accept(lcv);
}
}
@Test
public void testUnsignedArrayAllocation() {
- try (HostColumnVector longColumnVector = HostColumnVector.fromUnsignedLongs(
- 0xfedcba9876543210L, 0x8000000000000000L, 5L)) {
- assertFalse(longColumnVector.hasNulls());
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
assertEquals(Long.toUnsignedString(0xfedcba9876543210L),
- Long.toUnsignedString(longColumnVector.getLong(0)));
+ Long.toUnsignedString(cv.getLong(0)));
assertEquals(Long.toUnsignedString(0x8000000000000000L),
- Long.toUnsignedString(longColumnVector.getLong(1)));
- assertEquals(5L, longColumnVector.getLong(2));
+ Long.toUnsignedString(cv.getLong(1)));
+ assertEquals(5L, cv.getLong(2));
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromUnsignedLongs(
+ 0xfedcba9876543210L, 0x8000000000000000L, 5L)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromLongs(false,
+ 0xfedcba9876543210L, 0x8000000000000000L, 5L)) {
+ verify.accept(lcv);
}
}
@Test
public void testUpperIndexOutOfBoundsException() {
- try (HostColumnVector longColumnVector = HostColumnVector.fromLongs(2L, 3L, 5L)) {
- assertThrows(AssertionError.class, () -> longColumnVector.getLong(3));
- assertFalse(longColumnVector.hasNulls());
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertThrows(AssertionError.class, () -> cv.getLong(3));
+ assertFalse(cv.hasNulls());
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromLongs(2L, 3L, 5L)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromLongs(true, 2L, 3L, 5L)) {
+ verify.accept(lcv);
}
}
@Test
public void testLowerIndexOutOfBoundsException() {
- try (HostColumnVector longColumnVector = HostColumnVector.fromLongs(2L, 3L, 5L)) {
- assertFalse(longColumnVector.hasNulls());
- assertThrows(AssertionError.class, () -> longColumnVector.getLong(-1));
+ Consumer<HostColumnVector> verify = (cv) -> {
+ assertFalse(cv.hasNulls());
+ assertThrows(AssertionError.class, () -> cv.getLong(-1));
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromLongs(2L, 3L, 5L)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromLongs(true, 2L, 3L, 5L)) {
+ verify.accept(lcv);
}
}
@Test
public void testAddingNullValues() {
- try (HostColumnVector cv = HostColumnVector.fromBoxedLongs(2L, 3L, 4L, 5L, 6L, 7L, null, null)) {
+ Consumer<HostColumnVector> verify = (cv) -> {
assertTrue(cv.hasNulls());
assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
@@ -85,13 +111,19 @@ public void testAddingNullValues() {
}
assertTrue(cv.isNull(6));
assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromBoxedLongs(2L, 3L, 4L, 5L, 6L, 7L, null, null)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromBoxedLongs(true,
+ 2L, 3L, 4L, 5L, 6L, 7L, null, null)) {
+ verify.accept(lcv);
}
}
@Test
public void testAddingUnsignedNullValues() {
- try (HostColumnVector cv = HostColumnVector.fromBoxedUnsignedLongs(
- 2L, 3L, 4L, 5L, 0xfedcba9876543210L, 0x8000000000000000L, null, null)) {
+ Consumer<HostColumnVector> verify = (cv) -> {
assertTrue(cv.hasNulls());
assertEquals(2, cv.getNullCount());
for (int i = 0; i < 6; i++) {
@@ -103,6 +135,14 @@ public void testAddingUnsignedNullValues() {
Long.toUnsignedString(cv.getLong(5)));
assertTrue(cv.isNull(6));
assertTrue(cv.isNull(7));
+ };
+ try (HostColumnVector lcv = HostColumnVector.fromBoxedUnsignedLongs(
+ 2L, 3L, 4L, 5L, 0xfedcba9876543210L, 0x8000000000000000L, null, null)) {
+ verify.accept(lcv);
+ }
+ try (HostColumnVector lcv = ColumnBuilderHelper.fromBoxedLongs(false,
+ 2L, 3L, 4L, 5L, 0xfedcba9876543210L, 0x8000000000000000L, null, null)) {
+ verify.accept(lcv);
}
}
From c25d35b36145028bd156ef8e31a2afad0e41b0c3 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Mon, 31 Jan 2022 11:35:05 -0600
Subject: [PATCH 03/14] Preserve the correct `ListDtype` while creating an
identical empty column (#10151)
Fixes: #10122
This PR fixes an issue where the dtype of a list column's `children[1]` wasn't being preserved correctly.
Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)
Approvers:
- https://github.com/brandon-b-miller
URL: https://github.com/rapidsai/cudf/pull/10151
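For readers less familiar with the list column layout, here is a libcudf-side sketch of the invariant the fix enforces (the helper function is ours, not part of this PR): an empty LIST column still needs an INT32 offsets child with `row_count + 1` zeros plus an empty child of the original element type, which is what the Python-side `column_empty` now builds.

#include <cudf/column/column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/types.hpp>

#include <rmm/device_buffer.hpp>

#include <memory>
#include <utility>

// Build an all-empty LIST<STRING> column of `row_count` rows whose child
// keeps the element type, mirroring the Python-side fix above.
std::unique_ptr<cudf::column> empty_list_of_strings(cudf::size_type row_count)
{
  cudf::numeric_scalar<int32_t> zero{0};
  // offsets child: row_count + 1 zeros, so every list is empty
  auto offsets = cudf::make_column_from_scalar(zero, row_count + 1);
  // child column keeps the original element type (STRING here)
  auto child = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING});
  return cudf::make_lists_column(
    row_count, std::move(offsets), std::move(child), 0, rmm::device_buffer{});
}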
---
python/cudf/cudf/core/column/column.py | 6 ++++++
python/cudf/cudf/tests/test_list.py | 13 ++++++++++++-
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5d694dac255..e8ba4cc258e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1271,6 +1271,12 @@ def column_empty(
column_empty(row_count, field_dtype)
for field_dtype in dtype.fields.values()
)
+ elif is_list_dtype(dtype):
+ data = None
+ children = (
+ full(row_count + 1, 0, dtype="int32"),
+ column_empty(row_count, dtype=dtype.element_type),
+ )
elif is_categorical_dtype(dtype):
data = None
children = (
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 44749103b54..3ef579574c6 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -1,4 +1,5 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
import functools
import operator
@@ -586,3 +587,13 @@ def test_listcol_setitem_error_cases(data, item, error):
sr = cudf.Series(data)
with pytest.raises(BaseException, match=error):
sr[1] = item
+
+
+def test_listcol_setitem_retain_dtype():
+ df = cudf.DataFrame(
+ {"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]}
+ )
+ df1 = df[df.b.isna()]
+ df1["b"] = df1["c"]
+ df2 = df1.drop(["c"], axis=1)
+ assert df2.a.dtype == df.a.dtype
From c52f4838a68bcd7fe19e81ba697d075963a8b9df Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Mon, 31 Jan 2022 15:28:16 -0600
Subject: [PATCH 04/14] Add comments to explain test validation (#10176)
PR to address https://github.com/rapidsai/cudf/pull/10151#pullrequestreview-868246185
Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)
Approvers:
- Bradley Dice (https://github.com/bdice)
URL: https://github.com/rapidsai/cudf/pull/10176
---
python/cudf/cudf/tests/test_list.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 3ef579574c6..fc9ad9711d1 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -593,7 +593,12 @@ def test_listcol_setitem_retain_dtype():
df = cudf.DataFrame(
{"a": cudf.Series([["a", "b"], []]), "b": [1, 2], "c": [123, 321]}
)
- df1 = df[df.b.isna()]
+ df1 = df.head(0)
+ # Performing a setitem on `b` triggers a `column.column_empty_like` call
+ # which tries to create an empty ListColumn.
df1["b"] = df1["c"]
- df2 = df1.drop(["c"], axis=1)
- assert df2.a.dtype == df.a.dtype
+ # Perform a copy to trigger copying the dtype, which is obtained by
+ # accessing `ListColumn.children`; prior to this fix the children would
+ # have been corrupted by the previous call:
+ # https://github.com/rapidsai/cudf/pull/10151/
+ df2 = df1.copy()
+ assert df2["a"].dtype == df["a"].dtype
From 2c6b0dac61a6671642bb5b076e910e20e2bdd1b6 Mon Sep 17 00:00:00 2001
From: Bradley Dice
Date: Mon, 31 Jan 2022 17:35:47 -0600
Subject: [PATCH 05/14] Refactor groupby::get_groups. (#10161)
This PR refactors the `groupby::get_groups` method. It replaces several lines of construct-and-copy with a vector factory function and makes stream usage clearer; a minimal sketch of the pattern follows this message.
Authors:
- Bradley Dice (https://github.com/bdice)
Approvers:
- Nghia Truong (https://github.com/ttnghia)
- Mike Wilson (https://github.com/hyperbolic2346)
URL: https://github.com/rapidsai/cudf/pull/10161
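As a minimal sketch of the new pattern (the wrapper function here is hypothetical), `cudf::detail::make_std_vector_sync` from `cudf/detail/utilities/vector_factories.hpp` sizes the host vector, copies from device memory, and synchronizes on the given stream in a single call:

#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <vector>

// Replaces the manual std::vector + thrust::copy sequence this PR removes.
std::vector<cudf::size_type> offsets_to_host(
  rmm::device_uvector<cudf::size_type> const& group_offsets,
  rmm::cuda_stream_view stream)
{
  return cudf::detail::make_std_vector_sync(group_offsets, stream);
}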
---
cpp/src/groupby/groupby.cu | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index e8b4a8b1cbf..57bb222aaa0 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -37,7 +38,6 @@
#include
-#include
#include
#include
@@ -219,20 +219,18 @@ std::pair, std::vector> groupby::scan
groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
- auto grouped_keys = helper().sorted_keys(rmm::cuda_stream_default, mr);
+ auto const stream = rmm::cuda_stream_default;
+ auto grouped_keys = helper().sorted_keys(stream, mr);
- auto const& group_offsets = helper().group_offsets(rmm::cuda_stream_default);
- std::vector group_offsets_vector(group_offsets.size());
- thrust::copy(thrust::device_pointer_cast(group_offsets.begin()),
- thrust::device_pointer_cast(group_offsets.end()),
- group_offsets_vector.begin());
+ auto const& group_offsets = helper().group_offsets(stream);
+ auto const group_offsets_vector = cudf::detail::make_std_vector_sync(group_offsets, stream);
- if (values.num_columns()) {
+ if (not values.is_empty()) {
auto grouped_values = cudf::detail::gather(values,
- helper().key_sort_order(rmm::cuda_stream_default),
+ helper().key_sort_order(stream),
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
- rmm::cuda_stream_default,
+ stream,
mr);
return groupby::groups{
std::move(grouped_keys), std::move(group_offsets_vector), std::move(grouped_values)};
From 8d2a9cc8fafd40d8af2c5fba11d394d07b619833 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Tue, 1 Feb 2022 11:32:29 -0600
Subject: [PATCH 06/14] Fix JNI leak of a cudf::column_view native class.
(#10171)
This fixes a memory leak of the native `cudf::column_view` handle that we were seeing when running some tests. I will be doing more testing to be sure that all of the memory leaks are gone, but this should be a good step forward. A conceptual sketch of the ownership rule follows this message.
Authors:
- Robert (Bobby) Evans (https://github.com/revans2)
Approvers:
- Jason Lowe (https://github.com/jlowe)
- Kuhu Shukla (https://github.com/kuhushukla)
URL: https://github.com/rapidsai/cudf/pull/10171
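Conceptually (the class and field names below are illustrative, not the actual JNI wrappers), the fix makes ownership of the native view handle explicit so that exactly one party deletes it:

// Sketch of the single-owner rule the fix enforces: when an off-heap state
// object owns the native cudf::column_view handle, close() must not delete
// the handle a second time.
struct ViewWrapper {
  long view_handle{0};
  bool off_heap_owns_handle{false};  // set when an OffHeapState was handed in

  void close()
  {
    // Delete the handle only when no off-heap owner will do it for us.
    if (view_handle != 0 && !off_heap_owns_handle) { delete_column_view(view_handle); }
    view_handle = 0;
  }

  static void delete_column_view(long /*handle*/) { /* native call in the real code */ }
};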
---
.../java/ai/rapids/cudf/ColumnVector.java | 40 +++++++++++--------
.../main/java/ai/rapids/cudf/ColumnView.java | 22 +++++++++-
2 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
index 61981b34615..cb3234bf706 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
@@ -45,7 +45,6 @@ public final class ColumnVector extends ColumnView {
NativeDepsLoader.loadNativeDeps();
}
- private final OffHeapState offHeap;
private Optional<Long> nullCount = Optional.empty();
private int refCount;
@@ -56,14 +55,23 @@ public final class ColumnVector extends ColumnView {
* owned by this instance.
*/
public ColumnVector(long nativePointer) {
- super(getColumnViewFromColumn(nativePointer));
+ super(new OffHeapState(nativePointer));
assert nativePointer != 0;
- offHeap = new OffHeapState(nativePointer);
MemoryCleaner.register(this, offHeap);
this.refCount = 0;
incRefCountInternal(true);
}
+ private static OffHeapState makeOffHeap(DType type, long rows, Optional<Long> nullCount,
+ DeviceMemoryBuffer dataBuffer, DeviceMemoryBuffer validityBuffer,
+ DeviceMemoryBuffer offsetBuffer) {
+ long viewHandle = initViewHandle(
+ type, (int)rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
+ dataBuffer, validityBuffer, offsetBuffer, null);
+ return new OffHeapState(type, (int) rows, dataBuffer, validityBuffer,
+ offsetBuffer, null, viewHandle);
+ }
+
/**
* Create a new column vector based off of data already on the device.
* @param type the type of the vector
@@ -81,24 +89,29 @@ public ColumnVector(long nativePointer) {
public ColumnVector(DType type, long rows, Optional<Long> nullCount,
DeviceMemoryBuffer dataBuffer, DeviceMemoryBuffer validityBuffer,
DeviceMemoryBuffer offsetBuffer) {
- super(ColumnVector.initViewHandle(
- type, (int)rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
- dataBuffer, validityBuffer, offsetBuffer, null));
+ super(makeOffHeap(type, rows, nullCount, dataBuffer, validityBuffer, offsetBuffer));
assert !type.equals(DType.LIST) : "This constructor should not be used for list type";
if (!type.equals(DType.STRING)) {
assert offsetBuffer == null : "offsets are only supported for STRING";
}
assert (nullCount.isPresent() && nullCount.get() <= Integer.MAX_VALUE)
|| !nullCount.isPresent();
- offHeap = new OffHeapState(type, (int) rows, dataBuffer, validityBuffer,
- offsetBuffer, null, viewHandle);
MemoryCleaner.register(this, offHeap);
this.nullCount = nullCount;
-
this.refCount = 0;
incRefCountInternal(true);
}
+ private static OffHeapState makeOffHeap(DType type, long rows, Optional<Long> nullCount,
+ DeviceMemoryBuffer dataBuffer, DeviceMemoryBuffer validityBuffer,
+ DeviceMemoryBuffer offsetBuffer, List<MemoryBuffer> toClose, long[] childHandles) {
+ long viewHandle = initViewHandle(type, (int)rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
+ dataBuffer, validityBuffer,
+ offsetBuffer, childHandles);
+ return new OffHeapState(type, (int) rows, dataBuffer, validityBuffer, offsetBuffer,
+ toClose, viewHandle);
+ }
+
/**
* Create a new column vector based off of data already on the device with child columns.
* @param type the type of the vector, typically a nested type
@@ -118,16 +131,12 @@ public ColumnVector(DType type, long rows, Optional nullCount,
public ColumnVector(DType type, long rows, Optional<Long> nullCount,
DeviceMemoryBuffer dataBuffer, DeviceMemoryBuffer validityBuffer,
DeviceMemoryBuffer offsetBuffer, List<MemoryBuffer> toClose, long[] childHandles) {
- super(initViewHandle(type, (int)rows, nullCount.orElse(UNKNOWN_NULL_COUNT).intValue(),
- dataBuffer, validityBuffer,
- offsetBuffer, childHandles));
+ super(makeOffHeap(type, rows, nullCount, dataBuffer, validityBuffer, offsetBuffer, toClose, childHandles));
if (!type.equals(DType.STRING) && !type.equals(DType.LIST)) {
assert offsetBuffer == null : "offsets are only supported for STRING, LISTS";
}
assert (nullCount.isPresent() && nullCount.get() <= Integer.MAX_VALUE)
|| !nullCount.isPresent();
- offHeap = new OffHeapState(type, (int) rows, dataBuffer, validityBuffer, offsetBuffer,
- toClose, viewHandle);
MemoryCleaner.register(this, offHeap);
this.refCount = 0;
@@ -143,8 +152,7 @@ public ColumnVector(DType type, long rows, Optional nullCount,
* @param contiguousBuffer the buffer that this is based off of.
*/
private ColumnVector(long viewAddress, DeviceMemoryBuffer contiguousBuffer) {
- super(viewAddress);
- offHeap = new OffHeapState(viewAddress, contiguousBuffer);
+ super(new OffHeapState(viewAddress, contiguousBuffer));
MemoryCleaner.register(this, offHeap);
// TODO we may want to ask for the null count anyways...
this.nullCount = Optional.empty();
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index cc1fa46becb..8155fe79080 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -40,6 +40,7 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
protected final DType type;
protected final long rows;
protected final long nullCount;
+ protected final ColumnVector.OffHeapState offHeap;
/**
* Constructs a Column View given a native view address
@@ -50,6 +51,22 @@ public class ColumnView implements AutoCloseable, BinaryOperable {
this.type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle));
this.rows = ColumnView.getNativeRowCount(viewHandle);
this.nullCount = ColumnView.getNativeNullCount(viewHandle);
+ this.offHeap = null;
+ }
+
+
+ /**
+ * Intended to be called from ColumnVector when it is being constructed. Because the state
+ * creates a cudf::column_view instance and will close it in all cases, we don't want to
+ * double-close it here.
+ * @param state the state this view is based off of.
+ */
+ protected ColumnView(ColumnVector.OffHeapState state) {
+ offHeap = state;
+ viewHandle = state.getViewHandle();
+ type = DType.fromNative(ColumnView.getNativeTypeId(viewHandle), ColumnView.getNativeTypeScale(viewHandle));
+ rows = ColumnView.getNativeRowCount(viewHandle);
+ nullCount = ColumnView.getNativeNullCount(viewHandle);
}
/**
@@ -265,7 +282,10 @@ public long getDeviceMemorySize() {
@Override
public void close() {
- ColumnView.deleteColumnView(viewHandle);
+ // close the view handle so long as offHeap is not going to do it for us.
+ if (offHeap == null) {
+ ColumnView.deleteColumnView(viewHandle);
+ }
viewHandle = 0;
}
From a080a4c984473595973dc6936fd9fc9f7f01d92d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Wed, 2 Feb 2022 11:45:28 -0600
Subject: [PATCH 07/14] Remove cleaned up methods from docs (#10189)
This PR removes the remaining documentation references to `to_array`. The actual API cleanup was done in: https://github.com/rapidsai/cudf/pull/10124/
Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)
Approvers:
- Bradley Dice (https://github.com/bdice)
URL: https://github.com/rapidsai/cudf/pull/10189
---
docs/cudf/source/api_docs/index_objects.rst | 3 +--
docs/cudf/source/api_docs/series.rst | 4 ++--
docs/cudf/source/user_guide/10min.ipynb | 4 ++--
python/cudf/cudf/core/column/column.py | 2 +-
4 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
index 2a4dd5ff9c8..d705504cc0c 100644
--- a/docs/cudf/source/api_docs/index_objects.rst
+++ b/docs/cudf/source/api_docs/index_objects.rst
@@ -22,7 +22,6 @@ Properties
:toctree: api/
Index.empty
- Index.gpu_values
Index.has_duplicates
Index.is_monotonic
Index.is_monotonic_increasing
@@ -93,9 +92,9 @@ Conversion
:toctree: api/
Index.astype
- Index.to_array
Index.to_arrow
Index.to_list
+ Index.to_numpy
Index.to_series
Index.to_frame
Index.to_pandas
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 891bb3a1e61..cf5dd4a2a1d 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -408,13 +408,13 @@ Serialization / IO / conversion
.. autosummary::
:toctree: api/
- Series.to_array
Series.to_arrow
+ Series.to_cupy
Series.to_dlpack
Series.to_frame
- Series.to_gpu_array
Series.to_hdf
Series.to_json
+ Series.to_numpy
Series.to_pandas
Series.to_string
Series.from_arrow
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
index a7e959a05a7..0034584a6f7 100644
--- a/docs/cudf/source/user_guide/10min.ipynb
+++ b/docs/cudf/source/user_guide/10min.ipynb
@@ -4550,7 +4550,7 @@
}
],
"source": [
- "df['a'].to_array()"
+ "df['a'].to_numpy()"
]
},
{
@@ -4571,7 +4571,7 @@
}
],
"source": [
- "ddf['a'].compute().to_array()"
+ "ddf['a'].compute().to_numpy()"
]
},
{
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index e8ba4cc258e..82641d83b07 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -986,7 +986,7 @@ def __array__(self, dtype=None):
raise TypeError(
"Implicit conversion to a host NumPy array via __array__ is not "
"allowed. To explicitly construct a host array, consider using "
- ".to_array()"
+ ".to_numpy()"
)
@property
From b6bb463c6fc8b9f9b6fad4284e6fc276725d0db0 Mon Sep 17 00:00:00 2001
From: Yunsong Wang
Date: Wed, 2 Feb 2022 14:39:10 -0500
Subject: [PATCH 08/14] Optimize compaction operations (#10030)
Related to https://github.com/rapidsai/cudf/issues/9413.
This PR adds `unordered_drop_duplicates`/`unordered_distinct_count` APIs by using hash-based algorithms. It doesn't close the original issue since adding `std::unique`-like `drop_duplicates` is not addressed in this PR. It involves several changes:
- [x] Change the behavior of the existing `distinct_count`: counting the number of consecutive groups of equivalent rows instead of total unique.
- [x] Add hash-based `unordered_distinct_count`: this new API counts unique rows across the whole table by using a hash map. It requires a newer version of `cuco` with bug fixing: https://github.com/NVIDIA/cuCollections/pull/132 and https://github.com/NVIDIA/cuCollections/pull/138.
- [x] Add hash-based `unordered_drop_duplicates`: similar to `drop_duplicates`, but this API doesn't support `keep` option and the output is in an unspecified order.
- [x] Replace all the cpp-side `drop_duplicates`/`distinct_count` use cases with `unordered_` versions.
- [x] Update and replace the existing compaction benchmark with `nvbench`.
Authors:
- Yunsong Wang (https://github.com/PointKernel)
Approvers:
- https://github.com/brandon-b-miller
- Bradley Dice (https://github.com/bdice)
- Nghia Truong (https://github.com/ttnghia)
- Robert Maynard (https://github.com/robertmaynard)
URL: https://github.com/rapidsai/cudf/pull/10030
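A short usage sketch of the new public entry points declared in `cudf/stream_compaction.hpp` (the wrapper functions are ours, for illustration only):

#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>

#include <memory>
#include <vector>

// Drop duplicate rows based on the first column; the result order is
// unspecified and there is no `keep` option.
std::unique_ptr<cudf::table> dedupe_first_column(cudf::table_view const& input)
{
  std::vector<cudf::size_type> keys{0};
  return cudf::unordered_drop_duplicates(input, keys, cudf::null_equality::EQUAL);
}

// Count unique rows across the whole table with the hash-based API.
cudf::size_type count_unique_rows(cudf::table_view const& input)
{
  return cudf::unordered_distinct_count(input, cudf::null_equality::EQUAL);
}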
---
cpp/benchmarks/CMakeLists.txt | 4 +-
.../stream_compaction/drop_duplicates.cpp | 118 +++--
cpp/cmake/thirdparty/get_cucollections.cmake | 4 +-
cpp/include/cudf/detail/stream_compaction.hpp | 33 +-
cpp/include/cudf/stream_compaction.hpp | 101 +++-
cpp/src/dictionary/add_keys.cu | 42 +-
cpp/src/dictionary/detail/concatenate.cu | 21 +-
cpp/src/dictionary/set_keys.cu | 24 +-
cpp/src/reductions/reductions.cpp | 4 +-
cpp/src/stream_compaction/distinct_count.cu | 242 +++++++---
cpp/src/stream_compaction/drop_duplicates.cu | 87 +++-
.../stream_compaction_common.cuh | 58 +++
.../stream_compaction_common.hpp | 47 ++
cpp/src/transform/encode.cu | 37 +-
cpp/tests/CMakeLists.txt | 3 +-
.../distinct_count_tests.cpp | 370 +++++++++++++++
.../drop_duplicates_tests.cpp | 437 +++++++-----------
.../cudf/cudf/_lib/cpp/stream_compaction.pxd | 20 +-
python/cudf/cudf/_lib/stream_compaction.pyx | 6 +-
19 files changed, 1170 insertions(+), 488 deletions(-)
create mode 100644 cpp/src/stream_compaction/stream_compaction_common.cuh
create mode 100644 cpp/src/stream_compaction/stream_compaction_common.hpp
create mode 100644 cpp/tests/stream_compaction/distinct_count_tests.cpp
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 57592de59af..13ef02efc99 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -1,5 +1,5 @@
# =============================================================================
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -123,7 +123,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
-ConfigureBench(STREAM_COMPACTION_BENCH stream_compaction/drop_duplicates.cpp)
+ConfigureNVBench(STREAM_COMPACTION_BENCH stream_compaction/drop_duplicates.cpp)
# ##################################################################################################
# * join benchmark --------------------------------------------------------------------------------
diff --git a/cpp/benchmarks/stream_compaction/drop_duplicates.cpp b/cpp/benchmarks/stream_compaction/drop_duplicates.cpp
index 8039d7d065f..317db92ae8b 100644
--- a/cpp/benchmarks/stream_compaction/drop_duplicates.cpp
+++ b/cpp/benchmarks/stream_compaction/drop_duplicates.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,64 +15,102 @@
*/
#include
-#include
+#include
#include
#include
#include
-#include
-#include
+
+#include
+
+#include
#include
#include
-class Compaction : public cudf::benchmark {
-};
+// necessary for custom enum types
+// see: https://github.com/NVIDIA/nvbench/blob/main/examples/enums.cu
+NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
+ // Enum type:
+ cudf::duplicate_keep_option,
+ // Callable to generate input strings:
+ [](cudf::duplicate_keep_option option) {
+ switch (option) {
+ case cudf::duplicate_keep_option::KEEP_FIRST: return "KEEP_FIRST";
+ case cudf::duplicate_keep_option::KEEP_LAST: return "KEEP_LAST";
+ case cudf::duplicate_keep_option::KEEP_NONE: return "KEEP_NONE";
+ default: return "ERROR";
+ }
+ },
+ // Callable to generate descriptions:
+ [](auto) { return std::string{}; })
+
+NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms");
+
+template <typename Type, cudf::duplicate_keep_option Keep>
+void nvbench_drop_duplicates(nvbench::state& state,
+ nvbench::type_list<Type, nvbench::enum_type<Keep>>)
+{
+ if constexpr (not std::is_same_v<Type, int32_t> and
+ Keep != cudf::duplicate_keep_option::KEEP_FIRST) {
+ state.skip("Skip unwanted benchmarks.");
+ }
+
+ cudf::rmm_pool_raii pool_raii;
+
+ auto const num_rows = state.get_int64("NumRows");
+
+ cudf::test::UniformRandomGenerator rand_gen(0, 100);
+ auto elements = cudf::detail::make_counting_transform_iterator(
+ 0, [&rand_gen](auto row) { return rand_gen.generate(); });
+ auto valids = cudf::detail::make_counting_transform_iterator(
+ 0, [](auto i) { return i % 100 == 0 ? false : true; });
+ cudf::test::fixed_width_column_wrapper values(elements, elements + num_rows, valids);
+
+ auto input_column = cudf::column_view(values);
+ auto input_table = cudf::table_view({input_column, input_column, input_column, input_column});
+
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ rmm::cuda_stream_view stream_view{launch.get_stream()};
+ auto result = cudf::detail::drop_duplicates(
+ input_table, {0}, Keep, cudf::null_equality::EQUAL, cudf::null_order::BEFORE, stream_view);
+ });
+}
template <typename Type>
-void BM_compaction(benchmark::State& state, cudf::duplicate_keep_option keep)
+void nvbench_unordered_drop_duplicates(nvbench::state& state, nvbench::type_list<Type>)
{
- auto const n_rows = static_cast(state.range(0));
+ cudf::rmm_pool_raii pool_raii;
+
+ auto const num_rows = state.get_int64("NumRows");
cudf::test::UniformRandomGenerator rand_gen(0, 100);
auto elements = cudf::detail::make_counting_transform_iterator(
0, [&rand_gen](auto row) { return rand_gen.generate(); });
auto valids = cudf::detail::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
- cudf::test::fixed_width_column_wrapper values(elements, elements + n_rows, valids);
+ cudf::test::fixed_width_column_wrapper values(elements, elements + num_rows, valids);
auto input_column = cudf::column_view(values);
auto input_table = cudf::table_view({input_column, input_column, input_column, input_column});
- for (auto _ : state) {
- cuda_event_timer timer(state, true);
- auto result = cudf::drop_duplicates(input_table, {0}, keep);
- }
+ state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+ rmm::cuda_stream_view stream_view{launch.get_stream()};
+ auto result = cudf::detail::unordered_drop_duplicates(
+ input_table, {0}, cudf::null_equality::EQUAL, stream_view);
+ });
}
-#define concat(a, b, c) a##b##c
-#define get_keep(op) cudf::duplicate_keep_option::KEEP_##op
-
-// TYPE, OP
-#define RBM_BENCHMARK_DEFINE(name, type, keep) \
- BENCHMARK_DEFINE_F(Compaction, name)(::benchmark::State & state) \
- { \
- BM_compaction(state, get_keep(keep)); \
- } \
- BENCHMARK_REGISTER_F(Compaction, name) \
- ->UseManualTime() \
- ->Arg(10000) /* 10k */ \
- ->Arg(100000) /* 100k */ \
- ->Arg(1000000) /* 1M */ \
- ->Arg(10000000) /* 10M */
-
-#define COMPACTION_BENCHMARK_DEFINE(type, keep) \
- RBM_BENCHMARK_DEFINE(concat(type, _, keep), type, keep)
-
-COMPACTION_BENCHMARK_DEFINE(bool, NONE);
-COMPACTION_BENCHMARK_DEFINE(int8_t, NONE);
-COMPACTION_BENCHMARK_DEFINE(int32_t, NONE);
-COMPACTION_BENCHMARK_DEFINE(int32_t, FIRST);
-COMPACTION_BENCHMARK_DEFINE(int32_t, LAST);
-using cudf::timestamp_ms;
-COMPACTION_BENCHMARK_DEFINE(timestamp_ms, NONE);
-COMPACTION_BENCHMARK_DEFINE(float, NONE);
+using data_type = nvbench::type_list<bool, int8_t, int32_t, cudf::timestamp_ms, float>;
+using keep_option = nvbench::enum_type_list<cudf::duplicate_keep_option::KEEP_FIRST,
+ cudf::duplicate_keep_option::KEEP_LAST,
+ cudf::duplicate_keep_option::KEEP_NONE>;
+
+NVBENCH_BENCH_TYPES(nvbench_drop_duplicates, NVBENCH_TYPE_AXES(data_type, keep_option))
+ .set_name("drop_duplicates")
+ .set_type_axes_names({"Type", "KeepOption"})
+ .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
+
+NVBENCH_BENCH_TYPES(nvbench_unordered_drop_duplicates, NVBENCH_TYPE_AXES(data_type))
+ .set_name("unordered_drop_duplicates")
+ .set_type_axes_names({"Type"})
+ .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000});
diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
index c964c85156c..5a20f78b798 100644
--- a/cpp/cmake/thirdparty/get_cucollections.cmake
+++ b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -1,5 +1,5 @@
# =============================================================================
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -21,7 +21,7 @@ function(find_and_configure_cucollections)
cuco 0.0
GLOBAL_TARGETS cuco::cuco
CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
- GIT_TAG 0ca860b824f5dc22cf8a41f09912e62e11f07d82
+ GIT_TAG 6ec8b6dcdeceea07ab4456d32461a05c18864411
OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
)
diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index 87823d71c6f..3d065556827 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -75,6 +75,18 @@ std::unique_ptr drop_duplicates(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/**
+ * @copydoc cudf::unordered_drop_duplicates
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<table> unordered_drop_duplicates(
+ table_view const& input,
+ std::vector<size_type> const& keys,
+ null_equality nulls_equal = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
/**
* @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy)
*
@@ -94,5 +106,24 @@ cudf::size_type distinct_count(table_view const& input,
null_equality nulls_equal = null_equality::EQUAL,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+/**
+ * @copydoc cudf::unordered_distinct_count(column_view const&, null_policy, nan_policy)
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type unordered_distinct_count(column_view const& input,
+ null_policy null_handling,
+ nan_policy nan_handling,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
+/**
+ * @copydoc cudf::unordered_distinct_count(table_view const&, null_equality)
+ *
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ */
+cudf::size_type unordered_distinct_count(table_view const& input,
+ null_equality nulls_equal = null_equality::EQUAL,
+ rmm::cuda_stream_view stream = rmm::cuda_stream_default);
+
} // namespace detail
} // namespace cudf
diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp
index 7551511d281..94039d81f31 100644
--- a/cpp/include/cudf/stream_compaction.hpp
+++ b/cpp/include/cudf/stream_compaction.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -189,7 +189,7 @@ std::unique_ptr drop_nans(
* @note if @p input.num_rows() is zero, there is no error, and an empty table
* is returned.
*
- * @throws cudf::logic_error if The `input` size and `boolean_mask` size mismatches.
+ * @throws cudf::logic_error if `input.num_rows() != boolean_mask.size()`.
* @throws cudf::logic_error if `boolean_mask` is not `type_id::BOOL8` type.
*
* @param[in] input The input table_view to filter
@@ -214,7 +214,10 @@ enum class duplicate_keep_option {
};
/**
- * @brief Create a new table without duplicate rows
+ * @brief Create a new table without duplicate rows.
+ *
+ * The output table is sorted according to the lexicographic ordering of the data in the columns
+ * indexed by `keys`.
*
* Given an `input` table_view, each row is copied to output table if the corresponding
* row of `keys` columns is unique, where the definition of unique depends on the value of @p keep:
@@ -222,18 +225,18 @@ enum class duplicate_keep_option {
* - KEEP_LAST: only the last of a sequence of duplicate rows is copied
* - KEEP_NONE: no duplicate rows are copied
*
- * @throws cudf::logic_error if The `input` row size mismatches with `keys`.
+ * @throws cudf::logic_error if the `keys` column indices are out of bounds in the `input` table.
*
* @param[in] input input table_view to copy only unique rows
* @param[in] keys vector of indices representing key columns from `input`
- * @param[in] keep keep first entry, last entry, or no entries if duplicates found
+ * @param[in] keep keep first row, last row, or no rows of the found duplicates
* @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not
* equal if null_equality::UNEQUAL
* @param[in] null_precedence flag to denote nulls should appear before or after non-null items
* @param[in] mr Device memory resource used to allocate the returned table's device
- * memory
+ * memory
*
- * @return Table with unique rows as per specified `keep`.
+ * @return Table with sorted unique rows as specified by `keep`.
*/
std::unique_ptr<table> drop_duplicates(
table_view const& input,
@@ -244,37 +247,95 @@ std::unique_ptr drop_duplicates(
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/**
- * @brief Count the unique elements in the column_view
+ * @brief Create a new table without duplicate rows with hash-based algorithms.
+ *
+ * Given an `input` table_view, each row is copied to output table if the corresponding
+ * row of `keys` columns is unique. If duplicate rows are present, it is unspecified which
+ * row is copied.
*
- * Given an input column_view, number of unique elements in this column_view is returned
+ * The order of elements in the output table is not specified.
+ *
+ * @param[in] input input table_view to copy only unique rows
+ * @param[in] keys vector of indices representing key columns from `input`
+ * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not
+ * equal if null_equality::UNEQUAL
+ * @param[in] mr Device memory resource used to allocate the returned table's device
+ * memory
+ *
+ * @return Table with unique rows in an unspecified order.
+ */
+std::unique_ptr<table> unordered_drop_duplicates(
+ table_view const& input,
+ std::vector<size_type> const& keys,
+ null_equality nulls_equal = null_equality::EQUAL,
+ rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Count the number of consecutive groups of equivalent elements in a column.
*
* If `null_handling` is null_policy::EXCLUDE and `nan_handling` is nan_policy::NAN_IS_NULL, both
* `NaN` and `null` values are ignored. If `null_handling` is null_policy::EXCLUDE and
- * `nan_handling` is nan_policy::NAN_IS_VALID, only `null` is ignored, `NaN` is considered in unique
- * count.
+ * `nan_handling` is nan_policy::NAN_IS_VALID, only `null` is ignored, `NaN` is considered in count.
+ *
+ * `null`s are handled as equal.
*
- * @param[in] input The column_view whose unique elements will be counted.
+ * @param[in] input The column_view whose number of distinct consecutive groups will be counted
* @param[in] null_handling flag to include or ignore `null` while counting
- * @param[in] nan_handling flag to consider `NaN==null` or not.
+ * @param[in] nan_handling flag to consider `NaN==null` or not
*
- * @return number of unique elements
+ * @return number of distinct consecutive groups in the column
*/
cudf::size_type distinct_count(column_view const& input,
null_policy null_handling,
nan_policy nan_handling);
/**
- * @brief Count the unique rows in a table.
- *
+ * @brief Count the number of consecutive groups of equivalent elements in a table.
*
- * @param[in] input Table whose unique rows will be counted.
- * @param[in] nulls_equal flag to denote if null elements should be considered equal
- * nulls are not equal if null_equality::UNEQUAL
+ * @param[in] input Table whose number of distinct consecutive groups will be counted
+ * @param[in] nulls_equal flag to denote if null elements should be considered equal.
+ * nulls are not equal if null_equality::UNEQUAL.
*
- * @return number of unique rows in the table
+ * @return number of distinct consecutive groups in the table
*/
cudf::size_type distinct_count(table_view const& input,
null_equality nulls_equal = null_equality::EQUAL);
+/**
+ * @brief Count the unique elements in the column_view.
+ *
+ * If `nulls_equal == null_equality::UNEQUAL`, all `null`s are unique.
+ *
+ * Given an input column_view, number of unique elements in this column_view is returned.
+ *
+ * If `null_handling` is null_policy::EXCLUDE and `nan_handling` is nan_policy::NAN_IS_NULL, both
+ * `NaN` and `null` values are ignored. If `null_handling` is null_policy::EXCLUDE and
+ * `nan_handling` is nan_policy::NAN_IS_VALID, only `null` is ignored, `NaN` is considered in unique
+ * count.
+ *
+ * `null`s are handled as equal.
+ *
+ * @param[in] input The column_view whose unique elements will be counted
+ * @param[in] null_handling flag to include or ignore `null` while counting
+ * @param[in] nan_handling flag to consider `NaN==null` or not
+ *
+ * @return number of unique elements
+ */
+cudf::size_type unordered_distinct_count(column_view const& input,
+ null_policy null_handling,
+ nan_policy nan_handling);
+
+/**
+ * @brief Count the unique rows in a table.
+ *
+ * @param[in] input Table whose unique rows will be counted
+ * @param[in] nulls_equal flag to denote if null elements should be considered equal.
+ * nulls are not equal if null_equality::UNEQUAL.
+ *
+ * @return number of unique rows in the table
+ */
+cudf::size_type unordered_distinct_count(table_view const& input,
+ null_equality nulls_equal = null_equality::EQUAL);
+
/** @} */
} // namespace cudf
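To make the distinction documented above concrete, a small sketch using the test utilities (not a test from this PR): for the column `{1, 3, 3, 1}`, `distinct_count` reports 3 consecutive groups while `unordered_distinct_count` reports 2 unique values.

#include <cudf/stream_compaction.hpp>
#include <cudf_test/column_wrapper.hpp>

void distinct_count_semantics()
{
  cudf::test::fixed_width_column_wrapper<int32_t> col{1, 3, 3, 1};
  // Number of consecutive groups of equivalent elements: 3 (1 | 3,3 | 1).
  auto groups = cudf::distinct_count(
    col, cudf::null_policy::INCLUDE, cudf::nan_policy::NAN_IS_VALID);
  // Number of unique elements across the whole column: 2 ({1, 3}).
  auto unique = cudf::unordered_distinct_count(
    col, cudf::null_policy::INCLUDE, cudf::nan_policy::NAN_IS_VALID);
  (void)groups;
  (void)unique;
}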
diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu
index e3d1ea88ece..96b7fd48dc9 100644
--- a/cpp/src/dictionary/add_keys.cu
+++ b/cpp/src/dictionary/add_keys.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -57,26 +58,29 @@ std::unique_ptr add_keys(
// [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e]
auto combined_keys =
cudf::detail::concatenate(std::vector{old_keys, new_keys}, stream);
- // sort and remove any duplicates from the combined keys
- // drop_duplicates([a,b,c,d,f,d,b,e]) = [a,b,c,d,e,f]
- auto table_keys = cudf::detail::drop_duplicates(table_view{{combined_keys->view()}},
- std::vector{0}, // only one key column
- duplicate_keep_option::KEEP_FIRST,
- null_equality::EQUAL,
- null_order::BEFORE,
- stream,
- mr)
- ->release();
- std::unique_ptr<column> keys_column(std::move(table_keys.front()));
+
+ // Drop duplicates from the combined keys, then sort the result.
+ // sort(unordered_drop_duplicates([a,b,c,d,f,d,b,e])) = [a,b,c,d,e,f]
+ auto table_keys =
+ cudf::detail::unordered_drop_duplicates(table_view{{combined_keys->view()}},
+ std::vector{0}, // only one key column
+ null_equality::EQUAL,
+ stream,
+ mr);
+ std::vector column_order{order::ASCENDING};
+ std::vector null_precedence{null_order::AFTER}; // should be no nulls here
+ auto sorted_keys =
+ cudf::detail::sort(table_keys->view(), column_order, null_precedence, stream, mr)->release();
+
+ std::unique_ptr<column> keys_column(std::move(sorted_keys.front()));
// create a map for the indices
// lower_bound([a,b,c,d,e,f],[a,b,c,d,f]) = [0,1,2,3,5]
- auto map_indices = cudf::detail::lower_bound(
- table_view{{keys_column->view()}},
- table_view{{old_keys}},
- std::vector{order::ASCENDING},
- std::vector{null_order::AFTER}, // should be no nulls here
- stream,
- mr);
+ auto map_indices = cudf::detail::lower_bound(table_view{{keys_column->view()}},
+ table_view{{old_keys}},
+ column_order,
+ null_precedence,
+ stream,
+ mr);
// now create the indices column -- map old values to the new ones
// gather([4,0,3,1,2,2,2,4,0],[0,1,2,3,5]) = [5,0,3,1,2,2,2,5,0]
column_view indices_view(dictionary_column.indices().type(),
diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu
index fd86d8ec7d4..301338fa1a8 100644
--- a/cpp/src/dictionary/detail/concatenate.cu
+++ b/cpp/src/dictionary/detail/concatenate.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -216,15 +217,15 @@ std::unique_ptr concatenate(host_span columns,
// sort keys and remove duplicates;
// this becomes the keys child for the output dictionary column
- auto table_keys = cudf::detail::drop_duplicates(table_view{{all_keys->view()}},
- std::vector{0},
- duplicate_keep_option::KEEP_FIRST,
- null_equality::EQUAL,
- null_order::BEFORE,
- stream,
- mr)
- ->release();
- std::unique_ptr<column> keys_column(std::move(table_keys.front()));
+ auto table_keys = cudf::detail::unordered_drop_duplicates(
+ table_view{{all_keys->view()}}, std::vector{0}, null_equality::EQUAL, stream, mr);
+ auto sorted_keys = cudf::detail::sort(table_keys->view(),
+ std::vector{order::ASCENDING},
+ std::vector{null_order::BEFORE},
+ stream,
+ mr)
+ ->release();
+ std::unique_ptr<column> keys_column(std::move(sorted_keys.front()));
// next, concatenate the indices
std::vector indices_views(columns.size());
diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu
index 72f6e034479..c1fb1fa2180 100644
--- a/cpp/src/dictionary/set_keys.cu
+++ b/cpp/src/dictionary/set_keys.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -120,16 +121,17 @@ std::unique_ptr set_keys(
auto keys = dictionary_column.keys();
CUDF_EXPECTS(keys.type() == new_keys.type(), "keys types must match");
- // copy the keys -- use drop_duplicates to make sure they are sorted and unique
- auto table_keys = cudf::detail::drop_duplicates(table_view{{new_keys}},
- std::vector{0},
- duplicate_keep_option::KEEP_FIRST,
- null_equality::EQUAL,
- null_order::BEFORE,
- stream,
- mr)
- ->release();
- std::unique_ptr<column> keys_column(std::move(table_keys.front()));
+ // copy the keys -- use unordered_drop_duplicates to make sure they are unique, then
+ // sort the results.
+ auto unique_keys = cudf::detail::unordered_drop_duplicates(
+ table_view{{new_keys}}, std::vector{0}, null_equality::EQUAL, stream, mr);
+ auto sorted_keys = cudf::detail::sort(unique_keys->view(),
+ std::vector{order::ASCENDING},
+ std::vector{null_order::BEFORE},
+ stream,
+ mr)
+ ->release();
+ std::unique_ptr<column> keys_column(std::move(sorted_keys.front()));
// compute the new nulls
auto matches = cudf::detail::contains(keys, keys_column->view(), stream, mr);
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 6f9149a47e2..234eaf51f96 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -93,7 +93,7 @@ struct reduce_dispatch_functor {
case aggregation::NUNIQUE: {
auto nunique_agg = dynamic_cast<nunique_aggregation*>(agg.get());
return make_fixed_width_scalar(
- detail::distinct_count(
+ detail::unordered_distinct_count(
col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream),
stream,
mr);
diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu
index 5c695f8a16f..2c7488084b5 100644
--- a/cpp/src/stream_compaction/distinct_count.cu
+++ b/cpp/src/stream_compaction/distinct_count.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,18 @@
* limitations under the License.
*/
+#include "stream_compaction_common.cuh"
+#include "stream_compaction_common.hpp"
+
#include
#include
#include
+#include
+#include
#include
#include
#include
#include
-#include
#include
#include
@@ -30,39 +34,19 @@
#include
#include
+#include
+
+#include
+#include
+#include
+#include
#include
namespace cudf {
namespace detail {
-
-cudf::size_type distinct_count(table_view const& keys,
- null_equality nulls_equal,
- rmm::cuda_stream_view stream)
-{
- // sort only indices
- auto sorted_indices = sorted_order(keys,
- std::vector<order>{},
- std::vector<null_order>{},
- stream,
- rmm::mr::get_current_device_resource());
-
- // count unique elements
- auto sorted_row_index = sorted_indices->view().data<cudf::size_type>();
- auto device_input_table = cudf::table_device_view::create(keys, stream);
-
- row_equality_comparator comp(
- nullate::DYNAMIC{cudf::has_nulls(keys)}, *device_input_table, *device_input_table, nulls_equal);
- return thrust::count_if(
- rmm::exec_policy(stream),
- thrust::counting_iterator(0),
- thrust::counting_iterator(keys.num_rows()),
- [sorted_row_index, comp] __device__(cudf::size_type i) {
- return (i == 0 || not comp(sorted_row_index[i], sorted_row_index[i - 1]));
- });
-}
-
+namespace {
/**
- * @brief Functor to check for `NAN` at an index in a `column_device_view`.
+ * @brief Functor to check for `NaN` at an index in a `column_device_view`.
*
* @tparam T The type of `column_device_view`
*/
@@ -76,97 +60,199 @@ struct check_for_nan {
check_for_nan(cudf::column_device_view input) : _input{input} {}
/**
- * @brief Operator to be called to check for `NAN` at `index` in `_input`
+ * @brief Operator to be called to check for `NaN` at `index` in `_input`
*
- * @param[in] index The index at which the `NAN` needs to be checked in `input`
+ * @param[in] index The index at which the `NaN` needs to be checked in `input`
*
- * @returns bool true if value at `index` is `NAN` and not null, else false
+ * @returns bool true if value at `index` is `NaN` and not null, else false
*/
- __device__ bool operator()(size_type index)
+ __device__ bool operator()(size_type index) const noexcept
{
return std::isnan(_input.data<T>()[index]) and _input.is_valid(index);
}
- protected:
cudf::column_device_view _input;
};
/**
* @brief A structure to be used along with type_dispatcher to check if a
- * `column_view` has `NAN`.
+ * `column_view` has `NaN`.
*/
struct has_nans {
/**
- * @brief Checks if `input` has `NAN`
+ * @brief Checks if `input` has `NaN`
*
* @note This will be applicable only for floating point type columns.
*
- * @param[in] input The `column_view` which will be checked for `NAN`
+ * @param[in] input The `column_view` which will be checked for `NaN`
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*
- * @returns bool true if `input` has `NAN` else false
+ * @returns bool true if `input` has `NaN` else false
*/
- template <typename T, std::enable_if_t<std::is_floating_point<T>::value>* = nullptr>
+ template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
bool operator()(column_view const& input, rmm::cuda_stream_view stream)
{
auto input_device_view = cudf::column_device_view::create(input, stream);
auto device_view = *input_device_view;
- auto count = thrust::count_if(rmm::exec_policy(stream),
- thrust::counting_iterator(0),
- thrust::counting_iterator(input.size()),
- check_for_nan(device_view));
- return count > 0;
+ return thrust::any_of(rmm::exec_policy(stream),
+ thrust::counting_iterator(0),
+ thrust::counting_iterator(input.size()),
+ check_for_nan(device_view));
}
/**
- * @brief Checks if `input` has `NAN`
+ * @brief Checks if `input` has `NaN`
*
* @note This will be applicable only for non-floating point type columns. And
- * non-floating point columns can never have `NAN`, so it will always return
+ * non-floating point columns can never have `NaN`, so it will always return
* false
*
- * @param[in] input The `column_view` which will be checked for `NAN`
+ * @param[in] input The `column_view` which will be checked for `NaN`
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*
- * @returns bool Always false as non-floating point columns can't have `NAN`
+ * @returns bool Always false as non-floating point columns can't have `NaN`
*/
- template <typename T, std::enable_if_t<!std::is_floating_point<T>::value>* = nullptr>
- bool operator()(column_view const& input, rmm::cuda_stream_view stream)
+ template <typename T, std::enable_if_t<not std::is_floating_point_v<T>>* = nullptr>
+ bool operator()(column_view const&, rmm::cuda_stream_view)
+ {
+ return false;
+ }
+};
+
+/**
+ * @brief A functor to be used along with device type_dispatcher to check if
+ * the row `index` of `column_device_view` is `NaN`.
+ */
+struct check_nan {
+ // Check if it's `NaN` for floating point type columns
+ template <typename T, std::enable_if_t<std::is_floating_point_v<T>>* = nullptr>
+ __device__ inline bool operator()(column_device_view const& input, size_type index)
+ {
+ return std::isnan(input.data<T>()[index]);
+ }
+ // Non-floating point type columns can never have `NaN`, so it will always return false.
+ template <typename T, std::enable_if_t<not std::is_floating_point_v<T>>* = nullptr>
+ __device__ inline bool operator()(column_device_view const&, size_type)
{
return false;
}
};
+} // namespace
+
+cudf::size_type distinct_count(table_view const& keys,
+ null_equality nulls_equal,
+ rmm::cuda_stream_view stream)
+{
+ auto table_ptr = cudf::table_device_view::create(keys, stream);
+ row_equality_comparator comp(
+ nullate::DYNAMIC{cudf::has_nulls(keys)}, *table_ptr, *table_ptr, nulls_equal);
+ return thrust::count_if(
+ rmm::exec_policy(stream),
+ thrust::counting_iterator<cudf::size_type>(0),
+ thrust::counting_iterator<cudf::size_type>(keys.num_rows()),
+ [comp] __device__(cudf::size_type i) { return (i == 0 or not comp(i, i - 1)); });
+}
+
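[Note] The detail::distinct_count above counts group boundaries: a row contributes iff it is the first row or differs from its immediate predecessor, so the result equals the number of distinct rows whenever equal rows are adjacent (e.g. sorted input). A minimal host-side C++ sketch of the same idea (illustrative only, not cudf API):

    #include <cstddef>
    #include <vector>

    // Count distinct values in a sequence where equal values are adjacent.
    std::size_t adjacent_distinct_count(std::vector<int> const& vals)
    {
      std::size_t count = 0;
      for (std::size_t i = 0; i < vals.size(); ++i) {
        // A new group starts at i == 0 or wherever vals[i] != vals[i - 1].
        if (i == 0 || vals[i] != vals[i - 1]) { ++count; }
      }
      return count;
    }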
+cudf::size_type unordered_distinct_count(table_view const& keys,
+ null_equality nulls_equal,
+ rmm::cuda_stream_view stream)
+{
+ auto table_ptr = cudf::table_device_view::create(keys, stream);
+ auto const num_rows = table_ptr->num_rows();
+ auto const has_null = nullate::DYNAMIC{cudf::has_nulls(keys)};
+
+ hash_map_type key_map{compute_hash_table_size(num_rows),
+ COMPACTION_EMPTY_KEY_SENTINEL,
+ COMPACTION_EMPTY_VALUE_SENTINEL,
+ detail::hash_table_allocator_type{default_allocator{}, stream},
+ stream.value()};
+
+ compaction_hash hash_key{has_null, *table_ptr};
+ row_equality_comparator row_equal(has_null, *table_ptr, *table_ptr, nulls_equal);
+ auto iter = cudf::detail::make_counting_transform_iterator(
+ 0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
+
+ // when nulls are equal, insert non-null rows only to improve efficiency
+ if (nulls_equal == null_equality::EQUAL and has_null) {
+ thrust::counting_iterator<size_type> stencil(0);
+ auto const [row_bitmask, null_count] = cudf::detail::bitmask_or(keys, stream);
+ row_validity pred{static_cast<bitmask_type const*>(row_bitmask.data())};
+
+ key_map.insert_if(iter, iter + num_rows, stencil, pred, hash_key, row_equal, stream.value());
+ return key_map.get_size() + static_cast<cudf::size_type>((null_count > 0) ? 1 : 0);
+ }
+ // otherwise, insert all
+ key_map.insert(iter, iter + num_rows, hash_key, row_equal, stream.value());
+ return key_map.get_size();
+}
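[Note] unordered_distinct_count needs no ordering assumption: it inserts one (index, index) pair per distinct row into a cuco::static_map keyed by row content (via the custom hash/equality functors), and the final map size is the answer. When nulls compare equal, null rows are filtered out with insert_if and folded into a single +1 adjustment. A host-side analogue of the nulls-equal path, with std::optional standing in for nullable values (a sketch, not cudf API):

    #include <cstddef>
    #include <optional>
    #include <unordered_set>
    #include <vector>

    // Hash-based distinct count with nulls comparing equal.
    std::size_t hash_distinct_count(std::vector<std::optional<int>> const& vals)
    {
      std::unordered_set<int> seen;
      bool has_null = false;
      for (auto const& v : vals) {
        if (v) { seen.insert(*v); }  // insert_if: non-null rows only
        else   { has_null = true; }  // all nulls collapse to one group
      }
      return seen.size() + (has_null ? 1 : 0);  // the (null_count > 0) adjustment
    }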
cudf::size_type distinct_count(column_view const& input,
null_policy null_handling,
nan_policy nan_handling,
rmm::cuda_stream_view stream)
{
- if (0 == input.size() || input.null_count() == input.size()) { return 0; }
-
- cudf::size_type nrows = input.size();
-
- bool has_nan = false;
- // Check for Nans
- // Checking for nulls in input and flag nan_handling, as the count will
- // only get affected if these two conditions are true. NAN will only be
- // be an extra if nan_handling was NAN_IS_NULL and input also had null, which
- // will increase the count by 1.
- if (input.has_nulls() and nan_handling == nan_policy::NAN_IS_NULL) {
- has_nan = cudf::type_dispatcher(input.type(), has_nans{}, input, stream);
- }
+ auto const num_rows = input.size();
- auto count = detail::distinct_count(table_view{{input}}, null_equality::EQUAL, stream);
+ if (num_rows == 0 or num_rows == input.null_count()) { return 0; }
- // if nan is considered null and there are already null values
- if (nan_handling == nan_policy::NAN_IS_NULL and has_nan and input.has_nulls()) --count;
+ auto const count_nulls = null_handling == null_policy::INCLUDE;
+ auto const nan_is_null = nan_handling == nan_policy::NAN_IS_NULL;
+ auto const should_check_nan = cudf::is_floating_point(input.type());
+ auto input_device_view = cudf::column_device_view::create(input, stream);
+ auto device_view = *input_device_view;
+ auto input_table_view = table_view{{input}};
+ auto table_ptr = cudf::table_device_view::create(input_table_view, stream);
+ row_equality_comparator comp(nullate::DYNAMIC{cudf::has_nulls(input_table_view)},
+ *table_ptr,
+ *table_ptr,
+ null_equality::EQUAL);
- if (null_handling == null_policy::EXCLUDE and input.has_nulls())
- return --count;
- else
- return count;
+ return thrust::count_if(
+ rmm::exec_policy(stream),
+ thrust::counting_iterator<cudf::size_type>(0),
+ thrust::counting_iterator<cudf::size_type>(num_rows),
+ [count_nulls, nan_is_null, should_check_nan, device_view, comp] __device__(cudf::size_type i) {
+ auto const is_null = device_view.is_null(i);
+ auto const is_nan = nan_is_null and should_check_nan and
+ cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i);
+ if (not count_nulls and (is_null or (nan_is_null and is_nan))) { return false; }
+ if (i == 0) { return true; }
+ if (count_nulls and nan_is_null and (is_nan or is_null)) {
+ auto const prev_is_nan =
+ should_check_nan and
+ cudf::type_dispatcher(device_view.type(), check_nan{}, device_view, i - 1);
+ return not(prev_is_nan or device_view.is_null(i - 1));
+ }
+ return not comp(i, i - 1);
+ });
}
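[Note] The column overload fuses null and NaN handling into the count_if predicate rather than post-adjusting a table-level count. A host-side restatement of that predicate (a sketch under the same assumption the kernel makes: equal rows, including the merged null/NaN group under NAN_IS_NULL, sit adjacent):

    #include <cmath>
    #include <cstddef>
    #include <optional>
    #include <vector>

    std::size_t distinct_count_host(std::vector<std::optional<double>> const& vals,
                                    bool count_nulls,  // null_policy::INCLUDE
                                    bool nan_is_null)  // nan_policy::NAN_IS_NULL
    {
      auto null_like = [&](std::optional<double> const& v) {
        return !v.has_value() or (nan_is_null and std::isnan(*v));
      };
      std::size_t count = 0;
      for (std::size_t i = 0; i < vals.size(); ++i) {
        if (not count_nulls and null_like(vals[i])) { continue; }  // excluded rows
        if (i == 0) { ++count; continue; }
        if (count_nulls and nan_is_null and null_like(vals[i])) {
          // null and NaN form one merged group: count only its first element
          if (not null_like(vals[i - 1])) { ++count; }
        } else if (vals[i] != vals[i - 1]) {  // nullopt == nullopt, so nulls group
          ++count;
        }
      }
      return count;
    }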
+cudf::size_type unordered_distinct_count(column_view const& input,
+ null_policy null_handling,
+ nan_policy nan_handling,
+ rmm::cuda_stream_view stream)
+{
+ if (0 == input.size() or input.null_count() == input.size()) { return 0; }
+
+ auto count = detail::unordered_distinct_count(table_view{{input}}, null_equality::EQUAL, stream);
+
+ // Check for nulls. If the null policy is EXCLUDE and null values were found,
+ // we decrement the count.
+ auto const has_null = input.has_nulls();
+ if (null_handling == null_policy::EXCLUDE and has_null) { --count; }
+
+ // Check for NaNs. There are two cases that can lead to decrementing the
+ // count. The first case is when the input has no nulls, but has NaN values
+ // handled as a null via NAN_IS_NULL and has a policy to EXCLUDE null values
+ // from the count. The second case is when the input has null values and NaN
+ // values handled as nulls via NAN_IS_NULL. Regardless of whether the null
+ // policy is set to EXCLUDE, we decrement the count to avoid double-counting
+ // null and NaN as distinct entities.
+ auto const has_nan_as_null = (nan_handling == nan_policy::NAN_IS_NULL) and
+ cudf::type_dispatcher(input.type(), has_nans{}, input, stream);
+ if (has_nan_as_null and (has_null or null_handling == null_policy::EXCLUDE)) { --count; }
+ return count;
+}
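[Note] Worked example: for a column {1.0, NaN, null} with NAN_IS_NULL, the hash-based table count above yields 3 (1.0 and NaN are inserted, plus one for the null group). With null_policy::INCLUDE only the NaN adjustment fires (has_nan_as_null and has_null), giving 2: {1.0, merged null/NaN}. With null_policy::EXCLUDE both decrements fire, giving 1: {1.0}.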
} // namespace detail
cudf::size_type distinct_count(column_view const& input,
@@ -183,4 +269,18 @@ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equa
return detail::distinct_count(input, nulls_equal);
}
+cudf::size_type unordered_distinct_count(column_view const& input,
+ null_policy null_handling,
+ nan_policy nan_handling)
+{
+ CUDF_FUNC_RANGE();
+ return detail::unordered_distinct_count(input, null_handling, nan_handling);
+}
+
+cudf::size_type unordered_distinct_count(table_view const& input, null_equality nulls_equal)
+{
+ CUDF_FUNC_RANGE();
+ return detail::unordered_distinct_count(input, nulls_equal);
+}
+
} // namespace cudf
diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/drop_duplicates.cu
index abc34663aee..2fd1f530b6d 100644
--- a/cpp/src/stream_compaction/drop_duplicates.cu
+++ b/cpp/src/stream_compaction/drop_duplicates.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,13 +14,16 @@
* limitations under the License.
*/
-#include
+#include "drop_duplicates.cuh"
+#include "stream_compaction_common.cuh"
+#include "stream_compaction_common.hpp"
#include
#include
#include
#include
#include
+#include
#include
#include
#include
@@ -37,6 +40,7 @@
#include
#include
+#include
#include
namespace cudf {
@@ -85,12 +89,12 @@ column_view get_unique_ordered_indices(cudf::table_view const& keys,
auto comp = row_equality_comparator(
nullate::DYNAMIC{cudf::has_nulls(keys)}, *device_input_table, *device_input_table, nulls_equal);
- auto result_end = unique_copy(sorted_indices->view().begin(),
- sorted_indices->view().end(),
- unique_indices.begin(),
- comp,
- keep,
- stream);
+ auto result_end = cudf::detail::unique_copy(sorted_indices->view().begin(),
+ sorted_indices->view().end(),
+ unique_indices.begin(),
+ comp,
+ keep,
+ stream);
return cudf::detail::slice(column_view(unique_indices),
0,
@@ -106,7 +110,7 @@ std::unique_ptr<table> drop_duplicates(table_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
- if (0 == input.num_rows() || 0 == input.num_columns() || 0 == keys.size()) {
+ if (input.num_rows() == 0 or input.num_columns() == 0 or keys.empty()) {
return empty_like(input);
}
@@ -130,6 +134,62 @@ std::unique_ptr<table> drop_duplicates(table_view const& input,
mr);
}
+std::unique_ptr<table> unordered_drop_duplicates(table_view const& input,
+ std::vector<size_type> const& keys,
+ null_equality nulls_equal,
+ rmm::cuda_stream_view stream,
+ rmm::mr::device_memory_resource* mr)
+{
+ if (input.num_rows() == 0 or input.num_columns() == 0 or keys.empty()) {
+ return empty_like(input);
+ }
+
+ auto keys_view = input.select(keys);
+ auto table_ptr = cudf::table_device_view::create(keys_view, stream);
+ auto has_null = nullate::DYNAMIC{cudf::has_nulls(keys_view)};
+ auto const num_rows{table_ptr->num_rows()};
+
+ hash_map_type key_map{compute_hash_table_size(num_rows),
+ COMPACTION_EMPTY_KEY_SENTINEL,
+ COMPACTION_EMPTY_VALUE_SENTINEL,
+ detail::hash_table_allocator_type{default_allocator{}, stream},
+ stream.value()};
+
+ compaction_hash hash_key{has_null, *table_ptr};
+ row_equality_comparator row_equal(has_null, *table_ptr, *table_ptr, nulls_equal);
+
+ auto iter = cudf::detail::make_counting_transform_iterator(
+ 0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
+ // insert all row indices into the map; only one entry per distinct row survives.
+ key_map.insert(iter, iter + num_rows, hash_key, row_equal, stream.value());
+
+ auto counting_iter = thrust::make_counting_iterator(0);
+ rmm::device_uvector<bool> index_exists_in_map(num_rows, stream, mr);
+ // enumerate all indices to check if they are present in the map.
+ key_map.contains(counting_iter, counting_iter + num_rows, index_exists_in_map.begin(), hash_key);
+
+ auto const output_size{key_map.get_size()};
+
+ // write unique indices to a numeric column
+ auto unique_indices = cudf::make_numeric_column(
+ data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr);
+ auto mutable_view = mutable_column_device_view::create(*unique_indices, stream);
+ thrust::copy_if(rmm::exec_policy(stream),
+ counting_iter,
+ counting_iter + num_rows,
+ index_exists_in_map.begin(),
+ mutable_view->begin<size_type>(),
+ thrust::identity<bool>{});
+
+ // run gather operation to establish new order
+ return detail::gather(input,
+ unique_indices->view(),
+ out_of_bounds_policy::DONT_CHECK,
+ detail::negative_index_policy::NOT_ALLOWED,
+ stream,
+ mr);
+}
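[Note] The unordered path keeps one representative index per distinct key row: insert publishes one (index, index) pair per distinct row, contains then flags exactly the surviving representative indices, and copy_if compacts them into the gather map. Which duplicate survives is unspecified (it depends on concurrent insertion order), unlike the keep-first/keep-last guarantees of the sorted drop_duplicates path. A host-side keep-first analogue (illustrative only, not cudf API):

    #include <unordered_set>
    #include <vector>

    std::vector<int> unordered_drop_duplicates_host(std::vector<int> const& keys)
    {
      std::unordered_set<int> seen;
      std::vector<int> out;  // plays the role of the gathered unique rows
      for (int k : keys) {
        // insert().second is true only for the first occurrence of a key
        if (seen.insert(k).second) { out.push_back(k); }
      }
      return out;
    }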
+
} // namespace detail
std::unique_ptr<table> drop_duplicates(table_view const& input,
@@ -144,4 +204,13 @@ std::unique_ptr<table> drop_duplicates(table_view const& input,
input, keys, keep, nulls_equal, null_precedence, rmm::cuda_stream_default, mr);
}
+std::unique_ptr<table> unordered_drop_duplicates(table_view const& input,
+ std::vector<size_type> const& keys,
+ null_equality nulls_equal,
+ rmm::mr::device_memory_resource* mr)
+{
+ CUDF_FUNC_RANGE();
+ return detail::unordered_drop_duplicates(input, keys, nulls_equal, rmm::cuda_stream_default, mr);
+}
+
} // namespace cudf
diff --git a/cpp/src/stream_compaction/stream_compaction_common.cuh b/cpp/src/stream_compaction/stream_compaction_common.cuh
new file mode 100644
index 00000000000..8ba9223a1bc
--- /dev/null
+++ b/cpp/src/stream_compaction/stream_compaction_common.cuh
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "stream_compaction_common.hpp"
+
+namespace cudf {
+namespace detail {
+
+/**
+ * @brief Device callable to hash a given row.
+ */
+template <typename Nullate>
+class compaction_hash {
+ public:
+ compaction_hash(Nullate has_nulls, table_device_view t) : _hash{has_nulls, t} {}
+
+ __device__ inline auto operator()(size_type i) const noexcept
+ {
+ auto hash = _hash(i);
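+ // remap a hash equal to COMPACTION_EMPTY_KEY_SENTINEL so the reserved sentinel value is never produced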
+ return (hash == COMPACTION_EMPTY_KEY_SENTINEL) ? (hash - 1) : hash;
+ }
+
+ private:
+ row_hash _hash;
+};
+
+/**
+ * @brief Device functor to determine if a row is valid.
+ */
+class row_validity {
+ public:
+ row_validity(bitmask_type const* row_bitmask) : _row_bitmask{row_bitmask} {}
+
+ __device__ inline bool operator()(const size_type& i) const noexcept
+ {
+ return cudf::bit_is_set(_row_bitmask, i);
+ }
+
+ private:
+ bitmask_type const* _row_bitmask;
+};
+
+} // namespace detail
+} // namespace cudf
diff --git a/cpp/src/stream_compaction/stream_compaction_common.hpp b/cpp/src/stream_compaction/stream_compaction_common.hpp
new file mode 100644
index 00000000000..1d743eccdbe
--- /dev/null
+++ b/cpp/src/stream_compaction/stream_compaction_common.hpp
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+namespace cudf {
+namespace detail {
+
+constexpr auto COMPACTION_EMPTY_KEY_SENTINEL   = std::numeric_limits<size_type>::max();
+constexpr auto COMPACTION_EMPTY_VALUE_SENTINEL = std::numeric_limits<size_type>::min();
+
+using hash_type = cuco::detail::MurmurHash3_32<size_type>;
+
+using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
+
+using hash_map_type =
+  cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
+
+using row_hash = cudf::row_hasher<default_hash, cudf::nullate::DYNAMIC>;
+
+} // namespace detail
+} // namespace cudf
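[Note] In the aliases above, the map's keys and values are both row indices (size_type), so row content never lives in the map itself; it enters only through the row_hash and row_equality_comparator functors passed to insert/contains. That is what lets a single static_map configuration deduplicate tables of arbitrary width and type.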
diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu
index dadeaf7d1e0..405c83ab872 100644
--- a/cpp/src/transform/encode.cu
+++ b/cpp/src/transform/encode.cu
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,7 +30,10 @@
#include
#include
+#include
#include
+#include
+#include
namespace cudf {
namespace detail {
@@ -38,29 +41,23 @@ namespace detail {
std::pair<std::unique_ptr<table>, std::unique_ptr<column>> encode(
table_view const& input_table, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr)
{
- std::vector<size_type> drop_keys(input_table.num_columns());
+ auto const num_cols = input_table.num_columns();
+
+ std::vector<size_type> drop_keys(num_cols);
std::iota(drop_keys.begin(), drop_keys.end(), 0);
- // side effects of this function we are now dependent on:
- // - resulting column elements are sorted ascending
- // - nulls are sorted to the beginning
- auto keys_table = cudf::detail::drop_duplicates(input_table,
- drop_keys,
- duplicate_keep_option::KEEP_FIRST,
- null_equality::EQUAL,
- null_order::AFTER,
- stream,
- mr);
+ auto unique_keys = cudf::detail::unordered_drop_duplicates(
+ input_table, drop_keys, null_equality::EQUAL, stream, mr);
+
+ std::vector<order> column_order(num_cols, order::ASCENDING);
+ std::vector<null_order> null_precedence(num_cols, null_order::AFTER);
+ auto sorted_unique_keys =
+ cudf::detail::sort(unique_keys->view(), column_order, null_precedence, stream, mr);
- auto indices_column =
- cudf::detail::lower_bound(keys_table->view(),
- input_table,
- std::vector<order>(input_table.num_columns(), order::ASCENDING),
- std::vector<null_order>(input_table.num_columns(), null_order::AFTER),
- stream,
- mr);
+ auto indices_column = cudf::detail::lower_bound(
+ sorted_unique_keys->view(), input_table, column_order, null_precedence, stream, mr);
- return std::make_pair(std::move(keys_table), std::move(indices_column));
+ return std::make_pair(std::move(sorted_unique_keys), std::move(indices_column));
}
} // namespace detail
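[Note] encode() now builds its dictionary in three explicit steps: hash-based deduplication, a sort of the unique keys, then lower_bound to map every input row to its key index; previously it relied on the sort-order side effects of drop_duplicates. A host-side sketch of the new flow (illustrative only, not cudf API):

    #include <algorithm>
    #include <unordered_set>
    #include <utility>
    #include <vector>

    std::pair<std::vector<int>, std::vector<int>> encode_host(std::vector<int> const& in)
    {
      // 1) hash-based dedup (order unspecified), 2) sort the unique keys
      std::unordered_set<int> uniq(in.begin(), in.end());
      std::vector<int> keys(uniq.begin(), uniq.end());
      std::sort(keys.begin(), keys.end());
      // 3) lower_bound each input value against the sorted dictionary
      std::vector<int> indices;
      indices.reserve(in.size());
      for (int v : in) {
        indices.push_back(static_cast<int>(
          std::lower_bound(keys.begin(), keys.end(), v) - keys.begin()));
      }
      return {keys, indices};
    }

For example, encode_host({5, 3, 5}) returns keys {3, 5} and indices {1, 0, 1}.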
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e23403e68e4..6b5670630ec 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -1,5 +1,5 @@
# =============================================================================
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
@@ -292,6 +292,7 @@ ConfigureTest(
ConfigureTest(
STREAM_COMPACTION_TEST
stream_compaction/apply_boolean_mask_tests.cpp
+ stream_compaction/distinct_count_tests.cpp
stream_compaction/drop_nulls_tests.cpp
stream_compaction/drop_nans_tests.cpp
stream_compaction/drop_duplicates_tests.cpp
diff --git a/cpp/tests/stream_compaction/distinct_count_tests.cpp b/cpp/tests/stream_compaction/distinct_count_tests.cpp
new file mode 100644
index 00000000000..78b52db5255
--- /dev/null
+++ b/cpp/tests/stream_compaction/distinct_count_tests.cpp
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include