Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix GpuSize #1981

Merged
merged 2 commits into from
Mar 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/supported_ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -12329,8 +12329,8 @@ Accelerator support is described below.
<td> </td>
<td> </td>
<td> </td>
<td>S*</td>
<td>S*</td>
<td><em>PS* (missing nested BINARY, CALENDAR, UDT)</em></td>
<td><em>PS* (missing nested BINARY, CALENDAR, UDT)</em></td>
<td> </td>
<td> </td>
</tr>
Expand Down
6 changes: 5 additions & 1 deletion integration_tests/src/main/python/collection_ops_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from data_gen import *
from pyspark.sql.types import *

@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
nested_gens = [ArrayGen(LongGen()),
StructGen([("a", LongGen())]),
MapGen(StringGen(pattern='key_[0-9]', nullable=False), StringGen())]

@pytest.mark.parametrize('data_gen', all_gen + nested_gens, ids=idfn)
@pytest.mark.parametrize('size_of_null', ['true', 'false'], ids=idfn)
def test_size_of_array(data_gen, size_of_null):
gen = ArrayGen(data_gen)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2339,7 +2339,8 @@ object GpuOverrides {
expr[Size](
"The size of an array or a map",
ExprChecks.unaryProjectNotLambda(TypeSig.INT, TypeSig.INT,
(TypeSig.ARRAY + TypeSig.MAP).nested(TypeSig.all),
(TypeSig.ARRAY + TypeSig.MAP).nested(TypeSig.commonCudfTypes + TypeSig.NULL
+ TypeSig.DECIMAL + TypeSig.ARRAY + TypeSig.STRUCT + TypeSig.MAP),
(TypeSig.ARRAY + TypeSig.MAP).nested(TypeSig.all)),
(a, conf, p, r) => new UnaryExprMeta[Size](a, conf, p, r) {
override def convertToGpu(child: Expression): GpuExpression =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,40 +31,18 @@ case class GpuSize(child: Expression, legacySizeOfNull: Boolean)
override def nullable: Boolean = if (legacySizeOfNull) false else super.nullable

override protected def doColumnar(input: GpuColumnVector): ColumnVector = {
val inputBase = input.getBase
if (inputBase.getRowCount == 0) {
return GpuColumnVector.from(GpuScalar.from(0), 0, IntegerType).getBase
}

// Compute sizes of cuDF.ListType to get the size of each ArrayData or MapData, considering
// that MapData is represented as a List of Struct in cuDF.
// We compute each list size by subtracting the current offset from the offset of the next
// element (row).
val collectionSize = {
// Here is a hack: we use index -1 to fetch the offset column of the list.
// In native cuDF, the offset column is the first (index 0) child of list_column_view.
// In the JNI layer, 1 is added to the child index when fetching a child column of ListType
// to keep alignment.
// So, in the JVM layer, we have to use -1 as the index to fetch the real first child of
// list_column.
withResource(inputBase.getChildColumnView(-1)) { offset =>
withResource(offset.subVector(1)) { upBound =>
withResource(offset.subVector(0, offset.getRowCount.toInt - 1)) { lowBound =>
upBound.sub(lowBound)
withResource(input.getBase.countElements()) { collectionSize =>
if (legacySizeOfNull) {
withResource(GpuScalar.from(-1)) { nullScalar =>
withResource(input.getBase.isNull) { inputIsNull =>
inputIsNull.ifElse(nullScalar, collectionSize)
}
}
}
}

val nullScalar = if (legacySizeOfNull) {
GpuScalar.from(-1)
} else {
GpuScalar.from(null, IntegerType)
}

withResource(collectionSize) { collectionSize =>
withResource(nullScalar) { nullScalar =>
withResource(inputBase.isNull) { inputIsNull =>
inputIsNull.ifElse(nullScalar, collectionSize)
}
} else {
collectionSize.incRefCount()
}
}
}
Expand Down