Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Series fill, fillInPlace, replaceNulls, replaceNaNs #128

Merged
merged 11 commits into from
Mar 23, 2021
Merged
6 changes: 6 additions & 0 deletions modules/cudf/src/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ Napi::Object Column::Init(Napi::Env env, Napi::Object exports) {
InstanceMethod<&Column::set_null_count>("setNullCount"),
// column/copying.cpp
InstanceMethod<&Column::gather>("gather"),
// column/filling.cpp
InstanceMethod<&Column::fill>("fill"),
InstanceMethod<&Column::fill_in_place>("fillInPlace"),
// column/binaryop.cpp
InstanceMethod<&Column::add>("add"),
InstanceMethod<&Column::sub>("sub"),
Expand Down Expand Up @@ -190,6 +193,9 @@ Napi::Object Column::Init(Napi::Env env, Napi::Object exports) {
InstanceMethod<&Column::variance>("var"),
InstanceMethod<&Column::std>("std"),
InstanceMethod<&Column::quantile>("quantile"),
// column/replacement.cpp
InstanceMethod<&Column::replace_nulls>("replaceNulls"),
InstanceMethod<&Column::replace_nans>("replaceNaNs"),
// column/unaryop.cpp
InstanceMethod<&Column::cast>("cast"),
InstanceMethod<&Column::is_null>("isNull"),
Expand Down
42 changes: 42 additions & 0 deletions modules/cudf/src/column.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
Integral,
Numeric,
} from './types/dtypes';
import {ReplacePolicy} from './types/enums';
import {CommonType, Interpolation} from './types/mappings';

export type ColumnProps<T extends DataType = any> = {
Expand Down Expand Up @@ -157,6 +158,47 @@ export interface Column<T extends DataType = any> {
*/
setNullMask(mask: DeviceBuffer, nullCount?: number): void;

/**
 * Fills a range of elements in a column out-of-place with a scalar value.
 *
 * @param value The scalar value to fill.
 * @param begin The starting index of the fill range (inclusive). The native binding uses `0`
 *   when this argument is not passed.
 * @param end The index one past the last element in the fill range (exclusive). The native
 *   binding uses the Column's size when this argument is not passed.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 *   memory.
 * @returns A new Column holding the filled result.
 */
fill(value: Scalar<T>, begin?: number, end?: number, memoryResource?: MemoryResource): Column<T>;

/**
 * Fills a range of elements in-place in a column with a scalar value.
 *
 * @param value The scalar value to fill.
 * @param begin The starting index of the fill range (inclusive). The native binding uses `0`
 *   when this argument is not passed.
 * @param end The index one past the last element in the fill range (exclusive). The native
 *   binding uses the Column's size when this argument is not passed.
 * @returns Nothing. The native `fillInPlace` binding is a `void` method, so the call evaluates
 *   to `undefined`; the Column itself is mutated.
 */
fillInPlace(value: Scalar<T>, begin?: number, end?: number): void;

/**
 * Replace null values with the corresponding elements of another `Column`, with a `Scalar`, or
 * with the closest non-null value before or after each null (selected by a `ReplacePolicy`).
 *
 * @param value The replacement source: a Column, a Scalar, or a ReplacePolicy.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 *   memory.
 * @returns A new Column with the nulls replaced.
 * @throws If `value` is not one of the kinds the native binding accepts.
 */
replaceNulls(value: Column<T>, memoryResource?: MemoryResource): Column<T>;
replaceNulls(value: Scalar<T>, memoryResource?: MemoryResource): Column<T>;
replaceNulls(value: ReplacePolicy, memoryResource?: MemoryResource): Column<T>;

/**
 * Replace NaN values with a scalar value, or the corresponding elements from another Column.
 *
 * @param value The replacement source: a Column or a Scalar.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 *   memory.
 * @returns A new Column with the NaNs replaced.
 * @throws If `value` is neither a Column nor a Scalar.
 */
replaceNaNs(value: Column<T>, memoryResource?: MemoryResource): Column<T>;
replaceNaNs(value: Scalar<T>, memoryResource?: MemoryResource): Column<T>;

/**
* Add this Column and another Column or scalar value.
*
Expand Down
35 changes: 33 additions & 2 deletions modules/cudf/src/column/filling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,48 @@
// limitations under the License.

#include <node_cudf/column.hpp>
#include <node_cudf/scalar.hpp>

#include <node_rmm/device_buffer.hpp>
#include <nv_node/utilities/wrap.hpp>

#include <napi.h>
#include <cudf/column/column.hpp>
#include <cudf/filling.hpp>
#include <cudf/types.hpp>
#include <rmm/device_buffer.hpp>

#include <napi.h>

namespace nv {

// Out-of-place fill: forwards to cudf::fill over [begin, end) and wraps the
// resulting cudf column in a new Column.
ObjectUnwrap<Column> Column::fill(cudf::size_type begin,
                                  cudf::size_type end,
                                  cudf::scalar const& value,
                                  rmm::mr::device_memory_resource* mr) {
  // View of this Column's data; cudf::fill reads from it and allocates its result via `mr`.
  cudf::column_view const input = *this;
  return Column::New(cudf::fill(input, begin, end, value, mr));
}

// JS entry point for `fill(value, begin?, end?, memoryResource?)`.
// `begin` falls back to 0 and `end` to size() when the caller passes too few
// arguments; cudf::logic_error is rethrown as a JavaScript Error.
Napi::Value Column::fill(Napi::CallbackInfo const& info) {
  CallbackArgs args{info};
  auto fill_value = Scalar::Unwrap(args[0].ToObject());

  cudf::size_type first;
  if (args.Length() > 1) {
    first = args[1];
  } else {
    first = 0;
  }

  cudf::size_type last;
  if (args.Length() > 2) {
    last = args[2];
  } else {
    last = size();
  }

  try {
    return fill(first, last, *fill_value, args[3]);
  } catch (cudf::logic_error const& err) {
    NAPI_THROW(Napi::Error::New(info.Env(), err.what()));
  }
}

// JS entry point for `fillInPlace(value, begin?, end?)`.
// Mutates this Column through a mutable view and returns nothing to JavaScript.
// `begin` falls back to 0 and `end` to size() when the caller passes too few
// arguments; cudf::logic_error is rethrown as a JavaScript Error.
void Column::fill_in_place(Napi::CallbackInfo const& info) {
  CallbackArgs args{info};
  auto fill_value = Scalar::Unwrap(args[0].ToObject());

  cudf::size_type first;
  if (args.Length() > 1) {
    first = args[1];
  } else {
    first = 0;
  }

  cudf::size_type last;
  if (args.Length() > 2) {
    last = args[2];
  } else {
    last = size();
  }

  try {
    cudf::mutable_column_view target = *this;
    cudf::fill_in_place(target, first, last, *fill_value);
  } catch (cudf::logic_error const& err) {
    NAPI_THROW(Napi::Error::New(info.Env(), err.what()));
  }
}

ObjectUnwrap<Column> Column::sequence(Napi::Env const& env,
cudf::size_type size,
cudf::scalar const& init,
Expand Down Expand Up @@ -72,6 +103,6 @@ Napi::Value Column::sequence(Napi::CallbackInfo const& info) {
rmm::mr::device_memory_resource* mr = args[3];
return Column::sequence(info.Env(), size, init, step, mr);
}
}

} // namespace nv
} // namespace nv
70 changes: 70 additions & 0 deletions modules/cudf/src/column/replace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2021, NVIDIA CORPORATION.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <node_cudf/column.hpp>
#include <node_cudf/scalar.hpp>

#include <cudf/filling.hpp>
#include <cudf/replace.hpp>

namespace nv {

// Replace nulls with the corresponding elements of `replacement`; the result
// is wrapped in a new Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::column_view const& replacement,
                                           rmm::mr::device_memory_resource* mr) {
  cudf::column_view const input = *this;
  return Column::New(cudf::replace_nulls(input, replacement, mr));
}

// Replace nulls with a single scalar `replacement`; the result is wrapped in
// a new Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::scalar const& replacement,
                                           rmm::mr::device_memory_resource* mr) {
  cudf::column_view const input = *this;
  return Column::New(cudf::replace_nulls(input, replacement, mr));
}

// Replace nulls according to a cudf::replace_policy; the result is wrapped in
// a new Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::replace_policy const& replace_policy,
                                           rmm::mr::device_memory_resource* mr) {
  cudf::column_view const input = *this;
  return Column::New(cudf::replace_nulls(input, replace_policy, mr));
}

// Replace NaNs with the corresponding elements of `replacement`; the result
// is wrapped in a new Column.
ObjectUnwrap<Column> Column::replace_nans(cudf::column_view const& replacement,
                                          rmm::mr::device_memory_resource* mr) {
  cudf::column_view const input = *this;
  return Column::New(cudf::replace_nans(input, replacement, mr));
}

// Replace NaNs with a single scalar `replacement`; the result is wrapped in
// a new Column.
ObjectUnwrap<Column> Column::replace_nans(cudf::scalar const& replacement,
                                          rmm::mr::device_memory_resource* mr) {
  cudf::column_view const input = *this;
  return Column::New(cudf::replace_nans(input, replacement, mr));
}

// JS entry point for `replaceNulls(value, memoryResource?)`.
// Dispatches on the kind of the first argument (Column, Scalar, or Boolean
// replace policy). cudf::logic_error is rethrown as a JavaScript Error; any
// other kind of first argument raises the error at the bottom.
Napi::Value Column::replace_nulls(Napi::CallbackInfo const& info) {
  CallbackArgs args{info};
  auto const replacement = info[0];
  try {
    if (Column::is_instance(replacement)) {
      rmm::mr::device_memory_resource* mr = args[1];
      return replace_nulls(*Column::Unwrap(args[0]), mr);
    }
    if (Scalar::is_instance(replacement)) {
      rmm::mr::device_memory_resource* mr = args[1];
      return replace_nulls(*Scalar::Unwrap(args[0]), mr);
    }
    if (args[0].IsBoolean()) {
      cudf::replace_policy policy{args[0]};
      rmm::mr::device_memory_resource* mr = args[1];
      return replace_nulls(policy, mr);
    }
  } catch (cudf::logic_error const& err) {
    NAPI_THROW(Napi::Error::New(info.Env(), err.what()));
  }
  throw Napi::Error::New(info.Env(), "replace_nulls requires a Column, Scalar, or Boolean");
}

// JS entry point for `replaceNaNs(value, memoryResource?)`.
// Dispatches on whether the first argument is a Column or a Scalar.
// cudf::logic_error is rethrown as a JavaScript Error; any other kind of
// first argument raises the error at the bottom.
Napi::Value Column::replace_nans(Napi::CallbackInfo const& info) {
  CallbackArgs args{info};
  auto const replacement = info[0];
  try {
    if (Column::is_instance(replacement)) {
      rmm::mr::device_memory_resource* mr = args[1];
      return replace_nans(*Column::Unwrap(args[0]), mr);
    }
    if (Scalar::is_instance(replacement)) {
      rmm::mr::device_memory_resource* mr = args[1];
      return replace_nans(*Scalar::Unwrap(args[0]), mr);
    }
  } catch (cudf::logic_error const& err) {
    NAPI_THROW(Napi::Error::New(info.Env(), err.what()));
  }
  throw Napi::Error::New(info.Env(), "replace_nans requires a Column or Scalar");
}

} // namespace nv
2 changes: 2 additions & 0 deletions modules/cudf/src/column_accessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export class ColumnAccessor<T extends TypeMap = any> {

get length() { return this._labels_to_indices.size; }

/**
 * Report whether `name` is a key of the accessor's `_data` object.
 *
 * Uses the same `in` check that `get` performs before reading a column.
 *
 * @param name The column name to test.
 */
has(name: string): boolean {
  const columns = this._data;
  return name in columns;
}

get<R extends keyof T>(name: R) {
if (!(name in this._data)) { throw new Error(`Unknown column name: ${name.toString()}`); }
return this._data[name];
Expand Down
7 changes: 7 additions & 0 deletions modules/cudf/src/data_frame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ export class DataFrame<T extends TypeMap = any> {
*/
drop<R extends keyof T>(names: R[]) { return new DataFrame(this._accessor.dropColumns(names)); }

/**
 * Return whether the DataFrame has a Series with the given name.
 *
 * @param name Name of the Series to look for.
 * @returns The result of the underlying accessor's `has(name)` check.
 */
has(name: string) {
  const accessor = this._accessor;
  return accessor.has(name);
}

/**
* Return a series by name.
*
Expand Down
38 changes: 38 additions & 0 deletions modules/cudf/src/node_cudf/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
#include <nv_node/utilities/wrap.hpp>

#include <napi.h>

#include <cudf/aggregation.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>
#include <cudf/replace.hpp>
#include <cudf/types.hpp>
#include <cudf/unary.hpp>
#include <rmm/device_buffer.hpp>
Expand Down Expand Up @@ -620,6 +622,34 @@ class Column : public Napi::ObjectWrap<Column> {
cudf::out_of_bounds_policy bounds_policy = cudf::out_of_bounds_policy::DONT_CHECK,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

// column/filling.cpp
/**
 * @brief Fill the range [begin, end) with a scalar value out-of-place.
 *
 * @param begin Starting index of the fill range (inclusive).
 * @param end End index of the fill range (exclusive).
 * @param value The scalar value to fill with.
 * @param mr Memory resource forwarded to cudf::fill; defaults to the current device resource.
 * @return A new Column wrapping the column returned by cudf::fill.
 */
ObjectUnwrap<Column> fill(
cudf::size_type begin,
cudf::size_type end,
cudf::scalar const& value,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// column/replace.cpp
/**
 * @brief Replace nulls using the elements of another column (forwards to cudf::replace_nulls).
 *
 * @param replacement Column view supplying the replacement values.
 * @param mr Memory resource forwarded to cudf; defaults to the current device resource.
 * @return A new Column wrapping the cudf result.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Replace nulls with a scalar value (forwards to cudf::replace_nulls).
 *
 * @param replacement Scalar used in place of nulls.
 * @param mr Memory resource forwarded to cudf; defaults to the current device resource.
 * @return A new Column wrapping the cudf result.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Replace nulls according to a cudf::replace_policy (forwards to cudf::replace_nulls).
 *
 * @param replace_policy Policy value handed to cudf::replace_nulls.
 * @param mr Memory resource forwarded to cudf; defaults to the current device resource.
 * @return A new Column wrapping the cudf result.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::replace_policy const& replace_policy,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Replace NaNs using the elements of another column (forwards to cudf::replace_nans).
 *
 * @param replacement Column view supplying the replacement values.
 * @param mr Memory resource forwarded to cudf; defaults to the current device resource.
 * @return A new Column wrapping the cudf result.
 */
ObjectUnwrap<Column> replace_nans(
cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Replace NaNs with a scalar value (forwards to cudf::replace_nans).
 *
 * @param replacement Scalar used in place of NaNs.
 * @param mr Memory resource forwarded to cudf; defaults to the current device resource.
 * @return A new Column wrapping the cudf result.
 */
ObjectUnwrap<Column> replace_nans(
cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// column/unaryop.cpp
ObjectUnwrap<Column> cast(
cudf::data_type out_type,
Expand Down Expand Up @@ -723,6 +753,10 @@ class Column : public Napi::ObjectWrap<Column> {
Napi::Value null_max(Napi::CallbackInfo const& info);
Napi::Value null_min(Napi::CallbackInfo const& info);

// column/filling.cpp
// N-API callback registered as "fill": arguments are (value, begin?, end?, memoryResource?).
Napi::Value fill(Napi::CallbackInfo const& info);
// N-API callback registered as "fillInPlace": arguments are (value, begin?, end?).
// Declared void, so it yields no value to JavaScript.
void fill_in_place(Napi::CallbackInfo const& info);

// column/stream_compaction.cpp
Napi::Value drop_nulls(Napi::CallbackInfo const& info);
Napi::Value drop_nans(Napi::CallbackInfo const& info);
Expand All @@ -749,6 +783,10 @@ class Column : public Napi::ObjectWrap<Column> {
Napi::Value std(Napi::CallbackInfo const& info);
Napi::Value quantile(Napi::CallbackInfo const& info);

// column/replace.cpp
// N-API callback registered as "replaceNulls": first argument is a Column, Scalar, or Boolean.
Napi::Value replace_nulls(Napi::CallbackInfo const& info);
// N-API callback registered as "replaceNaNs": first argument is a Column or Scalar.
Napi::Value replace_nans(Napi::CallbackInfo const& info);

// column/unaryop.cpp
Napi::Value cast(Napi::CallbackInfo const& info);
Napi::Value is_null(Napi::CallbackInfo const& info);
Expand Down
71 changes: 70 additions & 1 deletion modules/cudf/src/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ import {
Uint8,
Utf8String,
} from './types/dtypes';
import {NullOrder} from './types/enums';
import {
NullOrder,
ReplacePolicy,
} from './types/enums';
import {ArrowToCUDFType, arrowToCUDFType} from './types/mappings';

export type SeriesProps<T extends DataType = any> = {
Expand Down Expand Up @@ -193,6 +196,72 @@ export class AbstractSeries<T extends DataType = any> {
*/
get numChildren() { return this._col.numChildren; }

/**
 * Fills a range of elements in a column out-of-place with a scalar value.
 *
 * @param value The value to fill with. It is wrapped in a Scalar of this Series' type, so it
 *   is typed as the element (scalar) type rather than the DataType itself.
 * @param begin The starting index of the fill range (inclusive). Defaults to `0`.
 * @param end The index one past the last element in the fill range (exclusive). Defaults to
 *   this Series' length.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 *   memory.
 * @returns A new Series wrapping the filled Column.
 */
fill(value: T['scalarType'], begin = 0, end = this.length, memoryResource?: MemoryResource):
  Series<T> {
  return Series.new(
    this._col.fill(new Scalar({type: this.type, value}), begin, end, memoryResource));
}

/**
 * Fills a range of elements in-place in a column with a scalar value.
 *
 * @param value The value to fill with. It is wrapped in a Scalar of this Series' type, so it
 *   is typed as the element (scalar) type rather than the DataType itself.
 * @param begin The starting index of the fill range (inclusive). Defaults to `0`.
 * @param end The index one past the last element in the fill range (exclusive). Defaults to
 *   this Series' length.
 * @returns This Series, after its underlying Column has been modified.
 */
fillInPlace(value: T['scalarType'], begin = 0, end = this.length) {
  // The Column-level call's result is deliberately ignored; `this` is returned for chaining.
  this._col.fillInPlace(new Scalar({type: this.type, value}), begin, end);
  return this;
}

/**
* Replace null values with a scalar value.
*
* @param value The scalar value to use in place of nulls.
* @param memoryResource The optional MemoryResource used to allocate the result Column's device
* memory.
*/
replaceNulls(value: T['scalarType'], memoryResource?: MemoryResource): Series<T>;

/**
* Replace null values with the corresponding elements from another Series.
*
* @param value The Series to use in place of nulls.
* @param memoryResource The optional MemoryResource used to allocate the result Column's device
* memory.
*/
replaceNulls(value: Series<T>, memoryResource?: MemoryResource): Series<T>;

/**
* Replace null values with the closest non-null value before or after each null.
*
* @param value The {@link ReplacePolicy} indicating the side to search for the closest non-null
* value.
* @param memoryResource The optional MemoryResource used to allocate the result Column's device
* memory.
*/
replaceNulls(value: keyof typeof ReplacePolicy, memoryResource?: MemoryResource): Series<T>;

/**
 * Implementation behind the `replaceNulls` overloads.
 *
 * Dispatch order:
 *  1. a Series: nulls are replaced by the corresponding elements of its Column;
 *  2. a string that is one of `ReplacePolicy`'s own keys: the mapped policy value is used;
 *  3. anything else: the value is wrapped in a Scalar of this Series' type.
 *
 * @param value The Series, policy name, or scalar value to use in place of nulls.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 *   memory.
 * @returns A new Series wrapping the Column with the nulls replaced.
 */
replaceNulls(value: any, memoryResource?: MemoryResource): Series<T> {
  if (value instanceof Series) {
    return Series.new(this._col.replaceNulls(value._col, memoryResource));
  }
  // A bare `value in ReplacePolicy` also matches inherited keys such as 'toString' and
  // coerces non-string values to property names, so ordinary scalar values could be
  // misread as policies. Only strings that are own keys of ReplacePolicy select a policy.
  const isPolicyName =
    typeof value === 'string' && Object.prototype.hasOwnProperty.call(ReplacePolicy, value);
  if (isPolicyName) {
    return Series.new(
      this._col.replaceNulls(ReplacePolicy[value as keyof typeof ReplacePolicy], memoryResource));
  }
  return Series.new(
    this._col.replaceNulls(new Scalar({type: this.type, value}), memoryResource));
}

/**
* Return a sub-selection of this Series using the specified integral indices.
*
Expand Down
Loading