Skip to content

Commit

Permalink
Merge pull request #128 from trxcllnt/fea/demo-labels
Browse files Browse the repository at this point in the history
Add Series `fill`, `fillInPlace`, `replaceNulls`, `replaceNaNs`
  • Loading branch information
trxcllnt authored Mar 23, 2021
2 parents 7cef8fc + ebb1204 commit 3ce3371
Show file tree
Hide file tree
Showing 18 changed files with 664 additions and 250 deletions.
6 changes: 6 additions & 0 deletions modules/cudf/src/column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ Napi::Object Column::Init(Napi::Env env, Napi::Object exports) {
InstanceMethod<&Column::set_null_count>("setNullCount"),
// column/copying.cpp
InstanceMethod<&Column::gather>("gather"),
// column/filling.cpp
InstanceMethod<&Column::fill>("fill"),
InstanceMethod<&Column::fill_in_place>("fillInPlace"),
// column/binaryop.cpp
InstanceMethod<&Column::add>("add"),
InstanceMethod<&Column::sub>("sub"),
Expand Down Expand Up @@ -190,6 +193,9 @@ Napi::Object Column::Init(Napi::Env env, Napi::Object exports) {
InstanceMethod<&Column::variance>("var"),
InstanceMethod<&Column::std>("std"),
InstanceMethod<&Column::quantile>("quantile"),
// column/replacement.cpp
InstanceMethod<&Column::replace_nulls>("replaceNulls"),
InstanceMethod<&Column::replace_nans>("replaceNaNs"),
// column/unaryop.cpp
InstanceMethod<&Column::cast>("cast"),
InstanceMethod<&Column::is_null>("isNull"),
Expand Down
42 changes: 42 additions & 0 deletions modules/cudf/src/column.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
Integral,
Numeric,
} from './types/dtypes';
import {ReplacePolicy} from './types/enums';
import {CommonType, Interpolation} from './types/mappings';

export type ColumnProps<T extends DataType = any> = {
Expand Down Expand Up @@ -157,6 +158,47 @@ export interface Column<T extends DataType = any> {
*/
setNullMask(mask: DeviceBuffer, nullCount?: number): void;

/**
 * Fills a range of elements in a column out-of-place with a scalar value, returning the result
 * as a new Column.
 *
 * @param value The scalar value to fill.
 * @param begin The starting index of the fill range (inclusive). Defaults to `0` when omitted.
 * @param end The index of the last element in the fill range (exclusive). Defaults to the
 * Column's size when omitted.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 * @returns The Column produced by the fill.
 */
fill(value: Scalar<T>, begin?: number, end?: number, memoryResource?: MemoryResource): Column<T>;

/**
 * Fills a range of elements in-place in a column with a scalar value.
 *
 * @param value The scalar value to fill
 * @param begin The starting index of the fill range (inclusive). Defaults to `0` when omitted.
 * @param end The index of the last element in the fill range (exclusive). Defaults to the
 * Column's size when omitted.
 *
 * NOTE(review): the native `fill_in_place` binding is declared `void`, so the value observed at
 * runtime is presumably `undefined` rather than a Column -- confirm before relying on the
 * declared return type.
 */
fillInPlace(value: Scalar<T>, begin?: number, end?: number): Column<T>;

/**
 * Replace null values with a `Column`, `Scalar`, or the first/last non-null value.
 *
 * Three argument kinds are accepted:
 *  - a `Column`, whose elements are used in place of the nulls;
 *  - a `Scalar`, used in place of every null;
 *  - a `ReplacePolicy`, selecting which neighbouring non-null value is used.
 *
 * @param value The value to use in place of nulls.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 * @returns A Column holding the result of the replacement.
 */
replaceNulls(value: Column<T>, memoryResource?: MemoryResource): Column<T>;
replaceNulls(value: Scalar<T>, memoryResource?: MemoryResource): Column<T>;
replaceNulls(value: ReplacePolicy, memoryResource?: MemoryResource): Column<T>;

/**
 * Replace NaN values with a scalar value, or the corresponding elements from another Column.
 *
 * @param value The `Column` or `Scalar` to use in place of NaNs. Any other kind of argument is
 * rejected by the native binding with an error.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 * @returns A Column holding the result of the replacement.
 */
replaceNaNs(value: Column<T>, memoryResource?: MemoryResource): Column<T>;
replaceNaNs(value: Scalar<T>, memoryResource?: MemoryResource): Column<T>;

/**
* Add this Column and another Column or scalar value.
*
Expand Down
35 changes: 33 additions & 2 deletions modules/cudf/src/column/filling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,48 @@
// limitations under the License.

#include <node_cudf/column.hpp>
#include <node_cudf/scalar.hpp>

#include <node_rmm/device_buffer.hpp>
#include <nv_node/utilities/wrap.hpp>

#include <napi.h>
#include <cudf/column/column.hpp>
#include <cudf/filling.hpp>
#include <cudf/types.hpp>
#include <rmm/device_buffer.hpp>

#include <napi.h>

namespace nv {

// Out-of-place fill: forwards this Column together with the range [begin, end), the scalar
// `value` and the memory resource `mr` to cudf::fill, then wraps the column it returns in a
// new node Column via Column::New.
ObjectUnwrap<Column> Column::fill(cudf::size_type begin,
cudf::size_type end,
cudf::scalar const& value,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::fill(*this, begin, end, value, mr));
}

// JavaScript entry point for `fill(value, begin?, end?, memoryResource?)`.
//   args[0]  Scalar wrapper holding the fill value
//   args[1]  optional begin index; 0 when fewer than two arguments were passed
//   args[2]  optional end index; size() when fewer than three arguments were passed
//   args[3]  optional memory resource
// A cudf::logic_error raised by the fill is re-raised as a JavaScript Error carrying the same
// message.
Napi::Value Column::fill(Napi::CallbackInfo const& info) {
CallbackArgs args{info};
auto scalar = Scalar::Unwrap(args[0].ToObject());
// The defaults key off the argument count, not off whether the argument is undefined.
cudf::size_type begin = args.Length() > 1 ? args[1] : 0;
cudf::size_type end = args.Length() > 2 ? args[2] : size();
try {
// NOTE(review): args[3] is read even when it was not supplied; presumably CallbackArgs
// converts a missing argument into a usable default memory resource -- confirm.
return fill(begin, end, *scalar, args[3]);
} catch (cudf::logic_error const& e) { NAPI_THROW(Napi::Error::New(info.Env(), e.what())); }
}

// JavaScript entry point for `fillInPlace(value, begin?, end?)`.
//   args[0]  Scalar wrapper holding the fill value
//   args[1]  optional begin index; 0 when fewer than two arguments were passed
//   args[2]  optional end index; size() when fewer than three arguments were passed
// Returns nothing to JavaScript. A cudf::logic_error raised by the fill is re-raised as a
// JavaScript Error carrying the same message.
void Column::fill_in_place(Napi::CallbackInfo const& info) {
CallbackArgs args{info};
auto scalar = Scalar::Unwrap(args[0].ToObject());
// The defaults key off the argument count, not off whether the argument is undefined.
cudf::size_type begin = args.Length() > 1 ? args[1] : 0;
cudf::size_type end = args.Length() > 2 ? args[2] : size();
try {
// Obtain a mutable view of this Column so cudf can write into its existing storage.
cudf::mutable_column_view view = *this;
cudf::fill_in_place(view, begin, end, *scalar);
} catch (cudf::logic_error const& e) { NAPI_THROW(Napi::Error::New(info.Env(), e.what())); }
}

ObjectUnwrap<Column> Column::sequence(Napi::Env const& env,
cudf::size_type size,
cudf::scalar const& init,
Expand Down Expand Up @@ -72,6 +103,6 @@ Napi::Value Column::sequence(Napi::CallbackInfo const& info) {
rmm::mr::device_memory_resource* mr = args[3];
return Column::sequence(info.Env(), size, init, step, mr);
}
}

} // namespace nv
} // namespace nv
70 changes: 70 additions & 0 deletions modules/cudf/src/column/replace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright (c) 2021, NVIDIA CORPORATION.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <node_cudf/column.hpp>
#include <node_cudf/scalar.hpp>

#include <cudf/filling.hpp>
#include <cudf/replace.hpp>

namespace nv {

// Column overload: forwards this Column, the `replacement` column view and `mr` to
// cudf::replace_nulls and wraps the returned column in a new node Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::replace_nulls(*this, replacement, mr));
}

// Scalar overload: forwards this Column, the `replacement` scalar and `mr` to
// cudf::replace_nulls and wraps the returned column in a new node Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::replace_nulls(*this, replacement, mr));
}

// Policy overload: forwards this Column, the `replace_policy` and `mr` to cudf::replace_nulls
// and wraps the returned column in a new node Column.
ObjectUnwrap<Column> Column::replace_nulls(cudf::replace_policy const& replace_policy,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::replace_nulls(*this, replace_policy, mr));
}

// Column overload: forwards this Column, the `replacement` column view and `mr` to
// cudf::replace_nans and wraps the returned column in a new node Column.
ObjectUnwrap<Column> Column::replace_nans(cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::replace_nans(*this, replacement, mr));
}

// Scalar overload: forwards this Column, the `replacement` scalar and `mr` to
// cudf::replace_nans and wraps the returned column in a new node Column.
ObjectUnwrap<Column> Column::replace_nans(cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr) {
return Column::New(cudf::replace_nans(*this, replacement, mr));
}

// JavaScript entry point for `replaceNulls(value, memoryResource?)`.
// Dispatches on the runtime kind of the first argument:
//   - Column instance  -> column overload
//   - Scalar instance  -> scalar overload
//   - JS boolean       -> converted to a cudf::replace_policy and passed to the policy overload
// args[1] is forwarded as the memory resource in every case.
// A cudf::logic_error raised by the replacement is re-raised as a JavaScript Error carrying the
// same message.
Napi::Value Column::replace_nulls(Napi::CallbackInfo const& info) {
CallbackArgs args{info};
try {
if (Column::is_instance(info[0])) { return replace_nulls(*Column::Unwrap(args[0]), args[1]); }
if (Scalar::is_instance(info[0])) { return replace_nulls(*Scalar::Unwrap(args[0]), args[1]); }
if (args[0].IsBoolean()) {
// NOTE(review): which boolean value maps to which policy depends on cudf::replace_policy's
// underlying values, which are not visible here -- confirm against the cudf headers.
cudf::replace_policy policy{args[0]};
return replace_nulls(policy, args[1]);
}
} catch (cudf::logic_error const& e) { NAPI_THROW(Napi::Error::New(info.Env(), e.what())); }
// Reached when the first argument matched none of the accepted kinds above.
throw Napi::Error::New(info.Env(), "replace_nulls requires a Column, Scalar, or Boolean");
}

// JavaScript entry point for `replaceNaNs(value, memoryResource?)`.
// Dispatches on the runtime kind of the first argument:
//   - Column instance  -> column overload
//   - Scalar instance  -> scalar overload
// args[1] is forwarded as the memory resource in both cases.
// A cudf::logic_error raised by the replacement is re-raised as a JavaScript Error carrying the
// same message.
Napi::Value Column::replace_nans(Napi::CallbackInfo const& info) {
CallbackArgs args{info};
try {
if (Column::is_instance(info[0])) { return replace_nans(*Column::Unwrap(args[0]), args[1]); }
if (Scalar::is_instance(info[0])) { return replace_nans(*Scalar::Unwrap(args[0]), args[1]); }
} catch (cudf::logic_error const& e) { NAPI_THROW(Napi::Error::New(info.Env(), e.what())); }
// Reached when the first argument is neither a Column nor a Scalar.
throw Napi::Error::New(info.Env(), "replace_nans requires a Column or Scalar");
}

} // namespace nv
2 changes: 2 additions & 0 deletions modules/cudf/src/column_accessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export class ColumnAccessor<T extends TypeMap = any> {

get length() { return this._labels_to_indices.size; }

/**
 * Report whether a column is registered under `name`.
 *
 * Uses the same membership test as `get`, so `get(name)` will not throw for any name this
 * method accepts.
 *
 * @param name The column name to look up.
 * @returns `true` if `name` is a key of the underlying column map, `false` otherwise.
 */
has(name: string) {
  const isKnown = name in this._data;
  return isKnown;
}

get<R extends keyof T>(name: R) {
if (!(name in this._data)) { throw new Error(`Unknown column name: ${name.toString()}`); }
return this._data[name];
Expand Down
7 changes: 7 additions & 0 deletions modules/cudf/src/data_frame.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ export class DataFrame<T extends TypeMap = any> {
*/
drop<R extends keyof T>(names: R[]) {
  // Ask the accessor for a copy without the named columns, then wrap it in a new DataFrame.
  const remaining = this._accessor.dropColumns(names);
  return new DataFrame(remaining);
}

/**
 * Return whether the DataFrame has a Series with the given name.
 *
 * @param name Name of the Series to look for.
 * @returns Whatever the underlying column accessor's `has(name)` reports.
 */
has(name: string) {
  const found = this._accessor.has(name);
  return found;
}

/**
* Return a series by name.
*
Expand Down
38 changes: 38 additions & 0 deletions modules/cudf/src/node_cudf/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
#include <nv_node/utilities/wrap.hpp>

#include <napi.h>

#include <cudf/aggregation.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/copying.hpp>
#include <cudf/replace.hpp>
#include <cudf/types.hpp>
#include <cudf/unary.hpp>
#include <rmm/device_buffer.hpp>
Expand Down Expand Up @@ -620,6 +622,34 @@ class Column : public Napi::ObjectWrap<Column> {
cudf::out_of_bounds_policy bounds_policy = cudf::out_of_bounds_policy::DONT_CHECK,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;

// column/filling.cpp
/**
 * Fill the range [begin, end) with a scalar value, out-of-place.
 *
 * @param begin The starting index of the fill range (inclusive).
 * @param end The index one past the last element in the fill range (exclusive).
 * @param value The scalar value to fill.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 * @return The filled result wrapped as a node Column.
 */
ObjectUnwrap<Column> fill(
cudf::size_type begin,
cudf::size_type end,
cudf::scalar const& value,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// column/replace.cpp
/**
 * Replace nulls using the corresponding elements of another column.
 *
 * @param replacement Column view supplying the replacement values.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * Replace nulls using a scalar value.
 *
 * @param replacement Scalar supplying the replacement value.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * Replace nulls according to a cudf::replace_policy.
 *
 * @param replace_policy Policy passed through to cudf::replace_nulls.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 */
ObjectUnwrap<Column> replace_nulls(
cudf::replace_policy const& replace_policy,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * Replace NaNs using the corresponding elements of another column.
 *
 * @param replacement Column view supplying the replacement values.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 */
ObjectUnwrap<Column> replace_nans(
cudf::column_view const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * Replace NaNs using a scalar value.
 *
 * @param replacement Scalar supplying the replacement value.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * Defaults to the current device resource.
 */
ObjectUnwrap<Column> replace_nans(
cudf::scalar const& replacement,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// column/unaryop.cpp
ObjectUnwrap<Column> cast(
cudf::data_type out_type,
Expand Down Expand Up @@ -723,6 +753,10 @@ class Column : public Napi::ObjectWrap<Column> {
Napi::Value null_max(Napi::CallbackInfo const& info);
Napi::Value null_min(Napi::CallbackInfo const& info);

// column/filling.cpp
// N-API callbacks exposed to JavaScript as `fill` and `fillInPlace`.
// `fill` returns the resulting Column; `fill_in_place` returns nothing to JavaScript.
Napi::Value fill(Napi::CallbackInfo const& info);
void fill_in_place(Napi::CallbackInfo const& info);

// column/stream_compaction.cpp
Napi::Value drop_nulls(Napi::CallbackInfo const& info);
Napi::Value drop_nans(Napi::CallbackInfo const& info);
Expand All @@ -749,6 +783,10 @@ class Column : public Napi::ObjectWrap<Column> {
Napi::Value std(Napi::CallbackInfo const& info);
Napi::Value quantile(Napi::CallbackInfo const& info);

// column/replace.cpp
// N-API callbacks exposed to JavaScript as `replaceNulls` and `replaceNaNs`.
Napi::Value replace_nulls(Napi::CallbackInfo const& info);
Napi::Value replace_nans(Napi::CallbackInfo const& info);

// column/unaryop.cpp
Napi::Value cast(Napi::CallbackInfo const& info);
Napi::Value is_null(Napi::CallbackInfo const& info);
Expand Down
71 changes: 70 additions & 1 deletion modules/cudf/src/series.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ import {
Uint8,
Utf8String,
} from './types/dtypes';
import {NullOrder} from './types/enums';
import {
NullOrder,
ReplacePolicy,
} from './types/enums';
import {ArrowToCUDFType, arrowToCUDFType} from './types/mappings';

export type SeriesProps<T extends DataType = any> = {
Expand Down Expand Up @@ -193,6 +196,72 @@ export class AbstractSeries<T extends DataType = any> {
*/
get numChildren() { return this._col.numChildren; }

/**
 * Fills a range of elements in a column out-of-place with a scalar value.
 *
 * @param value The scalar value to fill. It is wrapped in a `Scalar` of this Series' type
 * before being handed to the underlying Column.
 * @param begin The starting index of the fill range (inclusive). Defaults to `0`.
 * @param end The index of the last element in the fill range (exclusive). Defaults to the
 * length of this Series.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 * @returns A new Series wrapping the filled Column.
 */
fill(value: T['scalarType'], begin = 0, end = this.length, memoryResource?: MemoryResource):
  Series<T> {
  // `value` is an element value, not a DataType, hence `T['scalarType']` (as in `replaceNulls`).
  return Series.new(
    this._col.fill(new Scalar({type: this.type, value}), begin, end, memoryResource));
}

/**
 * Fills a range of elements in-place in a column with a scalar value.
 *
 * @param value The scalar value to fill. It is wrapped in a `Scalar` of this Series' type
 * before being handed to the underlying Column.
 * @param begin The starting index of the fill range (inclusive). Defaults to `0`.
 * @param end The index of the last element in the fill range (exclusive). Defaults to the
 * length of this Series.
 * @returns This Series, to allow chaining.
 */
fillInPlace(value: T['scalarType'], begin = 0, end = this.length) {
  // `value` is an element value, not a DataType, hence `T['scalarType']` (as in `replaceNulls`).
  // The Column-level return value is deliberately ignored; this Series is returned instead.
  this._col.fillInPlace(new Scalar({type: this.type, value}), begin, end);
  return this;
}

/**
 * Replace null values with a scalar value.
 *
 * @param value The scalar value to use in place of nulls.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 */
replaceNulls(value: T['scalarType'], memoryResource?: MemoryResource): Series<T>;

/**
 * Replace null values with the corresponding elements from another Series.
 *
 * @param value The Series to use in place of nulls.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 */
replaceNulls(value: Series<T>, memoryResource?: MemoryResource): Series<T>;

/**
 * Replace null values with the closest non-null value before or after each null.
 *
 * @param value The {@link ReplacePolicy} indicating the side to search for the closest non-null
 * value.
 * @param memoryResource The optional MemoryResource used to allocate the result Column's device
 * memory.
 */
replaceNulls(value: keyof typeof ReplacePolicy, memoryResource?: MemoryResource): Series<T>;

/**
 * Implementation shared by the overloads above. Dispatch order:
 *  1. a Series          -> element-wise replacement from its Column;
 *  2. a policy name     -> the matching `ReplacePolicy` value;
 *  3. anything else     -> wrapped in a `Scalar` of this Series' type.
 */
replaceNulls(value: any, memoryResource?: MemoryResource): Series<T> {
  if (value instanceof Series) {
    return Series.new(this._col.replaceNulls(value._col, memoryResource));
  }
  // A policy is selected only by a string that is an *own* key of ReplacePolicy. A bare `in`
  // test would also match inherited names ("toString", "constructor", ...) and non-string
  // scalars that coerce to an existing key, misrouting ordinary scalar values to this branch.
  const isPolicyName =
    typeof value === 'string' && Object.prototype.hasOwnProperty.call(ReplacePolicy, value);
  if (isPolicyName) {
    return Series.new(
      this._col.replaceNulls(ReplacePolicy[value as keyof typeof ReplacePolicy], memoryResource));
  }
  return Series.new(
    this._col.replaceNulls(new Scalar({type: this.type, value}), memoryResource));
}

/**
* Return a sub-selection of this Series using the specified integral indices.
*
Expand Down
Loading

0 comments on commit 3ce3371

Please sign in to comment.