diff --git a/modules/cudf/src/node_cudf/utilities/cpp_to_napi.hpp b/modules/cudf/src/node_cudf/utilities/cpp_to_napi.hpp index 6d1a980f2..370350207 100644 --- a/modules/cudf/src/node_cudf/utilities/cpp_to_napi.hpp +++ b/modules/cudf/src/node_cudf/utilities/cpp_to_napi.hpp @@ -14,6 +14,8 @@ #pragma once +#include "scalar_to_value.hpp" + #include #include @@ -82,87 +84,6 @@ inline Napi::Value CPPToNapi::operator()(cudf::timestamp_ns const& val) const { return (*this)(val.time_since_epoch()); } -namespace detail { - -struct get_scalar_value { - Napi::Env env; - template - inline std::enable_if_t(), Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - if (!scalar->is_valid(stream)) { return env.Null(); } - switch (scalar->type().id()) { - case cudf::type_id::INT64: - return Napi::BigInt::New( - env, static_cast*>(scalar.get())->value(stream)); - case cudf::type_id::UINT64: - return Napi::BigInt::New( - env, static_cast*>(scalar.get())->value(stream)); - default: - return Napi::Number::New( - env, static_cast*>(scalar.get())->value(stream)); - } - } - template - inline std::enable_if_t(), Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? Napi::Number::New(env, - static_cast*>(scalar.get())->value(stream)) - : env.Null(); - } - template - inline std::enable_if_t::value, Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? Napi::Boolean::New( - env, static_cast*>(scalar.get())->value(stream)) - : env.Null(); - } - template - inline std::enable_if_t::value, Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? CPPToNapi(env)(static_cast(scalar.get())->to_string(stream)) - : env.Null(); - } - template - inline std::enable_if_t(), Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? CPPToNapi(env)(static_cast*>(scalar.get())->value(stream)) - : env.Null(); - } - template - inline std::enable_if_t(), Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? CPPToNapi(env)(static_cast*>(scalar.get())->value(stream)) - : env.Null(); - } - template - inline std::enable_if_t(), Napi::Value> operator()( - std::unique_ptr const& scalar, cudaStream_t stream = 0) { - return scalar->is_valid(stream) - ? CPPToNapi(env)( - static_cast*>(scalar.get())->value(stream)) - : env.Null(); - } - template - inline std::enable_if_t() || // - cudf::is_floating_point() || // - std::is_same::value || // - std::is_same::value || // - cudf::is_duration() || // - cudf::is_timestamp() || // - cudf::is_fixed_point()), - Napi::Value> - operator()(std::unique_ptr const& scalar, cudaStream_t stream = 0) { - NAPI_THROW(Napi::Error::New(env, "Unsupported dtype")); - } -}; - -} // namespace detail - template <> inline Napi::Value CPPToNapi::operator()(std::unique_ptr const& scalar) const { return cudf::type_dispatcher(scalar->type(), detail::get_scalar_value{Env()}, scalar); @@ -227,9 +148,4 @@ inline Value Value::From(napi_env env, cudf::timestamp_ns const& val) { return Value::From(env, val.time_since_epoch()); } -template <> -inline Value Value::From(napi_env env, std::unique_ptr const& scalar) { - return cudf::type_dispatcher(scalar->type(), nv::detail::get_scalar_value{env}, scalar); -} - } // namespace Napi diff --git a/modules/cudf/src/node_cudf/utilities/scalar_to_value.hpp b/modules/cudf/src/node_cudf/utilities/scalar_to_value.hpp new file mode 100644 index 000000000..d311e885e --- /dev/null +++ b/modules/cudf/src/node_cudf/utilities/scalar_to_value.hpp @@ -0,0 +1,132 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace nv { +namespace detail { + +struct get_scalar_value { + Napi::Env env; + template + inline std::enable_if_t(), Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + if (!scalar->is_valid(stream)) { return env.Null(); } + switch (scalar->type().id()) { + case cudf::type_id::INT64: + return Napi::BigInt::New( + env, static_cast*>(scalar.get())->value(stream)); + case cudf::type_id::UINT64: + return Napi::BigInt::New( + env, static_cast*>(scalar.get())->value(stream)); + default: + return Napi::Number::New( + env, static_cast*>(scalar.get())->value(stream)); + } + } + template + inline std::enable_if_t(), Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? Napi::Number::New(env, + static_cast*>(scalar.get())->value(stream)) + : env.Null(); + } + template + inline std::enable_if_t::value, Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? Napi::Boolean::New( + env, static_cast*>(scalar.get())->value(stream)) + : env.Null(); + } + template + inline std::enable_if_t::value, Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? CPPToNapi(env)(static_cast(scalar.get())->to_string(stream)) + : env.Null(); + } + template + inline std::enable_if_t(), Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? CPPToNapi(env)(static_cast*>(scalar.get())->value(stream)) + : env.Null(); + } + template + inline std::enable_if_t(), Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? CPPToNapi(env)(static_cast*>(scalar.get())->value(stream)) + : env.Null(); + } + template + inline std::enable_if_t(), Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? CPPToNapi(env)( + static_cast*>(scalar.get())->value(stream)) + : env.Null(); + } + template + inline std::enable_if_t::value, Napi::Value> operator()( + std::unique_ptr const& scalar, cudaStream_t stream = 0) { + return scalar->is_valid(stream) + ? Column::New(env, + std::make_unique( + // The list_scalar's view is copied here because its underlying column + // cannot be moved. + static_cast(scalar.get())->view(), + stream)) + : env.Null(); + } + template + inline std::enable_if_t() || // + cudf::is_floating_point() || // + std::is_same::value || // + std::is_same::value || // + cudf::is_duration() || // + cudf::is_timestamp() || // + cudf::is_fixed_point() || // + std::is_same::value), + Napi::Value> + operator()(std::unique_ptr const& scalar, cudaStream_t stream = 0) { + NAPI_THROW(Napi::Error::New(env, "Unsupported dtype")); + } +}; + +} // namespace detail + +} // namespace nv + +namespace Napi { + +template <> +inline Value Value::From(napi_env env, std::unique_ptr const& scalar) { + return cudf::type_dispatcher(scalar->type(), nv::detail::get_scalar_value{env}, scalar); +} + +} // namespace Napi diff --git a/modules/cudf/src/node_cudf/utilities/value_to_scalar.hpp b/modules/cudf/src/node_cudf/utilities/value_to_scalar.hpp new file mode 100644 index 000000000..fa414c2ce --- /dev/null +++ b/modules/cudf/src/node_cudf/utilities/value_to_scalar.hpp @@ -0,0 +1,74 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include + +#include + +namespace nv { +namespace detail { + +struct set_scalar_value { + Napi::Value val; + + template + inline std::enable_if_t(), void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); + } + template + inline std::enable_if_t::value, void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + scalar.reset(new cudf::string_scalar(val.ToString(), true, stream)); + } + template + inline std::enable_if_t(), void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); + } + template + inline std::enable_if_t(), void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); + } + template + inline std::enable_if_t(), void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + scalar.reset(new cudf::fixed_point_scalar(val.ToNumber(), true, stream)); + } + template + inline std::enable_if_t::value, void> operator()( + std::unique_ptr& scalar, cudaStream_t stream = 0) { + scalar.reset(new cudf::list_scalar(*Column::Unwrap(val.ToObject()), true, stream)); + } + template + inline std::enable_if_t() || // + std::is_same::value || // + cudf::is_duration() || // + cudf::is_timestamp() || // + cudf::is_fixed_point() || // + std::is_same::value), + void> + operator()(std::unique_ptr const& scalar, cudaStream_t stream = 0) { + NAPI_THROW(Napi::Error::New(val.Env(), "Unsupported dtype")); + } +}; + +} // namespace detail +} // namespace nv diff --git a/modules/cudf/src/scalar.cpp b/modules/cudf/src/scalar.cpp index f1b190b51..26ae89b32 100644 --- a/modules/cudf/src/scalar.cpp +++ b/modules/cudf/src/scalar.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -34,50 +35,6 @@ namespace nv { -namespace { - -struct set_scalar_value { - Napi::Value val; - - template - inline std::enable_if_t(), void> operator()( - std::unique_ptr& scalar, cudaStream_t stream = 0) { - static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); - } - template - inline std::enable_if_t::value, void> operator()( - std::unique_ptr& scalar, cudaStream_t stream = 0) { - scalar.reset(new cudf::string_scalar(val.ToString(), true, stream)); - } - template - inline std::enable_if_t(), void> operator()( - std::unique_ptr& scalar, cudaStream_t stream = 0) { - static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); - } - template - inline std::enable_if_t(), void> operator()( - std::unique_ptr& scalar, cudaStream_t stream = 0) { - static_cast*>(scalar.get())->set_value(NapiToCPP(val), stream); - } - template - inline std::enable_if_t(), void> operator()( - std::unique_ptr& scalar, cudaStream_t stream = 0) { - scalar.reset(new cudf::fixed_point_scalar(val.ToNumber(), true, stream)); - } - template - inline std::enable_if_t() || // - std::is_same::value || // - cudf::is_duration() || // - cudf::is_timestamp() || // - cudf::is_fixed_point()), - void> - operator()(std::unique_ptr const& scalar, cudaStream_t stream = 0) { - NAPI_THROW(Napi::Error::New(val.Env(), "Unsupported dtype")); - } -}; - -} // namespace - Napi::Function Scalar::Init(Napi::Env const& env, Napi::Object exports) { return DefineClass( env, @@ -145,7 +102,7 @@ void Scalar::set_value(Napi::CallbackInfo const& info, Napi::Value const& value) if (value.IsNull() or value.IsUndefined()) { this->set_valid(false); } else { - cudf::type_dispatcher(this->type(), set_scalar_value{value}, scalar_); + cudf::type_dispatcher(this->type(), detail::set_scalar_value{value}, scalar_); } } diff --git a/modules/cudf/src/series.ts b/modules/cudf/src/series.ts index c9866fafe..8ba2de9e6 100644 --- a/modules/cudf/src/series.ts +++ b/modules/cudf/src/series.ts @@ -192,7 +192,7 @@ export class AbstractSeries { * const b = Series.new(a._col); // Int32Series [1, 2, 3, 4] * ``` */ - static new(input: Column|SeriesProps): Series; + static new(input: AbstractSeries|Column|SeriesProps): Series; /** * Create a new cudf.StringSeries * @@ -200,7 +200,8 @@ export class AbstractSeries { * ```typescript * import {Series} from '@rapidsai/cudf'; * - * const a = Series.new(["foo", "bar", "test",null]); // StringSeries ["foo", "bar", "test", null] + * // StringSeries ["foo", "bar", "test", null] + * const a = Series.new(["foo", "bar", "test", null]); * ``` */ static new(input: (string|null|undefined)[]): Series; @@ -211,7 +212,8 @@ export class AbstractSeries { * ```typescript * import {Series} from '@rapidsai/cudf'; * - * const a = Series.new([1, 2, 3,, 4]); // Float64Series [1, 2, 3, null, 4] + * // Float64Series [1, 2, 3, null, 4] + * const a = Series.new([1, 2, 3, undefined, 4]); * ``` */ static new(input: (number|null|undefined)[]): Series; @@ -222,7 +224,8 @@ export class AbstractSeries { * ```typescript * import {Series} from '@rapidsai/cudf'; * - * const a = Series.new([1n, 2n, 3n,undefined, 4n]); // Int64Series [1n, 2n, 3n, null, 4n] + * // Int64Series [1n, 2n, 3n, null, 4n] + * const a = Series.new([1n, 2n, 3n, undefined, 4n]); * ``` */ static new(input: (bigint|null|undefined)[]): Series; @@ -233,20 +236,80 @@ export class AbstractSeries { * ```typescript * import {Series} from '@rapidsai/cudf'; * - * const a = Series.new([true, false, true, false]); // Bool8Series [true, false, true, false] + * // Bool8Series [true, false, null, false] + * const a = Series.new([true, false, undefined, false]); * ``` */ static new(input: (boolean|null|undefined)[]): Series; - static new(input: Column|SeriesProps|arrow.Vector| + /** + * Create a new cudf.ListSeries that contain cudf.StringSeries elements. + * + * @example + * ```typescript + * import {Series} from '@rapidsai/cudf'; + * + * // ListSeries [["foo", "bar"], ["test", null]] + * const a = Series.new([["foo", "bar"], ["test",null]]); + * a.getValue(0) // StringSeries ["foo", "bar"] + * a.getValue(1) // StringSeries ["test", null] + * ``` + */ + static new(input: (string|null|undefined)[][]): Series>; + /** + * Create a new cudf.ListSeries that contain cudf.Float64Series elements. + * + * @example + * ```typescript + * import {Series} from '@rapidsai/cudf'; + * + * // ListSeries [[1, 2], [3, null, 4]] + * const a = Series.new([[1, 2], [3, undefined, 4]]); + * a.getValue(0) // Float64Series [1, 2] + * a.getValue(1) // Float64Series [3, null, 4] + * ``` + */ + static new(input: (number|null|undefined)[][]): Series>; + /** + * Create a new cudf.ListSeries that contain cudf.Int64Series elements. + * + * @example + * ```typescript + * import {Series} from '@rapidsai/cudf'; + * + * // ListSeries [[1n, 2n], [3n, null, 4n]] + * const a = Series.new([[1n, 2n], [3n, undefined, 4n]]); + * a.getValue(0) // Int64Series [1n, 2n] + * a.getValue(1) // Int64Series [3n, null, 4n] + * ``` + */ + static new(input: (bigint|null|undefined)[][]): Series>; + /** + * Create a new cudf.ListSeries that contain cudf.Bool8Series elements. + * + * @example + * ```typescript + * import {Series} from '@rapidsai/cudf'; + * + * // ListSeries [[true, false], [null, false]] + * const a = Series.new([[true, false], [undefined, false]]); + * a.getValue(0) // Bool8Series [true, false] + * a.getValue(1) // Bool8Series [null, false] + * ``` + */ + static new(input: (boolean|null|undefined)[][]): Series>; + + static new(input: AbstractSeries|Column|SeriesProps|arrow.Vector| (string|null|undefined)[]|(number|null|undefined)[]| - (bigint|null|undefined)[]|(boolean|null|undefined)[]) { + (bigint|null|undefined)[]|(boolean|null|undefined)[]| + (string|null|undefined)[][]|(number|null|undefined)[][]| + (bigint|null|undefined)[][]|(boolean|null|undefined)[][]) { return columnToSeries(asColumn(input)) as any as Series; } /** @ignore */ public _col: Column; - protected constructor(input: SeriesProps|Column|arrow.Vector) { + protected constructor(input: AbstractSeries|SeriesProps|Column|arrow.Vector) { this._col = asColumn(input); } @@ -533,51 +596,6 @@ export class AbstractSeries { */ filter(mask: Series): Series { return this.__construct(this._col.gather(mask._col)); } - /** - * Return a value at the specified index to host memory - * - * @param index the index in this Series to return a value for - * - * @example - * ```typescript - * import {Series} from "@rapidsai/cudf"; - * - * // Float64Series - * Series.new([1, 2, 3]).getValue(0) // 1 - * // StringSeries - * Series.new(["foo", "bar", "test"]).getValue(2) // "test" - * // Bool8Series - * Series.new([false, true, true]).getValue(3) // throws index out of bounds error - * ``` - */ - getValue(index: number) { return this._col.getValue(index); } - - /** - * set value at the specified index - * - * @param index the index in this Series to set a value for - * @param value the value to set at `index` - * - * @example - * ```typescript - * import {Series} from "@rapidsai/cudf"; - * - * // Float64Series - * const a = Series.new([1, 2, 3]); - * a.setValue(0, -1) // inplace update [-1, 2, 3] - * - * // StringSeries - * const b = Series.new(["foo", "bar", "test"]) - * b.setValue(1,"test1") // inplace update ["foo", "test1", "test"] - * // Bool8Series - * const c = Series.new([false, true, true]) - * c.cetValue(2, false) // inplace update [false, true, false] - * ``` - */ - setValue(index: number, value: T['scalarType']): void { - this._col = this.scatter(value, [index])._col as Column; - } - /** * set values at the specified indices * @@ -602,8 +620,8 @@ export class AbstractSeries { /** * Copy the underlying device memory to host, and return an Iterator of the values. */ - [Symbol.iterator](): IterableIterator { - return this.toArrow()[Symbol.iterator](); + [Symbol.iterator](): IterableIterator { + return this.toArrow()[Symbol.iterator]() as IterableIterator; } /** @@ -875,21 +893,72 @@ export { }; function inferType(value: any[]): DataType { - if (value.length == 0 || (value.every((val) => typeof val === 'number' || val == null))) - return new Float64; - if (value.every((val) => typeof val === 'string' || val == null)) return new Utf8String; - if (value.every((val) => typeof val === 'bigint' || val == null)) return new Int64; - if (value.every((val) => typeof val === 'boolean' || val == null)) return new Bool8; - throw new TypeError('Unable to infer type series type, explicit type declaration expected'); + if (value.length === 0) { return new Float64; } + let nullsCount = 0; + let arraysCount = 0; + let objectsCount = 0; + let numbersCount = 0; + let stringsCount = 0; + let bigintsCount = 0; + let booleansCount = 0; + let unknownCount = 0; + value.forEach((val) => { + if (val == null) { return ++nullsCount; } + switch (typeof val) { + case 'bigint': return ++bigintsCount; + case 'boolean': return ++booleansCount; + case 'number': return ++numbersCount; + case 'string': return ++stringsCount; + case 'object': return Array.isArray(val) ? ++arraysCount : ++objectsCount; + } + return ++unknownCount; + }); + if (unknownCount === 0) { + if (numbersCount + nullsCount === value.length) { + return new Float64; + } else if (stringsCount + nullsCount === value.length) { + return new Utf8String; + } else if (bigintsCount + nullsCount === value.length) { + return new Int64; + } else if (booleansCount + nullsCount === value.length) { + return new Bool8; + } else if (arraysCount + nullsCount === value.length) { + const childType = inferType(value[value.findIndex((ary) => ary != null)]); + if (value.every((ary) => ary == null || childType.compareTo(inferType(ary)))) { + return new List(new arrow.Field('', childType)); + } + } else if (objectsCount + nullsCount === value.length) { + const fields = new Map(); + value.forEach((val) => { + Object.keys(val).forEach((key) => { + if (!fields.has(key)) { + // use the type inferred for the first instance of a found key + fields.set(key, new arrow.Field(key, inferType(val[key]))); + } + }); + }, {}); + return new Struct([...fields.values()]); + } + } + throw new TypeError( + 'Unable to infer Series type from input values, explicit type declaration expected'); } -function asColumn(value: SeriesProps|Column|arrow.Vector| - (string | null | undefined)[]|(number | null | undefined)[]| - (bigint | null | undefined)[]| - (boolean | null | undefined)[]): Column { +function asColumn( + value: AbstractSeries|SeriesProps|Column|arrow.Vector // + |(string | null | undefined)[] // + |(number | null | undefined)[] // + |(bigint | null | undefined)[] // + |(boolean | null | undefined)[] // + |(string | null | undefined)[][] // + |(number | null | undefined)[][] // + |(bigint | null | undefined)[][] // + |(boolean | null | undefined)[][] // + ): Column { + if (value instanceof AbstractSeries) { return value._col; } if (Array.isArray(value)) { return fromArrow(arrow.Vector.from( - {type: inferType(value), values: value, highWaterMark: Infinity})) as any; + {type: inferType(value), values: value as any, highWaterMark: Infinity})) as any; } if (value instanceof arrow.Vector) { return fromArrow(value) as any; } if (!value.type && Array.isArray(value.data)) { diff --git a/modules/cudf/src/series/list.ts b/modules/cudf/src/series/list.ts index 656c04d8a..b2144a102 100644 --- a/modules/cudf/src/series/list.ts +++ b/modules/cudf/src/series/list.ts @@ -36,6 +36,7 @@ export class ListSeries extends Series> { throw new Error(`Cast from ${arrow.Type[this.type.typeId]} to ${ arrow.Type[dataType.typeId]} not implemented`); } + /** * Series of integer offsets for each list * @example @@ -74,6 +75,30 @@ export class ListSeries extends Series> { // TODO: account for this.offset get elements(): Series { return Series.new(this._col.getChild(1)); } + /** + * Return a value at the specified index to host memory + * + * @param index the index in this Series to return a value for + * + * @example + * ```typescript + * import {Series} from "@rapidsai/cudf"; + * + * // Series> + * Series.new([[1, 2], [3]]).getValue(0) // Series([1, 2]) + * + * // Series> + * Series.new([["foo", "bar"], ["test"]]).getValue(1) // Series(["test"]) + * + * // Series> + * Series.new([[false, true], [true]]).getValue(2) // throws index out of bounds error + * ``` + */ + getValue(index: number) { + const value = this._col.getValue(index); + return value === null ? null : Series.new(value); + } + /** @ignore */ protected __construct(col: Column>) { return new ListSeries(Object.assign(col, {type: fixNames(this.type, col.type)})); diff --git a/modules/cudf/src/series/numeric.ts b/modules/cudf/src/series/numeric.ts index f4de115cd..09ac6c909 100644 --- a/modules/cudf/src/series/numeric.ts +++ b/modules/cudf/src/series/numeric.ts @@ -78,6 +78,42 @@ export abstract class NumericSeries extends Series { return Series.new({type: dataType, data: this._col.data, length: newLength}); } + /** + * Return a value at the specified index to host memory + * + * @param index the index in this Series to return a value for + * + * @example + * ```typescript + * import {Series} from "@rapidsai/cudf"; + * + * // Float64Series + * Series.new([1, 2, 3]).getValue(0) // 1 + * Series.new([1, 2, 3]).getValue(2) // 3 + * Series.new([1, 2, 3]).getValue(3) // throws index out of bounds error + * ``` + */ + getValue(index: number) { return this._col.getValue(index); } + + /** + * set value at the specified index + * + * @param index the index in this Series to set a value for + * @param value the value to set at `index` + * + * @example + * ```typescript + * import {Series} from "@rapidsai/cudf"; + * + * // Float64Series + * const a = Series.new([1, 2, 3]); + * a.setValue(0, -1) // inplace update -> Series([-1, 2, 3]) + * ``` + */ + setValue(index: number, value: T['scalarType']): void { + this._col = this.scatter(value, [index])._col as Column; + } + /** * Add this Series and another Series or scalar value. * diff --git a/modules/cudf/src/series/string.ts b/modules/cudf/src/series/string.ts index bf9925b48..8b7eed53e 100644 --- a/modules/cudf/src/series/string.ts +++ b/modules/cudf/src/series/string.ts @@ -35,6 +35,41 @@ export class StringSeries extends Series { throw new Error(`Cast from ${arrow.Type[this.type.typeId]} to ${ arrow.Type[dataType.typeId]} not implemented`); } + + /** + * Return a value at the specified index to host memory + * + * @param index the index in this Series to return a value for + * + * @example + * ```typescript + * import {Series} from "@rapidsai/cudf"; + * + * // StringSeries + * Series.new(["foo", "bar", "test"]).getValue(0) // "foo" + * Series.new(["foo", "bar", "test"]).getValue(2) // "test" + * Series.new(["foo", "bar", "test"]).getValue(3) // throws index out of bounds error + * ``` + */ + getValue(index: number) { return this._col.getValue(index); } + + /** + * set value at the specified index + * + * @param index the index in this Series to set a value for + * @param value the value to set at `index` + * + * @example + * ```typescript + * import {Series} from "@rapidsai/cudf"; + * + * // StringSeries + * const a = Series.new(["foo", "bar", "test"]) + * a.setValue(2, "test1") // inplace update -> Series(["foo", "bar", "test1"]) + * ``` + */ + setValue(index: number, value: string): void { this._col = this.scatter(value, [index])._col; } + /** * Series of integer offsets for each string * @example @@ -47,6 +82,7 @@ export class StringSeries extends Series { */ // TODO: Account for this.offset get offsets() { return Series.new(this._col.getChild(0)); } + /** * Series containing the utf8 characters of each string * @example diff --git a/modules/cudf/src/series/struct.ts b/modules/cudf/src/series/struct.ts index b9fb1eb5a..b41228ff9 100644 --- a/modules/cudf/src/series/struct.ts +++ b/modules/cudf/src/series/struct.ts @@ -37,6 +37,7 @@ export class StructSeries extends Series> { throw new Error(`Cast from ${arrow.Type[this.type.typeId]} to ${ arrow.Type[dataType.typeId]} not implemented`); } + /** * Return a child series by name. * diff --git a/modules/cudf/src/types/dtypes.ts b/modules/cudf/src/types/dtypes.ts index 5ddf3b444..9211a3b16 100644 --- a/modules/cudf/src/types/dtypes.ts +++ b/modules/cudf/src/types/dtypes.ts @@ -13,6 +13,7 @@ // limitations under the License. import * as arrow from 'apache-arrow'; +import {Column} from '../column'; import {TypeMap} from './mappings'; export type FloatingPoint = Float32|Float64; @@ -43,6 +44,7 @@ export class Int32 extends arrow.Int32 {} (Int32.prototype as any).BYTES_PER_ELEMENT = 4; export interface Int64 extends arrow.Int64 { + TValue: bigint; scalarType: bigint; readonly BYTES_PER_ELEMENT: number; } @@ -71,6 +73,7 @@ export class Uint32 extends arrow.Uint32 {} (Uint32.prototype as any).BYTES_PER_ELEMENT = 4; export interface Uint64 extends arrow.Uint64 { + TValue: bigint; scalarType: bigint; readonly BYTES_PER_ELEMENT: number; } @@ -105,7 +108,7 @@ export class Utf8String extends arrow.Utf8 {} export interface List extends arrow.List { childType: T; - scalarType: T['scalarType'][]; + scalarType: Column; } export class List extends arrow.List {} diff --git a/modules/cudf/test/series/list-tests.ts b/modules/cudf/test/series/list-tests.ts index b33520f2f..f98b8cf5e 100644 --- a/modules/cudf/test/series/list-tests.ts +++ b/modules/cudf/test/series/list-tests.ts @@ -17,7 +17,7 @@ import '../jest-extensions'; import {setDefaultAllocator} from '@nvidia/cuda'; -import {Int32, List, Series} from '@rapidsai/cudf'; +import {Int32, Int32Series, List, Series} from '@rapidsai/cudf'; import {CudaMemoryResource, DeviceBuffer} from '@rapidsai/rmm'; import * as arrow from 'apache-arrow'; import {VectorType} from 'apache-arrow/interfaces'; @@ -39,6 +39,12 @@ describe('ListSeries', () => { expect(expectedElements).toEqualTypedArray(actualElements); }; + test('Can create from JS Arrays', () => { + const listOfLists = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, null]]; + const col = Series.new(listOfLists); + expect([...col].map((elt) => [...elt!])).toEqual(listOfLists); + }); + test('Can create from Arrow', () => { const vec = listsOfInt32s([[0, 1, 2], [3, 4, 5]]); const ints = vec.getChildAt(0)! as VectorType; @@ -48,6 +54,26 @@ describe('ListSeries', () => { validateElements(ints, col.elements); }); + test('Can get individual values', () => { + const vec = listsOfInt32s([[0, 1, 2], [3, 4, 5]]); + const col = Series.new(vec); + for (let i = -1; ++i < col.length;) { + const elt = col.getValue(i); + expect(elt).not.toBeNull(); + expect(elt).toBeInstanceOf(Int32Series); + expect([...elt!]).toEqual([...vec.get(i)!]); + } + }); + + // Uncomment this once libcudf supports scatter w/ list_scalar + // test('Can set individual values', () => { + // const listOfLists = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, null]]; + // const col = Series.new(listOfLists); + // col.setValue(0, [1, 1, 1]); + // expect([...col].map((elt) => [...elt!])).toEqual([[1, 1, 1], [3, 4, 5], [6, 7, 8], [9, + // null]]); + // }); + test('Can create a List of Lists from Arrow', () => { const vec = listsOfListsOfInt32s([[[0, 1, 2]], [[3, 4, 5], [7, 8, 9]]]); const list = vec.getChildAt(0)! as VectorType;