diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e649f8f419..8f8893d50ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,20 +4,18 @@ - PR #4360 Added Java bindings for bitwise shift operators - PR #3577 Add initial dictionary support to column classes -- PR #3917 Add dictionary add_keys function - PR #3777 Add support for dictionary column in gather - PR #3693 add string support, skipna to scan operation - PR #3662 Define and implement `shift`. -- PR #3842 ORC writer: add support for column statistics - PR #3861 Added Series.sum feature for String - PR #4069 Added cast of numeric columns from/to String - PR #3681 Add cudf::experimental::boolean_mask_scatter -- PR #4088 Added asString() on ColumnVector in Java that takes a format string - PR #4040 Add support for n-way merge of sorted tables - PR #4053 Multi-column quantiles. - PR #4100 Add set_keys function for dictionary columns - PR #3894 Add remove_keys functions for dictionary columns - PR #4107 Add groupby nunique aggregation +- PR #4235 Port nvtx.pyx to use non-legacy libcudf APIs - PR #4153 Support Dask serialization protocol on cuDF objects - PR #4127 Add python API for n-way sorted merge (merge_sorted) - PR #4164 Add Buffer "constructor-kwargs" header @@ -64,19 +62,15 @@ - PR #3911 Adding null boolean handling for copy_if_else - PR #4003 Drop old `to_device` utility wrapper function - PR #4002 Adding to_frame and fix for categorical column issue -- PR #4035 Port NVText tokenize function to libcudf++ - PR #4009 build script update to enable cudf build without installing - PR #3897 Port cuIO JSON reader to cudf::column types - PR #4008 Eliminate extra copy in column constructor - PR #4013 Add cython definition for io readers cudf/io/io_types.hpp - PR #4028 Port json.pyx to use new libcudf APIs - PR #4014 ORC/Parquet: add count parameter to stripe/rowgroup-based reader API -- PR #4042 Port cudf/io/functions.hpp to Cython for use in IO bindings -- PR #3880 Add aggregation infrastructure support for reduction - PR #3880 Add aggregation infrastructure support for cudf::reduce -- PR #4059 Add aggregation infrastructure support for cudf::scan +- PR #4059 Add aggregation infrastructure support for cudf::scan - PR #4021 Change quantiles signature for clarity. -- PR #4058 Port hash.pyx to use libcudf++ APIs - PR #4057 Handle offsets in cython Column class - PR #4045 Reorganize `libxx` directory - PR #4029 Port stream_compaction.pyx to use libcudf++ APIs @@ -102,7 +96,6 @@ - PR #4098 Remove legacy calls from libcudf strings column code - PR #4044 Port join.pyx to use libcudf++ APIs - PR #4111 Use `Buffer`'s to serialize `StringColumn` -- PR #4133 Mask cleanup and fixes: use `int32` dtype, ensure 64 byte padding, handle offsets - PR #4113 Get `len` of `StringColumn`s without `nvstrings` - PR #4147 Remove workaround for UNKNOWN_NULL_COUNT in contiguous_split. - PR #4130 Renames in-place `cudf::experimental::fill` to `cudf::experimental::fill_in_place` @@ -203,13 +196,9 @@ - PR #4089 Fix dask groupby mutliindex test case issues in join - PR #4097 Fix strings concatenate logic with column offsets - PR #4076 All null string entries should have null data buffer -- PR #4145 Support empty index case in DataFrame._from_table - PR #4109 Use rmm::device_vector instead of thrust::device_vector - PR #4113 Use `.nvstrings` in `StringColumn.sum(...)` - PR #4116 Fix a bug in contiguous_split() where tables with mixed column types could corrupt string output -- PR #4108 Fix dtype bugs in dask_cudf metadata (metadata_nonempty overhaul) -- PR #4138 Really fix strings concatenate logic with column offsets -- PR #4119 Fix binary ops slowdown using jitify -remove-unused-globals - PR #4125 Fix type enum to account for added Dictionary type in `types.hpp` - PR #4132 Fix `hash_partition` null mask allocation - PR #4137 Update Java for mutating fill and rolling window changes diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx index cf086f2fb9e..10e3e7a3286 100644 --- a/python/cudf/cudf/_lib/avro.pyx +++ b/python/cudf/cudf/_lib/avro.pyx @@ -14,7 +14,10 @@ from libcpp.vector cimport vector from libcpp.memory cimport unique_ptr from cudf.utils import ioutils -from cudf._lib.nvtx import nvtx_range_push, nvtx_range_pop +from cudf._libxx.nvtx import ( + range_push as nvtx_range_push, + range_pop as nvtx_range_pop +) from io import BytesIO import errno diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx index d444a50ef07..6eae1488237 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -6,7 +6,10 @@ from cudf._lib.cudf cimport * from cudf._lib.cudf import * from cudf._lib.utils cimport * from cudf._lib.utils import * -from cudf._lib.nvtx import nvtx_range_push, nvtx_range_pop +from cudf._libxx.nvtx import ( + range_push as nvtx_range_push, + range_pop as nvtx_range_pop +) from cudf._lib.includes.csv cimport ( reader as csv_reader, reader_options as csv_reader_options @@ -87,7 +90,7 @@ cpdef read_csv( if delimiter is None: delimiter = sep - nvtx_range_push("CUDF_READ_CSV", "purple") + nvtx_range_push("CUDF_READ_CSV", "PURPLE") # Setup reader options cdef csv_reader_options args = csv_reader_options() @@ -262,7 +265,7 @@ cpdef write_csv( cudf.io.csv.write_csv """ - nvtx_range_push("CUDF_WRITE_CSV", "purple") + nvtx_range_push("CUDF_WRITE_CSV", "PURPLE") from cudf.core.series import Series diff --git a/python/cudf/cudf/_lib/cudf.pxd b/python/cudf/cudf/_lib/cudf.pxd index 14a3bcabb79..ff75c0104df 100644 --- a/python/cudf/cudf/_lib/cudf.pxd +++ b/python/cudf/cudf/_lib/cudf.pxd @@ -317,19 +317,6 @@ cdef extern from "cudf/cudf.h" nogil: size_type* out_indices ) except + - cdef gdf_error gdf_nvtx_range_push( - const char* const name, - gdf_color color - ) except + - - cdef gdf_error gdf_nvtx_range_push_hex( - const char* const name, - unsigned int color - ) except + - - cdef gdf_error gdf_nvtx_range_pop() except + - - cdef extern from "cudf/legacy/bitmask.hpp" nogil: cdef gdf_error gdf_count_nonzero_mask( diff --git a/python/cudf/cudf/_libxx/__init__.py b/python/cudf/cudf/_libxx/__init__.py index 61a1ebff6d1..a16cf807011 100644 --- a/python/cudf/cudf/_libxx/__init__.py +++ b/python/cudf/cudf/_libxx/__init__.py @@ -14,6 +14,7 @@ merge, null_mask, nvtext, + nvtx, orc, quantiles, reduce, diff --git a/python/cudf/cudf/_libxx/cpp/utilities/__init__.pxd b/python/cudf/cudf/_libxx/cpp/utilities/__init__.pxd new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_libxx/cpp/utilities/__init__.py b/python/cudf/cudf/_libxx/cpp/utilities/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd b/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd new file mode 100644 index 00000000000..fa6936d2742 --- /dev/null +++ b/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. + +from libc.stdint cimport uint32_t + + +cdef extern from "cudf/utilities/nvtx_utils.hpp" namespace "cudf::nvtx" nogil: + ctypedef enum color: + GREEN 'cudf::nvtx::color::GREEN' + BLUE 'cudf::nvtx::color::BLUE' + YELLOW 'cudf::nvtx::color::YELLOW' + PURPLE 'cudf::nvtx::color::PURPLE' + CYAN 'cudf::nvtx::color::CYAN' + RED 'cudf::nvtx::color::RED' + WHITE 'cudf::nvtx::color::WHITE' + DARK_GREEN 'cudf::nvtx::color::DARK_GREEN' + ORANGE 'cudf::nvtx::color::ORANGE' + + cdef color JOIN_COLOR 'cudf::nvtx::JOIN_COLOR' + cdef color GROUP_COLOR 'cudf::nvtx::GROUP_COLOR' + cdef color BINARY_OP_COLOR 'cudf::nvtx::BINARY_OP_COLOR' + cdef color PARTITION_COLOR 'cudf::nvtx::PARTITION_COLOR' + cdef color READ_CSV_COLOR 'cudf::nvtx::READ_CSV_COLOR' + + cdef void range_push( + const char* const name, + color color + ) except + + + cdef void range_push_hex( + const char* const name, + uint32_t color + ) except + + + cdef void range_pop() except + diff --git a/python/cudf/cudf/_libxx/nvtx.pxd b/python/cudf/cudf/_libxx/nvtx.pxd new file mode 100644 index 00000000000..1c9b43979ca --- /dev/null +++ b/python/cudf/cudf/_libxx/nvtx.pxd @@ -0,0 +1,6 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. + +from libc.stdint cimport uint32_t + + +ctypedef uint32_t underlying_type_t_color diff --git a/python/cudf/cudf/_libxx/nvtx.pyx b/python/cudf/cudf/_libxx/nvtx.pyx new file mode 100644 index 00000000000..3fcfe821ef6 --- /dev/null +++ b/python/cudf/cudf/_libxx/nvtx.pyx @@ -0,0 +1,58 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. + +from enum import IntEnum +from libcpp.string cimport string +from cudf._libxx.cpp.utilities.nvtx_utils cimport ( + range_push as cpp_range_push, + range_push_hex as cpp_range_push_hex, + range_pop as cpp_range_pop, + color as color_types, +) +from cudf._libxx.nvtx cimport underlying_type_t_color + + +class Color(IntEnum): + GREEN = color_types.GREEN + BLUE = color_types.BLUE + YELLOW = color_types.YELLOW + PURPLE = color_types.PURPLE + CYAN = color_types.CYAN + RED = color_types.RED + WHITE = color_types.WHITE + DARK_GREEN = color_types.DARK_GREEN + ORANGE = color_types.ORANGE + + +def range_push(object name, object color='GREEN'): + """ + Demarcate the beginning of a user-defined NVTX range. + + Parameters + ---------- + name : str + The name of the NVTX range + color : str + The color to use for the range. + Can be named color or hex RGB string. + """ + try: + color = int(color, 16) + except ValueError: + color = int(Color[color.upper()].value) + + cdef const char *_name + name = name.encode() + _name = name + + cdef underlying_type_t_color _color = color + + with nogil: + cpp_range_push_hex(_name, _color) + + +def range_pop(): + """ + Demarcate the end of a user-defined NVTX range. + """ + with nogil: + cpp_range_pop() diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 5639e1dabe4..895d5caa0e3 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -268,7 +268,7 @@ def is_unique(self): def binop(lhs, rhs, op, out_dtype): - libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange") + libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange") out = libcudfxx.binaryop.binaryop(lhs, rhs, op, out_dtype) - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return out diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 6be5408e2ac..9d71e004edf 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -5,7 +5,6 @@ import pyarrow as pa from pandas.api.types import is_integer_dtype -import cudf._lib as libcudf import cudf._libxx as libcudfxx from cudf.core.buffer import Buffer from cudf.core.column import as_column, column @@ -409,7 +408,7 @@ def can_cast_safely(self, to_dtype): def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False): if reflect: lhs, rhs = rhs, lhs - libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange") + libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange") is_op_comparison = op in ["lt", "gt", "le", "ge", "eq", "ne"] @@ -421,7 +420,7 @@ def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False): if is_op_comparison: out = out.fillna(op == "ne") - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return out diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 4c7bea5f515..bc401266e58 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -13,7 +13,6 @@ import cudf._libxx as libcudfxx import cudf._libxx.string_casting as str_cast -from cudf._lib.nvtx import nvtx_range_pop, nvtx_range_push from cudf._libxx.nvtext.generate_ngrams import ( generate_ngrams as cpp_generate_ngrams, ) @@ -27,6 +26,10 @@ count_tokens as cpp_count_tokens, tokenize as cpp_tokenize, ) +from cudf._libxx.nvtx import ( + range_pop as nvtx_range_pop, + range_push as nvtx_range_push, +) from cudf._libxx.strings.attributes import ( code_points as cpp_code_points, count_characters as cpp_count_characters, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 56f84506356..1b72375d629 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1863,7 +1863,7 @@ def nans_to_nulls(self): @classmethod def _concat(cls, objs, axis=0, ignore_index=False): - libcudf.nvtx.nvtx_range_push("CUDF_CONCAT", "orange") + libcudfxx.nvtx.range_push("CUDF_CONCAT", "orange") if ignore_index: index = RangeIndex(sum(map(len, objs))) @@ -1901,7 +1901,7 @@ def _concat(cls, objs, axis=0, ignore_index=False): else: out.columns = unique_columns_ordered_ls - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return out def as_gpu_matrix(self, columns=None, order="F"): @@ -2303,7 +2303,7 @@ def merge( 4 3 13.0 2 4 14.0 12.0 """ - libcudf.nvtx.nvtx_range_push("CUDF_JOIN", "blue") + libcudfxx.nvtx.range_push("CUDF_JOIN", "blue") if indicator: raise NotImplementedError( "Only indicator=False is currently supported" @@ -2344,7 +2344,7 @@ def merge( how, method, ) - + libcudfxx.nvtx.range_pop() return gdf_result def join( @@ -2383,7 +2383,7 @@ def join( - *on* is not supported yet due to lack of multi-index support. """ - libcudf.nvtx.nvtx_range_push("CUDF_JOIN", "blue") + libcudfxx.nvtx.range_push("CUDF_JOIN", "blue") # Outer joins still use the old implementation if type != "": @@ -2518,7 +2518,7 @@ def _set_categories(col, cats): df.index.names = index_frame_l.columns for new_key, old_key in zip(index_frame_l.columns, idx_col_names): df.index._data[new_key] = df.index._data.pop(old_key) - + libcudfxx.nvtx.range_pop() return df def groupby( @@ -2584,7 +2584,7 @@ def groupby( # The corresponding pop() is in # DataFrameGroupBy._apply_aggregation() - libcudf.nvtx.nvtx_range_push("CUDF_GROUPBY", "purple") + libcudfxx.nvtx.range_push("CUDF_GROUPBY", "purple") result = DataFrameGroupBy( self, @@ -2682,7 +2682,7 @@ def query(self, expr, local_dict={}): ) ) - libcudf.nvtx.nvtx_range_push("CUDF_QUERY", "purple") + libcudfxx.nvtx.range_push("CUDF_QUERY", "purple") # Get calling environment callframe = inspect.currentframe().f_back callenv = { @@ -2699,7 +2699,7 @@ def query(self, expr, local_dict={}): newseries = self[col][selected] newdf[col] = newseries result = newdf - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return result @applyutils.doc_apply() diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 5113becff58..ce8f702e17d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -6,6 +6,7 @@ import cudf import cudf._lib as libcudf +import cudf._libxx as libcudfxx from cudf import MultiIndex from cudf.core.column import deserialize_columns, serialize_columns from cudf.utils.dtypes import is_scalar @@ -130,7 +131,7 @@ def _apply_aggregation(self, agg): Applies the aggregation function(s) ``agg`` on all columns """ result = self._groupby.compute_result(agg) - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return result def __getitem__(self, arg): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a098be5129b..ca0e351647f 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -9,7 +9,6 @@ from pandas.api.types import is_dict_like import cudf -import cudf._lib as libcudf import cudf._libxx as libcudfxx from cudf.core.column import ( ColumnBase, @@ -614,7 +613,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False): # e.g. for fn = 'and', _apply_op equivalent is '__and__' return other._apply_op(self, fn) - libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange") + libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange") result_name = utils.get_result_name(self, other) if isinstance(other, Series): lhs, rhs = _align_indices([self, other], allow_non_unique=True) @@ -649,7 +648,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False): outcol = lhs._column.binary_operator(fn, rhs, reflect=reflect) result = lhs._copy_construct(data=outcol, name=result_name) - libcudf.nvtx.nvtx_range_pop() + libcudfxx.nvtx.range_pop() return result def add(self, other, fill_value=None, axis=0):