diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0e649f8f419..8f8893d50ce 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,20 +4,18 @@
 
 - PR #4360 Added Java bindings for bitwise shift operators
 - PR #3577 Add initial dictionary support to column classes
-- PR #3917 Add dictionary add_keys function
 - PR #3777 Add support for dictionary column in gather
 - PR #3693 add string support, skipna to scan operation
 - PR #3662 Define and implement `shift`.
-- PR #3842 ORC writer: add support for column statistics
 - PR #3861 Added Series.sum feature for String
 - PR #4069 Added cast of numeric columns from/to String
 - PR #3681 Add cudf::experimental::boolean_mask_scatter
-- PR #4088 Added asString() on ColumnVector in Java that takes a format string
 - PR #4040 Add support for n-way merge of sorted tables
 - PR #4053 Multi-column quantiles.
 - PR #4100 Add set_keys function for dictionary columns
 - PR #3894 Add remove_keys functions for dictionary columns
 - PR #4107 Add groupby nunique aggregation
+- PR #4235 Port nvtx.pyx to use non-legacy libcudf APIs
 - PR #4153 Support Dask serialization protocol on cuDF objects
 - PR #4127 Add python API for n-way sorted merge (merge_sorted)
 - PR #4164 Add Buffer "constructor-kwargs" header
@@ -64,19 +62,15 @@
 - PR #3911 Adding null boolean handling for copy_if_else
 - PR #4003 Drop old `to_device` utility wrapper function
 - PR #4002 Adding to_frame and fix for categorical column issue
-- PR #4035 Port NVText tokenize function to libcudf++
 - PR #4009 build script update to enable cudf build without installing
 - PR #3897 Port cuIO JSON reader to cudf::column types
 - PR #4008 Eliminate extra copy in column constructor
 - PR #4013 Add cython definition for io readers cudf/io/io_types.hpp
 - PR #4028 Port json.pyx to use new libcudf APIs
 - PR #4014 ORC/Parquet: add count parameter to stripe/rowgroup-based reader API
-- PR #4042 Port cudf/io/functions.hpp to Cython for use in IO bindings
-- PR #3880 Add aggregation infrastructure support for reduction
 - PR #3880 Add aggregation infrastructure support for cudf::reduce
-- PR #4059 Add aggregation infrastructure support for cudf::scan
+- PR #4059 Add aggregation infrastructure support for cudf::scan
 - PR #4021 Change quantiles signature for clarity.
-- PR #4058 Port hash.pyx to use libcudf++ APIs
 - PR #4057 Handle offsets in cython Column class
 - PR #4045 Reorganize `libxx` directory
 - PR #4029 Port stream_compaction.pyx to use libcudf++ APIs
@@ -102,7 +96,6 @@
 - PR #4098 Remove legacy calls from libcudf strings column code
 - PR #4044 Port join.pyx to use libcudf++ APIs
 - PR #4111 Use `Buffer`'s to serialize `StringColumn`
-- PR #4133 Mask cleanup and fixes: use `int32` dtype, ensure 64 byte padding, handle offsets
 - PR #4113 Get `len` of `StringColumn`s without `nvstrings`
 - PR #4147 Remove workaround for UNKNOWN_NULL_COUNT in contiguous_split.
 - PR #4130 Renames in-place `cudf::experimental::fill` to `cudf::experimental::fill_in_place`
@@ -203,13 +196,9 @@
 - PR #4089 Fix dask groupby mutliindex test case issues in join
 - PR #4097 Fix strings concatenate logic with column offsets
 - PR #4076 All null string entries should have null data buffer
-- PR #4145 Support empty index case in DataFrame._from_table
 - PR #4109 Use rmm::device_vector instead of thrust::device_vector
 - PR #4113 Use `.nvstrings` in `StringColumn.sum(...)`
 - PR #4116 Fix a bug in contiguous_split() where tables with mixed column types could corrupt string output
-- PR #4108 Fix dtype bugs in dask_cudf metadata (metadata_nonempty overhaul)
-- PR #4138 Really fix strings concatenate logic with column offsets
-- PR #4119 Fix binary ops slowdown using jitify -remove-unused-globals
 - PR #4125 Fix type enum to account for added Dictionary type in `types.hpp`
 - PR #4132 Fix `hash_partition` null mask allocation
 - PR #4137 Update Java for mutating fill and rolling window changes
diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx
index cf086f2fb9e..10e3e7a3286 100644
--- a/python/cudf/cudf/_lib/avro.pyx
+++ b/python/cudf/cudf/_lib/avro.pyx
@@ -14,7 +14,10 @@ from libcpp.vector cimport vector
 from libcpp.memory cimport unique_ptr
 
 from cudf.utils import ioutils
-from cudf._lib.nvtx import nvtx_range_push, nvtx_range_pop
+from cudf._libxx.nvtx import (
+    range_push as nvtx_range_push,
+    range_pop as nvtx_range_pop
+)
 
 from io import BytesIO
 import errno
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index d444a50ef07..6eae1488237 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -6,7 +6,10 @@ from cudf._lib.cudf cimport *
 from cudf._lib.cudf import *
 from cudf._lib.utils cimport *
 from cudf._lib.utils import *
-from cudf._lib.nvtx import nvtx_range_push, nvtx_range_pop
+from cudf._libxx.nvtx import (
+    range_push as nvtx_range_push,
+    range_pop as nvtx_range_pop
+)
 from cudf._lib.includes.csv cimport (
     reader as csv_reader,
     reader_options as csv_reader_options
@@ -87,7 +90,7 @@ cpdef read_csv(
     if delimiter is None:
         delimiter = sep
 
-    nvtx_range_push("CUDF_READ_CSV", "purple")
+    nvtx_range_push("CUDF_READ_CSV", "PURPLE")
 
     # Setup reader options
     cdef csv_reader_options args = csv_reader_options()
@@ -262,7 +265,7 @@ cpdef write_csv(
     cudf.io.csv.write_csv
     """
 
-    nvtx_range_push("CUDF_WRITE_CSV", "purple")
+    nvtx_range_push("CUDF_WRITE_CSV", "PURPLE")
 
     from cudf.core.series import Series
 
diff --git a/python/cudf/cudf/_lib/cudf.pxd b/python/cudf/cudf/_lib/cudf.pxd
index 14a3bcabb79..ff75c0104df 100644
--- a/python/cudf/cudf/_lib/cudf.pxd
+++ b/python/cudf/cudf/_lib/cudf.pxd
@@ -317,19 +317,6 @@ cdef extern from "cudf/cudf.h" nogil:
         size_type* out_indices
     ) except +
 
-    cdef gdf_error gdf_nvtx_range_push(
-        const char* const name,
-        gdf_color color
-    ) except +
-
-    cdef gdf_error gdf_nvtx_range_push_hex(
-        const char* const name,
-        unsigned int color
-    ) except +
-
-    cdef gdf_error gdf_nvtx_range_pop() except +
-
-
 cdef extern from "cudf/legacy/bitmask.hpp" nogil:
 
     cdef gdf_error gdf_count_nonzero_mask(
diff --git a/python/cudf/cudf/_libxx/__init__.py b/python/cudf/cudf/_libxx/__init__.py
index 61a1ebff6d1..a16cf807011 100644
--- a/python/cudf/cudf/_libxx/__init__.py
+++ b/python/cudf/cudf/_libxx/__init__.py
@@ -14,6 +14,7 @@
     merge,
     null_mask,
     nvtext,
+    nvtx,
     orc,
     quantiles,
     reduce,
diff --git a/python/cudf/cudf/_libxx/cpp/utilities/__init__.pxd b/python/cudf/cudf/_libxx/cpp/utilities/__init__.pxd
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/_libxx/cpp/utilities/__init__.py b/python/cudf/cudf/_libxx/cpp/utilities/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd b/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd
new file mode 100644
index 00000000000..fa6936d2742
--- /dev/null
+++ b/python/cudf/cudf/_libxx/cpp/utilities/nvtx_utils.pxd
@@ -0,0 +1,34 @@
+# Copyright (c) 2020, NVIDIA CORPORATION.
+
+from libc.stdint cimport uint32_t
+
+
+cdef extern from "cudf/utilities/nvtx_utils.hpp" namespace "cudf::nvtx" nogil:
+    ctypedef enum color:
+        GREEN 'cudf::nvtx::color::GREEN'
+        BLUE 'cudf::nvtx::color::BLUE'
+        YELLOW 'cudf::nvtx::color::YELLOW'
+        PURPLE 'cudf::nvtx::color::PURPLE'
+        CYAN 'cudf::nvtx::color::CYAN'
+        RED 'cudf::nvtx::color::RED'
+        WHITE 'cudf::nvtx::color::WHITE'
+        DARK_GREEN 'cudf::nvtx::color::DARK_GREEN'
+        ORANGE 'cudf::nvtx::color::ORANGE'
+
+    cdef color JOIN_COLOR 'cudf::nvtx::JOIN_COLOR'
+    cdef color GROUP_COLOR 'cudf::nvtx::GROUP_COLOR'
+    cdef color BINARY_OP_COLOR 'cudf::nvtx::BINARY_OP_COLOR'
+    cdef color PARTITION_COLOR 'cudf::nvtx::PARTITION_COLOR'
+    cdef color READ_CSV_COLOR 'cudf::nvtx::READ_CSV_COLOR'
+
+    cdef void range_push(
+        const char* const name,
+        color color
+    ) except +
+
+    cdef void range_push_hex(
+        const char* const name,
+        uint32_t color
+    ) except +
+
+    cdef void range_pop() except +
diff --git a/python/cudf/cudf/_libxx/nvtx.pxd b/python/cudf/cudf/_libxx/nvtx.pxd
new file mode 100644
index 00000000000..1c9b43979ca
--- /dev/null
+++ b/python/cudf/cudf/_libxx/nvtx.pxd
@@ -0,0 +1,6 @@
+# Copyright (c) 2020, NVIDIA CORPORATION.
+
+from libc.stdint cimport uint32_t
+
+
+ctypedef uint32_t underlying_type_t_color
diff --git a/python/cudf/cudf/_libxx/nvtx.pyx b/python/cudf/cudf/_libxx/nvtx.pyx
new file mode 100644
index 00000000000..3fcfe821ef6
--- /dev/null
+++ b/python/cudf/cudf/_libxx/nvtx.pyx
@@ -0,0 +1,73 @@
+# Copyright (c) 2020, NVIDIA CORPORATION.
+
+from enum import IntEnum
+from libcpp.string cimport string
+from cudf._libxx.cpp.utilities.nvtx_utils cimport (
+    range_push as cpp_range_push,
+    range_push_hex as cpp_range_push_hex,
+    range_pop as cpp_range_pop,
+    color as color_types,
+)
+from cudf._libxx.nvtx cimport underlying_type_t_color
+
+
+class Color(IntEnum):
+    """Named colors mirroring the ``cudf::nvtx::color`` enum."""
+    GREEN = <underlying_type_t_color> color_types.GREEN
+    BLUE = <underlying_type_t_color> color_types.BLUE
+    YELLOW = <underlying_type_t_color> color_types.YELLOW
+    PURPLE = <underlying_type_t_color> color_types.PURPLE
+    CYAN = <underlying_type_t_color> color_types.CYAN
+    RED = <underlying_type_t_color> color_types.RED
+    WHITE = <underlying_type_t_color> color_types.WHITE
+    DARK_GREEN = <underlying_type_t_color> color_types.DARK_GREEN
+    ORANGE = <underlying_type_t_color> color_types.ORANGE
+
+
+def range_push(object name, object color='GREEN'):
+    """
+    Demarcate the beginning of a user-defined NVTX range.
+
+    Parameters
+    ----------
+    name : str
+        The name of the NVTX range
+    color : str, int or Color
+        The color to use for the range. A string is tried first as
+        a hex RGB value and then as the case-insensitive name of a
+        ``Color`` member; an int or ``Color`` member is used as-is.
+
+    Raises
+    ------
+    KeyError
+        If ``color`` is a string that is neither valid hex nor the
+        name of a ``Color`` member.
+    """
+    if isinstance(color, int):
+        # ``Color`` members and plain ints need no parsing, and
+        # ``int(x, 16)`` raises TypeError for non-string input.
+        color = int(color)
+    else:
+        try:
+            color = int(color, 16)
+        except ValueError:
+            color = int(Color[color.upper()].value)
+
+    cdef const char *_name
+    # Keep the encoded bytes bound to ``name`` so the buffer that
+    # ``_name`` points into stays alive while the GIL is released.
+    name = name.encode()
+    _name = name
+
+    cdef underlying_type_t_color _color = color
+
+    with nogil:
+        cpp_range_push_hex(_name, _color)
+
+
+def range_pop():
+    """
+    Demarcate the end of a user-defined NVTX range.
+    """
+    with nogil:
+        cpp_range_pop()
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 5639e1dabe4..895d5caa0e3 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -268,7 +268,14 @@ def is_unique(self):
 
 
 def binop(lhs, rhs, op, out_dtype):
-    libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange")
-    out = libcudfxx.binaryop.binaryop(lhs, rhs, op, out_dtype)
-    libcudf.nvtx.nvtx_range_pop()
-    return out
+    """
+    Apply binary operation ``op`` to ``lhs`` and ``rhs`` inside a
+    "CUDF_BINARY_OP" NVTX range and return its result.
+    """
+    libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange")
+    try:
+        # Pop in ``finally`` so a failing binaryop cannot leave the
+        # NVTX range open.
+        return libcudfxx.binaryop.binaryop(lhs, rhs, op, out_dtype)
+    finally:
+        libcudfxx.nvtx.range_pop()
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 6be5408e2ac..9d71e004edf 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -5,7 +5,6 @@
 import pyarrow as pa
 from pandas.api.types import is_integer_dtype
 
-import cudf._lib as libcudf
 import cudf._libxx as libcudfxx
 from cudf.core.buffer import Buffer
 from cudf.core.column import as_column, column
@@ -409,7 +408,7 @@ def can_cast_safely(self, to_dtype):
 def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False):
     if reflect:
         lhs, rhs = rhs, lhs
-    libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange")
+    libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange")
 
     is_op_comparison = op in ["lt", "gt", "le", "ge", "eq", "ne"]
 
@@ -421,7 +420,7 @@ def _numeric_column_binop(lhs, rhs, op, out_dtype, reflect=False):
     if is_op_comparison:
         out = out.fillna(op == "ne")
 
-    libcudf.nvtx.nvtx_range_pop()
+    libcudfxx.nvtx.range_pop()
     return out
 
 
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 4c7bea5f515..bc401266e58 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -13,7 +13,6 @@
 
 import cudf._libxx as libcudfxx
 import cudf._libxx.string_casting as str_cast
-from cudf._lib.nvtx import nvtx_range_pop, nvtx_range_push
 from cudf._libxx.nvtext.generate_ngrams import (
     generate_ngrams as cpp_generate_ngrams,
 )
@@ -27,6 +26,10 @@
     count_tokens as cpp_count_tokens,
     tokenize as cpp_tokenize,
 )
+from cudf._libxx.nvtx import (
+    range_pop as nvtx_range_pop,
+    range_push as nvtx_range_push,
+)
 from cudf._libxx.strings.attributes import (
     code_points as cpp_code_points,
     count_characters as cpp_count_characters,
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 56f84506356..1b72375d629 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1863,7 +1863,7 @@ def nans_to_nulls(self):
     @classmethod
     def _concat(cls, objs, axis=0, ignore_index=False):
 
-        libcudf.nvtx.nvtx_range_push("CUDF_CONCAT", "orange")
+        libcudfxx.nvtx.range_push("CUDF_CONCAT", "orange")
 
         if ignore_index:
             index = RangeIndex(sum(map(len, objs)))
@@ -1901,7 +1901,7 @@ def _concat(cls, objs, axis=0, ignore_index=False):
         else:
             out.columns = unique_columns_ordered_ls
 
-        libcudf.nvtx.nvtx_range_pop()
+        libcudfxx.nvtx.range_pop()
         return out
 
     def as_gpu_matrix(self, columns=None, order="F"):
@@ -2303,7 +2303,7 @@ def merge(
         4    3    13.0
         2    4    14.0    12.0
         """
-        libcudf.nvtx.nvtx_range_push("CUDF_JOIN", "blue")
+        libcudfxx.nvtx.range_push("CUDF_JOIN", "blue")
         if indicator:
             raise NotImplementedError(
                 "Only indicator=False is currently supported"
@@ -2344,7 +2344,7 @@ def merge(
             how,
             method,
         )
-
+        libcudfxx.nvtx.range_pop()
         return gdf_result
 
     def join(
@@ -2383,7 +2383,7 @@ def join(
         - *on* is not supported yet due to lack of multi-index support.
         """
 
-        libcudf.nvtx.nvtx_range_push("CUDF_JOIN", "blue")
+        libcudfxx.nvtx.range_push("CUDF_JOIN", "blue")
 
         # Outer joins still use the old implementation
         if type != "":
@@ -2518,7 +2518,7 @@ def _set_categories(col, cats):
             df.index.names = index_frame_l.columns
             for new_key, old_key in zip(index_frame_l.columns, idx_col_names):
                 df.index._data[new_key] = df.index._data.pop(old_key)
-
+        libcudfxx.nvtx.range_pop()
         return df
 
     def groupby(
@@ -2584,7 +2584,7 @@ def groupby(
 
             # The corresponding pop() is in
             # DataFrameGroupBy._apply_aggregation()
-            libcudf.nvtx.nvtx_range_push("CUDF_GROUPBY", "purple")
+            libcudfxx.nvtx.range_push("CUDF_GROUPBY", "purple")
 
             result = DataFrameGroupBy(
                 self,
@@ -2682,7 +2682,7 @@ def query(self, expr, local_dict={}):
                 )
             )
 
-        libcudf.nvtx.nvtx_range_push("CUDF_QUERY", "purple")
+        libcudfxx.nvtx.range_push("CUDF_QUERY", "purple")
         # Get calling environment
         callframe = inspect.currentframe().f_back
         callenv = {
@@ -2699,7 +2699,7 @@ def query(self, expr, local_dict={}):
             newseries = self[col][selected]
             newdf[col] = newseries
         result = newdf
-        libcudf.nvtx.nvtx_range_pop()
+        libcudfxx.nvtx.range_pop()
         return result
 
     @applyutils.doc_apply()
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 5113becff58..ce8f702e17d 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -6,6 +6,7 @@
 
 import cudf
 import cudf._lib as libcudf
+import cudf._libxx as libcudfxx
 from cudf import MultiIndex
 from cudf.core.column import deserialize_columns, serialize_columns
 from cudf.utils.dtypes import is_scalar
@@ -130,7 +131,7 @@ def _apply_aggregation(self, agg):
         Applies the aggregation function(s) ``agg`` on all columns
         """
         result = self._groupby.compute_result(agg)
-        libcudf.nvtx.nvtx_range_pop()
+        libcudfxx.nvtx.range_pop()
         return result
 
     def __getitem__(self, arg):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index a098be5129b..ca0e351647f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -9,7 +9,6 @@
 from pandas.api.types import is_dict_like
 
 import cudf
-import cudf._lib as libcudf
 import cudf._libxx as libcudfxx
 from cudf.core.column import (
     ColumnBase,
@@ -614,7 +613,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False):
             # e.g. for fn = 'and', _apply_op equivalent is '__and__'
             return other._apply_op(self, fn)
 
-        libcudf.nvtx.nvtx_range_push("CUDF_BINARY_OP", "orange")
+        libcudfxx.nvtx.range_push("CUDF_BINARY_OP", "orange")
         result_name = utils.get_result_name(self, other)
         if isinstance(other, Series):
             lhs, rhs = _align_indices([self, other], allow_non_unique=True)
@@ -649,7 +648,7 @@ def _binaryop(self, other, fn, fill_value=None, reflect=False):
 
         outcol = lhs._column.binary_operator(fn, rhs, reflect=reflect)
         result = lhs._copy_construct(data=outcol, name=result_name)
-        libcudf.nvtx.nvtx_range_pop()
+        libcudfxx.nvtx.range_pop()
         return result
 
     def add(self, other, fill_value=None, axis=0):