From 84073e8c3c9477c8afa974f14058f1208f63aba2 Mon Sep 17 00:00:00 2001
From: AJ Schmidt <aschmidt@nvidia.com>
Date: Fri, 17 Dec 2021 17:08:19 -0500
Subject: [PATCH 01/12] update changelog

---
 CHANGELOG.md | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 225 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b55669f7f50..39bb868c7db 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,229 @@
-# cuDF 21.12.00 (Date TBD)
+# cuDF 21.12.00 (9 Dec 2021)
 
-Please see https://github.com/rapidsai/cudf/releases/tag/v21.12.00a for the latest changes to this development branch.
+## 🚨 Breaking Changes
+
+- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel)
+- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr)
+- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt)
+- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr)
+- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333)
+- Support Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman)
+- Optionally nullify out-of-bounds indices in segmented_gather(). ([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks)
+- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel)
+- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr)
+
+## 🐛 Bug Fixes
+
+- Fix read_parquet bug for bytes input ([#9669](https://github.com/rapidsai/cudf/pull/9669)) [@rjzamora](https://github.com/rjzamora)
+- Use `_gather` internal for `sort_*` ([#9668](https://github.com/rapidsai/cudf/pull/9668)) [@isVoid](https://github.com/isVoid)
+- Fix behavior of equals for non-DataFrame Frames and add tests. ([#9653](https://github.com/rapidsai/cudf/pull/9653)) [@vyasr](https://github.com/vyasr)
+- Don't recompute output size if it is already available ([#9649](https://github.com/rapidsai/cudf/pull/9649)) [@abellina](https://github.com/abellina)
+- Fix read_parquet bug for extended dtypes from remote storage ([#9638](https://github.com/rapidsai/cudf/pull/9638)) [@rjzamora](https://github.com/rjzamora)
+- add const when getting data from a JNI data wrapper ([#9637](https://github.com/rapidsai/cudf/pull/9637)) [@wjxiz1992](https://github.com/wjxiz1992)
+- Fix debrotli issue on CUDA 11.5 ([#9632](https://github.com/rapidsai/cudf/pull/9632)) [@vuule](https://github.com/vuule)
+- Use std::size_t when computing join output size ([#9626](https://github.com/rapidsai/cudf/pull/9626)) [@jlowe](https://github.com/jlowe)
+- Fix `usecols` parameter handling in `dask_cudf.read_csv` ([#9618](https://github.com/rapidsai/cudf/pull/9618)) [@galipremsagar](https://github.com/galipremsagar)
+- Add support for string `'nan', 'inf' & '-inf'` values while type-casting to `float` ([#9613](https://github.com/rapidsai/cudf/pull/9613)) [@galipremsagar](https://github.com/galipremsagar)
+- Avoid passing NativeFileDatasource to pyarrow in read_parquet ([#9608](https://github.com/rapidsai/cudf/pull/9608)) [@rjzamora](https://github.com/rjzamora)
+- Fix test failure with cuda 11.5 in row_bit_count tests. ([#9581](https://github.com/rapidsai/cudf/pull/9581)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Correct _LIBCUDACXX_CUDACC_VER value computation ([#9579](https://github.com/rapidsai/cudf/pull/9579)) [@robertmaynard](https://github.com/robertmaynard)
+- Increase max RLE stream size estimate to avoid potential overflows ([#9568](https://github.com/rapidsai/cudf/pull/9568)) [@vuule](https://github.com/vuule)
+- Fix edge case in tdigest scalar generation for groups containing all nulls. ([#9551](https://github.com/rapidsai/cudf/pull/9551)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fix pytests failing in `cuda-11.5` environment ([#9547](https://github.com/rapidsai/cudf/pull/9547)) [@galipremsagar](https://github.com/galipremsagar)
+- compile libnvcomp with PTDS if requested ([#9540](https://github.com/rapidsai/cudf/pull/9540)) [@jbrennan333](https://github.com/jbrennan333)
+- Fix `segmented_gather()` for null LIST rows ([#9537](https://github.com/rapidsai/cudf/pull/9537)) [@mythrocks](https://github.com/mythrocks)
+- Deprecate DataFrame.label_encoding, use private _label_encoding method internally. ([#9535](https://github.com/rapidsai/cudf/pull/9535)) [@bdice](https://github.com/bdice)
+- Fix several test and benchmark issues related to bitmask allocations. ([#9521](https://github.com/rapidsai/cudf/pull/9521)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fix for inserting duplicates in groupby result cache ([#9508](https://github.com/rapidsai/cudf/pull/9508)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix mismatched types error in clip() when using non int64 numeric types ([#9498](https://github.com/rapidsai/cudf/pull/9498)) [@davidwendt](https://github.com/davidwendt)
+- Match conda pinnings for style checks (revert part of #9412, #9433). ([#9490](https://github.com/rapidsai/cudf/pull/9490)) [@bdice](https://github.com/bdice)
+- Make sure all dask-cudf supported aggs are handled in `_tree_node_agg` ([#9487](https://github.com/rapidsai/cudf/pull/9487)) [@charlesbluca](https://github.com/charlesbluca)
+- Resolve `hash_columns` `FutureWarning` in `dask_cudf` ([#9481](https://github.com/rapidsai/cudf/pull/9481)) [@pentschev](https://github.com/pentschev)
+- Add fixed point to AllTypes in libcudf unit tests ([#9472](https://github.com/rapidsai/cudf/pull/9472)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix regex handling of embedded null characters ([#9470](https://github.com/rapidsai/cudf/pull/9470)) [@davidwendt](https://github.com/davidwendt)
+- Fix memcheck error in copy-if-else ([#9467](https://github.com/rapidsai/cudf/pull/9467)) [@davidwendt](https://github.com/davidwendt)
+- Fix bug in dask_cudf.read_parquet for index=False ([#9453](https://github.com/rapidsai/cudf/pull/9453)) [@rjzamora](https://github.com/rjzamora)
+- Preserve the decimal scale when creating a default scalar ([#9449](https://github.com/rapidsai/cudf/pull/9449)) [@revans2](https://github.com/revans2)
+- Push down parent nulls when flattening nested columns. ([#9443](https://github.com/rapidsai/cudf/pull/9443)) [@mythrocks](https://github.com/mythrocks)
+- Fix memcheck error in gtest SegmentedGatherTest/GatherSliced ([#9442](https://github.com/rapidsai/cudf/pull/9442)) [@davidwendt](https://github.com/davidwendt)
+- Revert "Fix quantile division / partition handling for dask-cudf sort… ([#9438](https://github.com/rapidsai/cudf/pull/9438)) [@charlesbluca](https://github.com/charlesbluca)
+- Allow int-like objects for the `decimals` argument in `round` ([#9428](https://github.com/rapidsai/cudf/pull/9428)) [@shwina](https://github.com/shwina)
+- Fix stream compaction's `drop_duplicates` API to use stable sort ([#9417](https://github.com/rapidsai/cudf/pull/9417)) [@ttnghia](https://github.com/ttnghia)
+- Skip Comparing Uniform Window Results in Var/std Tests ([#9416](https://github.com/rapidsai/cudf/pull/9416)) [@isVoid](https://github.com/isVoid)
+- Fix `StructColumn.to_pandas` type handling issues ([#9388](https://github.com/rapidsai/cudf/pull/9388)) [@galipremsagar](https://github.com/galipremsagar)
+- Correct issues in the build dir cudf-config.cmake ([#9386](https://github.com/rapidsai/cudf/pull/9386)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix Java table partition test to account for non-deterministic ordering ([#9385](https://github.com/rapidsai/cudf/pull/9385)) [@jlowe](https://github.com/jlowe)
+- Fix timestamp truncation/overflow bugs in orc/parquet ([#9382](https://github.com/rapidsai/cudf/pull/9382)) [@PointKernel](https://github.com/PointKernel)
+- Fix the crash in stats code ([#9368](https://github.com/rapidsai/cudf/pull/9368)) [@devavret](https://github.com/devavret)
+- Make Series.hash_encode results reproducible. ([#9366](https://github.com/rapidsai/cudf/pull/9366)) [@bdice](https://github.com/bdice)
+- Fix libcudf compile warnings on debug 11.4 build ([#9360](https://github.com/rapidsai/cudf/pull/9360)) [@davidwendt](https://github.com/davidwendt)
+- Fail gracefully when compiling python UDFs that attempt to access columns with unsupported dtypes ([#9359](https://github.com/rapidsai/cudf/pull/9359)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Set pass_filenames: false in mypy pre-commit configuration. ([#9349](https://github.com/rapidsai/cudf/pull/9349)) [@bdice](https://github.com/bdice)
+- Fix cudf_assert in cudf::io::orc::gpu::gpuDecodeOrcColumnData ([#9348](https://github.com/rapidsai/cudf/pull/9348)) [@davidwendt](https://github.com/davidwendt)
+- Fix memcheck error in groupby-tdigest get_scalar_minmax ([#9339](https://github.com/rapidsai/cudf/pull/9339)) [@davidwendt](https://github.com/davidwendt)
+- Optimizations for `cudf.concat` when `axis=1` ([#9333](https://github.com/rapidsai/cudf/pull/9333)) [@galipremsagar](https://github.com/galipremsagar)
+- Use f-string in join helper warning message. ([#9325](https://github.com/rapidsai/cudf/pull/9325)) [@bdice](https://github.com/bdice)
+- Avoid casting to list or struct dtypes in dask_cudf.read_parquet ([#9314](https://github.com/rapidsai/cudf/pull/9314)) [@rjzamora](https://github.com/rjzamora)
+- Fix null count in statistics for parquet ([#9303](https://github.com/rapidsai/cudf/pull/9303)) [@devavret](https://github.com/devavret)
+- Potential overflow of `decimal32` when casting to `int64_t` ([#9287](https://github.com/rapidsai/cudf/pull/9287)) [@codereport](https://github.com/codereport)
+- Fix quantile division / partition handling for dask-cudf sort on null dataframes ([#9259](https://github.com/rapidsai/cudf/pull/9259)) [@charlesbluca](https://github.com/charlesbluca)
+- Updating cudf version also updates rapids cmake branch ([#9249](https://github.com/rapidsai/cudf/pull/9249)) [@robertmaynard](https://github.com/robertmaynard)
+- Implement `one_hot_encoding` in libcudf and bind to python ([#9229](https://github.com/rapidsai/cudf/pull/9229)) [@isVoid](https://github.com/isVoid)
+- BUG FIX: CSV Writer ignores the header parameter when no metadata is provided ([#8740](https://github.com/rapidsai/cudf/pull/8740)) [@skirui-source](https://github.com/skirui-source)
+
+## 📖 Documentation
+
+- Update Documentation to use `TYPED_TEST_SUITE` ([#9654](https://github.com/rapidsai/cudf/pull/9654)) [@codereport](https://github.com/codereport)
+- Add dedicated page for `StringHandling` in python docs ([#9624](https://github.com/rapidsai/cudf/pull/9624)) [@galipremsagar](https://github.com/galipremsagar)
+- Update docstring of `DataFrame.merge` ([#9572](https://github.com/rapidsai/cudf/pull/9572)) [@galipremsagar](https://github.com/galipremsagar)
+- Use raw strings to avoid SyntaxErrors in parsed docstrings. ([#9526](https://github.com/rapidsai/cudf/pull/9526)) [@bdice](https://github.com/bdice)
+- Add example to docstrings in `rolling.apply` ([#9522](https://github.com/rapidsai/cudf/pull/9522)) [@isVoid](https://github.com/isVoid)
+- Update help message to escape quotes in ./build.sh --cmake-args. ([#9494](https://github.com/rapidsai/cudf/pull/9494)) [@bdice](https://github.com/bdice)
+- Improve Python docstring formatting. ([#9493](https://github.com/rapidsai/cudf/pull/9493)) [@bdice](https://github.com/bdice)
+- Update table of I/O supported types ([#9476](https://github.com/rapidsai/cudf/pull/9476)) [@vuule](https://github.com/vuule)
+- Document invalid regex patterns as undefined behavior ([#9473](https://github.com/rapidsai/cudf/pull/9473)) [@davidwendt](https://github.com/davidwendt)
+- Miscellaneous documentation fixes to `cudf` ([#9471](https://github.com/rapidsai/cudf/pull/9471)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix many documentation errors in libcudf. ([#9355](https://github.com/rapidsai/cudf/pull/9355)) [@karthikeyann](https://github.com/karthikeyann)
+- Fixing SubwordTokenizer docs issue ([#9354](https://github.com/rapidsai/cudf/pull/9354)) [@mayankanand007](https://github.com/mayankanand007)
+- Improved deprecation warnings. ([#9347](https://github.com/rapidsai/cudf/pull/9347)) [@bdice](https://github.com/bdice)
+- doc reorder mr, stream to stream, mr ([#9308](https://github.com/rapidsai/cudf/pull/9308)) [@karthikeyann](https://github.com/karthikeyann)
+- Deprecate method parameters to DataFrame.join, DataFrame.merge. ([#9291](https://github.com/rapidsai/cudf/pull/9291)) [@bdice](https://github.com/bdice)
+- Added deprecation warning for `.label_encoding()` ([#9289](https://github.com/rapidsai/cudf/pull/9289)) [@mayankanand007](https://github.com/mayankanand007)
+
+## 🚀 New Features
+
+- Enable Series.divide and DataFrame.divide ([#9630](https://github.com/rapidsai/cudf/pull/9630)) [@vyasr](https://github.com/vyasr)
+- Update `bitmask_and` and `bitmask_or` to return a pair of resulting mask and count of unset bits ([#9616](https://github.com/rapidsai/cudf/pull/9616)) [@PointKernel](https://github.com/PointKernel)
+- Add handling of mixed numeric types in `to_dlpack` ([#9585](https://github.com/rapidsai/cudf/pull/9585)) [@galipremsagar](https://github.com/galipremsagar)
+- Support re.Pattern object for pat arg in str.replace ([#9573](https://github.com/rapidsai/cudf/pull/9573)) [@davidwendt](https://github.com/davidwendt)
+- Add JNI for `lists::drop_list_duplicates` with keys-values input column ([#9553](https://github.com/rapidsai/cudf/pull/9553)) [@ttnghia](https://github.com/ttnghia)
+- Support structs column in `min`, `max`, `argmin` and `argmax` groupby aggregate() and scan() ([#9545](https://github.com/rapidsai/cudf/pull/9545)) [@ttnghia](https://github.com/ttnghia)
+- Move libcudacxx to use `rapids_cpm` and use newer versions ([#9539](https://github.com/rapidsai/cudf/pull/9539)) [@robertmaynard](https://github.com/robertmaynard)
+- Add scan min/max support for chrono types to libcudf reduction-scan (not groupby scan) ([#9518](https://github.com/rapidsai/cudf/pull/9518)) [@davidwendt](https://github.com/davidwendt)
+- Support `args=` in `apply` ([#9514](https://github.com/rapidsai/cudf/pull/9514)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add groupby scan min/max support for strings values ([#9502](https://github.com/rapidsai/cudf/pull/9502)) [@davidwendt](https://github.com/davidwendt)
+- Add list output option to character_ngrams() function ([#9499](https://github.com/rapidsai/cudf/pull/9499)) [@davidwendt](https://github.com/davidwendt)
+- More granular column selection in ORC reader ([#9496](https://github.com/rapidsai/cudf/pull/9496)) [@vuule](https://github.com/vuule)
+- add min_periods, ddof to groupby covariance, & correlation aggregation ([#9492](https://github.com/rapidsai/cudf/pull/9492)) [@karthikeyann](https://github.com/karthikeyann)
+- Implement Series.datetime.floor ([#9488](https://github.com/rapidsai/cudf/pull/9488)) [@skirui-source](https://github.com/skirui-source)
+- Enable linting of CMake files using pre-commit ([#9484](https://github.com/rapidsai/cudf/pull/9484)) [@vyasr](https://github.com/vyasr)
+- Add support for single-line regex anchors ^/$ in contains_re ([#9482](https://github.com/rapidsai/cudf/pull/9482)) [@davidwendt](https://github.com/davidwendt)
+- Augment `order_by` to Accept a List of `null_precedence` ([#9455](https://github.com/rapidsai/cudf/pull/9455)) [@isVoid](https://github.com/isVoid)
+- Add format API for list column of strings ([#9454](https://github.com/rapidsai/cudf/pull/9454)) [@davidwendt](https://github.com/davidwendt)
+- Enable Datetime/Timedelta dtypes in Masked UDFs ([#9451](https://github.com/rapidsai/cudf/pull/9451)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add cudf python groupby.diff ([#9446](https://github.com/rapidsai/cudf/pull/9446)) [@karthikeyann](https://github.com/karthikeyann)
+- Implement `lists::stable_sort_lists` for stable sorting of elements within each row of lists column ([#9425](https://github.com/rapidsai/cudf/pull/9425)) [@ttnghia](https://github.com/ttnghia)
+- add ctest memcheck using cuda-sanitizer ([#9414](https://github.com/rapidsai/cudf/pull/9414)) [@karthikeyann](https://github.com/karthikeyann)
+- Support Unary Operations in Masked UDF ([#9409](https://github.com/rapidsai/cudf/pull/9409)) [@isVoid](https://github.com/isVoid)
+- Move Several Series Function to Frame ([#9394](https://github.com/rapidsai/cudf/pull/9394)) [@isVoid](https://github.com/isVoid)
+- MD5 Python hash API ([#9390](https://github.com/rapidsai/cudf/pull/9390)) [@bdice](https://github.com/bdice)
+- Add cudf strings is_title API ([#9380](https://github.com/rapidsai/cudf/pull/9380)) [@davidwendt](https://github.com/davidwendt)
+- Enable casting to int64, uint64, and double in AST code. ([#9379](https://github.com/rapidsai/cudf/pull/9379)) [@vyasr](https://github.com/vyasr)
+- Add support for writing ORC with map columns ([#9369](https://github.com/rapidsai/cudf/pull/9369)) [@vuule](https://github.com/vuule)
+- extract_list_elements() with column_view indices ([#9367](https://github.com/rapidsai/cudf/pull/9367)) [@mythrocks](https://github.com/mythrocks)
+- Reimplement `lists::drop_list_duplicates` for keys-values lists columns ([#9345](https://github.com/rapidsai/cudf/pull/9345)) [@ttnghia](https://github.com/ttnghia)
+- Support Python UDFs written in terms of rows ([#9343](https://github.com/rapidsai/cudf/pull/9343)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- JNI: Support nested types in ORC writer ([#9334](https://github.com/rapidsai/cudf/pull/9334)) [@firestarman](https://github.com/firestarman)
+- Optionally nullify out-of-bounds indices in segmented_gather(). ([#9318](https://github.com/rapidsai/cudf/pull/9318)) [@mythrocks](https://github.com/mythrocks)
+- Add shallow hash function and shallow equality comparison for column_view ([#9312](https://github.com/rapidsai/cudf/pull/9312)) [@karthikeyann](https://github.com/karthikeyann)
+- Add CudaMemoryBuffer for cudaMalloc memory using RMM cuda_memory_resource ([#9311](https://github.com/rapidsai/cudf/pull/9311)) [@rongou](https://github.com/rongou)
+- Add parameters to control row index stride and stripe size in ORC writer ([#9310](https://github.com/rapidsai/cudf/pull/9310)) [@vuule](https://github.com/vuule)
+- Add `na_position` param to dask-cudf `sort_values` ([#9264](https://github.com/rapidsai/cudf/pull/9264)) [@charlesbluca](https://github.com/charlesbluca)
+- Add `ascending` parameter for dask-cudf `sort_values` ([#9250](https://github.com/rapidsai/cudf/pull/9250)) [@charlesbluca](https://github.com/charlesbluca)
+- New array conversion methods ([#9236](https://github.com/rapidsai/cudf/pull/9236)) [@vyasr](https://github.com/vyasr)
+- Series `apply` method backed by masked UDFs ([#9217](https://github.com/rapidsai/cudf/pull/9217)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Grouping by frequency and resampling ([#9178](https://github.com/rapidsai/cudf/pull/9178)) [@shwina](https://github.com/shwina)
+- Pure-python masked UDFs ([#9174](https://github.com/rapidsai/cudf/pull/9174)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add Covariance, Pearson correlation for sort groupby (libcudf) ([#9154](https://github.com/rapidsai/cudf/pull/9154)) [@karthikeyann](https://github.com/karthikeyann)
+- Add `calendrical_month_sequence` in c++ and `date_range` in python ([#8886](https://github.com/rapidsai/cudf/pull/8886)) [@shwina](https://github.com/shwina)
+
+## 🛠️ Improvements
+
+- Followup to PR 9088 comments ([#9659](https://github.com/rapidsai/cudf/pull/9659)) [@cwharris](https://github.com/cwharris)
+- Update cuCollections to version that supports installed libcudacxx ([#9633](https://github.com/rapidsai/cudf/pull/9633)) [@robertmaynard](https://github.com/robertmaynard)
+- Add `11.5` dev.yml to `cudf` ([#9617](https://github.com/rapidsai/cudf/pull/9617)) [@galipremsagar](https://github.com/galipremsagar)
+- Add `xfail` for parquet reader `11.5` issue ([#9612](https://github.com/rapidsai/cudf/pull/9612)) [@galipremsagar](https://github.com/galipremsagar)
+- remove deprecated Rmm.initialize method ([#9607](https://github.com/rapidsai/cudf/pull/9607)) [@rongou](https://github.com/rongou)
+- Use HostColumnVectorCore for child columns in JCudfSerialization.unpackHostColumnVectors ([#9596](https://github.com/rapidsai/cudf/pull/9596)) [@sperlingxx](https://github.com/sperlingxx)
+- Set RMM pool to a fixed size in JNI ([#9583](https://github.com/rapidsai/cudf/pull/9583)) [@rongou](https://github.com/rongou)
+- Use nvCOMP for Snappy compression/decompression ([#9582](https://github.com/rapidsai/cudf/pull/9582)) [@vuule](https://github.com/vuule)
+- Build CUDA version agnostic packages for dask-cudf ([#9578](https://github.com/rapidsai/cudf/pull/9578)) [@Ethyling](https://github.com/Ethyling)
+- Fixed tests warning: "TYPED_TEST_CASE is deprecated, please use TYPED_TEST_SUITE" ([#9574](https://github.com/rapidsai/cudf/pull/9574)) [@ttnghia](https://github.com/ttnghia)
+- Enable CMake format in CI and fix style ([#9570](https://github.com/rapidsai/cudf/pull/9570)) [@vyasr](https://github.com/vyasr)
+- Add NVTX Start/End Ranges to JNI ([#9563](https://github.com/rapidsai/cudf/pull/9563)) [@abellina](https://github.com/abellina)
+- Add librdkafka and python-confluent-kafka to dev conda environments s… ([#9562](https://github.com/rapidsai/cudf/pull/9562)) [@jdye64](https://github.com/jdye64)
+- Add offsets_begin/end() to strings_column_view ([#9559](https://github.com/rapidsai/cudf/pull/9559)) [@davidwendt](https://github.com/davidwendt)
+- remove alignment options for RMM jni ([#9550](https://github.com/rapidsai/cudf/pull/9550)) [@rongou](https://github.com/rongou)
+- Add axis parameter passthrough to `DataFrame` and `Series` take for pandas API compatibility ([#9549](https://github.com/rapidsai/cudf/pull/9549)) [@dantegd](https://github.com/dantegd)
+- Remove sizeof and standardize on memory_usage ([#9544](https://github.com/rapidsai/cudf/pull/9544)) [@vyasr](https://github.com/vyasr)
+- Adds cudaProfilerStart/cudaProfilerStop in JNI api ([#9543](https://github.com/rapidsai/cudf/pull/9543)) [@abellina](https://github.com/abellina)
+- Generalize comparison binary operations ([#9542](https://github.com/rapidsai/cudf/pull/9542)) [@vyasr](https://github.com/vyasr)
+- Expose APIs to wrap CUDA or RMM allocations with a Java device buffer instance ([#9538](https://github.com/rapidsai/cudf/pull/9538)) [@jlowe](https://github.com/jlowe)
+- Add scan sum support for duration types to libcudf ([#9536](https://github.com/rapidsai/cudf/pull/9536)) [@davidwendt](https://github.com/davidwendt)
+- Force inlining to improve AST performance ([#9530](https://github.com/rapidsai/cudf/pull/9530)) [@vyasr](https://github.com/vyasr)
+- Generalize some more indexed frame methods ([#9529](https://github.com/rapidsai/cudf/pull/9529)) [@vyasr](https://github.com/vyasr)
+- Add Java bindings for rolling window stddev aggregation ([#9527](https://github.com/rapidsai/cudf/pull/9527)) [@razajafri](https://github.com/razajafri)
+- catch rmm::out_of_memory exceptions in jni ([#9525](https://github.com/rapidsai/cudf/pull/9525)) [@rongou](https://github.com/rongou)
+- Add an overload of `make_empty_column` with `type_id` parameter ([#9524](https://github.com/rapidsai/cudf/pull/9524)) [@ttnghia](https://github.com/ttnghia)
+- Accelerate conditional inner joins with larger right tables ([#9523](https://github.com/rapidsai/cudf/pull/9523)) [@vyasr](https://github.com/vyasr)
+- Initial pass of generalizing `decimal` support in `cudf` python layer ([#9517](https://github.com/rapidsai/cudf/pull/9517)) [@galipremsagar](https://github.com/galipremsagar)
+- Cleanup for flattening nested columns ([#9509](https://github.com/rapidsai/cudf/pull/9509)) [@rwlee](https://github.com/rwlee)
+- Enable running tests using RMM arena and async memory resources ([#9506](https://github.com/rapidsai/cudf/pull/9506)) [@rongou](https://github.com/rongou)
+- Remove dependency on six. ([#9495](https://github.com/rapidsai/cudf/pull/9495)) [@bdice](https://github.com/bdice)
+- Cleanup some libcudf strings gtests ([#9489](https://github.com/rapidsai/cudf/pull/9489)) [@davidwendt](https://github.com/davidwendt)
+- Rename strings/array_tests.cu to strings/array_tests.cpp ([#9480](https://github.com/rapidsai/cudf/pull/9480)) [@davidwendt](https://github.com/davidwendt)
+- Refactor sorting APIs ([#9464](https://github.com/rapidsai/cudf/pull/9464)) [@vyasr](https://github.com/vyasr)
+- Implement DataFrame.hash_values, deprecate DataFrame.hash_columns. ([#9458](https://github.com/rapidsai/cudf/pull/9458)) [@bdice](https://github.com/bdice)
+- Deprecate Series.hash_encode. ([#9457](https://github.com/rapidsai/cudf/pull/9457)) [@bdice](https://github.com/bdice)
+- Update `conda` recipes for Enhanced Compatibility effort ([#9456](https://github.com/rapidsai/cudf/pull/9456)) [@ajschmidt8](https://github.com/ajschmidt8)
+- Small clean up to simplify column selection code in ORC reader ([#9444](https://github.com/rapidsai/cudf/pull/9444)) [@vuule](https://github.com/vuule)
+- add missing stream to scalar.is_valid() wherever stream is available ([#9436](https://github.com/rapidsai/cudf/pull/9436)) [@karthikeyann](https://github.com/karthikeyann)
+- Adds Deprecation Warnings to `one_hot_encoding` and Implement `get_dummies` with Cython API ([#9435](https://github.com/rapidsai/cudf/pull/9435)) [@isVoid](https://github.com/isVoid)
+- Update pre-commit hook URLs. ([#9433](https://github.com/rapidsai/cudf/pull/9433)) [@bdice](https://github.com/bdice)
+- Remove pyarrow import in `dask_cudf.io.parquet` ([#9429](https://github.com/rapidsai/cudf/pull/9429)) [@charlesbluca](https://github.com/charlesbluca)
+- Miscellaneous improvements for UDFs ([#9422](https://github.com/rapidsai/cudf/pull/9422)) [@isVoid](https://github.com/isVoid)
+- Use pre-commit for CI ([#9412](https://github.com/rapidsai/cudf/pull/9412)) [@vyasr](https://github.com/vyasr)
+- Update to UCX-Py 0.23 ([#9407](https://github.com/rapidsai/cudf/pull/9407)) [@pentschev](https://github.com/pentschev)
+- Expose OutOfBoundsPolicy in JNI for Table.gather ([#9406](https://github.com/rapidsai/cudf/pull/9406)) [@abellina](https://github.com/abellina)
+- Improvements to tdigest aggregation code. ([#9403](https://github.com/rapidsai/cudf/pull/9403)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Add Java API to deserialize a table to host columns ([#9402](https://github.com/rapidsai/cudf/pull/9402)) [@jlowe](https://github.com/jlowe)
+- Frame copy to use __class__ instead of type() ([#9397](https://github.com/rapidsai/cudf/pull/9397)) [@madsbk](https://github.com/madsbk)
+- Change all DeprecationWarnings to FutureWarning. ([#9392](https://github.com/rapidsai/cudf/pull/9392)) [@bdice](https://github.com/bdice)
+- Update Java nvcomp JNI bindings to nvcomp 2.x API ([#9384](https://github.com/rapidsai/cudf/pull/9384)) [@jbrennan333](https://github.com/jbrennan333)
+- Add IndexedFrame class and move SingleColumnFrame to a separate module ([#9378](https://github.com/rapidsai/cudf/pull/9378)) [@vyasr](https://github.com/vyasr)
+- Support Arrow NativeFile and PythonFile for remote ORC storage ([#9377](https://github.com/rapidsai/cudf/pull/9377)) [@rjzamora](https://github.com/rjzamora)
+- Use Arrow PythonFile for remote CSV storage ([#9376](https://github.com/rapidsai/cudf/pull/9376)) [@rjzamora](https://github.com/rjzamora)
+- Add multi-threaded writing to GDS writes ([#9372](https://github.com/rapidsai/cudf/pull/9372)) [@devavret](https://github.com/devavret)
+- Miscellaneous column cleanup ([#9370](https://github.com/rapidsai/cudf/pull/9370)) [@vyasr](https://github.com/vyasr)
+- Use single kernel to extract all groups in cudf::strings::extract ([#9358](https://github.com/rapidsai/cudf/pull/9358)) [@davidwendt](https://github.com/davidwendt)
+- Consolidate binary ops into `Frame` ([#9357](https://github.com/rapidsai/cudf/pull/9357)) [@isVoid](https://github.com/isVoid)
+- Move rank scan implementations from scan_inclusive.cu to rank_scan.cu ([#9351](https://github.com/rapidsai/cudf/pull/9351)) [@davidwendt](https://github.com/davidwendt)
+- Remove usage of deprecated thrust::host_space_tag. ([#9350](https://github.com/rapidsai/cudf/pull/9350)) [@bdice](https://github.com/bdice)
+- Use Default Memory Resource for Temporaries in `reduction.cpp` ([#9344](https://github.com/rapidsai/cudf/pull/9344)) [@isVoid](https://github.com/isVoid)
+- Fix Cython compilation warnings. ([#9327](https://github.com/rapidsai/cudf/pull/9327)) [@bdice](https://github.com/bdice)
+- Fix some unused variable warnings in libcudf ([#9326](https://github.com/rapidsai/cudf/pull/9326)) [@davidwendt](https://github.com/davidwendt)
+- Use optional-iterator for copy-if-else kernel ([#9324](https://github.com/rapidsai/cudf/pull/9324)) [@davidwendt](https://github.com/davidwendt)
+- Remove Table class ([#9315](https://github.com/rapidsai/cudf/pull/9315)) [@vyasr](https://github.com/vyasr)
+- Unpin `dask` and `distributed` in CI ([#9307](https://github.com/rapidsai/cudf/pull/9307)) [@galipremsagar](https://github.com/galipremsagar)
+- Add optional-iterator support to indexalator ([#9306](https://github.com/rapidsai/cudf/pull/9306)) [@davidwendt](https://github.com/davidwendt)
+- Consolidate more methods in Frame ([#9305](https://github.com/rapidsai/cudf/pull/9305)) [@vyasr](https://github.com/vyasr)
+- Add Arrow-NativeFile and PythonFile support to read_parquet and read_csv in cudf ([#9304](https://github.com/rapidsai/cudf/pull/9304)) [@rjzamora](https://github.com/rjzamora)
+- Pin mypy in .pre-commit-config.yaml to match conda environment pinning. ([#9300](https://github.com/rapidsai/cudf/pull/9300)) [@bdice](https://github.com/bdice)
+- Use gather.hpp when gather-map exists in device memory ([#9299](https://github.com/rapidsai/cudf/pull/9299)) [@davidwendt](https://github.com/davidwendt)
+- Fix Automerger for `Branch-21.12` from `branch-21.10` ([#9285](https://github.com/rapidsai/cudf/pull/9285)) [@galipremsagar](https://github.com/galipremsagar)
+- Refactor cuIO timestamp processing with `cuda::std::chrono` ([#9278](https://github.com/rapidsai/cudf/pull/9278)) [@PointKernel](https://github.com/PointKernel)
+- Change strings copy_if_else to use optional-iterator instead of pair-iterator ([#9266](https://github.com/rapidsai/cudf/pull/9266)) [@davidwendt](https://github.com/davidwendt)
+- Update cudf java bindings to 21.12.0-SNAPSHOT ([#9248](https://github.com/rapidsai/cudf/pull/9248)) [@pxLi](https://github.com/pxLi)
+- Various internal MultiIndex improvements ([#9243](https://github.com/rapidsai/cudf/pull/9243)) [@vyasr](https://github.com/vyasr)
+- Add detail interface for `split` and `slice(table_view)`, refactors both function with `host_span` ([#9226](https://github.com/rapidsai/cudf/pull/9226)) [@isVoid](https://github.com/isVoid)
+- Refactor MD5 implementation. ([#9212](https://github.com/rapidsai/cudf/pull/9212)) [@bdice](https://github.com/bdice)
+- Update groupby result_cache to allow sharing intermediate results based on column_view instead of requests. ([#9195](https://github.com/rapidsai/cudf/pull/9195)) [@karthikeyann](https://github.com/karthikeyann)
+- Use nvcomp's snappy decompressor in avro reader ([#9181](https://github.com/rapidsai/cudf/pull/9181)) [@devavret](https://github.com/devavret)
+- Add `isocalendar` API support ([#9169](https://github.com/rapidsai/cudf/pull/9169)) [@marlenezw](https://github.com/marlenezw)
+- Simplify read_json by removing unnecessary reader/impl classes ([#9088](https://github.com/rapidsai/cudf/pull/9088)) [@cwharris](https://github.com/cwharris)
+- Simplify read_csv by removing unnecessary reader/impl classes ([#9041](https://github.com/rapidsai/cudf/pull/9041)) [@cwharris](https://github.com/cwharris)
+- Refactor hash join with cuCollections multimap ([#8934](https://github.com/rapidsai/cudf/pull/8934)) [@PointKernel](https://github.com/PointKernel)
 
 # cuDF 21.10.00 (7 Oct 2021)
 

From ce02856c099694ad463dbf7970dfc69276842557 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Fri, 17 Dec 2021 15:44:56 -0800
Subject: [PATCH 02/12] Add decimal types to cuIO benchmarks (#9776)

Closes https://github.com/rapidsai/cudf/issues/9769
Depends on https://github.com/rapidsai/cudf/pull/9775

Benchmarks now include decimal32/64/128 columns for all supported formats.
Also fixes an issue in distribution factory, which caused all normal distributions to generate `upper_bound` in many cases.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Jason Lowe (https://github.com/jlowe)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/9776
---
 .../common/generate_benchmark_input.cpp       | 25 +++++++++++++++--
 .../common/generate_benchmark_input.hpp       | 13 +++++++--
 .../common/random_distribution_factory.hpp    | 27 +++++++++++--------
 .../io/csv/csv_reader_benchmark.cpp           |  2 ++
 .../io/csv/csv_writer_benchmark.cpp           |  2 ++
 .../io/orc/orc_reader_benchmark.cpp           |  5 +++-
 .../io/orc/orc_writer_benchmark.cpp           |  5 +++-
 .../io/parquet/parquet_reader_benchmark.cpp   |  5 +++-
 .../io/parquet/parquet_writer_benchmark.cpp   |  5 +++-
 9 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/cpp/benchmarks/common/generate_benchmark_input.cpp b/cpp/benchmarks/common/generate_benchmark_input.cpp
index 0ec2590bdb5..995cea13c27 100644
--- a/cpp/benchmarks/common/generate_benchmark_input.cpp
+++ b/cpp/benchmarks/common/generate_benchmark_input.cpp
@@ -161,8 +161,29 @@ struct random_value_fn<T, typename std::enable_if_t<cudf::is_chrono<T>()>> {
  */
 template <typename T>
 struct random_value_fn<T, typename std::enable_if_t<cudf::is_fixed_point<T>()>> {
-  random_value_fn(distribution_params<T> const&) {}
-  T operator()(std::mt19937& engine) { CUDF_FAIL("Not implemented"); }
+  using rep = typename T::rep;
+  rep const lower_bound;
+  rep const upper_bound;
+  distribution_fn<rep> dist;
+  std::optional<numeric::scale_type> scale;
+
+  random_value_fn(distribution_params<rep> const& desc)
+    : lower_bound{desc.lower_bound},
+      upper_bound{desc.upper_bound},
+      dist{make_distribution<rep>(desc.id, desc.lower_bound, desc.upper_bound)}
+  {
+  }
+
+  T operator()(std::mt19937& engine)
+  {
+    if (not scale.has_value()) {
+      int const max_scale = std::numeric_limits<rep>::digits10;
+      auto scale_dist     = make_distribution<int>(distribution_id::NORMAL, -max_scale, max_scale);
+      scale = numeric::scale_type{std::max(std::min(scale_dist(engine), max_scale), -max_scale)};
+    }
+    // Clamp the generated random value to the specified range
+    return T{std::max(std::min(dist(engine), upper_bound), lower_bound), *scale};
+  }
 };
 
 /**
diff --git a/cpp/benchmarks/common/generate_benchmark_input.hpp b/cpp/benchmarks/common/generate_benchmark_input.hpp
index 6ea57c0a7ad..3dbc6561839 100644
--- a/cpp/benchmarks/common/generate_benchmark_input.hpp
+++ b/cpp/benchmarks/common/generate_benchmark_input.hpp
@@ -216,6 +216,7 @@ class data_profile {
   distribution_params<cudf::string_view> string_dist_desc{{distribution_id::NORMAL, 0, 32}};
   distribution_params<cudf::list_view> list_dist_desc{
     cudf::type_id::INT32, {distribution_id::GEOMETRIC, 0, 100}, 2};
+  std::map<cudf::type_id, distribution_params<__uint128_t>> decimal_params;
 
   double bool_probability        = 0.5;
   double null_frequency          = 0.01;
@@ -284,9 +285,17 @@ class data_profile {
   }
 
   template <typename T, typename std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
-  distribution_params<T> get_distribution_params() const
+  distribution_params<typename T::rep> get_distribution_params() const
   {
-    CUDF_FAIL("Not implemented");
+    using rep = typename T::rep;
+    auto it   = decimal_params.find(cudf::type_to_id<T>());
+    if (it == decimal_params.end()) {
+      auto const range = default_range<rep>();
+      return distribution_params<rep>{default_distribution_id<rep>(), range.first, range.second};
+    } else {
+      auto& desc = it->second;
+      return {desc.id, static_cast<rep>(desc.lower_bound), static_cast<rep>(desc.upper_bound)};
+    }
   }
 
   auto get_bool_probability() const { return bool_probability; }
diff --git a/cpp/benchmarks/common/random_distribution_factory.hpp b/cpp/benchmarks/common/random_distribution_factory.hpp
index c21fb645573..65dc8b4dd4d 100644
--- a/cpp/benchmarks/common/random_distribution_factory.hpp
+++ b/cpp/benchmarks/common/random_distribution_factory.hpp
@@ -21,19 +21,24 @@
 #include <memory>
 #include <random>
 
+/**
+ * @brief Generates a normal (binomial) distribution between zero and upper_bound.
+ */
 template <typename T, typename std::enable_if_t<std::is_integral<T>::value, T>* = nullptr>
-auto make_normal_dist(T range_start, T range_end)
+auto make_normal_dist(T upper_bound)
 {
-  using uT            = typename std::make_unsigned<T>::type;
-  uT const range_size = range_end - range_start;
-  return std::binomial_distribution<uT>(range_size, 0.5);
+  using uT = typename std::make_unsigned<T>::type;
+  return std::binomial_distribution<uT>(upper_bound, 0.5);
 }
 
+/**
+ * @brief Generates a normal distribution between zero and upper_bound.
+ */
 template <typename T, std::enable_if_t<cudf::is_floating_point<T>()>* = nullptr>
-auto make_normal_dist(T range_start, T range_end)
+auto make_normal_dist(T upper_bound)
 {
-  T const mean   = range_start / 2 + range_end / 2;
-  T const stddev = range_end / 6 - range_start / 6;
+  T const mean   = upper_bound / 2;
+  T const stddev = upper_bound / 6;
   return std::normal_distribution<T>(mean, stddev);
 }
 
@@ -82,8 +87,8 @@ distribution_fn<T> make_distribution(distribution_id did, T lower_bound, T upper
 {
   switch (did) {
     case distribution_id::NORMAL:
-      return [lower_bound, dist = make_normal_dist(lower_bound, upper_bound)](
-               std::mt19937& engine) mutable -> T { return dist(engine) - lower_bound; };
+      return [lower_bound, dist = make_normal_dist(upper_bound - lower_bound)](
+               std::mt19937& engine) mutable -> T { return dist(engine) + lower_bound; };
     case distribution_id::UNIFORM:
       return [dist = make_uniform_dist(lower_bound, upper_bound)](
                std::mt19937& engine) mutable -> T { return dist(engine); };
@@ -104,8 +109,8 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
 {
   switch (dist_id) {
     case distribution_id::NORMAL:
-      return [dist = make_normal_dist(lower_bound, upper_bound)](
-               std::mt19937& engine) mutable -> T { return dist(engine); };
+      return [lower_bound, dist = make_normal_dist(upper_bound - lower_bound)](
+               std::mt19937& engine) mutable -> T { return dist(engine) + lower_bound; };
     case distribution_id::UNIFORM:
       return [dist = make_uniform_dist(lower_bound, upper_bound)](
                std::mt19937& engine) mutable -> T { return dist(engine); };
diff --git a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
index 3f5549a3148..77bf4b03a14 100644
--- a/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_benchmark.cpp
@@ -70,6 +70,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
   auto const data_types =
     dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL),
                                                    int32_t(type_group_id::FLOATING_POINT),
+                                                   int32_t(type_group_id::FIXED_POINT),
                                                    int32_t(type_group_id::TIMESTAMP),
                                                    int32_t(cudf::type_id::STRING)}),
                                 col_sel);
@@ -143,6 +144,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
 
 RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, integral, type_group_id::INTEGRAL);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(CSV_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
 
diff --git a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
index fdd7c63eece..9baab6b2571 100644
--- a/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer_benchmark.cpp
@@ -63,6 +63,7 @@ void BM_csv_write_varying_options(benchmark::State& state)
 
   auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL),
                                              int32_t(type_group_id::FLOATING_POINT),
+                                             int32_t(type_group_id::FIXED_POINT),
                                              int32_t(type_group_id::TIMESTAMP),
                                              int32_t(cudf::type_id::STRING)});
 
@@ -96,6 +97,7 @@ void BM_csv_write_varying_options(benchmark::State& state)
 
 WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, integral, type_group_id::INTEGRAL);
 WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 WR_BENCHMARK_DEFINE_ALL_SINKS(CSV_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
 
diff --git a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
index f0624e40149..6ab8d8d09c0 100644
--- a/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_benchmark.cpp
@@ -91,8 +91,10 @@ void BM_orc_read_varying_options(benchmark::State& state)
   auto const data_types =
     dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                                    int32_t(type_group_id::FLOATING_POINT),
+                                                   int32_t(type_group_id::FIXED_POINT),
                                                    int32_t(type_group_id::TIMESTAMP),
-                                                   int32_t(cudf::type_id::STRING)}),
+                                                   int32_t(cudf::type_id::STRING),
+                                                   int32_t(cudf::type_id::LIST)}),
                                 col_sel);
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -158,6 +160,7 @@ void BM_orc_read_varying_options(benchmark::State& state)
 
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, integral, type_group_id::INTEGRAL_SIGNED);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(ORC_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
diff --git a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
index bfa7d4fc6d9..933b3d02e08 100644
--- a/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_benchmark.cpp
@@ -70,8 +70,10 @@ void BM_orc_write_varying_options(benchmark::State& state)
 
   auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                              int32_t(type_group_id::FLOATING_POINT),
+                                             int32_t(type_group_id::FIXED_POINT),
                                              int32_t(type_group_id::TIMESTAMP),
-                                             int32_t(cudf::type_id::STRING)});
+                                             int32_t(cudf::type_id::STRING),
+                                             int32_t(cudf::type_id::LIST)});
 
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -101,6 +103,7 @@ void BM_orc_write_varying_options(benchmark::State& state)
 
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, integral, type_group_id::INTEGRAL_SIGNED);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
 WR_BENCHMARK_DEFINE_ALL_SINKS(ORC_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
index 045aa0e043b..a68ce2bd1a1 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_benchmark.cpp
@@ -92,8 +92,10 @@ void BM_parq_read_varying_options(benchmark::State& state)
   auto const data_types =
     dtypes_for_column_selection(get_type_or_group({int32_t(type_group_id::INTEGRAL),
                                                    int32_t(type_group_id::FLOATING_POINT),
+                                                   int32_t(type_group_id::FIXED_POINT),
                                                    int32_t(type_group_id::TIMESTAMP),
-                                                   int32_t(cudf::type_id::STRING)}),
+                                                   int32_t(cudf::type_id::STRING),
+                                                   int32_t(cudf::type_id::LIST)}),
                                 col_sel);
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -160,6 +162,7 @@ void BM_parq_read_varying_options(benchmark::State& state)
 
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, integral, type_group_id::INTEGRAL);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, string, cudf::type_id::STRING);
 RD_BENCHMARK_DEFINE_ALL_SOURCES(PARQ_RD_BM_INPUTS_DEFINE, list, cudf::type_id::LIST);
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
index 5c3c53fee8e..1af7e206692 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
@@ -71,8 +71,10 @@ void BM_parq_write_varying_options(benchmark::State& state)
 
   auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                              int32_t(type_group_id::FLOATING_POINT),
+                                             int32_t(type_group_id::FIXED_POINT),
                                              int32_t(type_group_id::TIMESTAMP),
-                                             int32_t(cudf::type_id::STRING)});
+                                             int32_t(cudf::type_id::STRING),
+                                             int32_t(cudf::type_id::LIST)});
 
   auto const tbl  = create_random_table(data_types, data_types.size(), table_size_bytes{data_size});
   auto const view = tbl->view();
@@ -103,6 +105,7 @@ void BM_parq_write_varying_options(benchmark::State& state)
 
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, integral, type_group_id::INTEGRAL);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, floats, type_group_id::FLOATING_POINT);
+WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, decimal, type_group_id::FIXED_POINT);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, timestamps, type_group_id::TIMESTAMP);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, string, cudf::type_id::STRING);
 WR_BENCHMARK_DEFINE_ALL_SINKS(PARQ_WR_BM_INOUTS_DEFINE, list, cudf::type_id::LIST);

From a4dc42d4c6b88b7f2da78d55934c01cb7479a6a1 Mon Sep 17 00:00:00 2001
From: MithunR <mythrocks@gmail.com>
Date: Mon, 20 Dec 2021 08:26:23 -0800
Subject: [PATCH 03/12] Implement `lists::index_of()` to find positions in list
 rows (#9510)

Fixes #9164.

### Prelude
`lists::contains()` (introduced in #7039) returns a `BOOL8` column, indicating whether the specified search_key(s) exist at all in each corresponding list row of an input LIST column. It does not return the actual position.

### `index_of()`
This commit introduces `lists::index_of()`, to return the INT32 positions of the specified search_key(s) in a LIST column.

The search keys may be searched for using either `FIND_FIRST` (which finds the position of the first occurrence), or `FIND_LAST` (which finds the last occurrence). Both column_view and scalar search keys are supported.

As with `lists::contains()`, nested types are not supported as search keys in `lists::index_of()`.

If the search_key cannot be found, that output row is set to `-1`. Additionally, the row `output[i]` is set to null if:
  1. The `search_key`(scalar) or `search_keys[i]`(column_view) is null.
  2. The list row `lists[i]` is null

In all other cases, `output[i]` should contain a non-negative value.

### Semantic changes for `lists::contains()`
This commit also modifies the semantics of `lists::contains()`: it will now return nulls only for the following cases:
  1. The `search_key`(scalar) or `search_keys[i]`(column_view) is null.
  2. The list row `lists[i]` is null

In all other cases, a non-null bool is returned. Specifically, `lists::contains()` no longer conforms to the SQL semantics of returning `NULL` for list rows that do not contain the search key but do contain null elements. In this case, `false` is returned.

### `lists::contains_nulls()`
A new function has been introduced to check if each list row contains null elements. The semantics are similar to `lists::contains()`, in that the column returned is BOOL8 typed:
  1. If even 1 element in a list row is null, the returned row is `true`.
  2. If no element is null, the returned row is `false`.
  3. If the list row is null, the returned row is `null`.
  4. If the list row is empty, the returned row is `false`.

The current implementation is an inefficient placeholder, to be replaced once (#9588) is available. It is included here to reconstruct the SQL semantics dropped from `lists::contains()`.

Authors:
  - MithunR (https://github.com/mythrocks)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Jason Lowe (https://github.com/jlowe)
  - Mark Harris (https://github.com/harrism)
  - Conor Hoekstra (https://github.com/codereport)

URL: https://github.com/rapidsai/cudf/pull/9510
---
 cpp/include/cudf/lists/contains.hpp           | 102 +-
 cpp/src/lists/contains.cu                     | 353 +++++--
 cpp/tests/lists/contains_tests.cpp            | 995 ++++++++++++------
 .../main/java/ai/rapids/cudf/ColumnView.java  |  84 +-
 java/src/main/native/src/ColumnViewJni.cpp    |  50 +
 .../java/ai/rapids/cudf/ColumnVectorTest.java | 165 ++-
 python/cudf/cudf/tests/test_list.py           |   4 +-
 7 files changed, 1283 insertions(+), 470 deletions(-)

diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp
index 7cd40bb2f86..d529677d505 100644
--- a/cpp/include/cudf/lists/contains.hpp
+++ b/cpp/include/cudf/lists/contains.hpp
@@ -27,7 +27,7 @@ namespace lists {
  */
 
 /**
- * @brief Create a column of bool values indicating whether the specified scalar
+ * @brief Create a column of `bool` values indicating whether the specified scalar
  * is an element of each row of a list column.
  *
  * The output column has as many elements as the input `lists` column.
@@ -51,7 +51,7 @@ std::unique_ptr<column> contains(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Create a column of bool values indicating whether the list rows of the first
+ * @brief Create a column of `bool` values indicating whether the list rows of the first
  * column contain the corresponding values in the second column
  *
  * The output column has as many elements as the input `lists` column.
@@ -74,6 +74,104 @@ std::unique_ptr<column> contains(
   cudf::column_view const& search_keys,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Create a column of `bool` values indicating whether each row in the `lists` column
+ * contains at least one null element.
+ *
+ * The output column has as many elements as the input `lists` column.
+ * Output `column[i]` is set to null if the list row `lists[i]` is null.
+ * Otherwise, `column[i]` is set to a non-null boolean value, depending on whether that list
+ * contains a null element.
+ * (Empty list rows are considered *NOT* to contain a null element.)
+ *
+ * @param lists Lists column whose `n` rows are to be searched
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return std::unique_ptr<column> BOOL8 column of `n` rows with the result of the lookup
+ */
+std::unique_ptr<column> contains_nulls(
+  cudf::lists_column_view const& lists,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Option to choose whether `index_of()` returns the first or last match
+ * of a search key in a list row
+ */
+enum class duplicate_find_option : int32_t {
+  FIND_FIRST = 0,  ///< Finds first instance of a search key in a list row.
+  FIND_LAST        ///< Finds last instance of a search key in a list row.
+};
+
+/**
+ * @brief Create a column of `size_type` values indicating the position of a search key
+ * within each list row in the `lists` column
+ *
+ * The output column has as many elements as there are rows in the input `lists` column.
+ * Output `column[i]` contains a 0-based index indicating the position of the search key
+ * in each list, counting from the beginning of the list.
+ * Note:
+ *   1. If the `search_key` is null, all output rows are set to null.
+ *   2. If the row `lists[i]` is null, `output[i]` is also null.
+ *   3. If the row `lists[i]` does not contain the `search_key`, `output[i]` is set to `-1`.
+ *   4. In all other cases, `output[i]` is set to a non-negative `size_type` index.
+ *
+ * If the `find_option` is set to `FIND_FIRST`, the position of the first match for
+ * `search_key` is returned.
+ * If `find_option == FIND_LAST`, the position of the last match in the list row is
+ * returned.
+ *
+ * @param lists Lists column whose `n` rows are to be searched
+ * @param search_key The scalar key to be looked up in each list row
+ * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or
+ * last (`FIND_LAST`)
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return std::unique_ptr<column> INT32 column of `n` rows with the location of the `search_key`
+ *
+ * @throw cudf::logic_error If `search_key` type does not match the element type in `lists`
+ * @throw cudf::logic_error If `search_key` is of a nested type, or `lists` contains nested
+ * elements (LIST, STRUCT)
+ */
+std::unique_ptr<column> index_of(
+  cudf::lists_column_view const& lists,
+  cudf::scalar const& search_key,
+  duplicate_find_option find_option   = duplicate_find_option::FIND_FIRST,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief Create a column of `size_type` values indicating the position of a search key
+ * row within the corresponding list row in the `lists` column
+ *
+ * The output column has as many elements as there are rows in the input `lists` column.
+ * Output `column[i]` contains a 0-based index indicating the position of each search key
+ * row in its corresponding list row, counting from the beginning of the list.
+ * Note:
+ *   1. If `search_keys[i]` is null, `output[i]` is also null.
+ *   2. If the row `lists[i]` is null, `output[i]` is also null.
+ *   3. If the row `lists[i]` does not contain `search_keys[i]`, `output[i]` is set to `-1`.
+ *   4. In all other cases, `output[i]` is set to a non-negative `size_type` index.
+ *
+ * If the `find_option` is set to `FIND_FIRST`, the position of the first match for
+ * `search_keys[i]` is returned.
+ * If `find_option == FIND_LAST`, the position of the last match in the list row is
+ * returned.
+ *
+ * @param lists Lists column whose `n` rows are to be searched
+ * @param search_keys A column of search keys to be looked up in each corresponding row of
+ * `lists`
+ * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or
+ * last (`FIND_LAST`)
+ * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @return std::unique_ptr<column> INT32 column of `n` rows with the location of the `search_key`
+ *
+ * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows
+ * @throw cudf::logic_error If `search_keys` type does not match the element type in `lists`
+ * @throw cudf::logic_error If `lists` or `search_keys` contains nested elements (LIST, STRUCT)
+ */
+std::unique_ptr<column> index_of(
+  cudf::lists_column_view const& lists,
+  cudf::column_view const& search_keys,
+  duplicate_find_option find_option   = duplicate_find_option::FIND_FIRST,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of group
 }  // namespace lists
 }  // namespace cudf
diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu
index 3d135992dea..5d095fdd5a3 100644
--- a/cpp/src/lists/contains.cu
+++ b/cpp/src/lists/contains.cu
@@ -35,6 +35,8 @@ namespace lists {
 
 namespace {
 
+auto constexpr absent_index = size_type{-1};
+
 auto get_search_keys_device_iterable_view(cudf::column_view const& search_keys,
                                           rmm::cuda_stream_view stream)
 {
@@ -46,6 +48,59 @@ auto get_search_keys_device_iterable_view(cudf::scalar const& search_key, rmm::c
   return &search_key;
 }
 
+template <typename ElementType, duplicate_find_option find_option>
+auto __device__ find_begin(list_device_view const& list)
+{
+  if constexpr (find_option == duplicate_find_option::FIND_FIRST) {
+    return list.pair_rep_begin<ElementType>();
+  } else {
+    return thrust::make_reverse_iterator(list.pair_rep_end<ElementType>());
+  }
+}
+
+template <typename ElementType, duplicate_find_option find_option>
+auto __device__ find_end(list_device_view const& list)
+{
+  if constexpr (find_option == duplicate_find_option::FIND_FIRST) {
+    return list.pair_rep_end<ElementType>();
+  } else {
+    return thrust::make_reverse_iterator(list.pair_rep_begin<ElementType>());
+  }
+}
+
+template <duplicate_find_option find_option, typename Iterator>
+size_type __device__ distance([[maybe_unused]] Iterator begin, Iterator end, Iterator find_iter)
+{
+  if (find_iter == end) {
+    return absent_index;  // Not found.
+  }
+
+  if constexpr (find_option == duplicate_find_option::FIND_FIRST) {
+    return find_iter - begin;  // Distance of find_position from begin.
+  } else {
+    return end - find_iter - 1;  // Distance of find_position from end.
+  }
+}
+
+/**
+ * @brief __device__ functor to search for a key in a `list_device_view`.
+ */
+template <duplicate_find_option find_option>
+struct finder {
+  template <typename ElementType>
+  __device__ size_type operator()(list_device_view const& list, ElementType const& search_key) const
+  {
+    auto const list_begin = find_begin<ElementType, find_option>(list);
+    auto const list_end   = find_end<ElementType, find_option>(list);
+    auto const find_iter  = thrust::find_if(
+      thrust::seq, list_begin, list_end, [search_key] __device__(auto element_and_validity) {
+        auto [element, element_is_valid] = element_and_validity;
+        return element_is_valid && cudf::equality_compare(element, search_key);
+      });
+    return distance<find_option>(list_begin, list_end, find_iter);
+  };
+};
+
 /**
  * @brief Functor to search each list row for the specified search keys.
  */
@@ -63,13 +118,15 @@ struct lookup_functor {
     Args&&...) const
   {
     CUDF_FAIL(
-      "lists::contains() is only supported on numeric types, decimals, chrono types, and strings.");
+      "List search operations are only supported on numeric types, decimals, chrono types, and "
+      "strings.");
   }
 
-  std::pair<rmm::device_buffer, size_type> construct_null_mask(lists_column_view const& input_lists,
-                                                               column_view const& result_validity,
-                                                               rmm::cuda_stream_view stream,
-                                                               rmm::mr::device_memory_resource* mr)
+  std::pair<rmm::device_buffer, size_type> construct_null_mask(
+    lists_column_view const& input_lists,
+    column_view const& result_validity,
+    rmm::cuda_stream_view stream,
+    rmm::mr::device_memory_resource* mr) const
   {
     if (!search_keys_have_nulls && !input_lists.has_nulls() && !input_lists.child().has_nulls()) {
       return {rmm::device_buffer{0, stream, mr}, size_type{0}};
@@ -82,50 +139,31 @@ struct lookup_functor {
   template <typename ElementType, typename SearchKeyPairIter>
   void search_each_list_row(cudf::detail::lists_column_device_view const& d_lists,
                             SearchKeyPairIter search_key_pair_iter,
-                            cudf::mutable_column_device_view mutable_ret_bools,
-                            cudf::mutable_column_device_view mutable_ret_validity,
-                            rmm::cuda_stream_view stream,
-                            rmm::mr::device_memory_resource*)
+                            duplicate_find_option find_option,
+                            cudf::mutable_column_device_view ret_positions,
+                            cudf::mutable_column_device_view ret_validity,
+                            rmm::cuda_stream_view stream) const
   {
-    thrust::for_each(
+    auto output_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(ret_positions.data<size_type>(), ret_validity.data<bool>()));
+
+    thrust::tabulate(
       rmm::exec_policy(stream),
-      thrust::make_counting_iterator(0),
-      thrust::make_counting_iterator(d_lists.size()),
-      [d_lists,
-       search_key_pair_iter,
-       d_bools    = mutable_ret_bools.data<bool>(),
-       d_validity = mutable_ret_validity.data<bool>()] __device__(auto row_index) {
-        auto search_key_and_validity    = search_key_pair_iter[row_index];
-        auto const& search_key_is_valid = search_key_and_validity.second;
-
-        if (search_keys_have_nulls && !search_key_is_valid) {
-          d_bools[row_index]    = false;
-          d_validity[row_index] = false;
-          return;
-        }
+      output_iterator,
+      output_iterator + d_lists.size(),
+      [d_lists, search_key_pair_iter, absent_index = absent_index, find_option] __device__(
+        auto row_index) -> thrust::pair<size_type, bool> {
+        auto [search_key, search_key_is_valid] = search_key_pair_iter[row_index];
+
+        if (search_keys_have_nulls && !search_key_is_valid) { return {absent_index, false}; }
 
         auto list = cudf::list_device_view(d_lists, row_index);
-        if (list.is_null()) {
-          d_bools[row_index]    = false;
-          d_validity[row_index] = false;
-          return;
-        }
-
-        auto search_key = search_key_and_validity.first;
-        d_bools[row_index] =
-          thrust::find_if(thrust::seq,
-                          list.pair_rep_begin<ElementType>(),
-                          list.pair_rep_end<ElementType>(),
-                          [search_key] __device__(auto element_and_validity) {
-                            return element_and_validity.second &&
-                                   cudf::equality_compare(element_and_validity.first, search_key);
-                          }) != list.pair_rep_end<ElementType>();
-        d_validity[row_index] =
-          d_bools[row_index] ||
-          thrust::none_of(thrust::seq,
-                          thrust::make_counting_iterator(size_type{0}),
-                          thrust::make_counting_iterator(list.size()),
-                          [&list] __device__(auto const& i) { return list.is_null(i); });
+        if (list.is_null()) { return {absent_index, false}; }
+
+        auto const position = find_option == duplicate_find_option::FIND_FIRST
+                                ? finder<duplicate_find_option::FIND_FIRST>{}(list, search_key)
+                                : finder<duplicate_find_option::FIND_LAST>{}(list, search_key);
+        return {position, true};
       });
   }
 
@@ -133,74 +171,171 @@ struct lookup_functor {
   std::enable_if_t<is_supported<ElementType>::value, std::unique_ptr<column>> operator()(
     cudf::lists_column_view const& lists,
     SearchKeyType const& search_key,
+    duplicate_find_option find_option,
     rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr)
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const
   {
     using namespace cudf;
     using namespace cudf::detail;
 
     CUDF_EXPECTS(!cudf::is_nested(lists.child().type()),
-                 "Nested types not supported in lists::contains()");
+                 "Nested types not supported in list search operations.");
     CUDF_EXPECTS(lists.child().type() == search_key.type(),
                  "Type/Scale of search key does not match list column element type.");
     CUDF_EXPECTS(search_key.type().id() != type_id::EMPTY, "Type cannot be empty.");
 
     auto constexpr search_key_is_scalar = std::is_same_v<SearchKeyType, cudf::scalar>;
 
-    if (search_keys_have_nulls && search_key_is_scalar) {
-      return make_fixed_width_column(data_type(type_id::BOOL8),
-                                     lists.size(),
-                                     cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr),
-                                     lists.size(),
-                                     stream,
-                                     mr);
+    if constexpr (search_keys_have_nulls && search_key_is_scalar) {
+      return make_numeric_column(data_type(type_id::INT32),
+                                 lists.size(),
+                                 cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr),
+                                 lists.size(),
+                                 stream,
+                                 mr);
     }
 
     auto const device_view = column_device_view::create(lists.parent(), stream);
-    auto const d_lists     = lists_column_device_view(*device_view);
+    auto const d_lists     = lists_column_device_view{*device_view};
     auto const d_skeys     = get_search_keys_device_iterable_view(search_key, stream);
 
-    auto result_validity = make_fixed_width_column(
+    auto result_positions = make_numeric_column(
+      data_type{type_id::INT32}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr);
+    auto result_validity = make_numeric_column(
       data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr);
-    auto result_bools = make_fixed_width_column(
-      data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr);
-    auto mutable_result_bools =
-      mutable_column_device_view::create(result_bools->mutable_view(), stream);
+    auto mutable_result_positions =
+      mutable_column_device_view::create(result_positions->mutable_view(), stream);
     auto mutable_result_validity =
       mutable_column_device_view::create(result_validity->mutable_view(), stream);
     auto search_key_iter =
       cudf::detail::make_pair_rep_iterator<ElementType, search_keys_have_nulls>(*d_skeys);
 
-    search_each_list_row<ElementType>(
-      d_lists, search_key_iter, *mutable_result_bools, *mutable_result_validity, stream, mr);
-
-    rmm::device_buffer null_mask;
-    size_type num_nulls;
+    search_each_list_row<ElementType>(d_lists,
+                                      search_key_iter,
+                                      find_option,
+                                      *mutable_result_positions,
+                                      *mutable_result_validity,
+                                      stream);
 
-    std::tie(null_mask, num_nulls) =
-      construct_null_mask(lists, result_validity->view(), stream, mr);
-    result_bools->set_null_mask(std::move(null_mask), num_nulls);
-
-    return result_bools;
+    auto [null_mask, num_nulls] = construct_null_mask(lists, result_validity->view(), stream, mr);
+    result_positions->set_null_mask(std::move(null_mask), num_nulls);
+    return result_positions;
   }
 };
 
+/**
+ * @brief Converts key-positions vector (from index_of()) to a BOOL8 vector, indicating if
+ * the search key was found.
+ */
+std::unique_ptr<column> to_contains(std::unique_ptr<column>&& key_positions,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(key_positions->type().id() == type_id::INT32,
+               "Expected input column of type INT32.");
+  // If position == -1, the list did not contain the search key.
+  auto const num_rows        = key_positions->size();
+  auto const positions_begin = key_positions->view().begin<size_type>();
+  auto result =
+    make_numeric_column(data_type{type_id::BOOL8}, num_rows, mask_state::UNALLOCATED, stream, mr);
+  thrust::transform(rmm::exec_policy(stream),
+                    positions_begin,
+                    positions_begin + num_rows,
+                    result->mutable_view().begin<bool>(),
+                    [] __device__(auto i) { return i != absent_index; });
+  [[maybe_unused]] auto [_, null_mask, __] = key_positions->release();
+  result->set_null_mask(std::move(*null_mask));
+  return result;
+}
 }  // namespace
 
 namespace detail {
+/**
+ * @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
+ *                                cudf::scalar const&,
+ *                                duplicate_find_option,
+ *                                rmm::mr::device_memory_resource*)
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> index_of(
+  cudf::lists_column_view const& lists,
+  cudf::scalar const& search_key,
+  duplicate_find_option find_option,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+{
+  return search_key.is_valid(stream)
+           ? cudf::type_dispatcher(search_key.type(),
+                                   lookup_functor<false>{},  // No nulls in search key
+                                   lists,
+                                   search_key,
+                                   find_option,
+                                   stream,
+                                   mr)
+           : cudf::type_dispatcher(search_key.type(),
+                                   lookup_functor<true>{},  // Nulls in search key
+                                   lists,
+                                   search_key,
+                                   find_option,
+                                   stream,
+                                   mr);
+}
+
+/**
+ * @copydoc cudf::lists::index_of(cudf::lists_column_view const&,
+ *                                cudf::column_view const&,
+ *                                duplicate_find_option,
+ *                                rmm::mr::device_memory_resource*)
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> index_of(
+  cudf::lists_column_view const& lists,
+  cudf::column_view const& search_keys,
+  duplicate_find_option find_option,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+{
+  CUDF_EXPECTS(search_keys.size() == lists.size(),
+               "Number of search keys must match list column size.");
+
+  return search_keys.has_nulls()
+           ? cudf::type_dispatcher(search_keys.type(),
+                                   lookup_functor<true>{},  // Nulls in search keys
+                                   lists,
+                                   search_keys,
+                                   find_option,
+                                   stream,
+                                   mr)
+           : cudf::type_dispatcher(search_keys.type(),
+                                   lookup_functor<false>{},  // No nulls in search keys
+                                   lists,
+                                   search_keys,
+                                   find_option,
+                                   stream,
+                                   mr);
+}
 
+/**
+ * @copydoc cudf::lists::contains(cudf::lists_column_view const&,
+ *                                cudf::scalar const&,
+ *                                rmm::mr::device_memory_resource*)
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
 std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
                                  cudf::scalar const& search_key,
                                  rmm::cuda_stream_view stream,
                                  rmm::mr::device_memory_resource* mr)
 {
-  return search_key.is_valid(stream)
-           ? cudf::type_dispatcher(
-               search_key.type(), lookup_functor<false>{}, lists, search_key, stream, mr)
-           : cudf::type_dispatcher(
-               search_key.type(), lookup_functor<true>{}, lists, search_key, stream, mr);
+  return to_contains(
+    index_of(lists, search_key, duplicate_find_option::FIND_FIRST, stream), stream, mr);
 }
 
+/**
+ * @copydoc cudf::lists::contains(cudf::lists_column_view const&,
+ *                                cudf::column_view const&,
+ *                                rmm::mr::device_memory_resource*)
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
 std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
                                  cudf::column_view const& search_keys,
                                  rmm::cuda_stream_view stream,
@@ -209,11 +344,44 @@ std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
   CUDF_EXPECTS(search_keys.size() == lists.size(),
                "Number of search keys must match list column size.");
 
-  return search_keys.has_nulls()
-           ? cudf::type_dispatcher(
-               search_keys.type(), lookup_functor<true>{}, lists, search_keys, stream, mr)
-           : cudf::type_dispatcher(
-               search_keys.type(), lookup_functor<false>{}, lists, search_keys, stream, mr);
+  return to_contains(
+    index_of(lists, search_keys, duplicate_find_option::FIND_FIRST, stream), stream, mr);
+}
+
+/**
+ * @copydoc cudf::lists::contains_nulls(cudf::lists_column_view const&,
+ *                                      rmm::mr::device_memory_resource*)
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> contains_nulls(cudf::lists_column_view const& input_lists,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr)
+{
+  auto const num_rows   = input_lists.size();
+  auto const d_lists    = column_device_view::create(input_lists.parent(), stream);
+  auto has_nulls_output = make_numeric_column(
+    data_type{type_id::BOOL8}, input_lists.size(), mask_state::UNALLOCATED, stream, mr);
+  auto const output_begin = has_nulls_output->mutable_view().begin<bool>();
+  thrust::tabulate(
+    rmm::exec_policy(stream),
+    output_begin,
+    output_begin + num_rows,
+    [lists = cudf::detail::lists_column_device_view{*d_lists}] __device__(auto list_idx) {
+      auto list       = list_device_view{lists, list_idx};
+      auto list_begin = thrust::make_counting_iterator(size_type{0});
+      return list.is_null() ||
+             thrust::any_of(thrust::seq, list_begin, list_begin + list.size(), [&list](auto i) {
+               return list.is_null(i);
+             });
+    });
+  auto const validity_begin = cudf::detail::make_counting_transform_iterator(
+    0, [lists = cudf::detail::lists_column_device_view{*d_lists}] __device__(auto list_idx) {
+      return not list_device_view{lists, list_idx}.is_null();
+    });
+  auto [null_mask, num_nulls] = cudf::detail::valid_if(
+    validity_begin, validity_begin + num_rows, thrust::identity<bool>{}, stream, mr);
+  has_nulls_output->set_null_mask(std::move(null_mask), num_nulls);
+  return has_nulls_output;
 }
 
 }  // namespace detail
@@ -234,5 +402,30 @@ std::unique_ptr<column> contains(cudf::lists_column_view const& lists,
   return detail::contains(lists, search_keys, rmm::cuda_stream_default, mr);
 }
 
+std::unique_ptr<column> contains_nulls(cudf::lists_column_view const& input_lists,
+                                       rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::contains_nulls(input_lists, rmm::cuda_stream_default, mr);
+}
+
+std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
+                                 cudf::scalar const& search_key,
+                                 duplicate_find_option find_option,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::index_of(lists, search_key, find_option, rmm::cuda_stream_default, mr);
+}
+
+std::unique_ptr<column> index_of(cudf::lists_column_view const& lists,
+                                 cudf::column_view const& search_keys,
+                                 duplicate_find_option find_option,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::index_of(lists, search_keys, find_option, rmm::cuda_stream_default, mr);
+}
+
 }  // namespace lists
 }  // namespace cudf
diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp
index 5d7e218898c..066eb7eafc8 100644
--- a/cpp/tests/lists/contains_tests.cpp
+++ b/cpp/tests/lists/contains_tests.cpp
@@ -25,6 +25,7 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
 namespace cudf {
@@ -42,6 +43,12 @@ struct TypedContainsTest : public ContainsTest {
 TYPED_TEST_SUITE(TypedContainsTest, ContainsTestTypes);
 
 namespace {
+
+auto constexpr x          = int32_t{-1};    // Placeholder for nulls.
+auto constexpr absent     = size_type{-1};  // Index when key is not found in a list.
+auto constexpr FIND_FIRST = lists::duplicate_find_option::FIND_FIRST;
+auto constexpr FIND_LAST  = lists::duplicate_find_option::FIND_LAST;
+
 template <typename T, std::enable_if_t<cudf::is_numeric<T>(), void>* = nullptr>
 auto create_scalar_search_key(T const& value)
 {
@@ -101,238 +108,381 @@ auto create_null_search_key()
 
 }  // namespace
 
-TYPED_TEST(TypedContainsTest, ListContainsScalarWithNoNulls)
+using iterators::all_nulls;
+using iterators::null_at;
+using iterators::nulls_at;
+using bools   = fixed_width_column_wrapper<bool>;
+using indices = fixed_width_column_wrapper<size_type>;
+
+TYPED_TEST(TypedContainsTest, ScalarKeyWithNoNulls)
 {
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{
-    {0, 1, 2},
-    {3, 4, 5},
-    {6, 7, 8},
-    {9, 0, 1},
-    {2, 3, 4},
-    {5, 6, 7},
-    {8, 9, 0},
-    {},
-    {1, 2, 3},
-    {}}.release();
-  auto search_key_one  = create_scalar_search_key<T>(1);
-  auto actual_result   = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result = fixed_width_column_wrapper<bool>{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
+  auto search_space   = lists_column_view{lists_column_wrapper<T, int32_t>{{0, 1, 2, 1},
+                                                                         {3, 4, 5},
+                                                                         {6, 7, 8},
+                                                                         {9, 0, 1, 3, 1},
+                                                                         {2, 3, 4},
+                                                                         {5, 6, 7},
+                                                                         {8, 9, 0},
+                                                                         {},
+                                                                         {1, 2, 1, 3},
+                                                                         {}}};
+  auto search_key_one = create_scalar_search_key<T>(1);
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space, *search_key_one);
+    auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS
+    auto result   = lists::contains_nulls(search_space);
+    auto expected = bools{0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space, *search_key_one, FIND_FIRST);
+    auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space, *search_key_one, FIND_LAST);
+    auto expected = indices{3, absent, absent, 4, absent, absent, absent, absent, 2, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedContainsTest, ListContainsScalarWithNullLists)
+TYPED_TEST(TypedContainsTest, ScalarKeyWithNullLists)
 {
   // Test List columns that have NULL list rows.
-
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{
-    {{0, 1, 2},
-     {3, 4, 5},
-     {6, 7, 8},
-     {},
-     {9, 0, 1},
-     {2, 3, 4},
-     {5, 6, 7},
-     {8, 9, 0},
-     {},
-     {1, 2, 3},
-     {}},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 3) && (i != 10);
-    })}.release();
-
+  auto search_space   = lists_column_view{lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                                          {3, 4, 5},
+                                                                          {6, 7, 8},
+                                                                          {},
+                                                                          {9, 0, 1, 3, 1},
+                                                                          {2, 3, 4},
+                                                                          {5, 6, 7},
+                                                                          {8, 9, 0},
+                                                                          {},
+                                                                          {1, 2, 2, 3},
+                                                                          {}},
+                                                                         nulls_at({3, 10})}};
   auto search_key_one = create_scalar_search_key<T>(1);
-  auto actual_result  = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0},
-                                     cudf::detail::make_counting_transform_iterator(
-                                       0, [](auto i) { return (i != 3) && (i != 10); })};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space, *search_key_one);
+    auto expected = bools{{1, 0, 0, x, 1, 0, 0, 0, 0, 1, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS
+    auto result   = lists::contains_nulls(search_space);
+    auto expected = bools{{0, 0, 0, x, 0, 0, 0, 0, 0, 0, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST);
+    auto expected =
+      indices{{1, absent, absent, x, 2, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result = lists::index_of(search_space, *search_key_one, FIND_LAST);
+    auto expected =
+      indices{{3, absent, absent, x, 4, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 TYPED_TEST(TypedContainsTest, SlicedLists)
 {
   // Test sliced List columns.
-
   using namespace cudf;
+  using T = TypeParam;
 
-  using T     = TypeParam;
-  using bools = fixed_width_column_wrapper<bool>;
-
-  auto search_space = lists_column_wrapper<T, int32_t>{
-    {{0, 1, 2},
-     {3, 4, 5},
-     {6, 7, 8},
-     {},
-     {9, 0, 1},
-     {2, 3, 4},
-     {5, 6, 7},
-     {8, 9, 0},
-     {},
-     {1, 2, 3},
-     {}},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 3) && (i != 10);
-    })}.release();
-
-  auto sliced_column_1 = cudf::detail::slice(search_space->view(), {1, 8}).front();
-
-  auto search_key_one = create_scalar_search_key<T>(1);
-  auto result_1       = lists::contains(sliced_column_1, *search_key_one);
-
-  auto expected_result_1 = bools{
-    {0, 0, 0, 1, 0, 0, 0}, cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 2);
-    })}.release();
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_1->view(), result_1->view());
-
-  auto sliced_column_2 = cudf::detail::slice(search_space->view(), {3, 10}).front();
-
-  auto result_2 = lists::contains(sliced_column_2, *search_key_one);
+  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                        {3, 4, 5},
+                                                        {6, 7, 8},
+                                                        {},
+                                                        {9, 0, 1, 3, 1},
+                                                        {2, 3, 4},
+                                                        {5, 6, 7},
+                                                        {8, 9, 0},
+                                                        {},
+                                                        {1, 2, 1, 3},
+                                                        {}},
+                                                       nulls_at({3, 10})};
 
-  auto expected_result_2 = bools{
-    {0, 1, 0, 0, 0, 0, 1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 0);
-    })}.release();
+  {
+    // First Slice.
+    auto sliced_column_1 = cudf::detail::slice(search_space, {1, 8}).front();
+    auto search_key_one  = create_scalar_search_key<T>(1);
+    {
+      // CONTAINS
+      auto result          = lists::contains(sliced_column_1, *search_key_one);
+      auto expected_result = bools{{0, 0, x, 1, 0, 0, 0}, null_at(2)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // CONTAINS NULLS
+      auto result          = lists::contains_nulls(sliced_column_1);
+      auto expected_result = bools{{0, 0, x, 0, 0, 0, 0}, null_at(2)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // FIND_FIRST
+      auto result          = lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST);
+      auto expected_result = indices{{absent, absent, 0, 2, absent, absent, absent}, null_at(2)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // FIND_LAST
+      auto result          = lists::index_of(sliced_column_1, *search_key_one, FIND_LAST);
+      auto expected_result = indices{{absent, absent, 0, 4, absent, absent, absent}, null_at(2)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+  }
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_2->view(), result_2->view());
+  {
+    // Second Slice.
+    auto sliced_column_2 = cudf::detail::slice(search_space, {3, 10}).front();
+    auto search_key_one  = create_scalar_search_key<T>(1);
+    {
+      // CONTAINS
+      auto result          = lists::contains(sliced_column_2, *search_key_one);
+      auto expected_result = bools{{x, 1, 0, 0, 0, 0, 1}, null_at(0)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // CONTAINS NULLS
+      auto result          = lists::contains_nulls(sliced_column_2);
+      auto expected_result = bools{{x, 0, 0, 0, 0, 0, 0}, null_at(0)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // FIND_FIRST
+      auto result          = lists::index_of(sliced_column_2, *search_key_one, FIND_FIRST);
+      auto expected_result = indices{{0, 2, absent, absent, absent, absent, 0}, null_at(0)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+    {
+      // FIND_LAST
+      auto result          = lists::index_of(sliced_column_2, *search_key_one, FIND_LAST);
+      auto expected_result = indices{{0, 4, absent, absent, absent, absent, 2}, null_at(0)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
+    }
+  }
 }
 
-TYPED_TEST(TypedContainsTest, ListContainsScalarNonNullListsWithNullValues)
+TYPED_TEST(TypedContainsTest, ScalarKeyNonNullListsWithNullValues)
 {
   // Test List columns that have no NULL list rows, but NULL elements in some list rows.
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto search_space =
-    make_lists_column(8,
-                      fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
-                      numerals.release(),
-                      0,
-                      {});
-
+  auto numerals     = fixed_width_column_wrapper<T>{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto search_space = make_lists_column(
+    8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {});
+  // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ]
   auto search_key_one = create_scalar_search_key<T>(1);
-  auto actual_result  = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 1, 0, 1, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS
+    auto result   = lists::contains_nulls(search_space->view());
+    auto expected = bools{1, 0, 1, 0, 0, 1, 1, 1};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 3};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedContainsTest, ListContainsScalarWithNullsInLists)
+TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists)
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
+  auto numerals = fixed_width_column_wrapper<T>{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto input_null_mask_iter = null_at(4);
 
   auto search_space = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
+    indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
+  // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
   auto search_key_one = create_scalar_search_key<T>(1);
-  auto actual_result  = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 1, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS.
+    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS.
+    auto result   = lists::contains_nulls(search_space->view());
+    auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TEST_F(ContainsTest, BoolListContainsScalarWithNullsInLists)
+TEST_F(ContainsTest, BoolScalarWithNullsInLists)
 {
   using T = bool;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
-
-  auto search_space = make_lists_column(
+  auto numerals = fixed_width_column_wrapper<T>{{x, 1, 1, x, 1, 1, x, 1, 1, x, x, 1, 1, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto input_null_mask_iter = null_at(4);
+  auto search_space         = make_lists_column(
     8,
     fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
+  // Search space: [ [x], [1,1], [x,1,1,x], [], x, [1,1,x], [x], [1,1,x,1] ]
   auto search_key_one = create_scalar_search_key<T>(1);
-  auto actual_result  = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 0, 1, 1, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto expected = bools{{0, 1, 1, 0, x, 1, 0, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS
+    auto result   = lists::contains_nulls(search_space->view());
+    auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto expected = indices{{absent, 0, 1, absent, x, 0, absent, 0}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto expected = indices{{absent, 1, 2, absent, x, 1, absent, 3}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TEST_F(ContainsTest, StringListContainsScalarWithNullsInLists)
+TEST_F(ContainsTest, StringScalarWithNullsInLists)
 {
   using T = std::string;
 
   auto strings = strings_column_wrapper{
-    {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "1", "2", "3", "4"},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
-
-  auto search_space = make_lists_column(
+    {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"},
+    nulls_at({0, 3, 6, 9, 10, 13})};
+  auto input_null_mask_iter = null_at(4);
+  auto search_space         = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
+    indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     strings.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
+  // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
   auto search_key_one = create_scalar_search_key<T>("1");
-  auto actual_result  = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 1, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // CONTAINS NULLS
+    auto result   = lists::contains_nulls(search_space->view());
+    auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST.
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedContainsTest, ContainsScalarNullSearchKey)
+TYPED_TEST(TypedContainsTest, ScalarNullSearchKey)
 {
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{
-    {{0, 1, 2},
-     {3, 4, 5},
-     {6, 7, 8},
-     {},
-     {9, 0, 1},
-     {2, 3, 4},
-     {5, 6, 7},
-     {8, 9, 0},
-     {},
-     {1, 2, 3},
-     {}},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 3) && (i != 10);
-    })}.release();
-
+  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2},
+                                                        {3, 4, 5},
+                                                        {6, 7, 8},
+                                                        {},
+                                                        {9, 0, 1},
+                                                        {2, 3, 4},
+                                                        {5, 6, 7},
+                                                        {8, 9, 0},
+                                                        {},
+                                                        {1, 2, 3},
+                                                        {}},
+                                                       nulls_at({3, 10})}
+                        .release();
   auto search_key_null = create_null_search_key<T>();
-  auto actual_result   = lists::contains(search_space->view(), *search_key_null);
-  auto expected_result = fixed_width_column_wrapper<bool>{
-    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; })};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_null);
+    auto expected = bools{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), *search_key_null, FIND_FIRST);
+    auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), *search_key_null, FIND_LAST);
+    auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 TEST_F(ContainsTest, ScalarTypeRelatedExceptions)
@@ -346,9 +496,12 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions)
        {4, 5, 6}}}.release();
     auto skey = create_scalar_search_key<int32_t>(10);
     CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), *skey),
-                              "Nested types not supported in lists::contains()");
+                              "Nested types not supported in list search operations.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_FIRST),
+                              "Nested types not supported in list search operations.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_LAST),
+                              "Nested types not supported in list search operations.");
   }
-
   {
     // Search key must match list elements in type.
     auto list_of_ints =
@@ -360,6 +513,10 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions)
     auto skey = create_scalar_search_key<std::string>("Hello, World!");
     CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), *skey),
                               "Type/Scale of search key does not match list column element type.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_FIRST),
+                              "Type/Scale of search key does not match list column element type.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_LAST),
+                              "Type/Scale of search key does not match list column element type.");
   }
 }
 
@@ -367,199 +524,275 @@ template <typename T>
 struct TypedVectorContainsTest : public ContainsTest {
 };
 
-using VectorContainsTestTypes =
+using VectorTestTypes =
   cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
 
-TYPED_TEST_SUITE(TypedVectorContainsTest, VectorContainsTestTypes);
+TYPED_TEST_SUITE(TypedVectorContainsTest, VectorTestTypes);
 
-TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNoNulls)
+TYPED_TEST(TypedVectorContainsTest, VectorKeysWithNoNulls)
 {
   using T = TypeParam;
 
   auto search_space = lists_column_wrapper<T, int32_t>{
-    {0, 1, 2},
+    {0, 1, 2, 1},
     {3, 4, 5},
     {6, 7, 8},
-    {9, 0, 1},
+    {9, 0, 1, 3, 1},
     {2, 3, 4},
     {5, 6, 7},
     {8, 9, 0},
     {},
-    {1, 2, 3},
+    {1, 2, 3, 3},
     {}}.release();
 
-  auto search_key      = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1};
-  auto actual_result   = lists::contains(search_space->view(), search_key);
-  auto expected_result = fixed_width_column_wrapper<bool>{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_key = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1};
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_key);
+    auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_key, FIND_FIRST);
+    auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_key, FIND_LAST);
+    auto expected = indices{3, absent, absent, 4, 0, absent, absent, absent, 3, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullLists)
+TYPED_TEST(TypedVectorContainsTest, VectorWithNullLists)
 {
   // Test List columns that have NULL list rows.
 
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{
-    {{0, 1, 2},
-     {3, 4, 5},
-     {6, 7, 8},
-     {},
-     {9, 0, 1},
-     {2, 3, 4},
-     {5, 6, 7},
-     {8, 9, 0},
-     {},
-     {1, 2, 3},
-     {}},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
-      return (i != 3) && (i != 10);
-    })}.release();
-
-  auto search_keys   = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2};
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0},
-                                     cudf::detail::make_counting_transform_iterator(
-                                       0, [](auto i) { return (i != 3) && (i != 10); })};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                        {3, 4, 5},
+                                                        {6, 7, 8},
+                                                        {},
+                                                        {9, 0, 1, 3, 1},
+                                                        {2, 3, 4},
+                                                        {5, 6, 7},
+                                                        {8, 9, 0},
+                                                        {},
+                                                        {1, 2, 3, 3},
+                                                        {}},
+                                                       nulls_at({3, 10})}
+                        .release();
+
+  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2};
+
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected =
+      indices{{1, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected =
+      indices{{3, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedVectorContainsTest, ListContainsVectorNonNullListsWithNullValues)
+TYPED_TEST(TypedVectorContainsTest, VectorNonNullListsWithNullValues)
 {
   // Test List columns that have no NULL list rows, but NULL elements in some list rows.
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto search_space =
-    make_lists_column(8,
-                      fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(),
-                      numerals.release(),
-                      0,
-                      {});
-
-  auto search_keys   = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 3};
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 1, 1}, {0, 1, 0, 1, 1, 0, 1, 1}};
+  auto numerals = fixed_width_column_wrapper<T>{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_space = make_lists_column(
+    8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {});
+  // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ]
+  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected = indices{absent, 1, absent, absent, absent, absent, absent, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected = indices{absent, 1, absent, absent, absent, absent, absent, 3};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInLists)
+TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists)
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
+  auto numerals = fixed_width_column_wrapper<T>{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
 
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
+  auto input_null_mask_iter = null_at(4);
 
   auto search_space = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(),
+    indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
+  // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
 
-  auto search_keys   = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 3};
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 1, 1}, {0, 1, 0, 1, 0, 0, 1, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 0}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 3}, null_at(4)};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearchKeys)
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
+  auto numerals = fixed_width_column_wrapper<T>{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
 
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
+  auto input_null_mask_iter = null_at(4);
 
   auto search_space = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(),
+    indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
+  // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{
-    {1, 2, 3, 1, 2, 3, 1, 3},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })};
-
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_keys = fixed_width_column_wrapper<T, int32_t>{{1, 2, 3, x, 2, 3, 1, 1}, null_at(3)};
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TEST_F(ContainsTest, BoolListContainsVectorWithNullsInListsAndInSearchKeys)
+TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys)
 {
   using T = bool;
 
-  auto numerals = fixed_width_column_wrapper<T, int32_t>{
-    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
+  auto numerals = fixed_width_column_wrapper<T>{{x, 0, 1, x, 1, 1, x, 1, 1, x, x, 0, 1, x, 1},
+                                                nulls_at({0, 3, 6, 9, 10, 13})};
 
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
+  auto input_null_mask_iter = null_at(4);
 
   auto search_space = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(),
+    indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{
-    {0, 1, 0, 1, 0, 0, 1, 1},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })};
-
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_keys = fixed_width_column_wrapper<T, int32_t>{{0, 1, 0, x, 0, 0, 1, 1}, null_at(3)};
+  // Search space: [ [x], [0,1], [x,1,1,x], [], x, [1,1,x], [x], [0,1,x,1] ]
+  // Search keys : [  0,   1,     0,         x, 0,  0,       1,   1        ]
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 1}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TEST_F(ContainsTest, StringListContainsVectorWithNullsInListsAndInSearchKeys)
+TEST_F(ContainsTest, StringKeyVectorWithNullsInListsAndInSearchKeys)
 {
-  auto numerals = strings_column_wrapper{
-    {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "1", "2", "3", "4"},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })};
-
-  auto input_null_mask_iter =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; });
-
-  auto search_space = make_lists_column(
+  auto strings = strings_column_wrapper{
+    {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"},
+    nulls_at({0, 3, 6, 9, 10, 13})};
+  auto input_null_mask_iter = null_at(4);
+  auto search_space         = make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(),
-    numerals.release(),
+    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
+    strings.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
-  auto search_keys = strings_column_wrapper{
-    {"1", "2", "3", "1", "2", "3", "1", "3"},
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })};
+  auto search_keys = strings_column_wrapper{{"1", "2", "3", "X", "2", "3", "1", "1"}, null_at(3)};
 
-  auto actual_result = lists::contains(search_space->view(), search_keys);
-  auto expected_result =
-    fixed_width_column_wrapper<bool>{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}};
+  // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
+  // Search keys:  [  1,   2,     3,         X, 2,  3,       1,   1]
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_keys);
+    auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 TEST_F(ContainsTest, VectorTypeRelatedExceptions)
@@ -573,9 +806,12 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions)
        {4, 5, 6}}}.release();
     auto skey = fixed_width_column_wrapper<int32_t>{0, 1, 2};
     CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), skey),
-                              "Nested types not supported in lists::contains()");
+                              "Nested types not supported in list search operations.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_FIRST),
+                              "Nested types not supported in list search operations.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_LAST),
+                              "Nested types not supported in list search operations.");
   }
-
   {
     // Search key must match list elements in type.
     auto list_of_ints =
@@ -587,15 +823,21 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions)
     auto skey = strings_column_wrapper{"Hello", "World"};
     CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey),
                               "Type/Scale of search key does not match list column element type.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST),
+                              "Type/Scale of search key does not match list column element type.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST),
+                              "Type/Scale of search key does not match list column element type.");
   }
-
   {
     // Search key column size must match lists column size.
     auto list_of_ints = lists_column_wrapper<int32_t>{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}.release();
-
-    auto skey = fixed_width_column_wrapper<int32_t>{0, 1, 2, 3};
+    auto skey         = fixed_width_column_wrapper<int32_t>{0, 1, 2, 3};
     CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey),
                               "Number of search keys must match list column size.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST),
+                              "Number of search keys must match list column size.");
+    CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST),
+                              "Number of search keys must match list column size.");
   }
 }
 
@@ -605,6 +847,7 @@ struct TypedContainsNaNsTest : public ContainsTest {
 
 TYPED_TEST_SUITE(TypedContainsNaNsTest, FloatingPointTypes);
 
+namespace {
 template <typename T>
 T get_nan(const char* nan_contents)
 {
@@ -616,8 +859,9 @@ float get_nan<float>(const char* nan_contents)
 {
   return std::nanf(nan_contents);
 }
+}  // namespace
 
-TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsScalar)
+TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsScalar)
 {
   using T = TypeParam;
 
@@ -637,11 +881,25 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsScalar)
     {1, 2, 3},
     {}}.release();
 
-  auto search_key_nan  = create_scalar_search_key<T>(nan_3);
-  auto actual_result   = lists::contains(search_space->view(), *search_key_nan);
-  auto expected_result = fixed_width_column_wrapper<bool>{0, 0, 0, 0, 1, 0, 1, 0, 0, 0};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto search_key_nan = create_scalar_search_key<T>(nan_3);
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_nan);
+    auto expected = bools{0, 0, 0, 0, 1, 0, 1, 0, 0, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST);
+    auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), *search_key_nan, FIND_LAST);
+    auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
@@ -652,19 +910,18 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
   // presence of NaN values:
   //   1. If the search key is null, null is still returned.
   //   2. If the list contains a null, and the non-null search
-  //      key is not found, null is returned.
+  //      key is not found:
+  //      a) contains() returns `false`.
+  //      b) index_of() returns -1.
   using T = TypeParam;
 
   auto nan_1 = get_nan<T>("1");
   auto nan_2 = get_nan<T>("2");
   auto nan_3 = get_nan<T>("3");
 
-  auto null_at_index_2 =
-    cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; });
-
   auto search_space = lists_column_wrapper<T>{
     {0.0, 1.0, 2.0},
-    {{3, 4, 5}, null_at_index_2},  // i.e. {3, 4, ∅}.
+    {{3, 4, 5}, null_at(2)},  // i.e. {3, 4, ∅}.
     {6, 7, 8},
     {9, 0, 1},
     {nan_1, 3.0, 4.0},
@@ -679,33 +936,52 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
   {
     // With nulls in the search key rows. (At index 2.)
     auto search_keys =
-      fixed_width_column_wrapper<T>{
-        search_key_values.begin(), search_key_values.end(), null_at_index_2}
+      fixed_width_column_wrapper<T>{search_key_values.begin(), search_key_values.end(), null_at(2)}
         .release();
 
-    auto actual_result = lists::contains(search_space->view(), search_keys->view());
-    auto null_at_index_1_and_2 =
-      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1 && i != 2; });
-
-    auto expected_result =
-      fixed_width_column_wrapper<bool>{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at_index_1_and_2};
-
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+    {
+      // CONTAINS
+      auto result   = lists::contains(search_space->view(), search_keys->view());
+      auto expected = bools{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at(2)};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
+    {
+      // FIND_FIRST
+      auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
+      auto expected =
+        indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
+    {
+      // FIND_LAST
+      auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
+      auto expected =
+        indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
   }
-
   {
     // No nulls in the search key rows.
     auto search_keys =
       fixed_width_column_wrapper<T>(search_key_values.begin(), search_key_values.end()).release();
-
-    auto actual_result = lists::contains(search_space->view(), search_keys->view());
-    auto null_at_index_1 =
-      cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; });
-
-    auto expected_result =
-      fixed_width_column_wrapper<bool>{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at_index_1};
-
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+    {
+      // CONTAINS
+      auto result   = lists::contains(search_space->view(), search_keys->view());
+      auto expected = bools{1, 0, 0, 0, 1, 0, 1, 0, 1, 0};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
+    {
+      // FIND_FIRST
+      auto result   = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
+      auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
+    {
+      // FIND_LAST
+      auto result   = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
+      auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent};
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+    }
   }
 }
 
@@ -715,50 +991,79 @@ struct TypedContainsDecimalsTest : public ContainsTest {
 
 TYPED_TEST_SUITE(TypedContainsDecimalsTest, FixedPointTypes);
 
-TYPED_TEST(TypedContainsDecimalsTest, ListContainsScalar)
+TYPED_TEST(TypedContainsDecimalsTest, ScalarKey)
 {
   using T = TypeParam;
 
-  auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
-                                                   2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
-  auto decimals     = fixed_point_column_wrapper<typename T::rep>{
-    values.begin(), values.end(), numeric::scale_type{0}};
-
-  auto list_offsets = fixed_width_column_wrapper<size_type>{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
-
-  auto const search_space =
-    make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
-
-  auto search_key_one  = make_fixed_point_scalar<T>(typename T::rep{1}, numeric::scale_type{0});
-  auto actual_result   = lists::contains(search_space->view(), *search_key_one);
-  auto expected_result = fixed_width_column_wrapper<bool>{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  auto const search_space = [] {
+    auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
+                                                     2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
+    auto decimals     = fixed_point_column_wrapper<typename T::rep>{
+      values.begin(), values.end(), numeric::scale_type{0}};
+    auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
+    return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
+  }();
+  auto search_key_one = make_fixed_point_scalar<T>(typename T::rep{1}, numeric::scale_type{0});
+
+  // Search space: [[0,1,2], [3,4,5], [6,7,8], [9,0,1], [2,3,4], [5,6,7], [8,9,0], [], [1,2,3], []]
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
-TYPED_TEST(TypedContainsDecimalsTest, ListContainsVector)
+TYPED_TEST(TypedContainsDecimalsTest, VectorKey)
 {
   using T = TypeParam;
 
-  auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
-                                                   2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
-  auto decimals     = fixed_point_column_wrapper<typename T::rep>{
-    values.begin(), values.end(), numeric::scale_type{0}};
-
-  auto list_offsets = fixed_width_column_wrapper<size_type>{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
-
-  auto const search_space =
-    make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
+  auto const search_space = [] {
+    auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
+                                                     2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
+    auto decimals     = fixed_point_column_wrapper<typename T::rep>{
+      values.begin(), values.end(), numeric::scale_type{0}};
+    auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
+    return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
+  }();
 
   auto search_key = fixed_point_column_wrapper<typename T::rep>{
     {1, 2, 3, 1, 2, 3, 1, 2, 3, 1},
     numeric::scale_type{
       0}}.release();
 
-  auto actual_result   = lists::contains(search_space->view(), search_key->view());
-  auto expected_result = fixed_width_column_wrapper<bool>{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
-
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
+  // Search space: [[0,1,2], [3,4,5], [6,7,8], [9,0,1], [2,3,4], [5,6,7], [8,9,0], [], [1,2,3], []]
+  // Search keys:  [1, 2, 3, 1, 2, 3, 1, 2, 3, 1]
+  {
+    // CONTAINS
+    auto result   = lists::contains(search_space->view(), search_key->view());
+    auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_FIRST
+    auto result   = lists::index_of(search_space->view(), search_key->view(), FIND_FIRST);
+    auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
+  {
+    // FIND_LAST
+    auto result   = lists::index_of(search_space->view(), search_key->view(), FIND_LAST);
+    auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
+  }
 }
 
 }  // namespace test
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 5153c5c1d2a..a2e080e02f6 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -3170,8 +3170,6 @@ public static ColumnView fromDeviceBuffer(BaseDeviceMemoryBuffer buffer,
    * Output `column[i]` is set to null if one or more of the following are true:
    * 1. The key is null
    * 2. The column vector list value is null
-   * 3. The list row does not contain the key, and contains at least
-   *    one null.
    * @param key the scalar to look up
    * @return a Boolean ColumnVector with the result of the lookup
    */
@@ -3183,10 +3181,9 @@ public final ColumnVector listContains(Scalar key) {
   /**
    * Create a column of bool values indicating whether the list rows of the first
    * column contain the corresponding values in the second column.
+   * Output `column[i]` is set to null if one or more of the following are true:
    * 1. The key value is null
    * 2. The column vector list value is null
-   * 3. The list row does not contain the key, and contains at least
-   *    one null.
    * @param key the ColumnVector with look up values
    * @return a Boolean ColumnVector with the result of the lookup
    */
@@ -3195,6 +3192,58 @@ public final ColumnVector listContainsColumn(ColumnView key) {
     return new ColumnVector(listContainsColumn(getNativeView(), key.getNativeView()));
   }
 
+  /**
+   * Create a column of bool values indicating whether the list rows of the specified
+   * column contain null elements.
+   * Output `column[i]` is set to null iff the input list row is null.
+   * @return a Boolean ColumnVector with the result of the lookup
+   */
+  public final ColumnVector listContainsNulls() {
+    assert type.equals(DType.LIST) : "column type must be a LIST";
+    return new ColumnVector(listContainsNulls(getNativeView()));
+  }
+
+  /**
+   * Enum to choose behaviour of listIndexOf functions:
+   *   1. FIND_FIRST finds the first occurrence of a search key.
+   *   2. FIND_LAST finds the last occurrence of a search key.
+   */
+  public enum FindOptions {FIND_FIRST, FIND_LAST};
+
+  /**
+   * Create a column of int32 indices, indicating the position of the scalar search key
+   * in each list row.
+   * All indices are 0-based. If a search key is not found, the index is set to -1.
+   * The index is set to null if one of the following is true: 
+   * 1. The search key is null.
+   * 2. The list row is null.
+   * @param key The scalar search key
+   * @param findOption Whether to find the first index of the key, or the last.
+   * @return The resultant column of int32 indices
+   */
+  public final ColumnVector listIndexOf(Scalar key, FindOptions findOption) {
+    assert type.equals(DType.LIST) : "column type must be a LIST";
+    boolean isFindFirst = findOption == FindOptions.FIND_FIRST;
+    return new ColumnVector(listIndexOfScalar(getNativeView(), key.getScalarHandle(), isFindFirst));
+  }
+
+  /**
+   * Create a column of int32 indices, indicating the position of each row in the
+   * search key column in the corresponding row of the lists column.
+   * All indices are 0-based. If a search key is not found, the index is set to -1.
+   * The index is set to null if one of the following is true: 
+   * 1. The search key row is null.
+   * 2. The list row is null.
+   * @param keys ColumnView of search keys.
+   * @param findOption Whether to find the first index of the key, or the last.
+   * @return The resultant column of int32 indices
+   */
+  public final ColumnVector listIndexOf(ColumnView keys, FindOptions findOption) {
+    assert type.equals(DType.LIST) : "column type must be a LIST";
+    boolean isFindFirst = findOption == FindOptions.FIND_FIRST;
+    return new ColumnVector(listIndexOfColumn(getNativeView(), keys.getNativeView(), isFindFirst));
+  }
+
   /**
    * Segmented sort of the elements within a list in each row of a list column.
    * NOTICE: list columns with nested child are NOT supported yet.
@@ -3616,6 +3665,33 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat
    */
   private static native long listContainsColumn(long nativeView, long keyColumn);
 
+  /**
+   * Native method to search list rows for null elements.
+   * @param nativeView the column view handle of the list
+   * @return column handle of the resultant boolean column 
+   */
+  private static native long listContainsNulls(long nativeView);
+
+  /**
+   * Native method to find the first (or last) index of a specified scalar key,
+   * in each row of a list column.
+   * @param nativeView the column view handle of the list
+   * @param scalarKeyHandle handle to the scalar search key
+   * @param isFindFirst Whether to find the first index of the key, or the last.
+   * @return column handle of the resultant column of int32 indices
+   */
+  private static native long listIndexOfScalar(long nativeView, long scalarKeyHandle, boolean isFindFirst);
+
+  /**
+   * Native method to find the first (or last) index of each search key in the specified column,
+   * in each row of a list column.
+   * @param nativeView the column view handle of the list
+   * @param keyColumnHandle handle to the search key column
+   * @param isFindFirst Whether to find the first index of the key, or the last.
+   * @return column handle of the resultant column of int32 indices
+   */
+  private static native long listIndexOfColumn(long nativeView, long keyColumnHandle, boolean isFindFirst);
+
   private static native long listSortRows(long nativeView, boolean isDescending, boolean isNullSmallest);
 
   private static native long getElement(long nativeView, int index);
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 4cd4b070aed..73ea49c18d9 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -511,6 +511,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContains(JNIEnv *env,
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsNulls(JNIEnv *env, jclass,
+                                                                         jlong column_view) {
+  JNI_NULL_CHECK(env, column_view, "column is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto cv = reinterpret_cast<cudf::column_view *>(column_view);
+    auto lcv = cudf::lists_column_view{*cv};
+    return reinterpret_cast<jlong>(cudf::lists::contains_nulls(lcv).release());
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsColumn(JNIEnv *env, jclass,
                                                                           jlong column_view,
                                                                           jlong lookup_key_cv) {
@@ -528,6 +540,44 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsColumn(JNIEnv
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listIndexOfScalar(JNIEnv *env, jclass,
+                                                                         jlong column_view,
+                                                                         jlong lookup_key,
+                                                                         jboolean is_find_first) {
+  JNI_NULL_CHECK(env, column_view, "column is null", 0);
+  JNI_NULL_CHECK(env, lookup_key, "lookup scalar is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const cv = reinterpret_cast<cudf::column_view const *>(column_view);
+    auto const lcv = cudf::lists_column_view{*cv};
+    auto const lookup_key_scalar = reinterpret_cast<cudf::scalar const *>(lookup_key);
+    auto const find_option = is_find_first ? cudf::lists::duplicate_find_option::FIND_FIRST :
+                                             cudf::lists::duplicate_find_option::FIND_LAST;
+    auto result = cudf::lists::index_of(lcv, *lookup_key_scalar, find_option);
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listIndexOfColumn(JNIEnv *env, jclass,
+                                                                         jlong column_view,
+                                                                         jlong lookup_keys,
+                                                                         jboolean is_find_first) {
+  JNI_NULL_CHECK(env, column_view, "column is null", 0);
+  JNI_NULL_CHECK(env, lookup_keys, "lookup key column is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const cv = reinterpret_cast<cudf::column_view const *>(column_view);
+    auto const lcv = cudf::lists_column_view{*cv};
+    auto const lookup_key_column = reinterpret_cast<cudf::column_view const *>(lookup_keys);
+    auto const find_option = is_find_first ? cudf::lists::duplicate_find_option::FIND_FIRST :
+                                             cudf::lists::duplicate_find_option::FIND_LAST;
+    auto result = cudf::lists::index_of(lcv, *lookup_key_column, find_option);
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listSortRows(JNIEnv *env, jclass,
                                                                     jlong column_view,
                                                                     jboolean is_descending,
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index b78183692a3..0771de9492d 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -18,6 +18,7 @@
 
 package ai.rapids.cudf;
 
+import ai.rapids.cudf.ColumnView.FindOptions;
 import ai.rapids.cudf.HostColumnVector.*;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
@@ -4364,70 +4365,160 @@ void testDropListDuplicatesWithKeysValues() {
     }
   }
 
+  @SafeVarargs
+  private static <T> ColumnVector makeListsColumn(DType childDType, List<T>... rows) {
+    HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType);
+    HostColumnVector.DataType listType  = new HostColumnVector.ListType(true, childType);
+    return ColumnVector.fromLists(listType, rows);
+  }
+
   @Test
   void testListContainsString() {
-    List<String> list1 = Arrays.asList("Héllo there", "thésé");
-    List<String> list2 = Arrays.asList("", "ARé some", "test strings");
-    List<String> list3 = Arrays.asList(null, "", "ARé some", "test strings", "thésé");
-    List<String> list4 = Arrays.asList(null, "", "ARé some", "test strings");
-    List<String> list5 = null;
-    try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true,
-        new HostColumnVector.BasicType(true, DType.STRING)), list1, list2, list3, list4, list5);
-         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, null, null);
-         Scalar strScalar = Scalar.fromString("thésé");
-         ColumnVector result = v.listContains(strScalar)) {
+    List<String> list0 = Arrays.asList("Héllo there", "thésé");
+    List<String> list1 = Arrays.asList("", "ARé some", "test strings");
+    List<String> list2 = Arrays.asList(null, "", "ARé some", "test strings", "thésé");
+    List<String> list3 = Arrays.asList(null, "", "ARé some", "test strings");
+    List<String> list4 = null;
+    try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4);
+         Scalar searchKey = Scalar.fromString("thésé");
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, false, null);
+         ColumnVector result = input.listContains(searchKey)) {
       assertColumnsAreEqual(expected, result);
     }
   }
 
   @Test
   void testListContainsInt() {
-    List<Integer> list1 = Arrays.asList(1, 2, 3);
-    List<Integer> list2 = Arrays.asList(4, 5, 6);
-    List<Integer> list3 = Arrays.asList(7, 8, 9);
-    List<Integer> list4 = null;
-    try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true,
-        new HostColumnVector.BasicType(true, DType.INT32)), list1, list2, list3, list4);
+    List<Integer> list0 = Arrays.asList(1, 2, 3);
+    List<Integer> list1 = Arrays.asList(4, 5, 6);
+    List<Integer> list2 = Arrays.asList(7, 8, 9);
+    List<Integer> list3 = null;
+    try (ColumnVector input =  makeListsColumn(DType.INT32, list0, list1, list2, list3);
+         Scalar searchKey = Scalar.fromInt(7);
          ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, null);
-         Scalar intScalar = Scalar.fromInt(7);
-         ColumnVector result = v.listContains(intScalar)) {
+         ColumnVector result = input.listContains(searchKey)) {
       assertColumnsAreEqual(expected, result);
     }
   }
 
   @Test
   void testListContainsStringCol() {
-    List<String> list1 = Arrays.asList("Héllo there", "thésé");
-    List<String> list2 = Arrays.asList("", "ARé some", "test strings");
-    List<String> list3 = Arrays.asList("FOO", "", "ARé some", "test");
+    List<String> list0 = Arrays.asList("Héllo there", "thésé");
+    List<String> list1 = Arrays.asList("", "ARé some", "test strings");
+    List<String> list2 = Arrays.asList("FOO", "", "ARé some", "test");
+    List<String> list3 = Arrays.asList(null, "FOO", "", "ARé some", "test");
     List<String> list4 = Arrays.asList(null, "FOO", "", "ARé some", "test");
-    List<String> list5 = Arrays.asList(null, "FOO", "", "ARé some", "test");
-    List<String> list6 = null;
-    try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true,
-        new HostColumnVector.BasicType(true, DType.STRING)), list1, list2, list3, list4, list5, list6);
-         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, true, true, null, null);
-         ColumnVector strCol = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null);
-         ColumnVector result = v.listContainsColumn(strCol)) {
+    List<String> list5 = null;
+    try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5);
+         ColumnVector searchKeys = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null);
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, true, true, false, null);
+         ColumnVector result = input.listContainsColumn(searchKeys)) {
       assertColumnsAreEqual(expected, result);
     }
   }
 
   @Test
   void testListContainsIntCol() {
-    List<Integer> list1 = Arrays.asList(1, 2, 3);
-    List<Integer> list2 = Arrays.asList(4, 5, 6);
+    List<Integer> list0 = Arrays.asList(1, 2, 3);
+    List<Integer> list1 = Arrays.asList(4, 5, 6);
+    List<Integer> list2 = Arrays.asList(null, 8, 9);
     List<Integer> list3 = Arrays.asList(null, 8, 9);
-    List<Integer> list4 = Arrays.asList(null, 8, 9);
-    List<Integer> list5 = null;
-    try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true,
-        new HostColumnVector.BasicType(true, DType.INT32)), list1, list2, list3, list4, list5);
-         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, null, null);
-         ColumnVector intCol = ColumnVector.fromBoxedInts(3, 3, 8, 3, null);
-         ColumnVector result = v.listContainsColumn(intCol)) {
+    List<Integer> list4 = null;
+    try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3, list4);
+         ColumnVector searchKeys = ColumnVector.fromBoxedInts(3, 3, 8, 3, null);
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, false, null);
+         ColumnVector result = input.listContainsColumn(searchKeys)) {
+      assertColumnsAreEqual(expected, result);
+    }
+  }
+
+  @Test
+  void testListContainsNulls() {
+    List<String> list0 = Arrays.asList("Héllo there", "thésé");
+    List<String> list1 = Arrays.asList("", "ARé some", "test strings");
+    List<String> list2 = Arrays.asList("FOO", "", "ARé some", "test");
+    List<String> list3 = Arrays.asList(null, "FOO", "", "ARé some", "test");
+    List<String> list4 = Arrays.asList(null, "FOO", "", "ARé some", "test");
+    List<String> list5 = null;
+    try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5);
+         ColumnVector result = input.listContainsNulls();
+         ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, true, true, null)) {
       assertColumnsAreEqual(expected, result);
     }
   }
 
+  @Test
+  void testListIndexOfString() {
+    List<String> list0 = Arrays.asList("Héllo there", "thésé");
+    List<String> list1 = Arrays.asList("", "ARé some", "test strings");
+    List<String> list2 = Arrays.asList(null, "", "ARé some", "thésé", "test strings", "thésé");
+    List<String> list3 = Arrays.asList(null, "", "ARé some", "test strings");
+    List<String> list4 = null;
+    try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4);
+         Scalar searchKey = Scalar.fromString("thésé");
+         ColumnVector expectedFirst = ColumnVector.fromBoxedInts(1, -1, 3, -1, null);
+         ColumnVector resultFirst = input.listIndexOf(searchKey, FindOptions.FIND_FIRST);
+         ColumnVector expectedLast = ColumnVector.fromBoxedInts(1, -1, 5, -1, null);
+         ColumnVector resultLast = input.listIndexOf(searchKey, FindOptions.FIND_LAST)) {
+      assertColumnsAreEqual(expectedFirst, resultFirst);
+      assertColumnsAreEqual(expectedLast, resultLast);
+    }
+  }
+
+  @Test
+  void testListIndexOfInt() {
+    List<Integer> list0 = Arrays.asList(1, 2, 3);
+    List<Integer> list1 = Arrays.asList(4, 5, 6);
+    List<Integer> list2 = Arrays.asList(7, 8, 9, 7);
+    List<Integer> list3 = null;
+    try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3);
+         Scalar searchKey = Scalar.fromInt(7);
+         ColumnVector expectedFirst = ColumnVector.fromBoxedInts(-1, -1, 0, null);
+         ColumnVector resultFirst = input.listIndexOf(searchKey, FindOptions.FIND_FIRST);
+         ColumnVector expectedLast = ColumnVector.fromBoxedInts(-1, -1, 3, null);
+         ColumnVector resultLast = input.listIndexOf(searchKey, FindOptions.FIND_LAST)) {
+      assertColumnsAreEqual(expectedFirst, resultFirst);
+      assertColumnsAreEqual(expectedLast, resultLast);
+    }
+  }
+
+  @Test
+  void testListIndexOfStringCol() {
+    List<String> list0 = Arrays.asList("Héllo there", "thésé");
+    List<String> list1 = Arrays.asList("", "ARé some", "test strings");
+    List<String> list2 = Arrays.asList("FOO", "", "ARé some", "test");
+    List<String> list3 = Arrays.asList(null, "FOO", "", "test", "ARé some", "test");
+    List<String> list4 = Arrays.asList(null, "FOO", "", "ARé some", "test");
+    List<String> list5 = null;
+    try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5);
+         ColumnVector searchKeys = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null);
+         ColumnVector expectedFirst = ColumnVector.fromBoxedInts(1, 0, 3, 3, -1, null);
+         ColumnVector resultFirst = input.listIndexOf(searchKeys, FindOptions.FIND_FIRST);
+         ColumnVector expectedLast = ColumnVector.fromBoxedInts(1, 0, 3, 5, -1, null);
+         ColumnVector resultLast = input.listIndexOf(searchKeys, FindOptions.FIND_LAST)) {
+      assertColumnsAreEqual(expectedFirst, resultFirst);
+      assertColumnsAreEqual(expectedLast, resultLast);
+    }
+  }
+
+  @Test
+  void testListIndexOfIntCol() {
+    List<Integer> list0 = Arrays.asList(1, 2, 3);
+    List<Integer> list1 = Arrays.asList(4, 5, 6);
+    List<Integer> list2 = Arrays.asList(null, 8, 9, 8);
+    List<Integer> list3 = Arrays.asList(null, 8, 9);
+    List<Integer> list4 = null;
+    try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3, list4);
+         ColumnVector searchKeys = ColumnVector.fromBoxedInts(3, 3, 8, 3, null);
+         ColumnVector expectedFirst = ColumnVector.fromBoxedInts(2, -1, 1, -1, null);
+         ColumnVector resultFirst = input.listIndexOf(searchKeys, FindOptions.FIND_FIRST);
+         ColumnVector expectedLast = ColumnVector.fromBoxedInts(2, -1, 3, -1, null);
+         ColumnVector resultLast = input.listIndexOf(searchKeys, FindOptions.FIND_LAST)) {
+      assertColumnsAreEqual(expectedFirst, resultFirst);
+      assertColumnsAreEqual(expectedLast, resultLast);
+    }
+  }
+
   @Test
   void testListSortRowsWithIntChild() {
     List<Integer> list1 = Arrays.asList(1, 3, 0, 2);
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index b898222d7d7..44749103b54 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -304,8 +304,8 @@ def test_get_nulls():
         ([[1, 2, 3], [], [3, 4, 5]], 6, [False, False, False],),
         ([[1.0, 2.0, 3.0], None, []], 2.0, [True, None, False],),
         ([[None, "b", "c"], [], ["b", "e", "f"]], "b", [True, False, True],),
-        ([[None, 2, 3], None, []], 1, [None, None, False]),
-        ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [None, False, False],),
+        ([[None, 2, 3], None, []], 1, [False, None, False]),
+        ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [False, False, False],),
     ],
 )
 def test_contains_scalar(data, scalar, expect):

From 68384ea2e1071d2f35867514d8a6add500d50cc6 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 22 Dec 2021 09:14:27 -0600
Subject: [PATCH 04/12] Merge branch-21.12 into branch-22.02

---
 CHANGELOG.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 36c550926ab..68ff9abc9ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,6 @@
 
 Please see https://github.com/rapidsai/cudf/releases/tag/v22.02.00a for the latest changes to this development branch.
 
-# cuDF 21.12.00 (Date TBD)
 # cuDF 21.12.00 (9 Dec 2021)
 
 ## 🚨 Breaking Changes
@@ -1705,7 +1704,7 @@ Please see https://github.com/rapidsai/cudf/releases/tag/v22.02.00a for the late
 - PR #6459 Add `map` method to series
 - PR #6379 Add list hashing functionality to MD5
 - PR #6498 Add helper method to ColumnBuilder with some nits
-- PR #6336 Add `join` functionality in cudf concat 
+- PR #6336 Add `join` functionality in cudf concat
 - PR #6653 Replaced SHFL_XOR calls with cub::WarpReduce
 - PR #6751 Rework ColumnViewAccess and its usage
 - PR #6698 Remove macros from ORC reader and writer

From 04f4219428f734ddc284aad141a34f9d2bca37f5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 Dec 2021 00:29:27 -0600
Subject: [PATCH 05/12] Use gpuci_mamba_retry to install local artifacts.
 (#9951)

I see CI timeouts occurring at the step where local conda artifacts for libcudf and libcudf_kafka are installed. This PR uses `gpuci_mamba_retry` instead of `conda` to install those local artifacts (this change was also recently made in https://github.com/rapidsai/cugraph/pull/1928).

Example timeouts:
- https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-gpu-test/CUDA=11.5,GPU_LABEL=driver-495,LINUX_VER=ubuntu20.04,PYTHON=3.8/5764/console
- https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-gpu-test/CUDA=11.5,GPU_LABEL=driver-495,LINUX_VER=ubuntu20.04,PYTHON=3.8/5773/console

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/9951
---
 ci/gpu/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 5646c268301..a557a2ef066 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -219,7 +219,7 @@ else
     KAFKA_CONDA_FILE=${KAFKA_CONDA_FILE//-/=} #convert to conda install
 
     gpuci_logger "Installing $CUDF_CONDA_FILE & $KAFKA_CONDA_FILE"
-    conda install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
+    gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CUDF_CONDA_FILE" "$KAFKA_CONDA_FILE"
 
     install_dask
 

From c99a37fb804282565204fd1544d145177620c0a3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 23 Dec 2021 17:40:31 -0600
Subject: [PATCH 06/12] Remove deprecated method Series.hash_encode. (#9942)

This PR removes the deprecated method `Series.hash_encode`. Resolves #9475. Follows up on #9457, #9381.

This PR also removes libcudf code paths used solely for this Python method.

Users may replace code like `series.hash_encode(stop, use_name=False)` with `series.hash_values(method="murmur3") % stop`.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Conor Hoekstra (https://github.com/codereport)

URL: https://github.com/rapidsai/cudf/pull/9942
---
 cpp/include/cudf/detail/hashing.hpp      | 14 ++---
 cpp/include/cudf/hashing.hpp             |  9 +--
 cpp/include/cudf/table/row_operators.cuh | 48 ---------------
 cpp/src/hash/hashing.cu                  |  6 +-
 cpp/src/hash/murmur_hash.cu              | 25 ++------
 cpp/tests/hashing/hash_test.cpp          | 50 +++++++--------
 docs/cudf/source/api_docs/series.rst     |  2 -
 python/cudf/cudf/_lib/cpp/hash.pxd       |  3 +-
 python/cudf/cudf/_lib/hash.pyx           |  4 +-
 python/cudf/cudf/core/frame.py           |  4 +-
 python/cudf/cudf/core/series.py          | 77 ------------------------
 python/cudf/cudf/tests/test_dataframe.py | 38 ------------
 12 files changed, 46 insertions(+), 234 deletions(-)

diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index bd5c8a42a51..0fc807593fb 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -32,17 +32,15 @@ namespace detail {
  */
 std::unique_ptr<column> hash(
   table_view const& input,
-  hash_id hash_function                        = hash_id::HASH_MURMUR3,
-  cudf::host_span<uint32_t const> initial_hash = {},
-  uint32_t seed                                = 0,
-  rmm::cuda_stream_view stream                 = rmm::cuda_stream_default,
-  rmm::mr::device_memory_resource* mr          = rmm::mr::get_current_device_resource());
+  hash_id hash_function               = hash_id::HASH_MURMUR3,
+  uint32_t seed                       = 0,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> murmur_hash3_32(
   table_view const& input,
-  cudf::host_span<uint32_t const> initial_hash = {},
-  rmm::cuda_stream_view stream                 = rmm::cuda_stream_default,
-  rmm::mr::device_memory_resource* mr          = rmm::mr::get_current_device_resource());
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> md5_hash(
   table_view const& input,
diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp
index 6b281c3f7f4..cce05042917 100644
--- a/cpp/include/cudf/hashing.hpp
+++ b/cpp/include/cudf/hashing.hpp
@@ -31,8 +31,6 @@ namespace cudf {
  *
  * @param input The table of columns to hash.
  * @param hash_function The hash function enum to use.
- * @param initial_hash Optional host_span of initial hash values for each column.
- * If this span is empty then each element will be hashed as-is.
  * @param seed Optional seed value to use for the hash function.
  * @param mr Device memory resource used to allocate the returned column's device memory.
  *
@@ -40,10 +38,9 @@ namespace cudf {
  */
 std::unique_ptr<column> hash(
   table_view const& input,
-  hash_id hash_function                        = hash_id::HASH_MURMUR3,
-  cudf::host_span<uint32_t const> initial_hash = {},
-  uint32_t seed                                = DEFAULT_HASH_SEED,
-  rmm::mr::device_memory_resource* mr          = rmm::mr::get_current_device_resource());
+  hash_id hash_function               = hash_id::HASH_MURMUR3,
+  uint32_t seed                       = DEFAULT_HASH_SEED,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
 }  // namespace cudf
diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh
index 0f3ca073380..32ddd1ef49a 100644
--- a/cpp/include/cudf/table/row_operators.cuh
+++ b/cpp/include/cudf/table/row_operators.cuh
@@ -539,52 +539,4 @@ class row_hasher {
   uint32_t _seed{DEFAULT_HASH_SEED};
 };
 
-/**
- * @brief Computes the hash value of a row in the given table, combined with an
- * initial hash value for each column.
- *
- * @tparam hash_function Hash functor to use for hashing elements.
- * @tparam Nullate A cudf::nullate type describing how to check for nulls.
- */
-template <template <typename> class hash_function, typename Nullate>
-class row_hasher_initial_values {
- public:
-  row_hasher_initial_values() = delete;
-  row_hasher_initial_values(Nullate has_nulls, table_device_view t, hash_value_type* initial_hash)
-    : _table{t}, _initial_hash(initial_hash), _has_nulls{has_nulls}
-  {
-  }
-
-  __device__ auto operator()(size_type row_index) const
-  {
-    auto hash_combiner = [](hash_value_type lhs, hash_value_type rhs) {
-      return hash_function<hash_value_type>{}.hash_combine(lhs, rhs);
-    };
-
-    // Hashes an element in a column and combines with an initial value
-    auto hasher = [=](size_type column_index) {
-      auto hash_value = cudf::type_dispatcher<dispatch_storage_type>(
-        _table.column(column_index).type(),
-        element_hasher<hash_function, Nullate>{_has_nulls},
-        _table.column(column_index),
-        row_index);
-
-      return hash_combiner(_initial_hash[column_index], hash_value);
-    };
-
-    // Hash each element and combine all the hash values together
-    return thrust::transform_reduce(thrust::seq,
-                                    thrust::make_counting_iterator(0),
-                                    thrust::make_counting_iterator(_table.num_columns()),
-                                    hasher,
-                                    hash_value_type{0},
-                                    hash_combiner);
-  }
-
- private:
-  table_device_view _table;
-  hash_value_type* _initial_hash;
-  Nullate _has_nulls;
-};
-
 }  // namespace cudf
diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu
index 039652e0012..ac2f06c0ea3 100644
--- a/cpp/src/hash/hashing.cu
+++ b/cpp/src/hash/hashing.cu
@@ -88,13 +88,12 @@ std::unique_ptr<column> serial_murmur_hash3_32(table_view const& input,
 
 std::unique_ptr<column> hash(table_view const& input,
                              hash_id hash_function,
-                             cudf::host_span<uint32_t const> initial_hash,
                              uint32_t seed,
                              rmm::cuda_stream_view stream,
                              rmm::mr::device_memory_resource* mr)
 {
   switch (hash_function) {
-    case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, initial_hash, stream, mr);
+    case (hash_id::HASH_MURMUR3): return murmur_hash3_32(input, stream, mr);
     case (hash_id::HASH_MD5): return md5_hash(input, stream, mr);
     case (hash_id::HASH_SERIAL_MURMUR3):
       return serial_murmur_hash3_32<MurmurHash3_32>(input, seed, stream, mr);
@@ -108,12 +107,11 @@ std::unique_ptr<column> hash(table_view const& input,
 
 std::unique_ptr<column> hash(table_view const& input,
                              hash_id hash_function,
-                             cudf::host_span<uint32_t const> initial_hash,
                              uint32_t seed,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::hash(input, hash_function, initial_hash, seed, rmm::cuda_stream_default, mr);
+  return detail::hash(input, hash_function, seed, rmm::cuda_stream_default, mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/hash/murmur_hash.cu b/cpp/src/hash/murmur_hash.cu
index a761d058180..bc8d3577513 100644
--- a/cpp/src/hash/murmur_hash.cu
+++ b/cpp/src/hash/murmur_hash.cu
@@ -29,7 +29,6 @@ namespace cudf {
 namespace detail {
 
 std::unique_ptr<column> murmur_hash3_32(table_view const& input,
-                                        cudf::host_span<uint32_t const> initial_hash,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr)
 {
@@ -44,24 +43,12 @@ std::unique_ptr<column> murmur_hash3_32(table_view const& input,
   auto const device_input = table_device_view::create(input, stream);
   auto output_view        = output->mutable_view();
 
-  // Compute the hash value for each row depending on the specified hash function
-  if (!initial_hash.empty()) {
-    CUDF_EXPECTS(initial_hash.size() == size_t(input.num_columns()),
-                 "Expected same size of initial hash values as number of columns");
-    auto device_initial_hash = make_device_uvector_async(initial_hash, stream);
-
-    thrust::tabulate(rmm::exec_policy(stream),
-                     output_view.begin<int32_t>(),
-                     output_view.end<int32_t>(),
-                     row_hasher_initial_values<MurmurHash3_32, nullate::DYNAMIC>(
-                       nullate::DYNAMIC{nullable}, *device_input, device_initial_hash.data()));
-  } else {
-    thrust::tabulate(
-      rmm::exec_policy(stream),
-      output_view.begin<int32_t>(),
-      output_view.end<int32_t>(),
-      row_hasher<MurmurHash3_32, nullate::DYNAMIC>(nullate::DYNAMIC{nullable}, *device_input));
-  }
+  // Compute the hash value for each row
+  thrust::tabulate(
+    rmm::exec_policy(stream),
+    output_view.begin<int32_t>(),
+    output_view.end<int32_t>(),
+    row_hasher<MurmurHash3_32, nullate::DYNAMIC>(nullate::DYNAMIC{nullable}, *device_input));
 
   return output;
 }
diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp
index ee321b761db..bd6deae9dc4 100644
--- a/cpp/tests/hashing/hash_test.cpp
+++ b/cpp/tests/hashing/hash_test.cpp
@@ -116,13 +116,13 @@ TEST_F(HashTest, MultiValueNulls)
   EXPECT_EQ(input1.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
 
-  auto const serial_output1 = cudf::hash(input1, cudf::hash_id::HASH_SERIAL_MURMUR3, {}, 0);
+  auto const serial_output1 = cudf::hash(input1, cudf::hash_id::HASH_SERIAL_MURMUR3, 0);
   auto const serial_output2 = cudf::hash(input2, cudf::hash_id::HASH_SERIAL_MURMUR3);
 
   EXPECT_EQ(input1.num_rows(), serial_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(serial_output1->view(), serial_output2->view());
 
-  auto const spark_output1 = cudf::hash(input1, cudf::hash_id::HASH_SPARK_MURMUR3, {}, 0);
+  auto const spark_output1 = cudf::hash(input1, cudf::hash_id::HASH_SPARK_MURMUR3, 0);
   auto const spark_output2 = cudf::hash(input2, cudf::hash_id::HASH_SPARK_MURMUR3);
 
   EXPECT_EQ(input1.num_rows(), spark_output1->size());
@@ -147,13 +147,13 @@ TYPED_TEST(HashTestTyped, Equality)
   EXPECT_EQ(input.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
 
-  auto const serial_output1 = cudf::hash(input, cudf::hash_id::HASH_SERIAL_MURMUR3, {}, 0);
+  auto const serial_output1 = cudf::hash(input, cudf::hash_id::HASH_SERIAL_MURMUR3, 0);
   auto const serial_output2 = cudf::hash(input, cudf::hash_id::HASH_SERIAL_MURMUR3);
 
   EXPECT_EQ(input.num_rows(), serial_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(serial_output1->view(), serial_output2->view());
 
-  auto const spark_output1 = cudf::hash(input, cudf::hash_id::HASH_SPARK_MURMUR3, {}, 0);
+  auto const spark_output1 = cudf::hash(input, cudf::hash_id::HASH_SPARK_MURMUR3, 0);
   auto const spark_output2 = cudf::hash(input, cudf::hash_id::HASH_SPARK_MURMUR3);
 
   EXPECT_EQ(input.num_rows(), spark_output1->size());
@@ -177,13 +177,13 @@ TYPED_TEST(HashTestTyped, EqualityNulls)
   EXPECT_EQ(input1.num_rows(), output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
 
-  auto const serial_output1 = cudf::hash(input1, cudf::hash_id::HASH_SERIAL_MURMUR3, {}, 0);
+  auto const serial_output1 = cudf::hash(input1, cudf::hash_id::HASH_SERIAL_MURMUR3, 0);
   auto const serial_output2 = cudf::hash(input2, cudf::hash_id::HASH_SERIAL_MURMUR3);
 
   EXPECT_EQ(input1.num_rows(), serial_output1->size());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(serial_output1->view(), serial_output2->view());
 
-  auto const spark_output1 = cudf::hash(input1, cudf::hash_id::HASH_SPARK_MURMUR3, {}, 0);
+  auto const spark_output1 = cudf::hash(input1, cudf::hash_id::HASH_SPARK_MURMUR3, 0);
   auto const spark_output2 = cudf::hash(input2, cudf::hash_id::HASH_SPARK_MURMUR3);
 
   EXPECT_EQ(input1.num_rows(), spark_output1->size());
@@ -222,7 +222,7 @@ TYPED_TEST(HashTestFloatTyped, TestExtremes)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_col, *hash_col_neg_nan, verbosity);
 
   constexpr auto serial_hasher   = cudf::hash_id::HASH_SERIAL_MURMUR3;
-  auto const serial_col          = cudf::hash(table_col, serial_hasher, {}, 0);
+  auto const serial_col          = cudf::hash(table_col, serial_hasher, 0);
   auto const serial_col_neg_zero = cudf::hash(table_col_neg_zero, serial_hasher);
   auto const serial_col_neg_nan  = cudf::hash(table_col_neg_nan, serial_hasher);
 
@@ -231,7 +231,7 @@ TYPED_TEST(HashTestFloatTyped, TestExtremes)
 
   // Spark hash is sensitive to 0 and -0
   constexpr auto spark_hasher  = cudf::hash_id::HASH_SPARK_MURMUR3;
-  auto const spark_col         = cudf::hash(table_col, spark_hasher, {}, 0);
+  auto const spark_col         = cudf::hash(table_col, spark_hasher, 0);
   auto const spark_col_neg_nan = cudf::hash(table_col_neg_nan, spark_hasher);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*spark_col, *spark_col_neg_nan);
@@ -269,8 +269,8 @@ TEST_F(SerialMurmurHash3Test, MultiValueWithSeeds)
   auto const combo2 = cudf::table_view({strings_col, ints_col, bools_col2});
 
   constexpr auto hasher   = cudf::hash_id::HASH_SERIAL_MURMUR3;
-  auto const strings_hash = cudf::hash(cudf::table_view({strings_col}), hasher, {}, 314);
-  auto const ints_hash    = cudf::hash(cudf::table_view({ints_col}), hasher, {}, 42);
+  auto const strings_hash = cudf::hash(cudf::table_view({strings_col}), hasher, 314);
+  auto const ints_hash    = cudf::hash(cudf::table_view({ints_col}), hasher, 42);
   auto const combo1_hash  = cudf::hash(combo1, hasher, {});
   auto const combo2_hash  = cudf::hash(combo2, hasher, {});
   auto const structs_hash = cudf::hash(cudf::table_view({structs_col}), hasher, {});
@@ -396,20 +396,20 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds)
   fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
 
   constexpr auto hasher      = cudf::hash_id::HASH_SPARK_MURMUR3;
-  auto const hash_structs    = cudf::hash(cudf::table_view({structs_col}), hasher, {}, 42);
-  auto const hash_strings    = cudf::hash(cudf::table_view({strings_col}), hasher, {}, 314);
-  auto const hash_doubles    = cudf::hash(cudf::table_view({doubles_col}), hasher, {}, 42);
-  auto const hash_timestamps = cudf::hash(cudf::table_view({timestamps_col}), hasher, {}, 42);
-  auto const hash_decimal64  = cudf::hash(cudf::table_view({decimal64_col}), hasher, {}, 42);
-  auto const hash_longs      = cudf::hash(cudf::table_view({longs_col}), hasher, {}, 42);
-  auto const hash_floats     = cudf::hash(cudf::table_view({floats_col}), hasher, {}, 42);
-  auto const hash_dates      = cudf::hash(cudf::table_view({dates_col}), hasher, {}, 42);
-  auto const hash_decimal32  = cudf::hash(cudf::table_view({decimal32_col}), hasher, {}, 42);
-  auto const hash_ints       = cudf::hash(cudf::table_view({ints_col}), hasher, {}, 42);
-  auto const hash_shorts     = cudf::hash(cudf::table_view({shorts_col}), hasher, {}, 42);
-  auto const hash_bytes      = cudf::hash(cudf::table_view({bytes_col}), hasher, {}, 42);
-  auto const hash_bools1     = cudf::hash(cudf::table_view({bools_col1}), hasher, {}, 42);
-  auto const hash_bools2     = cudf::hash(cudf::table_view({bools_col2}), hasher, {}, 42);
+  auto const hash_structs    = cudf::hash(cudf::table_view({structs_col}), hasher, 42);
+  auto const hash_strings    = cudf::hash(cudf::table_view({strings_col}), hasher, 314);
+  auto const hash_doubles    = cudf::hash(cudf::table_view({doubles_col}), hasher, 42);
+  auto const hash_timestamps = cudf::hash(cudf::table_view({timestamps_col}), hasher, 42);
+  auto const hash_decimal64  = cudf::hash(cudf::table_view({decimal64_col}), hasher, 42);
+  auto const hash_longs      = cudf::hash(cudf::table_view({longs_col}), hasher, 42);
+  auto const hash_floats     = cudf::hash(cudf::table_view({floats_col}), hasher, 42);
+  auto const hash_dates      = cudf::hash(cudf::table_view({dates_col}), hasher, 42);
+  auto const hash_decimal32  = cudf::hash(cudf::table_view({decimal32_col}), hasher, 42);
+  auto const hash_ints       = cudf::hash(cudf::table_view({ints_col}), hasher, 42);
+  auto const hash_shorts     = cudf::hash(cudf::table_view({shorts_col}), hasher, 42);
+  auto const hash_bytes      = cudf::hash(cudf::table_view({bytes_col}), hasher, 42);
+  auto const hash_bools1     = cudf::hash(cudf::table_view({bools_col1}), hasher, 42);
+  auto const hash_bools2     = cudf::hash(cudf::table_view({bools_col2}), hasher, 42);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_structs, hash_structs_expected, verbosity);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_strings, hash_strings_expected, verbosity);
@@ -439,7 +439,7 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds)
                                                 shorts_col,
                                                 bytes_col,
                                                 bools_col2});
-  auto const hash_combined  = cudf::hash(combined_table, hasher, {}, 42);
+  auto const hash_combined  = cudf::hash(combined_table, hasher, 42);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*hash_combined, hash_combined_expected, verbosity);
 }
 
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 6dc38d985f8..e0dc4bd4f46 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -422,6 +422,4 @@ Serialization / IO / conversion
    Series.from_categorical
    Series.from_masked_array
    Series.from_pandas
-   Series.hash_encode
    Series.hash_values
-   
diff --git a/python/cudf/cudf/_lib/cpp/hash.pxd b/python/cudf/cudf/_lib/cpp/hash.pxd
index f07a6c0f046..fd9992152a6 100644
--- a/python/cudf/cudf/_lib/cpp/hash.pxd
+++ b/python/cudf/cudf/_lib/cpp/hash.pxd
@@ -13,7 +13,6 @@ from cudf._lib.cpp.table.table_view cimport table_view
 cdef extern from "cudf/hashing.hpp" namespace "cudf" nogil:
     cdef unique_ptr[column] hash "cudf::hash" (
         const table_view& input,
-        const libcudf_types.hash_id& hash_function,
-        const vector[uint32_t]& initial_hash,
+        const libcudf_types.hash_id hash_function,
         const uint32_t seed
     ) except +
diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 9b34a049cac..adc48159aac 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -54,8 +54,7 @@ def hash_partition(source_table, object columns_to_hash,
     )
 
 
-def hash(source_table, str method, object initial_hash=None, int seed=0):
-    cdef vector[uint32_t] c_initial_hash = initial_hash or []
+def hash(source_table, str method, int seed=0):
     cdef table_view c_source_view = table_view_from_table(
         source_table, ignore_index=True)
     cdef unique_ptr[column] c_result
@@ -71,7 +70,6 @@ def hash(source_table, str method, object initial_hash=None, int seed=0):
             cpp_hash(
                 c_source_view,
                 c_hash_function,
-                c_initial_hash,
                 seed
             )
         )
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c85ed0c8555..c83b06707a4 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -580,8 +580,8 @@ def _gather(
         result._copy_type_metadata(self)
         return result
 
-    def _hash(self, method, initial_hash=None):
-        return libcudf.hash.hash(self, method, initial_hash)
+    def _hash(self, method):
+        return libcudf.hash.hash(self, method)
 
     def _hash_partition(
         self, columns_to_hash, num_partitions, keep_index=True
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 036c8c1ee00..4ec7c3df076 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -7,7 +7,6 @@
 import pickle
 import warnings
 from collections import abc as abc
-from hashlib import sha256
 from numbers import Number
 from shutil import get_terminal_size
 from typing import Any, MutableMapping, Optional, Set, Union
@@ -3144,82 +3143,6 @@ def hash_values(self, method="murmur3"):
             {None: self._hash(method=method)}, index=self.index
         )
 
-    def hash_encode(self, stop, use_name=False):
-        """Encode column values as ints in [0, stop) using hash function.
-
-        This method is deprecated. Replace ``series.hash_encode(stop,
-        use_name=False)`` with ``series.hash_values(method="murmur3") % stop``.
-
-        Parameters
-        ----------
-        stop : int
-            The upper bound on the encoding range.
-        use_name : bool
-            If ``True`` then combine hashed column values
-            with hashed column name. This is useful for when the same
-            values in different columns should be encoded
-            with different hashed values.
-
-        Returns
-        -------
-        result : Series
-            The encoded Series.
-
-        Examples
-        --------
-        >>> import cudf
-        >>> series = cudf.Series([10, 120, 30])
-        >>> series.hash_encode(stop=200)
-        0     53
-        1     51
-        2    124
-        dtype: int32
-
-        You can choose to include name while hash
-        encoding by specifying `use_name=True`
-
-        >>> series.hash_encode(stop=200, use_name=True)
-        0    131
-        1     29
-        2     76
-        dtype: int32
-        """
-        warnings.warn(
-            "The `hash_encode` method will be removed in a future cuDF "
-            "release. Replace `series.hash_encode(stop, use_name=False)` "
-            'with `series.hash_values(method="murmur3") % stop`.',
-            FutureWarning,
-        )
-
-        if not stop > 0:
-            raise ValueError("stop must be a positive integer.")
-
-        if use_name:
-            name_hasher = sha256()
-            name_hasher.update(str(self.name).encode())
-            name_hash_bytes = name_hasher.digest()[:4]
-            name_hash_int = (
-                int.from_bytes(name_hash_bytes, "little", signed=False)
-                & 0xFFFFFFFF
-            )
-            initial_hash = [name_hash_int]
-        else:
-            initial_hash = None
-
-        hashed_values = Series._from_data(
-            {
-                self.name: self._hash(
-                    method="murmur3", initial_hash=initial_hash
-                )
-            },
-            self.index,
-        )
-
-        if hashed_values.has_nulls:
-            raise ValueError("Column must have no nulls.")
-
-        return hashed_values % stop
-
     def quantile(
         self, q=0.5, interpolation="linear", exact=True, quant_index=True
     ):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index ab0856fad1e..78560ee6723 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -2237,44 +2237,6 @@ def test_arrow_pandas_compat(pdf, gdf, preserve_index):
     assert_eq(pdf2, gdf2)
 
 
-@pytest.mark.parametrize("nrows", [1, 8, 100, 1000, 100000])
-def test_series_hash_encode(nrows):
-    data = np.asarray(range(nrows))
-    # Python hash returns different value which sometimes
-    # results in enc_with_name_arr and enc_arr to be same.
-    # And there is no other better way to make hash return same value.
-    # So using an integer name to get constant value back from hash.
-    s = cudf.Series(data, name=1)
-    num_features = 1000
-
-    with pytest.warns(FutureWarning):
-        encoded_series = s.hash_encode(num_features)
-    assert isinstance(encoded_series, cudf.Series)
-    enc_arr = encoded_series.to_numpy()
-    assert np.all(enc_arr >= 0)
-    assert np.max(enc_arr) < num_features
-
-    with pytest.warns(FutureWarning):
-        enc_with_name_arr = s.hash_encode(
-            num_features, use_name=True
-        ).to_numpy()
-    assert enc_with_name_arr[0] != enc_arr[0]
-
-
-def test_series_hash_encode_reproducible_results():
-    # Regression test to ensure that hash_encode outputs are reproducible
-    data = cudf.Series([0, 1, 2])
-    with pytest.warns(FutureWarning):
-        hash_result = data.hash_encode(stop=2 ** 16, use_name=False)
-    expected_result = cudf.Series([42165, 55037, 7341])
-    assert_eq(hash_result, expected_result)
-
-    with pytest.warns(FutureWarning):
-        hash_result_with_name = data.hash_encode(stop=2 ** 16, use_name=True)
-    expected_result_with_name = cudf.Series([36137, 39649, 58673])
-    assert_eq(hash_result_with_name, expected_result_with_name)
-
-
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + ["bool"])
 def test_cuda_array_interface(dtype):
 

From e432d016c3f9fd9911a47807506ceea24228a996 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Fri, 24 Dec 2021 05:52:20 -0800
Subject: [PATCH 07/12] Add `first` and `last` method to `IndexedFrame` (#9710)

closes #9600

This PR adds `first` and `last` methods to `IndexedFrame`. These methods only apply to an `IndexedFrame` with a `DatetimeIndex` and gather the first or last rows within the time range specified by the `offset` argument.

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Sheilah Kirui (https://github.com/skirui-source)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/9710
---
 python/cudf/cudf/core/indexed_frame.py  | 126 +++++++++++++++++++++++-
 python/cudf/cudf/tests/test_datetime.py | 104 +++++++++++++++++++
 2 files changed, 229 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 51bfad3a054..4be35d960ee 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -3,9 +3,10 @@
 
 from __future__ import annotations
 
+import operator
 import warnings
 from collections import abc
-from typing import Type, TypeVar
+from typing import Callable, Type, TypeVar
 from uuid import uuid4
 
 import cupy as cp
@@ -109,6 +110,7 @@ class IndexedFrame(Frame):
     # mypy can't handle bound type variables as class members
     _loc_indexer_type: Type[_LocIndexerClass]  # type: ignore
     _iloc_indexer_type: Type[_IlocIndexerClass]  # type: ignore
+    _index: cudf.core.index.BaseIndex
 
     def __init__(self, data=None, index=None):
         super().__init__(data=data, index=index)
@@ -1104,3 +1106,125 @@ def resample(
             if isinstance(self, cudf.Series)
             else cudf.core.resample.DataFrameResampler(self, by=by)
         )
+
+    def _first_or_last(
+        self, offset, idx: int, op: Callable, side: str, slice_func: Callable
+    ) -> "IndexedFrame":
+        """Shared code path for ``first`` and ``last``."""
+        if not isinstance(self._index, cudf.core.index.DatetimeIndex):
+            raise TypeError("'first' only supports a DatetimeIndex index.")
+        if not isinstance(offset, str):
+            raise NotImplementedError(
+                f"Unsupported offset type {type(offset)}."
+            )
+
+        if len(self) == 0:
+            return self.copy()
+
+        pd_offset = pd.tseries.frequencies.to_offset(offset)
+        to_search = op(pd.Timestamp(self._index._column[idx]), pd_offset)
+        if (
+            idx == 0
+            and not isinstance(pd_offset, pd.tseries.offsets.Tick)
+            and pd_offset.is_on_offset(pd.Timestamp(self._index[0]))
+        ):
+            # Special handling is required when the start time of the index
+            # is on the end of the offset. See pandas gh29623 for details.
+            to_search = to_search - pd_offset.base
+            return self.loc[:to_search]
+        end_point = int(
+            self._index._column.searchsorted(to_search, side=side)[0]
+        )
+        return slice_func(end_point)
+
+    def first(self, offset):
+        """Select initial periods of time series data based on a date offset.
+
+        When having a DataFrame with **sorted** dates as index, this function
+        can select the first few rows based on a date offset.
+
+        Parameters
+        ----------
+        offset: str
+            The offset length of the data that will be selected. For instance,
+            '1M' will display all rows having their index within the first
+            month.
+
+        Returns
+        -------
+        Series or DataFrame
+            A subset of the caller.
+
+        Raises
+        ------
+        TypeError
+            If the index is not a ``DatetimeIndex``
+
+        Examples
+        --------
+        >>> i = cudf.date_range('2018-04-09', periods=4, freq='2D')
+        >>> ts = cudf.DataFrame({'A': [1, 2, 3, 4]}, index=i)
+        >>> ts
+                    A
+        2018-04-09  1
+        2018-04-11  2
+        2018-04-13  3
+        2018-04-15  4
+        >>> ts.first('3D')
+                    A
+        2018-04-09  1
+        2018-04-11  2
+        """
+        return self._first_or_last(
+            offset,
+            idx=0,
+            op=operator.__add__,
+            side="left",
+            slice_func=lambda i: self.iloc[:i],
+        )
+
+    def last(self, offset):
+        """Select final periods of time series data based on a date offset.
+
+        When having a DataFrame with **sorted** dates as index, this function
+        can select the last few rows based on a date offset.
+
+        Parameters
+        ----------
+        offset: str
+            The offset length of the data that will be selected. For instance,
+            '3D' will display all rows having their index within the last 3
+            days.
+
+        Returns
+        -------
+        Series or DataFrame
+            A subset of the caller.
+
+        Raises
+        ------
+        TypeError
+            If the index is not a ``DatetimeIndex``
+
+        Examples
+        --------
+        >>> i = cudf.date_range('2018-04-09', periods=4, freq='2D')
+        >>> ts = cudf.DataFrame({'A': [1, 2, 3, 4]}, index=i)
+        >>> ts
+                    A
+        2018-04-09  1
+        2018-04-11  2
+        2018-04-13  3
+        2018-04-15  4
+        >>> ts.last('3D')
+                    A
+        2018-04-13  3
+        2018-04-15  4
+        """
+        return self._first_or_last(
+            offset,
+            idx=-1,
+            op=operator.__sub__,
+            side="right",
+            slice_func=lambda i: self.iloc[i:],
+        )
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 1a1b21aa3d5..9d120819248 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -1892,3 +1892,107 @@ def test_round(data, time_type, resolution):
     expect = ps.dt.round(resolution)
     got = gs.dt.round(resolution)
     assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "idx",
+    [
+        pd.DatetimeIndex([]),
+        pd.DatetimeIndex(["2010-05-31"]),
+        pd.date_range("2000-01-01", "2000-12-31", periods=21),
+    ],
+)
+@pytest.mark.parametrize(
+    "offset",
+    [
+        "10Y",
+        "6M",
+        "M",
+        "31D",
+        "0H",
+        "44640T",
+        "44640min",
+        "2678000S",
+        "2678000000L",
+        "2678000000ms",
+        "2678000000000U",
+        "2678000000000us",
+        "2678000000000000N",
+        "2678000000000000ns",
+    ],
+)
+def test_first(idx, offset):
+    p = pd.Series(range(len(idx)), index=idx)
+    g = cudf.from_pandas(p)
+
+    expect = p.first(offset=offset)
+    got = g.first(offset=offset)
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    # This test case tests correctness when start is end of month
+    "idx, offset",
+    [
+        (
+            pd.DatetimeIndex(
+                [
+                    "2020-01-31",
+                    "2020-02-15",
+                    "2020-02-29",
+                    "2020-03-15",
+                    "2020-03-31",
+                    "2020-04-15",
+                    "2020-04-30",
+                ]
+            ),
+            "3M",
+        )
+    ],
+)
+def test_first_start_at_end_of_month(idx, offset):
+    p = pd.Series(range(len(idx)), index=idx)
+    g = cudf.from_pandas(p)
+
+    expect = p.first(offset=offset)
+    got = g.first(offset=offset)
+
+    assert_eq(expect, got)
+
+
+@pytest.mark.parametrize(
+    "idx",
+    [
+        pd.DatetimeIndex([]),
+        pd.DatetimeIndex(["2010-05-31"]),
+        pd.date_range("2000-01-01", "2000-12-31", periods=21),
+    ],
+)
+@pytest.mark.parametrize(
+    "offset",
+    [
+        "10Y",
+        "6M",
+        "M",
+        "31D",
+        "0H",
+        "44640T",
+        "44640min",
+        "2678000S",
+        "2678000000L",
+        "2678000000ms",
+        "2678000000000U",
+        "2678000000000us",
+        "2678000000000000N",
+        "2678000000000000ns",
+    ],
+)
+def test_last(idx, offset):
+    p = pd.Series(range(len(idx)), index=idx)
+    g = cudf.from_pandas(p)
+
+    expect = p.last(offset=offset)
+    got = g.last(offset=offset)
+
+    assert_eq(expect, got)

From bf7f7bea4674600168bfb73b380ae57071c4e53c Mon Sep 17 00:00:00 2001
From: esoha-nvidia <69258779+esoha-nvidia@users.noreply.github.com>
Date: Fri, 24 Dec 2021 11:54:40 -0700
Subject: [PATCH 08/12] Fix cudf compilation instructions. (#9956)

This change is needed since https://github.com/rapidsai/cudf/commit/967f3397fb486368d74916ae344c0e1d9eb0a1a8 by @bdice

Authors:
  - https://github.com/esoha-nvidia

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/9956
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index aae62fbd47c..6d1c0528832 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -86,7 +86,7 @@ git submodule update --init --remote --recursive
 ```bash
 # create the conda environment (assuming in base `cudf` directory)
 # note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead
-conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.0.yml
+conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml
 # activate the environment
 conda activate cudf_dev
 ```

From 67c925c84aa111ad3a54c0352c18c570d17f5d75 Mon Sep 17 00:00:00 2001
From: Liangcai Li <firestarmanllc@gmail.com>
Date: Wed, 29 Dec 2021 15:04:31 +0800
Subject: [PATCH 09/12] Fix cudf java build error. (#9958)

The cudf Java build was broken by https://github.com/rapidsai/cudf/pull/9942, so this change updates the `hash` JNI binding accordingly.

Signed-off-by: Firestarman <firestarmanllc@gmail.com>

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Gary Shen (https://github.com/GaryShen2008)

URL: https://github.com/rapidsai/cudf/pull/9958
---
 java/src/main/java/ai/rapids/cudf/ColumnVector.java | 12 ++++--------
 java/src/main/native/src/ColumnVectorJni.cpp        | 11 ++---------
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
index 3fed6316215..c83fe6adca1 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java
@@ -680,7 +680,7 @@ public static ColumnVector md5Hash(ColumnView... columns) {
           "Unsupported nested type column";
       columnViews[i] = columns[i].getNativeView();
     }
-    return new ColumnVector(hash(columnViews, HashType.HASH_MD5.getNativeId(), new int[0], 0));
+    return new ColumnVector(hash(columnViews, HashType.HASH_MD5.getNativeId(), 0));
   }
 
   /**
@@ -704,7 +704,7 @@ public static ColumnVector serial32BitMurmurHash3(int seed, ColumnView columns[]
       assert !columns[i].getType().equals(DType.LIST) : "List columns are not supported";
       columnViews[i] = columns[i].getNativeView();
     }
-    return new ColumnVector(hash(columnViews, HashType.HASH_SERIAL_MURMUR3.getNativeId(), new int[0], seed));
+    return new ColumnVector(hash(columnViews, HashType.HASH_SERIAL_MURMUR3.getNativeId(), seed));
   }
 
   /**
@@ -739,7 +739,7 @@ public static ColumnVector spark32BitMurmurHash3(int seed, ColumnView columns[])
       assert !columns[i].getType().equals(DType.LIST) : "List columns are not supported";
       columnViews[i] = columns[i].getNativeView();
     }
-    return new ColumnVector(hash(columnViews, HashType.HASH_SPARK_MURMUR3.getNativeId(), new int[0], seed));
+    return new ColumnVector(hash(columnViews, HashType.HASH_SPARK_MURMUR3.getNativeId(), seed));
   }
 
   /**
@@ -859,14 +859,10 @@ private static native long stringConcatenationSepCol(long[] columnViews,
    *
    * @param viewHandles array of native handles to the cudf::column_view columns being operated on.
    * @param hashId integer native ID of the hashing function identifier HashType.
-   * @param initialValues array of integer values, one per column, only used by non-serial murmur3
-   *                      hash. Each element's hash value is merged with its column's initial value
-   *                      before the row is merged into a single value.
    * @param seed integer seed for the hash. Only used by serial murmur3 hash.
    * @return native handle of the resulting cudf column containing the hex-string hashing results.
    */
-  private static native long hash(long[] viewHandles, int hashId, int[] initialValues,
-                                  int seed) throws CudfException;
+  private static native long hash(long[] viewHandles, int hashId, int seed) throws CudfException;
 
   /////////////////////////////////////////////////////////////////////////////
   // INTERNAL/NATIVE ACCESS
diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp
index 7fe466f828f..cfad89cb399 100644
--- a/java/src/main/native/src/ColumnVectorJni.cpp
+++ b/java/src/main/native/src/ColumnVectorJni.cpp
@@ -327,10 +327,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_concatenate(JNIEnv *env
 
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_hash(JNIEnv *env, jobject j_object,
                                                               jlongArray column_handles,
-                                                              jint hash_function_id,
-                                                              jintArray initial_values, jint seed) {
+                                                              jint hash_function_id, jint seed) {
   JNI_NULL_CHECK(env, column_handles, "array of column handles is null", 0);
-  JNI_NULL_CHECK(env, initial_values, "array of initial values is null", 0);
 
   try {
     cudf::jni::native_jpointerArray<cudf::column_view> n_cudf_columns(env, column_handles);
@@ -340,13 +338,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_hash(JNIEnv *env, jobje
                    [](auto const &p_column) { return *p_column; });
     cudf::table_view *input_table = new cudf::table_view(column_views);
 
-    cudf::jni::native_jintArray native_iv(env, initial_values);
-    std::vector<uint32_t> vector_iv;
-    std::transform(native_iv.data(), native_iv.data() + native_iv.size(),
-                   std::back_inserter(vector_iv), [](auto const &iv) { return iv; });
-
     std::unique_ptr<cudf::column> result =
-        cudf::hash(*input_table, static_cast<cudf::hash_id>(hash_function_id), vector_iv, seed);
+        cudf::hash(*input_table, static_cast<cudf::hash_id>(hash_function_id), seed);
     return reinterpret_cast<jlong>(result.release());
   }
   CATCH_STD(env, 0);

From 723376576b2f36711b12dd434b95e8aeac99f653 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 3 Jan 2022 12:38:41 -0600
Subject: [PATCH 10/12] Remove various unused functions (#9922)

This PR removes a number of unused functions and inlines some helpers that are only called in one place. This PR also deprecates `Series.fill`, which does not appear to be a pandas API. This PR resolves #9824.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ram (Ramakrishna Prabhu) (https://github.com/rgsl888prabhu)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/9922
---
 python/cudf/cudf/core/dataframe.py  | 143 +++++++---------------------
 python/cudf/cudf/core/frame.py      |  35 +++----
 python/cudf/cudf/core/series.py     |  18 +++-
 python/cudf/cudf/tests/test_fill.py |   2 +-
 python/cudf/cudf/tests/test_repr.py |   6 --
 5 files changed, 62 insertions(+), 142 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 88c8aaebd9e..b7fc5efb412 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -59,6 +59,7 @@
     _get_label_range_or_mask,
     _indices_from_labels,
 )
+from cudf.core.multiindex import MultiIndex
 from cudf.core.resample import DataFrameResampler
 from cudf.core.series import Series
 from cudf.utils import applyutils, docutils, ioutils, queryutils, utils
@@ -90,8 +91,6 @@
 
 class _DataFrameIndexer(_FrameIndexer):
     def __getitem__(self, arg):
-        from cudf import MultiIndex
-
         if isinstance(self._frame.index, MultiIndex) or isinstance(
             self._frame.columns, MultiIndex
         ):
@@ -118,8 +117,6 @@ def _can_downcast_to_series(self, df, arg):
         operation should be "downcasted" from a DataFrame to a
         Series
         """
-        from cudf.core.column import as_column
-
         if isinstance(df, cudf.Series):
             return False
         nrows, ncols = df.shape
@@ -201,11 +198,6 @@ def _getitem_scalar(self, arg):
     def _getitem_tuple_arg(self, arg):
         from uuid import uuid4
 
-        from cudf import MultiIndex
-        from cudf.core.column import column
-        from cudf.core.dataframe import DataFrame
-        from cudf.core.index import as_index
-
         # Step 1: Gather columns
         if isinstance(arg, tuple):
             columns_df = self._frame._get_columns_by_label(arg[1])
@@ -245,7 +237,7 @@ def _getitem_tuple_arg(self, arg):
                     tmp_arg = ([tmp_arg[0]], tmp_arg[1])
                 if len(tmp_arg[0]) == 0:
                     return columns_df._empty_like(keep_index=True)
-                tmp_arg = (column.as_column(tmp_arg[0]), tmp_arg[1])
+                tmp_arg = (as_column(tmp_arg[0]), tmp_arg[1])
 
                 if is_bool_dtype(tmp_arg[0]):
                     df = columns_df._apply_boolean_mask(tmp_arg[0])
@@ -273,7 +265,7 @@ def _getitem_tuple_arg(self, arg):
                     start = self._frame.index[0]
                 df.index = as_index(start)
             else:
-                row_selection = column.as_column(arg[0])
+                row_selection = as_column(arg[0])
                 if is_bool_dtype(row_selection.dtype):
                     df.index = self._frame.index.take(row_selection)
                 else:
@@ -285,7 +277,7 @@ def _getitem_tuple_arg(self, arg):
 
     @annotate("LOC_SETITEM", color="blue", domain="cudf_python")
     def _setitem_tuple_arg(self, key, value):
-        if isinstance(self._frame.index, cudf.MultiIndex) or isinstance(
+        if isinstance(self._frame.index, MultiIndex) or isinstance(
             self._frame.columns, pd.MultiIndex
         ):
             raise NotImplementedError(
@@ -322,7 +314,7 @@ def _setitem_tuple_arg(self, key, value):
             self._frame._data.insert(key[1], new_col)
         else:
             if isinstance(value, (cupy.ndarray, np.ndarray)):
-                value_df = cudf.DataFrame(value)
+                value_df = DataFrame(value)
                 if value_df.shape[1] != columns_df.shape[1]:
                     if value_df.shape[1] == 1:
                         value_cols = (
@@ -351,13 +343,9 @@ class _DataFrameIlocIndexer(_DataFrameIndexer):
 
     @annotate("ILOC_GETITEM", color="blue", domain="cudf_python")
     def _getitem_tuple_arg(self, arg):
-        from cudf import MultiIndex
-        from cudf.core.column import column
-        from cudf.core.index import as_index
-
         # Iloc Step 1:
         # Gather the columns specified by the second tuple arg
-        columns_df = cudf.DataFrame(self._frame._get_columns_by_index(arg[1]))
+        columns_df = DataFrame(self._frame._get_columns_by_index(arg[1]))
 
         columns_df._index = self._frame._index
 
@@ -385,7 +373,7 @@ def _getitem_tuple_arg(self, arg):
                     index += len(columns_df)
                 df = columns_df._slice(slice(index, index + 1, 1))
             else:
-                arg = (column.as_column(arg[0]), arg[1])
+                arg = (as_column(arg[0]), arg[1])
                 if is_bool_dtype(arg[0]):
                     df = columns_df._apply_boolean_mask(arg[0])
                 else:
@@ -407,7 +395,7 @@ def _getitem_tuple_arg(self, arg):
 
     @annotate("ILOC_SETITEM", color="blue", domain="cudf_python")
     def _setitem_tuple_arg(self, key, value):
-        columns = cudf.DataFrame(self._frame._get_columns_by_index(key[1]))
+        columns = DataFrame(self._frame._get_columns_by_index(key[1]))
 
         for col in columns:
             self._frame[col].iloc[key[0]] = value
@@ -953,6 +941,7 @@ def ndim(self):
         return 2
 
     def __dir__(self):
+        # Add the columns of the DataFrame to the dir output.
         o = set(dir(type(self)))
         o.update(self.__dict__)
         o.update(
@@ -1169,8 +1158,6 @@ def _slice(self: T, arg: slice) -> T:
         arg : should always be of type slice
 
         """
-        from cudf.core.index import RangeIndex
-
         num_rows = len(self)
         if num_rows == 0:
             return self
@@ -1284,8 +1271,6 @@ def memory_usage(self, index=True, deep=False):
         return Series(sizes, index=ind)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        import cudf
-
         if method == "__call__" and hasattr(cudf, ufunc.__name__):
             func = getattr(cudf, ufunc.__name__)
             return func(self)
@@ -1329,6 +1314,7 @@ def __array_function__(self, func, types, args, kwargs):
         else:
             return NotImplemented
 
+    # The _get_numeric_data method is necessary for dask compatibility.
     def _get_numeric_data(self):
         """Return a dataframe with only numeric data types"""
         columns = [
@@ -1554,9 +1540,9 @@ def _concat(
                 out._index._data,
                 indices[:first_data_column_position],
             )
-            if not isinstance(
-                out._index, cudf.MultiIndex
-            ) and is_categorical_dtype(out._index._values.dtype):
+            if not isinstance(out._index, MultiIndex) and is_categorical_dtype(
+                out._index._values.dtype
+            ):
                 out = out.set_index(
                     cudf.core.index.as_index(out.index._values)
                 )
@@ -1672,51 +1658,6 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
 
         return result
 
-    def _repr_pandas025_formatting(self, ncols, nrows, dtype=None):
-        """
-        With Pandas > 0.25 there are some new conditional formatting for some
-        datatypes and column/row configurations. This fixes most of them in
-        context to match the expected Pandas repr of the same content.
-
-        Examples
-        --------
-        >>> gdf.__repr__()
-            0   ...  19
-        0   46  ...  48
-        ..  ..  ...  ..
-        19  40  ...  29
-
-        [20 rows x 20 columns]
-
-        >>> nrows, ncols = _repr_pandas025_formatting(2, 2, dtype="category")
-        >>> pd.options.display.max_rows = nrows
-        >>> pd.options.display.max_columns = ncols
-        >>> gdf.__repr__()
-             0  ...  19
-        0   46  ...  48
-        ..  ..  ...  ..
-        19  40  ...  29
-
-        [20 rows x 20 columns]
-        """
-        ncols = 1 if ncols in [0, 2] and dtype == "datetime64[ns]" else ncols
-        ncols = (
-            1
-            if ncols == 0
-            and nrows == 1
-            and dtype in ["int8", "str", "category"]
-            else ncols
-        )
-        ncols = (
-            1
-            if nrows == 1
-            and dtype in ["int8", "int16", "int64", "str", "category"]
-            else ncols
-        )
-        ncols = 0 if ncols == 2 else ncols
-        ncols = 19 if ncols in [20, 21] else ncols
-        return ncols, nrows
-
     def _clean_renderable_dataframe(self, output):
         """
         This method takes in partial/preprocessed dataframe
@@ -1822,7 +1763,7 @@ def _get_renderable_dataframe(self):
                 # adjust right columns for output if multiindex.
                 right_cols = (
                     right_cols - 1
-                    if isinstance(self.index, cudf.MultiIndex)
+                    if isinstance(self.index, MultiIndex)
                     else right_cols
                 )
                 left_cols = int(ncols / 2.0) + 1
@@ -2151,20 +2092,6 @@ def columns(self, columns):
             data, multiindex=is_multiindex, level_names=columns.names,
         )
 
-    def _rename_columns(self, new_names):
-        old_cols = iter(self._data.names)
-        l_old_cols = len(self._data)
-        l_new_cols = len(new_names)
-        if l_new_cols != l_old_cols:
-            msg = (
-                f"Length of new column names: {l_new_cols} does not "
-                "match length of previous column names: {l_old_cols}"
-            )
-            raise ValueError(msg)
-
-        mapper = dict(zip(old_cols, new_names))
-        self.rename(mapper=mapper, inplace=True, axis=1)
-
     def _reindex(
         self, columns, dtypes=None, deep=False, index=None, inplace=False
     ):
@@ -2209,11 +2136,9 @@ def _reindex(
                 columns = (
                     columns if columns is not None else list(df._column_names)
                 )
-                df = cudf.DataFrame()
+                df = DataFrame()
             else:
-                df = cudf.DataFrame(None, index).join(
-                    df, how="left", sort=True
-                )
+                df = DataFrame(None, index).join(df, how="left", sort=True)
                 # double-argsort to map back from sorted to unsorted positions
                 df = df.take(index.argsort(ascending=True).argsort())
 
@@ -2445,7 +2370,7 @@ def set_index(
                     except TypeError:
                         msg = f"{col} cannot be converted to column-like."
                         raise TypeError(msg)
-                if isinstance(col, (cudf.MultiIndex, pd.MultiIndex)):
+                if isinstance(col, (MultiIndex, pd.MultiIndex)):
                     col = (
                         cudf.from_pandas(col)
                         if isinstance(col, pd.MultiIndex)
@@ -2473,7 +2398,7 @@ def set_index(
 
         if append:
             idx_cols = [self.index._data[x] for x in self.index._data]
-            if isinstance(self.index, cudf.MultiIndex):
+            if isinstance(self.index, MultiIndex):
                 idx_names = self.index.names
             else:
                 idx_names = [self.index.name]
@@ -2485,7 +2410,7 @@ def set_index(
         elif len(columns_to_add) == 1:
             idx = cudf.Index(columns_to_add[0], name=names[0])
         else:
-            idx = cudf.MultiIndex._from_data(
+            idx = MultiIndex._from_data(
                 {i: col for i, col in enumerate(columns_to_add)}
             )
             idx.names = names
@@ -2568,7 +2493,7 @@ class max_speed
         result = self if inplace else self.copy()
 
         if not drop:
-            if isinstance(self.index, cudf.MultiIndex):
+            if isinstance(self.index, MultiIndex):
                 names = tuple(
                     name if name is not None else f"level_{i}"
                     for i, name in enumerate(self.index.names)
@@ -3028,9 +2953,7 @@ def rename(
                     "mixed type is not yet supported."
                 )
 
-            if level is not None and isinstance(
-                self.index, cudf.core.multiindex.MultiIndex
-            ):
+            if level is not None and isinstance(self.index, MultiIndex):
                 out_index = self.index.copy(deep=copy)
                 out_index.get_level_values(level).to_frame().replace(
                     to_replace=list(index.keys()),
@@ -3307,7 +3230,7 @@ def agg(self, aggs, axis=None):
             raise NotImplementedError("axis not implemented yet")
 
         if isinstance(aggs, Iterable) and not isinstance(aggs, (str, dict)):
-            result = cudf.DataFrame()
+            result = DataFrame()
             # TODO : Allow simultaneous pass for multi-aggregation as
             # a future optimization
             for agg in aggs:
@@ -3320,7 +3243,7 @@ def agg(self, aggs, axis=None):
                     f"{aggs} is not a valid function for "
                     f"'DataFrame' object"
                 )
-            result = cudf.DataFrame()
+            result = DataFrame()
             result[aggs] = getattr(df_normalized, aggs)()
             result = result.iloc[:, 0]
             result.name = None
@@ -3355,7 +3278,7 @@ def agg(self, aggs, axis=None):
                         raise NotImplementedError(
                             "callable parameter is not implemented yet"
                         )
-                result = cudf.DataFrame(index=idxs, columns=cols)
+                result = DataFrame(index=idxs, columns=cols)
                 for key in aggs.keys():
                     col = df_normalized[key]
                     col_empty = column_empty(
@@ -4758,7 +4681,7 @@ def to_pandas(self, nullable=False, **kwargs):
 
         if isinstance(self.columns, BaseIndex):
             out_columns = self.columns.to_pandas()
-            if isinstance(self.columns, cudf.core.multiindex.MultiIndex):
+            if isinstance(self.columns, MultiIndex):
                 if self.columns.names is not None:
                     out_columns.names = self.columns.names
             else:
@@ -4934,7 +4857,7 @@ def to_arrow(self, preserve_index=True):
                     "step": 1,
                 }
             else:
-                if isinstance(self.index, cudf.MultiIndex):
+                if isinstance(self.index, MultiIndex):
                     gen_names = tuple(
                         f"level_{i}"
                         for i, _ in enumerate(self.index._data.names)
@@ -5462,7 +5385,7 @@ def _prepare_for_rowwise_op(self, method, skipna):
             warnings.warn(msg)
 
         if not skipna and any(col.nullable for col in filtered._columns):
-            mask = cudf.DataFrame(
+            mask = DataFrame(
                 {
                     name: filtered._data[name]._get_mask_as_column()
                     if filtered._data[name].nullable
@@ -6010,11 +5933,11 @@ def stack(self, level=-1, dropna=True):
         repeated_index = self.index.repeat(self.shape[1])
         name_index = Frame({0: self._column_names}).tile(self.shape[0])
         new_index = list(repeated_index._columns) + [name_index._columns[0]]
-        if isinstance(self._index, cudf.MultiIndex):
+        if isinstance(self._index, MultiIndex):
             index_names = self._index.names + [None]
         else:
             index_names = [None] * len(new_index)
-        new_index = cudf.core.multiindex.MultiIndex.from_frame(
+        new_index = MultiIndex.from_frame(
             DataFrame(dict(zip(range(0, len(new_index)), new_index))),
             names=index_names,
         )
@@ -6275,8 +6198,8 @@ def append(
         elif isinstance(other, list):
             if not other:
                 pass
-            elif not isinstance(other[0], cudf.DataFrame):
-                other = cudf.DataFrame(other)
+            elif not isinstance(other[0], DataFrame):
+                other = DataFrame(other)
                 if (self.columns.get_indexer(other.columns) >= 0).all():
                     other = other.reindex(columns=self.columns)
 
@@ -6574,7 +6497,7 @@ def from_pandas(obj, nan_as_null=None):
     elif isinstance(obj, pd.Series):
         return Series.from_pandas(obj, nan_as_null=nan_as_null)
     elif isinstance(obj, pd.MultiIndex):
-        return cudf.MultiIndex.from_pandas(obj, nan_as_null=nan_as_null)
+        return MultiIndex.from_pandas(obj, nan_as_null=nan_as_null)
     elif isinstance(obj, pd.RangeIndex):
         return cudf.core.index.RangeIndex(
             start=obj.start, stop=obj.stop, step=obj.step, name=obj.name
@@ -6692,7 +6615,7 @@ def extract_col(df, col):
         if (
             col == "index"
             and col not in df.index._data
-            and not isinstance(df.index, cudf.MultiIndex)
+            and not isinstance(df.index, MultiIndex)
         ):
             return df.index._data.columns[0]
         return df.index._data[col]
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index c83b06707a4..bae15c5e9fd 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1798,40 +1798,27 @@ def repeat(self, repeats, axis=None):
                 "Only axis=`None` supported at this time."
             )
 
-        return self._repeat(repeats)
-
-    def _repeat(self, count):
-        if not is_scalar(count):
-            count = as_column(count)
+        if not is_scalar(repeats):
+            repeats = as_column(repeats)
 
         result = self.__class__._from_data(
-            *libcudf.filling.repeat(self, count)
+            *libcudf.filling.repeat(self, repeats)
         )
 
         result._copy_type_metadata(self)
         return result
 
-    def _fill(self, fill_values, begin, end, inplace):
-        col_and_fill = zip(self._columns, fill_values)
-
-        if not inplace:
-            data_columns = (c._fill(v, begin, end) for (c, v) in col_and_fill)
-            return self.__class__._from_data(
-                zip(self._column_names, data_columns), self._index
-            )
-
-        for (c, v) in col_and_fill:
-            c.fill(v, begin, end, inplace=True)
-
-        return self
-
     def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         """Shift values by `periods` positions."""
-        assert axis in (None, 0) and freq is None
-        return self._shift(periods)
+        axis = self._get_axis_from_axis_arg(axis)
+        if axis != 0:
+            raise ValueError("Only axis=0 is supported.")
+        if freq is not None:
+            raise ValueError("The freq argument is not yet supported.")
 
-    def _shift(self, offset, fill_value=None):
-        data_columns = (col.shift(offset, fill_value) for col in self._columns)
+        data_columns = (
+            col.shift(periods, fill_value) for col in self._columns
+        )
         return self.__class__._from_data(
             zip(self._column_names, data_columns), self._index
         )
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 4ec7c3df076..fb86cf85c4c 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1628,7 +1628,23 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
         return self._mimic_inplace(result, inplace=inplace)
 
     def fill(self, fill_value, begin=0, end=-1, inplace=False):
-        return self._fill([fill_value], begin, end, inplace)
+        warnings.warn(
+            "The fill method will be removed in a future cuDF release.",
+            FutureWarning,
+        )
+        fill_values = [fill_value]
+        col_and_fill = zip(self._columns, fill_values)
+
+        if not inplace:
+            data_columns = (c._fill(v, begin, end) for (c, v) in col_and_fill)
+            return self.__class__._from_data(
+                zip(self._column_names, data_columns), self._index
+            )
+
+        for (c, v) in col_and_fill:
+            c.fill(v, begin, end, inplace=True)
+
+        return self
 
     def fillna(
         self, value=None, method=None, axis=None, inplace=False, limit=None
diff --git a/python/cudf/cudf/tests/test_fill.py b/python/cudf/cudf/tests/test_fill.py
index efbe2834486..224db2b39d1 100644
--- a/python/cudf/cudf/tests/test_fill.py
+++ b/python/cudf/cudf/tests/test_fill.py
@@ -50,7 +50,7 @@ def test_fill(data, fill_value, begin, end, inplace):
 
         begin = max(0, min(len(gs), begin))
         end = max(0, min(len(gs), end))
-        actual = gs._fill([fill_value], begin, end, False)
+        actual = gs.fill(fill_value, begin, end, False)
         assert actual is not gs
 
     ps[begin:end] = fill_value
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index fe95b2930df..f8c136b8c2d 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -98,15 +98,9 @@ def test_full_dataframe_20(dtype, nrows, ncols):
     ).astype(dtype)
     gdf = cudf.from_pandas(pdf)
 
-    ncols, nrows = gdf._repr_pandas025_formatting(ncols, nrows, dtype)
-    pd.options.display.max_rows = int(nrows)
-    pd.options.display.max_columns = int(ncols)
-
     assert pdf.__repr__() == gdf.__repr__()
     assert pdf._repr_html_() == gdf._repr_html_()
     assert pdf._repr_latex_() == gdf._repr_latex_()
-    pd.reset_option("display.max_rows")
-    pd.reset_option("display.max_columns")
 
 
 @pytest.mark.parametrize("dtype", repr_categories)

From 897a9eaebd8396728a1a91093554ba99ea3e85ba Mon Sep 17 00:00:00 2001
From: Mayank Anand <36782063+mayankanand007@users.noreply.github.com>
Date: Tue, 4 Jan 2022 11:02:58 -0500
Subject: [PATCH 11/12] Refactoring ceil/round/floor code for datetime64 types
 (#9926)

This PR is a follow-up to #9820, where @bdice and @vyasr suggested a design that avoids the boilerplate code common to the implementations of ceil/round/floor. The aim is to reduce the total number of functions and to have a cleaner design.

Authors:
  - Mayank Anand (https://github.com/mayankanand007)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Karthikeyan (https://github.com/karthikeyann)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/9926
---
 cpp/include/cudf/datetime.hpp            | 278 +++--------------------
 cpp/src/datetime/datetime_ops.cu         | 242 ++------------------
 cpp/tests/datetime/datetime_ops_test.cpp | 200 ++++++++--------
 python/cudf/cudf/_lib/cpp/datetime.pxd   |  53 ++---
 python/cudf/cudf/_lib/datetime.pyx       |  89 +++-----
 5 files changed, 215 insertions(+), 647 deletions(-)

diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp
index 17bea935dfd..117119cd40f 100644
--- a/cpp/include/cudf/datetime.hpp
+++ b/cpp/include/cudf/datetime.hpp
@@ -285,280 +285,66 @@ std::unique_ptr<cudf::column> extract_quarter(
   cudf::column_view const& column,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-/** @} */  // end of group
-
-/**
- * @brief Round up to the nearest day
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> ceil_day(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest hour
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> ceil_hour(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest minute
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> ceil_minute(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest second
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> ceil_second(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest millisecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> ceil_millisecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest microsecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> ceil_microsecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round up to the nearest nanosecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> ceil_nanosecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
 /**
- * @brief Round down to the nearest day
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
+ * @brief Fixed frequencies supported by datetime rounding functions ceil, floor, round.
  *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
  */
-std::unique_ptr<cudf::column> floor_day(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+enum class rounding_frequency : int32_t {
+  DAY,
+  HOUR,
+  MINUTE,
+  SECOND,
+  MILLISECOND,
+  MICROSECOND,
+  NANOSECOND
+};
 
 /**
- * @brief Round down to the nearest hour
+ * @brief Round datetimes up to the nearest multiple of the given frequency.
  *
- * @param column cudf::column_view of the input datetime values
+ * @param column cudf::column_view of the input datetime values.
+ * @param freq rounding_frequency indicating the frequency to round up to.
  * @param mr Device memory resource used to allocate device memory of the returned column.
  *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP.
+ * @return cudf::column of the same datetime resolution as the input column.
  */
-std::unique_ptr<cudf::column> floor_hour(
+std::unique_ptr<cudf::column> ceil_datetimes(
   cudf::column_view const& column,
+  rounding_frequency freq,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Round down to the nearest minute
+ * @brief Round datetimes down to the nearest multiple of the given frequency.
  *
- * @param column cudf::column_view of the input datetime values
+ * @param column cudf::column_view of the input datetime values.
+ * @param freq rounding_frequency indicating the frequency to round down to.
  * @param mr Device memory resource used to allocate device memory of the returned column.
  *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP.
+ * @return cudf::column of the same datetime resolution as the input column.
  */
-std::unique_ptr<cudf::column> floor_minute(
+std::unique_ptr<cudf::column> floor_datetimes(
   cudf::column_view const& column,
+  rounding_frequency freq,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Round down to the nearest second
+ * @brief Round datetimes to the nearest multiple of the given frequency.
  *
- * @param column cudf::column_view of the input datetime values
+ * @param column cudf::column_view of the input datetime values.
+ * @param freq rounding_frequency indicating the frequency to round to.
  * @param mr Device memory resource used to allocate device memory of the returned column.
  *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP.
+ * @return cudf::column of the same datetime resolution as the input column.
  */
-std::unique_ptr<cudf::column> floor_second(
+std::unique_ptr<cudf::column> round_datetimes(
   cudf::column_view const& column,
+  rounding_frequency freq,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
-/**
- * @brief Round down to the nearest millisecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> floor_millisecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round down to the nearest microsecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> floor_microsecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round down to the nearest nanosecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> floor_nanosecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest day
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> round_day(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest hour
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> round_hour(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest minute
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> round_minute(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest second
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<cudf::column> round_second(
-  cudf::column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest millisecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> round_millisecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest microsecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> round_microsecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Round to the nearest nanosecond
- *
- * @param column cudf::column_view of the input datetime values
- * @param mr Device memory resource used to allocate device memory of the returned column.
- *
- * @throw cudf::logic_error if input column datatype is not TIMESTAMP
- * @return cudf::column of the same datetime resolution as the input column
- */
-std::unique_ptr<column> round_nanosecond(
-  column_view const& column,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+/** @} */  // end of group
 
 }  // namespace datetime
 }  // namespace cudf
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index 85653b4f0be..1e9a39560b8 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -113,9 +113,9 @@ struct RoundFunctor {
 
 struct RoundingDispatcher {
   rounding_function round_kind;
-  datetime_component component;
+  rounding_frequency component;
 
-  RoundingDispatcher(rounding_function round_kind, datetime_component component)
+  RoundingDispatcher(rounding_function round_kind, rounding_frequency component)
     : round_kind(round_kind), component(component)
   {
   }
@@ -124,25 +124,25 @@ struct RoundingDispatcher {
   CUDA_DEVICE_CALLABLE Timestamp operator()(Timestamp const ts) const
   {
     switch (component) {
-      case datetime_component::DAY:
+      case rounding_frequency::DAY:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_D>{}(round_kind, ts));
-      case datetime_component::HOUR:
+      case rounding_frequency::HOUR:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_h>{}(round_kind, ts));
-      case datetime_component::MINUTE:
+      case rounding_frequency::MINUTE:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_m>{}(round_kind, ts));
-      case datetime_component::SECOND:
+      case rounding_frequency::SECOND:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_s>{}(round_kind, ts));
-      case datetime_component::MILLISECOND:
+      case rounding_frequency::MILLISECOND:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_ms>{}(round_kind, ts));
-      case datetime_component::MICROSECOND:
+      case rounding_frequency::MICROSECOND:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_us>{}(round_kind, ts));
-      case datetime_component::NANOSECOND:
+      case rounding_frequency::NANOSECOND:
         return time_point_cast<typename Timestamp::duration>(
           RoundFunctor<duration_ns>{}(round_kind, ts));
       default: cudf_assert(false && "Unsupported datetime rounding resolution.");
@@ -234,7 +234,7 @@ struct dispatch_round {
   template <typename Timestamp>
   std::enable_if_t<cudf::is_timestamp<Timestamp>(), std::unique_ptr<cudf::column>> operator()(
     rounding_function round_kind,
-    datetime_component component,
+    rounding_frequency component,
     cudf::column_view const& column,
     rmm::cuda_stream_view stream,
     rmm::mr::device_memory_resource* mr) const
@@ -420,7 +420,7 @@ std::unique_ptr<column> add_calendrical_months(column_view const& timestamp_colu
 }
 
 std::unique_ptr<column> round_general(rounding_function round_kind,
-                                      datetime_component component,
+                                      rounding_frequency component,
                                       column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
@@ -531,223 +531,31 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
 
 }  // namespace detail
 
-std::unique_ptr<column> ceil_day(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::DAY,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> ceil_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::HOUR,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> ceil_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::MINUTE,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> ceil_second(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::SECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> ceil_millisecond(column_view const& column,
-                                         rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::MILLISECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> ceil_microsecond(column_view const& column,
-                                         rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> ceil_datetimes(column_view const& column,
+                                       rounding_frequency freq,
+                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::MICROSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
+  return detail::round_general(
+    detail::rounding_function::CEIL, freq, column, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<column> ceil_nanosecond(column_view const& column,
+std::unique_ptr<column> floor_datetimes(column_view const& column,
+                                        rounding_frequency freq,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::CEIL,
-                               detail::datetime_component::NANOSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_day(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::DAY,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::HOUR,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
+  return detail::round_general(
+    detail::rounding_function::FLOOR, freq, column, rmm::cuda_stream_default, mr);
 }
 
-std::unique_ptr<column> floor_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::MINUTE,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_second(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::SECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_millisecond(column_view const& column,
-                                          rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::MILLISECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_microsecond(column_view const& column,
-                                          rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::MICROSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> floor_nanosecond(column_view const& column,
-                                         rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::FLOOR,
-                               detail::datetime_component::NANOSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_day(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::DAY,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::HOUR,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_minute(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::MINUTE,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_second(column_view const& column, rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::SECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_millisecond(column_view const& column,
-                                          rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::MILLISECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_microsecond(column_view const& column,
-                                          rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::MICROSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
-}
-
-std::unique_ptr<column> round_nanosecond(column_view const& column,
-                                         rmm::mr::device_memory_resource* mr)
+std::unique_ptr<column> round_datetimes(column_view const& column,
+                                        rounding_frequency freq,
+                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::round_general(detail::rounding_function::ROUND,
-                               detail::datetime_component::NANOSECOND,
-                               column,
-                               rmm::cuda_stream_default,
-                               mr);
+  return detail::round_general(
+    detail::rounding_function::ROUND, freq, column, rmm::cuda_stream_default, mr);
 }
 
 std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp
index 62b8425704f..655fbf5679b 100644
--- a/cpp/tests/datetime/datetime_ops_test.cpp
+++ b/cpp/tests/datetime/datetime_ops_test.cpp
@@ -347,78 +347,6 @@ TEST_F(BasicDatetimeOpsTest, TestLastDayOfMonthWithDate)
     verbosity);
 }
 
-TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime)
-{
-  using T = TypeParam;
-  using namespace cudf::test;
-  using namespace cudf::datetime;
-  using namespace cuda::std::chrono;
-
-  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
-  auto stop  = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT
-
-  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
-
-  auto host_val                     = to_host<T>(input);
-  thrust::host_vector<T> timestamps = host_val.first;
-
-  thrust::host_vector<T> ceiled_day(timestamps.size());
-  thrust::transform(timestamps.begin(), timestamps.end(), ceiled_day.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<days>(i));
-  });
-  auto expected_day =
-    fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_day.begin(), ceiled_day.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_day(input), expected_day);
-
-  thrust::host_vector<T> ceiled_hour(timestamps.size());
-  thrust::transform(timestamps.begin(), timestamps.end(), ceiled_hour.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<hours>(i));
-  });
-  auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_hour.begin(),
-                                                                                ceiled_hour.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_hour(input), expected_hour);
-
-  std::vector<T> ceiled_minute(timestamps.size());
-  std::transform(timestamps.begin(), timestamps.end(), ceiled_minute.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<minutes>(i));
-  });
-  auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
-    ceiled_minute.begin(), ceiled_minute.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_minute(input), expected_minute);
-
-  std::vector<T> ceiled_second(timestamps.size());
-  std::transform(timestamps.begin(), timestamps.end(), ceiled_second.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<seconds>(i));
-  });
-  auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
-    ceiled_second.begin(), ceiled_second.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_second(input), expected_second);
-
-  std::vector<T> ceiled_millisecond(timestamps.size());
-  std::transform(timestamps.begin(), timestamps.end(), ceiled_millisecond.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<milliseconds>(i));
-  });
-  auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
-    ceiled_millisecond.begin(), ceiled_millisecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_millisecond(input), expected_millisecond);
-
-  std::vector<T> ceiled_microsecond(timestamps.size());
-  std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<microseconds>(i));
-  });
-  auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
-    ceiled_microsecond.begin(), ceiled_microsecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_microsecond(input), expected_microsecond);
-
-  std::vector<T> ceiled_nanosecond(timestamps.size());
-  std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) {
-    return time_point_cast<typename T::duration>(ceil<nanoseconds>(i));
-  });
-  auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
-    ceiled_nanosecond.begin(), ceiled_nanosecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_nanosecond(input), expected_nanosecond);
-}
-
 TEST_F(BasicDatetimeOpsTest, TestDayOfYearWithDate)
 {
   using namespace cudf::test;
@@ -841,7 +769,7 @@ TEST_F(BasicDatetimeOpsTest, TestQuarter)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_quarter(timestamps_s), quarter);
 }
 
-TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
+TYPED_TEST(TypedDatetimeOpsTest, TestCeilDatetime)
 {
   using T = TypeParam;
   using namespace cudf::test;
@@ -851,10 +779,85 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
   auto stop  = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT
 
-  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
+  auto const input =
+    generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
+  auto const timestamps = to_host<T>(input).first;
+
+  std::vector<T> ceiled_day(timestamps.size());
+  thrust::transform(timestamps.begin(), timestamps.end(), ceiled_day.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<days>(i));
+  });
+  auto expected_day =
+    fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_day.begin(), ceiled_day.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::DAY), expected_day);
+
+  std::vector<T> ceiled_hour(timestamps.size());
+  thrust::transform(timestamps.begin(), timestamps.end(), ceiled_hour.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<hours>(i));
+  });
+  auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(ceiled_hour.begin(),
+                                                                                ceiled_hour.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::HOUR), expected_hour);
+
+  std::vector<T> ceiled_minute(timestamps.size());
+  std::transform(timestamps.begin(), timestamps.end(), ceiled_minute.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<minutes>(i));
+  });
+  auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
+    ceiled_minute.begin(), ceiled_minute.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MINUTE),
+                                 expected_minute);
+
+  std::vector<T> ceiled_second(timestamps.size());
+  std::transform(timestamps.begin(), timestamps.end(), ceiled_second.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<seconds>(i));
+  });
+  auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
+    ceiled_second.begin(), ceiled_second.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::SECOND),
+                                 expected_second);
+
+  std::vector<T> ceiled_millisecond(timestamps.size());
+  std::transform(timestamps.begin(), timestamps.end(), ceiled_millisecond.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<milliseconds>(i));
+  });
+  auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
+    ceiled_millisecond.begin(), ceiled_millisecond.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MILLISECOND),
+                                 expected_millisecond);
+
+  std::vector<T> ceiled_microsecond(timestamps.size());
+  std::transform(timestamps.begin(), timestamps.end(), ceiled_microsecond.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<microseconds>(i));
+  });
+  auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
+    ceiled_microsecond.begin(), ceiled_microsecond.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::MICROSECOND),
+                                 expected_microsecond);
+
+  std::vector<T> ceiled_nanosecond(timestamps.size());
+  std::transform(timestamps.begin(), timestamps.end(), ceiled_nanosecond.begin(), [](auto i) {
+    return time_point_cast<typename T::duration>(ceil<nanoseconds>(i));
+  });
+  auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
+    ceiled_nanosecond.begin(), ceiled_nanosecond.end());
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*ceil_datetimes(input, rounding_frequency::NANOSECOND),
+                                 expected_nanosecond);
+}
+
+TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
+{
+  using T = TypeParam;
+  using namespace cudf::test;
+  using namespace cudf::datetime;
+  using namespace cuda::std::chrono;
 
-  auto host_val                     = to_host<T>(input);
-  thrust::host_vector<T> timestamps = host_val.first;
+  auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
+  auto stop  = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT
+
+  auto const input =
+    generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
+  auto const timestamps = to_host<T>(input).first;
 
   std::vector<T> floored_day(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_day.begin(), [](auto i) {
@@ -862,7 +865,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_day = fixed_width_column_wrapper<T, typename T::duration::rep>(floored_day.begin(),
                                                                                floored_day.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_day(input), expected_day);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::DAY), expected_day);
 
   std::vector<T> floored_hour(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_hour.begin(), [](auto i) {
@@ -870,7 +873,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_hour.begin(), floored_hour.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_hour(input), expected_hour);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::HOUR), expected_hour);
 
   std::vector<T> floored_minute(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_minute.begin(), [](auto i) {
@@ -878,7 +881,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_minute.begin(), floored_minute.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_minute(input), expected_minute);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MINUTE),
+                                 expected_minute);
 
   std::vector<T> floored_second(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_second.begin(), [](auto i) {
@@ -886,7 +890,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_second.begin(), floored_second.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_second);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::SECOND),
+                                 expected_second);
 
   std::vector<T> floored_millisecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_millisecond.begin(), [](auto i) {
@@ -894,7 +899,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_millisecond.begin(), floored_millisecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_millisecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MILLISECOND),
+                                 expected_millisecond);
 
   std::vector<T> floored_microsecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_microsecond.begin(), [](auto i) {
@@ -902,7 +908,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_microsecond.begin(), floored_microsecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_second(input), expected_microsecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::MICROSECOND),
+                                 expected_microsecond);
 
   std::vector<T> floored_nanosecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), floored_nanosecond.begin(), [](auto i) {
@@ -910,7 +917,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestFloorDatetime)
   });
   auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     floored_nanosecond.begin(), floored_nanosecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_millisecond(input), expected_nanosecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*floor_datetimes(input, rounding_frequency::NANOSECOND),
+                                 expected_nanosecond);
 }
 
 TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
@@ -923,10 +931,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   auto start = milliseconds(-2500000000000);  // Sat, 11 Oct 1890 19:33:20 GMT
   auto stop  = milliseconds(2500000000000);   // Mon, 22 Mar 2049 04:26:40 GMT
 
-  auto input = generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
-
-  auto host_val   = to_host<T>(input);
-  auto timestamps = host_val.first;
+  auto const input =
+    generate_timestamps<T>(this->size(), time_point_ms(start), time_point_ms(stop));
+  auto const timestamps = to_host<T>(input).first;
 
   std::vector<T> rounded_day(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_day.begin(), [](auto i) {
@@ -934,7 +941,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_day = fixed_width_column_wrapper<T, typename T::duration::rep>(rounded_day.begin(),
                                                                                rounded_day.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_day(input), expected_day);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::DAY), expected_day);
 
   std::vector<T> rounded_hour(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_hour.begin(), [](auto i) {
@@ -942,7 +949,7 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_hour = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_hour.begin(), rounded_hour.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_hour(input), expected_hour);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::HOUR), expected_hour);
 
   std::vector<T> rounded_minute(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_minute.begin(), [](auto i) {
@@ -950,7 +957,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_minute = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_minute.begin(), rounded_minute.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_minute(input), expected_minute);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MINUTE),
+                                 expected_minute);
 
   std::vector<T> rounded_second(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_second.begin(), [](auto i) {
@@ -958,7 +966,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_second = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_second.begin(), rounded_second.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_second(input), expected_second);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::SECOND),
+                                 expected_second);
 
   std::vector<T> rounded_millisecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_millisecond.begin(), [](auto i) {
@@ -966,7 +975,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_millisecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_millisecond.begin(), rounded_millisecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_millisecond(input), expected_millisecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MILLISECOND),
+                                 expected_millisecond);
 
   std::vector<T> rounded_microsecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_microsecond.begin(), [](auto i) {
@@ -974,7 +984,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_microsecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_microsecond.begin(), rounded_microsecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_microsecond(input), expected_microsecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::MICROSECOND),
+                                 expected_microsecond);
 
   std::vector<T> rounded_nanosecond(timestamps.size());
   std::transform(timestamps.begin(), timestamps.end(), rounded_nanosecond.begin(), [](auto i) {
@@ -982,7 +993,8 @@ TYPED_TEST(TypedDatetimeOpsTest, TestRoundDatetime)
   });
   auto expected_nanosecond = fixed_width_column_wrapper<T, typename T::duration::rep>(
     rounded_nanosecond.begin(), rounded_nanosecond.end());
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_nanosecond(input), expected_nanosecond);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*round_datetimes(input, rounding_frequency::NANOSECOND),
+                                 expected_nanosecond);
 }
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd
index f75b39ce6ee..498fc313cf9 100644
--- a/python/cudf/cudf/_lib/cpp/datetime.pxd
+++ b/python/cudf/cudf/_lib/cpp/datetime.pxd
@@ -13,45 +13,26 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
     cdef unique_ptr[column] extract_hour(const column_view& column) except +
     cdef unique_ptr[column] extract_minute(const column_view& column) except +
     cdef unique_ptr[column] extract_second(const column_view& column) except +
-    cdef unique_ptr[column] ceil_day(const column_view& column) except +
-    cdef unique_ptr[column] ceil_hour(const column_view& column) except +
-    cdef unique_ptr[column] ceil_minute(const column_view& column) except +
-    cdef unique_ptr[column] ceil_second(const column_view& column) except +
-    cdef unique_ptr[column] ceil_millisecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] ceil_microsecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] ceil_nanosecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] floor_day(const column_view& column) except +
-    cdef unique_ptr[column] floor_hour(const column_view& column) except +
-    cdef unique_ptr[column] floor_minute(const column_view& column) except +
-    cdef unique_ptr[column] floor_second(const column_view& column) except +
-    cdef unique_ptr[column] floor_millisecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] floor_microsecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] floor_nanosecond(
-        const column_view& column
-    ) except +
-    cdef unique_ptr[column] round_day(const column_view& column) except +
-    cdef unique_ptr[column] round_hour(const column_view& column) except +
-    cdef unique_ptr[column] round_minute(const column_view& column) except +
-    cdef unique_ptr[column] round_second(const column_view& column) except +
-    cdef unique_ptr[column] round_millisecond(
-        const column_view& column
+
+    ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency":
+        DAY "cudf::datetime::rounding_frequency::DAY"
+        HOUR "cudf::datetime::rounding_frequency::HOUR"
+        MINUTE "cudf::datetime::rounding_frequency::MINUTE"
+        SECOND "cudf::datetime::rounding_frequency::SECOND"
+        MILLISECOND "cudf::datetime::rounding_frequency::MILLISECOND"
+        MICROSECOND "cudf::datetime::rounding_frequency::MICROSECOND"
+        NANOSECOND "cudf::datetime::rounding_frequency::NANOSECOND"
+
+    cdef unique_ptr[column] ceil_datetimes(
+        const column_view& column, rounding_frequency freq
     ) except +
-    cdef unique_ptr[column] round_microsecond(
-        const column_view& column
+    cdef unique_ptr[column] floor_datetimes(
+        const column_view& column, rounding_frequency freq
     ) except +
-    cdef unique_ptr[column] round_nanosecond(
-        const column_view& column
+    cdef unique_ptr[column] round_datetimes(
+        const column_view& column, rounding_frequency freq
     ) except +
+
     cdef unique_ptr[column] add_calendrical_months(
         const column_view& timestamps,
         const column_view& months
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index 3c05a17c268..e41016645cd 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -62,82 +62,63 @@ def extract_datetime_component(Column col, object field):
     return result
 
 
-def ceil_datetime(Column col, object field):
+cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
+    cdef libcudf_datetime.rounding_frequency freq_val
+
+    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Timedelta.resolution_string.html
+    if freq == "D":
+        freq_val = libcudf_datetime.rounding_frequency.DAY
+    elif freq == "H":
+        freq_val = libcudf_datetime.rounding_frequency.HOUR
+    elif freq in ("T", "min"):
+        freq_val = libcudf_datetime.rounding_frequency.MINUTE
+    elif freq == "S":
+        freq_val = libcudf_datetime.rounding_frequency.SECOND
+    elif freq in ("L", "ms"):
+        freq_val = libcudf_datetime.rounding_frequency.MILLISECOND
+    elif freq in ("U", "us"):
+        freq_val = libcudf_datetime.rounding_frequency.MICROSECOND
+    elif freq == "N":
+        freq_val = libcudf_datetime.rounding_frequency.NANOSECOND
+    else:
+        raise ValueError(f"Invalid resolution: '{freq}'")
+    return freq_val
+
+
+def ceil_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
+    cdef libcudf_datetime.rounding_frequency freq_val = \
+        _get_rounding_frequency(freq)
 
     with nogil:
-        # https://pandas.pydata.org/pandas-docs/version/0.25.0/reference/api/pandas.Timedelta.resolution.html
-        if field == "D":
-            c_result = move(libcudf_datetime.ceil_day(col_view))
-        elif field == "H":
-            c_result = move(libcudf_datetime.ceil_hour(col_view))
-        elif field == "T" or field == "min":
-            c_result = move(libcudf_datetime.ceil_minute(col_view))
-        elif field == "S":
-            c_result = move(libcudf_datetime.ceil_second(col_view))
-        elif field == "L" or field == "ms":
-            c_result = move(libcudf_datetime.ceil_millisecond(col_view))
-        elif field == "U" or field == "us":
-            c_result = move(libcudf_datetime.ceil_microsecond(col_view))
-        elif field == "N":
-            c_result = move(libcudf_datetime.ceil_nanosecond(col_view))
-        else:
-            raise ValueError(f"Invalid resolution: '{field}'")
+        c_result = move(libcudf_datetime.ceil_datetimes(col_view, freq_val))
 
     result = Column.from_unique_ptr(move(c_result))
     return result
 
 
-def floor_datetime(Column col, object field):
+def floor_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
+    cdef libcudf_datetime.rounding_frequency freq_val = \
+        _get_rounding_frequency(freq)
 
     with nogil:
-        # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html
-        if field == "D":
-            c_result = move(libcudf_datetime.floor_day(col_view))
-        elif field == "H":
-            c_result = move(libcudf_datetime.floor_hour(col_view))
-        elif field == "T" or field == "min":
-            c_result = move(libcudf_datetime.floor_minute(col_view))
-        elif field == "S":
-            c_result = move(libcudf_datetime.floor_second(col_view))
-        elif field == "L" or field == "ms":
-            c_result = move(libcudf_datetime.floor_millisecond(col_view))
-        elif field == "U" or field == "us":
-            c_result = move(libcudf_datetime.floor_microsecond(col_view))
-        elif field == "N":
-            c_result = move(libcudf_datetime.floor_nanosecond(col_view))
-        else:
-            raise ValueError(f"Invalid resolution: '{field}'")
+        c_result = move(libcudf_datetime.floor_datetimes(col_view, freq_val))
 
     result = Column.from_unique_ptr(move(c_result))
     return result
 
 
-def round_datetime(Column col, object field):
+def round_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
+    cdef libcudf_datetime.rounding_frequency freq_val = \
+        _get_rounding_frequency(freq)
 
     with nogil:
-        # https://pandas.pydata.org/docs/reference/api/pandas.Timedelta.resolution_string.html
-        if field == "D":
-            c_result = move(libcudf_datetime.round_day(col_view))
-        elif field == "H":
-            c_result = move(libcudf_datetime.round_hour(col_view))
-        elif field == "T" or field == "min":
-            c_result = move(libcudf_datetime.round_minute(col_view))
-        elif field == "S":
-            c_result = move(libcudf_datetime.round_second(col_view))
-        elif field == "L" or field == "ms":
-            c_result = move(libcudf_datetime.round_millisecond(col_view))
-        elif field == "U" or field == "us":
-            c_result = move(libcudf_datetime.round_microsecond(col_view))
-        elif field == "N":
-            c_result = move(libcudf_datetime.round_nanosecond(col_view))
-        else:
-            raise ValueError(f"Invalid resolution: '{field}'")
+        c_result = move(libcudf_datetime.round_datetimes(col_view, freq_val))
 
     result = Column.from_unique_ptr(move(c_result))
     return result

From d69ea611c8a89cd661d01e361575056e37a6060a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 12:33:33 -0600
Subject: [PATCH 12/12] Remove deprecated method DataFrame.hash_columns.
 (#9943)

This PR removes the deprecated method `DataFrame.hash_columns`. Users can replace existing calls like `df.hash_columns(columns, method)` with `df[columns].hash_values(method)`. Resolves #9503, follows up on #9458.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/9943
---
 docs/cudf/source/api_docs/dataframe.rst  |  1 -
 python/cudf/cudf/core/dataframe.py       | 32 ------------------------
 python/cudf/cudf/tests/test_dataframe.py | 28 ---------------------
 3 files changed, 61 deletions(-)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index 94f88a40ea5..2de55553c3f 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -254,7 +254,6 @@ Serialization / IO / conversion
    DataFrame.from_arrow
    DataFrame.from_pandas
    DataFrame.from_records
-   DataFrame.hash_columns
    DataFrame.hash_values
    DataFrame.to_arrow
    DataFrame.to_dlpack
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b7fc5efb412..d97ea456f72 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4150,38 +4150,6 @@ def apply_chunks(
             tpb=tpb,
         )
 
-    def hash_columns(self, columns=None, method="murmur3"):
-        """Hash the given *columns* and return a new device array
-
-        This method is deprecated. Replace ``df.hash_columns(columns, method)``
-        with ``df[columns].hash_values(method)``.
-
-        Parameters
-        ----------
-        columns : sequence of str; optional
-            Sequence of column names. If columns is *None* (unspecified),
-            all columns in the frame are used.
-        method : {'murmur3', 'md5'}, default 'murmur3'
-            Hash function to use:
-            * murmur3: MurmurHash3 hash function.
-            * md5: MD5 hash function.
-
-        Returns
-        -------
-        Series
-            Hash values for each row.
-        """
-        warnings.warn(
-            "The `hash_columns` method will be removed in a future cuDF "
-            "release. Replace `df.hash_columns(columns, method)` with "
-            "`df[columns].hash_values(method)`.",
-            FutureWarning,
-        )
-        if columns is None:
-            # Slice by [:] to keep all columns.
-            columns = slice(None, None, None)
-        return self[columns].hash_values(method=method)
-
     def hash_values(self, method="murmur3"):
         """Compute the hash of values in each row.
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 78560ee6723..33c993cc56a 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -1107,34 +1107,6 @@ def test_assign():
     np.testing.assert_equal(gdf2.y.to_numpy(), [2, 3, 4])
 
 
-@pytest.mark.parametrize("nrows", [1, 8, 100, 1000])
-@pytest.mark.parametrize("method", ["murmur3", "md5"])
-def test_dataframe_hash_columns(nrows, method):
-    gdf = cudf.DataFrame()
-    data = np.asarray(range(nrows))
-    data[0] = data[-1]  # make first and last the same
-    gdf["a"] = data
-    gdf["b"] = gdf.a + 100
-    with pytest.warns(FutureWarning):
-        out = gdf.hash_columns(["a", "b"])
-    assert isinstance(out, cudf.Series)
-    assert len(out) == nrows
-    assert out.dtype == np.int32
-
-    # Check default
-    with pytest.warns(FutureWarning):
-        out_all = gdf.hash_columns()
-    assert_eq(out, out_all)
-
-    # Check single column
-    with pytest.warns(FutureWarning):
-        out_one = gdf.hash_columns(["a"], method=method)
-    # First matches last
-    assert out_one.iloc[0] == out_one.iloc[-1]
-    # Equivalent to the cudf.Series.hash_values()
-    assert_eq(gdf["a"].hash_values(method=method), out_one)
-
-
 @pytest.mark.parametrize("nrows", [1, 8, 100, 1000])
 @pytest.mark.parametrize("method", ["murmur3", "md5"])
 def test_dataframe_hash_values(nrows, method):