diff --git a/CHANGELOG.md b/CHANGELOG.md index 316f1abde98..cc20728ca35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,325 @@ Please see https://github.com/rapidsai/cudf/releases/tag/v21.08.00a for the latest changes to this development branch. -# cuDF 0.20.0 (Date TBD) - -Please see https://github.com/rapidsai/cudf/releases/tag/v0.20.0a for the latest changes to this development branch. - -# cuDF 0.19.0 (Date TBD) - -Please see https://github.com/rapidsai/cudf/releases/tag/v0.19.0a for the latest changes to this development branch. +# cuDF 21.06.00 (Date TBD) + +Please see https://github.com/rapidsai/cudf/releases/tag/v21.06.00a for the latest changes to this development branch. + +# cuDF 0.19.0 (21 Apr 2021) + +## 🚨 Breaking Changes + +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Don't identify decimals as strings. ([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Refactor strings column
factories ([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) + +## 🐛 Bug Fixes + +- Fix a `NameError` in meta dispatch API ([#7996](https://github.com/rapidsai/cudf/pull/7996)) [@galipremsagar](https://github.com/galipremsagar) +- Reindex in `DataFrame.__setitem__` ([#7957](https://github.com/rapidsai/cudf/pull/7957)) [@galipremsagar](https://github.com/galipremsagar) +- jitify direct-to-cubin compilation and caching. ([#7919](https://github.com/rapidsai/cudf/pull/7919)) [@cwharris](https://github.com/cwharris) +- Use dynamic cudart for nvcomp in java build ([#7896](https://github.com/rapidsai/cudf/pull/7896)) [@abellina](https://github.com/abellina) +- fix "incompatible redefinition" warnings ([#7894](https://github.com/rapidsai/cudf/pull/7894)) [@cwharris](https://github.com/cwharris) +- cudf consistently specifies the cuda runtime ([#7887](https://github.com/rapidsai/cudf/pull/7887)) [@robertmaynard](https://github.com/robertmaynard) +- disable verbose output for jitify_preprocess ([#7886](https://github.com/rapidsai/cudf/pull/7886)) [@cwharris](https://github.com/cwharris) +- CMake jit_preprocess_files function only runs when needed ([#7872](https://github.com/rapidsai/cudf/pull/7872)) [@robertmaynard](https://github.com/robertmaynard) +- Push DeviceScalar construction into cython for list.contains ([#7864](https://github.com/rapidsai/cudf/pull/7864)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- cudf now sets an install rpath of $ORIGIN ([#7863](https://github.com/rapidsai/cudf/pull/7863)) [@robertmaynard](https://github.com/robertmaynard) +- Don't install Thrust examples, tests, docs, and python files ([#7811](https://github.com/rapidsai/cudf/pull/7811)) [@robertmaynard](https://github.com/robertmaynard) +- Sort by index in groupby tests more consistently ([#7802](https://github.com/rapidsai/cudf/pull/7802)) [@shwina](https://github.com/shwina) +- Revert "Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743))" ([#7793](https://github.com/rapidsai/cudf/pull/7793)) [@raydouglass](https://github.com/raydouglass) +- Add decimal column handling in copy_type_metadata ([#7788](https://github.com/rapidsai/cudf/pull/7788)) [@shwina](https://github.com/shwina) +- Add column names validation in parquet writer ([#7786](https://github.com/rapidsai/cudf/pull/7786)) [@galipremsagar](https://github.com/galipremsagar) +- Fix Java explode outer unit tests ([#7782](https://github.com/rapidsai/cudf/pull/7782)) [@jlowe](https://github.com/jlowe) +- Fix compiler warning about non-POD types passed through ellipsis
([#7781](https://github.com/rapidsai/cudf/pull/7781)) [@jrhemstad](https://github.com/jrhemstad) +- User resource fix for replace_nulls ([#7769](https://github.com/rapidsai/cudf/pull/7769)) [@magnatelee](https://github.com/magnatelee) +- Fix type dispatch for columnar replace_nulls ([#7768](https://github.com/rapidsai/cudf/pull/7768)) [@jlowe](https://github.com/jlowe) +- Add `ignore_order` parameter to dask-cudf concat dispatch ([#7765](https://github.com/rapidsai/cudf/pull/7765)) [@galipremsagar](https://github.com/galipremsagar) +- Fix slicing and arrow representations of decimal columns ([#7755](https://github.com/rapidsai/cudf/pull/7755)) [@vyasr](https://github.com/vyasr) +- Fixing issue with explode_outer position not nulling position entries of null rows ([#7754](https://github.com/rapidsai/cudf/pull/7754)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Implement scatter for struct columns ([#7752](https://github.com/rapidsai/cudf/pull/7752)) [@ttnghia](https://github.com/ttnghia) +- Fix data corruption in string columns ([#7746](https://github.com/rapidsai/cudf/pull/7746)) [@galipremsagar](https://github.com/galipremsagar) +- Fix string length in stripe dictionary building ([#7744](https://github.com/rapidsai/cudf/pull/7744)) [@kaatish](https://github.com/kaatish) +- Update conda recipes pinning of repo dependencies ([#7743](https://github.com/rapidsai/cudf/pull/7743)) [@mike-wendt](https://github.com/mike-wendt) +- Enable dask dispatch to cuDF's `is_categorical_dtype` for cuDF objects ([#7740](https://github.com/rapidsai/cudf/pull/7740)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix dictionary size computation in ORC writer ([#7737](https://github.com/rapidsai/cudf/pull/7737)) [@vuule](https://github.com/vuule) +- Fix `cudf::cast` overflow for `decimal64` to `int32_t` or smaller in certain cases ([#7733](https://github.com/rapidsai/cudf/pull/7733)) [@codereport](https://github.com/codereport) +- Change JNI API to avoid loading native dependencies when creating sort order classes. ([#7729](https://github.com/rapidsai/cudf/pull/7729)) [@revans2](https://github.com/revans2) +- Disable column_view data accessors for unsupported types ([#7725](https://github.com/rapidsai/cudf/pull/7725)) [@jrhemstad](https://github.com/jrhemstad) +- Materialize `RangeIndex` when `index=True` in parquet writer ([#7711](https://github.com/rapidsai/cudf/pull/7711)) [@galipremsagar](https://github.com/galipremsagar) +- Don't identify decimals as strings. 
([#7710](https://github.com/rapidsai/cudf/pull/7710)) [@vyasr](https://github.com/vyasr) +- Fix return type of `DataFrame.argsort` ([#7706](https://github.com/rapidsai/cudf/pull/7706)) [@galipremsagar](https://github.com/galipremsagar) +- Fix/correct cudf installed package requirements ([#7688](https://github.com/rapidsai/cudf/pull/7688)) [@robertmaynard](https://github.com/robertmaynard) +- Fix SparkMurmurHash3_32 hash inconsistencies with Apache Spark ([#7672](https://github.com/rapidsai/cudf/pull/7672)) [@jlowe](https://github.com/jlowe) +- Fix ORC reader issue with reading empty string columns ([#7656](https://github.com/rapidsai/cudf/pull/7656)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix Java Parquet write after writer API changes ([#7655](https://github.com/rapidsai/cudf/pull/7655)) [@revans2](https://github.com/revans2) +- Fixing empty null lists throwing explode_outer for a loop. ([#7649](https://github.com/rapidsai/cudf/pull/7649)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Fix internal compiler error during JNI Docker build ([#7645](https://github.com/rapidsai/cudf/pull/7645)) [@jlowe](https://github.com/jlowe) +- Fix Debug build break with device_uvectors in grouped_rolling.cu ([#7633](https://github.com/rapidsai/cudf/pull/7633)) [@mythrocks](https://github.com/mythrocks) +- Parquet reader: Fix issue when using skip_rows on non-nested columns containing nulls ([#7627](https://github.com/rapidsai/cudf/pull/7627)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix ORC reader for empty DataFrame/Table ([#7624](https://github.com/rapidsai/cudf/pull/7624)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix specifying GPU architecture in JNI build ([#7612](https://github.com/rapidsai/cudf/pull/7612)) [@jlowe](https://github.com/jlowe) +- Fix ORC writer OOM issue ([#7605](https://github.com/rapidsai/cudf/pull/7605)) [@vuule](https://github.com/vuule) +- Fix 0.18 --> 0.19 automerge ([#7589](https://github.com/rapidsai/cudf/pull/7589)) [@kkraus14](https://github.com/kkraus14) +- Fix ORC issue with incorrect timestamp nanosecond values ([#7581](https://github.com/rapidsai/cudf/pull/7581)) [@vuule](https://github.com/vuule) +- Fix missing Dask imports ([#7580](https://github.com/rapidsai/cudf/pull/7580)) [@kkraus14](https://github.com/kkraus14) +- CMAKE_CUDA_ARCHITECTURES doesn't change when build-system invokes cmake ([#7579](https://github.com/rapidsai/cudf/pull/7579)) [@robertmaynard](https://github.com/robertmaynard) +- Another fix for offsets_end() iterator in lists_column_view ([#7575](https://github.com/rapidsai/cudf/pull/7575)) [@ttnghia](https://github.com/ttnghia) +- Fix ORC writer output corruption with string columns ([#7565](https://github.com/rapidsai/cudf/pull/7565)) [@vuule](https://github.com/vuule) +- Fix cudf::lists::sort_lists failing for sliced column ([#7564](https://github.com/rapidsai/cudf/pull/7564)) [@ttnghia](https://github.com/ttnghia) +- FIX Fix Anaconda upload args ([#7558](https://github.com/rapidsai/cudf/pull/7558)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix index mismatch issue in equality related APIs ([#7555](https://github.com/rapidsai/cudf/pull/7555)) 
[@galipremsagar](https://github.com/galipremsagar) +- FIX Revert gpuci_conda_retry on conda file output locations ([#7552](https://github.com/rapidsai/cudf/pull/7552)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- Fix offset_end iterator for lists_column_view, which was not correctl… ([#7551](https://github.com/rapidsai/cudf/pull/7551)) [@ttnghia](https://github.com/ttnghia) +- Fix no such file dlpack.h error when build libcudf ([#7549](https://github.com/rapidsai/cudf/pull/7549)) [@chenrui17](https://github.com/chenrui17) +- Update missing docstring examples in python public APIs ([#7546](https://github.com/rapidsai/cudf/pull/7546)) [@galipremsagar](https://github.com/galipremsagar) +- Decimal32 Build Fix ([#7544](https://github.com/rapidsai/cudf/pull/7544)) [@razajafri](https://github.com/razajafri) +- FIX Retry conda output location ([#7540](https://github.com/rapidsai/cudf/pull/7540)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- fix missing renames of dask git branches from master to main ([#7535](https://github.com/rapidsai/cudf/pull/7535)) [@kkraus14](https://github.com/kkraus14) +- Remove detail from device_span ([#7533](https://github.com/rapidsai/cudf/pull/7533)) [@rwlee](https://github.com/rwlee) +- Change dask and distributed branch to main ([#7532](https://github.com/rapidsai/cudf/pull/7532)) [@dantegd](https://github.com/dantegd) +- Update JNI build to use CUDF_USE_ARROW_STATIC ([#7526](https://github.com/rapidsai/cudf/pull/7526)) [@jlowe](https://github.com/jlowe) +- Make sure rmm::rmm CMake target is visibile to cudf users ([#7524](https://github.com/rapidsai/cudf/pull/7524)) [@robertmaynard](https://github.com/robertmaynard) +- Fix contiguous_split not properly handling output partitions > 2 GB.
([#7515](https://github.com/rapidsai/cudf/pull/7515)) [@nvdbaranec](https://github.com/nvdbaranec) +- Change jit launch to safe_launch ([#7510](https://github.com/rapidsai/cudf/pull/7510)) [@devavret](https://github.com/devavret) +- Fix comparison between Datetime/Timedelta columns and NULL scalars ([#7504](https://github.com/rapidsai/cudf/pull/7504)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix off-by-one error in char-parallel string scalar replace ([#7502](https://github.com/rapidsai/cudf/pull/7502)) [@jlowe](https://github.com/jlowe) +- Fix JNI deprecation of all, put it on the wrong version before ([#7501](https://github.com/rapidsai/cudf/pull/7501)) [@revans2](https://github.com/revans2) +- Fix Series/Dataframe Mixed Arithmetic ([#7491](https://github.com/rapidsai/cudf/pull/7491)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix JNI build after removal of libcudf sub-libraries ([#7486](https://github.com/rapidsai/cudf/pull/7486)) [@jlowe](https://github.com/jlowe) +- Correctly compile benchmarks ([#7485](https://github.com/rapidsai/cudf/pull/7485)) [@robertmaynard](https://github.com/robertmaynard) +- Fix bool column corruption with ORC Reader ([#7483](https://github.com/rapidsai/cudf/pull/7483)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Fix `__repr__` for categorical dtype ([#7476](https://github.com/rapidsai/cudf/pull/7476)) [@galipremsagar](https://github.com/galipremsagar) +- Java cleaner synchronization ([#7474](https://github.com/rapidsai/cudf/pull/7474)) [@abellina](https://github.com/abellina) +- Fix java float/double parsing tests ([#7473](https://github.com/rapidsai/cudf/pull/7473)) [@revans2](https://github.com/revans2) +- Pass stream and user resource to make_default_constructed_scalar ([#7469](https://github.com/rapidsai/cudf/pull/7469)) [@magnatelee](https://github.com/magnatelee) +- Improve stability of dask_cudf.DataFrame.var and dask_cudf.DataFrame.std ([#7453](https://github.com/rapidsai/cudf/pull/7453)) [@rjzamora](https://github.com/rjzamora) +- Missing `device_storage_dispatch` change affecting `cudf::gather` ([#7449](https://github.com/rapidsai/cudf/pull/7449)) [@codereport](https://github.com/codereport) +- fix cuFile JNI compile errors ([#7445](https://github.com/rapidsai/cudf/pull/7445)) [@rongou](https://github.com/rongou) +- Support `Series.__setitem__` with key to a new row ([#7443](https://github.com/rapidsai/cudf/pull/7443)) [@isVoid](https://github.com/isVoid) +- Fix BUG: Exception when PYTHONOPTIMIZE=2 ([#7434](https://github.com/rapidsai/cudf/pull/7434)) [@skirui-source](https://github.com/skirui-source) +- Make inclusive scan safe for cases with leading nulls ([#7432](https://github.com/rapidsai/cudf/pull/7432)) [@magnatelee](https://github.com/magnatelee) +- Fix typo in list_device_view::pair_rep_end() ([#7423](https://github.com/rapidsai/cudf/pull/7423)) [@mythrocks](https://github.com/mythrocks) +- Fix string to double conversion and row equivalent comparison ([#7410](https://github.com/rapidsai/cudf/pull/7410)) [@ttnghia](https://github.com/ttnghia) +- Fix thrust failure when transfering data from device_vector to host_vector with vectors of size 1 
([#7382](https://github.com/rapidsai/cudf/pull/7382)) [@ttnghia](https://github.com/ttnghia) +- Fix std::exeception catch-by-reference gcc9 compile error ([#7380](https://github.com/rapidsai/cudf/pull/7380)) [@davidwendt](https://github.com/davidwendt) +- Fix skiprows issue with ORC Reader ([#7359](https://github.com/rapidsai/cudf/pull/7359)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- fix Arrow CMake file ([#7358](https://github.com/rapidsai/cudf/pull/7358)) [@rongou](https://github.com/rongou) +- Fix lists::contains() for NaN and Decimals ([#7349](https://github.com/rapidsai/cudf/pull/7349)) [@mythrocks](https://github.com/mythrocks) +- Handle cupy array in `Dataframe.__setitem__` ([#7340](https://github.com/rapidsai/cudf/pull/7340)) [@galipremsagar](https://github.com/galipremsagar) +- Fix invalid-device-fn error in cudf::strings::replace_re with multiple regex's ([#7336](https://github.com/rapidsai/cudf/pull/7336)) [@davidwendt](https://github.com/davidwendt) +- FIX Add codecov upload block to gpu script ([#6860](https://github.com/rapidsai/cudf/pull/6860)) [@dillon-cullinan](https://github.com/dillon-cullinan) + +## 📖 Documentation + +- Fix join API doxygen ([#7890](https://github.com/rapidsai/cudf/pull/7890)) [@shwina](https://github.com/shwina) +- Add Resources to README. ([#7697](https://github.com/rapidsai/cudf/pull/7697)) [@bdice](https://github.com/bdice) +- Add `isin` examples in Docstring ([#7479](https://github.com/rapidsai/cudf/pull/7479)) [@galipremsagar](https://github.com/galipremsagar) +- Resolving unlinked type shorthands in cudf doc ([#7416](https://github.com/rapidsai/cudf/pull/7416)) [@isVoid](https://github.com/isVoid) +- Fix typo in regex.md doc page ([#7363](https://github.com/rapidsai/cudf/pull/7363)) [@davidwendt](https://github.com/davidwendt) +- Fix incorrect strings_column_view::chars_size documentation ([#7360](https://github.com/rapidsai/cudf/pull/7360)) [@jlowe](https://github.com/jlowe) + +## 🚀 New Features + +- Enable basic reductions for decimal columns ([#7776](https://github.com/rapidsai/cudf/pull/7776)) [@ChrisJar](https://github.com/ChrisJar) +- Enable join on decimal columns ([#7764](https://github.com/rapidsai/cudf/pull/7764)) [@ChrisJar](https://github.com/ChrisJar) +- Allow merging index column with data column using keyword "on" ([#7736](https://github.com/rapidsai/cudf/pull/7736)) [@skirui-source](https://github.com/skirui-source) +- Implement DecimalColumn + Scalar and add cudf.Scalars of Decimal64Dtype ([#7732](https://github.com/rapidsai/cudf/pull/7732)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add support for `unique` groupby aggregation ([#7726](https://github.com/rapidsai/cudf/pull/7726)) [@shwina](https://github.com/shwina) +- Expose libcudf's label_bins function to cudf ([#7724](https://github.com/rapidsai/cudf/pull/7724)) [@vyasr](https://github.com/vyasr) +- Adding support for equi-join on struct ([#7720](https://github.com/rapidsai/cudf/pull/7720)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add decimal column comparison operations ([#7716](https://github.com/rapidsai/cudf/pull/7716)) [@isVoid](https://github.com/isVoid) +- Implement scan
operations for decimal columns ([#7707](https://github.com/rapidsai/cudf/pull/7707)) [@ChrisJar](https://github.com/ChrisJar) +- Enable typecasting between decimal and int ([#7691](https://github.com/rapidsai/cudf/pull/7691)) [@ChrisJar](https://github.com/ChrisJar) +- Enable decimal support in parquet writer ([#7673](https://github.com/rapidsai/cudf/pull/7673)) [@devavret](https://github.com/devavret) +- Adds `list.unique` API ([#7664](https://github.com/rapidsai/cudf/pull/7664)) [@isVoid](https://github.com/isVoid) +- Fix NaN handling in drop_list_duplicates ([#7662](https://github.com/rapidsai/cudf/pull/7662)) [@ttnghia](https://github.com/ttnghia) +- Add `lists.sort_values` API ([#7657](https://github.com/rapidsai/cudf/pull/7657)) [@isVoid](https://github.com/isVoid) +- Add is_integer API that can check for the validity of a string-to-integer conversion ([#7642](https://github.com/rapidsai/cudf/pull/7642)) [@ttnghia](https://github.com/ttnghia) +- Adds `explode` API ([#7607](https://github.com/rapidsai/cudf/pull/7607)) [@isVoid](https://github.com/isVoid) +- Adds `list.take`, python binding for `cudf::lists::segmented_gather` ([#7591](https://github.com/rapidsai/cudf/pull/7591)) [@isVoid](https://github.com/isVoid) +- Implement cudf::label_bins() ([#7554](https://github.com/rapidsai/cudf/pull/7554)) [@vyasr](https://github.com/vyasr) +- Add Python bindings for `lists::contains` ([#7547](https://github.com/rapidsai/cudf/pull/7547)) [@skirui-source](https://github.com/skirui-source) +- cudf::row_bit_count() support. ([#7534](https://github.com/rapidsai/cudf/pull/7534)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement drop_list_duplicates ([#7528](https://github.com/rapidsai/cudf/pull/7528)) [@ttnghia](https://github.com/ttnghia) +- Add Python bindings for `lists::extract_lists_element` ([#7505](https://github.com/rapidsai/cudf/pull/7505)) [@skirui-source](https://github.com/skirui-source) +- Add explode_outer and explode_outer_position ([#7499](https://github.com/rapidsai/cudf/pull/7499)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Match Pandas logic for comparing two objects with nulls ([#7490](https://github.com/rapidsai/cudf/pull/7490)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add struct support to parquet writer ([#7461](https://github.com/rapidsai/cudf/pull/7461)) [@devavret](https://github.com/devavret) +- Enable type conversion from float to decimal type ([#7450](https://github.com/rapidsai/cudf/pull/7450)) [@ChrisJar](https://github.com/ChrisJar) +- Add cython for converting strings/fixed-point functions ([#7429](https://github.com/rapidsai/cudf/pull/7429)) [@davidwendt](https://github.com/davidwendt) +- Add struct column support to cudf::sort and cudf::sorted_order ([#7422](https://github.com/rapidsai/cudf/pull/7422)) [@karthikeyann](https://github.com/karthikeyann) +- Implement groupby collect_set ([#7420](https://github.com/rapidsai/cudf/pull/7420)) [@ttnghia](https://github.com/ttnghia) +- Merge branch-0.18 into branch-0.19 ([#7411](https://github.com/rapidsai/cudf/pull/7411)) [@raydouglass](https://github.com/raydouglass) +- Refactor strings column factories 
([#7397](https://github.com/rapidsai/cudf/pull/7397)) [@harrism](https://github.com/harrism) +- Add groupby scan operations (sort groupby) ([#7387](https://github.com/rapidsai/cudf/pull/7387)) [@karthikeyann](https://github.com/karthikeyann) +- Add cudf::explode_position ([#7376](https://github.com/rapidsai/cudf/pull/7376)) [@hyperbolic2346](https://github.com/hyperbolic2346) +- Add string conversion to/from decimal values libcudf APIs ([#7364](https://github.com/rapidsai/cudf/pull/7364)) [@davidwendt](https://github.com/davidwendt) +- Add groupby SUM_OF_SQUARES support ([#7362](https://github.com/rapidsai/cudf/pull/7362)) [@karthikeyann](https://github.com/karthikeyann) +- Add `Series.drop` api ([#7304](https://github.com/rapidsai/cudf/pull/7304)) [@isVoid](https://github.com/isVoid) +- get_json_object() implementation ([#7286](https://github.com/rapidsai/cudf/pull/7286)) [@nvdbaranec](https://github.com/nvdbaranec) +- Python API for `LIstMethods.len()` ([#7283](https://github.com/rapidsai/cudf/pull/7283)) [@isVoid](https://github.com/isVoid) +- Support null_policy::EXCLUDE for COLLECT rolling aggregation ([#7264](https://github.com/rapidsai/cudf/pull/7264)) [@mythrocks](https://github.com/mythrocks) +- Add support for special tokens in nvtext::subword_tokenizer ([#7254](https://github.com/rapidsai/cudf/pull/7254)) [@davidwendt](https://github.com/davidwendt) +- Fix inplace update of data and add Series.update ([#7201](https://github.com/rapidsai/cudf/pull/7201)) [@galipremsagar](https://github.com/galipremsagar) +- Implement `cudf::group_by` (hash) for `decimal32` and `decimal64` ([#7190](https://github.com/rapidsai/cudf/pull/7190)) [@codereport](https://github.com/codereport) +- Adding support to specify "level" parameter for `Dataframe.rename` ([#7135](https://github.com/rapidsai/cudf/pull/7135)) [@skirui-source](https://github.com/skirui-source) + +## 🛠️ Improvements + +- fix GDS include path for version 0.95 ([#7877](https://github.com/rapidsai/cudf/pull/7877)) [@rongou](https://github.com/rongou) +- Update `dask` + `distributed` to `2021.4.0` ([#7858](https://github.com/rapidsai/cudf/pull/7858)) [@jakirkham](https://github.com/jakirkham) +- Add ability to extract include dirs from `CUDF_HOME` ([#7848](https://github.com/rapidsai/cudf/pull/7848)) [@galipremsagar](https://github.com/galipremsagar) +- Add USE_GDS as an option in build script ([#7833](https://github.com/rapidsai/cudf/pull/7833)) [@pxLi](https://github.com/pxLi) +- add an allocate method with stream in java DeviceMemoryBuffer ([#7826](https://github.com/rapidsai/cudf/pull/7826)) [@rongou](https://github.com/rongou) +- Constrain dask and distributed versions to 2021.3.1 ([#7825](https://github.com/rapidsai/cudf/pull/7825)) [@shwina](https://github.com/shwina) +- Revert dask versioning of concat dispatch ([#7823](https://github.com/rapidsai/cudf/pull/7823)) [@galipremsagar](https://github.com/galipremsagar) +- add copy methods in Java memory buffer ([#7791](https://github.com/rapidsai/cudf/pull/7791)) [@rongou](https://github.com/rongou) +- Update README and CONTRIBUTING for 0.19 ([#7778](https://github.com/rapidsai/cudf/pull/7778))
[@robertmaynard](https://github.com/robertmaynard) +- Allow hash_partition to take a seed value ([#7771](https://github.com/rapidsai/cudf/pull/7771)) [@magnatelee](https://github.com/magnatelee) +- Turn on NVTX by default in java build ([#7761](https://github.com/rapidsai/cudf/pull/7761)) [@tgravescs](https://github.com/tgravescs) +- Add Java bindings to join gather map APIs ([#7751](https://github.com/rapidsai/cudf/pull/7751)) [@jlowe](https://github.com/jlowe) +- Add replacements column support for Java replaceNulls ([#7750](https://github.com/rapidsai/cudf/pull/7750)) [@jlowe](https://github.com/jlowe) +- Add Java bindings for row_bit_count ([#7749](https://github.com/rapidsai/cudf/pull/7749)) [@jlowe](https://github.com/jlowe) +- Remove unused JVM array creation ([#7748](https://github.com/rapidsai/cudf/pull/7748)) [@jlowe](https://github.com/jlowe) +- Added JNI support for new is_integer ([#7739](https://github.com/rapidsai/cudf/pull/7739)) [@revans2](https://github.com/revans2) +- Create and promote library aliases in libcudf installations ([#7734](https://github.com/rapidsai/cudf/pull/7734)) [@trxcllnt](https://github.com/trxcllnt) +- Support groupby operations for decimal dtypes ([#7731](https://github.com/rapidsai/cudf/pull/7731)) [@vyasr](https://github.com/vyasr) +- Memory map the input file only when GDS compatiblity mode is not used ([#7717](https://github.com/rapidsai/cudf/pull/7717)) [@vuule](https://github.com/vuule) +- Replace device_vector with device_uvector in null_mask ([#7715](https://github.com/rapidsai/cudf/pull/7715)) [@harrism](https://github.com/harrism) +- Struct hashing support for SerialMurmur3 and SparkMurmur3 ([#7714](https://github.com/rapidsai/cudf/pull/7714)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for nvtext replace-tokens function ([#7708](https://github.com/rapidsai/cudf/pull/7708)) [@davidwendt](https://github.com/davidwendt) +- Use stream in groupby calls ([#7705](https://github.com/rapidsai/cudf/pull/7705)) [@karthikeyann](https://github.com/karthikeyann) +- Update codeowners file ([#7701](https://github.com/rapidsai/cudf/pull/7701)) [@ajschmidt8](https://github.com/ajschmidt8) +- Cleanup groupby to use host_span, device_span, device_uvector ([#7698](https://github.com/rapidsai/cudf/pull/7698)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmark for nvtext ngrams functions ([#7693](https://github.com/rapidsai/cudf/pull/7693)) [@davidwendt](https://github.com/davidwendt) +- Misc Python/Cython optimizations ([#7686](https://github.com/rapidsai/cudf/pull/7686)) [@shwina](https://github.com/shwina) +- Add gbenchmark for nvtext tokenize functions ([#7684](https://github.com/rapidsai/cudf/pull/7684)) [@davidwendt](https://github.com/davidwendt) +- Add column_device_view to orc writer ([#7676](https://github.com/rapidsai/cudf/pull/7676)) [@kaatish](https://github.com/kaatish) +- cudf_kafka now uses cuDF CMake export targets (CPM) ([#7674](https://github.com/rapidsai/cudf/pull/7674)) [@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for nvtext normalize functions ([#7668](https://github.com/rapidsai/cudf/pull/7668)) 
[@davidwendt](https://github.com/davidwendt) +- Resolve unnecessary import of thrust/optional.hpp in types.hpp ([#7667](https://github.com/rapidsai/cudf/pull/7667)) [@vyasr](https://github.com/vyasr) +- Feature/optimize accessor copy ([#7660](https://github.com/rapidsai/cudf/pull/7660)) [@vyasr](https://github.com/vyasr) +- Fix `find_package(cudf)` ([#7658](https://github.com/rapidsai/cudf/pull/7658)) [@trxcllnt](https://github.com/trxcllnt) +- Work-around for gcc7 compile error on Centos7 ([#7652](https://github.com/rapidsai/cudf/pull/7652)) [@davidwendt](https://github.com/davidwendt) +- Add in JNI support for count_elements ([#7651](https://github.com/rapidsai/cudf/pull/7651)) [@revans2](https://github.com/revans2) +- Fix issues with building cudf in a non-conda environment ([#7647](https://github.com/rapidsai/cudf/pull/7647)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor ConfigureCUDA to not conditionally insert compiler flags ([#7643](https://github.com/rapidsai/cudf/pull/7643)) [@robertmaynard](https://github.com/robertmaynard) +- Add gbenchmark for converting strings to/from timestamps ([#7641](https://github.com/rapidsai/cudf/pull/7641)) [@davidwendt](https://github.com/davidwendt) +- Handle constructing a `cudf.Scalar` from a `cudf.Scalar` ([#7639](https://github.com/rapidsai/cudf/pull/7639)) [@shwina](https://github.com/shwina) +- Add in JNI support for table partition ([#7637](https://github.com/rapidsai/cudf/pull/7637)) [@revans2](https://github.com/revans2) +- Add explicit fixed_point merge test ([#7635](https://github.com/rapidsai/cudf/pull/7635)) [@codereport](https://github.com/codereport) +- Add JNI support for IDENTITY hash partitioning ([#7626](https://github.com/rapidsai/cudf/pull/7626)) [@revans2](https://github.com/revans2) +- Java support on explode_outer ([#7625](https://github.com/rapidsai/cudf/pull/7625)) [@sperlingxx](https://github.com/sperlingxx) +- Java support of casting string from/to decimal ([#7623](https://github.com/rapidsai/cudf/pull/7623)) [@sperlingxx](https://github.com/sperlingxx) +- Convert cudf::concatenate APIs to use spans and device_uvector ([#7621](https://github.com/rapidsai/cudf/pull/7621)) [@harrism](https://github.com/harrism) +- Add gbenchmark for cudf::strings::translate function ([#7617](https://github.com/rapidsai/cudf/pull/7617)) [@davidwendt](https://github.com/davidwendt) +- Use file(COPY ) over file(INSTALL ) so cmake output is reduced ([#7616](https://github.com/rapidsai/cudf/pull/7616)) [@robertmaynard](https://github.com/robertmaynard) +- Use rmm::device_uvector in place of rmm::device_vector for ORC reader/writer and cudf::io::column_buffer ([#7614](https://github.com/rapidsai/cudf/pull/7614)) [@vuule](https://github.com/vuule) +- Refactor Java host-side buffer concatenation to expose separate steps ([#7610](https://github.com/rapidsai/cudf/pull/7610)) [@jlowe](https://github.com/jlowe) +- Add gbenchmarks for string substrings functions ([#7603](https://github.com/rapidsai/cudf/pull/7603)) [@davidwendt](https://github.com/davidwendt) +- Refactor string conversion check ([#7599](https://github.com/rapidsai/cudf/pull/7599)) [@ttnghia](https://github.com/ttnghia) +- JNI: Pass 
names of children struct columns to native Arrow IPC writer ([#7598](https://github.com/rapidsai/cudf/pull/7598)) [@firestarman](https://github.com/firestarman) +- Revert "ENH Fix stale GHA and prevent duplicates " ([#7595](https://github.com/rapidsai/cudf/pull/7595)) [@mike-wendt](https://github.com/mike-wendt) +- ENH Fix stale GHA and prevent duplicates ([#7594](https://github.com/rapidsai/cudf/pull/7594)) [@mike-wendt](https://github.com/mike-wendt) +- Fix auto-detecting GPU architectures ([#7593](https://github.com/rapidsai/cudf/pull/7593)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce cudf library size ([#7583](https://github.com/rapidsai/cudf/pull/7583)) [@robertmaynard](https://github.com/robertmaynard) +- Optimize cudf::make_strings_column for long strings ([#7576](https://github.com/rapidsai/cudf/pull/7576)) [@davidwendt](https://github.com/davidwendt) +- Always build and export the cudf::cudftestutil target ([#7574](https://github.com/rapidsai/cudf/pull/7574)) [@trxcllnt](https://github.com/trxcllnt) +- Eliminate literal parameters to uvector::set_element_async and device_scalar::set_value ([#7563](https://github.com/rapidsai/cudf/pull/7563)) [@harrism](https://github.com/harrism) +- Add gbenchmark for strings::concatenate ([#7560](https://github.com/rapidsai/cudf/pull/7560)) [@davidwendt](https://github.com/davidwendt) +- Update Changelog Link ([#7550](https://github.com/rapidsai/cudf/pull/7550)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add gbenchmarks for strings replace regex functions ([#7541](https://github.com/rapidsai/cudf/pull/7541)) [@davidwendt](https://github.com/davidwendt) +- Add `__repr__` for Column and ColumnAccessor ([#7531](https://github.com/rapidsai/cudf/pull/7531)) [@shwina](https://github.com/shwina) +- Support Decimal DIV changes in cudf ([#7527](https://github.com/rapidsai/cudf/pull/7527)) [@razajafri](https://github.com/razajafri) +- Remove unneeded step parameter from strings::detail::copy_slice ([#7525](https://github.com/rapidsai/cudf/pull/7525)) [@davidwendt](https://github.com/davidwendt) +- Use device_uvector, device_span in sort groupby ([#7523](https://github.com/rapidsai/cudf/pull/7523)) [@karthikeyann](https://github.com/karthikeyann) +- Add gbenchmarks for strings extract function ([#7522](https://github.com/rapidsai/cudf/pull/7522)) [@davidwendt](https://github.com/davidwendt) +- Rename ARROW_STATIC_LIB because it conflicts with one in FindArrow.cmake ([#7518](https://github.com/rapidsai/cudf/pull/7518)) [@trxcllnt](https://github.com/trxcllnt) +- Reduce compile time/size for scan.cu ([#7516](https://github.com/rapidsai/cudf/pull/7516)) [@davidwendt](https://github.com/davidwendt) +- Change device_vector to device_uvector in nvtext source files ([#7512](https://github.com/rapidsai/cudf/pull/7512)) [@davidwendt](https://github.com/davidwendt) +- Removed unneeded includes from traits.hpp ([#7509](https://github.com/rapidsai/cudf/pull/7509)) [@davidwendt](https://github.com/davidwendt) +- FIX Remove random build directory generation for ccache ([#7508](https://github.com/rapidsai/cudf/pull/7508)) [@dillon-cullinan](https://github.com/dillon-cullinan) +- xfail failing pytest in pandas 1.2.3 
([#7507](https://github.com/rapidsai/cudf/pull/7507)) [@galipremsagar](https://github.com/galipremsagar) +- JNI bit cast ([#7493](https://github.com/rapidsai/cudf/pull/7493)) [@revans2](https://github.com/revans2) +- Combine rolling window function tests ([#7480](https://github.com/rapidsai/cudf/pull/7480)) [@mythrocks](https://github.com/mythrocks) +- Prepare Changelog for Automation ([#7477](https://github.com/rapidsai/cudf/pull/7477)) [@ajschmidt8](https://github.com/ajschmidt8) +- Java support for explode position ([#7471](https://github.com/rapidsai/cudf/pull/7471)) [@sperlingxx](https://github.com/sperlingxx) +- Update 0.18 changelog entry ([#7463](https://github.com/rapidsai/cudf/pull/7463)) [@ajschmidt8](https://github.com/ajschmidt8) +- JNI: Support skipping nulls for collect aggregation ([#7457](https://github.com/rapidsai/cudf/pull/7457)) [@firestarman](https://github.com/firestarman) +- Join APIs that return gathermaps ([#7454](https://github.com/rapidsai/cudf/pull/7454)) [@shwina](https://github.com/shwina) +- Remove dependence on managed memory for multimap test ([#7451](https://github.com/rapidsai/cudf/pull/7451)) [@jrhemstad](https://github.com/jrhemstad) +- Use cuFile for Parquet IO when available ([#7444](https://github.com/rapidsai/cudf/pull/7444)) [@vuule](https://github.com/vuule) +- Statistics cleanup ([#7439](https://github.com/rapidsai/cudf/pull/7439)) [@kaatish](https://github.com/kaatish) +- Add gbenchmarks for strings filter functions ([#7438](https://github.com/rapidsai/cudf/pull/7438)) [@davidwendt](https://github.com/davidwendt) +- `fixed_point` + `cudf::binary_operation` API Changes ([#7435](https://github.com/rapidsai/cudf/pull/7435)) [@codereport](https://github.com/codereport) +- Improve string gather performance ([#7433](https://github.com/rapidsai/cudf/pull/7433)) [@jlowe](https://github.com/jlowe) +- Don't use user resource for a temporary allocation in sort_by_key ([#7431](https://github.com/rapidsai/cudf/pull/7431)) [@magnatelee](https://github.com/magnatelee) +- Detail APIs for datetime functions ([#7430](https://github.com/rapidsai/cudf/pull/7430)) [@magnatelee](https://github.com/magnatelee) +- Replace thrust::max_element with thrust::reduce in strings findall_re ([#7428](https://github.com/rapidsai/cudf/pull/7428)) [@davidwendt](https://github.com/davidwendt) +- Add gbenchmark for strings split/split_record functions ([#7427](https://github.com/rapidsai/cudf/pull/7427)) [@davidwendt](https://github.com/davidwendt) +- Update JNI build to use CMAKE_CUDA_ARCHITECTURES ([#7425](https://github.com/rapidsai/cudf/pull/7425)) [@jlowe](https://github.com/jlowe) +- Change nvtext::load_vocabulary_file to return a unique ptr ([#7424](https://github.com/rapidsai/cudf/pull/7424)) [@davidwendt](https://github.com/davidwendt) +- Simplify type dispatch with `device_storage_dispatch` ([#7419](https://github.com/rapidsai/cudf/pull/7419)) [@codereport](https://github.com/codereport) +- Java support for casting of nested child columns ([#7417](https://github.com/rapidsai/cudf/pull/7417)) [@razajafri](https://github.com/razajafri) +- Improve scalar string replace performance for long strings 
([#7415](https://github.com/rapidsai/cudf/pull/7415)) [@jlowe](https://github.com/jlowe) +- Remove unneeded temporary device vector for strings scatter specialization ([#7409](https://github.com/rapidsai/cudf/pull/7409)) [@davidwendt](https://github.com/davidwendt) +- bitmask_or implementation with bitmask refactor ([#7406](https://github.com/rapidsai/cudf/pull/7406)) [@rwlee](https://github.com/rwlee) +- Add other cudf::strings::replace functions to current strings replace gbenchmark ([#7403](https://github.com/rapidsai/cudf/pull/7403)) [@davidwendt](https://github.com/davidwendt) +- Clean up included headers in `device_operators.cuh` ([#7401](https://github.com/rapidsai/cudf/pull/7401)) [@codereport](https://github.com/codereport) +- Move nullable index iterator to indexalator factory ([#7399](https://github.com/rapidsai/cudf/pull/7399)) [@davidwendt](https://github.com/davidwendt) +- ENH Pass ccache variables to conda recipe & use Ninja in CI ([#7398](https://github.com/rapidsai/cudf/pull/7398)) [@Ethyling](https://github.com/Ethyling) +- upgrade maven-antrun-plugin to support maven parallel builds ([#7393](https://github.com/rapidsai/cudf/pull/7393)) [@rongou](https://github.com/rongou) +- Add gbenchmark for strings find/contains functions ([#7392](https://github.com/rapidsai/cudf/pull/7392)) [@davidwendt](https://github.com/davidwendt) +- Use CMAKE_CUDA_ARCHITECTURES ([#7391](https://github.com/rapidsai/cudf/pull/7391)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor libcudf strings::replace to use make_strings_children utility ([#7384](https://github.com/rapidsai/cudf/pull/7384)) [@davidwendt](https://github.com/davidwendt) +- Added in JNI support for out of core sort algorithm ([#7381](https://github.com/rapidsai/cudf/pull/7381)) [@revans2](https://github.com/revans2) +- Upgrade pandas to 1.2 ([#7375](https://github.com/rapidsai/cudf/pull/7375)) [@galipremsagar](https://github.com/galipremsagar) +- Rename `logical_cast` to `bit_cast` and allow additional conversions ([#7373](https://github.com/rapidsai/cudf/pull/7373)) [@ttnghia](https://github.com/ttnghia) +- jitify 2 support ([#7372](https://github.com/rapidsai/cudf/pull/7372)) [@cwharris](https://github.com/cwharris) +- compile_udf: Cache PTX for similar functions ([#7371](https://github.com/rapidsai/cudf/pull/7371)) [@gmarkall](https://github.com/gmarkall) +- Add string scalar replace benchmark ([#7369](https://github.com/rapidsai/cudf/pull/7369)) [@jlowe](https://github.com/jlowe) +- Add gbenchmark for strings contains_re/count_re functions ([#7366](https://github.com/rapidsai/cudf/pull/7366)) [@davidwendt](https://github.com/davidwendt) +- Update orc reader and writer fuzz tests ([#7357](https://github.com/rapidsai/cudf/pull/7357)) [@galipremsagar](https://github.com/galipremsagar) +- Improve url_decode performance for long strings ([#7353](https://github.com/rapidsai/cudf/pull/7353)) [@jlowe](https://github.com/jlowe) +- `cudf::ast` Small Refactorings ([#7352](https://github.com/rapidsai/cudf/pull/7352)) [@codereport](https://github.com/codereport) +- Remove std::cout and print in the scatter test function EmptyListsOfNullableStrings. 
([#7342](https://github.com/rapidsai/cudf/pull/7342)) [@ttnghia](https://github.com/ttnghia) +- Use `cudf::detail::make_counting_transform_iterator` ([#7338](https://github.com/rapidsai/cudf/pull/7338)) [@codereport](https://github.com/codereport) +- Change block size parameter from a global to a template param. ([#7333](https://github.com/rapidsai/cudf/pull/7333)) [@nvdbaranec](https://github.com/nvdbaranec) +- Partial clean up of ORC writer ([#7324](https://github.com/rapidsai/cudf/pull/7324)) [@vuule](https://github.com/vuule) +- Add gbenchmark for cudf::strings::to_lower ([#7316](https://github.com/rapidsai/cudf/pull/7316)) [@davidwendt](https://github.com/davidwendt) +- Update Java bindings version to 0.19-SNAPSHOT ([#7307](https://github.com/rapidsai/cudf/pull/7307)) [@pxLi](https://github.com/pxLi) +- Move `cudf::test::make_counting_transform_iterator` to `cudf/detail/iterator.cuh` ([#7306](https://github.com/rapidsai/cudf/pull/7306)) [@codereport](https://github.com/codereport) +- Use string literals in `fixed_point` `release_assert`s ([#7303](https://github.com/rapidsai/cudf/pull/7303)) [@codereport](https://github.com/codereport) +- Fix merge conflicts for #7295 ([#7297](https://github.com/rapidsai/cudf/pull/7297)) [@ajschmidt8](https://github.com/ajschmidt8) +- Add UTF-8 chars to create_random_column<string_view> benchmark utility ([#7292](https://github.com/rapidsai/cudf/pull/7292)) [@davidwendt](https://github.com/davidwendt) +- Abstracting block reduce and block scan from cuIO kernels with `cub` apis ([#7278](https://github.com/rapidsai/cudf/pull/7278)) [@rgsl888prabhu](https://github.com/rgsl888prabhu) +- Build.sh use cmake --build to drive build system invocation ([#7270](https://github.com/rapidsai/cudf/pull/7270)) [@robertmaynard](https://github.com/robertmaynard) +- Refactor dictionary support for reductions any/all ([#7242](https://github.com/rapidsai/cudf/pull/7242)) [@davidwendt](https://github.com/davidwendt) +- Replace stream.value() with stream for stream_view args ([#7236](https://github.com/rapidsai/cudf/pull/7236)) [@karthikeyann](https://github.com/karthikeyann) +- Interval index and interval_range ([#7182](https://github.com/rapidsai/cudf/pull/7182)) [@marlenezw](https://github.com/marlenezw) +- avro reader integration tests ([#7156](https://github.com/rapidsai/cudf/pull/7156)) [@cwharris](https://github.com/cwharris) +- Rework libcudf CMakeLists.txt to export targets for CPM ([#7107](https://github.com/rapidsai/cudf/pull/7107)) [@trxcllnt](https://github.com/trxcllnt) +- Adding Interval Dtype ([#6984](https://github.com/rapidsai/cudf/pull/6984)) [@marlenezw](https://github.com/marlenezw) +- Cleaning up `for` loops with `make_(counting_)transform_iterator` ([#6546](https://github.com/rapidsai/cudf/pull/6546)) [@codereport](https://github.com/codereport) # cuDF 0.18.0 (24 Feb 2021) diff --git a/README.md b/README.md index ed4277e1fcb..c0c33c645e1 100644 --- a/README.md +++ b/README.md @@ -65,15 +65,16 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel: 
-For `cudf version == 21.081.06` : +For `cudf version == 21.06` : ```bash -# for CUDA 10.1 conda install -c rapidsai -c nvidia -c numba -c conda-forge \ - cudf=21.081.06 python=3.7 cudatoolkit=10.1 + cudf=21.06 python=3.7 cudatoolkit=11.0 -# or, for CUDA 10.2 conda install -c rapidsai -c nvidia -c numba -c conda-forge \ - cudf=21.081.06 python=3.7 cudatoolkit=10.2 + cudf=21.06 python=3.7 cudatoolkit=11.2 ``` diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 39f2ba3188c..631ebf16aea 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: cudf diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 35dfb1791d8..b59a49b0db7 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: cudf_kafka diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 0ae0ce830ad..bb5186d7057 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: custreamz diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index e66b4c930ec..14376f54ba1 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: dask-cudf diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index ea2fda399fd..a8abe5b09f0 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -2,7 +2,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.'
+ version.split('.')[1] %} -{% set cuda_version='.'.join(environ.get('CUDA_VERSION', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: libcudf @@ -133,12 +133,14 @@ test: - test -f $PREFIX/include/cudf/io/types.hpp - test -f $PREFIX/include/cudf/ipc.hpp - test -f $PREFIX/include/cudf/join.hpp + - test -f $PREFIX/include/cudf/lists/detail/combine.hpp - test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp - test -f $PREFIX/include/cudf/lists/detail/copying.hpp + - test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp - test -f $PREFIX/include/cudf/lists/detail/drop_list_duplicates.hpp - test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp - test -f $PREFIX/include/cudf/lists/detail/sorting.hpp - - test -f $PREFIX/include/cudf/lists/concatenate_rows.hpp + - test -f $PREFIX/include/cudf/lists/combine.hpp - test -f $PREFIX/include/cudf/lists/count_elements.hpp - test -f $PREFIX/include/cudf/lists/explode.hpp - test -f $PREFIX/include/cudf/lists/drop_list_duplicates.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2220a1b1a2c..8620531ec22 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -266,7 +266,8 @@ add_library(cudf src/join/join.cu src/join/semi_join.cu src/lists/contains.cu - src/lists/concatenate_rows.cu + src/lists/combine/concatenate_list_elements.cu + src/lists/combine/concatenate_rows.cu src/lists/copying/concatenate.cu src/lists/copying/copying.cu src/lists/copying/gather.cu @@ -332,8 +333,8 @@ add_library(cudf src/strings/char_types/char_cases.cu src/strings/char_types/char_types.cu src/strings/combine/concatenate.cu - src/strings/combine/concatenate_list_elements.cu src/strings/combine/join.cu + src/strings/combine/join_list_elements.cu src/strings/contains.cu src/strings/convert/convert_booleans.cu src/strings/convert/convert_datetime.cu diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch index c14b8cdafe5..2f9201d8ab4 100644 --- a/cpp/cmake/thrust.patch +++ b/cpp/cmake/thrust.patch @@ -81,25 +81,3 @@ index c0c6d59..937ee31 100644 { typedef AgentScanPolicy< 128, 15, ///< Threads per block, items per thread -diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h -index fe4b321c..b3974c69 100644 ---- a/thrust/system/cuda/detail/scan_by_key.h -+++ b/thrust/system/cuda/detail/scan_by_key.h -@@ -513,7 +513,7 @@ namespace __scan_by_key { - scan_op(scan_op_) - { - int tile_idx = blockIdx.x; -- Size tile_base = ITEMS_PER_TILE * tile_idx; -+ Size tile_base = ITEMS_PER_TILE * static_cast(tile_idx); - Size num_remaining = num_items - tile_base; - - if (num_remaining > ITEMS_PER_TILE) -@@ -734,7 +734,7 @@ namespace __scan_by_key { - ScanOp scan_op, - AddInitToScan add_init_to_scan) - { -- int num_items = static_cast(thrust::distance(keys_first, keys_last)); -+ size_t num_items = static_cast(thrust::distance(keys_first, keys_last)); - size_t storage_size = 0; - cudaStream_t stream = cuda_cub::stream(policy); - bool debug_sync = THRUST_DEBUG_SYNC_FLAG; diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 43c2407d629..e5424f0fc44 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -541,7 +541,8 @@ std::unique_ptr make_structs_column( * * The output column will have the same type as `s.type()` * The output column will contain all null rows if `s.invalid()==false` - * The output column will be empty if `size==0`. 
+ * The output column will be empty if `size==0`. For LIST scalars, the column hierarchy + * from @p s is preserved. * * @param[in] s The scalar to use for values in the column. * @param[in] size The number of rows for the output column. diff --git a/cpp/include/cudf/lists/concatenate_rows.hpp b/cpp/include/cudf/lists/combine.hpp similarity index 57% rename from cpp/include/cudf/lists/concatenate_rows.hpp rename to cpp/include/cudf/lists/combine.hpp index 1d93de418f8..a9407ed57ca 100644 --- a/cpp/include/cudf/lists/concatenate_rows.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -21,7 +21,7 @@ namespace cudf { namespace lists { /** - * @addtogroup lists_concatenate_rows + * @addtogroup lists_combine * @{ * @file */ @@ -53,16 +53,47 @@ enum class concatenate_null_policy { IGNORE, NULLIFY_OUTPUT_ROW }; * * @param input Table of lists to be concatenated. * @param null_policy The parameter to specify whether a null list element will be ignored from - * concatenation, or any concatenation involving a null list element will result in a null list. + * concatenation, or any concatenation involving a null element will result in a null list. * @param mr Device memory resource used to allocate the returned column's device memory. * @return A new column in which each row is a list resulted from concatenating all list elements in - * the corresponding row of the input table. + * the corresponding row of the input table. */ std::unique_ptr concatenate_rows( table_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Concatenates multiple lists on the same row of a lists column into a single list. + * + * Given a lists column where each row in the column is a list of lists of entries, an output lists + * column is generated by concatenating all the list elements at the same row together. If any row + * contains null list elements, the concatenation process will either ignore those null elements, or + * will simply set the entire resulting row to null. + * + * @code{.pseudo} + * l = [ [{1, 2}, {3, 4}, {5}], [{6}, {}, {7, 8, 9}] ] + * r = lists::concatenate_list_elements(l); + * r is [ {1, 2, 3, 4, 5}, {6, 7, 8, 9} ] + * @endcode + * + * @throws cudf::logic_error if the input column is not a lists column with at least two levels of + * nesting (i.e., each row must be a list of lists). + * @throws cudf::logic_error if the input lists column contains nested-type entries that are not + * lists. + * + * @param input The lists column containing lists of list elements to concatenate. + * @param null_policy The parameter to specify whether a null list element will be ignored from + * concatenation, or any concatenation involving a null element will result in a null list. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return A new column in which each row is a list resulting from concatenating all list elements in + * the corresponding row of the input lists column. 
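The doxygen above fully specifies the new `cudf::lists::concatenate_list_elements` API. A minimal usage sketch follows; the `cudf::test::lists_column_wrapper` utility, the helper function name, and the sample values are assumptions used only to build a lists-of-lists input and are not part of this change.

```cpp
// Sketch only: wrapper type, function name, and values are illustrative.
#include <cudf/lists/combine.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_list_elements_example()
{
  // Two rows, each a list of lists of INT32:
  //   row 0: [[1, 2], [3, 4], [5]]
  //   row 1: [[6], [7, 8, 9]]
  cudf::test::lists_column_wrapper<int32_t> input{{{1, 2}, {3, 4}, {5}},
                                                  {{6}, {7, 8, 9}}};

  // Null list elements are ignored by default (concatenate_null_policy::IGNORE).
  auto result = cudf::lists::concatenate_list_elements(input);
  // Expected rows: [1, 2, 3, 4, 5] and [6, 7, 8, 9].
}
```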
+ */ +std::unique_ptr concatenate_list_elements( + column_view const& input, + concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp new file mode 100644 index 00000000000..9f28074173a --- /dev/null +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { +/** + * @copydoc cudf::lists::concatenate_rows + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_rows( + table_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @copydoc cudf::lists::concatenate_list_elements + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_list_elements( + column_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/lists/detail/copying.hpp b/cpp/include/cudf/lists/detail/copying.hpp index 548fec7e7f6..3760294f079 100644 --- a/cpp/include/cudf/lists/detail/copying.hpp +++ b/cpp/include/cudf/lists/detail/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,21 +48,6 @@ std::unique_ptr copy_slice(lists_column_view const& lists, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); -/** - * @brief Create a single-level empty lists column. - * - * An empty lists column contains empty children so the column's - * basic type is recorded. - * - * @param child_type The type used for the child column. - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New empty lists column. 
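The detail header above mirrors the public combine APIs with an explicit stream parameter. For the public `cudf::lists::concatenate_rows` entry point, a hedged usage sketch; the test wrappers, helper function name, and data are illustrative assumptions.

```cpp
// Sketch only: wrapper types, function name, and values are illustrative.
#include <cudf/lists/combine.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_rows_example()
{
  // Two lists columns with the same number of rows.
  cudf::test::lists_column_wrapper<int32_t> col0{{0, 1}, {2, 3, 4}};
  cudf::test::lists_column_wrapper<int32_t> col1{{5}, {6}};

  // Row-wise concatenation: row i of the result is col0[i] followed by col1[i].
  auto result = cudf::lists::concatenate_rows(cudf::table_view{{col0, col1}});
  // Expected rows: [0, 1, 5] and [2, 3, 4, 6].
}
```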
- */ -std::unique_ptr make_empty_lists_column(data_type child_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index b179ccf228b..aec45d260bf 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -526,10 +526,7 @@ struct list_child_constructor { if (num_child_rows == 0) { // make an empty lists column using the input child type - return make_empty_lists_column( - source_lists_column_view.child().child(lists_column_view::child_column_index).type(), - stream, - mr); + return empty_like(source_lists_column_view.child()); } auto child_list_views = rmm::device_uvector(num_child_rows, stream, mr); diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp new file mode 100644 index 00000000000..bdf06cfa9e7 --- /dev/null +++ b/cpp/include/cudf/lists/lists_column_factories.hpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { + +/** + * @brief Internal API to construct a lists column from a `list_scalar`, for public + * use, use `cudf::make_column_from_scalar`. + * + * @param[in] value The `list_scalar` to construct from + * @param[in] size The number of rows for the output column. + * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @param[in] mr Device memory resource used to allocate the returned column's device memory. + */ +std::unique_ptr make_lists_column_from_scalar( + list_scalar const& value, + size_type size, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +} // namespace detail +} // namespace lists +} // namespace cudf diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 6887ef0e670..360efe15303 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,12 +30,21 @@ namespace strings { * @brief Strings APIs for concatenate and join */ +/** + * @brief Setting for specifying how separators are added with + * null strings elements. + */ +enum class separator_on_nulls { + YES, ///< Always add separators between elements + NO ///< Do not add separators if an element is null +}; + /** * @brief Concatenates all strings in the column into one new string delimited * by an optional separator string. * * This returns a column with one string. 
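The new `lists_column_factories.hpp` header above routes `list_scalar` fills through `make_lists_column_from_scalar`, with `cudf::make_column_from_scalar` as the public entry point. A minimal sketch, assuming `cudf::list_scalar` can be constructed from a column of elements as in this release's scalar headers; the wrapper, function name, and values are illustrative.

```cpp
// Sketch only: assumes cudf::list_scalar is constructible from a column of
// elements; wrapper and values are illustrative.
#include <cudf/column/column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf_test/column_wrapper.hpp>

void fill_column_from_list_scalar_example()
{
  // A list_scalar holding the single list [1, 2, 3].
  cudf::test::fixed_width_column_wrapper<int32_t> elements{1, 2, 3};
  cudf::list_scalar value{elements};

  // Public entry point: a lists column with 4 rows, each a copy of [1, 2, 3].
  auto col = cudf::make_column_from_scalar(value, 4);
}
```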
Any null entries are ignored unless - * the narep parameter specifies a replacement string. + * the @p narep parameter specifies a replacement string. * * @code{.pseudo} * Example: @@ -70,11 +79,9 @@ std::unique_ptr join_strings( * * - If row separator for a given row is null, output column for that row is null, unless * there is a valid @p separator_narep - * - If all column values for a given row is null, output column for that row is null, unless - * there is a valid @p col_narep - * - null column values for a given row are skipped, if the column replacement isn't valid - * - The separator is only applied between two valid column values - * - If valid @p separator_narep and @p col_narep are provided, the output column is always + * - The separator is applied between two output row values if the @p separate_nulls + * is `YES` or only between valid rows if @p separate_nulls is `NO`. + * - If @p separator_narep and @p col_narep are both valid, the output column is always * non nullable * * @code{.pseudo} @@ -83,16 +90,25 @@ std::unique_ptr join_strings( * c1 = [null, 'cc', 'dd', null, null, 'gg'] * c2 = ['bb', '', null, null, null, 'hh'] * sep = ['::', '%%', '^^', '!', '*', null] - * out0 = concatenate([c0, c1, c2], sep) - * out0 is ['aa::bb', 'cc%%', '^^dd', 'ee', null, null] + * out = concatenate({c0, c1, c2}, sep) + * // all rows have at least one null or sep[i]==null + * out is [null, null, null, null, null, null] * * sep_rep = '+' - * out1 = concatenate([c0, c1, c2], sep, sep_rep) - * out1 is ['aa::bb', 'cc%%', '^^dd', 'ee', null, 'ff+gg+hh'] - * - * col_rep = '-' - * out2 = concatenate([c0, c1, c2], sep, invalid_sep_rep, col_rep) - * out2 is ['aa::-::bb', '-%%cc%%', '^^dd^^-', 'ee!-!-', '-*-*-', null] + * out = concatenate({c0, c1, c2}, sep, sep_rep) + * // all rows with at least one null output as null + * out is [null, null, null, null, null, 'ff+gg+hh'] + * + * col_narep = '-' + * sep_na = non-valid scalar + * out = concatenate({c0, c1, c2}, sep, sep_na, col_narep) + * // only the null entry in the sep column produces a null row + * out is ['aa::-::bb', '-%%cc%%', '^^dd^^-', 'ee!-!-', '-*-*-', null] + * + * col_narep = '' + * out = concatenate({c0, c1, c2}, sep, sep_rep, col_narep, separator_on_nulls:NO) + * // parameter suppresses separator for null rows + * out is ['aa::bb', 'cc%%', '^^dd', 'ee', '', 'ff+gg+hh'] * @endcode * * @throw cudf::logic_error if no input columns are specified - table view is empty @@ -108,6 +124,8 @@ std::unique_ptr join_strings( * @param col_narep String that should be used in place of any null strings * found in any column. Default of invalid-scalar means no null column value replacements. * Default is an invalid string. + * @param separate_nulls If YES, then the separator is included for null rows + * if `col_narep` is valid. * @param mr Resource for allocating device memory. * @return New column with concatenated results. */ @@ -116,15 +134,9 @@ std::unique_ptr concatenate( strings_column_view const& separators, string_scalar const& separator_narep = string_scalar("", false), string_scalar const& col_narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -/** - * @addtogroup strings_combine - * @{ - * @file strings/combine.hpp - * @brief Strings APIs for concatenate and join - */ - /** * @brief Row-wise concatenates the given list of strings columns and * returns a single strings column result. 
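The `separator_on_nulls` and `col_narep` behavior documented above for the separators-column overload of `cudf::strings::concatenate` can be exercised as in the following sketch; the test wrappers, helper function name, and data are assumptions for illustration.

```cpp
// Sketch only: test wrappers, function name, and data are illustrative.
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_with_separator_column_example()
{
  cudf::test::strings_column_wrapper c0({"aa", "", "dd"});
  cudf::test::strings_column_wrapper c1({"bb", "cc", ""}, {true, true, false});  // last row null
  cudf::test::strings_column_wrapper sep({"::", "%%", "^^"});

  // Invalid separator replacement, "-" as the null-column replacement, and
  // separators kept next to replaced nulls (separator_on_nulls::YES).
  auto out = cudf::strings::concatenate(cudf::table_view{{c0, c1}},
                                        cudf::strings_column_view{sep},
                                        cudf::string_scalar("", false),
                                        cudf::string_scalar("-"),
                                        cudf::strings::separator_on_nulls::YES);
  // Expected: ["aa::bb", "%%cc", "dd^^-"].
}
```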
@@ -136,20 +148,30 @@ std::unique_ptr concatenate( * row to be null entry unless a narep string is specified to be used * in its place. * - * The number of strings in the columns provided must be the same. + * If @p separate_nulls is set to `NO` and @p narep is valid then + * separators are not added to the output between null elements. + * Otherwise, separators are always added if @p narep is valid. + * + * More than one column must be specified in the input @p strings_columns + * table. * * @code{.pseudo} * Example: - * s1 = ['aa', null, '', 'aa'] - * s2 = ['', 'bb', 'bb', null] - * r1 = concatenate([s1,s2]) - * r1 is ['aa', null, 'bb', null] - * r2 = concatenate([s1,s2],':','_') - * r2 is ['aa:', '_:bb', ':bb', 'aa:_'] + * s1 = ['aa', null, '', 'dd'] + * s2 = ['', 'bb', 'cc', null] + * out = concatenate({s1, s2}) + * out is ['aa', null, 'cc', null] + * + * out = concatenate({s1, s2}, ':', '_') + * out is ['aa:', '_:bb', ':cc', 'dd:_'] + * + * out = concatenate({s1, s2}, ':', '', separator_on_nulls::NO) + * out is ['aa:', 'bb', ':cc', 'dd'] * @endcode * * @throw cudf::logic_error if input columns are not all strings columns. * @throw cudf::logic_error if separator is not valid. + * @throw cudf::logic_error if only one column is specified * * @param strings_columns List of string columns to concatenate. * @param separator String that should inserted between each string from each row. @@ -157,6 +179,7 @@ std::unique_ptr concatenate( * @param narep String that should be used in place of any null strings * found in any column. Default of invalid-scalar means any null entry in any column will * produces a null result for that row. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New column with concatenated results. */ @@ -164,6 +187,7 @@ std::unique_ptr concatenate( table_view const& strings_columns, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -171,24 +195,30 @@ std::unique_ptr concatenate( * within each row and returns a single strings column result. * * Each new string is created by concatenating the strings from the same row (same list element) - * delimited by the row separator provided in the `separators` strings column. + * delimited by the row separator provided in the @p separators strings column. * * A null list row will always result in a null string in the output row. Any non-null list row * having a null element will result in the corresponding output row to be null unless a valid - * `string_narep` scalar is provided to be used in its place. Any null row in the `separators` - * column will also result in a null output row unless a valid `separator_narep` scalar is provided + * @p string_narep scalar is provided to be used in its place. Any null row in the @p separators + * column will also result in a null output row unless a valid @p separator_narep scalar is provided * to be used in place of the null separators. * + * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the + * output between null elements. Otherwise, separators are always added if @p narep is valid. 
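For the scalar-separator overload of `cudf::strings::concatenate` documented earlier in this header, a sketch mirroring the `s1`/`s2` example; the wrappers and data are again illustrative assumptions.

```cpp
// Sketch only: mirrors the s1/s2 documentation example; wrappers and data are illustrative.
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_with_scalar_separator_example()
{
  cudf::test::strings_column_wrapper s1({"aa", "", "", "dd"}, {true, false, true, true});
  cudf::test::strings_column_wrapper s2({"", "bb", "cc", ""}, {true, true, true, false});

  // Empty-string null replacement plus separator_on_nulls::NO suppresses the
  // separator next to replaced nulls.
  auto out = cudf::strings::concatenate(cudf::table_view{{s1, s2}},
                                        cudf::string_scalar(":"),
                                        cudf::string_scalar(""),
                                        cudf::strings::separator_on_nulls::NO);
  // Expected: ["aa:", "bb", ":cc", "dd"].
}
```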
+ * * @code{.pseudo} * Example: - * s = [ {'aa', 'bb', 'cc'}, null, {'', 'dd'}, {'ee', null}, {'ff', 'gg'} ] + * s = [ ['aa', 'bb', 'cc'], null, ['', 'dd'], ['ee', null], ['ff', 'gg'] ] * sep = ['::', '%%', '!', '*', null] * - * r1 = strings::concatenate_list_elements(s, sep) - * r1 is ['aa::bb::cc', null, '!dd', null, null] + * out = join_list_elements(s, sep) + * out is ['aa::bb::cc', null, '!dd', null, null] + * + * out = join_list_elements(s, sep, ':', '_') + * out is ['aa::bb::cc', null, '!dd', 'ee*_', 'ff:gg'] * - * r2 = strings::concatenate_list_elements(s, sep, ':', '_') - * r2 is ['aa::bb::cc', null, '!dd', 'ee*_', 'ff:gg'] + * out = join_list_elements(s, sep, ':', '', separator_on_nulls::NO) + * out is ['aa::bb::cc', null, '!dd', 'ee', 'ff:gg'] * @endcode * * @throw cudf::logic_error if input column is not lists of strings column. @@ -203,14 +233,16 @@ * @param string_narep String that should be used to replace null strings in any non-null list row, * default is an invalid-scalar denoting that list rows containing null strings will result * in null string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column with concatenated results. */ -std::unique_ptr concatenate_list_elements( +std::unique_ptr join_list_elements( const lists_column_view& lists_strings_column, const strings_column_view& separators, string_scalar const& separator_narep = string_scalar("", false), string_scalar const& string_narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -218,21 +250,27 @@ std::unique_ptr concatenate_list_elements( * within each row and returns a single strings column result. * * Each new string is created by concatenating the strings from the same row (same list element) - * delimited by the separator provided. + * delimited by the @p separator provided. * * A null list row will always result in a null string in the output row. Any non-null list row - * having a null elenent will result in the corresponding output row to be null unless a narep - * string is specified to be used in its place. + * having a null element will result in the corresponding output row being null unless a + * @p narep string is specified to be used in its place. + * + * If @p separate_nulls is set to `NO` and @p narep is valid then separators are not added to the + * output between null elements. Otherwise, separators are always added if @p narep is valid. * * @code{.pseudo} * Example: - * s = [ {'aa', 'bb', 'cc'}, null, {'', 'dd'}, {'ee', null}, {'ff'} ] + * s = [ ['aa', 'bb', 'cc'], null, ['', 'dd'], ['ee', null], ['ff'] ] + * + * out = join_list_elements(s) + * out is ['aabbcc', null, 'dd', null, 'ff'] * - * r1 = strings::concatenate_list_elements(s) - * r1 is ['aabbcc', null, 'dd', null, 'ff'] + * out = join_list_elements(s, ':', '_') + * out is ['aa:bb:cc', null, ':dd', 'ee:_', 'ff'] * - * r2 = strings::concatenate_list_elements(s, ':', '_') - * r2 is ['aa:bb:cc', null, ':dd', 'ee:_', 'ff'] + * out = join_list_elements(s, ':', '', separator_on_nulls::NO) + * out is ['aa:bb:cc', null, ':dd', 'ee', 'ff'] * @endcode * * @throw cudf::logic_error if input column is not lists of strings column. 
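A corresponding sketch for the renamed `cudf::strings::join_list_elements` (scalar-separator form); the wrapper, function name, and data are illustrative assumptions.

```cpp
// Sketch only: wrapper, function name, and data are illustrative.
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf_test/column_wrapper.hpp>

void join_list_elements_example()
{
  // A lists-of-strings column: [["aa", "bb", "cc"], ["", "dd"], ["ff"]]
  cudf::test::lists_column_wrapper<cudf::string_view> input{
    {"aa", "bb", "cc"}, {"", "dd"}, {"ff"}};

  // Scalar separator ":" with "_" replacing null strings inside a list.
  auto out = cudf::strings::join_list_elements(
    cudf::lists_column_view{input}, cudf::string_scalar(":"), cudf::string_scalar("_"));
  // Expected: ["aa:bb:cc", ":dd", "ff"].
}
```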
@@ -244,13 +282,15 @@ std::unique_ptr concatenate_list_elements( * @param narep String that should be used to replace null strings in any non-null list row, default * is an invalid-scalar denoting that list rows containing null strings will result in null * string in the corresponding output rows. + * @param separate_nulls If YES, then the separator is included for null rows if `narep` is valid. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column with concatenated results. */ -std::unique_ptr concatenate_list_elements( +std::unique_ptr join_list_elements( const lists_column_view& lists_strings_column, string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index 6e25a4dfa38..d6bdf398886 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -36,6 +37,7 @@ std::unique_ptr concatenate( table_view const& strings_columns, string_scalar const& separator, string_scalar const& narep, + separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h index 11b907e7f16..dda8ce87432 100644 --- a/cpp/include/doxygen_groups.h +++ b/cpp/include/doxygen_groups.h @@ -143,7 +143,7 @@ * @} * @defgroup lists_apis Lists * @{ - * @defgroup lists_concatenate_rows Combining + * @defgroup lists_combine Combining * @defgroup lists_extract Extracting * @defgroup lists_contains Searching * @defgroup lists_gather Gathering diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index 60e642ea3d5..6ba8497b320 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ struct column_from_scalar_dispatch { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { + if (size == 0) return make_empty_column(value.type()); if (!value.is_valid()) return make_fixed_width_column(value.type(), size, mask_state::ALL_NULL, stream, mr); auto output_column = @@ -49,6 +51,7 @@ std::unique_ptr column_from_scalar_dispatch::operator() column_from_scalar_dispatch::operator()(&value); + return lists::detail::make_lists_column_from_scalar(*lv, size, stream, mr); } template <> @@ -94,6 +98,7 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); auto iter = thrust::make_constant_iterator(0); @@ -117,7 +122,6 @@ std::unique_ptr make_column_from_scalar(scalar const& s, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (size == 0) return make_empty_column(s.type()); return type_dispatcher(s.type(), column_from_scalar_dispatch{}, s, size, stream, mr); } diff --git 
a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index a5fd6d6f9bb..f132d6b1511 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -79,6 +79,44 @@ std::pair, std::vector> groupby::disp groupby::~groupby() = default; namespace { + +/** + * @brief Factory to construct empty result columns. + * + * Adds special handling for COLLECT_LIST/COLLECT_SET, because: + * 1. `make_empty_column()` does not support construction of nested columns. + * 2. Empty lists need empty child columns, to persist type information. + */ +struct empty_column_constructor { + column_view values; + + template + std::unique_ptr operator()() const + { + using namespace cudf; + using namespace cudf::detail; + + if constexpr (k == aggregation::Kind::COLLECT_LIST || k == aggregation::Kind::COLLECT_SET) { + return make_lists_column( + 0, make_empty_column(data_type{type_to_id()}), empty_like(values), 0, {}); + } + + // If `values` is LIST typed, and the aggregation results match the type, + // construct empty results based on `values`. + // Most generally, this applies if input type matches output type. + // + // Note: `target_type_t` is not recursive, and `ValuesType` does not consider children. + // It is important that `COLLECT_LIST` and `COLLECT_SET` are handled before this + // point, because `COLLECT_LIST(LIST)` produces `LIST`, but `target_type_t` + // wouldn't know the difference. + if constexpr (std::is_same_v, ValuesType>) { + return empty_like(values); + } + + return make_empty_column(target_type(values.type(), k)); + } +}; + /// Make an empty table with appropriate types for requested aggs auto empty_results(host_span requests) { @@ -93,7 +131,8 @@ auto empty_results(host_span requests) request.aggregations.end(), std::back_inserter(results), [&request](auto const& agg) { - return make_empty_column(cudf::detail::target_type(request.values.type(), agg->kind)); + return cudf::detail::dispatch_type_and_aggregation( + request.values.type(), agg->kind, empty_column_constructor{request.values}); }); return aggregation_result{std::move(results)}; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index d2b6be5eead..bc0e1243d4f 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -119,7 +120,8 @@ struct column_to_strings_fn { return not((std::is_same::value) || (std::is_integral::value) || (std::is_floating_point::value) || - (cudf::is_timestamp()) || (cudf::is_duration())); + (cudf::is_fixed_point()) || (cudf::is_timestamp()) || + (cudf::is_duration())); } explicit column_to_strings_fn( @@ -189,6 +191,15 @@ struct column_to_strings_fn { return cudf::strings::detail::from_floats(column, stream_, mr_); } + // fixed point: + // + template + std::enable_if_t(), std::unique_ptr> operator()( + column_view const& column) const + { + return cudf::strings::detail::from_fixed_point(column, stream_, mr_); + } + // timestamps: // template @@ -404,11 +415,19 @@ void writer::impl::write(table_view const& table, auto str_table_view = str_table_ptr->view(); // concatenate columns in each row into one big string column - //(using null representation and delimiter): + // (using null representation and delimiter): // std::string delimiter_str{options_.get_inter_column_delimiter()}; - auto str_concat_col = cudf::strings::detail::concatenate( - str_table_view, delimiter_str, options_.get_na_rep(), stream); + auto str_concat_col = [&] { + if (str_table_view.num_columns() > 1) + return 
cudf::strings::detail::concatenate(str_table_view, + delimiter_str, + options_.get_na_rep(), + strings::separator_on_nulls::YES, + stream); + cudf::string_scalar narep{options_.get_na_rep()}; + return cudf::strings::detail::replace_nulls(str_table_view.column(0), narep, stream); + }(); write_chunked(str_concat_col->view(), metadata, stream); } diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu new file mode 100644 index 00000000000..c5a28a8ec5f --- /dev/null +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf { +namespace lists { +namespace detail { +namespace { +/** + * @brief Concatenate lists within the same row into one list, ignoring any null list during + * concatenation. + */ +std::unique_ptr concatenate_lists_ignore_null(column_view const& input, + bool build_null_mask, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_rows = input.size(); + + static_assert(std::is_same_v && std::is_same_v); + auto out_offsets = make_numeric_column( + data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); + + // The array of int8_t stores validities for the output list elements. + auto validities = rmm::device_uvector(build_null_mask ? num_rows : 0, stream); + + auto const d_out_offsets = out_offsets->mutable_view().template begin(); + auto const d_row_offsets = lists_column_view(input).offsets_begin(); + auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin(); + auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child()); + + // Concatenating the lists at the same row by converting the entry offsets from the child column + // into row offsets of the root column. Those entry offsets are subtracted by the first entry + // offset to output zero-based offsets. + auto const iter = thrust::make_counting_iterator(0); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_rows + 1, + d_out_offsets, + [d_row_offsets, + d_list_offsets, + lists_dv = *lists_dv_ptr, + d_validities = validities.begin(), + build_null_mask, + iter] __device__(auto const idx) { + if (build_null_mask) { + // The output row will be null only if all lists on the input row are null. 
+ auto const is_valid = thrust::any_of(thrust::seq, + iter + d_row_offsets[idx], + iter + d_row_offsets[idx + 1], + [&] __device__(auto const list_idx) { + return lists_dv.is_valid(list_idx); + }); + d_validities[idx] = static_cast(is_valid); + } + auto const start_offset = d_list_offsets[d_row_offsets[0]]; + return d_list_offsets[d_row_offsets[idx]] - start_offset; + }); + + // The child column of the output lists column is just copied from the input column. + auto out_entries = std::make_unique( + lists_column_view(lists_column_view(input).get_sliced_child(stream)).get_sliced_child(stream)); + + auto [null_mask, null_count] = [&] { + return build_null_mask + ? cudf::detail::valid_if( + validities.begin(), validities.end(), thrust::identity{}, stream, mr) + : std::make_pair(cudf::detail::copy_bitmask(input, stream, mr), input.null_count()); + }(); + + return make_lists_column(num_rows, + std::move(out_offsets), + std::move(out_entries), + null_count, + null_count > 0 ? std::move(null_mask) : rmm::device_buffer{}, + stream, + mr); +} + +/** + * @brief Generate list offsets and list validities for the output lists column. + * + * This function is called only when (has_null_list == true and null_policy == NULLIFY_OUTPUT_ROW). + */ +std::pair, rmm::device_uvector> +generate_list_offsets_and_validities(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_rows = input.size(); + + static_assert(std::is_same_v && std::is_same_v); + auto out_offsets = make_numeric_column( + data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); + + auto const lists_of_lists_dv_ptr = column_device_view::create(input); + auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child()); + auto const d_out_offsets = out_offsets->mutable_view().template begin(); + auto const d_row_offsets = lists_column_view(input).offsets_begin(); + auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin(); + + // The array of int8_t stores validities for the output list elements. + auto validities = rmm::device_uvector(num_rows, stream); + + // Compute output list sizes and validities. + auto const iter = thrust::make_counting_iterator(0); + thrust::transform( + rmm::exec_policy(stream), + iter, + iter + num_rows, + d_out_offsets, + [lists_of_lists_dv = *lists_of_lists_dv_ptr, + lists_dv = *lists_dv_ptr, + d_row_offsets, + d_list_offsets, + d_validities = validities.begin(), + iter] __device__(auto const idx) { + if (d_row_offsets[idx] == d_row_offsets[idx + 1]) { // This is a null/empty row. + d_validities[idx] = static_cast(lists_of_lists_dv.is_valid(idx)); + return size_type{0}; + } + // The output row will not be null only if all lists on the input row are not null. + auto const is_valid = + thrust::all_of(thrust::seq, + iter + d_row_offsets[idx], + iter + d_row_offsets[idx + 1], + [&] __device__(auto const list_idx) { return lists_dv.is_valid(list_idx); }); + d_validities[idx] = static_cast(is_valid); + if (!is_valid) { return size_type{0}; } + + // Compute size of the output list as sum of sizes of all lists in the current input row. + return d_list_offsets[d_row_offsets[idx + 1]] - d_list_offsets[d_row_offsets[idx]]; + }); + + // Compute offsets from sizes. 
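To make the offsets step concrete: the transform above fills `d_out_offsets` with per-row sizes, and the `thrust::exclusive_scan` just below converts them to offsets in place. A small host-side analogue in standard C++ (not part of the change, values illustrative):

```cpp
// Host-side analogue of the sizes-to-offsets step used by the device code.
#include <numeric>
#include <vector>

void offsets_from_sizes_example()
{
  // Sizes for three output rows (5, 0 and 4 entries) plus one trailing slot,
  // mirroring the num_rows + 1 sized offsets column used by the device code.
  std::vector<int> offsets{5, 0, 4, 0};
  std::exclusive_scan(offsets.begin(), offsets.end(), offsets.begin(), 0);
  // offsets is now {0, 5, 5, 9}; row i's entries span [offsets[i], offsets[i+1]).
}
```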
+ thrust::exclusive_scan( + rmm::exec_policy(stream), d_out_offsets, d_out_offsets + num_rows + 1, d_out_offsets); + + return {std::move(out_offsets), std::move(validities)}; +} + +/** + * @brief Gather entries from the input lists column, ignoring rows that have null list elements. + * + * This function is called only when (has_null_list == true and null_policy == NULLIFY_OUTPUT_ROW). + */ +std::unique_ptr gather_list_entries(column_view const& input, + column_view const& output_list_offsets, + size_type num_rows, + size_type num_output_entries, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const child_col = lists_column_view(input).child(); + auto const entry_col = lists_column_view(child_col).child(); + auto const d_row_offsets = lists_column_view(input).offsets_begin(); + auto const d_list_offsets = lists_column_view(child_col).offsets_begin(); + auto gather_map = rmm::device_uvector(num_output_entries, stream); + + // Fill the gather map with indices of the lists from the child column of the input column. + thrust::for_each_n( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + num_rows, + [d_row_offsets, + d_list_offsets, + d_indices = gather_map.begin(), + d_out_list_offsets = + output_list_offsets.template begin()] __device__(size_type const idx) { + // The output row has been identified as a null/empty list during list size computation. + if (d_out_list_offsets[idx + 1] == d_out_list_offsets[idx]) { return; } + + // The indices of the list elements on the row `idx` of the input column. + thrust::sequence(thrust::seq, + d_indices + d_out_list_offsets[idx], + d_indices + d_out_list_offsets[idx + 1], + d_list_offsets[d_row_offsets[idx]]); + }); + + auto result = cudf::detail::gather(table_view{{entry_col}}, + gather_map.begin(), + gather_map.end(), + out_of_bounds_policy::DONT_CHECK, + stream, + mr); + return std::move(result->release()[0]); +} + +std::unique_ptr concatenate_lists_nullifying_rows(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Generate offsets and validities of the output lists column. + auto [list_offsets, list_validities] = generate_list_offsets_and_validities(input, stream, mr); + auto const offsets_view = list_offsets->view(); + + auto const num_rows = input.size(); + auto const num_output_entries = + cudf::detail::get_value(offsets_view, num_rows, stream); + + auto list_entries = + gather_list_entries(input, offsets_view, num_rows, num_output_entries, stream, mr); + auto [null_mask, null_count] = cudf::detail::valid_if( + list_validities.begin(), list_validities.end(), thrust::identity{}, stream, mr); + + return make_lists_column(num_rows, + std::move(list_offsets), + std::move(list_entries), + null_count, + null_count ? std::move(null_mask) : rmm::device_buffer{}, + stream, + mr); +} + +} // namespace + +/** + * @copydoc cudf::lists::concatenate_list_elements + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_list_elements(column_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto type = input.type(); // Column that is lists of lists. + CUDF_EXPECTS(type.id() == type_id::LIST, "Input column must be a lists column."); + + auto col = lists_column_view(input).child(); // Rows, which are lists. 
+ type = col.type(); + CUDF_EXPECTS(type.id() == type_id::LIST, "Rows of the input column must be lists."); + + col = lists_column_view(col).child(); // The last level entries what we need to check. + type = col.type(); + CUDF_EXPECTS(type.id() == type_id::LIST || !cudf::is_nested(type), + "Entry of the input lists column must be of list or non-nested types."); + + if (input.size() == 0) { return cudf::empty_like(input); } + + bool has_null_list = lists_column_view(input).child().has_nulls(); + + return (null_policy == concatenate_null_policy::IGNORE || !has_null_list) + ? concatenate_lists_ignore_null(input, has_null_list, stream, mr) + : concatenate_lists_nullifying_rows(input, stream, mr); +} + +} // namespace detail + +/** + * @copydoc cudf::lists::concatenate_list_elements + */ +std::unique_ptr concatenate_list_elements(column_view const& input, + concatenate_null_policy null_policy, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::concatenate_list_elements(input, null_policy, rmm::cuda_stream_default, mr); +} + +} // namespace lists +} // namespace cudf diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu new file mode 100644 index 00000000000..fdd71aea7bf --- /dev/null +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf { +namespace lists { +namespace detail { +/** + * @copydoc cudf::lists::concatenate_rows + * + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr concatenate_rows(table_view const& input, + concatenate_null_policy null_policy, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(input.num_columns() > 0, "The input table must have at least one column."); + + auto const entry_type = lists_column_view(*input.begin()).child().type(); + for (auto const& col : input) { + CUDF_EXPECTS(col.type().id() == type_id::LIST, + "All columns of the input table must be of lists column type."); + + auto const child_col = lists_column_view(col).child(); + CUDF_EXPECTS(not cudf::is_nested(child_col.type()), "Nested types are not supported."); + CUDF_EXPECTS(entry_type == child_col.type(), + "The types of entries in the input columns must be the same."); + } + + auto const num_rows = input.num_rows(); + auto const num_cols = input.num_columns(); + if (num_rows == 0) { return cudf::empty_like(input.column(0)); } + if (num_cols == 1) { return std::make_unique(*(input.begin()), stream, mr); } + + // Memory resource for temporary data. + auto const default_mr = rmm::mr::get_current_device_resource(); + + // Interleave the input table into one column. 
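Before the interleaving step below, a worked data walkthrough (values purely illustrative) of the strategy `concatenate_rows` now uses: interleave the input columns, regroup every `num_cols` lists into one row, then reuse `concatenate_list_elements`.

```cpp
// Illustrative walkthrough (values assumed) of the strategy implemented below:
//
//   col0 = [ [1, 2], [3]    ]
//   col1 = [ [4],    [5, 6] ]
//
// interleave_columns places the rows of each input column next to each other:
//
//   interleaved = [ [1, 2], [4], [3], [5, 6] ]
//
// Wrapping it with offsets {0, 2, 4} (computed as idx * num_cols) groups every
// num_cols consecutive lists into one row of a lists-of-lists column:
//
//   nested = [ [[1, 2], [4]], [[3], [5, 6]] ]
//
// concatenate_list_elements then flattens each row to give the final result:
//
//   result = [ [1, 2, 4], [3, 5, 6] ]
```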
+ auto const has_null_mask = std::any_of( + std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); }); + auto interleaved_columns = detail::interleave_columns(input, has_null_mask, stream, default_mr); + + // Generate a lists column which has child column is the interleaved_columns. + // The new nested lists column will have each row is a list of `num_cols` list elements. + static_assert(std::is_same_v and std::is_same_v); + auto list_offsets = make_numeric_column( + data_type{type_id::INT32}, num_rows + 1, mask_state::UNALLOCATED, stream, default_mr); + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows + 1), + list_offsets->mutable_view().template begin(), + [num_cols] __device__(auto const idx) { return idx * num_cols; }); + auto const nested_lists_col = make_lists_column(num_rows, + std::move(list_offsets), + std::move(interleaved_columns), + 0, + rmm::device_buffer{}, + stream, + default_mr); + + // Concatenate lists on each row of the nested lists column, producing the desired output. + return concatenate_list_elements(nested_lists_col->view(), null_policy, stream, mr); +} + +} // namespace detail + +/** + * @copydoc cudf::lists::concatenate_rows + */ +std::unique_ptr concatenate_rows(table_view const& lists_columns, + concatenate_null_policy null_policy, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::concatenate_rows(lists_columns, null_policy, rmm::cuda_stream_default, mr); +} + +} // namespace lists +} // namespace cudf diff --git a/cpp/src/lists/concatenate_rows.cu b/cpp/src/lists/concatenate_rows.cu deleted file mode 100644 index 8528a7680f7..00000000000 --- a/cpp/src/lists/concatenate_rows.cu +++ /dev/null @@ -1,441 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -namespace cudf { -namespace lists { -namespace detail { -namespace { -/** - * @brief Concatenate lists within the same row into one list, ignoring any null list during - * concatenation. - */ -std::unique_ptr concatenate_rows_ignore_null(table_view const& input, - bool has_null_mask, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_output_lists = input.num_rows(); - auto const table_dv_ptr = table_device_view::create(input); - - // Interleave the list element from the input table, thus all the lists at the same row now stay - // next to each other. - auto interleaved_columns = detail::interleave_columns(input, has_null_mask, stream); - - // Modify the list offsets to combine lists of the same input row. 
- static_assert(sizeof(offset_type) == sizeof(int32_t)); - static_assert(sizeof(size_type) == sizeof(int32_t)); - auto list_offsets = make_numeric_column( - data_type{type_id::INT32}, num_output_lists + 1, mask_state::UNALLOCATED, stream, mr); - auto const d_offsets = list_offsets->mutable_view().template begin(); - - // The array of int8_t to store validities for list elements. - // Since we combine multiple lists, we may need to recompute list validities. - auto validities = rmm::device_uvector(has_null_mask ? num_output_lists : 0, stream); - - // For an input table of `n` columns, if after interleaving we have the list offsets are - // [ i_0, i_1, ..., i_n, i_n+1, ..., i_2n, ... ] then to concatenate them just modify the offsets - // to be [ i_0, i_n, i_2n, i_3n, ... ]. - auto const d_interleaved_offsets = lists_column_view(interleaved_columns->view()).offsets_begin(); - thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_output_lists + 1), - d_offsets, - [d_interleaved_offsets, - num_cols = input.num_columns(), - table_dv = *table_dv_ptr, - d_validities = validities.begin(), - has_null_mask] __device__(auto const idx) { - if (has_null_mask) { - auto const any_valid = thrust::any_of( - thrust::seq, table_dv.begin(), table_dv.end(), [idx](auto const& list_col) { - return list_col.is_valid(idx); - }); - d_validities[idx] = static_cast(any_valid); - } - return d_interleaved_offsets[idx * num_cols]; - }); - - auto [null_mask, null_count] = [&] { - return has_null_mask - ? cudf::detail::valid_if( - validities.begin(), validities.end(), thrust::identity{}, stream, mr) - : std::make_pair(rmm::device_buffer{}, size_type{0}); - }(); - - // The child column containing list entries is taken from the `interleaved_columns` column. - auto interleaved_columns_content = interleaved_columns->release(); - - return make_lists_column( - num_output_lists, - std::move(list_offsets), - std::move(interleaved_columns_content.children[lists_column_view::child_column_index]), - null_count, - null_count > 0 ? std::move(null_mask) : rmm::device_buffer{}, - stream, - mr); -} - -/** - * @brief Generate list offsets and list validities for the output lists column from the table_view - * of the input lists columns. - * - * This function is called only when (has_null_mask == true and null_policy == NULLIFY_OUTPUT_ROW). - */ -std::pair, rmm::device_uvector> -generate_list_offsets_and_validities(table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const num_output_lists = input.num_rows(); - auto const table_dv_ptr = table_device_view::create(input); - - // The output offsets column. - static_assert(sizeof(offset_type) == sizeof(int32_t)); - static_assert(sizeof(size_type) == sizeof(int32_t)); - auto list_offsets = make_numeric_column( - data_type{type_id::INT32}, num_output_lists + 1, mask_state::UNALLOCATED, stream, mr); - auto const d_offsets = list_offsets->mutable_view().template begin(); - - // The array of int8_t to store validities for list elements. - auto validities = rmm::device_uvector(num_output_lists, stream); - - // Compute list sizes and validities. 
- thrust::transform( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_output_lists), - d_offsets, - [table_dv = *table_dv_ptr, d_validities = validities.begin()] __device__(size_type const idx) { - auto const all_valid = - thrust::all_of(thrust::seq, table_dv.begin(), table_dv.end(), [idx](auto const& list_col) { - return list_col.is_valid(idx); - }); - d_validities[idx] = static_cast(all_valid); - if (not all_valid) return size_type{0}; - - // Compute size of the output list as sum of sizes of input lists - return thrust::transform_reduce( - thrust::seq, - table_dv.begin(), - table_dv.end(), - [idx] __device__(auto const& lists_col) { - auto const list_offsets = - lists_col.child(lists_column_view::offsets_column_index).template data() + - lists_col.offset(); - return list_offsets[idx + 1] - list_offsets[idx]; // list size - }, - size_type{0}, - thrust::plus{}); - }); - - // Compute offsets from sizes. - thrust::exclusive_scan( - rmm::exec_policy(stream), d_offsets, d_offsets + num_output_lists + 1, d_offsets); - - return {std::move(list_offsets), std::move(validities)}; -} - -/** - * @brief Compute string sizes, string validities, and concatenate string lists functor. - * - * This functor is called only when (has_null_mask == true and null_policy == NULLIFY_OUTPUT_ROW). - * It is executed twice. In the first pass, the sizes and validities of the output strings will be - * computed. In the second pass, this will concatenate the lists of strings on the same row from the - * given input table. - */ -struct compute_string_sizes_and_concatenate_lists_fn { - table_device_view const table_dv; - - // Store list offsets of the output lists column. - offset_type const* const dst_list_offsets; - - // Store offsets of the strings. - offset_type* d_offsets{nullptr}; - - // If d_chars == nullptr: only compute sizes and validities of the output strings. - // If d_chars != nullptr: only concatenate lists of strings. - char* d_chars{nullptr}; - - // We need to set `1` or `0` for the validities of the strings in the child column. - int8_t* d_validities{nullptr}; - - __device__ void operator()(size_type const idx) - { - // The current row contain null, which has been identified during offsets computation. - if (dst_list_offsets[idx + 1] == dst_list_offsets[idx]) { return; } - - // read_idx and write_idx are indices of string elements. - size_type write_idx = dst_list_offsets[idx]; - thrust::for_each( - thrust::seq, table_dv.begin(), table_dv.end(), [&] __device__(auto const& lists_col) { - auto const list_offsets = - lists_col.child(lists_column_view::offsets_column_index).template data() + - lists_col.offset(); - auto const& str_col = lists_col.child(lists_column_view::child_column_index); - auto const str_offsets = - str_col.child(strings_column_view::offsets_column_index).template data(); - - // The range of indices of the strings within the source list. - auto const start_str_idx = list_offsets[idx]; - auto const end_str_idx = list_offsets[idx + 1]; - - if (not d_chars) { // just compute sizes of strings within a list - for (auto read_idx = start_str_idx; read_idx < end_str_idx; ++read_idx, ++write_idx) { - d_validities[write_idx] = static_cast(str_col.is_valid(read_idx)); - d_offsets[write_idx] = str_offsets[read_idx + 1] - str_offsets[read_idx]; - } - } else { // just copy the entire memory region containing all strings in the list - // start_byte and end_byte are indices of character of the string elements. 
- auto const start_byte = str_offsets[start_str_idx]; - auto const end_byte = str_offsets[end_str_idx]; - if (start_byte < end_byte) { - auto const input_ptr = - str_col.child(strings_column_view::chars_column_index).template data() + - start_byte; - auto const output_ptr = d_chars + d_offsets[write_idx]; - thrust::copy(thrust::seq, input_ptr, input_ptr + end_byte - start_byte, output_ptr); - } - write_idx += end_str_idx - start_str_idx; - } - }); - } -}; - -/** - * @brief Struct used in type_dispatcher to interleave list entries of the input lists columns and - * output the results into a destination column. - * - * This functor is called only when (has_null_mask == true and null_policy == NULLIFY_OUTPUT_ROW). - */ -struct concatenate_lists_fn { - template - std::enable_if_t, std::unique_ptr> operator()( - table_view const& input, - column_view const& output_list_offsets, - size_type num_output_lists, - size_type num_output_entries, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const noexcept - { - auto const table_dv_ptr = table_device_view::create(input); - auto const comp_fn = compute_string_sizes_and_concatenate_lists_fn{ - *table_dv_ptr, output_list_offsets.template begin()}; - - // Generate a null mask because the input table has nullable column. - auto [offsets_column, chars_column, null_mask, null_count] = - cudf::strings::detail::make_strings_children_with_null_mask( - comp_fn, num_output_lists, num_output_entries, stream, mr); - - return make_strings_column(num_output_entries, - std::move(offsets_column), - std::move(chars_column), - null_count, - std::move(null_mask), - stream, - mr); - } - - template - std::enable_if_t(), std::unique_ptr> operator()( - table_view const& input, - column_view const& output_list_offsets, - size_type num_output_lists, - size_type num_output_entries, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) const noexcept - { - auto const table_dv_ptr = table_device_view::create(input); - - // The output child column. - auto const child_col = lists_column_view(*input.begin()).child(); - auto output = - allocate_like(child_col, num_output_entries, mask_allocation_policy::NEVER, stream, mr); - auto output_dv_ptr = mutable_column_device_view::create(*output); - - // The array of int8_t to store entry validities. - auto validities = rmm::device_uvector(num_output_entries, stream); - - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - num_output_lists, - [num_cols = input.num_columns(), - table_dv = *table_dv_ptr, - d_validities = validities.begin(), - dst_list_offsets = output_list_offsets.template begin(), - d_output = output_dv_ptr->template begin()] __device__(size_type const idx) { - // The output row has been identified as a null list during list size computation. - if (dst_list_offsets[idx + 1] == dst_list_offsets[idx]) { return; } - - auto write_start = dst_list_offsets[idx]; - thrust::for_each( - thrust::seq, table_dv.begin(), table_dv.end(), [&] __device__(auto const& lists_col) { - auto const list_offsets = lists_col.child(lists_column_view::offsets_column_index) - .template data() + - lists_col.offset(); - auto const& data_col = lists_col.child(lists_column_view::child_column_index); - - // The range of indices of the entries within the source list. - auto const start_idx = list_offsets[idx]; - auto const end_idx = list_offsets[idx + 1]; - - // Fill the validities array. 
- for (auto read_idx = start_idx, write_idx = write_start; read_idx < end_idx; - ++read_idx, ++write_idx) { - d_validities[write_idx] = static_cast(data_col.is_valid(read_idx)); - } - // Do a copy for the entire list entries. - auto const input_ptr = - reinterpret_cast(data_col.template data() + start_idx); - auto const output_ptr = reinterpret_cast(&d_output[write_start]); - thrust::copy( - thrust::seq, input_ptr, input_ptr + sizeof(T) * (end_idx - start_idx), output_ptr); - write_start += end_idx - start_idx; - }); - }); - - auto [null_mask, null_count] = cudf::detail::valid_if( - validities.begin(), validities.end(), thrust::identity{}, stream, mr); - if (null_count > 0) { output->set_null_mask(null_mask, null_count); } - - return output; - } - - template - std::enable_if_t and not cudf::is_fixed_width(), - std::unique_ptr> - operator()(table_view const&, - column_view const&, - size_type, - size_type, - rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) const - { - // Currently, only support string_view and fixed-width types - CUDF_FAIL("Called `concatenate_lists_fn()` on non-supported types."); - } -}; - -std::unique_ptr concatenate_with_nullifying_rows(table_view const& input, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // Generate offsets of the output lists column. - auto [list_offsets, list_validities] = generate_list_offsets_and_validities(input, stream, mr); - auto const offsets_view = list_offsets->view(); - - // Copy entries from the input lists columns to the output lists column - this needed to be - // specialized for different types. - auto const num_output_lists = input.num_rows(); - auto const num_output_entries = - cudf::detail::get_value(offsets_view, num_output_lists, stream); - auto list_entries = - type_dispatcher(lists_column_view(*input.begin()).child().type(), - concatenate_lists_fn{}, - input, - offsets_view, - num_output_lists, - num_output_entries, - stream, - mr); - - auto [null_mask, null_count] = cudf::detail::valid_if( - list_validities.begin(), list_validities.end(), thrust::identity{}, stream, mr); - return make_lists_column(num_output_lists, - std::move(list_offsets), - std::move(list_entries), - null_count, - null_count ? std::move(null_mask) : rmm::device_buffer{}, - stream, - mr); -} - -} // namespace - -/** - * @copydoc cudf::lists::concatenate_rows - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr concatenate_rows(table_view const& input, - concatenate_null_policy null_policy, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(input.num_columns() > 0, "The input table must have at least one column."); - - auto const entry_type = lists_column_view(*input.begin()).child().type(); - for (auto const& col : input) { - CUDF_EXPECTS(col.type().id() == type_id::LIST, - "All columns of the input table must be of lists column type."); - - auto const child_col = lists_column_view(col).child(); - CUDF_EXPECTS(not cudf::is_nested(child_col.type()), "Nested types are not supported."); - CUDF_EXPECTS(entry_type == child_col.type(), - "The types of entries in the input columns must be the same."); - } - - if (input.num_rows() == 0) { return cudf::empty_like(input.column(0)); } - if (input.num_columns() == 1) { return std::make_unique(*(input.begin()), stream, mr); } - - // List concatenation can be implemented by simply interleaving the lists columns, then modify the - // list offsets. 
- auto const has_null_mask = std::any_of( - std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); }); - if (not has_null_mask or null_policy == concatenate_null_policy::IGNORE) { - return concatenate_rows_ignore_null(input, has_null_mask, stream, mr); - } - - // Both conditions satisfied: has_null_mask == true and - // null_policy == NULLIFY_OUTPUT_ROW. - return concatenate_with_nullifying_rows(input, stream, mr); -} - -} // namespace detail - -/** - * @copydoc cudf::lists::concatenate_rows - */ -std::unique_ptr concatenate_rows(table_view const& lists_columns, - concatenate_null_policy null_policy, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::concatenate_rows(lists_columns, null_policy, rmm::cuda_stream_default, mr); -} - -} // namespace lists -} // namespace cudf diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index 3275a496cfd..ff4649f4945 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -84,19 +84,6 @@ std::unique_ptr copy_slice(lists_column_view const& lists, std::move(null_mask)); } -std::unique_ptr make_empty_lists_column(data_type child_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return cudf::make_lists_column(0, - make_empty_column(data_type{type_to_id()}), - make_empty_column(child_type), - 0, // Null count - rmm::device_buffer{0, stream, mr}, // Null mask - stream, - mr); -} - } // namespace detail } // namespace lists } // namespace cudf diff --git a/cpp/src/lists/lists_column_factories.cu b/cpp/src/lists/lists_column_factories.cu index ebf5e07f76a..3291aeb9f22 100644 --- a/cpp/src/lists/lists_column_factories.cu +++ b/cpp/src/lists/lists_column_factories.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,10 +16,75 @@ #include #include +#include +#include #include +#include + +#include +#include namespace cudf { +namespace lists { +namespace detail { + +std::unique_ptr make_lists_column_from_scalar(list_scalar const& value, + size_type size, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (size == 0) { + return make_lists_column(0, + make_empty_column(data_type{type_to_id()}), + empty_like(value.view()), + 0, + cudf::detail::create_null_mask(0, mask_state::UNALLOCATED, stream, mr), + stream, + mr); + } + auto mr_final = size == 1 ? mr : rmm::mr::get_current_device_resource(); + + // Handcraft a 1-row column + auto offsets = make_numeric_column( + data_type{type_to_id()}, 2, mask_state::UNALLOCATED, stream, mr_final); + auto m_offsets = offsets->mutable_view(); + thrust::sequence(rmm::exec_policy(stream), + m_offsets.begin(), + m_offsets.end(), + 0, + value.view().size()); + size_type null_count = value.is_valid(stream) ? 0 : 1; + auto null_mask_state = null_count ? 
mask_state::ALL_NULL : mask_state::UNALLOCATED; + auto null_mask = cudf::detail::create_null_mask(1, null_mask_state, stream, mr_final); + + if (size == 1) { + auto child = std::make_unique(value.view(), stream, mr_final); + return make_lists_column( + 1, std::move(offsets), std::move(child), null_count, std::move(null_mask), stream, mr_final); + } + + auto children_views = std::vector{offsets->view(), value.view()}; + auto one_row_col_view = column_view(data_type{type_id::LIST}, + 1, + nullptr, + static_cast(null_mask.data()), + null_count, + 0, + children_views); + + auto begin = thrust::make_constant_iterator(0); + auto res = cudf::detail::gather(table_view({one_row_col_view}), + begin, + begin + size, + out_of_bounds_policy::DONT_CHECK, + stream, + mr_final); + return std::move(res->release()[0]); +} + +} // namespace detail +} // namespace lists /** * @copydoc cudf::make_lists_column diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu index 5d7b9152ff3..1329ad3113f 100644 --- a/cpp/src/strings/combine/concatenate.cu +++ b/cpp/src/strings/combine/concatenate.cu @@ -41,67 +41,93 @@ namespace strings { namespace detail { namespace { -/** - * @brief Concatenate strings functor - * - * This will concatenate the strings from each row of the given table - * and apply the separator. The null-replacement string `d_narep` is - * used in place of any string in a row that contains a null entry. - */ -struct concat_strings_fn { +struct concat_strings_base { table_device_view const d_table; - string_view const d_separator; string_scalar_device_view const d_narep; + separator_on_nulls separate_nulls; offset_type* d_offsets{}; char* d_chars{}; - __device__ void operator()(size_type idx) + /** + * @brief Concatenate each table row to a single output string. + * + * This will concatenate the strings from each row of the given table + * and apply the separator. The null-replacement string `d_narep` is + * used in place of any string in a row that contains a null entry. + * + * @param idx The current row to process + * @param d_separator String to place in between each column's row + */ + __device__ void process_row(size_type idx, string_view const d_separator) { - bool const null_element = - thrust::any_of(thrust::seq, d_table.begin(), d_table.end(), [idx](auto const& col) { - return col.is_null(idx); - }); - // handle a null row - if (null_element && !d_narep.is_valid()) { + if (!d_narep.is_valid() && + thrust::any_of(thrust::seq, d_table.begin(), d_table.end(), [idx](auto const& col) { + return col.is_null(idx); + })) { if (!d_chars) d_offsets[idx] = 0; return; } - char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; - size_type bytes = 0; + char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; + offset_type bytes = 0; + bool write_separator = false; + for (auto itr = d_table.begin(); itr < d_table.end(); ++itr) { - auto const d_column = *itr; - auto const d_str = - d_column.is_null(idx) ? 
d_narep.value() : d_column.element(idx); - if (d_buffer) d_buffer = detail::copy_string(d_buffer, d_str); - bytes += d_str.size_bytes(); - // separator goes only in between elements - if (itr + 1 < d_table.end()) { + auto const d_column = *itr; + bool const null_element = d_column.is_null(idx); + + if (write_separator && (separate_nulls == separator_on_nulls::YES || !null_element)) { if (d_buffer) d_buffer = detail::copy_string(d_buffer, d_separator); bytes += d_separator.size_bytes(); + write_separator = false; } + + // write out column's row data (or narep if the row is null) + auto const d_str = null_element ? d_narep.value() : d_column.element(idx); + if (d_buffer) d_buffer = detail::copy_string(d_buffer, d_str); + bytes += d_str.size_bytes(); + + write_separator = + write_separator || (separate_nulls == separator_on_nulls::YES) || !null_element; } + if (!d_chars) d_offsets[idx] = bytes; } }; +/** + * @brief Single separator concatenate functor + */ +struct concat_strings_fn : concat_strings_base { + string_view const d_separator; + + concat_strings_fn(table_device_view const& d_table, + string_view const& d_separator, + string_scalar_device_view const& d_narep, + separator_on_nulls separate_nulls) + : concat_strings_base{d_table, d_narep, separate_nulls}, d_separator(d_separator) + { + } + + __device__ void operator()(size_type idx) { process_row(idx, d_separator); } +}; + } // namespace std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator, string_scalar const& narep, + separator_on_nulls separate_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { auto const num_columns = strings_columns.num_columns(); - CUDF_EXPECTS(num_columns > 0, "At least one column must be specified"); + CUDF_EXPECTS(num_columns > 1, "At least two columns must be specified"); // check all columns are of type string CUDF_EXPECTS(std::all_of(strings_columns.begin(), strings_columns.end(), [](auto c) { return c.type().id() == type_id::STRING; }), "All columns must be of type string"); - if (num_columns == 1) // single strings column returns a copy - return std::make_unique(*(strings_columns.begin()), stream, mr); auto const strings_count = strings_columns.num_rows(); if (strings_count == 0) // empty begets empty return detail::make_empty_strings_column(stream, mr); @@ -112,7 +138,7 @@ std::unique_ptr concatenate(table_view const& strings_columns, // Create device views from the strings columns. auto d_table = table_device_view::create(strings_columns, stream); - concat_strings_fn fn{*d_table, d_separator, d_narep}; + concat_strings_fn fn{*d_table, d_separator, d_narep, separate_nulls}; auto children = make_strings_children(fn, strings_count, stream, mr); // create resulting null mask @@ -120,9 +146,9 @@ std::unique_ptr concatenate(table_view const& strings_columns, thrust::make_counting_iterator(0), thrust::make_counting_iterator(strings_count), [d_table = *d_table, d_narep] __device__(size_type idx) { - bool null_element = thrust::any_of( + if (d_narep.is_valid()) return true; + return !thrust::any_of( thrust::seq, d_table.begin(), d_table.end(), [idx](auto col) { return col.is_null(idx); }); - return (!null_element || d_narep.is_valid()); }, stream, mr); @@ -145,68 +171,42 @@ namespace { * when a separator row is null `d_separator_narep`. The `d_narep` is * used in place of a null entry in the strings columns. 
*/ -struct multi_separator_concat_fn { - table_device_view const d_table; +struct multi_separator_concat_fn : concat_strings_base { column_device_view const d_separators; string_scalar_device_view const d_separator_narep; - string_scalar_device_view const d_narep; - offset_type* d_offsets{}; - char* d_chars{}; - __device__ void operator()(size_type idx) + multi_separator_concat_fn(table_device_view const& d_table, + column_device_view const& d_separators, + string_scalar_device_view const& d_separator_narep, + string_scalar_device_view const& d_narep, + separator_on_nulls separate_nulls) + : concat_strings_base{d_table, d_narep, separate_nulls}, + d_separators(d_separators), + d_separator_narep(d_separator_narep) { - bool const all_nulls = - thrust::all_of(thrust::seq, d_table.begin(), d_table.end(), [idx](auto const& col) { - return col.is_null(idx); - }); + } - if ((d_separators.is_null(idx) && !d_separator_narep.is_valid()) || - (all_nulls && !d_narep.is_valid())) { + __device__ void operator()(size_type idx) + { + if (d_separators.is_null(idx) && !d_separator_narep.is_valid()) { if (!d_chars) d_offsets[idx] = 0; return; } - // point to output location - char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; - offset_type bytes = 0; - - // there is at least one non-null column value auto const d_separator = d_separators.is_valid(idx) ? d_separators.element(idx) : d_separator_narep.value(); - auto const d_null_rep = d_narep.is_valid() ? d_narep.value() : string_view{}; - - // write output entry for this row - bool colval_written = false; // state variable for writing separators - for (auto const d_column : d_table) { - // if the row is null and if there is no replacement, skip it - if (d_column.is_null(idx) && !d_narep.is_valid()) continue; - - // separator in this row is written only after the first output - if (colval_written) { - if (d_buffer) d_buffer = detail::copy_string(d_buffer, d_separator); - bytes += d_separator.size_bytes(); - } - - // write out column's row data (or narep if the row is null) - string_view const d_str = - d_column.is_null(idx) ? d_null_rep : d_column.element(idx); - if (d_buffer) d_buffer = detail::copy_string(d_buffer, d_str); - bytes += d_str.size_bytes(); - - // column's string or narep could by empty so we need this flag - // to know we got this far even if no actual bytes were copied - colval_written = true; // use the separator before the next column - } - - if (!d_chars) d_offsets[idx] = bytes; + // base class utility function handles the rest + process_row(idx, d_separator); } }; + } // namespace std::unique_ptr concatenate(table_view const& strings_columns, strings_column_view const& separators, string_scalar const& separator_narep, string_scalar const& col_narep, + separator_on_nulls separate_nulls, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -234,20 +234,19 @@ std::unique_ptr concatenate(table_view const& strings_columns, // Create device views from the strings columns. 
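
For reference, the user-facing effect of the `separator_on_nulls` parameter threaded through these functors can be sketched roughly as follows; the header path, wrappers, and argument defaults are assumptions, not part of this patch.

```cpp
// Illustrative only: separator handling around null elements.
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_separator_on_nulls_sketch()
{
  cudf::test::strings_column_wrapper c1({"a", "b"});
  cudf::test::strings_column_wrapper c2({"c", ""}, {true, false});  // second row of c2 is null

  auto const sep   = cudf::string_scalar("-");
  auto const narep = cudf::string_scalar("");  // null elements are replaced with ""

  // separator_on_nulls::YES (default): rows -> "a-c", "b-"
  // separator_on_nulls::NO            : rows -> "a-c", "b"
  auto const result = cudf::strings::concatenate(
    cudf::table_view{{c1, c2}}, sep, narep, cudf::strings::separator_on_nulls::NO);
}
```

With `narep` left invalid, a row containing any null element still produces a null output row regardless of the separator policy.
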
auto d_table = table_device_view::create(strings_columns, stream); - multi_separator_concat_fn mscf{*d_table, separator_col_view, separator_rep, col_rep}; + multi_separator_concat_fn mscf{ + *d_table, separator_col_view, separator_rep, col_rep, separate_nulls}; auto children = make_strings_children(mscf, strings_count, stream, mr); // Create resulting null mask auto [null_mask, null_count] = cudf::detail::valid_if( thrust::make_counting_iterator(0), thrust::make_counting_iterator(strings_count), - [d_table = *d_table, separator_col_view, separator_rep, col_rep] __device__(size_type ridx) { - if (!separator_col_view.is_valid(ridx) && !separator_rep.is_valid()) return false; - bool all_nulls = - thrust::all_of(thrust::seq, d_table.begin(), d_table.end(), [ridx](auto const& col) { - return col.is_null(ridx); - }); - return all_nulls ? col_rep.is_valid() : true; + [d_table = *d_table, separator_col_view, separator_rep, col_rep] __device__(size_type idx) { + if (!separator_col_view.is_valid(idx) && !separator_rep.is_valid()) return false; + if (col_rep.is_valid()) return true; + return !thrust::any_of( + thrust::seq, d_table.begin(), d_table.end(), [idx](auto col) { return col.is_null(idx); }); }, stream, mr); @@ -268,21 +267,29 @@ std::unique_ptr concatenate(table_view const& strings_columns, std::unique_ptr concatenate(table_view const& strings_columns, string_scalar const& separator, string_scalar const& narep, + separator_on_nulls separate_nulls, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate(strings_columns, separator, narep, rmm::cuda_stream_default, mr); + return detail::concatenate( + strings_columns, separator, narep, separate_nulls, rmm::cuda_stream_default, mr); } std::unique_ptr concatenate(table_view const& strings_columns, strings_column_view const& separators, string_scalar const& separator_narep, string_scalar const& col_narep, + separator_on_nulls separate_nulls, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate( - strings_columns, separators, separator_narep, col_narep, rmm::cuda_stream_default, mr); + return detail::concatenate(strings_columns, + separators, + separator_narep, + col_narep, + separate_nulls, + rmm::cuda_stream_default, + mr); } } // namespace strings diff --git a/cpp/src/strings/combine/concatenate_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu similarity index 64% rename from cpp/src/strings/combine/concatenate_list_elements.cu rename to cpp/src/strings/combine/join_list_elements.cu index 1157b8f3fce..7a83097566c 100644 --- a/cpp/src/strings/combine/concatenate_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ struct compute_size_and_concatenate_fn { offset_type const* const list_offsets; column_device_view const strings_dv; string_scalar_device_view const string_narep_dv; + separator_on_nulls const separate_nulls; offset_type* d_offsets{nullptr}; @@ -72,33 +74,38 @@ struct compute_size_and_concatenate_fn { return; } - auto const separator = func.separator(idx); - auto const separator_size = separator.size_bytes(); - auto size_bytes = size_type{0}; - bool written = false; - char* output_ptr = d_chars ? d_chars + d_offsets[idx] : nullptr; + auto const separator = func.separator(idx); + auto size_bytes = size_type{0}; + char* output_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; + bool write_separator = false; for (size_type str_idx = list_offsets[idx], idx_end = list_offsets[idx + 1]; str_idx < idx_end; ++str_idx) { - if (not d_chars and (strings_dv.is_null(str_idx) and not string_narep_dv.is_valid())) { + bool null_element = strings_dv.is_null(str_idx); + + if (not d_chars and (null_element and not string_narep_dv.is_valid())) { d_offsets[idx] = 0; d_validities[idx] = false; return; // early termination: the entire list of strings will result in a null string } - auto const d_str = strings_dv.is_null(str_idx) ? string_narep_dv.value() - : strings_dv.element(str_idx); - size_bytes += separator_size + d_str.size_bytes(); - if (output_ptr) { - // Separator is inserted only in between strings - if (written) { output_ptr = detail::copy_string(output_ptr, separator); } - output_ptr = detail::copy_string(output_ptr, d_str); - written = true; + + if (write_separator && (separate_nulls == separator_on_nulls::YES || !null_element)) { + if (output_ptr) output_ptr = detail::copy_string(output_ptr, separator); + size_bytes += separator.size_bytes(); + write_separator = false; } + + auto const d_str = + null_element ? string_narep_dv.value() : strings_dv.element(str_idx); + if (output_ptr) output_ptr = detail::copy_string(output_ptr, d_str); + size_bytes += d_str.size_bytes(); + + write_separator = + write_separator || (separate_nulls == separator_on_nulls::YES) || !null_element; } - // Separator is inserted only in between strings if (not d_chars) { - d_offsets[idx] = static_cast(size_bytes - separator_size); + d_offsets[idx] = size_bytes; d_validities[idx] = true; } } @@ -123,11 +130,12 @@ struct scalar_separator_fn { } // namespace -std::unique_ptr concatenate_list_elements(lists_column_view const& lists_strings_column, - string_scalar const& separator, - string_scalar const& narep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, + string_scalar const& separator, + string_scalar const& narep, + separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(lists_strings_column.child().type().id() == type_id::STRING, "The input column must be a column of lists of strings"); @@ -146,14 +154,14 @@ std::unique_ptr concatenate_list_elements(lists_column_view const& lists auto const sep_dv = get_scalar_device_view(const_cast(separator)); auto const string_narep_dv = get_scalar_device_view(const_cast(narep)); - auto const func = scalar_separator_fn{sep_dv}; - auto const comp_fn = compute_size_and_concatenate_fn{ - func, - *lists_dv_ptr, - lists_strings_column.offsets_begin(), - *strings_dv_ptr, - string_narep_dv, - }; + auto const func = scalar_separator_fn{sep_dv}; + auto const comp_fn = + compute_size_and_concatenate_fn{func, + *lists_dv_ptr, + lists_strings_column.offsets_begin(), + *strings_dv_ptr, + string_narep_dv, + separate_nulls}; auto [offsets_column, chars_column, null_mask, null_count] = make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr); @@ -191,12 +199,13 @@ struct column_separators_fn { } // namespace -std::unique_ptr concatenate_list_elements(lists_column_view const& lists_strings_column, - strings_column_view const& separators, - string_scalar const& separator_narep, - string_scalar const& string_narep, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr join_list_elements(lists_column_view const& 
lists_strings_column, + strings_column_view const& separators, + string_scalar const& separator_narep, + string_scalar const& string_narep, + separator_on_nulls separate_nulls, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(lists_strings_column.child().type().id() == type_id::STRING, "The input column must be a column of lists of strings"); @@ -217,14 +226,14 @@ std::unique_ptr concatenate_list_elements(lists_column_view const& lists auto const sep_dv_ptr = column_device_view::create(separators.parent(), stream); auto const sep_narep_dv = get_scalar_device_view(const_cast(separator_narep)); - auto const func = column_separators_fn{*sep_dv_ptr, sep_narep_dv}; - auto const comp_fn = compute_size_and_concatenate_fn{ - func, - *lists_dv_ptr, - lists_strings_column.offsets_begin(), - *strings_dv_ptr, - string_narep_dv, - }; + auto const func = column_separators_fn{*sep_dv_ptr, sep_narep_dv}; + auto const comp_fn = + compute_size_and_concatenate_fn{func, + *lists_dv_ptr, + lists_strings_column.offsets_begin(), + *strings_dv_ptr, + string_narep_dv, + separate_nulls}; auto [offsets_column, chars_column, null_mask, null_count] = make_strings_children_with_null_mask(comp_fn, num_rows, num_rows, stream, mr); @@ -239,25 +248,32 @@ std::unique_ptr concatenate_list_elements(lists_column_view const& lists } // namespace detail -std::unique_ptr concatenate_list_elements(lists_column_view const& lists_strings_column, - string_scalar const& separator, - string_scalar const& narep, - rmm::mr::device_memory_resource* mr) +std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, + string_scalar const& separator, + string_scalar const& narep, + separator_on_nulls separate_nulls, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_list_elements( - lists_strings_column, separator, narep, rmm::cuda_stream_default, mr); + return detail::join_list_elements( + lists_strings_column, separator, narep, separate_nulls, rmm::cuda_stream_default, mr); } -std::unique_ptr concatenate_list_elements(lists_column_view const& lists_strings_column, - strings_column_view const& separators, - string_scalar const& separator_narep, - string_scalar const& string_narep, - rmm::mr::device_memory_resource* mr) +std::unique_ptr join_list_elements(lists_column_view const& lists_strings_column, + strings_column_view const& separators, + string_scalar const& separator_narep, + string_scalar const& string_narep, + separator_on_nulls separate_nulls, + rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); - return detail::concatenate_list_elements( - lists_strings_column, separators, separator_narep, string_narep, rmm::cuda_stream_default, mr); + return detail::join_list_elements(lists_strings_column, + separators, + separator_narep, + string_narep, + separate_nulls, + rmm::cuda_stream_default, + mr); } } // namespace strings diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d87b4b81bdc..bbcfd69a52b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -328,8 +328,8 @@ ConfigureTest(STRINGS_TEST strings/booleans_tests.cpp strings/case_tests.cpp strings/chars_types_tests.cpp - strings/combine/concatenate_list_elements_tests.cpp strings/combine/concatenate_tests.cpp + strings/combine/join_list_elements_tests.cpp strings/combine/join_strings_tests.cpp strings/concatenate_tests.cpp strings/contains_tests.cpp @@ -407,7 +407,8 @@ ConfigureTest(AST_TEST ast/transform_tests.cpp) 
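
With the rename above, joining each list of strings down to a single string goes through `cudf::strings::join_list_elements`. A brief sketch under the same assumptions (public `<cudf/strings/combine.hpp>` header, test wrappers):

```cpp
// Illustrative only: one output string per input list row.
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf_test/column_wrapper.hpp>

void join_list_elements_sketch()
{
  using StrLists = cudf::test::lists_column_wrapper<cudf::string_view>;

  auto const input = StrLists{StrLists{"a", "b", "c"}, StrLists{"d", "e"}};

  // Produces the strings column ["a+b+c", "d+e"].
  auto const joined = cudf::strings::join_list_elements(
    cudf::lists_column_view{input}, cudf::string_scalar("+"));
}
```

A null string element is replaced with `narep` when one is provided; otherwise the whole output row becomes null, as the early-termination branch in the functor above shows.
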
################################################################################################### # - lists tests ---------------------------------------------------------------------------------- ConfigureTest(LISTS_TEST - lists/concatenate_rows_tests.cpp + lists/combine/concatenate_list_elements_tests.cpp + lists/combine/concatenate_rows_tests.cpp lists/contains_tests.cpp lists/count_elements_tests.cpp lists/drop_list_duplicates_tests.cpp diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 71f65eedd91..f9e83311b1b 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -20,7 +20,9 @@ #include #include +#include #include +#include #include #include @@ -462,6 +464,300 @@ TEST_F(ColumnFactoryTest, DictionaryFromStringScalarError) EXPECT_THROW(cudf::make_dictionary_from_scalar(value, 1), cudf::logic_error); } +template +class ListsFixedWidthLeafTest : public ColumnFactoryTest { +}; + +TYPED_TEST_CASE(ListsFixedWidthLeafTest, cudf::test::FixedWidthTypes); + +TYPED_TEST(ListsFixedWidthLeafTest, FromNonNested) +{ + using FCW = cudf::test::fixed_width_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + using valid_t = std::vector; + + auto s = cudf::make_list_scalar(FCW({1, -1, 3}, {1, 0, 1})); + auto col = cudf::make_column_from_scalar(*s, 3); + + auto expected = LCW{LCW({1, 2, 3}, valid_t{1, 0, 1}.begin()), + LCW({1, 2, 3}, valid_t{1, 0, 1}.begin()), + LCW({1, 2, 3}, valid_t{1, 0, 1}.begin())}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, expected); +} + +TYPED_TEST(ListsFixedWidthLeafTest, FromNested) +{ + using LCW = cudf::test::lists_column_wrapper; + using valid_t = std::vector; + +#define row_data \ + LCW({LCW({-1, -1, 3}, valid_t{0, 0, 1}.begin()), LCW{}, LCW{}}, valid_t{1, 0, 1}.begin()) + + auto s = cudf::make_list_scalar(row_data); + auto col = cudf::make_column_from_scalar(*s, 5); + + auto expected = LCW{row_data, row_data, row_data, row_data, row_data}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, expected); + +#undef row_data +} + +template +class ListsDictionaryLeafTest : public ColumnFactoryTest { +}; + +TYPED_TEST_CASE(ListsDictionaryLeafTest, cudf::test::FixedWidthTypes); + +TYPED_TEST(ListsDictionaryLeafTest, FromNonNested) +{ + using DCW = cudf::test::dictionary_column_wrapper; + using offset_t = cudf::test::fixed_width_column_wrapper; + + auto s = cudf::make_list_scalar(DCW({1, 3, -1, 1, 3}, {1, 1, 0, 1, 1})); + auto col = cudf::make_column_from_scalar(*s, 2); + + DCW leaf({1, 3, -1, 1, 3, 1, 3, -1, 1, 3}, {1, 1, 0, 1, 1, 1, 1, 0, 1, 1}); + offset_t offsets{0, 5, 10}; + auto mask = cudf::create_null_mask(2, cudf::mask_state::UNALLOCATED); + + auto expected = cudf::make_lists_column(2, offsets.release(), leaf.release(), 0, std::move(mask)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected); +} + +TYPED_TEST(ListsDictionaryLeafTest, FromNested) +{ + using DCW = cudf::test::dictionary_column_wrapper; + using offset_t = cudf::test::fixed_width_column_wrapper; + + DCW leaf({1, 3, -1, 1, 3, 1, 3, -1, 1, 3}, {1, 1, 0, 1, 1, 1, 1, 0, 1, 1}); + offset_t offsets{0, 3, 3, 6, 6, 10}; + auto mask = cudf::create_null_mask(5, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask.data()), 1, 2, false); + auto data = cudf::make_lists_column(5, offsets.release(), leaf.release(), 0, std::move(mask)); + + auto s = cudf::make_list_scalar(*data); + auto col = cudf::make_column_from_scalar(*s, 3); + + DCW leaf2( + {1, 3, -1, 1, 3, 1, 3, -1, 1, 3, 1, 3, -1, 1, 3, + 1, 3, -1, 1, 3, 1, 3, -1, 1, 3, 1, 3, 
-1, 1, 3}, + {1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1}); + offset_t offsets2{0, 3, 3, 6, 6, 10, 13, 13, 16, 16, 20, 23, 23, 26, 26, 30}; + auto mask2 = cudf::create_null_mask(15, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask2.data()), 1, 2, false); + cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); + cudf::set_null_mask(static_cast(mask2.data()), 11, 12, false); + auto nested = + cudf::make_lists_column(15, offsets2.release(), leaf2.release(), 3, std::move(mask2)); + + offset_t offsets3{0, 5, 10, 15}; + auto mask3 = cudf::create_null_mask(3, cudf::mask_state::UNALLOCATED); + auto expected = + cudf::make_lists_column(3, offsets3.release(), std::move(nested), 0, std::move(mask3)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected); +} + +class ListsStringLeafTest : public ColumnFactoryTest { +}; + +TEST_F(ListsStringLeafTest, FromNonNested) +{ + using SCW = cudf::test::strings_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + using valid_t = std::vector; + + auto s = cudf::make_list_scalar(SCW({"xx", "", "z"}, {true, false, true})); + auto col = cudf::make_column_from_scalar(*s, 4); + + auto expected = LCW{LCW({"xx", "", "z"}, valid_t{1, 0, 1}.begin()), + LCW({"xx", "", "z"}, valid_t{1, 0, 1}.begin()), + LCW({"xx", "", "z"}, valid_t{1, 0, 1}.begin()), + LCW({"xx", "", "z"}, valid_t{1, 0, 1}.begin())}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, expected); +} + +TEST_F(ListsStringLeafTest, FromNested) +{ + using LCW = cudf::test::lists_column_wrapper; + using valid_t = std::vector; + +#define row_data \ + LCW({LCW{}, \ + LCW({"@@", "rapids", "", "ๅ››", "ใ‚‰"}, valid_t{1, 1, 0, 1, 1}.begin()), \ + LCW{}, \ + LCW({"hello", ""}, valid_t{1, 0}.begin())}, \ + valid_t{0, 1, 1, 1}.begin()) + + auto s = cudf::make_list_scalar(row_data); + + auto col = cudf::make_column_from_scalar(*s, 3); + + auto expected = LCW{row_data, row_data, row_data}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, expected); +#undef row_data +} + +template +class ListsStructsLeafTest : public ColumnFactoryTest { + protected: + using SCW = cudf::test::structs_column_wrapper; + /** + * @brief Create a structs column that contains 3 fields: int, string, List + */ + template + SCW make_test_structs_column(cudf::test::fixed_width_column_wrapper field1, + cudf::test::strings_column_wrapper field2, + cudf::test::lists_column_wrapper field3, + MaskIterator mask) + { + return SCW{{field1, field2, field3}, mask}; + } +}; + +TYPED_TEST_CASE(ListsStructsLeafTest, cudf::test::FixedWidthTypes); + +TYPED_TEST(ListsStructsLeafTest, FromNonNested) +{ + using LCWinner_t = cudf::test::lists_column_wrapper; + using StringCW = cudf::test::strings_column_wrapper; + using offset_t = cudf::test::fixed_width_column_wrapper; + using valid_t = std::vector; + + auto data = this->make_test_structs_column( + {{1, 3, 5, 2, 4}, {1, 0, 1, 0, 1}}, + StringCW({"fleur", "flower", "", "่Šฑ", "ใฏใช"}, {true, true, false, true, true}), + LCWinner_t({{1, 2}, {}, {4, 5}, {-1}, {}}, valid_t{1, 1, 1, 1, 0}.begin()), + valid_t{1, 1, 1, 0, 1}.begin()); + auto s = cudf::make_list_scalar(data); + auto col = cudf::make_column_from_scalar(*s, 2); + + auto leaf = this->make_test_structs_column( + {{1, 3, 5, 2, 4, 1, 3, 5, 2, 4}, {1, 0, 1, 0, 1, 1, 0, 1, 0, 1}}, + StringCW({"fleur", "flower", "", "่Šฑ", "ใฏใช", "fleur", "flower", "", "่Šฑ", "ใฏใช"}, + {true, true, false, true, true, true, true, false, true, true}), + LCWinner_t({{1, 2}, {}, {4, 5}, {-1}, {}, {1, 2}, {}, {4, 5}, {-1}, 
{}}, + valid_t{1, 1, 1, 1, 0, 1, 1, 1, 1, 0}.begin()), + valid_t{1, 1, 1, 0, 1, 1, 1, 1, 0, 1}.begin()); + auto expected = cudf::make_lists_column(2, + offset_t{0, 5, 10}.release(), + leaf.release(), + 0, + cudf::create_null_mask(2, cudf::mask_state::UNALLOCATED)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected); +} + +TYPED_TEST(ListsStructsLeafTest, FromNested) +{ + using LCWinner_t = cudf::test::lists_column_wrapper; + using StringCW = cudf::test::strings_column_wrapper; + using offset_t = cudf::test::fixed_width_column_wrapper; + using valid_t = std::vector; + auto leaf = this->make_test_structs_column( + {{1, 2}, {0, 1}}, + StringCW({"รฉtoile", "ๆ˜Ÿ"}, {true, true}), + LCWinner_t({LCWinner_t{}, LCWinner_t{42}}, valid_t{1, 1}.begin()), + valid_t{0, 1}.begin()); + auto mask = cudf::create_null_mask(3, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask.data()), 0, 1, false); + auto data = + cudf::make_lists_column(3, offset_t{0, 0, 1, 2}.release(), leaf.release(), 1, std::move(mask)); + auto s = cudf::make_list_scalar(*data); + + auto col = cudf::make_column_from_scalar(*s, 3); + + auto leaf2 = this->make_test_structs_column( + {{1, 2, 1, 2, 1, 2}, {0, 1, 0, 1, 0, 1}}, + StringCW({"รฉtoile", "ๆ˜Ÿ", "รฉtoile", "ๆ˜Ÿ", "รฉtoile", "ๆ˜Ÿ"}, + {true, true, true, true, true, true}), + LCWinner_t( + {LCWinner_t{}, LCWinner_t{42}, LCWinner_t{}, LCWinner_t{42}, LCWinner_t{}, LCWinner_t{42}}, + valid_t{1, 1, 1, 1, 1, 1}.begin()), + valid_t{0, 1, 0, 1, 0, 1}.begin()); + auto mask2 = cudf::create_null_mask(9, cudf::mask_state::ALL_VALID); + cudf::set_null_mask(static_cast(mask2.data()), 0, 1, false); + cudf::set_null_mask(static_cast(mask2.data()), 3, 4, false); + cudf::set_null_mask(static_cast(mask2.data()), 6, 7, false); + auto data2 = cudf::make_lists_column( + 9, offset_t{0, 0, 1, 2, 2, 3, 4, 4, 5, 6}.release(), leaf2.release(), 3, std::move(mask2)); + auto expected = cudf::make_lists_column(3, + offset_t{0, 3, 6, 9}.release(), + std::move(data2), + 0, + cudf::create_null_mask(3, cudf::mask_state::UNALLOCATED)); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*col, *expected); +} + +class ListsZeroLengthColumnTest : public ColumnFactoryTest { + protected: + using StructsCW = cudf::test::structs_column_wrapper; + StructsCW make_test_structs_column(cudf::test::fixed_width_column_wrapper field1, + cudf::test::strings_column_wrapper field2, + cudf::test::lists_column_wrapper field3) + { + return StructsCW{field1, field2, field3}; + } +}; + +TEST_F(ListsZeroLengthColumnTest, MixedTypes) +{ + using FCW = cudf::test::fixed_width_column_wrapper; + using StringCW = cudf::test::strings_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + using offset_t = cudf::test::fixed_width_column_wrapper; + { + auto s = cudf::make_list_scalar(FCW{1, 2, 3}); + auto got = cudf::make_column_from_scalar(*s, 0); + auto expected = + cudf::make_lists_column(0, + offset_t{}.release(), + FCW{}.release(), + 0, + cudf::create_null_mask(0, cudf::mask_state::UNALLOCATED)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got, *expected); + } + + { + auto s = cudf::make_list_scalar(LCW{LCW{1, 2, 3}, LCW{}, LCW{5, 6}}); + auto got = cudf::make_column_from_scalar(*s, 0); + auto nested = cudf::make_lists_column(0, + offset_t{}.release(), + FCW{}.release(), + 0, + cudf::create_null_mask(0, cudf::mask_state::UNALLOCATED)); + auto expected = + cudf::make_lists_column(0, + offset_t{}.release(), + std::move(nested), + 0, + cudf::create_null_mask(0, cudf::mask_state::UNALLOCATED)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got, *expected); + 
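
These tests exercise the list-scalar path of `make_column_from_scalar`; stripped to its essentials, the intended usage looks roughly like the sketch below (factory headers and names assumed from the surrounding tests).

```cpp
// Illustrative only: replicate a list scalar into an N-row LIST column.
#include <cudf/column/column_factories.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf_test/column_wrapper.hpp>

void list_scalar_fill_sketch()
{
  cudf::test::fixed_width_column_wrapper<int32_t> row{1, 2, 3};

  auto const s   = cudf::make_list_scalar(row);          // scalar holding [1, 2, 3]
  auto const col = cudf::make_column_from_scalar(*s, 4);
  // col is a LIST<INT32> column with 4 rows, each equal to [1, 2, 3];
  // a size of 0 yields an empty lists column with the same nested hierarchy.
}
```
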
} + + { + auto s = cudf::make_list_scalar( + this->make_test_structs_column({1, 2, 3}, StringCW({"x", "", "y"}), LCW{{5, 6}, {}, {7}})); + auto got = cudf::make_column_from_scalar(*s, 0); + + std::vector> children; + children.emplace_back(FCW{}.release()); + children.emplace_back(StringCW{}.release()); + children.emplace_back(LCW{}.release()); + auto nested = cudf::make_structs_column( + 0, std::move(children), 0, cudf::create_null_mask(0, cudf::mask_state::UNALLOCATED)); + + auto expected = + cudf::make_lists_column(0, + offset_t{}.release(), + std::move(nested), + 0, + cudf::create_null_mask(0, cudf::mask_state::UNALLOCATED)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*got, *expected); + } +} + void struct_from_scalar(bool is_valid) { using LCW = cudf::test::lists_column_wrapper; diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp index 7580c1c4e3b..9d2141c913c 100644 --- a/cpp/tests/groupby/collect_list_tests.cpp +++ b/cpp/tests/groupby/collect_list_tests.cpp @@ -86,6 +86,21 @@ TYPED_TEST(groupby_collect_list_test, CollectWithNullExclusion) test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } +TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInput) +{ + using K = int32_t; + using V = TypeParam; + + fixed_width_column_wrapper keys{}; + fixed_width_column_wrapper values{}; + + fixed_width_column_wrapper expect_keys{}; + lists_column_wrapper expect_vals{}; + + auto agg = cudf::make_collect_list_aggregation(null_policy::EXCLUDE); + test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); +} + TYPED_TEST(groupby_collect_list_test, CollectLists) { using K = int32_t; @@ -124,6 +139,61 @@ TYPED_TEST(groupby_collect_list_test, CollectListsWithNullExclusion) test_single_agg(keys, values, expect_keys, expect_vals, std::move(agg)); } +TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputLists) +{ + using K = int32_t; + using V = TypeParam; + + using LCW = cudf::test::lists_column_wrapper; + + auto offsets = data_type{type_to_id()}; + + fixed_width_column_wrapper keys{}; + auto values = cudf::make_lists_column(0, make_empty_column(offsets), LCW{}.release(), 0, {}); + + fixed_width_column_wrapper expect_keys{}; + + auto expect_child = + cudf::make_lists_column(0, make_empty_column(offsets), LCW{}.release(), 0, {}); + auto expect_values = + cudf::make_lists_column(0, make_empty_column(offsets), std::move(expect_child), 0, {}); + + auto agg = cudf::make_collect_list_aggregation(); + test_single_agg(keys, values->view(), expect_keys, expect_values->view(), std::move(agg)); +} + +TYPED_TEST(groupby_collect_list_test, CollectOnEmptyInputListsOfStructs) +{ + using K = int32_t; + using V = TypeParam; + + using LCW = cudf::test::lists_column_wrapper; + + fixed_width_column_wrapper keys{}; + auto struct_child = LCW{}; + auto struct_column = structs_column_wrapper{{struct_child}}; + + auto values = cudf::make_lists_column( + 0, make_empty_column(data_type{type_to_id()}), struct_column.release(), 0, {}); + + fixed_width_column_wrapper expect_keys{}; + + auto expect_struct_child = LCW{}; + auto expect_struct_column = structs_column_wrapper{{expect_struct_child}}; + + auto expect_child = + cudf::make_lists_column(0, + make_empty_column(data_type{type_to_id()}), + expect_struct_column.release(), + 0, + {}); + auto expect_values = cudf::make_lists_column( + 0, make_empty_column(data_type{type_to_id()}), std::move(expect_child), 0, {}); + + auto agg = cudf::make_collect_list_aggregation(); + test_single_agg(keys, values->view(), 
expect_keys, expect_values->view(), std::move(agg)); +} + TYPED_TEST(groupby_collect_list_test, dictionary) { using K = int32_t; diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp index ce3a9a49372..d5a881a1993 100644 --- a/cpp/tests/groupby/collect_set_tests.cpp +++ b/cpp/tests/groupby/collect_set_tests.cpp @@ -58,8 +58,7 @@ TYPED_TEST_CASE(CollectSetTypedTest, FixedWidthTypesNotBool); TYPED_TEST(CollectSetTypedTest, TrivialInput) { // Empty input - // TODO: Enable this test after issue#7611 has been fixed - // test_single_agg(COL_K{}, COL_V{}, COL_K{}, COL_V{}, COLLECT_SET); + test_single_agg(COL_K{}, COL_V{}, COL_K{}, LCL_V{}, CollectSetTest::collect_set()); // Single key input { diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index ec0265a3023..5630cba09da 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -362,5 +362,45 @@ TEST_F(groupby_nth_element_string_test, dictionary) keys, vals, expect_keys, expect_vals->view(), cudf::make_nth_element_aggregation(2)); } +template +struct groupby_nth_element_lists_test : BaseFixture { +}; + +TYPED_TEST_CASE(groupby_nth_element_lists_test, FixedWidthTypesWithoutFixedPoint); + +TYPED_TEST(groupby_nth_element_lists_test, Basics) +{ + using K = int32_t; + using V = TypeParam; + + using lists = cudf::test::lists_column_wrapper; + + auto keys = fixed_width_column_wrapper{1, 1, 2, 2, 3, 3}; + auto values = lists{{1, 2}, {3, 4}, {5, 6, 7}, lists{}, {9, 10}, {11}}; + + auto expected_keys = fixed_width_column_wrapper{1, 2, 3}; + auto expected_values = lists{{1, 2}, {5, 6, 7}, {9, 10}}; + + test_single_agg( + keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(0)); +} + +TYPED_TEST(groupby_nth_element_lists_test, EmptyInput) +{ + using K = int32_t; + using V = TypeParam; + + using lists = cudf::test::lists_column_wrapper; + + auto keys = fixed_width_column_wrapper{}; + auto values = lists{}; + + auto expected_keys = fixed_width_column_wrapper{}; + auto expected_values = lists{}; + + test_single_agg( + keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(2)); +} + } // namespace test } // namespace cudf diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 6bc08cf24a6..e45b67505ba 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -22,9 +22,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -61,6 +63,16 @@ using table_view = cudf::table_view; auto const temp_env = static_cast( ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); +// Base test fixture for tests +struct CsvWriterTest : public cudf::test::BaseFixture { +}; + +template +struct CsvFixedPointWriterTest : public CsvWriterTest { +}; + +TYPED_TEST_CASE(CsvFixedPointWriterTest, cudf::test::FixedPointTypes); + // Base test fixture for tests struct CsvReaderTest : public cudf::test::BaseFixture { }; @@ -307,6 +319,98 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn) expect_column_data_equal(std::vector(sequence, sequence + num_rows), view.column(0)); } +TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale) +{ + std::vector reference_strings = { + "1.23", "-8.76", "5.43", "-0.12", "0.25", "-0.23", "-0.27", "0.00", "0.00"}; + + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 == 0) ? 
true : false; }); + cudf::test::strings_column_wrapper strings( + reference_strings.begin(), reference_strings.end(), validity); + + std::vector valid_reference_strings; + thrust::copy_if(thrust::host, + reference_strings.begin(), + reference_strings.end(), + thrust::make_counting_iterator(0), + std::back_inserter(valid_reference_strings), + validity.functor()); + reference_strings = valid_reference_strings; + + using DecimalType = TypeParam; + auto input_column = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), + cudf::data_type{cudf::type_to_id(), numeric::scale_type{-2}}); + + auto input_table = cudf::table_view{std::vector{*input_column}}; + + auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv"; + + cudf_io::csv_writer_options writer_options = + cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table); + + cudf_io::write_csv(writer_options); + + std::vector result_strings; + result_strings.reserve(reference_strings.size()); + + std::ifstream read_result_file(filepath); + assert(read_result_file.is_open()); + + std::copy(std::istream_iterator(read_result_file), + std::istream_iterator(), + std::back_inserter(result_strings)); + + EXPECT_EQ(result_strings, reference_strings); +} + +TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale) +{ + std::vector reference_strings = { + "123000", "-876000", "543000", "-12000", "25000", "-23000", "-27000", "0000", "0000"}; + + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto i) { return (i % 2 == 0) ? true : false; }); + cudf::test::strings_column_wrapper strings( + reference_strings.begin(), reference_strings.end(), validity); + + std::vector valid_reference_strings; + thrust::copy_if(thrust::host, + reference_strings.begin(), + reference_strings.end(), + thrust::make_counting_iterator(0), + std::back_inserter(valid_reference_strings), + validity.functor()); + reference_strings = valid_reference_strings; + + using DecimalType = TypeParam; + auto input_column = cudf::strings::to_fixed_point( + cudf::strings_column_view(strings), + cudf::data_type{cudf::type_to_id(), numeric::scale_type{3}}); + + auto input_table = cudf::table_view{std::vector{*input_column}}; + + auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv"; + + cudf_io::csv_writer_options writer_options = + cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table); + + cudf_io::write_csv(writer_options); + + std::vector result_strings; + result_strings.reserve(reference_strings.size()); + + std::ifstream read_result_file(filepath); + assert(read_result_file.is_open()); + + std::copy(std::istream_iterator(read_result_file), + std::istream_iterator(), + std::back_inserter(result_strings)); + + EXPECT_EQ(result_strings, reference_strings); +} + TEST_F(CsvReaderTest, MultiColumn) { constexpr auto num_rows = 10; diff --git a/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp b/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp new file mode 100644 index 00000000000..7d79cf4aebe --- /dev/null +++ b/cpp/tests/lists/combine/concatenate_list_elements_tests.cpp @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +namespace { +using StrListsCol = cudf::test::lists_column_wrapper; +using IntListsCol = cudf::test::lists_column_wrapper; +using IntCol = cudf::test::fixed_width_column_wrapper; + +constexpr bool print_all{false}; // For debugging +constexpr int32_t null{0}; + +template +auto build_lists_col(T& list, Ts&... lists) +{ + return T(std::initializer_list{std::move(list), std::move(lists)...}); +} + +auto all_nulls() { return cudf::test::iterator_all_nulls(); } + +auto null_at(cudf::size_type idx) { return cudf::test::iterator_with_null_at(idx); } + +auto null_at(std::vector const& indices) +{ + return cudf::test::iterator_with_null_at(cudf::host_span{indices}); +} + +} // namespace + +struct ConcatenateListElementsTest : public cudf::test::BaseFixture { +}; + +TEST_F(ConcatenateListElementsTest, InvalidInput) +{ + // Input lists is not a 2-level depth lists column. + { + auto const col = IntCol{}; + EXPECT_THROW(cudf::lists::concatenate_list_elements(col), cudf::logic_error); + } + + // Input lists is not at least 2-level depth lists column. + { + auto const col = IntListsCol{1, 2, 3}; + EXPECT_THROW(cudf::lists::concatenate_list_elements(col), cudf::logic_error); + } +} + +template +struct ConcatenateListElementsTypedTest : public cudf::test::BaseFixture { +}; + +using TypesForTest = cudf::test::Concat; +TYPED_TEST_CASE(ConcatenateListElementsTypedTest, TypesForTest); + +TYPED_TEST(ConcatenateListElementsTypedTest, SimpleInputNoNull) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto row0 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row1 = ListsCol{ListsCol{}}; + auto row2 = ListsCol{{7, 8}, {9, 10}}; + auto const col = build_lists_col(row0, row1, row2); + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = ListsCol{{1, 2, 3, 4, 5, 6}, {}, {7, 8, 9, 10}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); +} + +TYPED_TEST(ConcatenateListElementsTypedTest, SimpleInputNestedManyLevelsNoNull) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto row00 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row01 = ListsCol{ListsCol{}}; + auto row02 = ListsCol{{7, 8}, {9, 10}}; + auto row0 = build_lists_col(row00, row01, row02); + + auto row10 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row11 = ListsCol{ListsCol{}}; + auto row12 = ListsCol{{7, 8}, {9, 10}}; + auto row1 = build_lists_col(row10, row11, row12); + + auto row20 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row21 = ListsCol{ListsCol{}}; + auto row22 = ListsCol{{7, 8}, {9, 10}}; + auto row2 = build_lists_col(row20, row21, row22); + + auto const col = build_lists_col(row0, row1, row2); + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = ListsCol{ListsCol{{1, 2}, {3}, {4, 5, 6}, {}, {7, 8}, {9, 10}}, + ListsCol{{1, 2}, {3}, {4, 5, 6}, {}, {7, 8}, {9, 10}}, + ListsCol{{1, 2}, {3}, {4, 5, 6}, {}, {7, 8}, {9, 10}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); +} + +TEST_F(ConcatenateListElementsTest, SimpleInputStringsColumnNoNull) +{ + auto row0 = 
StrListsCol{StrListsCol{"Tomato", "Apple"}, StrListsCol{"Orange"}}; + auto row1 = StrListsCol{StrListsCol{"Banana", "Kiwi", "Cherry"}, StrListsCol{"Lemon", "Peach"}}; + auto row2 = StrListsCol{StrListsCol{"Coconut"}, StrListsCol{}}; + auto const col = build_lists_col(row0, row1, row2); + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{StrListsCol{"Tomato", "Apple", "Orange"}, + StrListsCol{"Banana", "Kiwi", "Cherry", "Lemon", "Peach"}, + StrListsCol{"Coconut"}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); +} + +TYPED_TEST(ConcatenateListElementsTypedTest, SimpleInputWithNulls) +{ + using ListsCol = cudf::test::lists_column_wrapper; + auto row0 = ListsCol{{ListsCol{{1, null, 3, 4}, null_at(1)}, + ListsCol{{10, 11, 12, null}, null_at(3)}, + ListsCol{} /*NULL*/}, + null_at(2)}; + auto row1 = ListsCol{ListsCol{{null, 2, 3, 4}, null_at(0)}, + ListsCol{{13, 14, 15, 16, 17, null}, null_at(5)}, + ListsCol{{20, null}, null_at(1)}}; + auto row2 = ListsCol{{ListsCol{{null, 2, 3, 4}, null_at(0)}, + ListsCol{} /*NULL*/, + ListsCol{{null, 21, null, null}, null_at({0, 2, 3})}}, + null_at(1)}; + auto row3 = ListsCol{{ListsCol{} /*NULL*/, ListsCol{{null, 18}, null_at(0)}}, null_at(0)}; + auto row4 = ListsCol{ListsCol{{1, 2, null, 4}, null_at(2)}, + ListsCol{{19, 20, null}, null_at(2)}, + ListsCol{22, 23, 24, 25}}; + auto row5 = ListsCol{ListsCol{{1, 2, 3, null}, null_at(3)}, + ListsCol{{null}, null_at(0)}, + ListsCol{{null, null, null, null, null}, all_nulls()}}; + auto row6 = + ListsCol{{ListsCol{} /*NULL*/, ListsCol{} /*NULL*/, ListsCol{} /*NULL*/}, all_nulls()}; + auto const col = build_lists_col(row0, row1, row2, row3, row4, row5, row6); + + // Ignore null list elements. + { + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{{ListsCol{{1, null, 3, 4, 10, 11, 12, null}, null_at({1, 7})}, + ListsCol{{null, 2, 3, 4, 13, 14, 15, 16, 17, null, 20, null}, null_at({0, 9, 11})}, + ListsCol{{null, 2, 3, 4, null, 21, null, null}, null_at({0, 4, 6, 7})}, + ListsCol{{null, 18}, null_at(0)}, + ListsCol{{1, 2, null, 4, 19, 20, null, 22, 23, 24, 25}, null_at({2, 6})}, + ListsCol{{1, 2, 3, null, null, null, null, null, null, null}, + null_at({3, 4, 5, 6, 7, 8, 9})}, + ListsCol{} /*NULL*/}, + null_at(6)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + + // Null lists result in null rows. 
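
In other words, under `NULLIFY_OUTPUT_ROW` a row whose child lists include a null is emitted as a null row rather than having the null skipped. A compact sketch of both policies, assuming the public `<cudf/lists/combine.hpp>` header these tests rely on:

```cpp
// Illustrative only: flatten one level of list nesting per row.
#include <cudf/lists/combine.hpp>
#include <cudf_test/column_wrapper.hpp>

void concatenate_list_elements_sketch()
{
  using LCW = cudf::test::lists_column_wrapper<int32_t>;

  // Row 0 holds three lists, row 1 holds two lists.
  auto const input = LCW{LCW{{1, 2}, {3}, {4, 5}}, LCW{{6}, {7, 8}}};

  // IGNORE (default): rows become [1, 2, 3, 4, 5] and [6, 7, 8];
  // a null child list would simply be skipped.
  auto const flattened = cudf::lists::concatenate_list_elements(input);

  // NULLIFY_OUTPUT_ROW: any row containing a null child list becomes a null row.
  auto const strict = cudf::lists::concatenate_list_elements(
    input, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW);
}
```
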
+ { + auto const results = cudf::lists::concatenate_list_elements( + col, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW); + auto const expected = + ListsCol{{ListsCol{} /*NULL*/, + ListsCol{{null, 2, 3, 4, 13, 14, 15, 16, 17, null, 20, null}, null_at({0, 9, 11})}, + ListsCol{} /*NULL*/, + ListsCol{} /*NULL*/, + ListsCol{{1, 2, null, 4, 19, 20, null, 22, 23, 24, 25}, null_at({2, 6})}, + ListsCol{{1, 2, 3, null, null, null, null, null, null, null}, + null_at({3, 4, 5, 6, 7, 8, 9})}, + ListsCol{} /*NULL*/}, + null_at({0, 2, 3, 6})}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} + +TYPED_TEST(ConcatenateListElementsTypedTest, SimpleInputNestedManyLevelsWithNulls) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto row00 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row01 = ListsCol{ListsCol{}}; /*NULL*/ + auto row02 = ListsCol{{7, 8}, {9, 10}}; + auto row0 = ListsCol{{std::move(row00), std::move(row01), std::move(row02)}, null_at(1)}; + + auto row10 = ListsCol{{{1, 2}, {3}, {4, 5, 6} /*NULL*/}, null_at(2)}; + auto row11 = ListsCol{ListsCol{}}; + auto row12 = ListsCol{{7, 8}, {9, 10}}; + auto row1 = build_lists_col(row10, row11, row12); + + auto row20 = ListsCol{{1, 2}, {3}, {4, 5, 6}}; + auto row21 = ListsCol{ListsCol{}}; + auto row22 = ListsCol{ListsCol{{null, 8}, null_at(0)}, {9, 10}}; + auto row2 = build_lists_col(row20, row21, row22); + + auto const col = build_lists_col(row0, row1, row2); + + // Ignore null list elements. + { + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{ListsCol{{1, 2}, {3}, {4, 5, 6}, {7, 8}, {9, 10}}, + ListsCol{{{1, 2}, {3}, {} /*NULL*/, {}, {7, 8}, {9, 10}}, null_at(2)}, + ListsCol{{1, 2}, {3}, {4, 5, 6}, {}, ListsCol{{null, 8}, null_at(0)}, {9, 10}}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + + // Null lists result in null rows. + { + auto const results = cudf::lists::concatenate_list_elements( + col, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW); + auto const expected = + ListsCol{{ListsCol{ListsCol{}}, /*NULL*/ + ListsCol{{{1, 2}, {3}, {} /*NULL*/, {}, {7, 8}, {9, 10}}, null_at(2)}, + ListsCol{{1, 2}, {3}, {4, 5, 6}, {}, ListsCol{{null, 8}, null_at(0)}, {9, 10}}}, + null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} + +TEST_F(ConcatenateListElementsTest, SimpleInputStringsColumnWithNulls) +{ + auto row0 = StrListsCol{ + StrListsCol{{"Tomato", "Bear" /*NULL*/, "Apple"}, null_at(1)}, + StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})}}; + auto row1 = StrListsCol{ + StrListsCol{{"Banana", "Pig" /*NULL*/, "Kiwi", "Cherry", "Whale" /*NULL*/}, null_at({1, 4})}, + StrListsCol{"Lemon", "Peach"}}; + auto row2 = StrListsCol{{StrListsCol{"Coconut"}, StrListsCol{} /*NULL*/}, null_at(1)}; + auto const col = build_lists_col(row0, row1, row2); + + // Ignore null list elements. + { + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{ + StrListsCol{{"Tomato", "" /*NULL*/, "Apple", "Orange", "" /*NULL*/, "" /*NULL*/, "" + /*NULL*/}, + null_at({1, 4, 5, 6})}, + StrListsCol{{"Banana", "" /*NULL*/, "Kiwi", "Cherry", "" /*NULL*/, "Lemon", "Peach"}, + null_at({1, 4})}, + StrListsCol{"Coconut"}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + + // Null lists result in null rows. 
+ { + auto const results = cudf::lists::concatenate_list_elements( + col, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW); + auto const expected = StrListsCol{ + {StrListsCol{ + {"Tomato", "" /*NULL*/, "Apple", "Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, + null_at({1, 4, 5, 6})}, + StrListsCol{{"Banana", "" /*NULL*/, "Kiwi", "Cherry", "" /*NULL*/, "Lemon", "Peach"}, + null_at({1, 4})}, + StrListsCol{} /*NULL*/}, + null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} +TEST_F(ConcatenateListElementsTest, SimpleInputStringsColumnWithEmptyStringsAndNulls) +{ + auto row0 = + StrListsCol{StrListsCol{"", "", ""}, + StrListsCol{{"Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, null_at({1, 2, 3})}}; + auto row1 = StrListsCol{ + StrListsCol{{"Banana", "" /*NULL*/, "Kiwi", "Cherry", "" /*NULL*/}, null_at({1, 4})}, + StrListsCol{""}}; + auto row2 = StrListsCol{{StrListsCol{"Coconut"}, StrListsCol{} /*NULL*/}, null_at(1)}; + auto const col = build_lists_col(row0, row1, row2); + + // Ignore null list elements. + { + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{ + StrListsCol{{"", "", "", "Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, + null_at({4, 5, 6})}, + StrListsCol{{"Banana", "" /*NULL*/, "Kiwi", "Cherry", "" /*NULL*/, ""}, null_at({1, 4})}, + StrListsCol{"Coconut"}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + + // Null lists result in null rows. + { + auto const results = cudf::lists::concatenate_list_elements( + col, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW); + auto const expected = StrListsCol{ + {StrListsCol{{"", "", "", "Orange", "" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, + null_at({4, 5, 6})}, + StrListsCol{{"Banana", "" /*NULL*/, "Kiwi", "Cherry", "" /*NULL*/, ""}, null_at({1, 4})}, + StrListsCol{} /*NULL*/}, + null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} + +TYPED_TEST(ConcatenateListElementsTypedTest, SlicedColumnsInputNoNull) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto const col_original = ListsCol{ListsCol{{1, 2, 3}, {2, 3}}, + ListsCol{{3, 4, 5, 6}, {5, 6}, {}, {7}}, + ListsCol{{7, 7, 7}, {7, 8, 1, 0}, {1}}, + ListsCol{{9, 10, 11}}, + ListsCol{}, + ListsCol{{12, 13, 14, 15}, {16}, {17}}}; + + { + auto const col = cudf::slice(col_original, {0, 3})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{{1, 2, 3, 2, 3}, {3, 4, 5, 6, 5, 6, 7}, {7, 7, 7, 7, 8, 1, 0, 1}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {1, 4})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = ListsCol{{3, 4, 5, 6, 5, 6, 7}, {7, 7, 7, 7, 8, 1, 0, 1}, {9, 10, 11}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {2, 5})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = ListsCol{{7, 7, 7, 7, 8, 1, 0, 1}, {9, 10, 11}, {}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {3, 6})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = ListsCol{{9, 10, 11}, {}, {12, 13, 14, 15, 16, 17}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} + +TYPED_TEST(ConcatenateListElementsTypedTest, 
SlicedColumnsInputWithNulls) +{ + using ListsCol = cudf::test::lists_column_wrapper; + + auto row0 = ListsCol{ListsCol{{null, 2, 3}, null_at(0)}, ListsCol{2, 3}}; + auto row1 = ListsCol{ListsCol{{3, null, null, 6}, null_at({1, 2})}, + ListsCol{{5, 6, null}, null_at(2)}, + ListsCol{}, + ListsCol{{7, null}, null_at(1)}}; + auto row2 = ListsCol{ListsCol{7, 7, 7}, ListsCol{{7, 8, null, 0}, null_at(2)}, ListsCol{1}}; + auto row3 = ListsCol{ListsCol{9, 10, 11}}; + auto row4 = ListsCol{ListsCol{}}; + auto row5 = ListsCol{ListsCol{{12, null, 14, 15}, null_at(1)}, ListsCol{16}, ListsCol{17}}; + auto const col_original = build_lists_col(row0, row1, row2, row3, row4, row5); + + { + auto const col = cudf::slice(col_original, {0, 3})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{ListsCol{{null, 2, 3, 2, 3}, null_at(0)}, + ListsCol{{3, null, null, 6, 5, 6, null, 7, null}, null_at({1, 2, 6, 8})}, + ListsCol{{7, 7, 7, 7, 8, null, 0, 1}, null_at(5)}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {1, 4})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{ListsCol{{3, null, null, 6, 5, 6, null, 7, null}, null_at({1, 2, 6, 8})}, + ListsCol{{7, 7, 7, 7, 8, null, 0, 1}, null_at(5)}, + ListsCol{9, 10, 11}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {2, 5})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{ListsCol{{7, 7, 7, 7, 8, null, 0, 1}, null_at(5)}, ListsCol{9, 10, 11}, ListsCol{}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {3, 6})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = + ListsCol{ListsCol{9, 10, 11}, ListsCol{}, ListsCol{{12, null, 14, 15, 16, 17}, null_at(1)}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} + +TEST_F(ConcatenateListElementsTest, SlicedStringsColumnsInputWithNulls) +{ + auto row0 = StrListsCol{ + StrListsCol{{"Tomato", "Bear" /*NULL*/, "Apple"}, null_at(1)}, + StrListsCol{{"Banana", "Pig" /*NULL*/, "Kiwi", "Cherry", "Whale" /*NULL*/}, null_at({1, 4})}, + StrListsCol{"Coconut"}}; + auto row1 = StrListsCol{ + StrListsCol{{"Banana", "Pig" /*NULL*/, "Kiwi", "Cherry", "Whale" /*NULL*/}, null_at({1, 4})}, + StrListsCol{"Coconut"}, + StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})}}; + auto row2 = StrListsCol{ + StrListsCol{"Coconut"}, + StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})}, + StrListsCol{"Lemon", "Peach"}}; + auto row3 = StrListsCol{ + {StrListsCol{{"Orange", "Dog" /*NULL*/, "Fox" /*NULL*/, "Duck" /*NULL*/}, null_at({1, 2, 3})}, + StrListsCol{"Lemon", "Peach"}, + StrListsCol{} /*NULL*/}, + null_at(2)}; + auto const col_original = build_lists_col(row0, row1, row2, row3); + + { + auto const col = cudf::slice(col_original, {0, 2})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{StrListsCol{{"Tomato", + "" /*NULL*/, + "Apple", + "Banana", + "" /*NULL*/, + "Kiwi", + "Cherry", + "" /*NULL*/, + "Coconut"}, + null_at({1, 4, 7})}, + StrListsCol{{"Banana", + "" /*NULL*/, + "Kiwi", + "Cherry", + "" /*NULL*/, + "Coconut", + "Orange", + "" /*NULL*/, + "" /*NULL*/, + "" 
/*NULL*/}, + null_at({1, 4, 7, 8, 9})}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {1, 3})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{StrListsCol{{"Banana", + "" /*NULL*/, + "Kiwi", + "Cherry", + "" /*NULL*/, + "Coconut", + "Orange", + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}, + null_at({1, 4, 7, 8, 9})}, + StrListsCol{{"Coconut", + "Orange", + "" /*NULL*/, + "" /*NULL*/, + "", /*NULL*/ + "Lemon", + "Peach"}, + null_at({2, 3, 4})}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {2, 4})[0]; + auto const results = cudf::lists::concatenate_list_elements(col); + auto const expected = StrListsCol{StrListsCol{{"Coconut", + "Orange", + "" /*NULL*/, + "" /*NULL*/, + "", /*NULL*/ + "Lemon", + "Peach"}, + null_at({2, 3, 4})}, + StrListsCol{{"Orange", + "" /*NULL*/, + "" /*NULL*/, + "", /*NULL*/ + "Lemon", + "Peach"}, + null_at({1, 2, 3})}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } + { + auto const col = cudf::slice(col_original, {2, 4})[0]; + auto const results = cudf::lists::concatenate_list_elements( + col, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW); + auto const expected = StrListsCol{{StrListsCol{{"Coconut", + "Orange", + "" /*NULL*/, + "" /*NULL*/, + "", /*NULL*/ + "Lemon", + "Peach"}, + null_at({2, 3, 4})}, + StrListsCol{} /*NULL*/}, + null_at(1)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *results, print_all); + } +} diff --git a/cpp/tests/lists/concatenate_rows_tests.cpp b/cpp/tests/lists/combine/concatenate_rows_tests.cpp similarity index 94% rename from cpp/tests/lists/concatenate_rows_tests.cpp rename to cpp/tests/lists/combine/concatenate_rows_tests.cpp index 5abaf99f739..af22f329634 100644 --- a/cpp/tests/lists/concatenate_rows_tests.cpp +++ b/cpp/tests/lists/combine/concatenate_rows_tests.cpp @@ -19,7 +19,7 @@ #include #include -#include +#include namespace { using StrListsCol = cudf::test::lists_column_wrapper; @@ -184,24 +184,27 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls) ListsCol{{null, 2, 3, 4}, null_at(0)}, ListsCol{} /*NULL*/, ListsCol{{1, 2, null, 4}, null_at(2)}, - ListsCol{{1, 2, 3, null}, null_at(3)}}, - null_at(3)} + ListsCol{{1, 2, 3, null}, null_at(3)}, + ListsCol{} /*NULL*/}, + null_at({3, 6})} .release(); auto const col2 = ListsCol{{ListsCol{{10, 11, 12, null}, null_at(3)}, ListsCol{{13, 14, 15, 16, 17, null}, null_at(5)}, ListsCol{} /*NULL*/, ListsCol{{null, 18}, null_at(0)}, ListsCol{{19, 20, null}, null_at(2)}, - ListsCol{{null}, null_at(0)}}, - null_at(2)} + ListsCol{{null}, null_at(0)}, + ListsCol{} /*NULL*/}, + null_at({2, 6})} .release(); auto const col3 = ListsCol{{ListsCol{} /*NULL*/, ListsCol{{20, null}, null_at(1)}, ListsCol{{null, 21, null, null}, null_at({0, 2, 3})}, ListsCol{}, ListsCol{22, 23, 24, 25}, - ListsCol{{null, null, null, null, null}, all_nulls()}}, - null_at(0)} + ListsCol{{null, null, null, null, null}, all_nulls()}, + ListsCol{} /*NULL*/}, + null_at({0, 6})} .release(); // Ignore null list elements @@ -209,13 +212,15 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls) auto const results = cudf::lists::concatenate_rows(TView{{col1->view(), col2->view(), col3->view()}}); auto const expected = - ListsCol{ListsCol{{1, null, 3, 4, 10, 11, 12, null}, null_at({1, 7})}, - ListsCol{{null, 2, 3, 4, 13, 14, 15, 16, 17, null, 20, null}, null_at({0, 9, 11})}, - 
ListsCol{{null, 2, 3, 4, null, 21, null, null}, null_at({0, 4, 6, 7})}, - ListsCol{{null, 18}, null_at(0)}, - ListsCol{{1, 2, null, 4, 19, 20, null, 22, 23, 24, 25}, null_at({2, 6})}, - ListsCol{{1, 2, 3, null, null, null, null, null, null, null}, - null_at({3, 4, 5, 6, 7, 8, 9})}} + ListsCol{{ListsCol{{1, null, 3, 4, 10, 11, 12, null}, null_at({1, 7})}, + ListsCol{{null, 2, 3, 4, 13, 14, 15, 16, 17, null, 20, null}, null_at({0, 9, 11})}, + ListsCol{{null, 2, 3, 4, null, 21, null, null}, null_at({0, 4, 6, 7})}, + ListsCol{{null, 18}, null_at(0)}, + ListsCol{{1, 2, null, 4, 19, 20, null, 22, 23, 24, 25}, null_at({2, 6})}, + ListsCol{{1, 2, 3, null, null, null, null, null, null, null}, + null_at({3, 4, 5, 6, 7, 8, 9})}, + ListsCol{} /*NULL*/}, + null_at(6)} .release(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } @@ -232,8 +237,9 @@ TYPED_TEST(ListConcatenateRowsTypedTest, SimpleInputWithNulls) ListsCol{} /*NULL*/, ListsCol{{1, 2, null, 4, 19, 20, null, 22, 23, 24, 25}, null_at({2, 6})}, ListsCol{{1, 2, 3, null, null, null, null, null, null, null}, - null_at({3, 4, 5, 6, 7, 8, 9})}}, - null_at({0, 2, 3})} + null_at({3, 4, 5, 6, 7, 8, 9})}, + ListsCol{} /*NULL*/}, + null_at({0, 2, 3, 6})} .release(); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all); } diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp index bdd5e7bc780..669d406d80a 100644 --- a/cpp/tests/partitioning/partition_test.cpp +++ b/cpp/tests/partitioning/partition_test.cpp @@ -310,3 +310,23 @@ TEST_F(PartitionTestNotTyped, ListOfListOfIntEmpty) CUDF_TEST_EXPECT_TABLES_EQUAL(table_to_partition, result.first->view()); EXPECT_EQ(3, result.second.size()); } + +TEST_F(PartitionTestNotTyped, ListOfListOfListOfIntEmpty) +{ + cudf::test::lists_column_wrapper level_3_list{}; + + fixed_width_column_wrapper level_2_offsets{}; + std::unique_ptr level_2_list = + cudf::make_lists_column(0, level_2_offsets.release(), level_3_list.release(), 0, {}); + + fixed_width_column_wrapper level_1_offsets{0, 0}; + std::unique_ptr level_1_list = + cudf::make_lists_column(1, level_1_offsets.release(), std::move(level_2_list), 0, {}); + + auto table_to_partition = cudf::table_view{{*level_1_list}}; + fixed_width_column_wrapper map{0}; + + auto result = cudf::partition(table_to_partition, map, 2); + CUDF_TEST_EXPECT_TABLES_EQUAL(table_to_partition, result.first->view()); + EXPECT_EQ(3, result.second.size()); +} diff --git a/cpp/tests/strings/combine/concatenate_tests.cpp b/cpp/tests/strings/combine/concatenate_tests.cpp index c1c390e8a82..d91f669e42d 100644 --- a/cpp/tests/strings/combine/concatenate_tests.cpp +++ b/cpp/tests/strings/combine/concatenate_tests.cpp @@ -95,6 +95,58 @@ TEST_F(StringsCombineTest, Concatenate) } } +TEST_F(StringsCombineTest, ConcatenateSkipNulls) +{ + cudf::test::strings_column_wrapper strings1({"eee", "", "", "", "aa", "bbb", "รฉรฉรฉ"}, + {1, 0, 0, 1, 1, 1, 1}); + cudf::test::strings_column_wrapper strings2({"xyz", "", "d", "รฉa", "", "", "f"}, + {1, 0, 1, 1, 1, 0, 1}); + cudf::test::strings_column_wrapper strings3({"q", "", "s", "t", "u", "", "w"}, + {1, 1, 1, 1, 1, 0, 1}); + + cudf::table_view table({strings1, strings2, strings3}); + + { + cudf::test::strings_column_wrapper expected( + {"eee+xyz+q", "++", "+d+s", "+รฉa+t", "aa++u", "bbb++", "รฉรฉรฉ+f+w"}); + auto results = cudf::strings::concatenate(table, + cudf::string_scalar("+"), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::YES); + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + { + cudf::test::strings_column_wrapper expected( + {"eee+xyz+q", "", "d+s", "+รฉa+t", "aa++u", "bbb", "รฉรฉรฉ+f+w"}); + auto results = cudf::strings::concatenate(table, + cudf::string_scalar("+"), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + { + cudf::test::strings_column_wrapper expected( + {"eee+xyz+q", "", "", "+รฉa+t", "aa++u", "", "รฉรฉรฉ+f+w"}, {1, 0, 0, 1, 1, 0, 1}); + auto results = cudf::strings::concatenate(table, + cudf::string_scalar("+"), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } + { + cudf::test::strings_column_wrapper sep_col({"+", "-", ".", "@", "*", "^^", "#"}); + auto results = cudf::strings::concatenate(table, + cudf::strings_column_view(sep_col), + cudf::string_scalar(""), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + + cudf::test::strings_column_wrapper expected( + {"eee+xyz+q", "", "d.s", "@รฉa@t", "aa**u", "bbb", "รฉรฉรฉ#f#w"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); + } +} + TEST_F(StringsCombineTest, ConcatZeroSizeStringsColumns) { cudf::column_view zero_size_strings_column( @@ -107,6 +159,12 @@ TEST_F(StringsCombineTest, ConcatZeroSizeStringsColumns) cudf::test::expect_strings_empty(results->view()); } +TEST_F(StringsCombineTest, SingleColumnErrorCheck) +{ + cudf::column_view col0(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0); + EXPECT_THROW(cudf::strings::concatenate(cudf::table_view{{col0}}), cudf::logic_error); +} + struct StringsConcatenateWithColSeparatorTest : public cudf::test::BaseFixture { }; @@ -157,7 +215,6 @@ TEST_F(StringsConcatenateWithColSeparatorTest, SingleColumnEmptyAndNullStringsNo auto exp_results = cudf::test::strings_column_wrapper({"", "", "", ""}, {false, true, false, false}); - auto results = cudf::strings::concatenate(cudf::table_view{{col0}}, cudf::strings_column_view(sep_col)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results, true); @@ -295,12 +352,20 @@ TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnEmptyAndNullStringsNoR auto sep_col = cudf::test::strings_column_wrapper( {"", "", "", "", "", "", "", ""}, {true, false, true, false, true, false, true, false}); - auto exp_results = cudf::test::strings_column_wrapper( - {"", "", "", "", "", "", "", ""}, {false, false, true, false, true, false, true, false}); - + auto exp_results1 = cudf::test::strings_column_wrapper( + {"", "", "", "", "", "", "", ""}, {false, false, true, false, false, false, false, false}); auto results = cudf::strings::concatenate(cudf::table_view{{col0, col1}}, cudf::strings_column_view(sep_col)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results, true); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results1, true); + + auto exp_results2 = cudf::test::strings_column_wrapper( + {"", "", "", "", "", "", "", ""}, {true, false, true, false, true, false, true, false}); + results = cudf::strings::concatenate(cudf::table_view{{col0, col1}}, + cudf::strings_column_view(sep_col), + cudf::string_scalar("", false), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results2, true); } TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnStringMixNoReplacements) @@ -315,13 +380,23 @@ TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnStringMixNoReplacement {"", "~~~", "", "@", 
"", "", "", "^^^^", "", "--", "*****", "######"}, {true, true, false, true, false, true, false, true, true, true, true, true}); - auto exp_results = cudf::test::strings_column_wrapper( - {"eeexyzfoo", "~~~", "", "รฉรฉรฉf", "", "", "", "valid", "doo", "", "", ""}, - {true, true, false, true, false, true, false, true, true, false, false, false}); + auto exp_results1 = cudf::test::strings_column_wrapper( + {"eeexyzfoo", "~~~", "", "", "", "", "", "", "", "", "", ""}, + {true, true, false, false, false, false, false, false, false, false, false, false}); auto results = cudf::strings::concatenate(cudf::table_view{{col0, col1}}, cudf::strings_column_view(sep_col)); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results, true); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results1, true); + + auto exp_results2 = cudf::test::strings_column_wrapper( + {"eeexyzfoo", "~~~", "", "รฉรฉรฉf", "", "", "", "valid", "doo", "", "", ""}, + {true, true, false, true, false, true, false, true, true, true, true, true}); + results = cudf::strings::concatenate(cudf::table_view{{col0, col1}}, + cudf::strings_column_view(sep_col), + cudf::string_scalar("", false), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results2, true); } TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnStringMixSeparatorReplacement) @@ -335,26 +410,26 @@ TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnStringMixSeparatorRepl auto sep_col = cudf::test::strings_column_wrapper( {"", "~~~", "", "@", "", "", "", "^^^^", "", "--", "*****", "######"}, {true, true, false, true, false, true, false, true, true, true, true, true}); - auto sep_rep = cudf::string_scalar("!!!!!!!!!!"); + auto sep_rep = cudf::string_scalar("!!!!!!!"); - auto exp_results = cudf::test::strings_column_wrapper( - {"eeexyzfoo", - "~~~", - "!!!!!!!!!!รฉaff", - "รฉรฉรฉf", - "รฉa", - "", - "รฉaff", - "valid", - "doo", - "", - "", - ""}, - {true, true, true, true, true, true, true, true, true, false, false, false}); + auto exp_results1 = cudf::test::strings_column_wrapper( + {"eeexyzfoo", "~~~", "!!!!!!!รฉaff", "รฉรฉรฉf", "รฉa", "", "รฉaff", "valid", "doo", "", "", ""}, + {true, true, true, false, false, false, false, false, false, false, false, false}); auto results = cudf::strings::concatenate( cudf::table_view{{col0, col1}}, cudf::strings_column_view(sep_col), sep_rep); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results, true); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results1, true); + + auto exp_results2 = cudf::test::strings_column_wrapper( + {"eeexyzfoo", "~~~", "!!!!!!!รฉaff", "รฉรฉรฉf", "รฉa", "", "รฉaff", "valid", "doo", "", "", ""}, + {true, true, true, true, true, true, true, true, true, true, true, true}); + + results = cudf::strings::concatenate(cudf::table_view{{col0, col1}}, + cudf::strings_column_view(sep_col), + sep_rep, + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, exp_results2, true); } TEST_F(StringsConcatenateWithColSeparatorTest, MultiColumnStringMixColumnReplacement) diff --git a/cpp/tests/strings/combine/concatenate_list_elements_tests.cpp b/cpp/tests/strings/combine/join_list_elements_tests.cpp similarity index 82% rename from cpp/tests/strings/combine/concatenate_list_elements_tests.cpp rename to cpp/tests/strings/combine/join_list_elements_tests.cpp index b6afd588dfb..e2f7c3e36a2 100644 --- a/cpp/tests/strings/combine/concatenate_list_elements_tests.cpp +++ 
b/cpp/tests/strings/combine/join_list_elements_tests.cpp @@ -58,7 +58,7 @@ TEST_F(StringsListsConcatenateTest, InvalidInput) { auto const string_lists = INT_LISTS{{1, 2, 3}, {4, 5, 6}}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); - EXPECT_THROW(cudf::strings::concatenate_list_elements(string_lv), cudf::logic_error); + EXPECT_THROW(cudf::strings::join_list_elements(string_lv), cudf::logic_error); } // Invalid scalar separator @@ -66,9 +66,8 @@ TEST_F(StringsListsConcatenateTest, InvalidInput) auto const string_lists = STR_LISTS{STR_LISTS{""}, STR_LISTS{"", "", ""}, STR_LISTS{"", ""}}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); - EXPECT_THROW( - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("", false)), - cudf::logic_error); + EXPECT_THROW(cudf::strings::join_list_elements(string_lv, cudf::string_scalar("", false)), + cudf::logic_error); } // Invalid column separators @@ -77,7 +76,7 @@ TEST_F(StringsListsConcatenateTest, InvalidInput) STR_LISTS{STR_LISTS{""}, STR_LISTS{"", "", ""}, STR_LISTS{"", ""}}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); auto const separators = STR_COL{"+++"}.release(); // size doesn't match with lists column size - EXPECT_THROW(cudf::strings::concatenate_list_elements(string_lv, separators->view()), + EXPECT_THROW(cudf::strings::join_list_elements(string_lv, separators->view()), cudf::logic_error); } } @@ -87,26 +86,26 @@ TEST_F(StringsListsConcatenateTest, EmptyInput) auto const string_lists = STR_LISTS{}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); auto const expected = STR_COL{}; - auto results = cudf::strings::concatenate_list_elements(string_lv); + auto results = cudf::strings::join_list_elements(string_lv); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); auto const separators = STR_COL{}.release(); - results = cudf::strings::concatenate_list_elements(string_lv, separators->view()); + results = cudf::strings::join_list_elements(string_lv, separators->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); } TEST_F(StringsListsConcatenateTest, ZeroSizeStringsInput) { auto const string_lists = - STR_LISTS{STR_LISTS{""}, STR_LISTS{"", "", ""}, STR_LISTS{"", ""}}.release(); + STR_LISTS{STR_LISTS{""}, STR_LISTS{"", "", ""}, STR_LISTS{"", ""}, STR_LISTS{}}.release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); - auto const expected = STR_COL{"", "", ""}; + auto const expected = STR_COL{"", "", "", ""}; - auto results = cudf::strings::concatenate_list_elements(string_lv); + auto results = cudf::strings::join_list_elements(string_lv); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); - auto const separators = STR_COL{"", "", ""}.release(); - results = cudf::strings::concatenate_list_elements(string_lv, separators->view()); + auto const separators = STR_COL{"", "", "", ""}.release(); + results = cudf::strings::join_list_elements(string_lv, separators->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); } @@ -120,29 +119,35 @@ TEST_F(StringsListsConcatenateTest, AllNullsStringsInput) auto const string_lv = cudf::lists_column_view(string_lists->view()); auto const expected = STR_COL{{"", "", ""}, all_nulls()}; - auto results = cudf::strings::concatenate_list_elements(string_lv); + auto results = cudf::strings::join_list_elements(string_lv); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); auto const 
separators = STR_COL{{"", "", ""}, all_nulls()}.release(); - results = cudf::strings::concatenate_list_elements(string_lv, separators->view()); + results = cudf::strings::join_list_elements(string_lv, separators->view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); } +auto null_at(std::initializer_list indices) +{ + return cudf::detail::make_counting_transform_iterator( + 0, [indices](auto i) { return std::find(indices.begin(), indices.end(), i) == indices.end(); }); +} + TEST_F(StringsListsConcatenateTest, ScalarSeparator) { auto const string_lists = STR_LISTS{{STR_LISTS{{"a", "bb" /*NULL*/, "ccc"}, null_at(1)}, STR_LISTS{}, /*NULL*/ STR_LISTS{{"ddd" /*NULL*/, "efgh", "ijk"}, null_at(0)}, - STR_LISTS{"zzz", "xxxxx"}}, + STR_LISTS{"zzz", "xxxxx"}, + STR_LISTS{{"v", "", "", "w"}, null_at({1, 2})}}, null_at(1)} .release(); auto const string_lv = cudf::lists_column_view(string_lists->view()); // No null replacement { - auto const results = - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("+++")); - std::vector h_expected{nullptr, nullptr, nullptr, "zzz+++xxxxx"}; + auto const results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("+++")); + std::vector h_expected{nullptr, nullptr, nullptr, "zzz+++xxxxx", nullptr}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); @@ -150,10 +155,22 @@ TEST_F(StringsListsConcatenateTest, ScalarSeparator) // With null replacement { - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, cudf::string_scalar("+++"), cudf::string_scalar("___")); std::vector h_expected{ - "a+++___+++ccc", nullptr, "___+++efgh+++ijk", "zzz+++xxxxx"}; + "a+++___+++ccc", nullptr, "___+++efgh+++ijk", "zzz+++xxxxx", "v+++___+++___+++w"}; + auto const expected = + STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } + + // Turn off separator-on-nulls + { + auto const results = cudf::strings::join_list_elements(string_lv, + cudf::string_scalar("+++"), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + std::vector h_expected{"a+++ccc", nullptr, "efgh+++ijk", "zzz+++xxxxx", "v+++w"}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); @@ -181,8 +198,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the entire lists column, no null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 11})[0]); - auto const results = - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("+++")); + auto const results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("+++")); std::vector h_expected{nullptr, nullptr, nullptr, @@ -202,7 +218,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the entire lists column, with null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, cudf::string_scalar("+++"), cudf::string_scalar("___")); std::vector h_expected{"a+++___+++ccc", nullptr, @@ -223,8 +239,7 @@ 
TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the first half of the lists column, no null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 4})[0]); - auto const results = - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("+++")); + auto const results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("+++")); std::vector h_expected{nullptr, nullptr, nullptr, "zzz+++xxxxx"}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; @@ -234,7 +249,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the first half of the lists column, with null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 4})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, cudf::string_scalar("+++"), cudf::string_scalar("___")); std::vector h_expected{ "a+++___+++ccc", nullptr, "___+++efgh+++ijk", "zzz+++xxxxx"}; @@ -246,8 +261,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the second half of the lists column, no null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {5, 11})[0]); - auto const results = - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("+++")); + auto const results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("+++")); std::vector h_expected{ nullptr, nullptr, "0a0b0c+++5x5y5z", nullptr, "รฉรฉรฉ+++12345abcdef", "aaaรฉรฉรฉbbbรฉรฉรฉccc+++12345"}; auto const expected = @@ -258,7 +272,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the second half of the lists column, with null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {5, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, cudf::string_scalar("+++"), cudf::string_scalar("___")); std::vector h_expected{"abcdef+++012345+++___+++xxx000", "___+++11111+++00000", @@ -274,8 +288,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the middle part of the lists column, no null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {3, 8})[0]); - auto const results = - cudf::strings::concatenate_list_elements(string_lv, cudf::string_scalar("+++")); + auto const results = cudf::strings::join_list_elements(string_lv, cudf::string_scalar("+++")); std::vector h_expected{ "zzz+++xxxxx", nullptr, nullptr, nullptr, "0a0b0c+++5x5y5z"}; auto const expected = @@ -286,7 +299,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithScalarSeparator) // Sliced the middle part of the lists column, with null replacement { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {3, 8})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, cudf::string_scalar("+++"), cudf::string_scalar("___")); std::vector h_expected{"zzz+++xxxxx", nullptr, @@ -318,7 +331,7 @@ TEST_F(StringsListsConcatenateTest, ColumnSeparators) // No null replacement { - auto const results = cudf::strings::concatenate_list_elements(string_lv, separators->view()); + auto const results = 
cudf::strings::join_list_elements(string_lv, separators->view()); std::vector h_expected{nullptr, nullptr, nullptr, nullptr, nullptr, "zzz^^^xxxxx"}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; @@ -327,8 +340,8 @@ TEST_F(StringsListsConcatenateTest, ColumnSeparators) // With null replacement for separators { - auto const results = cudf::strings::concatenate_list_elements( - string_lv, separators->view(), cudf::string_scalar("|||")); + auto const results = + cudf::strings::join_list_elements(string_lv, separators->view(), cudf::string_scalar("|||")); std::vector h_expected{ nullptr, nullptr, "0a0b0c|||xyzรฉรฉรฉ", nullptr, nullptr, "zzz^^^xxxxx"}; auto const expected = @@ -338,7 +351,7 @@ TEST_F(StringsListsConcatenateTest, ColumnSeparators) // With null replacement for strings { - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, separators->view(), cudf::string_scalar("", false), cudf::string_scalar("XXXXX")); std::vector h_expected{ "a+++XXXXX+++ccc", nullptr, nullptr, nullptr, "XXXXX%%%รกรกรก%%%รญรญรญ", "zzz^^^xxxxx"}; @@ -349,7 +362,7 @@ TEST_F(StringsListsConcatenateTest, ColumnSeparators) // With null replacement for both separators and strings { - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, separators->view(), cudf::string_scalar("|||"), cudf::string_scalar("XXXXX")); std::vector h_expected{"a+++XXXXX+++ccc", nullptr, @@ -361,6 +374,20 @@ TEST_F(StringsListsConcatenateTest, ColumnSeparators) STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); } + + // Turn off separator-on-nulls + { + auto const results = cudf::strings::join_list_elements(string_lv, + separators->view(), + cudf::string_scalar("+++"), + cudf::string_scalar(""), + cudf::strings::separator_on_nulls::NO); + std::vector h_expected{ + "a+++ccc", nullptr, "0a0b0c+++xyzรฉรฉรฉ", "efgh+++ijk", "รกรกรก%%%รญรญรญ", "zzz^^^xxxxx"}; + auto const expected = + STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected, print_all); + } } TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) @@ -390,7 +417,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 11})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {0, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements(string_lv, sep_col); + auto const results = cudf::strings::join_list_elements(string_lv, sep_col); std::vector h_expected{nullptr, nullptr, nullptr, @@ -411,7 +438,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 11})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {0, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, sep_col, cudf::string_scalar("|||"), cudf::string_scalar("___")); std::vector h_expected{"a+++___+++ccc", nullptr, @@ -433,7 +460,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = 
cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 4})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {0, 4})[0]); - auto const results = cudf::strings::concatenate_list_elements(string_lv, sep_col); + auto const results = cudf::strings::join_list_elements(string_lv, sep_col); std::vector h_expected{nullptr, nullptr, nullptr, nullptr}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; @@ -444,7 +471,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {0, 4})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {0, 4})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, sep_col, cudf::string_scalar("|||"), cudf::string_scalar("___")); std::vector h_expected{ "a+++___+++ccc", nullptr, "___|||efgh|||ijk", "zzz|||xxxxx"}; @@ -457,7 +484,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {5, 11})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {5, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements(string_lv, sep_col); + auto const results = cudf::strings::join_list_elements(string_lv, sep_col); std::vector h_expected{ nullptr, nullptr, "0a0b0c###5x5y5z", nullptr, "รฉรฉรฉ-+-12345abcdef", "aaaรฉรฉรฉbbbรฉรฉรฉccc=+=12345"}; auto const expected = @@ -469,7 +496,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {5, 11})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {5, 11})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, sep_col, cudf::string_scalar("|||"), cudf::string_scalar("___")); std::vector h_expected{"abcdef^^^012345^^^___^^^xxx000", "___~!~11111~!~00000", @@ -486,7 +513,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {3, 8})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {3, 8})[0]); - auto const results = cudf::strings::concatenate_list_elements(string_lv, sep_col); + auto const results = cudf::strings::join_list_elements(string_lv, sep_col); std::vector h_expected{nullptr, nullptr, nullptr, nullptr, "0a0b0c###5x5y5z"}; auto const expected = STR_COL{h_expected.begin(), h_expected.end(), nulls_from_nullptr(h_expected)}; @@ -497,7 +524,7 @@ TEST_F(StringsListsConcatenateTest, SlicedListsWithColumnSeparators) { auto const string_lv = cudf::lists_column_view(cudf::slice(string_lists->view(), {3, 8})[0]); auto const sep_col = cudf::strings_column_view(cudf::slice(separators->view(), {3, 8})[0]); - auto const results = cudf::strings::concatenate_list_elements( + auto const results = cudf::strings::join_list_elements( string_lv, sep_col, cudf::string_scalar("|||"), cudf::string_scalar("___")); std::vector h_expected{"zzz|||xxxxx", nullptr, diff --git a/docs/cudf/source/io-gds-integration.rst b/docs/cudf/source/io-gds-integration.rst new file mode 100644 index 00000000000..9ccf773b2e4 --- /dev/null +++ b/docs/cudf/source/io-gds-integration.rst @@ 
-0,0 +1,22 @@ +GPUDirect Storage Integration +============================= + +Many IO APIs can use the GPUDirect Storage (GDS) library to optimize IO operations. +GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. +GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. +The SDK is available for download `here `_. + +Use of GPUDirect Storage in cuDF is disabled by default, and can be enabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. +This variable also controls the GDS compatibility mode. There are two special values for the environment variable: + +- "GDS": Use of GDS is enabled; GDS compatibility mode is *off*. +- "ALWAYS": Use of GDS is enabled; GDS compatibility mode is *on*. + +Any other value (or no value set) will keep GDS disabled for use in cuDF, and IO will be done using cuDF's CPU bounce buffers. + +This environment variable also affects how cuDF treats GDS errors. +When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers. +When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), +cuDF throws an exception to propagate the error to the user. + +NOTE: The current GDS integration is not fully optimized and enabling GDS will not lead to performance improvements in all cases. \ No newline at end of file diff --git a/docs/cudf/source/io.rst b/docs/cudf/source/io.rst index 5186473ae10..e88162d8f52 100644 --- a/docs/cudf/source/io.rst +++ b/docs/cudf/source/io.rst @@ -8,4 +8,5 @@ This page contains Input / Output related APIs in cuDF. :maxdepth: 2 :caption: Contents: - io-supported-types.rst \ No newline at end of file + io-supported-types.rst + io-gds-integration.rst \ No newline at end of file diff --git a/java/ci/README.md b/java/ci/README.md index 458a76bcd04..968ce279a2c 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -49,5 +49,5 @@ scl enable devtoolset-9 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-21.06-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-21.06.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index cec20ec04af..fe2d9a453f7 100755 --- a/java/pom.xml +++ b/java/pom.xml @@ -21,7 +21,7 @@ ai.rapids cudf - 21.06-SNAPSHOT + 21.06.0-SNAPSHOT cudfjni diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index ea93a2daf36..a7e589ac890 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -1276,6 +1276,16 @@ public static ColumnVector fromStrings(String... values) { } } + /** + * Create a new string vector from the given values. This API + * supports inline nulls. + */ + public static ColumnVector fromUTF8Strings(byte[]... values) { + try (HostColumnVector host = HostColumnVector.fromUTF8Strings(values)) { + return host.copyToDevice(); + } + } + /** + * Create a new vector from the given values. This API supports inline nulls, + * but is much slower than building from primitive array of unscaledValues.
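The new ``io-gds-integration.rst`` page above describes behaviour that is controlled entirely by the ``LIBCUDF_CUFILE_POLICY`` environment variable, with no libcudf API changes. The following is a minimal sketch, not part of this patch, of how an application might opt in; it assumes a Linux/POSIX environment, an installed cuFile SDK, and a placeholder file name:

#include <cudf/io/parquet.hpp>
#include <cudf/table/table.hpp>

#include <cstdlib>

int main()
{
  // "GDS"    -> GDS enabled, compatibility mode off.
  // "ALWAYS" -> GDS enabled, compatibility mode on.
  // Anything else (or unset) keeps cuDF on its CPU bounce-buffer path.
  setenv("LIBCUDF_CUFILE_POLICY", "GDS", 1);  // POSIX call; set before the first IO operation

  // A plain Parquet read; libcudf's IO layer decides internally whether to use cuFile/GDS.
  auto const options =
    cudf::io::parquet_reader_options::builder(cudf::io::source_info{"example.parquet"}).build();
  auto const result = cudf::io::read_parquet(options);

  return result.tbl->num_columns() > 0 ? 0 : 1;
}

Using "ALWAYS" instead keeps compatibility mode on, so IO still succeeds on systems where GDS cannot engage, at the cost of a CPU bounce buffer.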
diff --git a/java/src/main/java/ai/rapids/cudf/CuFile.java b/java/src/main/java/ai/rapids/cudf/CuFile.java index 00c9cdb9fd5..4baad834570 100644 --- a/java/src/main/java/ai/rapids/cudf/CuFile.java +++ b/java/src/main/java/ai/rapids/cudf/CuFile.java @@ -78,11 +78,25 @@ public static boolean libraryLoaded() { * @param path The file path to copy to. * @param file_offset The file offset from which to write the buffer. * @param buffer The device buffer to copy from. - * @return The file offset from which the buffer was appended. */ public static void writeDeviceBufferToFile(File path, long file_offset, BaseDeviceMemoryBuffer buffer) { - writeToFile(path.getAbsolutePath(), file_offset, buffer.getAddress(), buffer.getLength()); + writeDeviceMemoryToFile(path, file_offset, buffer.getAddress(), buffer.getLength()); + } + + /** + * Write device memory to a given file path synchronously. + *

+ * This method is NOT thread safe if the path points to the same file on disk. + * + * @param path The file path to copy to. + * @param file_offset The file offset from which to write the buffer. + * @param address The device memory address to copy from. + * @param length The length to copy. + */ + public static void writeDeviceMemoryToFile(File path, long file_offset, long address, + long length) { + writeToFile(path.getAbsolutePath(), file_offset, address, length); } /** @@ -95,7 +109,21 @@ public static void writeDeviceBufferToFile(File path, long file_offset, * @return The file offset from which the buffer was appended. */ public static long appendDeviceBufferToFile(File path, BaseDeviceMemoryBuffer buffer) { - return appendToFile(path.getAbsolutePath(), buffer.getAddress(), buffer.getLength()); + return appendDeviceMemoryToFile(path, buffer.getAddress(), buffer.getLength()); + } + + /** + * Append device memory to a given file path synchronously. + *

+ * This method is NOT thread safe if the path points to the same file on disk. + * + * @param path The file path to copy to. + * @param address The device memory address to copy from. + * @param length The length to copy. + * @return The file offset from which the buffer was appended. + */ + public static long appendDeviceMemoryToFile(File path, long address, long length) { + return appendToFile(path.getAbsolutePath(), address, length); } /** @@ -109,7 +137,21 @@ public static long appendDeviceBufferToFile(File path, BaseDeviceMemoryBuffer bu */ public static void readFileToDeviceBuffer(BaseDeviceMemoryBuffer buffer, File path, long fileOffset) { - readFromFile(buffer.getAddress(), buffer.getLength(), path.getAbsolutePath(), fileOffset); + readFileToDeviceMemory(buffer.getAddress(), buffer.getLength(), path, fileOffset); + } + + /** + * Read a file into device memory synchronously. + *

+ * This method is NOT thread safe if the path points to the same file on disk. + * + * @param address The device memory address to read into. + * @param length The length to read. + * @param path The file path to copy from. + * @param fileOffset The file offset from which to copy the content. + */ + public static void readFileToDeviceMemory(long address, long length, File path, long fileOffset) { + readFromFile(address, length, path.getAbsolutePath(), fileOffset); } private static native void writeToFile(String path, long file_offset, long address, long length); diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java index 846bcb3b635..46255428c1c 100644 --- a/java/src/main/java/ai/rapids/cudf/HostColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/HostColumnVector.java @@ -29,6 +29,7 @@ import java.util.Objects; import java.util.Optional; import java.util.StringJoiner; +import java.util.function.BiConsumer; import java.util.function.Consumer; /** @@ -577,6 +578,40 @@ public static HostColumnVector fromStrings(String... values) { }); } + /** + * Create a new string vector from the given values. This API + * supports inline nulls. + */ + public static HostColumnVector fromUTF8Strings(byte[]... values) { + int rows = values.length; + long nullCount = 0; + long bufferSize = 0; + // How many bytes do we need to hold the data. + for (byte[] s: values) { + if (s == null) { + nullCount++; + } else { + bufferSize += s.length; + } + } + + BiConsumer appendUTF8 = nullCount == 0 ? + (b, s) -> b.appendUTF8String(s) : + (b, s) -> { + if (s == null) { + b.appendNull(); + } else { + b.appendUTF8String(s); + } + }; + + return build(rows, bufferSize, (b) -> { + for (byte[] s: values) { + appendUTF8.accept(b, s); + } + }); + } + /** * Create a new vector from the given values. This API supports inline nulls, * but is much slower than building from primitive array of unscaledValues. @@ -1085,9 +1120,11 @@ private void appendChildOrNull(ColumnBuilder childBuilder, Object listElement) { } else if (listElement instanceof BigDecimal) { childBuilder.append((BigDecimal) listElement); } else if (listElement instanceof List) { - childBuilder.append((List) listElement); + childBuilder.append((List) listElement); } else if (listElement instanceof StructData) { childBuilder.append((StructData) listElement); + } else if (listElement instanceof byte[]) { + childBuilder.appendUTF8String((byte[]) listElement); } else { throw new IllegalStateException("Unexpected element type: " + listElement.getClass()); } diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 8d63d2aeefc..97813182deb 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -173,6 +173,36 @@ public static synchronized void initialize(int allocationMode, LogConf logConf, */ public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize, long maxPoolSize) throws RmmException { + initialize(allocationMode, logConf, poolSize, maxPoolSize, 0, 0); + } + + /** + * Initialize memory manager state and storage. This will always initialize + * the CUDA context for the calling thread if it is not already set. 
The + * caller is responsible for setting the desired CUDA device prior to this + * call if a specific device is already set. + *

NOTE: All cudf methods will set the chosen CUDA device in the CUDA + * context of the calling thread after this returns. + * @param allocationMode Allocation strategy to use. Bit set using + * {@link RmmAllocationMode#CUDA_DEFAULT}, + * {@link RmmAllocationMode#POOL}, + * {@link RmmAllocationMode#ARENA} and + * {@link RmmAllocationMode#CUDA_MANAGED_MEMORY} + * @param logConf How to do logging or null if you don't want to + * @param poolSize The initial pool size in bytes + * @param maxPoolSize The maximum size the pool is allowed to grow. If the specified value + * is <= 0 then the pool size will not be artificially limited. + * @param allocationAlignment The size to which allocations are aligned. + * @param alignmentThreshold Only allocations with size larger than or equal to this threshold + * are aligned with `allocationAlignment`. + * @throws IllegalStateException if RMM has already been initialized + * @throws IllegalArgumentException if a max pool size is specified but the allocation mode + * is not {@link RmmAllocationMode#POOL} or + * {@link RmmAllocationMode#ARENA}, or the maximum pool size is + * below the initial size. + */ + public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize, + long maxPoolSize, long allocationAlignment, long alignmentThreshold) throws RmmException { if (initialized) { throw new IllegalStateException("RMM is already initialized"); } @@ -195,7 +225,8 @@ public static synchronized void initialize(int allocationMode, LogConf logConf, loc = logConf.loc; } - initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize); + initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize, + allocationAlignment, alignmentThreshold); MemoryCleaner.setDefaultGpu(Cuda.getDevice()); initialized = true; } @@ -241,7 +272,8 @@ private static long[] sortThresholds(long[] thresholds) { } private static native void initializeInternal(int allocationMode, int logTo, String path, - long poolSize, long maxPoolSize) throws RmmException; + long poolSize, long maxPoolSize, long allocationAlignment, long alignmentThreshold) + throws RmmException; /** * Shut down any initialized RMM instance. This should be used very rarely. It does not need to diff --git a/java/src/main/java/ai/rapids/cudf/Scalar.java b/java/src/main/java/ai/rapids/cudf/Scalar.java index 62dd9bda13b..7794b57c3f9 100644 --- a/java/src/main/java/ai/rapids/cudf/Scalar.java +++ b/java/src/main/java/ai/rapids/cudf/Scalar.java @@ -329,10 +329,19 @@ public static Scalar timestampFromLong(DType type, Long value) { } public static Scalar fromString(String value) { + return fromUTF8String(value == null ? null : value.getBytes(StandardCharsets.UTF_8)); + } + + /** + * Creates a String scalar from an array of UTF8 bytes. 
+ * @param value the array of UTF8 bytes + * @return a String scalar + */ + public static Scalar fromUTF8String(byte[] value) { if (value == null) { return fromNull(DType.STRING); } - return new Scalar(DType.STRING, makeStringScalar(value.getBytes(StandardCharsets.UTF_8), true)); + return new Scalar(DType.STRING, makeStringScalar(value, true)); } /** diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp index 85bbdd41b4a..2953a6221e8 100644 --- a/java/src/main/native/src/ColumnVectorJni.cpp +++ b/java/src/main/native/src/ColumnVectorJni.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -220,49 +220,14 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, j JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromScalar(JNIEnv *env, jclass, jlong j_scalar, jint row_count) { - using ScalarType = cudf::scalar_type_t; JNI_NULL_CHECK(env, j_scalar, "scalar is null", 0); try { cudf::jni::auto_set_device(env); auto scalar_val = reinterpret_cast(j_scalar); - auto dtype = scalar_val->type(); - cudf::mask_state mask_state = - scalar_val->is_valid() ? cudf::mask_state::UNALLOCATED : cudf::mask_state::ALL_NULL; std::unique_ptr col; - if (dtype.id() == cudf::type_id::LIST) { - // Neither 'cudf::make_empty_column' nor 'cudf::make_column_from_scalar' supports - // LIST type for now (https://github.com/rapidsai/cudf/issues/8088), so the list - // precedes the others and takes care of the empty column itself. - auto s_list = reinterpret_cast(scalar_val); - cudf::column_view s_val = s_list->view(); - - // Offsets: [0, list_size, list_size*2, ..., list_szie*row_count] - auto zero = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32)); - auto step = cudf::make_numeric_scalar(cudf::data_type(cudf::type_id::INT32)); - zero->set_valid(true); - step->set_valid(true); - static_cast(zero.get())->set_value(0); - static_cast(step.get())->set_value(s_val.size()); - std::unique_ptr offsets = cudf::sequence(row_count + 1, *zero, *step); - // Data: - // Builds the data column by leveraging `cudf::concatenate` to repeat the 's_val' - // 'row_count' times, because 'cudf::make_column_from_scalar' does not support list - // type. - // (Assumes the `row_count` is not big, otherwise there would be a performance issue.) - // Checks the `row_count` because `cudf::concatenate` does not support no rows. - auto data_col = row_count > 0 - ? cudf::concatenate(std::vector(row_count, s_val)) - : cudf::empty_like(s_val); - col = cudf::make_lists_column(row_count, std::move(offsets), std::move(data_col), - cudf::state_null_count(mask_state, row_count), - cudf::create_null_mask(row_count, mask_state)); - } else if (row_count == 0) { - col = cudf::make_empty_column(dtype); - } else if (cudf::is_fixed_width(dtype)) { - col = cudf::make_fixed_width_column(dtype, row_count, mask_state); - auto mut_view = col->mutable_view(); - cudf::fill_in_place(mut_view, 0, row_count, *scalar_val); - } else if (dtype.id() == cudf::type_id::STRING) { + if (scalar_val->type().id() == cudf::type_id::STRING) { + // Tests fail when using the cudf implementation, complaining no child for string column. + // So here take care of the String type itself. 
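+    // (All other scalar types now fall through to the else branch below, which delegates to
+    // cudf::make_column_from_scalar.)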
// create a string column of all empty strings to fill (cheapest string column to create) auto offsets = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT32}, row_count + 1, cudf::mask_state::UNALLOCATED); @@ -273,7 +238,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_fromScalar(JNIEnv *env, col = cudf::fill(str_col->view(), 0, row_count, *scalar_val); } else { - JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "Invalid data type", 0); + col = cudf::make_column_from_scalar(*scalar_val, row_count); } return reinterpret_cast(col.release()); } diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp index 7f11e19fce8..e604fc7dd46 100644 --- a/java/src/main/native/src/RmmJni.cpp +++ b/java/src/main/native/src/RmmJni.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -332,7 +333,9 @@ extern "C" { JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, jclass clazz, jint allocation_mode, jint log_to, jstring jpath, jlong pool_size, - jlong max_pool_size) { + jlong max_pool_size, + jlong allocation_alignment, + jlong alignment_threshold) { try { // make sure the CUDA device is setup in the context cudaError_t cuda_status = cudaFree(0); @@ -351,13 +354,9 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } } else if (use_arena_alloc) { std::size_t pool_limit = (max_pool_size > 0) ? 
static_cast(max_pool_size) : @@ -365,23 +364,26 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j if (use_managed_mem) { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = rmm::mr::make_owning_wrapper( std::make_shared(), pool_size, pool_limit); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } } else if (use_managed_mem) { Initialized_resource = std::make_shared(); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } else { Initialized_resource = std::make_shared(); - auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT); - Tracking_memory_resource.reset(wrapped); } + + if (allocation_alignment != 0) { + Initialized_resource = rmm::mr::make_owning_wrapper( + Initialized_resource, allocation_alignment, alignment_threshold); + } + + auto wrapped = make_tracking_adaptor( + Initialized_resource.get(), + std::max(RMM_ALLOC_SIZE_ALIGNMENT, static_cast(allocation_alignment))); + Tracking_memory_resource.reset(wrapped); + auto resource = Tracking_memory_resource.get(); rmm::mr::set_current_device_resource(resource); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 09ddef633e3..8da70afc6f3 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -29,6 +29,7 @@ import java.math.BigDecimal; import java.math.RoundingMode; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -176,6 +177,19 @@ void testStringCreation() { } } + @Test + void testUTF8StringCreation() { + try (ColumnVector cv = ColumnVector.fromUTF8Strings( + "d".getBytes(StandardCharsets.UTF_8), + "sd".getBytes(StandardCharsets.UTF_8), + "sde".getBytes(StandardCharsets.UTF_8), + null, + "END".getBytes(StandardCharsets.UTF_8)); + ColumnVector expected = ColumnVector.fromStrings("d", "sd", "sde", null, "END")) { + TableTest.assertColumnsAreEqual(expected, cv); + } + } + @Test void testRefCountLeak() throws InterruptedException { assumeTrue(Boolean.getBoolean("ai.rapids.cudf.flaky-tests-enabled")); @@ -2085,15 +2099,16 @@ void testStringConcatWithNulls() { assertColumnsAreEqual(concat, e_concat); } - try (ColumnVector v = ColumnVector.fromStrings("a", "B", "cd", "\u0480\u0481", "E\tf", - "g\nH", "IJ\"\u0100\u0101\u0500\u0501", - "kl m", "Nop1", "\\qRs2", null, - "3tuV\'", "wX4Yz", "\ud720\ud721"); - Scalar emptyString = Scalar.fromString(""); - Scalar nullSubstitute = Scalar.fromString("NULL"); - ColumnVector concat = ColumnVector.stringConcatenate(emptyString, nullSubstitute, new ColumnView[]{v})) { - assertColumnsAreEqual(v, concat); - } + assertThrows(CudfException.class, () -> { + try (ColumnVector v = ColumnVector.fromStrings("a", "B", "cd", "\u0480\u0481", "E\tf", + "g\nH", "IJ\"\u0100\u0101\u0500\u0501", + "kl m", "Nop1", "\\qRs2", null, + "3tuV\'", "wX4Yz", "\ud720\ud721"); + Scalar emptyString = Scalar.fromString(""); + Scalar nullSubstitute = Scalar.fromString("NULL"); + ColumnVector concat = ColumnVector.stringConcatenate(emptyString, 
nullSubstitute, new ColumnView[]{v})) { + } + }); } @Test diff --git a/java/src/test/java/ai/rapids/cudf/ScalarTest.java b/java/src/test/java/ai/rapids/cudf/ScalarTest.java index b09850bc3d9..a1078f2546b 100644 --- a/java/src/test/java/ai/rapids/cudf/ScalarTest.java +++ b/java/src/test/java/ai/rapids/cudf/ScalarTest.java @@ -27,6 +27,7 @@ import org.junit.jupiter.api.Test; import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; @@ -244,6 +245,22 @@ public void testString() { } } + @Test + public void testUTF8String() { + try (Scalar s = Scalar.fromUTF8String("TEST".getBytes(StandardCharsets.UTF_8))) { + assertEquals(DType.STRING, s.getType()); + assertTrue(s.isValid()); + assertEquals("TEST", s.getJavaString()); + assertArrayEquals(new byte[]{'T', 'E', 'S', 'T'}, s.getUTF8()); + } + try (Scalar s = Scalar.fromUTF8String("".getBytes(StandardCharsets.UTF_8))) { + assertEquals(DType.STRING, s.getType()); + assertTrue(s.isValid()); + assertEquals("", s.getJavaString()); + assertArrayEquals(new byte[]{}, s.getUTF8()); + } + } + @Test public void testList() { // list of int diff --git a/python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd b/python/cudf/cudf/_lib/cpp/lists/combine.pxd similarity index 83% rename from python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd rename to python/cudf/cudf/_lib/cpp/lists/combine.pxd index 8c4dabf5168..ea9ade178e2 100644 --- a/python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd +++ b/python/cudf/cudf/_lib/cpp/lists/combine.pxd @@ -5,7 +5,7 @@ from libcpp.memory cimport unique_ptr from cudf._lib.cpp.column.column cimport column from cudf._lib.cpp.table.table_view cimport table_view -cdef extern from "cudf/lists/concatenate_rows.hpp" namespace \ +cdef extern from "cudf/lists/combine.hpp" namespace \ "cudf::lists" nogil: cdef unique_ptr[column] concatenate_rows( const table_view input_table diff --git a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd index fec1c6382e6..de5cb05447c 100644 --- a/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd +++ b/python/cudf/cudf/_lib/cpp/scalar/scalar.pxd @@ -9,6 +9,9 @@ from libcpp.string cimport string from cudf._lib.cpp.types cimport data_type from cudf._lib.cpp.wrappers.decimals cimport scale_type +from cudf._lib.cpp.column.column_view cimport column_view + + cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: cdef cppclass scalar: scalar() except + @@ -60,3 +63,6 @@ cdef extern from "cudf/scalar/scalar.hpp" namespace "cudf" nogil: bool is_valid) except + int64_t value() except + # TODO: Figure out how to add an int32 overload of value() + + cdef cppclass list_scalar(scalar): + column_view view() except + diff --git a/python/cudf/cudf/_lib/cpp/strings/combine.pxd b/python/cudf/cudf/_lib/cpp/strings/combine.pxd index 250c6441882..51c706b68d0 100644 --- a/python/cudf/cudf/_lib/cpp/strings/combine.pxd +++ b/python/cudf/cudf/_lib/cpp/strings/combine.pxd @@ -18,13 +18,13 @@ cdef extern from "cudf/strings/combine.hpp" namespace "cudf::strings" nogil: string_scalar separator, string_scalar narep) except + - cdef unique_ptr[column] concatenate_list_elements( + cdef unique_ptr[column] join_list_elements( column_view lists_strings_column, column_view separators, string_scalar separator_narep, string_scalar string_narep) except + - cdef unique_ptr[column] concatenate_list_elements( + cdef unique_ptr[column] join_list_elements( column_view lists_strings_column, string_scalar 
separator, string_scalar narep) except + diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx index 46f034dc525..7d8909610dc 100644 --- a/python/cudf/cudf/_lib/lists.pyx +++ b/python/cudf/cudf/_lib/lists.pyx @@ -16,7 +16,7 @@ from cudf._lib.cpp.lists.drop_list_duplicates cimport ( from cudf._lib.cpp.lists.sorting cimport ( sort_lists as cpp_sort_lists ) -from cudf._lib.cpp.lists.concatenate_rows cimport ( +from cudf._lib.cpp.lists.combine cimport ( concatenate_rows as cpp_concatenate_rows ) from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index 9f8a8ee6b1e..cb355a15f15 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -18,9 +18,18 @@ from libcpp.utility cimport move from libcpp cimport bool import cudf -from cudf._lib.types import cudf_to_np_types, duration_unit_map +from cudf.core.dtypes import ListDtype +from cudf._lib.types import ( + cudf_to_np_types, + duration_unit_map +) from cudf._lib.types import datetime_unit_map -from cudf._lib.types cimport underlying_type_t_type_id +from cudf._lib.types cimport underlying_type_t_type_id, dtype_from_column_view + +from cudf._lib.column cimport Column +from cudf._lib.cpp.column.column_view cimport column_view +from cudf._lib.table cimport Table +from cudf._lib.interop import to_arrow from cudf._lib.cpp.wrappers.timestamps cimport ( timestamp_s, @@ -41,12 +50,12 @@ from cudf._lib.cpp.scalar.scalar cimport ( timestamp_scalar, duration_scalar, string_scalar, - fixed_point_scalar + fixed_point_scalar, + list_scalar, ) -from cudf.utils.dtypes import _decimal_to_int64 +from cudf.utils.dtypes import _decimal_to_int64, is_list_dtype cimport cudf._lib.cpp.types as libcudf_types - cdef class DeviceScalar: def __init__(self, value, dtype): @@ -97,6 +106,8 @@ cdef class DeviceScalar: def _to_host_scalar(self): if isinstance(self.dtype, cudf.Decimal64Dtype): result = _get_py_decimal_from_fixed_point(self.c_value) + elif is_list_dtype(self.dtype): + result = _get_py_list_from_list(self.c_value) elif pd.api.types.is_string_dtype(self.dtype): result = _get_py_string_from_string(self.c_value) elif pd.api.types.is_numeric_dtype(self.dtype): @@ -159,6 +170,22 @@ cdef class DeviceScalar: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) + elif cdtype.id() == libcudf_types.LIST: + if ( + s.get_raw_ptr() + )[0].view().type().id() == libcudf_types.LIST: + s._dtype = dtype_from_column_view( + (s.get_raw_ptr())[0].view() + ) + else: + s._dtype = ListDtype( + cudf_to_np_types[ + ( + (s.get_raw_ptr())[0] + .view().type().id() + ) + ] + ) else: if dtype is not None: s._dtype = dtype @@ -268,6 +295,19 @@ cdef _set_decimal64_from_scalar(unique_ptr[scalar]& s, ) ) +cdef _get_py_list_from_list(unique_ptr[scalar]& s): + + if not s.get()[0].is_valid(): + return cudf.NA + + cdef column_view list_col_view = (s.get()).view() + cdef Column list_col = Column.from_column_view(list_col_view, None) + cdef Table to_arrow_table = Table({"col": list_col}) + + arrow_table = to_arrow(to_arrow_table, [["col", []]]) + result = arrow_table['col'].to_pylist() + return _nested_na_replace(result) + cdef _get_py_string_from_string(unique_ptr[scalar]& s): if not s.get()[0].is_valid(): return cudf.NA @@ -440,3 +480,16 @@ def _create_proxy_nat_scalar(dtype): return result else: raise TypeError('NAT only valid for datetime and timedelta') + + +def _nested_na_replace(input_list): + ''' + Replace `None` with 
`cudf.NA` in the result of + `__getitem__` calls to list type columns + ''' + for idx, value in enumerate(input_list): + if isinstance(value, list): + _nested_na_replace(value) + elif value is None: + input_list[idx] = cudf.NA + return input_list diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx index 25619de3ed0..0d7dfb5c619 100644 --- a/python/cudf/cudf/_lib/strings/combine.pyx +++ b/python/cudf/cudf/_lib/strings/combine.pyx @@ -16,7 +16,7 @@ from cudf._lib.table cimport Table from cudf._lib.cpp.strings.combine cimport ( concatenate as cpp_concatenate, join_strings as cpp_join_strings, - concatenate_list_elements as cpp_concatenate_list_elements + join_list_elements as cpp_join_list_elements ) @@ -105,7 +105,7 @@ def join_lists_with_scalar( ) with nogil: - c_result = move(cpp_concatenate_list_elements( + c_result = move(cpp_join_list_elements( source_view, scalar_separator[0], scalar_narep[0] @@ -142,7 +142,7 @@ def join_lists_with_column( ) with nogil: - c_result = move(cpp_concatenate_list_elements( + c_result = move(cpp_join_list_elements( source_view, separator_view, scalar_separator_narep[0], diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py index 0439f0d24b8..d3da544f8b5 100644 --- a/python/cudf/cudf/core/abc.py +++ b/python/cudf/cudf/core/abc.py @@ -1,9 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. """Common abstract base classes for cudf.""" -import abc import sys -from abc import abstractmethod import rmm @@ -18,7 +16,7 @@ import pickle # type: ignore -class Serializable(abc.ABC): +class Serializable: """A serializable object composed of device memory buffers. This base class defines a standard serialization protocol for objects @@ -32,7 +30,6 @@ class Serializable(abc.ABC): latter converts back from that representation into an equivalent object. """ - @abstractmethod def serialize(self): """Generate an equivalent serializable representation of an object. @@ -53,10 +50,11 @@ def serialize(self): :meta private: """ - pass + raise NotImplementedError( + "Subclasses of Serializable must implement serialize" + ) @classmethod - @abstractmethod def deserialize(cls, header, frames): """Generate an object from a serialized representation. @@ -80,7 +78,9 @@ class can be constructed from a serialized representation generalized :meta private: """ - pass + raise NotImplementedError( + "Subclasses of Serializable must implement deserialize" + ) def device_serialize(self): """Serialize data and metadata associated with device memory. 
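
The python/cudf/cudf/_lib/scalar.pyx hunk above adds list-scalar support to `DeviceScalar`: a `list_scalar` is turned into a host Python list via Arrow, and `_nested_na_replace` then swaps `None` for `cudf.NA` at every nesting level. Below is a minimal, self-contained sketch of just that normalization step; the `NA` sentinel and the function name are stand-ins for illustration, not cudf APIs.

```python
# Sketch of the None -> NA normalization applied when a list scalar is
# returned to the host (mirrors the `_nested_na_replace` helper in the diff).
# `NA` is a placeholder sentinel here; in cudf it would be `cudf.NA`.

NA = object()  # stand-in for cudf.NA in this self-contained example


def nested_na_replace(values):
    """Recursively replace None with NA, in place, at every nesting level."""
    for idx, value in enumerate(values):
        if isinstance(value, list):
            nested_na_replace(value)  # recurse into nested lists
        elif value is None:
            values[idx] = NA
    return values


result = nested_na_replace([[1, None, 3], [None, 5, 6]])
assert result == [[1, NA, 3], [NA, 5, 6]]
```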
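The python/cudf/cudf/core/abc.py hunk above drops `abc.ABC`/`abstractmethod` from `Serializable` and instead has the base `serialize`/`deserialize` raise `NotImplementedError`, so subclasses still must override both without the base being an ABC. A small sketch of that pattern follows; the class and attribute names are purely illustrative and assume nothing about cudf's actual buffer types.

```python
# Sketch of the pattern used by the Serializable change above: a plain base
# class whose methods raise NotImplementedError instead of using abc.abstractmethod.


class SerializableBase:
    def serialize(self):
        raise NotImplementedError(
            "Subclasses of SerializableBase must implement serialize"
        )

    @classmethod
    def deserialize(cls, header, frames):
        raise NotImplementedError(
            "Subclasses of SerializableBase must implement deserialize"
        )


class Payload(SerializableBase):
    """Illustrative subclass that round-trips a list of byte frames."""

    def __init__(self, frames):
        self.frames = frames

    def serialize(self):
        header = {"num_frames": len(self.frames)}
        return header, self.frames

    @classmethod
    def deserialize(cls, header, frames):
        assert header["num_frames"] == len(frames)
        return cls(frames)


header, frames = Payload([b"abc", b"def"]).serialize()
assert Payload.deserialize(header, frames).frames == [b"abc", b"def"]
```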
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 20f302f7e59..4bf4b2b87f2 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -40,7 +40,12 @@ from cudf._typing import BinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.abc import Serializable from cudf.core.buffer import Buffer -from cudf.core.dtypes import CategoricalDtype, IntervalDtype +from cudf.core.dtypes import ( + CategoricalDtype, + IntervalDtype, + ListDtype, + StructDtype, +) from cudf.utils import ioutils, utils from cudf.utils.dtypes import ( check_cast_unsupported_dtype, @@ -291,8 +296,7 @@ def from_arrow(cls, array: pa.Array) -> ColumnBase: "None" ] - if isinstance(result.dtype, cudf.Decimal64Dtype): - result.dtype.precision = array.type.precision + result = _copy_type_metadata_from_arrow(array, result) return result def _get_mask_as_column(self) -> ColumnBase: @@ -2230,6 +2234,60 @@ def full(size: int, fill_value: ScalarLike, dtype: Dtype = None) -> ColumnBase: return ColumnBase.from_scalar(cudf.Scalar(fill_value, dtype), size) +def _copy_type_metadata_from_arrow( + arrow_array: pa.array, cudf_column: ColumnBase +) -> ColumnBase: + """ + Similar to `Column._copy_type_metadata`, except copies type metadata + from arrow array into a cudf column. Recursive for every level. + * When `arrow_array` is struct type and `cudf_column` is StructDtype, copy + field names. + * When `arrow_array` is decimal type and `cudf_column` is + Decimal64Dtype, copy precisions. + """ + if pa.types.is_decimal(arrow_array.type) and isinstance( + cudf_column, cudf.core.column.DecimalColumn + ): + cudf_column.dtype.precision = arrow_array.type.precision + elif pa.types.is_struct(arrow_array.type) and isinstance( + cudf_column, cudf.core.column.StructColumn + ): + base_children = tuple( + _copy_type_metadata_from_arrow(arrow_array.field(i), col_child) + for i, col_child in enumerate(cudf_column.base_children) + ) + cudf_column.set_base_children(base_children) + return cudf.core.column.StructColumn( + data=None, + size=cudf_column.base_size, + dtype=StructDtype.from_arrow(arrow_array.type), + mask=cudf_column.base_mask, + offset=cudf_column.offset, + null_count=cudf_column.null_count, + children=base_children, + ) + elif pa.types.is_list(arrow_array.type) and isinstance( + cudf_column, cudf.core.column.ListColumn + ): + if arrow_array.values and cudf_column.base_children: + base_children = ( + cudf_column.base_children[0], + _copy_type_metadata_from_arrow( + arrow_array.values, cudf_column.base_children[1] + ), + ) + return cudf.core.column.ListColumn( + size=cudf_column.base_size, + dtype=ListDtype.from_arrow(arrow_array.type), + mask=cudf_column.base_mask, + offset=cudf_column.offset, + null_count=cudf_column.null_count, + children=base_children, + ) + + return cudf_column + + def _concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 7db8ba15caa..f0b0dbba4a5 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -143,6 +143,8 @@ def __init__(self, element_type: Any) -> None: def element_type(self) -> Dtype: if isinstance(self._typ.value_type, pa.ListType): return ListDtype.from_arrow(self._typ.value_type) + elif isinstance(self._typ.value_type, pa.StructType): + return StructDtype.from_arrow(self._typ.value_type) else: return 
np.dtype(self._typ.value_type.to_pandas_dtype()).name @@ -176,10 +178,10 @@ def __eq__(self, other): return self._typ.equals(other._typ) def __repr__(self): - if isinstance(self.element_type, ListDtype): - return f"ListDtype({self.element_type.__repr__()})" + if isinstance(self.element_type, (ListDtype, StructDtype)): + return f"{type(self).__name__}({self.element_type.__repr__()})" else: - return f"ListDtype({self.element_type})" + return f"{type(self).__name__}({self.element_type})" def __hash__(self): return hash(self._typ) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index f59954aaf08..1c6c1ed85e6 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -32,6 +32,7 @@ is_numerical_dtype, is_scalar, min_scalar_type, + find_common_type, ) T = TypeVar("T", bound="Frame") @@ -156,6 +157,15 @@ def size(self): """ return self._num_columns * self._num_rows + @property + def _is_homogeneous(self): + # make sure that the dataframe has columns + if not self._data.columns: + return True + + first_type = self._data.columns[0].dtype.name + return all(x.dtype.name == first_type for x in self._data.columns) + @property def empty(self): """ @@ -4029,8 +4039,11 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): # default to the first non-null dtype dtypes[idx] = cols[0].dtype # If all the non-null dtypes are int/float, find a common dtype - if all(is_numerical_dtype(col.dtype) for col in cols): - dtypes[idx] = np.find_common_type([col.dtype for col in cols], []) + if all( + is_numerical_dtype(col.dtype) or is_decimal_dtype(col.dtype) + for col in cols + ): + dtypes[idx] = find_common_type([col.dtype for col in cols]) # If all categorical dtypes, combine the categories elif all( isinstance(col, cudf.core.column.CategoricalColumn) for col in cols @@ -4045,17 +4058,6 @@ def _find_common_dtypes_and_categories(non_null_columns, dtypes): # Set the column dtype to the codes' dtype. The categories # will be re-assigned at the end dtypes[idx] = min_scalar_type(len(categories[idx])) - elif all( - isinstance(col, cudf.core.column.DecimalColumn) for col in cols - ): - # Find the largest scale and the largest difference between - # precision and scale of the columns to be concatenated - s = max([col.dtype.scale for col in cols]) - lhs = max([col.dtype.precision - col.dtype.scale for col in cols]) - # Combine to get the necessary precision and clip at the maximum - # precision - p = min(cudf.Decimal64Dtype.MAX_PRECISION, s + lhs) - dtypes[idx] = cudf.Decimal64Dtype(p, s) # Otherwise raise an error if columns have different dtypes elif not all(is_dtype_equal(c.dtype, dtypes[idx]) for c in cols): raise ValueError("All columns must be the same type") diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 61fe20636f0..c1060d5f505 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -110,6 +110,7 @@ def cumcount(self): ) .groupby(self.grouping, sort=self._sort) .agg("cumcount") + .reset_index(drop=True) ) @cached_property @@ -225,9 +226,10 @@ def nth(self, n): """ Return the nth row from each group. 
""" - result = self.agg(lambda x: x.nth(n)) - sizes = self.size() - return result[n < sizes] + result = self.agg(lambda x: x.nth(n)).sort_index() + sizes = self.size().sort_index() + + return result[sizes > n] def serialize(self): header = {} diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 7de1aaf9726..21d075ae67d 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -85,7 +85,11 @@ def __getitem__(self, arg): arg = list(arg) data = self._sr._column[arg] - if is_scalar(data) or _is_null_host_scalar(data): + if ( + isinstance(data, list) + or is_scalar(data) + or _is_null_host_scalar(data) + ): return data index = self._sr.index.take(arg) return self._sr._copy_construct(data=data, index=index) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d812214caf8..a894baf8235 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -45,7 +45,6 @@ from cudf.utils import cudautils, docutils, ioutils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( - _decimal_normalize_types, can_convert_to_column, is_decimal_dtype, is_list_dtype, @@ -53,7 +52,7 @@ is_mixed_with_object_dtype, is_scalar, min_scalar_type, - numeric_normalize_types, + find_common_type, ) from cudf.utils.utils import ( get_appropriate_dispatched_func, @@ -2402,10 +2401,8 @@ def _concat(cls, objs, axis=0, index=True): ) if dtype_mismatch: - if isinstance(objs[0]._column, cudf.core.column.DecimalColumn): - objs = _decimal_normalize_types(*objs) - else: - objs = numeric_normalize_types(*objs) + common_dtype = find_common_type([obj.dtype for obj in objs]) + objs = [obj.astype(common_dtype) for obj in objs] col = _concat_columns([o._column for o in objs]) diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py index 31dc6012905..5c4c121db4d 100644 --- a/python/cudf/cudf/tests/test_concat.py +++ b/python/cudf/cudf/tests/test_concat.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest +from decimal import Decimal import cudf as gd from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -1262,3 +1263,267 @@ def test_concat_decimal_series(ltype, rtype): expected = pd.concat([ps1, ps2]) assert_eq(expected, got) + + +@pytest.mark.parametrize( + "df1, df2, df3, expected", + [ + ( + gd.DataFrame( + {"val": [Decimal("42.5"), Decimal("8.7")]}, + dtype=Decimal64Dtype(5, 2), + ), + gd.DataFrame( + {"val": [Decimal("9.23"), Decimal("-67.49")]}, + dtype=Decimal64Dtype(6, 4), + ), + gd.DataFrame({"val": [8, -5]}, dtype="int32"), + gd.DataFrame( + { + "val": [ + Decimal("42.5"), + Decimal("8.7"), + Decimal("9.23"), + Decimal("-67.49"), + Decimal("8"), + Decimal("-5"), + ] + }, + dtype=Decimal64Dtype(7, 4), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.DataFrame( + {"val": [Decimal("95.2"), Decimal("23.4")]}, + dtype=Decimal64Dtype(5, 2), + ), + gd.DataFrame({"val": [54, 509]}, dtype="uint16"), + gd.DataFrame({"val": [24, -48]}, dtype="int32"), + gd.DataFrame( + { + "val": [ + Decimal("95.2"), + Decimal("23.4"), + Decimal("54"), + Decimal("509"), + Decimal("24"), + Decimal("-48"), + ] + }, + dtype=Decimal64Dtype(5, 2), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.DataFrame( + {"val": [Decimal("36.56"), Decimal("-59.24")]}, + dtype=Decimal64Dtype(9, 4), + ), + gd.DataFrame({"val": [403.21, 45.13]}, dtype="float32"), + gd.DataFrame({"val": [52.262, -49.25]}, dtype="float64"), + gd.DataFrame( + { + "val": [ + Decimal("36.56"), + 
Decimal("-59.24"), + Decimal("403.21"), + Decimal("45.13"), + Decimal("52.262"), + Decimal("-49.25"), + ] + }, + dtype=Decimal64Dtype(9, 4), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.DataFrame( + {"val": [Decimal("9563.24"), Decimal("236.633")]}, + dtype=Decimal64Dtype(9, 4), + ), + gd.DataFrame({"val": [5393, -95832]}, dtype="int64"), + gd.DataFrame({"val": [-29.234, -31.945]}, dtype="float64"), + gd.DataFrame( + { + "val": [ + Decimal("9563.24"), + Decimal("236.633"), + Decimal("5393"), + Decimal("-95832"), + Decimal("-29.234"), + Decimal("-31.945"), + ] + }, + dtype=Decimal64Dtype(9, 4), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ], +) +def test_concat_decimal_numeric_dataframe(df1, df2, df3, expected): + df = gd.concat([df1, df2, df3]) + assert_eq(df, expected) + assert_eq(df.val.dtype, expected.val.dtype) + + +@pytest.mark.parametrize( + "s1, s2, s3, expected", + [ + ( + gd.Series( + [Decimal("32.8"), Decimal("-87.7")], dtype=Decimal64Dtype(6, 2) + ), + gd.Series( + [Decimal("101.243"), Decimal("-92.449")], + dtype=Decimal64Dtype(9, 6), + ), + gd.Series([94, -22], dtype="int32"), + gd.Series( + [ + Decimal("32.8"), + Decimal("-87.7"), + Decimal("101.243"), + Decimal("-92.449"), + Decimal("94"), + Decimal("-22"), + ], + dtype=Decimal64Dtype(10, 6), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.Series( + [Decimal("7.2"), Decimal("122.1")], dtype=Decimal64Dtype(5, 2) + ), + gd.Series([33, 984], dtype="uint32"), + gd.Series([593, -702], dtype="int32"), + gd.Series( + [ + Decimal("7.2"), + Decimal("122.1"), + Decimal("33"), + Decimal("984"), + Decimal("593"), + Decimal("-702"), + ], + dtype=Decimal64Dtype(5, 2), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.Series( + [Decimal("982.94"), Decimal("-493.626")], + dtype=Decimal64Dtype(9, 4), + ), + gd.Series([847.98, 254.442], dtype="float32"), + gd.Series([5299.262, -2049.25], dtype="float64"), + gd.Series( + [ + Decimal("982.94"), + Decimal("-493.626"), + Decimal("847.98"), + Decimal("254.442"), + Decimal("5299.262"), + Decimal("-2049.25"), + ], + dtype=Decimal64Dtype(9, 4), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ( + gd.Series( + [Decimal("492.204"), Decimal("-72824.455")], + dtype=Decimal64Dtype(9, 4), + ), + gd.Series([8438, -27462], dtype="int64"), + gd.Series([-40.292, 49202.953], dtype="float64"), + gd.Series( + [ + Decimal("492.204"), + Decimal("-72824.455"), + Decimal("8438"), + Decimal("-27462"), + Decimal("-40.292"), + Decimal("49202.953"), + ], + dtype=Decimal64Dtype(9, 4), + index=[0, 1, 0, 1, 0, 1], + ), + ), + ], +) +def test_concat_decimal_numeric_series(s1, s2, s3, expected): + s = gd.concat([s1, s2, s3]) + assert_eq(s, expected) + + +@pytest.mark.parametrize( + "s1, s2, expected", + [ + ( + gd.Series( + [Decimal("955.22"), Decimal("8.2")], dtype=Decimal64Dtype(5, 2) + ), + gd.Series(["2007-06-12", "2006-03-14"], dtype="datetime64"), + gd.Series( + [ + "955.22", + "8.20", + "2007-06-12 00:00:00", + "2006-03-14 00:00:00", + ], + index=[0, 1, 0, 1], + ), + ), + ( + gd.Series( + [Decimal("-52.44"), Decimal("365.22")], + dtype=Decimal64Dtype(5, 2), + ), + gd.Series( + np.arange( + "2005-02-01T12", "2005-02-01T15", dtype="datetime64[h]" + ), + dtype="datetime64[s]", + ), + gd.Series( + [ + "-52.44", + "365.22", + "2005-02-01 12:00:00", + "2005-02-01 13:00:00", + "2005-02-01 14:00:00", + ], + index=[0, 1, 0, 1, 2], + ), + ), + ( + gd.Series( + [Decimal("753.0"), Decimal("94.22")], + dtype=Decimal64Dtype(5, 2), + ), + gd.Series([np.timedelta64(111, "s"), np.timedelta64(509, "s")]), + gd.Series( + ["753.00", "94.22", "0 days 
00:01:51", "0 days 00:08:29"], + index=[0, 1, 0, 1], + ), + ), + ( + gd.Series( + [Decimal("753.0"), Decimal("94.22")], + dtype=Decimal64Dtype(5, 2), + ), + gd.Series( + [np.timedelta64(940252, "s"), np.timedelta64(758385, "s")] + ), + gd.Series( + ["753.00", "94.22", "10 days 21:10:52", "8 days 18:39:45"], + index=[0, 1, 0, 1], + ), + ), + ], +) +def test_concat_decimal_non_numeric(s1, s2, expected): + s = gd.concat([s1, s2]) + assert_eq(s, expected) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index e5e36ba7e21..0b73f32e94d 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8579,3 +8579,100 @@ def test_dataframe_init_from_series(data, columns, index): actual, check_index_type=False if len(expected) == 0 else True, ) + + +@pytest.mark.parametrize( + "data, expected", + [ + ({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]}, False), + ({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, True), + ({"a": ["a", "b", "c"], "b": [4, 5, 6], "c": [7, 8, 9]}, False), + ({"a": [True, False, False], "b": [False, False, True]}, True), + ({"a": [True, False, False]}, True), + ({"a": [[1, 2], [3, 4]]}, True), + ({"a": [[1, 2], [3, 4]], "b": ["a", "b"]}, False), + ({"a": [{"c": 5}, {"e": 5}], "b": [{"c": 5}, {"g": 7}]}, True), + ({}, True), + ], +) +def test_is_homogeneous_dataframe(data, expected): + actual = cudf.DataFrame(data)._is_homogeneous + + assert actual == expected + + +@pytest.mark.parametrize( + "data, indexes, expected", + [ + ( + {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8], "c": [1.2, 1, 2, 3]}, + ["a", "b"], + True, + ), + ( + { + "a": [1, 2, 3, 4], + "b": [5, 6, 7, 8], + "c": [1.2, 1, 2, 3], + "d": ["hello", "world", "cudf", "rapids"], + }, + ["a", "b"], + False, + ), + ( + { + "a": ["a", "b", "c"], + "b": [4, 5, 6], + "c": [7, 8, 9], + "d": [1, 2, 3], + }, + ["a", "b"], + True, + ), + ], +) +def test_is_homogeneous_multiIndex_dataframe(data, indexes, expected): + test_dataframe = cudf.DataFrame(data).set_index(indexes) + actual = cudf.DataFrame(test_dataframe)._is_homogeneous + + assert actual == expected + + +@pytest.mark.parametrize( + "data, expected", [([1, 2, 3, 4], True), ([True, False], True)] +) +def test_is_homogeneous_series(data, expected): + actual = cudf.Series(data)._is_homogeneous + + assert actual == expected + + +@pytest.mark.parametrize( + "levels, codes, expected", + [ + ( + [["lama", "cow", "falcon"], ["speed", "weight", "length"]], + [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], + True, + ), + ( + [[1, 2, 3], [True, False, True]], + [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], + False, + ), + ], +) +def test_is_homogeneous_multiIndex(levels, codes, expected): + actual = cudf.MultiIndex(levels=levels, codes=codes)._is_homogeneous + + assert actual == expected + + +@pytest.mark.parametrize( + "data, expected", + [([1, 2, 3], True), (["Hello", "World"], True), ([True, False], True)], +) +def test_is_homogeneous_index(data, expected): + actual = cudf.Index(data)._is_homogeneous + + assert actual == expected diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py index b6e2aac0304..a5895caf49f 100644 --- a/python/cudf/cudf/tests/test_dtypes.py +++ b/python/cudf/cudf/tests/test_dtypes.py @@ -6,14 +6,16 @@ import pytest import cudf +from cudf.core.column import ColumnBase from cudf.core.dtypes import ( CategoricalDtype, Decimal64Dtype, + IntervalDtype, ListDtype, StructDtype, - IntervalDtype, ) from 
cudf.tests.utils import assert_eq +from cudf.utils.dtypes import np_to_pa_dtype def test_cdt_basic(): @@ -155,3 +157,103 @@ def test_interval_dtype_pyarrow_round_trip(fields, closed): expect = pa_array got = IntervalDtype.from_arrow(expect).to_arrow() assert expect.equals(got) + + +def assert_column_array_dtype_equal(column: ColumnBase, array: pa.array): + """ + In cudf, each column holds its dtype. And since column may have child + columns, child columns also holds their datatype. This method tests + that every level of `column` matches the type of the given `array` + recursively. + """ + + if isinstance(column.dtype, ListDtype): + return array.type.equals( + column.dtype.to_arrow() + ) and assert_column_array_dtype_equal( + column.base_children[1], array.values + ) + elif isinstance(column.dtype, StructDtype): + return array.type.equals(column.dtype.to_arrow()) and all( + [ + assert_column_array_dtype_equal(child, array.field(i)) + for i, child in enumerate(column.base_children) + ] + ) + elif isinstance(column.dtype, Decimal64Dtype): + return array.type.equals(column.dtype.to_arrow()) + elif isinstance(column.dtype, CategoricalDtype): + raise NotImplementedError() + else: + return array.type.equals(np_to_pa_dtype(column.dtype)) + + +@pytest.mark.parametrize( + "data", + [ + [[{"name": 123}]], + [ + [ + { + "IsLeapYear": False, + "data": {"Year": 1999, "Month": 7}, + "names": ["Mike", None], + }, + { + "IsLeapYear": True, + "data": {"Year": 2004, "Month": 12}, + "names": None, + }, + { + "IsLeapYear": False, + "data": {"Year": 1996, "Month": 2}, + "names": ["Rose", "Richard"], + }, + ] + ], + [ + [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}], + [ + {"human?": None, "deets": {"weight": 5.3, "age": 25}}, + {"human?": False, "deets": {"weight": 8.0, "age": 31}}, + {"human?": False, "deets": None}, + ], + [], + None, + [{"human?": None, "deets": {"weight": 6.9, "age": None}}], + ], + [ + { + "name": "var0", + "val": [ + {"name": "var1", "val": None, "type": "optional"} + ], + "type": "list", + }, + {}, + { + "name": "var2", + "val": [ + { + "name": "var3", + "val": {"field": 42}, + "type": "optional", + }, + { + "name": "var4", + "val": {"field": 3.14}, + "type": "optional", + }, + ], + "type": "list", + }, + None, + ], + ], +) +def test_lists_of_structs_dtype(data): + got = cudf.Series(data) + expected = pa.array(data) + + assert_column_array_dtype_equal(got._column, expected) + assert expected.equals(got._column.to_arrow()) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index d1458c72770..2430b0da5ef 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -30,14 +30,28 @@ _index_type_aggs = {"count", "idxmin", "idxmax", "cumcount"} -def assert_groupby_results_equal(expect, got, sort=True, **kwargs): +def assert_groupby_results_equal( + expect, got, sort=True, as_index=True, by=None, **kwargs +): # Because we don't sort by index by default in groupby, # sort expect and got by index before comparing if sort: - expect = expect.sort_index() - got = got.sort_index() - else: - assert_eq(expect.sort_index(), got.sort_index(), **kwargs) + if as_index: + expect = expect.sort_index() + got = got.sort_index() + else: + assert by is not None + if isinstance(expect, (pd.DataFrame, cudf.DataFrame)): + expect = expect.sort_values(by=by).reset_index(drop=True) + else: + expect = expect.sort_values().reset_index(drop=True) + + if isinstance(got, cudf.DataFrame): + got = 
got.sort_values(by=by).reset_index(drop=True) + else: + got = got.sort_values().reset_index(drop=True) + + assert_eq(expect, got, **kwargs) def make_frame( @@ -201,10 +215,16 @@ def test_groupby_getitem_getattr(as_index): pdf = pd.DataFrame({"x": [1, 3, 1], "y": [1, 2, 3], "z": [1, 4, 5]}) gdf = cudf.from_pandas(pdf) assert_groupby_results_equal( - pdf.groupby("x")["y"].sum(), gdf.groupby("x")["y"].sum(), + pdf.groupby("x")["y"].sum(), + gdf.groupby("x")["y"].sum(), + as_index=as_index, + by="x", ) assert_groupby_results_equal( - pdf.groupby("x").y.sum(), gdf.groupby("x").y.sum(), + pdf.groupby("x").y.sum(), + gdf.groupby("x").y.sum(), + as_index=as_index, + by="x", ) assert_groupby_results_equal( pdf.groupby("x")[["y"]].sum(), gdf.groupby("x")[["y"]].sum(), @@ -212,6 +232,8 @@ def test_groupby_getitem_getattr(as_index): assert_groupby_results_equal( pdf.groupby(["x", "y"], as_index=as_index).sum(), gdf.groupby(["x", "y"], as_index=as_index).sum(), + as_index=as_index, + by=["x", "y"], ) @@ -1088,7 +1110,13 @@ def test_groupby_datetime(nelem, as_index, agg): else: pdres = pdg.agg({"datetime": agg}) gdres = gdg.agg({"datetime": agg}) - assert_groupby_results_equal(pdres, gdres, check_dtype=check_dtype) + assert_groupby_results_equal( + pdres, + gdres, + check_dtype=check_dtype, + as_index=as_index, + by=["datetime"], + ) def test_groupby_dropna(): @@ -1349,6 +1377,8 @@ def test_reset_index_after_empty_groupby(): assert_groupby_results_equal( pdf.groupby("a").sum().reset_index(), gdf.groupby("a").sum().reset_index(), + as_index=False, + by="a", ) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index 5dcecc6c9e1..7edcb08a7c8 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -7,6 +7,7 @@ import pytest import cudf +from cudf import NA from cudf.tests.utils import assert_eq @@ -332,3 +333,20 @@ def test_concatenate_list_with_nonlist(): gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]}) gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]}) gdf1["A"] + gdf2["A"] + + +@pytest.mark.parametrize( + "indata,expect", + [ + ([1], [1]), + ([1, 2, 3], [1, 2, 3]), + ([[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]), + ([None], [NA]), + ([1, None, 3], [1, NA, 3]), + ([[1, None, 3], [None, 5, 6]], [[1, NA, 3], [NA, 5, 6]]), + ], +) +def test_list_getitem(indata, expect): + list_sr = cudf.Series([indata]) + # __getitem__ shall fill None with cudf.NA + assert list_sr[0] == expect diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 16c35bab4b1..0b59116f8e6 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -290,13 +290,15 @@ def is_decimal_dtype(obj): ) -def _decimal_normalize_types(*args): - s = max([a.dtype.scale for a in args]) - lhs = max([a.dtype.precision - a.dtype.scale for a in args]) +def _find_common_type_decimal(dtypes): + # Find the largest scale and the largest difference between + # precision and scale of the columns to be concatenated + s = max([dtype.scale for dtype in dtypes]) + lhs = max([dtype.precision - dtype.scale for dtype in dtypes]) + # Combine to get the necessary precision and clip at the maximum + # precision p = min(cudf.Decimal64Dtype.MAX_PRECISION, s + lhs) - dtype = cudf.Decimal64Dtype(p, s) - - return [a.astype(dtype) for a in args] + return cudf.Decimal64Dtype(p, s) def cudf_dtype_from_pydata_dtype(dtype): @@ -690,9 +692,15 @@ def find_common_type(dtypes): dtypes = set(dtypes) if any(is_decimal_dtype(dtype) for dtype in 
dtypes):
-        raise NotImplementedError(
-            "DecimalDtype is not yet supported in find_common_type"
-        )
+        if all(
+            is_decimal_dtype(dtype) or is_numerical_dtype(dtype)
+            for dtype in dtypes
+        ):
+            return _find_common_type_decimal(
+                [dtype for dtype in dtypes if is_decimal_dtype(dtype)]
+            )
+        else:
+            return np.dtype("O")
 
     # Corner case 1:
     # Resort to np.result_type to handle "M" and "m" types separately
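
The python/cudf/cudf/utils/dtypes.py hunk above teaches `find_common_type` to handle decimals: take the largest scale and the largest integer-digit width (precision minus scale) across the decimal inputs, then clip the combined precision at `cudf.Decimal64Dtype.MAX_PRECISION`. The sketch below reproduces that rule in isolation; `DecimalDtype`, `find_common_decimal_dtype`, and the `MAX_PRECISION = 18` cap are stand-ins assumed for illustration, only the combination rule itself comes from the diff.

```python
# Sketch of the decimal common-type rule introduced above: combine the widest
# scale with the widest integer part, clipped at an assumed Decimal64 cap.
from collections import namedtuple

DecimalDtype = namedtuple("DecimalDtype", ["precision", "scale"])
MAX_PRECISION = 18  # assumed stand-in for cudf.Decimal64Dtype.MAX_PRECISION


def find_common_decimal_dtype(dtypes):
    scale = max(dtype.scale for dtype in dtypes)
    integer_digits = max(dtype.precision - dtype.scale for dtype in dtypes)
    precision = min(MAX_PRECISION, scale + integer_digits)
    return DecimalDtype(precision, scale)


# Concatenating Decimal64Dtype(5, 2) with Decimal64Dtype(6, 4) yields (7, 4),
# matching the expected dtype in the first test_concat_decimal_* case above.
assert find_common_decimal_dtype(
    [DecimalDtype(5, 2), DecimalDtype(6, 4)]
) == DecimalDtype(7, 4)
```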