From 87b957690f02c8983ff77e7b95aa6a5504a590e3 Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 7 Aug 2024 10:40:28 -0400 Subject: [PATCH 1/2] Update Changelog [skip ci] --- CHANGELOG.md | 376 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5efe4eb9e5..f2a7c337675 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,379 @@ +# cudf 24.08.00 (7 Aug 2024) + +## 🚨 Breaking Changes + +- Align Index __init__ APIs with pandas 2.x ([#16362](https://github.com/rapidsai/cudf/pull/16362)) [@mroeschke](https://github.com/mroeschke) +- Align Series APIs with pandas 2.x ([#16333](https://github.com/rapidsai/cudf/pull/16333)) [@mroeschke](https://github.com/mroeschke) +- Add missing `stream` param to dictionary factory APIs ([#16319](https://github.com/rapidsai/cudf/pull/16319)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Deprecate dtype= parameter in reduction methods ([#16313](https://github.com/rapidsai/cudf/pull/16313)) [@mroeschke](https://github.com/mroeschke) +- Remove squeeze argument from groupby ([#16312](https://github.com/rapidsai/cudf/pull/16312)) [@mroeschke](https://github.com/mroeschke) +- Align more DataFrame APIs with pandas ([#16310](https://github.com/rapidsai/cudf/pull/16310)) [@mroeschke](https://github.com/mroeschke) +- Remove `mr` param from `write_csv` and `write_json` ([#16231](https://github.com/rapidsai/cudf/pull/16231)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Report number of rows per file read by PQ reader when no row selection and fix segfault in chunked PQ reader when skip_rows > 0 ([#16195](https://github.com/rapidsai/cudf/pull/16195)) [@mhaseeb123](https://github.com/mhaseeb123) +- Refactor from_arrow_device/host to use resource_ref ([#16160](https://github.com/rapidsai/cudf/pull/16160)) [@harrism](https://github.com/harrism) +- Deprecate Arrow support in I/O ([#16132](https://github.com/rapidsai/cudf/pull/16132)) 
[@lithomas1](https://github.com/lithomas1) +- Return `FrozenList` for `Index.names` ([#16047](https://github.com/rapidsai/cudf/pull/16047)) [@galipremsagar](https://github.com/galipremsagar) +- Add compile option to enable large strings support ([#16037](https://github.com/rapidsai/cudf/pull/16037)) [@davidwendt](https://github.com/davidwendt) +- Hide visibility of non public symbols ([#15982](https://github.com/rapidsai/cudf/pull/15982)) [@robertmaynard](https://github.com/robertmaynard) +- Rename strings multiple target replace API ([#15898](https://github.com/rapidsai/cudf/pull/15898)) [@davidwendt](https://github.com/davidwendt) +- Pinned vector factory that uses the global pool ([#15895](https://github.com/rapidsai/cudf/pull/15895)) [@vuule](https://github.com/vuule) +- Apply clang-tidy autofixes ([#15894](https://github.com/rapidsai/cudf/pull/15894)) [@vyasr](https://github.com/vyasr) +- Support `arrow:schema` in Parquet writer to faithfully roundtrip `duration` types with Arrow ([#15875](https://github.com/rapidsai/cudf/pull/15875)) [@mhaseeb123](https://github.com/mhaseeb123) +- Expose stream parameter to public rolling APIs ([#15865](https://github.com/rapidsai/cudf/pull/15865)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Fix large strings handling in nvtext::character_tokenize ([#15829](https://github.com/rapidsai/cudf/pull/15829)) [@davidwendt](https://github.com/davidwendt) +- Remove legacy JSON reader and concurrent_unordered_map.cuh. 
([#15813](https://github.com/rapidsai/cudf/pull/15813)) [@bdice](https://github.com/bdice) + +## 🐛 Bug Fixes + +- Add `flatbuffers` to `libcudf` build ([#16446](https://github.com/rapidsai/cudf/pull/16446)) [@galipremsagar](https://github.com/galipremsagar) +- Fix parquet_field_list read_func lambda capture invalid this pointer ([#16440](https://github.com/rapidsai/cudf/pull/16440)) [@davidwendt](https://github.com/davidwendt) +- Enable prefetching in cudf.pandas.install() ([#16439](https://github.com/rapidsai/cudf/pull/16439)) [@bdice](https://github.com/bdice) +- Enable prefetching before `runpy` ([#16427](https://github.com/rapidsai/cudf/pull/16427)) [@galipremsagar](https://github.com/galipremsagar) +- Support thread-safe for `prefetch_config::get` and `prefetch_config::set` ([#16425](https://github.com/rapidsai/cudf/pull/16425)) [@ttnghia](https://github.com/ttnghia) +- Fix a `pandas-2.0` missing attribute error ([#16416](https://github.com/rapidsai/cudf/pull/16416)) [@galipremsagar](https://github.com/galipremsagar) +- [Bug] Remove loud `NativeFile` deprecation noise for `read_parquet` from S3 ([#16415](https://github.com/rapidsai/cudf/pull/16415)) [@rjzamora](https://github.com/rjzamora) +- Fix nightly memcheck error for empty STREAM_INTEROP_TEST ([#16406](https://github.com/rapidsai/cudf/pull/16406)) [@davidwendt](https://github.com/davidwendt) +- Gate ArrowStringArrayNumpySemantics cudf.pandas proxy behind version check ([#16401](https://github.com/rapidsai/cudf/pull/16401)) [@mroeschke](https://github.com/mroeschke) +- Don't export bs_thread_pool ([#16398](https://github.com/rapidsai/cudf/pull/16398)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Require fixed width types for casting in `cudf-polars` ([#16381](https://github.com/rapidsai/cudf/pull/16381)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Fix docstring of `DataFrame.apply` ([#16351](https://github.com/rapidsai/cudf/pull/16351)) 
[@galipremsagar](https://github.com/galipremsagar) +- Make __bool__ raise for more cudf objects ([#16311](https://github.com/rapidsai/cudf/pull/16311)) [@mroeschke](https://github.com/mroeschke) +- Rename `.devcontainer`s for CUDA 12.5 ([#16293](https://github.com/rapidsai/cudf/pull/16293)) [@jakirkham](https://github.com/jakirkham) +- Fix split_record for all empty strings column ([#16291](https://github.com/rapidsai/cudf/pull/16291)) [@davidwendt](https://github.com/davidwendt) +- Fix logic in to_arrow for empty list column ([#16279](https://github.com/rapidsai/cudf/pull/16279)) [@wence-](https://github.com/wence-) +- [BUG] Make name attr of Index fast slow attrs ([#16270](https://github.com/rapidsai/cudf/pull/16270)) [@Matt711](https://github.com/Matt711) +- Add custom name setter and getter for proxy objects in `cudf.pandas` ([#16234](https://github.com/rapidsai/cudf/pull/16234)) [@Matt711](https://github.com/Matt711) +- Fall back when casting a timestamp to numeric in cudf-polars ([#16232](https://github.com/rapidsai/cudf/pull/16232)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Disable large string support for Java build ([#16216](https://github.com/rapidsai/cudf/pull/16216)) [@jlowe](https://github.com/jlowe) +- Remove CCCL patch for PR 211. 
([#16207](https://github.com/rapidsai/cudf/pull/16207)) [@bdice](https://github.com/bdice) +- Add single offset to an empty ListArray in cudf::to_arrow ([#16201](https://github.com/rapidsai/cudf/pull/16201)) [@davidwendt](https://github.com/davidwendt) +- Fix `memory_usage` when calculating nested list column ([#16193](https://github.com/rapidsai/cudf/pull/16193)) [@mroeschke](https://github.com/mroeschke) +- Support at/iat indexers in cudf.pandas ([#16177](https://github.com/rapidsai/cudf/pull/16177)) [@mroeschke](https://github.com/mroeschke) +- Fix unused-return-value debug build error in from_arrow_stream_test.cpp ([#16168](https://github.com/rapidsai/cudf/pull/16168)) [@davidwendt](https://github.com/davidwendt) +- Fix cudf::strings::replace_multiple hang on empty target ([#16167](https://github.com/rapidsai/cudf/pull/16167)) [@davidwendt](https://github.com/davidwendt) +- Refactor from_arrow_device/host to use resource_ref ([#16160](https://github.com/rapidsai/cudf/pull/16160)) [@harrism](https://github.com/harrism) +- interpolate returns new column if no values are interpolated ([#16158](https://github.com/rapidsai/cudf/pull/16158)) [@mroeschke](https://github.com/mroeschke) +- Use provided memory resource for allocating mixed join results. 
([#16153](https://github.com/rapidsai/cudf/pull/16153)) [@bdice](https://github.com/bdice) +- Run DFG after verify-alpha-spec ([#16151](https://github.com/rapidsai/cudf/pull/16151)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Use size_t to allow large conditional joins ([#16127](https://github.com/rapidsai/cudf/pull/16127)) [@bdice](https://github.com/bdice) +- Allow only scale=0 fixed-point values in fixed_width_column_wrapper ([#16120](https://github.com/rapidsai/cudf/pull/16120)) [@davidwendt](https://github.com/davidwendt) +- Fix pylibcudf Table.num_rows for 0 columns case and add interop to docs ([#16108](https://github.com/rapidsai/cudf/pull/16108)) [@lithomas1](https://github.com/lithomas1) +- Add support for proxy `np.flatiter` objects ([#16107](https://github.com/rapidsai/cudf/pull/16107)) [@Matt711](https://github.com/Matt711) +- Ensure cudf objects can astype to any type when empty ([#16106](https://github.com/rapidsai/cudf/pull/16106)) [@mroeschke](https://github.com/mroeschke) +- Support `pd.read_pickle` and `pd.to_pickle` in `cudf.pandas` ([#16105](https://github.com/rapidsai/cudf/pull/16105)) [@Matt711](https://github.com/Matt711) +- Fix unnecessarily strict check in parquet chunked reader for choosing split locations. 
([#16099](https://github.com/rapidsai/cudf/pull/16099)) [@nvdbaranec](https://github.com/nvdbaranec) +- Fix `is_monotonic_*` APIs to include `nan's` ([#16085](https://github.com/rapidsai/cudf/pull/16085)) [@galipremsagar](https://github.com/galipremsagar) +- More safely parse CUDA versions when subprocess output is contaminated ([#16067](https://github.com/rapidsai/cudf/pull/16067)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- fast_slow_proxy: Don't import assert_eq at top-level ([#16063](https://github.com/rapidsai/cudf/pull/16063)) [@wence-](https://github.com/wence-) +- Prevent bad ColumnAccessor state after .sort_index(axis=1, ignore_index=True) ([#16061](https://github.com/rapidsai/cudf/pull/16061)) [@mroeschke](https://github.com/mroeschke) +- Fix ArrowDeviceArray interface to pass address of event ([#16058](https://github.com/rapidsai/cudf/pull/16058)) [@zeroshade](https://github.com/zeroshade) +- Fix a size overflow bug in hash groupby ([#16053](https://github.com/rapidsai/cudf/pull/16053)) [@PointKernel](https://github.com/PointKernel) +- Fix `atomic_ref` scope when multiple blocks are updating the same output ([#16051](https://github.com/rapidsai/cudf/pull/16051)) [@vuule](https://github.com/vuule) +- Fix initialization error in to_arrow for empty string views ([#16033](https://github.com/rapidsai/cudf/pull/16033)) [@wence-](https://github.com/wence-) +- Fix the int32 overflow when computing page fragment sizes for large string columns ([#16028](https://github.com/rapidsai/cudf/pull/16028)) [@mhaseeb123](https://github.com/mhaseeb123) +- Fix the pool size alignment issue ([#16024](https://github.com/rapidsai/cudf/pull/16024)) [@PointKernel](https://github.com/PointKernel) +- Improve multibyte-split byte-range performance ([#16019](https://github.com/rapidsai/cudf/pull/16019)) [@davidwendt](https://github.com/davidwendt) +- Fix target counting in strings char-parallel replace ([#16017](https://github.com/rapidsai/cudf/pull/16017)) 
[@davidwendt](https://github.com/davidwendt) +- Support IntervalDtype in cudf.from_pandas ([#16014](https://github.com/rapidsai/cudf/pull/16014)) [@mroeschke](https://github.com/mroeschke) +- Fix memory size in create_byte_range_infos_consecutive ([#16012](https://github.com/rapidsai/cudf/pull/16012)) [@davidwendt](https://github.com/davidwendt) +- Hide visibility of non public symbols ([#15982](https://github.com/rapidsai/cudf/pull/15982)) [@robertmaynard](https://github.com/robertmaynard) +- Fix Cython typo preventing proper inheritance ([#15978](https://github.com/rapidsai/cudf/pull/15978)) [@vyasr](https://github.com/vyasr) +- Fix convert_dtypes with convert_integer=False/convert_floating=True ([#15964](https://github.com/rapidsai/cudf/pull/15964)) [@mroeschke](https://github.com/mroeschke) +- Fix nunique for `MultiIndex`, `DataFrame`, and all NA case with `dropna=False` ([#15962](https://github.com/rapidsai/cudf/pull/15962)) [@mroeschke](https://github.com/mroeschke) +- Explicitly build for all GPU architectures ([#15959](https://github.com/rapidsai/cudf/pull/15959)) [@vyasr](https://github.com/vyasr) +- Preserve column type and class information in more DataFrame operations ([#15949](https://github.com/rapidsai/cudf/pull/15949)) [@mroeschke](https://github.com/mroeschke) +- Add __array_interface__ to cudf.pandas numpy.ndarray proxy ([#15936](https://github.com/rapidsai/cudf/pull/15936)) [@mroeschke](https://github.com/mroeschke) +- Allow tests to be built when stream util is disabled ([#15933](https://github.com/rapidsai/cudf/pull/15933)) [@robertmaynard](https://github.com/robertmaynard) +- Fix JSON multi-source reading when total source size exceeds `INT_MAX` bytes ([#15930](https://github.com/rapidsai/cudf/pull/15930)) [@shrshi](https://github.com/shrshi) +- Fix `dask_cudf.read_parquet` regression for legacy timestamp data ([#15929](https://github.com/rapidsai/cudf/pull/15929)) [@rjzamora](https://github.com/rjzamora) +- Fix offsetalator when accessing 
over 268 million rows ([#15921](https://github.com/rapidsai/cudf/pull/15921)) [@davidwendt](https://github.com/davidwendt) +- Fix debug assert in rowgroup_char_counts_kernel ([#15902](https://github.com/rapidsai/cudf/pull/15902)) [@davidwendt](https://github.com/davidwendt) +- Fix categorical conversion from chunked arrow arrays ([#15886](https://github.com/rapidsai/cudf/pull/15886)) [@vyasr](https://github.com/vyasr) +- Handling for `NaN` and `inf` when converting floating point to fixed point types ([#15885](https://github.com/rapidsai/cudf/pull/15885)) [@ttnghia](https://github.com/ttnghia) +- Manual merge of Branch 24.08 from 24.06 ([#15869](https://github.com/rapidsai/cudf/pull/15869)) [@galipremsagar](https://github.com/galipremsagar) +- Avoid unnecessary `Index` cast in `IndexedFrame.index` setter ([#15843](https://github.com/rapidsai/cudf/pull/15843)) [@charlesbluca](https://github.com/charlesbluca) +- Fix large strings handling in nvtext::character_tokenize ([#15829](https://github.com/rapidsai/cudf/pull/15829)) [@davidwendt](https://github.com/davidwendt) +- Fix multi-replace target count logic for large strings ([#15807](https://github.com/rapidsai/cudf/pull/15807)) [@davidwendt](https://github.com/davidwendt) +- Fix JSON parsing memory corruption - Fix Mixed types nested children removal ([#15798](https://github.com/rapidsai/cudf/pull/15798)) [@karthikeyann](https://github.com/karthikeyann) +- Allow anonymous user in devcontainer name. 
([#15784](https://github.com/rapidsai/cudf/pull/15784)) [@bdice](https://github.com/bdice) +- Add support for additional metaclasses of proxies and use for ExcelWriter ([#15399](https://github.com/rapidsai/cudf/pull/15399)) [@vyasr](https://github.com/vyasr) + +## 📖 Documentation + +- Add docstring for from_dataframe ([#16260](https://github.com/rapidsai/cudf/pull/16260)) [@mroeschke](https://github.com/mroeschke) +- Update libcudf compiler requirements in contributing doc ([#16103](https://github.com/rapidsai/cudf/pull/16103)) [@davidwendt](https://github.com/davidwendt) +- Add libcudf public/detail API pattern to developer guide ([#16086](https://github.com/rapidsai/cudf/pull/16086)) [@davidwendt](https://github.com/davidwendt) +- Explain line profiler and how to know which functions are GPU-accelerated. ([#16079](https://github.com/rapidsai/cudf/pull/16079)) [@bdice](https://github.com/bdice) +- cudf.pandas documentation improvement ([#15948](https://github.com/rapidsai/cudf/pull/15948)) [@Matt711](https://github.com/Matt711) +- Reland "Fix docs for IO readers and strings_convert" (#15872) ([#15941](https://github.com/rapidsai/cudf/pull/15941)) [@lithomas1](https://github.com/lithomas1) +- Document how to use cudf.pandas in tandem with multiprocessing ([#15940](https://github.com/rapidsai/cudf/pull/15940)) [@wence-](https://github.com/wence-) +- DOC: Add documentation for cudf.pandas in the Developer Guide ([#15889](https://github.com/rapidsai/cudf/pull/15889)) [@Matt711](https://github.com/Matt711) +- Improve options docs ([#15888](https://github.com/rapidsai/cudf/pull/15888)) [@bdice](https://github.com/bdice) +- DOC: add linkcode to docs ([#15860](https://github.com/rapidsai/cudf/pull/15860)) [@raybellwaves](https://github.com/raybellwaves) +- DOC: use intersphinx mapping in pandas-compat ext ([#15846](https://github.com/rapidsai/cudf/pull/15846)) [@raybellwaves](https://github.com/raybellwaves) +- Fix inconsistent usage of 'results' and 'records' 
in read-json.md ([#15766](https://github.com/rapidsai/cudf/pull/15766)) [@dagardner-nv](https://github.com/dagardner-nv) +- Update PandasCompat.py to resolve references ([#15704](https://github.com/rapidsai/cudf/pull/15704)) [@raybellwaves](https://github.com/raybellwaves) + +## 🚀 New Features + +- Warn on cuDF failure when `POLARS_VERBOSE` is true ([#16308](https://github.com/rapidsai/cudf/pull/16308)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add `drop_nulls` in `cudf-polars` ([#16290](https://github.com/rapidsai/cudf/pull/16290)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- [JNI] Add setKernelPinnedCopyThreshold and setPinnedAllocationThreshold ([#16288](https://github.com/rapidsai/cudf/pull/16288)) [@abellina](https://github.com/abellina) +- Implement support for scan_ndjson in cudf-polars ([#16263](https://github.com/rapidsai/cudf/pull/16263)) [@lithomas1](https://github.com/lithomas1) +- Publish cudf-polars nightlies ([#16213](https://github.com/rapidsai/cudf/pull/16213)) [@lithomas1](https://github.com/lithomas1) +- Modify `make_host_vector` and `make_device_uvector` factories to optionally use pinned memory and kernel copy ([#16206](https://github.com/rapidsai/cudf/pull/16206)) [@vuule](https://github.com/vuule) +- Migrate lists/set_operations to pylibcudf ([#16190](https://github.com/rapidsai/cudf/pull/16190)) [@Matt711](https://github.com/Matt711) +- Migrate lists/filling to pylibcudf ([#16189](https://github.com/rapidsai/cudf/pull/16189)) [@Matt711](https://github.com/Matt711) +- Fall back to CPU for unsupported libcudf binaryops in cudf-polars ([#16188](https://github.com/rapidsai/cudf/pull/16188)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Use resource_ref for upstream in stream_checking_resource_adaptor ([#16187](https://github.com/rapidsai/cudf/pull/16187)) [@harrism](https://github.com/harrism) +- Migrate lists/modifying to pylibcudf ([#16185](https://github.com/rapidsai/cudf/pull/16185)) 
[@Matt711](https://github.com/Matt711) +- Migrate lists/filtering to pylibcudf ([#16184](https://github.com/rapidsai/cudf/pull/16184)) [@Matt711](https://github.com/Matt711) +- Migrate lists/sorting to pylibcudf ([#16179](https://github.com/rapidsai/cudf/pull/16179)) [@Matt711](https://github.com/Matt711) +- Add missing methods to lists/list_column_view.pxd in pylibcudf ([#16175](https://github.com/rapidsai/cudf/pull/16175)) [@Matt711](https://github.com/Matt711) +- Migrate pylibcudf lists gathering ([#16170](https://github.com/rapidsai/cudf/pull/16170)) [@Matt711](https://github.com/Matt711) +- Move kernel vis over to CUDF_HIDDEN ([#16165](https://github.com/rapidsai/cudf/pull/16165)) [@robertmaynard](https://github.com/robertmaynard) +- Add groupby_max multi-threaded benchmark ([#16154](https://github.com/rapidsai/cudf/pull/16154)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Promote has_nested_columns to cudf public API ([#16131](https://github.com/rapidsai/cudf/pull/16131)) [@robertmaynard](https://github.com/robertmaynard) +- Promote IO support queries to cudf API ([#16125](https://github.com/rapidsai/cudf/pull/16125)) [@robertmaynard](https://github.com/robertmaynard) +- cudf::merge public API now supports passing a user stream ([#16124](https://github.com/rapidsai/cudf/pull/16124)) [@robertmaynard](https://github.com/robertmaynard) +- Add TPC-H inspired examples for Libcudf ([#16088](https://github.com/rapidsai/cudf/pull/16088)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Installed cudf header use cudf::allocate_like ([#16087](https://github.com/rapidsai/cudf/pull/16087)) [@robertmaynard](https://github.com/robertmaynard) +- `cudf-polars` string slicing ([#16082](https://github.com/rapidsai/cudf/pull/16082)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrate Parquet reader to pylibcudf ([#16078](https://github.com/rapidsai/cudf/pull/16078)) [@lithomas1](https://github.com/lithomas1) +- Migrate lists/count_elements 
to pylibcudf ([#16072](https://github.com/rapidsai/cudf/pull/16072)) [@Matt711](https://github.com/Matt711) +- Migrate lists/extract to pylibcudf ([#16071](https://github.com/rapidsai/cudf/pull/16071)) [@Matt711](https://github.com/Matt711) +- Move common string utilities to public api ([#16070](https://github.com/rapidsai/cudf/pull/16070)) [@robertmaynard](https://github.com/robertmaynard) +- stable_distinct public api now has a stream parameter ([#16068](https://github.com/rapidsai/cudf/pull/16068)) [@robertmaynard](https://github.com/robertmaynard) +- Migrate expressions to pylibcudf ([#16056](https://github.com/rapidsai/cudf/pull/16056)) [@lithomas1](https://github.com/lithomas1) +- Add support to ArrowDataSource in SourceInfo ([#16050](https://github.com/rapidsai/cudf/pull/16050)) [@lithomas1](https://github.com/lithomas1) +- Experimental support for configurable prefetching ([#16020](https://github.com/rapidsai/cudf/pull/16020)) [@vyasr](https://github.com/vyasr) +- Migrate CSV reader to pylibcudf ([#16011](https://github.com/rapidsai/cudf/pull/16011)) [@lithomas1](https://github.com/lithomas1) +- Migrate string `slice` APIs to `pylibcudf` ([#15988](https://github.com/rapidsai/cudf/pull/15988)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrate lists/contains to pylibcudf ([#15981](https://github.com/rapidsai/cudf/pull/15981)) [@Matt711](https://github.com/Matt711) +- Remove CCCL 2.2 patches as we now always use 2.5+ ([#15969](https://github.com/rapidsai/cudf/pull/15969)) [@robertmaynard](https://github.com/robertmaynard) +- Migrate JSON reader to pylibcudf ([#15966](https://github.com/rapidsai/cudf/pull/15966)) [@lithomas1](https://github.com/lithomas1) +- Add a developer check for proxy objects ([#15956](https://github.com/rapidsai/cudf/pull/15956)) [@Matt711](https://github.com/Matt711) +- Start migrating I/O writers to pylibcudf (starting with JSON) ([#15952](https://github.com/rapidsai/cudf/pull/15952)) 
[@lithomas1](https://github.com/lithomas1) +- Kernel copy for pinned memory ([#15934](https://github.com/rapidsai/cudf/pull/15934)) [@vuule](https://github.com/vuule) +- Migrate left join and conditional join benchmarks to use nvbench ([#15931](https://github.com/rapidsai/cudf/pull/15931)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Migrate lists/combine to pylibcudf ([#15928](https://github.com/rapidsai/cudf/pull/15928)) [@Matt711](https://github.com/Matt711) +- Plumb pylibcudf strings `contains_re` through cudf_polars ([#15918](https://github.com/rapidsai/cudf/pull/15918)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Start migrating I/O to pylibcudf ([#15899](https://github.com/rapidsai/cudf/pull/15899)) [@lithomas1](https://github.com/lithomas1) +- Pinned vector factory that uses the global pool ([#15895](https://github.com/rapidsai/cudf/pull/15895)) [@vuule](https://github.com/vuule) +- Migrate strings `contains` operations to `pylibcudf` ([#15880](https://github.com/rapidsai/cudf/pull/15880)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrate quantile.pxd to pylibcudf ([#15874](https://github.com/rapidsai/cudf/pull/15874)) [@lithomas1](https://github.com/lithomas1) +- Migrate round to pylibcudf ([#15863](https://github.com/rapidsai/cudf/pull/15863)) [@lithomas1](https://github.com/lithomas1) +- Migrate string replace.pxd to pylibcudf ([#15839](https://github.com/rapidsai/cudf/pull/15839)) [@lithomas1](https://github.com/lithomas1) +- Add an Environment Variable for debugging the fast path in cudf.pandas ([#15837](https://github.com/rapidsai/cudf/pull/15837)) [@Matt711](https://github.com/Matt711) +- Add an option to run cuIO benchmarks with pinned buffers as input ([#15830](https://github.com/rapidsai/cudf/pull/15830)) [@vuule](https://github.com/vuule) +- Update `pylibcudf` testing utilities ([#15772](https://github.com/rapidsai/cudf/pull/15772)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrate 
string `capitalize` APIs to `pylibcudf` ([#15503](https://github.com/rapidsai/cudf/pull/15503)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add tests for `pylibcudf` binaryops ([#15470](https://github.com/rapidsai/cudf/pull/15470)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Migrate column factories to pylibcudf ([#15257](https://github.com/rapidsai/cudf/pull/15257)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- cuDF/libcudf exponentially weighted moving averages ([#9027](https://github.com/rapidsai/cudf/pull/9027)) [@brandon-b-miller](https://github.com/brandon-b-miller) + +## 🛠️ Improvements + +- Ensure objects with __interface__ are converted to cupy/numpy arrays ([#16436](https://github.com/rapidsai/cudf/pull/16436)) [@mroeschke](https://github.com/mroeschke) +- Add about rmm modes in `cudf.pandas` docs ([#16404](https://github.com/rapidsai/cudf/pull/16404)) [@galipremsagar](https://github.com/galipremsagar) +- Gracefully CUDF_FAIL when `skip_rows > 0` in Chunked Parquet reader ([#16385](https://github.com/rapidsai/cudf/pull/16385)) [@mhaseeb123](https://github.com/mhaseeb123) +- Make C++ compilation warning free after #16297 ([#16379](https://github.com/rapidsai/cudf/pull/16379)) [@wence-](https://github.com/wence-) +- Align Index __init__ APIs with pandas 2.x ([#16362](https://github.com/rapidsai/cudf/pull/16362)) [@mroeschke](https://github.com/mroeschke) +- Use rapids_cpm_bs_thread_pool() ([#16360](https://github.com/rapidsai/cudf/pull/16360)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Rename PrefetchConfig to prefetch_config. 
([#16358](https://github.com/rapidsai/cudf/pull/16358)) [@bdice](https://github.com/bdice) +- Implement parquet reading using pylibcudf in cudf-polars ([#16346](https://github.com/rapidsai/cudf/pull/16346)) [@lithomas1](https://github.com/lithomas1) +- Fix compile warnings with `jni_utils.hpp` ([#16336](https://github.com/rapidsai/cudf/pull/16336)) [@ttnghia](https://github.com/ttnghia) +- Align Series APIs with pandas 2.x ([#16333](https://github.com/rapidsai/cudf/pull/16333)) [@mroeschke](https://github.com/mroeschke) +- Add missing `stream` param to dictionary factory APIs ([#16319](https://github.com/rapidsai/cudf/pull/16319)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Mark cudf._typing as a typing module in ruff ([#16318](https://github.com/rapidsai/cudf/pull/16318)) [@mroeschke](https://github.com/mroeschke) +- Add `stream` param to list explode APIs ([#16317](https://github.com/rapidsai/cudf/pull/16317)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Fix polars for 1.2.1 ([#16316](https://github.com/rapidsai/cudf/pull/16316)) [@lithomas1](https://github.com/lithomas1) +- Use workflow branch 24.08 again ([#16314](https://github.com/rapidsai/cudf/pull/16314)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Deprecate dtype= parameter in reduction methods ([#16313](https://github.com/rapidsai/cudf/pull/16313)) [@mroeschke](https://github.com/mroeschke) +- Remove squeeze argument from groupby ([#16312](https://github.com/rapidsai/cudf/pull/16312)) [@mroeschke](https://github.com/mroeschke) +- Align more DataFrame APIs with pandas ([#16310](https://github.com/rapidsai/cudf/pull/16310)) [@mroeschke](https://github.com/mroeschke) +- Clean unneeded/redundant dtype utils ([#16309](https://github.com/rapidsai/cudf/pull/16309)) [@mroeschke](https://github.com/mroeschke) +- Implement read_csv in cudf-polars using pylibcudf ([#16307](https://github.com/rapidsai/cudf/pull/16307)) [@lithomas1](https://github.com/lithomas1) +- Use 
Column.can_cast_safely instead of some ad-hoc dtype functions in .where ([#16303](https://github.com/rapidsai/cudf/pull/16303)) [@mroeschke](https://github.com/mroeschke) +- Drop `{{ pin_compatible('numpy', max_pin='x') }}` ([#16301](https://github.com/rapidsai/cudf/pull/16301)) [@jakirkham](https://github.com/jakirkham) +- Host implementation of `to_arrow` using nanoarrow ([#16297](https://github.com/rapidsai/cudf/pull/16297)) [@zeroshade](https://github.com/zeroshade) +- Add ability to prefetch in `cudf.pandas` and change default to managed pool ([#16296](https://github.com/rapidsai/cudf/pull/16296)) [@galipremsagar](https://github.com/galipremsagar) +- Fix tests for polars 1.2 ([#16292](https://github.com/rapidsai/cudf/pull/16292)) [@lithomas1](https://github.com/lithomas1) +- Introduce dedicated options for low memory readers ([#16289](https://github.com/rapidsai/cudf/pull/16289)) [@galipremsagar](https://github.com/galipremsagar) +- Remove decimal/floating 64/128bit switches due to register pressure ([#16287](https://github.com/rapidsai/cudf/pull/16287)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Make ColumnAccessor strictly require a mapping of columns ([#16285](https://github.com/rapidsai/cudf/pull/16285)) [@mroeschke](https://github.com/mroeschke) +- Introduce version file so we can conditionally handle things in tests ([#16280](https://github.com/rapidsai/cudf/pull/16280)) [@wence-](https://github.com/wence-) +- Type & reduce cupy usage ([#16277](https://github.com/rapidsai/cudf/pull/16277)) [@mroeschke](https://github.com/mroeschke) +- Update cudf::detail::grid_1d to use thread_index_type ([#16276](https://github.com/rapidsai/cudf/pull/16276)) [@davidwendt](https://github.com/davidwendt) +- Replace np.isscalar/issubdtype checks with is_scalar/.kind checks ([#16275](https://github.com/rapidsai/cudf/pull/16275)) [@mroeschke](https://github.com/mroeschke) +- Remove xml from sort_ninja_log.py utility 
([#16274](https://github.com/rapidsai/cudf/pull/16274)) [@davidwendt](https://github.com/davidwendt) +- Fix issue in horizontal concat implementation in cudf-polars ([#16271](https://github.com/rapidsai/cudf/pull/16271)) [@wence-](https://github.com/wence-) +- Preserve order in left join for cudf-polars ([#16268](https://github.com/rapidsai/cudf/pull/16268)) [@wence-](https://github.com/wence-) +- Replace is_datetime/timedelta_dtype checks with .kind checks ([#16262](https://github.com/rapidsai/cudf/pull/16262)) [@mroeschke](https://github.com/mroeschke) +- Replace is_float/integer_dtype checks with .kind checks ([#16261](https://github.com/rapidsai/cudf/pull/16261)) [@mroeschke](https://github.com/mroeschke) +- Build and test with CUDA 12.5.1 ([#16259](https://github.com/rapidsai/cudf/pull/16259)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Replace is_bool_type with checking .dtype.kind ([#16255](https://github.com/rapidsai/cudf/pull/16255)) [@mroeschke](https://github.com/mroeschke) +- remove `cuco_noexcept.diff` ([#16254](https://github.com/rapidsai/cudf/pull/16254)) [@trxcllnt](https://github.com/trxcllnt) +- Update contains_tests.cpp to use public cudf::slice ([#16253](https://github.com/rapidsai/cudf/pull/16253)) [@davidwendt](https://github.com/davidwendt) +- Improve the test data for pylibcudf I/O tests ([#16247](https://github.com/rapidsai/cudf/pull/16247)) [@lithomas1](https://github.com/lithomas1) +- Short circuit some Column methods ([#16246](https://github.com/rapidsai/cudf/pull/16246)) [@mroeschke](https://github.com/mroeschke) +- Make nvcomp adapter compatible with new version macros ([#16245](https://github.com/rapidsai/cudf/pull/16245)) [@vuule](https://github.com/vuule) +- Add Column.strftime/strptime instead of overloading `as_string/datetime/timedelta_column` ([#16243](https://github.com/rapidsai/cudf/pull/16243)) [@mroeschke](https://github.com/mroeschke) +- Remove temporary functor overloads required by cuco version bump 
([#16242](https://github.com/rapidsai/cudf/pull/16242)) [@PointKernel](https://github.com/PointKernel) +- Remove hash_character_ngrams dependency from jaccard_index ([#16241](https://github.com/rapidsai/cudf/pull/16241)) [@davidwendt](https://github.com/davidwendt) +- Expose sorted groupby parameters to pylibcudf ([#16240](https://github.com/rapidsai/cudf/pull/16240)) [@wence-](https://github.com/wence-) +- Expose reflection to check if casting between two types is supported ([#16239](https://github.com/rapidsai/cudf/pull/16239)) [@wence-](https://github.com/wence-) +- Handle nans in groupby-aggregations in polars executor ([#16233](https://github.com/rapidsai/cudf/pull/16233)) [@wence-](https://github.com/wence-) +- Remove `mr` param from `write_csv` and `write_json` ([#16231](https://github.com/rapidsai/cudf/pull/16231)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Support Literals in groupby-agg ([#16218](https://github.com/rapidsai/cudf/pull/16218)) [@wence-](https://github.com/wence-) +- Handle csv reader options in cudf-polars ([#16211](https://github.com/rapidsai/cudf/pull/16211)) [@wence-](https://github.com/wence-) +- Update vendored thread_pool implementation ([#16210](https://github.com/rapidsai/cudf/pull/16210)) [@wence-](https://github.com/wence-) +- Add low memory JSON reader for `cudf.pandas` ([#16204](https://github.com/rapidsai/cudf/pull/16204)) [@galipremsagar](https://github.com/galipremsagar) +- Clean up state variables in MultiIndex ([#16203](https://github.com/rapidsai/cudf/pull/16203)) [@mroeschke](https://github.com/mroeschke) +- skip CMake 3.30.0 ([#16202](https://github.com/rapidsai/cudf/pull/16202)) [@jameslamb](https://github.com/jameslamb) +- Assert valid metadata is passed in to_arrow for list_view ([#16198](https://github.com/rapidsai/cudf/pull/16198)) [@wence-](https://github.com/wence-) +- Expose type traits to pylibcudf ([#16197](https://github.com/rapidsai/cudf/pull/16197)) [@wence-](https://github.com/wence-) +- 
Report number of rows per file read by PQ reader when no row selection and fix segfault in chunked PQ reader when skip_rows > 0 ([#16195](https://github.com/rapidsai/cudf/pull/16195)) [@mhaseeb123](https://github.com/mhaseeb123) +- Cast count aggs to correct dtype in translation ([#16192](https://github.com/rapidsai/cudf/pull/16192)) [@wence-](https://github.com/wence-) +- Some small fixes in cudf-polars ([#16191](https://github.com/rapidsai/cudf/pull/16191)) [@wence-](https://github.com/wence-) +- split up CUDA-suffixed dependencies in dependencies.yaml ([#16183](https://github.com/rapidsai/cudf/pull/16183)) [@jameslamb](https://github.com/jameslamb) +- Define PTDS for the stream hook libs ([#16182](https://github.com/rapidsai/cudf/pull/16182)) [@trxcllnt](https://github.com/trxcllnt) +- Make `test_python_cudf_pandas` generate `requirements.txt` ([#16181](https://github.com/rapidsai/cudf/pull/16181)) [@trxcllnt](https://github.com/trxcllnt) +- Add environment-agnostic `ci/run_cudf_polars_pytest.sh` ([#16178](https://github.com/rapidsai/cudf/pull/16178)) [@trxcllnt](https://github.com/trxcllnt) +- Implement translation for some unary functions and a single datetime extraction ([#16173](https://github.com/rapidsai/cudf/pull/16173)) [@wence-](https://github.com/wence-) +- Remove size constraints on source files in batched JSON reading ([#16162](https://github.com/rapidsai/cudf/pull/16162)) [@shrshi](https://github.com/shrshi) +- CI: Build wheels for cudf-polars ([#16156](https://github.com/rapidsai/cudf/pull/16156)) [@lithomas1](https://github.com/lithomas1) +- Update cudf-polars for v1 release of polars ([#16149](https://github.com/rapidsai/cudf/pull/16149)) [@wence-](https://github.com/wence-) +- Use strings concatenate to support large strings in CSV writer ([#16148](https://github.com/rapidsai/cudf/pull/16148)) [@davidwendt](https://github.com/davidwendt) +- Use verify-alpha-spec hook ([#16144](https://github.com/rapidsai/cudf/pull/16144)) 
[@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Adds write-coalescing code path optimization to FST ([#16143](https://github.com/rapidsai/cudf/pull/16143)) [@elstehle](https://github.com/elstehle) +- MAINT: Adapt to NumPy 2 promotion changes ([#16141](https://github.com/rapidsai/cudf/pull/16141)) [@seberg](https://github.com/seberg) +- API: Check for integer overflows when creating scalar from python int ([#16140](https://github.com/rapidsai/cudf/pull/16140)) [@seberg](https://github.com/seberg) +- Remove the (unused) implementation of `host_parse_nested_json` ([#16135](https://github.com/rapidsai/cudf/pull/16135)) [@vuule](https://github.com/vuule) +- Deprecate Arrow support in I/O ([#16132](https://github.com/rapidsai/cudf/pull/16132)) [@lithomas1](https://github.com/lithomas1) +- Disable dict support for split-page kernel in the parquet reader. ([#16128](https://github.com/rapidsai/cudf/pull/16128)) [@nvdbaranec](https://github.com/nvdbaranec) +- Add throughput metrics for REDUCTION_BENCH/REDUCTION_NVBENCH benchmarks ([#16126](https://github.com/rapidsai/cudf/pull/16126)) [@jihoonson](https://github.com/jihoonson) +- Add ensure_index to not unnecessarily shallow copy cudf.Index ([#16117](https://github.com/rapidsai/cudf/pull/16117)) [@mroeschke](https://github.com/mroeschke) +- Make binary operators work between fixed-point and floating args ([#16116](https://github.com/rapidsai/cudf/pull/16116)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Implement Ternary copy_if_else ([#16114](https://github.com/rapidsai/cudf/pull/16114)) [@wence-](https://github.com/wence-) +- Implement handlers for series literal in cudf-polars ([#16113](https://github.com/rapidsai/cudf/pull/16113)) [@wence-](https://github.com/wence-) +- Fix dtype errors in `StringArrays` ([#16111](https://github.com/rapidsai/cudf/pull/16111)) [@galipremsagar](https://github.com/galipremsagar) +- Ensure MultiIndex.to_frame deep copies columns 
([#16110](https://github.com/rapidsai/cudf/pull/16110)) [@mroeschke](https://github.com/mroeschke) +- Parallelize `gpuInitStringDescriptors` for fixed length byte array data ([#16109](https://github.com/rapidsai/cudf/pull/16109)) [@mhaseeb123](https://github.com/mhaseeb123) +- Finish implementation of cudf-polars boolean function handlers ([#16098](https://github.com/rapidsai/cudf/pull/16098)) [@wence-](https://github.com/wence-) +- Expose and then implement support for cross joins in cudf-polars ([#16097](https://github.com/rapidsai/cudf/pull/16097)) [@wence-](https://github.com/wence-) +- Defer copying in Column.astype(copy=True) ([#16095](https://github.com/rapidsai/cudf/pull/16095)) [@mroeschke](https://github.com/mroeschke) +- Fix segfault in conditional join ([#16094](https://github.com/rapidsai/cudf/pull/16094)) [@bdice](https://github.com/bdice) +- Free temp memory no longer needed in multibyte_split processing ([#16091](https://github.com/rapidsai/cudf/pull/16091)) [@davidwendt](https://github.com/davidwendt) +- Rename gather/scatter benchmarks to clarify coalesced behavior. 
([#16083](https://github.com/rapidsai/cudf/pull/16083)) [@bdice](https://github.com/bdice) +- Adapt to polars upstream changes and turn on CI testing ([#16081](https://github.com/rapidsai/cudf/pull/16081)) [@wence-](https://github.com/wence-) +- Reduce/clean copy usage in Series, reshaping ([#16080](https://github.com/rapidsai/cudf/pull/16080)) [@mroeschke](https://github.com/mroeschke) +- Account for FIXED_LEN_BYTE_ARRAY when calculating fragment sizes in Parquet writer ([#16064](https://github.com/rapidsai/cudf/pull/16064)) [@etseidl](https://github.com/etseidl) +- Reduce (shallow) copies in DataFrame ops ([#16060](https://github.com/rapidsai/cudf/pull/16060)) [@mroeschke](https://github.com/mroeschke) +- Add multi-file support to `dask_cudf.read_json` ([#16057](https://github.com/rapidsai/cudf/pull/16057)) [@rjzamora](https://github.com/rjzamora) +- Reduce deep copies in Index ops ([#16054](https://github.com/rapidsai/cudf/pull/16054)) [@mroeschke](https://github.com/mroeschke) +- Implement chunked column wise concat in chunked parquet reader ([#16052](https://github.com/rapidsai/cudf/pull/16052)) [@galipremsagar](https://github.com/galipremsagar) +- Add exception when trying to create large strings with cudf::test::strings_column_wrapper ([#16049](https://github.com/rapidsai/cudf/pull/16049)) [@davidwendt](https://github.com/davidwendt) +- Return `FrozenList` for `Index.names` ([#16047](https://github.com/rapidsai/cudf/pull/16047)) [@galipremsagar](https://github.com/galipremsagar) +- Add ast cast test ([#16045](https://github.com/rapidsai/cudf/pull/16045)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Remove `override_dtypes` and `include_index` from `Frame._copy_type_metadata` ([#16043](https://github.com/rapidsai/cudf/pull/16043)) [@mroeschke](https://github.com/mroeschke) +- Add ruff rules to avoid importing from typing ([#16040](https://github.com/rapidsai/cudf/pull/16040)) [@mroeschke](https://github.com/mroeschke) +- Fix decimal -> float 
cast in ast code ([#16038](https://github.com/rapidsai/cudf/pull/16038)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Add compile option to enable large strings support ([#16037](https://github.com/rapidsai/cudf/pull/16037)) [@davidwendt](https://github.com/davidwendt) +- Reduce conditional_join nvbench configurations ([#16036](https://github.com/rapidsai/cudf/pull/16036)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Project automation update: skip if not in project ([#16035](https://github.com/rapidsai/cudf/pull/16035)) [@jarmak-nv](https://github.com/jarmak-nv) +- Add stream parameter to cudf::io::text::multibyte_split ([#16034](https://github.com/rapidsai/cudf/pull/16034)) [@davidwendt](https://github.com/davidwendt) +- Delete unused code from stringfunction evaluator ([#16032](https://github.com/rapidsai/cudf/pull/16032)) [@wence-](https://github.com/wence-) +- Fix exclude regex in pre-commit clang-format hook ([#16030](https://github.com/rapidsai/cudf/pull/16030)) [@wence-](https://github.com/wence-) +- Refactor rmm usage in `cudf.pandas` ([#16021](https://github.com/rapidsai/cudf/pull/16021)) [@galipremsagar](https://github.com/galipremsagar) +- Enable ruff TCH: typing imports under if TYPE_CHECKING ([#16015](https://github.com/rapidsai/cudf/pull/16015)) [@mroeschke](https://github.com/mroeschke) +- Restrict the allowed pandas timezone objects in cudf ([#16013](https://github.com/rapidsai/cudf/pull/16013)) [@mroeschke](https://github.com/mroeschke) +- orc multithreaded benchmark ([#16009](https://github.com/rapidsai/cudf/pull/16009)) [@zpuller](https://github.com/zpuller) +- Add tests of expression-based sort and sort-by ([#16008](https://github.com/rapidsai/cudf/pull/16008)) [@wence-](https://github.com/wence-) +- Add tests of implemented StringFunctions ([#16007](https://github.com/rapidsai/cudf/pull/16007)) [@wence-](https://github.com/wence-) +- Add test that diagonal concat with mismatching schemas raises 
([#16006](https://github.com/rapidsai/cudf/pull/16006)) [@wence-](https://github.com/wence-) +- Add coverage selecting len from a dataframe (number of rows) ([#16005](https://github.com/rapidsai/cudf/pull/16005)) [@wence-](https://github.com/wence-) +- Add basic tests of dataframe scan ([#16003](https://github.com/rapidsai/cudf/pull/16003)) [@wence-](https://github.com/wence-) +- Add coverage for both expression and dataframe filter ([#16002](https://github.com/rapidsai/cudf/pull/16002)) [@wence-](https://github.com/wence-) +- Remove deprecated ExtContext node ([#16001](https://github.com/rapidsai/cudf/pull/16001)) [@wence-](https://github.com/wence-) +- Fix typo bug in gather implementation ([#16000](https://github.com/rapidsai/cudf/pull/16000)) [@wence-](https://github.com/wence-) +- Extend coverage of groupby and rolling window nodes ([#15999](https://github.com/rapidsai/cudf/pull/15999)) [@wence-](https://github.com/wence-) +- Coverage of binops where one or both operands are a scalar ([#15998](https://github.com/rapidsai/cudf/pull/15998)) [@wence-](https://github.com/wence-) +- Add full coverage for whole-frame Agg expressions ([#15997](https://github.com/rapidsai/cudf/pull/15997)) [@wence-](https://github.com/wence-) +- Add tests covering magic methods of Expr objects ([#15996](https://github.com/rapidsai/cudf/pull/15996)) [@wence-](https://github.com/wence-) +- Add full coverage of utility functions ([#15995](https://github.com/rapidsai/cudf/pull/15995)) [@wence-](https://github.com/wence-) +- Test behaviour of containers ([#15994](https://github.com/rapidsai/cudf/pull/15994)) [@wence-](https://github.com/wence-) +- Fix implementation of any, all, and isbetween ([#15993](https://github.com/rapidsai/cudf/pull/15993)) [@wence-](https://github.com/wence-) +- Raise early on unhandled PythonScan node ([#15992](https://github.com/rapidsai/cudf/pull/15992)) [@wence-](https://github.com/wence-) +- Remove mapfunction nodes that don't exist/aren't supported 
([#15991](https://github.com/rapidsai/cudf/pull/15991)) [@wence-](https://github.com/wence-) +- Add test coverage for slicing with "out of bounds" negative indices ([#15990](https://github.com/rapidsai/cudf/pull/15990)) [@wence-](https://github.com/wence-) +- Standardize and type `Series.dt` methods ([#15987](https://github.com/rapidsai/cudf/pull/15987)) [@mroeschke](https://github.com/mroeschke) +- Refactor distinct with hashset-based algorithms ([#15984](https://github.com/rapidsai/cudf/pull/15984)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- resolve dependency-file-generator warning, remove unnecessary rapids-build-backend configuration ([#15980](https://github.com/rapidsai/cudf/pull/15980)) [@jameslamb](https://github.com/jameslamb) +- Project automation bug fixes ([#15971](https://github.com/rapidsai/cudf/pull/15971)) [@jarmak-nv](https://github.com/jarmak-nv) +- Add typing to single_column_frame ([#15965](https://github.com/rapidsai/cudf/pull/15965)) [@mroeschke](https://github.com/mroeschke) +- Move some misc Frame methods to appropriate locations ([#15963](https://github.com/rapidsai/cudf/pull/15963)) [@mroeschke](https://github.com/mroeschke) +- Condense pylibcudf data fixtures ([#15958](https://github.com/rapidsai/cudf/pull/15958)) [@lithomas1](https://github.com/lithomas1) +- Refactor fillna logic to push specifics toward Frame subclasses and Column subclasses ([#15957](https://github.com/rapidsai/cudf/pull/15957)) [@mroeschke](https://github.com/mroeschke) +- Remove unused parsing utilities ([#15955](https://github.com/rapidsai/cudf/pull/15955)) [@vuule](https://github.com/vuule) +- Remove `Scalar` container type from polars interpreter ([#15953](https://github.com/rapidsai/cudf/pull/15953)) [@wence-](https://github.com/wence-) +- Support arbitrary CUDA versions in UDF code ([#15950](https://github.com/rapidsai/cudf/pull/15950)) [@bdice](https://github.com/bdice) +- Support large strings in cudf::io::text::multibyte_split 
([#15947](https://github.com/rapidsai/cudf/pull/15947)) [@davidwendt](https://github.com/davidwendt) +- Add external issue label and project automation ([#15945](https://github.com/rapidsai/cudf/pull/15945)) [@jarmak-nv](https://github.com/jarmak-nv) +- Enable round-tripping of large strings in `cudf` ([#15944](https://github.com/rapidsai/cudf/pull/15944)) [@galipremsagar](https://github.com/galipremsagar) +- Add more complete type annotations in polars interpreter ([#15942](https://github.com/rapidsai/cudf/pull/15942)) [@wence-](https://github.com/wence-) +- Update implementations to build with the latest cuco ([#15938](https://github.com/rapidsai/cudf/pull/15938)) [@PointKernel](https://github.com/PointKernel) +- Support timezone aware pandas inputs in cudf ([#15935](https://github.com/rapidsai/cudf/pull/15935)) [@mroeschke](https://github.com/mroeschke) +- Define Column.nan_as_null to return self ([#15923](https://github.com/rapidsai/cudf/pull/15923)) [@mroeschke](https://github.com/mroeschke) +- Make Frame._dtype an iterator instead of a dict ([#15920](https://github.com/rapidsai/cudf/pull/15920)) [@mroeschke](https://github.com/mroeschke) +- Port start of datetime.hpp to pylibcudf ([#15916](https://github.com/rapidsai/cudf/pull/15916)) [@wence-](https://github.com/wence-) +- Introduce `NamedColumn` concept in cudf-polars ([#15914](https://github.com/rapidsai/cudf/pull/15914)) [@wence-](https://github.com/wence-) +- Avoid redefining Frame._get_columns_by_label in subclasses ([#15912](https://github.com/rapidsai/cudf/pull/15912)) [@mroeschke](https://github.com/mroeschke) +- Templatization of fixed-width parquet decoding kernels. 
([#15911](https://github.com/rapidsai/cudf/pull/15911)) [@nvdbaranec](https://github.com/nvdbaranec) +- New Decimal <--> Floating conversion ([#15905](https://github.com/rapidsai/cudf/pull/15905)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Use Arrow C Data Interface functions for Python interop ([#15904](https://github.com/rapidsai/cudf/pull/15904)) [@vyasr](https://github.com/vyasr) +- Use offsetalator in cudf::io::json::detail::parse_string ([#15900](https://github.com/rapidsai/cudf/pull/15900)) [@davidwendt](https://github.com/davidwendt) +- Rename strings multiple target replace API ([#15898](https://github.com/rapidsai/cudf/pull/15898)) [@davidwendt](https://github.com/davidwendt) +- Apply clang-tidy autofixes ([#15894](https://github.com/rapidsai/cudf/pull/15894)) [@vyasr](https://github.com/vyasr) +- Update Python labels and remove unnecessary ones ([#15893](https://github.com/rapidsai/cudf/pull/15893)) [@vyasr](https://github.com/vyasr) +- Clean up pylibcudf test assertions ([#15892](https://github.com/rapidsai/cudf/pull/15892)) [@lithomas1](https://github.com/lithomas1) +- Use offsetalator in orc rowgroup_char_counts_kernel ([#15891](https://github.com/rapidsai/cudf/pull/15891)) [@davidwendt](https://github.com/davidwendt) +- Ensure literals have correct dtype ([#15890](https://github.com/rapidsai/cudf/pull/15890)) [@wence-](https://github.com/wence-) +- Add overflow check when converting large strings to lists columns ([#15887](https://github.com/rapidsai/cudf/pull/15887)) [@davidwendt](https://github.com/davidwendt) +- Use offsetalator in nvtext::tokenize_with_vocabulary ([#15878](https://github.com/rapidsai/cudf/pull/15878)) [@davidwendt](https://github.com/davidwendt) +- Update interleave lists column for large strings ([#15877](https://github.com/rapidsai/cudf/pull/15877)) [@davidwendt](https://github.com/davidwendt) +- Simple NumPy 2 fixes that are clearly no behavior change ([#15876](https://github.com/rapidsai/cudf/pull/15876)) 
[@seberg](https://github.com/seberg) +- Support `arrow:schema` in Parquet writer to faithfully roundtrip `duration` types with Arrow ([#15875](https://github.com/rapidsai/cudf/pull/15875)) [@mhaseeb123](https://github.com/mhaseeb123) +- Refactor join benchmarks to target public APIs with the default stream ([#15873](https://github.com/rapidsai/cudf/pull/15873)) [@PointKernel](https://github.com/PointKernel) +- Fix url-decode benchmark to use offsetalator ([#15871](https://github.com/rapidsai/cudf/pull/15871)) [@davidwendt](https://github.com/davidwendt) +- Use offsetalator in strings shift functor ([#15870](https://github.com/rapidsai/cudf/pull/15870)) [@davidwendt](https://github.com/davidwendt) +- Memory Profiling ([#15866](https://github.com/rapidsai/cudf/pull/15866)) [@madsbk](https://github.com/madsbk) +- Expose stream parameter to public rolling APIs ([#15865](https://github.com/rapidsai/cudf/pull/15865)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Make Frame.astype return Self instead of a ColumnAccessor ([#15861](https://github.com/rapidsai/cudf/pull/15861)) [@mroeschke](https://github.com/mroeschke) +- Use ColumnAccessor row and column length attributes more consistently ([#15857](https://github.com/rapidsai/cudf/pull/15857)) [@mroeschke](https://github.com/mroeschke) +- add unit test setup for cudf_kafka ([#15853](https://github.com/rapidsai/cudf/pull/15853)) [@jameslamb](https://github.com/jameslamb) +- Remove internal usage of core.index.as_index in favor of cudf.Index ([#15851](https://github.com/rapidsai/cudf/pull/15851)) [@mroeschke](https://github.com/mroeschke) +- Ensure cudf.Series(cudf.Series(...)) creates a reference to the same index ([#15845](https://github.com/rapidsai/cudf/pull/15845)) [@mroeschke](https://github.com/mroeschke) +- Remove benchmark-specific use of pinned-pooled memory in Parquet multithreaded benchmark. 
([#15838](https://github.com/rapidsai/cudf/pull/15838)) [@nvdbaranec](https://github.com/nvdbaranec) +- Implement `on_bad_lines` in json reader ([#15834](https://github.com/rapidsai/cudf/pull/15834)) [@galipremsagar](https://github.com/galipremsagar) +- Make Column.to_pandas return Index instead of Series ([#15833](https://github.com/rapidsai/cudf/pull/15833)) [@mroeschke](https://github.com/mroeschke) +- Add test of interoperability of cuDF and arrow BYTE_STREAM_SPLIT encoders ([#15832](https://github.com/rapidsai/cudf/pull/15832)) [@etseidl](https://github.com/etseidl) +- Refactor Parquet writer options and builders ([#15831](https://github.com/rapidsai/cudf/pull/15831)) [@etseidl](https://github.com/etseidl) +- Migrate reshape.pxd to pylibcudf ([#15827](https://github.com/rapidsai/cudf/pull/15827)) [@lithomas1](https://github.com/lithomas1) +- Remove legacy JSON reader and concurrent_unordered_map.cuh. ([#15813](https://github.com/rapidsai/cudf/pull/15813)) [@bdice](https://github.com/bdice) +- Switch cuIO benchmarks to use pinned-pool host allocations by default. 
([#15805](https://github.com/rapidsai/cudf/pull/15805)) [@nvdbaranec](https://github.com/nvdbaranec) +- Change thrust::count_if call to raw kernel in strings split APIs ([#15762](https://github.com/rapidsai/cudf/pull/15762)) [@davidwendt](https://github.com/davidwendt) +- Improve performance for long strings for nvtext::replace_tokens ([#15756](https://github.com/rapidsai/cudf/pull/15756)) [@davidwendt](https://github.com/davidwendt) +- Implement chunked parquet reader in cudf-python ([#15728](https://github.com/rapidsai/cudf/pull/15728)) [@galipremsagar](https://github.com/galipremsagar) +- Add `from_arrow_host` functions for cudf interop with nanoarrow ([#15645](https://github.com/rapidsai/cudf/pull/15645)) [@zeroshade](https://github.com/zeroshade) +- Add ability to enable rmm pool on `cudf.pandas` import ([#15628](https://github.com/rapidsai/cudf/pull/15628)) [@galipremsagar](https://github.com/galipremsagar) +- Executor for polars logical plans ([#15504](https://github.com/rapidsai/cudf/pull/15504)) [@wence-](https://github.com/wence-) +- Implement day_name and month_name to match pandas ([#15479](https://github.com/rapidsai/cudf/pull/15479)) [@btepera](https://github.com/btepera) +- Utilities for decimal <--> floating conversion ([#15359](https://github.com/rapidsai/cudf/pull/15359)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- For powers of 10, replace ipow with switch ([#15353](https://github.com/rapidsai/cudf/pull/15353)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Use rapids-build-backend. 
([#15245](https://github.com/rapidsai/cudf/pull/15245)) [@vyasr](https://github.com/vyasr) +- Add `codecov` coverage for `pandas_tests` ([#14513](https://github.com/rapidsai/cudf/pull/14513)) [@galipremsagar](https://github.com/galipremsagar) + # cudf 24.06.00 (5 Jun 2024) ## 🚨 Breaking Changes From 496151225aaf90318c089939d3a74e6ccee4e28d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 14 Aug 2024 17:32:28 -0500 Subject: [PATCH 2/2] Ensure managed memory is supported in cudf.pandas. (#16552) Currently, WSL users of `cudf.pandas` can try to enable UVM (managed memory) but it is not supported by the driver. This PR detects whether UVM is supported before enabling a managed memory pool or prefetching. Closes https://github.com/rapidsai/cudf/issues/16551. --------- Co-authored-by: Vyas Ramasubramani Co-authored-by: Lawrence Mitchell --- dependencies.yaml | 2 +- docs/cudf/source/cudf_pandas/how-it-works.md | 21 ++++++++++++----- python/cudf/cudf/_lib/pylibcudf/utils.pyx | 22 ++++++++++++++++++ python/cudf/cudf/pandas/__init__.py | 24 ++++++++++++++++---- python/cudf_polars/pyproject.toml | 2 +- 5 files changed, 59 insertions(+), 12 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 7ecce362101..4c93ef60dd3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -631,7 +631,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.0 + - polars>=1.0,<1.3 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] diff --git a/docs/cudf/source/cudf_pandas/how-it-works.md b/docs/cudf/source/cudf_pandas/how-it-works.md index 8efd9d7e063..0bb87f60afe 100644 --- a/docs/cudf/source/cudf_pandas/how-it-works.md +++ b/docs/cudf/source/cudf_pandas/how-it-works.md @@ -44,11 +44,20 @@ allocation may be a bottleneck depending on the workload. Managed memory enables oversubscribing GPU memory. This allows cudf.pandas to process data larger than GPU memory in many cases, without CPU (Pandas) fallback. 
+```{note} +CUDA Managed Memory on Windows, and more specifically Windows Subsystem for +Linux (WSL2), [does not support oversubscription]( +https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#system-requirements-for-unified-memory), +only unified addressing. Furthermore, managed memory on WSL2 has undesirable +performance characteristics. Therefore, `cudf.pandas` uses a non-managed pool +allocator on WSL2, so `cudf.pandas` is limited to the physical size of GPU memory. +``` + Other memory allocators can be used by changing the environment -variable `CUDF_PANDAS_RMM_MODE` to one of the following. +variable `CUDF_PANDAS_RMM_MODE` to one of the following: -1. "managed_pool" (default): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator. -2. "managed": CUDA Unified Memory, (managed memory) with no pool allocator. -3. "async": CUDA's built-in pool asynchronous pool allocator with normal CUDA device memory. -4. "pool": RMM's asynchronous pool allocator with normal CUDA device memory. -5. "cuda": normal CUDA device memory with no pool allocator. +1. `"managed_pool"` (default, if supported): CUDA Unified Memory (managed memory) with RMM's asynchronous pool allocator. +2. `"managed"`: CUDA Unified Memory (managed memory) with no pool allocator. +3. `"async"`: CUDA's built-in asynchronous pool allocator with normal CUDA device memory. +4. `"pool"` (default if `"managed_pool"` is not supported): RMM's asynchronous pool allocator with normal CUDA device memory. +5. `"cuda"`: normal CUDA device memory with no pool allocator. 
diff --git a/python/cudf/cudf/_lib/pylibcudf/utils.pyx b/python/cudf/cudf/_lib/pylibcudf/utils.pyx index b4427e8ecff..42e3575ed44 100644 --- a/python/cudf/cudf/_lib/pylibcudf/utils.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/utils.pyx @@ -6,6 +6,8 @@ from libc.stdint cimport uintptr_t from libcpp.functional cimport reference_wrapper from libcpp.vector cimport vector +from cuda import cudart + from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar from cudf._lib.pylibcudf.libcudf.types cimport bitmask_type @@ -34,3 +36,23 @@ cdef vector[reference_wrapper[const scalar]] _as_vector(list source): c_scalars.push_back( reference_wrapper[constscalar](dereference((slr).c_obj))) return c_scalars + + +def _is_concurrent_managed_access_supported(): + """Check the availability of concurrent managed access (UVM). + + Note that WSL2 does not support managed memory. + """ + + # Ensure CUDA is initialized before checking cudaDevAttrConcurrentManagedAccess + cudart.cudaFree(0) + + device_id = 0 + err, supports_managed_access = cudart.cudaDeviceGetAttribute( + cudart.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess, device_id + ) + if err != cudart.cudaError_t.cudaSuccess: + raise RuntimeError( + f"Failed to check cudaDevAttrConcurrentManagedAccess with error {err}" + ) + return supports_managed_access != 0 diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index a6667a7bcd9..e88e795671e 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -26,8 +26,8 @@ } -def _enable_managed_prefetching(rmm_mode): - if "managed" in rmm_mode: +def _enable_managed_prefetching(rmm_mode, managed_memory_is_supported): + if managed_memory_is_supported and "managed" in rmm_mode: for key in _SUPPORTED_PREFETCHES: pylibcudf.experimental.enable_prefetching(key) @@ -40,7 +40,20 @@ def install(): global LOADED LOADED = loader is not None - rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool") + # The default mode is 
"managed_pool" if UVM is supported, otherwise "pool" + managed_memory_is_supported = ( + pylibcudf.utils._is_concurrent_managed_access_supported() + ) + default_rmm_mode = ( + "managed_pool" if managed_memory_is_supported else "pool" + ) + rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", default_rmm_mode) + + if "managed" in rmm_mode and not managed_memory_is_supported: + raise ValueError( + f"Managed memory is not supported on this system, so the requested {rmm_mode=} is invalid." + ) + # Check if a non-default memory resource is set current_mr = rmm.mr.get_current_device_resource() if not isinstance(current_mr, rmm.mr.CudaMemoryResource): @@ -53,6 +66,7 @@ def install(): free_memory, _ = rmm.mr.available_device_memory() free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) new_mr = current_mr + if rmm_mode == "pool": new_mr = rmm.mr.PoolMemoryResource( current_mr, @@ -71,8 +85,10 @@ def install(): ) elif rmm_mode != "cuda": raise ValueError(f"Unsupported {rmm_mode=}") + rmm.mr.set_current_device_resource(new_mr) - _enable_managed_prefetching(rmm_mode) + + _enable_managed_prefetching(rmm_mode, managed_memory_is_supported) def pytest_load_initial_conftests(early_config, parser, args): diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index def1d086cc1..7b29ad3373d 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -20,7 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "cudf==24.8.*,>=0.0.0a0", - "polars>=1.0", + "polars>=1.0,<1.3", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers",