Skip to content

Commit

Permalink
Merge branch 'branch-24.06' into docs/cudf-pandas-ga
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored May 15, 2024
2 parents 3a64140 + 0fea3ed commit c39e73f
Show file tree
Hide file tree
Showing 274 changed files with 1,183 additions and 1,013 deletions.
21 changes: 14 additions & 7 deletions cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,20 @@ include_guard(GLOBAL)
# pyarrow.
function(find_libarrow_in_python_wheel PYARROW_VERSION)
string(REPLACE "." ";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}")
list(GET PYARROW_VER_COMPONENTS 0 PYARROW_SO_VER)
# The soname for Arrow libraries is constructed using the major version plus "00". Note that,
# although it may seem like it due to Arrow almost exclusively releasing new major versions (i.e.
# `${MINOR_VERSION}${PATCH_VERSION}` is almost always equivalent to "00"),
# the soname is not generated by concatenating the major, minor, and patch versions into a single
# version number soname, just `${MAJOR_VERSION}00`
set(PYARROW_LIB "libarrow.so.${PYARROW_SO_VER}00")
list(GET PYARROW_VER_COMPONENTS 0 PYARROW_MAJOR_VER)
list(GET PYARROW_VER_COMPONENTS 1 PYARROW_MINOR_VER)

# Ensure that the major and minor versions are two digits long
string(LENGTH ${PYARROW_MAJOR_VER} PYARROW_MAJOR_LENGTH)
string(LENGTH ${PYARROW_MINOR_VER} PYARROW_MINOR_LENGTH)
if(${PYARROW_MAJOR_LENGTH} EQUAL 1)
set(PYARROW_MAJOR_VER "0${PYARROW_MAJOR_VER}")
endif()
if(${PYARROW_MINOR_LENGTH} EQUAL 1)
set(PYARROW_MINOR_VER "0${PYARROW_MINOR_VER}")
endif()

set(PYARROW_LIB "libarrow.so.${PYARROW_MAJOR_VER}${PYARROW_MINOR_VER}")

string(
APPEND
Expand Down
4 changes: 2 additions & 2 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,8 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
# Allow runtime version to float up to minor version
- pyarrow>=16.0.0,<17.0.0a0
# Allow runtime version to float up to patch version
- pyarrow>=16.0.0,<16.1.0a0
cuda_version:
specific:
- output_types: conda
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ target_link_libraries(strings_udf PUBLIC cudf_strings_udf)
set(targets_using_arrow_headers interop avro csv orc json parquet)
link_to_pyarrow_headers("${targets_using_arrow_headers}")

add_subdirectory(cpp)
add_subdirectory(io)
add_subdirectory(nvtext)
add_subdirectory(pylibcudf)
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/avro.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.string cimport string
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.cpp.io.avro cimport (
from cudf._lib.io.utils cimport make_source_info
from cudf._lib.pylibcudf.libcudf.io.avro cimport (
avro_reader_options,
read_avro as libcudf_read_avro,
)
from cudf._lib.cpp.io.types cimport table_with_metadata
from cudf._lib.cpp.types cimport size_type
from cudf._lib.io.utils cimport make_source_info
from cudf._lib.pylibcudf.libcudf.io.types cimport table_with_metadata
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport data_from_unique_ptr


Expand Down
11 changes: 7 additions & 4 deletions python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from typing import Literal

Expand All @@ -7,9 +7,12 @@ from libcpp.memory cimport unique_ptr

from rmm._lib.device_buffer cimport device_buffer

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
column_view,
mutable_column_view,
)
from cudf._lib.pylibcudf.libcudf.types cimport size_type


cdef class Column:
Expand Down
20 changes: 10 additions & 10 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ from cudf._lib.types import dtype_from_pylibcudf_column
# from_pylibcudf by instead creating an empty numeric column. We will be able
# to remove this once column factories are exposed to pylibcudf.

cimport cudf._lib.cpp.copying as cpp_copying
cimport cudf._lib.cpp.types as libcudf_types
cimport cudf._lib.cpp.unary as libcudf_unary
from cudf._lib cimport pylibcudf
from cudf._lib.cpp.column.column cimport column, column_contents
from cudf._lib.cpp.column.column_factories cimport (
cimport cudf._lib.pylibcudf.libcudf.copying as cpp_copying
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
cimport cudf._lib.pylibcudf.libcudf.unary as libcudf_unary
from cudf._lib.pylibcudf cimport Column as plc_Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_contents
from cudf._lib.pylibcudf.libcudf.column.column_factories cimport (
make_column_from_scalar as cpp_make_column_from_scalar,
make_numeric_column,
)
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.null_mask cimport null_count as cpp_null_count
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.scalar cimport DeviceScalar


Expand Down Expand Up @@ -633,7 +633,7 @@ cdef class Column:
# TODO: This function call is what requires cimporting pylibcudf.
# We can remove the cimport once we can directly do
# pylibcudf.column_factories.make_numeric_column or equivalent.
col = pylibcudf.Column.from_libcudf(
col = plc_Column.from_libcudf(
move(
make_numeric_column(
new_dtype, col.size(), libcudf_types.mask_state.ALL_NULL
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/copying.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from cudf._lib.cpp.contiguous_split cimport packed_columns
from cudf._lib.pylibcudf.libcudf.contiguous_split cimport packed_columns


cdef class _CPackedColumns:
Expand Down
16 changes: 9 additions & 7 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@ from cudf.core.abc import Serializable

from libcpp.memory cimport make_unique

cimport cudf._lib.cpp.contiguous_split as cpp_contiguous_split
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.lists.gather cimport (
cimport cudf._lib.pylibcudf.libcudf.contiguous_split as cpp_contiguous_split
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.gather cimport (
segmented_gather as cpp_segmented_gather,
)
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
lists_column_view,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view

# workaround for https://github.com/cython/cython/issues/3885
Expand Down
10 changes: 0 additions & 10 deletions python/cudf/cudf/_lib/cpp/lists/count_elements.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/lists/explode.pxd

This file was deleted.

15 changes: 0 additions & 15 deletions python/cudf/cudf/_lib/cpp/strings/extract.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/strings/findall.pxd

This file was deleted.

16 changes: 0 additions & 16 deletions python/cudf/cudf/_lib/cpp/strings/strip.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/strings/wrap.pxd

This file was deleted.

16 changes: 8 additions & 8 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ from libcpp.string cimport string
from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.cpp.types cimport data_type
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
from cudf._lib.pylibcudf.libcudf.types cimport data_type
from cudf._lib.types cimport dtype_to_data_type

import numpy as np
Expand All @@ -18,7 +18,7 @@ import pandas as pd
import cudf
from cudf.core.buffer import acquire_spill_lock

from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.types cimport size_type

import errno
import os
Expand All @@ -29,22 +29,22 @@ from io import BytesIO, StringIO
from libc.stdint cimport int32_t
from libcpp cimport bool

from cudf._lib.cpp.io.csv cimport (
from cudf._lib.io.utils cimport make_sink_info, make_source_info
from cudf._lib.pylibcudf.libcudf.io.csv cimport (
csv_reader_options,
csv_writer_options,
read_csv as cpp_read_csv,
write_csv as cpp_write_csv,
)
from cudf._lib.cpp.io.data_sink cimport data_sink
from cudf._lib.cpp.io.types cimport (
from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
from cudf._lib.pylibcudf.libcudf.io.types cimport (
compression_type,
quote_style,
sink_info,
source_info,
table_with_metadata,
)
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.io.utils cimport make_sink_info, make_source_info
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table

from pyarrow.lib import NativeFile
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ from cudf.core.buffer import acquire_spill_lock
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

cimport cudf._lib.cpp.datetime as libcudf_datetime
cimport cudf._lib.pylibcudf.libcudf.datetime as libcudf_datetime
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.filling cimport calendrical_month_sequence
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.filling cimport calendrical_month_sequence
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/expressions.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from libc.stdint cimport int32_t, int64_t
from libcpp.memory cimport unique_ptr

from cudf._lib.cpp.expressions cimport (
from cudf._lib.pylibcudf.libcudf.expressions cimport (
column_reference,
expression,
literal,
operation,
)
from cudf._lib.cpp.scalar.scalar cimport (
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport (
numeric_scalar,
scalar,
string_scalar,
Expand Down
9 changes: 6 additions & 3 deletions python/cudf/cudf/_lib/expressions.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ from libcpp.memory cimport make_unique, unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf._lib.cpp cimport expressions as libcudf_exp
from cudf._lib.cpp.types cimport size_type
from cudf._lib.cpp.wrappers.timestamps cimport timestamp_ms, timestamp_us
from cudf._lib.pylibcudf.libcudf cimport expressions as libcudf_exp
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport (
timestamp_ms,
timestamp_us,
)

# Necessary for proper casting, see below.
ctypedef int32_t underlying_type_ast_operator
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib.scalar import as_device_scalar

from cudf._lib.cpp.replace cimport replace_policy
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar

from cudf._lib import pylibcudf
from cudf._lib.aggregation import make_aggregation
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/_lib/hash.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ from libcpp.pair cimport pair
from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.hash cimport (
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.hash cimport (
md5,
murmurhash3_x86_32,
sha1,
Expand All @@ -20,9 +20,11 @@ from cudf._lib.cpp.hash cimport (
sha512,
xxhash_64,
)
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.partitioning cimport (
hash_partition as cpp_hash_partition,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns


Expand Down
Loading

0 comments on commit c39e73f

Please sign in to comment.