Skip to content

Commit

Permalink
Migrate all cpp pxd files into pylibcudf (#15740)
Browse files Browse the repository at this point in the history
This PR is a mass migration of all the Cython headers (`.pxd` files) that expose libcudf to Cython, moving them from `cudf._lib.cpp` into the pylibcudf subpackage (`cudf._lib.pylibcudf.libcudf`). This will facilitate splitting pylibcudf out of cudf. It should also allow us to clean up our imports before that split happens, since it preempts any concerns about circular imports (cudf -> pylibcudf -> cudf._lib.cpp).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15740
  • Loading branch information
vyasr authored May 14, 2024
1 parent cbe2775 commit 2fb8efb
Show file tree
Hide file tree
Showing 267 changed files with 1,134 additions and 995 deletions.
1 change: 0 additions & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ target_link_libraries(strings_udf PUBLIC cudf_strings_udf)
set(targets_using_arrow_headers interop avro csv orc json parquet)
link_to_pyarrow_headers("${targets_using_arrow_headers}")

add_subdirectory(cpp)
add_subdirectory(io)
add_subdirectory(nvtext)
add_subdirectory(pylibcudf)
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/avro.pyx
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.string cimport string
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.cpp.io.avro cimport (
from cudf._lib.io.utils cimport make_source_info
from cudf._lib.pylibcudf.libcudf.io.avro cimport (
avro_reader_options,
read_avro as libcudf_read_avro,
)
from cudf._lib.cpp.io.types cimport table_with_metadata
from cudf._lib.cpp.types cimport size_type
from cudf._lib.io.utils cimport make_source_info
from cudf._lib.pylibcudf.libcudf.io.types cimport table_with_metadata
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport data_from_unique_ptr


Expand Down
11 changes: 7 additions & 4 deletions python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from typing import Literal

Expand All @@ -7,9 +7,12 @@ from libcpp.memory cimport unique_ptr

from rmm._lib.device_buffer cimport device_buffer

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport (
column_view,
mutable_column_view,
)
from cudf._lib.pylibcudf.libcudf.types cimport size_type


cdef class Column:
Expand Down
20 changes: 10 additions & 10 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,18 @@ from cudf._lib.types import dtype_from_pylibcudf_column
# from_pylibcudf by instead creating an empty numeric column. We will be able
# to remove this once column factories are exposed to pylibcudf.

cimport cudf._lib.cpp.copying as cpp_copying
cimport cudf._lib.cpp.types as libcudf_types
cimport cudf._lib.cpp.unary as libcudf_unary
from cudf._lib cimport pylibcudf
from cudf._lib.cpp.column.column cimport column, column_contents
from cudf._lib.cpp.column.column_factories cimport (
cimport cudf._lib.pylibcudf.libcudf.copying as cpp_copying
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
cimport cudf._lib.pylibcudf.libcudf.unary as libcudf_unary
from cudf._lib.pylibcudf cimport Column as plc_Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column, column_contents
from cudf._lib.pylibcudf.libcudf.column.column_factories cimport (
make_column_from_scalar as cpp_make_column_from_scalar,
make_numeric_column,
)
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.null_mask cimport null_count as cpp_null_count
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.scalar cimport DeviceScalar


Expand Down Expand Up @@ -633,7 +633,7 @@ cdef class Column:
# TODO: This function call is what requires cimporting pylibcudf.
# We can remove the cimport once we can directly do
# pylibcudf.column_factories.make_numeric_column or equivalent.
col = pylibcudf.Column.from_libcudf(
col = plc_Column.from_libcudf(
move(
make_numeric_column(
new_dtype, col.size(), libcudf_types.mask_state.ALL_NULL
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/copying.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Copyright (c) 2021-2023, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from cudf._lib.cpp.contiguous_split cimport packed_columns
from cudf._lib.pylibcudf.libcudf.contiguous_split cimport packed_columns


cdef class _CPackedColumns:
Expand Down
16 changes: 9 additions & 7 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,17 @@ from cudf.core.abc import Serializable

from libcpp.memory cimport make_unique

cimport cudf._lib.cpp.contiguous_split as cpp_contiguous_split
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.lists.gather cimport (
cimport cudf._lib.pylibcudf.libcudf.contiguous_split as cpp_contiguous_split
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.lists.gather cimport (
segmented_gather as cpp_segmented_gather,
)
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport (
lists_column_view,
)
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view

# workaround for https://github.com/cython/cython/issues/3885
Expand Down
10 changes: 0 additions & 10 deletions python/cudf/cudf/_lib/cpp/lists/count_elements.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/lists/explode.pxd

This file was deleted.

15 changes: 0 additions & 15 deletions python/cudf/cudf/_lib/cpp/strings/extract.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/strings/findall.pxd

This file was deleted.

16 changes: 0 additions & 16 deletions python/cudf/cudf/_lib/cpp/strings/strip.pxd

This file was deleted.

14 changes: 0 additions & 14 deletions python/cudf/cudf/_lib/cpp/strings/wrap.pxd

This file was deleted.

16 changes: 8 additions & 8 deletions python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ from libcpp.string cimport string
from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.cpp.types cimport data_type
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
from cudf._lib.pylibcudf.libcudf.types cimport data_type
from cudf._lib.types cimport dtype_to_data_type

import numpy as np
Expand All @@ -18,7 +18,7 @@ import pandas as pd
import cudf
from cudf.core.buffer import acquire_spill_lock

from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.types cimport size_type

import errno
import os
Expand All @@ -29,22 +29,22 @@ from io import BytesIO, StringIO
from libc.stdint cimport int32_t
from libcpp cimport bool

from cudf._lib.cpp.io.csv cimport (
from cudf._lib.io.utils cimport make_sink_info, make_source_info
from cudf._lib.pylibcudf.libcudf.io.csv cimport (
csv_reader_options,
csv_writer_options,
read_csv as cpp_read_csv,
write_csv as cpp_write_csv,
)
from cudf._lib.cpp.io.data_sink cimport data_sink
from cudf._lib.cpp.io.types cimport (
from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
from cudf._lib.pylibcudf.libcudf.io.types cimport (
compression_type,
quote_style,
sink_info,
source_info,
table_with_metadata,
)
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.io.utils cimport make_sink_info, make_source_info
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table

from pyarrow.lib import NativeFile
Expand Down
12 changes: 6 additions & 6 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ from cudf.core.buffer import acquire_spill_lock
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

cimport cudf._lib.cpp.datetime as libcudf_datetime
cimport cudf._lib.pylibcudf.libcudf.datetime as libcudf_datetime
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.filling cimport calendrical_month_sequence
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.types cimport size_type
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.filling cimport calendrical_month_sequence
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar


Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/expressions.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from libc.stdint cimport int32_t, int64_t
from libcpp.memory cimport unique_ptr

from cudf._lib.cpp.expressions cimport (
from cudf._lib.pylibcudf.libcudf.expressions cimport (
column_reference,
expression,
literal,
operation,
)
from cudf._lib.cpp.scalar.scalar cimport (
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport (
numeric_scalar,
scalar,
string_scalar,
Expand Down
9 changes: 6 additions & 3 deletions python/cudf/cudf/_lib/expressions.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ from libcpp.memory cimport make_unique, unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf._lib.cpp cimport expressions as libcudf_exp
from cudf._lib.cpp.types cimport size_type
from cudf._lib.cpp.wrappers.timestamps cimport timestamp_ms, timestamp_us
from cudf._lib.pylibcudf.libcudf cimport expressions as libcudf_exp
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.pylibcudf.libcudf.wrappers.timestamps cimport (
timestamp_ms,
timestamp_us,
)

# Necessary for proper casting, see below.
ctypedef int32_t underlying_type_ast_operator
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib.scalar import as_device_scalar

from cudf._lib.cpp.replace cimport replace_policy
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.pylibcudf.libcudf.replace cimport replace_policy
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport scalar

from cudf._lib import pylibcudf
from cudf._lib.aggregation import make_aggregation
Expand Down
14 changes: 8 additions & 6 deletions python/cudf/cudf/_lib/hash.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ from libcpp.pair cimport pair
from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.hash cimport (
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.hash cimport (
md5,
murmurhash3_x86_32,
sha1,
Expand All @@ -20,9 +20,11 @@ from cudf._lib.cpp.hash cimport (
sha512,
xxhash_64,
)
from cudf._lib.cpp.partitioning cimport hash_partition as cpp_hash_partition
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.partitioning cimport (
hash_partition as cpp_hash_partition,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns


Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/interop.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ from libcpp.utility cimport move

from cudf._lib import pylibcudf

from cudf._lib.cpp.interop cimport (
from cudf._lib.pylibcudf.libcudf.interop cimport (
DLManagedTensor,
from_dlpack as cpp_from_dlpack,
to_dlpack as cpp_to_dlpack,
)
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
columns_from_unique_ptr,
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/io/datasource.pxd
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.memory cimport shared_ptr

from cudf._lib.cpp.io.arrow_io_source cimport arrow_io_source
from cudf._lib.cpp.io.datasource cimport datasource
from cudf._lib.pylibcudf.libcudf.io.arrow_io_source cimport arrow_io_source
from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource


cdef class Datasource:
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/io/datasource.pyx
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.memory cimport shared_ptr
from pyarrow.includes.libarrow cimport CRandomAccessFile
from pyarrow.lib cimport NativeFile

from cudf._lib.cpp.io.arrow_io_source cimport arrow_io_source
from cudf._lib.cpp.io.datasource cimport datasource
from cudf._lib.pylibcudf.libcudf.io.arrow_io_source cimport arrow_io_source
from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource


cdef class Datasource:
Expand Down
Loading

0 comments on commit 2fb8efb

Please sign in to comment.