Skip to content

Commit

Permalink
Merge branch 'branch-24.08' into 15776
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Jun 26, 2024
2 parents 10112aa + cdfb550 commit 865d756
Show file tree
Hide file tree
Showing 14 changed files with 69 additions and 16 deletions.
16 changes: 16 additions & 0 deletions cpp/tests/ast/transform_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ TEST_F(TransformTest, ColumnReference)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
}

TEST_F(TransformTest, BasicAdditionDoubleCast)
{
auto c_0 = column_wrapper<double>{3, 20, 1, 50};
std::vector<__int128_t> data1{10, 7, 20, 0};
auto c_1 = cudf::test::fixed_point_column_wrapper<__int128_t>(
data1.begin(), data1.end(), numeric::scale_type{0});
auto table = cudf::table_view{{c_0, c_1}};
auto col_ref_0 = cudf::ast::column_reference(0);
auto col_ref_1 = cudf::ast::column_reference(1);
auto cast = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_FLOAT64, col_ref_1);
auto expression = cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0, cast);
auto expected = column_wrapper<double>{13, 27, 21, 50};
auto result = cudf::compute_column(table, expression);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
}

TEST_F(TransformTest, Literal)
{
auto c_0 = column_wrapper<int32_t>{3, 20, 1, 50};
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/csv.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
from cudf._lib.pylibcudf.io.datasource cimport Datasource, NativeFileDatasource
from cudf._lib.pylibcudf.libcudf.types cimport data_type
from cudf._lib.types cimport dtype_to_data_type

Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/io/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources datasource.pyx utils.pyx)
set(cython_sources utils.pyx)
set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
CXX
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/io/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.column cimport Column
from cudf._lib.io.datasource cimport Datasource
from cudf._lib.pylibcudf.io.datasource cimport Datasource
from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
from cudf._lib.pylibcudf.libcudf.io.types cimport (
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/orc.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ except ImportError:

cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
from cudf._lib.column cimport Column
from cudf._lib.io.datasource cimport NativeFileDatasource
from cudf._lib.io.utils cimport (
make_sink_info,
make_source_info,
update_column_struct_field_names,
)
from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
from cudf._lib.pylibcudf.libcudf.io.orc cimport (
chunked_orc_writer_options,
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
cimport cudf._lib.pylibcudf.libcudf.types as cudf_types
from cudf._lib.column cimport Column
from cudf._lib.expressions cimport Expression
from cudf._lib.io.datasource cimport NativeFileDatasource
from cudf._lib.io.utils cimport (
make_sinks_info,
make_source_info,
update_struct_field_names,
)
from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
from cudf._lib.pylibcudf.libcudf.expressions cimport expression
from cudf._lib.pylibcudf.libcudf.io.parquet cimport (
chunked_parquet_reader as cpp_chunked_parquet_reader,
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources avro.pyx types.pyx)
set(cython_sources avro.pyx datasource.pyx types.pyx)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand All @@ -21,5 +21,5 @@ rapids_cython_create_modules(
LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_io_ ASSOCIATED_TARGETS cudf
)

set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_types)
set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_datasource pylibcudf_io_types)
link_to_pyarrow_headers("${targets_using_arrow_headers}")
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from . cimport avro, types
from . cimport avro, datasource, types
from .types cimport SourceInfo, TableWithMetadata
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/pylibcudf/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from . import avro, types
from . import avro, datasource, types
from .types import SourceInfo, TableWithMetadata
File renamed without changes.
File renamed without changes.
23 changes: 19 additions & 4 deletions python/cudf/cudf/_lib/pylibcudf/io/types.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ from libcpp.string cimport string
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.io.datasource cimport Datasource
from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
from cudf._lib.pylibcudf.libcudf.io.types cimport (
host_buffer,
source_info,
Expand Down Expand Up @@ -56,9 +58,8 @@ cdef class SourceInfo:
Parameters
----------
sources : List[Union[str, os.PathLike, bytes, io.BytesIO]]
A homogeneous list of sources (this can be a string filename,
an os.PathLike, bytes, or an io.BytesIO) to read from.
sources : List[Union[str, os.PathLike, bytes, io.BytesIO, DataSource]]
A homogeneous list of sources to read from.
Mixing different types of sources will raise a `ValueError`.
"""
Expand All @@ -68,6 +69,7 @@ cdef class SourceInfo:
raise ValueError("Need to pass at least one source")

cdef vector[string] c_files
cdef vector[datasource*] c_datasources

if isinstance(sources[0], (os.PathLike, str)):
c_files.reserve(len(sources))
Expand All @@ -84,6 +86,13 @@ cdef class SourceInfo:

self.c_obj = move(source_info(c_files))
return
elif isinstance(sources[0], Datasource):
for csrc in sources:
if not isinstance(csrc, Datasource):
raise ValueError("All sources must be of the same type!")
c_datasources.push_back((<Datasource>csrc).get_datasource())
self.c_obj = move(source_info(c_datasources))
return

# TODO: host_buffer is deprecated API, use host_span instead
cdef vector[host_buffer] c_host_buffers
Expand All @@ -106,5 +115,11 @@ cdef class SourceInfo:
c_buffer = bio.getbuffer() # check if empty?
c_host_buffers.push_back(host_buffer(<char*>&c_buffer[0],
c_buffer.shape[0]))
else:
raise ValueError("Sources must be a list of str/paths, "
"bytes, io.BytesIO, or a Datasource")

if empty_buffer is True:
c_host_buffers.push_back(host_buffer(<char*>NULL, 0))

self.c_obj = source_info(c_host_buffers)
self.c_obj = move(source_info(c_host_buffers))
24 changes: 23 additions & 1 deletion python/cudf/cudf/pylibcudf_tests/test_source_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@

import io

import pyarrow as pa
import pytest

import cudf._lib.pylibcudf as plc
from cudf._lib.pylibcudf.io.datasource import NativeFileDatasource


@pytest.mark.parametrize(
"source", ["a.txt", b"hello world", io.BytesIO(b"hello world")]
"source",
[
"a.txt",
b"hello world",
io.BytesIO(b"hello world"),
NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
],
)
def test_source_info_ctor(source, tmp_path):
if isinstance(source, str):
Expand All @@ -28,6 +36,10 @@ def test_source_info_ctor(source, tmp_path):
["a.txt", "a.txt"],
[b"hello world", b"hello there"],
[io.BytesIO(b"hello world"), io.BytesIO(b"hello there")],
[
NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
],
],
)
def test_source_info_ctor_multiple(sources, tmp_path):
Expand All @@ -54,6 +66,11 @@ def test_source_info_ctor_multiple(sources, tmp_path):
io.BytesIO(b"hello there"),
b"hello world",
],
[
NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
"awef.txt",
b"hello world",
],
],
)
def test_source_info_ctor_mixing_invalid(sources, tmp_path):
Expand All @@ -67,3 +84,8 @@ def test_source_info_ctor_mixing_invalid(sources, tmp_path):
sources[i] = str(file)
with pytest.raises(ValueError):
plc.io.SourceInfo(sources)


def test_source_info_invalid():
with pytest.raises(ValueError):
plc.io.SourceInfo([123])
2 changes: 1 addition & 1 deletion python/cudf_kafka/cudf_kafka/_lib/kafka.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.vector cimport vector

from cudf._lib.io.datasource cimport Datasource
from cudf._lib.pylibcudf.io.datasource cimport Datasource
from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource


Expand Down

0 comments on commit 865d756

Please sign in to comment.