From 5595fda27107129d76e48b475e1453ec487a801a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 13 Oct 2021 11:05:17 -0400 Subject: [PATCH 01/70] pass pyobject to c++ library rather than map --- cpp/libcudf_kafka/CMakeLists.txt | 5 +- .../include/cudf_kafka/kafka_consumer.hpp | 4 +- cpp/libcudf_kafka/src/kafka_consumer.cpp | 111 ++++++++++++------ python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 4 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 2 +- python/custreamz/custreamz/kafka.py | 8 +- 6 files changed, 82 insertions(+), 52 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 9f060c93215..0c27352e003 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -44,6 +44,9 @@ rapids_cpm_init() include(cmake/thirdparty/get_cudf.cmake) include(cmake/thirdparty/get_rdkafka.cmake) +# Locate Python Development headers +find_package(Python3 COMPONENTS Interpreter Development) + # # GTests if enabled if (BUILD_TESTS) # GoogleTest @@ -68,7 +71,7 @@ target_include_directories(cudf_kafka ################################################################################################### # - library paths --------------------------------------------------------------------------------- -target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA) +target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA Python3::Python) set_target_properties(cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index d752acbceaf..d7e295fac64 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -49,7 +49,7 @@ class kafka_consumer : public cudf::io::datasource { * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client */ - kafka_consumer(std::map 
const& configs); + kafka_consumer(PyObject const& configs); /** * @brief Instantiate a Kafka consumer object. Documentation for librdkafka configurations can be @@ -66,7 +66,7 @@ class kafka_consumer : public cudf::io::datasource { * before batch_timeout, a smaller subset will be returned * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ - kafka_consumer(std::map const& configs, + kafka_consumer(PyObject const& configs, std::string const& topic_name, int partition, int64_t start_offset, diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index a76d6b0a985..7f40164111c 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -14,37 +14,70 @@ * limitations under the License. */ -#include "cudf_kafka/kafka_consumer.hpp" +#define PY_SSIZE_T_CLEAN +#include + #include #include #include +#include "cudf_kafka/kafka_consumer.hpp" + +/** + * @brief Callback to retrieve OAuth token from external source. Invoked when + * token refresh is required. 
+ */ +class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { + void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) + { + printf("oauthbearer_token_refresh_cb\n"); + Py_Initialize(); + Py_Finalize(); + } +}; + +// void rd_kafka_conf_set_oauthbearer_token_refresh_cb(rd_kafka_conf_t *conf, +// void (*oauthbearer_token_refresh_cb) ( +// rd_kafka_t *rk, +// const char *oauthbearer_config, +// void *opaque)) { +// #if WITH_SASL_OAUTHBEARER +// rd_kafka_anyconf_set_internal(_RK_GLOBAL, conf, +// "oauthbearer_token_refresh_cb", oauthbearer_token_refresh_cb); +// #endif +// } namespace cudf { namespace io { namespace external { namespace kafka { -kafka_consumer::kafka_consumer(std::map const& configs) +// kafka_consumer::kafka_consumer(std::map const& configs) +kafka_consumer::kafka_consumer(PyObject const& configs) : kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { - for (auto const& key_value : configs) { - std::string error_string; - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - kafka_conf->set(key_value.first, key_value.second, error_string), - "Invalid Kafka configuration"); - } - - // Kafka 0.9 > requires group.id in the configuration - std::string conf_val; - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), - "Kafka group.id must be configured"); - - std::string errstr; - consumer = std::unique_ptr( - RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); + // for (auto const& key_value : configs) { + // std::string error_string; + // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + // kafka_conf->set(key_value.first, key_value.second, error_string), + // "Invalid Kafka configuration"); + // } + + // // Kafka 0.9 > requires group.id in the configuration + // std::string conf_val; + // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), + // "Kafka group.id must be configured"); + + // // Sets the OAuth 
Callback if the configuration is present + // OAuthRefreshCb cb; + // std::string error_string; + // kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); + + // std::string errstr; + // consumer = std::unique_ptr( + // RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); } -kafka_consumer::kafka_consumer(std::map const& configs, +kafka_consumer::kafka_consumer(PyObject const& configs, std::string const& topic_name, int partition, int64_t start_offset, @@ -58,27 +91,27 @@ kafka_consumer::kafka_consumer(std::map const& configs batch_timeout(batch_timeout), delimiter(delimiter) { - kafka_conf = std::unique_ptr(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)); - - for (auto const& key_value : configs) { - std::string error_string; - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - kafka_conf->set(key_value.first, key_value.second, error_string), - "Invalid Kafka configuration"); - } - - // Kafka 0.9 > requires group.id in the configuration - std::string conf_val; - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), - "Kafka group.id must be configured"); - - std::string errstr; - consumer = std::unique_ptr( - RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); - - // Pre fill the local buffer with messages so the datasource->size() invocation - // will return a valid size. 
- consume_to_buffer(); + // kafka_conf = std::unique_ptr(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)); + + // for (auto const& key_value : configs) { + // std::string error_string; + // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + // kafka_conf->set(key_value.first, key_value.second, error_string), + // "Invalid Kafka configuration"); + // } + + // // Kafka 0.9 > requires group.id in the configuration + // std::string conf_val; + // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), + // "Kafka group.id must be configured"); + + // std::string errstr; + // consumer = std::unique_ptr( + // RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); + + // // Pre fill the local buffer with messages so the datasource->size() invocation + // // will return a valid size. + // consume_to_buffer(); } std::unique_ptr kafka_consumer::host_read(size_t offset, size_t size) diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index fc985e58b68..772e911b713 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -16,9 +16,9 @@ cdef extern from "kafka_consumer.hpp" \ cpdef cppclass kafka_consumer: - kafka_consumer(map[string, string] configs) except + + kafka_consumer(object configs) except + - kafka_consumer(map[string, string] configs, + kafka_consumer(object configs, string topic_name, int32_t partition, int64_t start_offset, diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 5588b69938b..daddf53cd82 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -14,7 +14,7 @@ from cudf_kafka._lib.kafka cimport kafka_consumer cdef class KafkaDatasource(Datasource): def __cinit__(self, - map[string, string] kafka_configs, + object kafka_configs, string topic=b"", int32_t partition=-1, int64_t start_offset=0, diff --git 
a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index a301660a2e4..056f25cba31 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -25,13 +25,7 @@ def __init__(self, kafka_configs): """ self.kafka_configs = kafka_configs - - self.kafka_confs = { - str.encode(key): str.encode(value) - for key, value in self.kafka_configs.items() - } - - self.kafka_meta_client = KafkaDatasource(self.kafka_confs) + self.kafka_meta_client = KafkaDatasource(kafka_configs) def list_topics(self, specific_topic=None): From 0cfcfd0023041c90670d29bb44854ff1ef354296 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 14 Oct 2021 10:52:48 -0400 Subject: [PATCH 02/70] Checkpoint: C++, Cython, and Python compiling working. Callback object generated --- .../include/cudf_kafka/kafka_consumer.hpp | 4 +- cpp/libcudf_kafka/src/kafka_consumer.cpp | 88 ++++++++++++------- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 5 +- 3 files changed, 61 insertions(+), 36 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index d7e295fac64..45f6b261f13 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -49,7 +49,7 @@ class kafka_consumer : public cudf::io::datasource { * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client */ - kafka_consumer(PyObject const& configs); + kafka_consumer(PyObject* configs); /** * @brief Instantiate a Kafka consumer object. 
Documentation for librdkafka configurations can be @@ -66,7 +66,7 @@ class kafka_consumer : public cudf::io::datasource { * before batch_timeout, a smaller subset will be returned * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ - kafka_consumer(PyObject const& configs, + kafka_consumer(PyObject* configs, std::string const& topic_name, int partition, int64_t start_offset, diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 7f40164111c..b1fea549247 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -27,24 +27,29 @@ * token refresh is required. */ class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { + public: + OAuthRefreshCb(PyObject* callback, PyObject* args, PyObject* kwargs) + : callback(callback), args(args), kwargs(kwargs){}; + void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) { printf("oauthbearer_token_refresh_cb\n"); - Py_Initialize(); - Py_Finalize(); + PyObject* args; + PyObject* kwargs; + + CUDF_EXPECTS(PyCallable_Check(callback), "A Python callable is required"); + + // Make sure that we own the GIL + PyGILState_STATE state = PyGILState_Ensure(); + PyObject* result = PyObject_CallObject(callback, args); + PyGILState_Release(state); } -}; -// void rd_kafka_conf_set_oauthbearer_token_refresh_cb(rd_kafka_conf_t *conf, -// void (*oauthbearer_token_refresh_cb) ( -// rd_kafka_t *rk, -// const char *oauthbearer_config, -// void *opaque)) { -// #if WITH_SASL_OAUTHBEARER -// rd_kafka_anyconf_set_internal(_RK_GLOBAL, conf, -// "oauthbearer_token_refresh_cb", oauthbearer_token_refresh_cb); -// #endif -// } + private: + PyObject* callback; + PyObject* args; + PyObject* kwargs; +}; namespace cudf { namespace io { @@ -52,32 +57,51 @@ namespace external { namespace kafka { // kafka_consumer::kafka_consumer(std::map const& configs) 
-kafka_consumer::kafka_consumer(PyObject const& configs) +kafka_consumer::kafka_consumer(PyObject* confdict) : kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { - // for (auto const& key_value : configs) { - // std::string error_string; - // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - // kafka_conf->set(key_value.first, key_value.second, error_string), - // "Invalid Kafka configuration"); - // } + Py_ssize_t pos = 0; + PyObject *ko, *vo; + + // Configurations that can be Python callables. Anything else is expected to be a str + std::vector callableConfigs{"oauth_cb"}; + + while (PyDict_Next(confdict, &pos, &ko, &vo)) { + CUDF_EXPECTS(PyUnicode_Check(ko), "expected kafka configuration property name as type string"); + std::string key(PyUnicode_AsUTF8(ko)); + std::string valueType(Py_TYPE(vo)->tp_name); + + std::string error_string; + if (std::find(callableConfigs.begin(), callableConfigs.end(), key) != callableConfigs.end()) { + // Properly configure the callable. This is a Python callback for callback processing + // Sets the OAuth Callback if the configuration is present + // https://github.com/edenhill/librdkafka/blob/6d5fbf9131693288f0f198692fae5aa169b61912/src/rdkafka_conf.c#L2797 + // rd_kafka_conf_set_oauthbearer_token_refresh_cb(kafka_conf, oauth_cb); + PyObject* args; + PyObject* kwargs; + OAuthRefreshCb cb(vo, args, kwargs); + kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); - // // Kafka 0.9 > requires group.id in the configuration - // std::string conf_val; - // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), - // "Kafka group.id must be configured"); + } else { + CUDF_EXPECTS(valueType.compare("str") == 0, + "Only string values are supported for this configuration"); + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set(key, PyUnicode_AsUTF8(vo), error_string), + "Invalid Kafka configuration provided"); + } + } - // // Sets the OAuth Callback if the 
configuration is present - // OAuthRefreshCb cb; - // std::string error_string; - // kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); + // Kafka 0.9 > requires group.id in the configuration + std::string conf_val; + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), + "Kafka group.id must be configured"); - // std::string errstr; - // consumer = std::unique_ptr( - // RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); + std::string errstr; + consumer = std::unique_ptr( + RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); } -kafka_consumer::kafka_consumer(PyObject const& configs, +kafka_consumer::kafka_consumer(PyObject* configs, std::string const& topic_name, int partition, int64_t start_offset, diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index daddf53cd82..c78777346f6 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,5 +1,6 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
+cimport cpython from libc.stdint cimport int32_t, int64_t from libcpp cimport bool from libcpp.map cimport map @@ -23,7 +24,7 @@ cdef class KafkaDatasource(Datasource): string delimiter=b"",): if topic != b"" and partition != -1: self.c_datasource = \ - make_unique[kafka_consumer](kafka_configs, + make_unique[kafka_consumer]( kafka_configs, topic, partition, start_offset, @@ -32,7 +33,7 @@ cdef class KafkaDatasource(Datasource): delimiter) else: self.c_datasource = \ - make_unique[kafka_consumer](kafka_configs) + make_unique[kafka_consumer]( kafka_configs) cdef datasource* get_datasource(self) nogil: return self.c_datasource.get() From 1b6b19c9115cfc0a0a9b4cfe6079b36bab034f21 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 14 Oct 2021 11:22:24 -0400 Subject: [PATCH 03/70] moved configuration building and validating logic to its own method --- .../include/cudf_kafka/kafka_consumer.hpp | 11 ++ cpp/libcudf_kafka/src/kafka_consumer.cpp | 103 ++++++++---------- python/custreamz/custreamz/kafka.py | 2 +- 3 files changed, 59 insertions(+), 57 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 45f6b261f13..03bb86331b8 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -15,6 +15,9 @@ */ #pragma once +#define PY_SSIZE_T_CLEAN +#include + #include #include #include @@ -178,6 +181,12 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr kafka_conf; // RDKafka configuration object std::unique_ptr consumer; + // Configurations that can be Python callables. 
Anything else is expected to be a str + const std::vector callableConfigs{"oauth_cb"}; + + // The Python configuration dict that was used to create this instance + PyObject* conf_dict; + std::string topic_name; int partition; int64_t start_offset; @@ -193,6 +202,8 @@ class kafka_consumer : public cudf::io::datasource { int partition, int64_t offset); + void build_validate_configs(PyObject* python_config_dict); + /** * Convenience method for getting "now()" in Kafka's standard format */ diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index b1fea549247..014f78804bf 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -13,14 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -#define PY_SSIZE_T_CLEAN -#include - +#include "cudf_kafka/kafka_consumer.hpp" #include #include #include -#include "cudf_kafka/kafka_consumer.hpp" /** * @brief Callback to retrieve OAuth token from external source. 
Invoked when @@ -41,7 +37,8 @@ class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { // Make sure that we own the GIL PyGILState_STATE state = PyGILState_Ensure(); - PyObject* result = PyObject_CallObject(callback, args); + std::string result(PyUnicode_AsUTF8(PyObject_CallObject(callback, args))); + printf("Result: %s\n", result.c_str()); PyGILState_Release(state); } @@ -56,17 +53,55 @@ namespace io { namespace external { namespace kafka { -// kafka_consumer::kafka_consumer(std::map const& configs) kafka_consumer::kafka_consumer(PyObject* confdict) - : kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) + : conf_dict(confdict), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) +{ + build_validate_configs(confdict); + + std::string errstr; + consumer = std::unique_ptr( + RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); +} + +kafka_consumer::kafka_consumer(PyObject* confdict, + std::string const& topic_name, + int partition, + int64_t start_offset, + int64_t end_offset, + int batch_timeout, + std::string const& delimiter) + : topic_name(topic_name), + partition(partition), + start_offset(start_offset), + end_offset(end_offset), + batch_timeout(batch_timeout), + delimiter(delimiter), + conf_dict(confdict), + kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) +{ + build_validate_configs(confdict); + + std::string errstr; + consumer = std::unique_ptr( + RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); + + // Pre fill the local buffer with messages so the datasource->size() invocation + // will return a valid size. + consume_to_buffer(); +} + +/** + * @brief Builds and validates Kafka C++ configuration object from Python values + * + * @param kafka_configs + * Python Dict of configuration values and possibly callables for callbacks + */ +void kafka_consumer::build_validate_configs(PyObject* python_config_dict) { Py_ssize_t pos = 0; PyObject *ko, *vo; - // Configurations that can be Python callables. 
Anything else is expected to be a str - std::vector callableConfigs{"oauth_cb"}; - - while (PyDict_Next(confdict, &pos, &ko, &vo)) { + while (PyDict_Next(python_config_dict, &pos, &ko, &vo)) { CUDF_EXPECTS(PyUnicode_Check(ko), "expected kafka configuration property name as type string"); std::string key(PyUnicode_AsUTF8(ko)); std::string valueType(Py_TYPE(vo)->tp_name); @@ -74,9 +109,6 @@ kafka_consumer::kafka_consumer(PyObject* confdict) std::string error_string; if (std::find(callableConfigs.begin(), callableConfigs.end(), key) != callableConfigs.end()) { // Properly configure the callable. This is a Python callback for callback processing - // Sets the OAuth Callback if the configuration is present - // https://github.com/edenhill/librdkafka/blob/6d5fbf9131693288f0f198692fae5aa169b61912/src/rdkafka_conf.c#L2797 - // rd_kafka_conf_set_oauthbearer_token_refresh_cb(kafka_conf, oauth_cb); PyObject* args; PyObject* kwargs; OAuthRefreshCb cb(vo, args, kwargs); @@ -95,47 +127,6 @@ kafka_consumer::kafka_consumer(PyObject* confdict) std::string conf_val; CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), "Kafka group.id must be configured"); - - std::string errstr; - consumer = std::unique_ptr( - RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); -} - -kafka_consumer::kafka_consumer(PyObject* configs, - std::string const& topic_name, - int partition, - int64_t start_offset, - int64_t end_offset, - int batch_timeout, - std::string const& delimiter) - : topic_name(topic_name), - partition(partition), - start_offset(start_offset), - end_offset(end_offset), - batch_timeout(batch_timeout), - delimiter(delimiter) -{ - // kafka_conf = std::unique_ptr(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)); - - // for (auto const& key_value : configs) { - // std::string error_string; - // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - // kafka_conf->set(key_value.first, key_value.second, error_string), - // "Invalid Kafka 
configuration"); - // } - - // // Kafka 0.9 > requires group.id in the configuration - // std::string conf_val; - // CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), - // "Kafka group.id must be configured"); - - // std::string errstr; - // consumer = std::unique_ptr( - // RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); - - // // Pre fill the local buffer with messages so the datasource->size() invocation - // // will return a valid size. - // consume_to_buffer(); } std::unique_ptr kafka_consumer::host_read(size_t offset, size_t size) diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index 056f25cba31..a5bfc1c506b 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -139,7 +139,7 @@ def read_gdf( ) kafka_datasource = KafkaDatasource( - self.kafka_confs, + self.kafka_configs, topic.encode(), partition, start, From 25a8b331ab59fdc713661c263334bcfb4a406d5a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 25 Oct 2021 13:54:23 -0400 Subject: [PATCH 04/70] Introduce callbacks class where all the possible kafka callbacks can be implemented --- cpp/libcudf_kafka/CMakeLists.txt | 3 +- .../include/cudf_kafka/kafka_callback.hpp | 50 +++++++++++++++ .../include/cudf_kafka/kafka_consumer.hpp | 1 + cpp/libcudf_kafka/src/kafka_callback.cpp | 62 +++++++++++++++++++ cpp/libcudf_kafka/src/kafka_consumer.cpp | 36 +---------- 5 files changed, 117 insertions(+), 35 deletions(-) create mode 100644 cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp create mode 100644 cpp/libcudf_kafka/src/kafka_callback.cpp diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 0c27352e003..d093a8dfa56 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -60,7 +60,8 @@ endif() ################################################################################################### # - library target 
-------------------------------------------------------------------------------- add_library(cudf_kafka SHARED - src/kafka_consumer.cpp) + src/kafka_consumer.cpp + src/kafka_callback.cpp) ################################################################################################### # - include paths --------------------------------------------------------------------------------- diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp new file mode 100644 index 00000000000..5ec6fc9f059 --- /dev/null +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#define PY_SSIZE_T_CLEAN +#include + +#include +#include +#include +#include +#include + +namespace cudf { +namespace io { +namespace external { +namespace kafka { + +/** + * @brief Callback to retrieve OAuth token from external source. Invoked when + * token refresh is required. 
+ */ +class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { + public: + OAuthRefreshCb(PyObject* callback, PyObject* args); + + void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); + + private: + PyObject* callback; + PyObject* args; +}; + +} // namespace kafka +} // namespace external +} // namespace io +} // namespace cudf diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 03bb86331b8..d9c997f5e60 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -25,6 +25,7 @@ #include #include #include +#include "kafka_callback.hpp" namespace cudf { namespace io { diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp new file mode 100644 index 00000000000..8589f96bede --- /dev/null +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "cudf_kafka/kafka_callback.hpp" + +namespace cudf { +namespace io { +namespace external { +namespace kafka { + +OAuthRefreshCb::OAuthRefreshCb(PyObject* callback, PyObject* args) + : callback(callback), args(args){}; + +void OAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, + const std::string& oauthbearer_config) +{ + CUDF_EXPECTS(PyCallable_Check(callback), "A Python callable is required"); + + // Make sure that we own the GIL + PyGILState_STATE state = PyGILState_Ensure(); + PyObject* result = PyObject_CallObject(callback, args); + Py_XINCREF(result); + + // Set the token in the Kafka context + if (result) { + CUDF_EXPECTS(PyDict_Check(result), + "cudf_kafka requires a Dictionary response from the Python OAuthRefreshCb with " + "dictionary keys (token, token_lifetime_ms, principal, extensions)"); + + // Ensure that expected keys are present from the Python callback response. + std::string token = PyUnicode_AsUTF8(PyDict_GetItemString(result, "token")); + int64_t token_lifetime_ms = + PyLong_AsLongLong(PyDict_GetItemString(result, "token_lifetime_ms")); + std::string principal = PyUnicode_AsUTF8(PyDict_GetItemString(result, "principal")); + std::list extensions; + std::string errstr; + + handle->oauthbearer_set_token(token, token_lifetime_ms, principal, extensions, errstr); + } else { + handle->oauthbearer_set_token_failure(""); + } + + Py_XDECREF(result); + PyGILState_Release(state); +} + +} // namespace kafka +} // namespace external +} // namespace io +} // namespace cudf diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 014f78804bf..7dbccfc84dc 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -18,36 +18,6 @@ #include #include -/** - * @brief Callback to retrieve OAuth token from external source. Invoked when - * token refresh is required. 
- */ -class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { - public: - OAuthRefreshCb(PyObject* callback, PyObject* args, PyObject* kwargs) - : callback(callback), args(args), kwargs(kwargs){}; - - void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) - { - printf("oauthbearer_token_refresh_cb\n"); - PyObject* args; - PyObject* kwargs; - - CUDF_EXPECTS(PyCallable_Check(callback), "A Python callable is required"); - - // Make sure that we own the GIL - PyGILState_STATE state = PyGILState_Ensure(); - std::string result(PyUnicode_AsUTF8(PyObject_CallObject(callback, args))); - printf("Result: %s\n", result.c_str()); - PyGILState_Release(state); - } - - private: - PyObject* callback; - PyObject* args; - PyObject* kwargs; -}; - namespace cudf { namespace io { namespace external { @@ -108,10 +78,8 @@ void kafka_consumer::build_validate_configs(PyObject* python_config_dict) std::string error_string; if (std::find(callableConfigs.begin(), callableConfigs.end(), key) != callableConfigs.end()) { - // Properly configure the callable. This is a Python callback for callback processing - PyObject* args; - PyObject* kwargs; - OAuthRefreshCb cb(vo, args, kwargs); + // Properly configure the callable. 
This is a Python callback for oauth processing + OAuthRefreshCb cb(vo, NULL); kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); } else { From 4af5b054a5b863313c89c4d49b8c24bcb66a8435 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 25 Oct 2021 14:10:03 -0400 Subject: [PATCH 05/70] Refactored class names --- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 4 ++-- cpp/libcudf_kafka/src/kafka_callback.cpp | 6 +++--- cpp/libcudf_kafka/src/kafka_consumer.cpp | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 5ec6fc9f059..2d85db2e4bd 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -33,9 +33,9 @@ namespace kafka { * @brief Callback to retrieve OAuth token from external source. Invoked when * token refresh is required. */ -class OAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { +class PythonOAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { public: - OAuthRefreshCb(PyObject* callback, PyObject* args); + PythonOAuthRefreshCb(PyObject* callback, PyObject* args); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index 8589f96bede..6c738da5303 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -20,11 +20,11 @@ namespace io { namespace external { namespace kafka { -OAuthRefreshCb::OAuthRefreshCb(PyObject* callback, PyObject* args) +PythonOAuthRefreshCb::PythonOAuthRefreshCb(PyObject* callback, PyObject* args) : callback(callback), args(args){}; -void OAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, - const std::string& oauthbearer_config) +void 
PythonOAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, + const std::string& oauthbearer_config) { CUDF_EXPECTS(PyCallable_Check(callback), "A Python callable is required"); diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 7dbccfc84dc..dad0b809c30 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -79,9 +79,8 @@ void kafka_consumer::build_validate_configs(PyObject* python_config_dict) std::string error_string; if (std::find(callableConfigs.begin(), callableConfigs.end(), key) != callableConfigs.end()) { // Properly configure the callable. This is a Python callback for oauth processing - OAuthRefreshCb cb(vo, NULL); + PythonOAuthRefreshCb cb(vo, NULL); kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); - } else { CUDF_EXPECTS(valueType.compare("str") == 0, "Only string values are supported for this configuration"); From 97f8830a86cbbc69dc24acb75e95bf74f47772a7 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 26 Oct 2021 11:21:56 -0400 Subject: [PATCH 06/70] Updated tests to match new function parameters --- cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index fa3d7d887aa..1a92c3741ce 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -14,6 +14,9 @@ * limitations under the License. */ +#define PY_SSIZE_T_CLEAN +#include + #include #include #include @@ -31,8 +34,7 @@ struct KafkaDatasourceTest : public ::testing::Test { TEST_F(KafkaDatasourceTest, MissingGroupID) { // group.id is a required configuration. 
- std::map kafka_configs; - kafka_configs.insert({"bootstrap.servers", "localhost:9092"}); + PyObject* kafka_configs = Py_BuildValue("{s:s}", "bootstrap.servers", "localhost:9092"); EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); @@ -43,14 +45,13 @@ TEST_F(KafkaDatasourceTest, InvalidConfigValues) // Give a made up configuration value std::map kafka_configs; kafka_configs.insert({"completely_made_up_config", "wrong"}); + PyObject* kafka_configs = Py_BuildValue("{s:s}", "completely_made_up_config", "wrong"); EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); - kafka_configs.clear(); - // Give a good config property with a bad value - kafka_configs.insert({"message.max.bytes", "this should be a number not text"}); + kafka_configs = Py_BuildValue("{s:s}", "message.max.bytes", "his should be a number not text"); EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); } From 38e52c1926aaabee951393bcdd37531d164cf0bc Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 26 Oct 2021 13:23:46 -0400 Subject: [PATCH 07/70] Remove error of leaving previous declaration --- cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index 1a92c3741ce..70652261e56 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -43,8 +43,6 @@ TEST_F(KafkaDatasourceTest, MissingGroupID) TEST_F(KafkaDatasourceTest, InvalidConfigValues) { // Give a made up configuration value - std::map kafka_configs; - kafka_configs.insert({"completely_made_up_config", "wrong"}); PyObject* kafka_configs = Py_BuildValue("{s:s}", "completely_made_up_config", "wrong"); EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 
5000, "\n"), From 2401b8b0a7aa0d0014e4beae13730c66479c2a8c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 26 Oct 2021 16:00:35 -0400 Subject: [PATCH 08/70] upgrade librdkafka version for CI --- conda/recipes/libcudf_kafka/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 6b15890e7c7..0349da0a0ea 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -26,7 +26,7 @@ requirements: - cmake >=3.20.1 host: - libcudf {{version}} - - librdkafka >=1.6.0,<1.7.0a0 + - librdkafka >=1.7.0 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not From 69a8639888d00a68fa84f24230e5b177dab93375 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 27 Oct 2021 12:57:27 -0400 Subject: [PATCH 09/70] link python development headers against test --- cpp/libcudf_kafka/tests/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index f0c2664cd96..a6e8b0f1a89 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -14,6 +14,9 @@ # limitations under the License. 
#============================================================================= +# Locate Python Development headers +find_package(Python3 COMPONENTS Interpreter Development) + ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -21,7 +24,7 @@ function(ConfigureTest test_name ) add_executable(${test_name} ${ARGN}) set_target_properties(${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) + target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka Python3::Python) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() From 4e4ed72ee23bc76bdd8688d6ed9e58d1a9031263 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 27 Oct 2021 19:24:29 -0400 Subject: [PATCH 10/70] latest version of librdkafka requires that committed offsets be > 0. 
Adjusted unit tests to account for this --- python/custreamz/custreamz/tests/test_kafka.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/custreamz/custreamz/tests/test_kafka.py b/python/custreamz/custreamz/tests/test_kafka.py index d29ebf8db8b..ad3b829544b 100644 --- a/python/custreamz/custreamz/tests/test_kafka.py +++ b/python/custreamz/custreamz/tests/test_kafka.py @@ -5,11 +5,10 @@ from cudf.testing._utils import assert_eq -@pytest.mark.parametrize("commit_offset", [-1, 0, 1, 1000]) +@pytest.mark.parametrize("commit_offset", [1, 45, 100, 22, 1000, 10]) @pytest.mark.parametrize("topic", ["cudf-kafka-test-topic"]) def test_kafka_offset(kafka_client, topic, commit_offset): - ck_top = ck.TopicPartition(topic, 0, commit_offset) - offsets = [ck_top] + offsets = [ck.TopicPartition(topic, 0, commit_offset)] kafka_client.commit(offsets=offsets) # Get the offsets that were just committed to Kafka From 98ed97dc55f9c102f4fbe7fefac7a74382cab2f9 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 29 Oct 2021 12:57:20 -0400 Subject: [PATCH 11/70] updates --- ci/cpu/build.sh | 2 +- ci/gpu/build.sh | 7 ++++--- ci/local/build.sh | 2 ++ .../cpp/build.sh | 10 ++++++++++ python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 1 - 5 files changed, 17 insertions(+), 5 deletions(-) create mode 100755 ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 00dffa57683..2e10386bc4b 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -58,7 +58,7 @@ conda config --show-sources conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds -conda config --set ssl_verify False +#conda config --set ssl_verify False ################################################################################ # BUILD - Conda package builds diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index e2e95c34650..bcc6c444c98 100755 --- a/ci/gpu/build.sh +++ 
b/ci/gpu/build.sh @@ -3,6 +3,7 @@ ############################################## # cuDF GPU build and test script for CI # ############################################## +set -x set -e NUMARGS=$# ARGS=$* @@ -65,7 +66,7 @@ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" -nvidia-smi +#nvidia-smi gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh @@ -86,8 +87,8 @@ gpuci_mamba_retry install -y \ "ucx-py=0.23.*" # https://docs.rapids.ai/maintainers/depmgmt/ -# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -# gpuci_mamba_retry install -y "your-pkg=1.0.0" +#gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" diff --git a/ci/local/build.sh b/ci/local/build.sh index 1bfb8b63fef..1d5c97da879 100755 --- a/ci/local/build.sh +++ b/ci/local/build.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -x + GIT_DESCRIBE_TAG=`git describe --tags` MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` diff --git a/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh b/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh new file mode 100755 index 00000000000..11d1d195024 --- /dev/null +++ b/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e +WORKSPACE=/rapids/local +PREBUILD_SCRIPT=/rapids/local/ci/gpu/prebuild.sh +BUILD_SCRIPT=/rapids/local/ci/gpu/build.sh +if [ -f ${PREBUILD_SCRIPT} ]; then + source ${PREBUILD_SCRIPT} +fi +yes | source ${BUILD_SCRIPT} + diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 772e911b713..495680b95a0 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -49,7 +49,6 @@ cdef extern from "kafka_consumer.hpp" \ cdef class KafkaDatasource(Datasource): cdef 
unique_ptr[datasource] c_datasource - cdef map[string, string] kafka_configs cdef string topic cdef int32_t partition cdef int64_t start_offset From d1925ab2f027531cd81b4a2a9c5ea0ca87171370 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 29 Oct 2021 13:19:00 -0400 Subject: [PATCH 12/70] Make Python3 package REQUIRED in cmake --- cpp/libcudf_kafka/CMakeLists.txt | 2 +- cpp/libcudf_kafka/tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index d093a8dfa56..81c81903085 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -45,7 +45,7 @@ include(cmake/thirdparty/get_cudf.cmake) include(cmake/thirdparty/get_rdkafka.cmake) # Locate Python Development headers -find_package(Python3 COMPONENTS Interpreter Development) +find_package(Python3 REQUIRED COMPONENTS Interpreter Development) # # GTests if enabled if (BUILD_TESTS) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index a6e8b0f1a89..609ded9df2d 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -15,7 +15,7 @@ #============================================================================= # Locate Python Development headers -find_package(Python3 COMPONENTS Interpreter Development) +find_package(Python3 REQUIRED COMPONENTS Interpreter Development) ################################################################################################### # - compiler function ----------------------------------------------------------------------------- From 9916e4d7ce8a595ec40a0e69f8f29e90c209f9ef Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 2 Nov 2021 12:30:03 -0400 Subject: [PATCH 13/70] updates per review --- cpp/libcudf_kafka/CMakeLists.txt | 5 +- .../include/cudf_kafka/kafka_callback.hpp | 8 +- .../include/cudf_kafka/kafka_consumer.hpp | 18 ++-- 
cpp/libcudf_kafka/src/kafka_callback.cpp | 34 ++------ cpp/libcudf_kafka/src/kafka_consumer.cpp | 87 ++++++++++--------- cpp/libcudf_kafka/tests/CMakeLists.txt | 5 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 6 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 17 +++- 8 files changed, 79 insertions(+), 101 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 81c81903085..fbc55c0e5f2 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -44,9 +44,6 @@ rapids_cpm_init() include(cmake/thirdparty/get_cudf.cmake) include(cmake/thirdparty/get_rdkafka.cmake) -# Locate Python Development headers -find_package(Python3 REQUIRED COMPONENTS Interpreter Development) - # # GTests if enabled if (BUILD_TESTS) # GoogleTest @@ -72,7 +69,7 @@ target_include_directories(cudf_kafka ################################################################################################### # - library paths --------------------------------------------------------------------------------- -target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA Python3::Python) +target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA) set_target_properties(cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 2d85db2e4bd..9ac8173ba9e 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -15,9 +15,6 @@ */ #pragma once -#define PY_SSIZE_T_CLEAN -#include - #include #include #include @@ -35,13 +32,12 @@ namespace kafka { */ class PythonOAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { public: - PythonOAuthRefreshCb(PyObject* callback, PyObject* args); + PythonOAuthRefreshCb(void* callback()); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: 
- PyObject* callback; - PyObject* args; + PyObject (*callback)(); }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index d9c997f5e60..3ccc0ef6e7e 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -15,9 +15,6 @@ */ #pragma once -#define PY_SSIZE_T_CLEAN -#include - #include #include #include @@ -53,7 +50,8 @@ class kafka_consumer : public cudf::io::datasource { * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client */ - kafka_consumer(PyObject* configs); + kafka_consumer(std::map configs, + std::map callbacks); /** * @brief Instantiate a Kafka consumer object. Documentation for librdkafka configurations can be @@ -70,7 +68,8 @@ class kafka_consumer : public cudf::io::datasource { * before batch_timeout, a smaller subset will be returned * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ - kafka_consumer(PyObject* configs, + kafka_consumer(std::map configs, + std::map callbacks, std::string const& topic_name, int partition, int64_t start_offset, @@ -182,11 +181,8 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr kafka_conf; // RDKafka configuration object std::unique_ptr consumer; - // Configurations that can be Python callables. 
Anything else is expected to be a str - const std::vector callableConfigs{"oauth_cb"}; - - // The Python configuration dict that was used to create this instance - PyObject* conf_dict; + std::map configs; + std::map callbacks; std::string topic_name; int partition; @@ -203,8 +199,6 @@ class kafka_consumer : public cudf::io::datasource { int partition, int64_t offset); - void build_validate_configs(PyObject* python_config_dict); - /** * Convenience method for getting "now()" in Kafka's standard format */ diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index 6c738da5303..c2ea63ac232 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -20,40 +20,18 @@ namespace io { namespace external { namespace kafka { -PythonOAuthRefreshCb::PythonOAuthRefreshCb(PyObject* callback, PyObject* args) - : callback(callback), args(args){}; +PythonOAuthRefreshCb::PythonOAuthRefreshCb(void* callback) : callback(callback){}; void PythonOAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) { - CUDF_EXPECTS(PyCallable_Check(callback), "A Python callable is required"); + printf("oauthbearer_token_refresh_cb... I want this called so bad!!!\n"); - // Make sure that we own the GIL - PyGILState_STATE state = PyGILState_Ensure(); - PyObject* result = PyObject_CallObject(callback, args); - Py_XINCREF(result); + // Since I need to get the results of the invoked Python function (PyObject) here + // I don't see how this avoids importing Python dependencies? + PyObject result = callback(); - // Set the token in the Kafka context - if (result) { - CUDF_EXPECTS(PyDict_Check(result), - "cudf_kafka requires a Dictionary response from the Python OAuthRefreshCb with " - "dictionary keys (token, token_lifetime_ms, principal, extensions)"); - - // Ensure that expected keys are present from the Python callback response. 
- std::string token = PyUnicode_AsUTF8(PyDict_GetItemString(result, "token")); - int64_t token_lifetime_ms = - PyLong_AsLongLong(PyDict_GetItemString(result, "token_lifetime_ms")); - std::string principal = PyUnicode_AsUTF8(PyDict_GetItemString(result, "principal")); - std::list extensions; - std::string errstr; - - handle->oauthbearer_set_token(token, token_lifetime_ms, principal, extensions, errstr); - } else { - handle->oauthbearer_set_token_failure(""); - } - - Py_XDECREF(result); - PyGILState_Release(state); + // Need to get 3 dict elements and set them here .... } } // namespace kafka diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index dad0b809c30..29d197a5c3e 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -23,33 +23,70 @@ namespace io { namespace external { namespace kafka { -kafka_consumer::kafka_consumer(PyObject* confdict) - : conf_dict(confdict), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) +kafka_consumer::kafka_consumer(std::map configs, + std::map callbacks) + : configs(configs), + callbacks(callbacks), + kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { - build_validate_configs(confdict); + for (auto const& key_value : configs) { + std::string error_string; + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set(key_value.first, key_value.second, error_string), + "Invalid Kafka configuration"); + } + + // TODO: Just for testing ... 
want to make sure this works + std::string error_string; + PythonOAuthRefreshCb cb(callbacks.find("oauth_cb")->second); + kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); + + // Kafka 0.9 > requires group.id in the configuration + std::string conf_val; + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), + "Kafka group.id must be configured"); std::string errstr; consumer = std::unique_ptr( RdKafka::KafkaConsumer::create(kafka_conf.get(), errstr)); } -kafka_consumer::kafka_consumer(PyObject* confdict, +kafka_consumer::kafka_consumer(std::map configs, + std::map callbacks, std::string const& topic_name, int partition, int64_t start_offset, int64_t end_offset, int batch_timeout, std::string const& delimiter) - : topic_name(topic_name), + : configs(configs), + callbacks(callbacks), + topic_name(topic_name), partition(partition), start_offset(start_offset), end_offset(end_offset), batch_timeout(batch_timeout), delimiter(delimiter), - conf_dict(confdict), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { - build_validate_configs(confdict); + for (auto const& key_value : configs) { + std::string error_string; + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set(key_value.first, key_value.second, error_string), + "Invalid Kafka configuration"); + } + + // TODO: Just for testing ... 
want to make sure this works + std::string error_string; + PythonOAuthRefreshCb cb(callbacks.find("oauth_cb")->second); + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), + "Failed to set Kafka oauth callback"); + + // Kafka 0.9 > requires group.id in the configuration + std::string conf_val; + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), + "Kafka group.id must be configured"); std::string errstr; consumer = std::unique_ptr( @@ -60,42 +97,6 @@ kafka_consumer::kafka_consumer(PyObject* confdict, consume_to_buffer(); } -/** - * @brief Builds and validates Kafka C++ configuration object from Python values - * - * @param kafka_configs - * Python Dict of configuration values and possibly callables for callbacks - */ -void kafka_consumer::build_validate_configs(PyObject* python_config_dict) -{ - Py_ssize_t pos = 0; - PyObject *ko, *vo; - - while (PyDict_Next(python_config_dict, &pos, &ko, &vo)) { - CUDF_EXPECTS(PyUnicode_Check(ko), "expected kafka configuration property name as type string"); - std::string key(PyUnicode_AsUTF8(ko)); - std::string valueType(Py_TYPE(vo)->tp_name); - - std::string error_string; - if (std::find(callableConfigs.begin(), callableConfigs.end(), key) != callableConfigs.end()) { - // Properly configure the callable. 
This is a Python callback for oauth processing - PythonOAuthRefreshCb cb(vo, NULL); - kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); - } else { - CUDF_EXPECTS(valueType.compare("str") == 0, - "Only string values are supported for this configuration"); - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - kafka_conf->set(key, PyUnicode_AsUTF8(vo), error_string), - "Invalid Kafka configuration provided"); - } - } - - // Kafka 0.9 > requires group.id in the configuration - std::string conf_val; - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->get("group.id", conf_val), - "Kafka group.id must be configured"); -} - std::unique_ptr kafka_consumer::host_read(size_t offset, size_t size) { if (offset > buffer.size()) { return 0; } diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index 609ded9df2d..f0c2664cd96 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -14,9 +14,6 @@ # limitations under the License. 
#============================================================================= -# Locate Python Development headers -find_package(Python3 REQUIRED COMPONENTS Interpreter Development) - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -24,7 +21,7 @@ function(ConfigureTest test_name ) add_executable(${test_name} ${ARGN}) set_target_properties(${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka Python3::Python) + target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 495680b95a0..773f4017d55 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -16,9 +16,11 @@ cdef extern from "kafka_consumer.hpp" \ cpdef cppclass kafka_consumer: - kafka_consumer(object configs) except + + kafka_consumer(map[string, string] configs, + map[string, void] callbacks) except + - kafka_consumer(object configs, + kafka_consumer(map[string, string] configs, + map[string, void] callbacks, string topic_name, int32_t partition, int64_t start_offset, diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index c78777346f6..51c60999917 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -22,9 +22,22 @@ cdef class KafkaDatasource(Datasource): int64_t end_offset=0, int32_t batch_timeout=10000, string delimiter=b"",): + + # Convert Python-confluent-kafka configuration dict + # to map[string, string] and map[string, void] for callbacks + cdef map[string, string] configs + cdef map[string, void*] 
callbacks + + for key in kafka_configs: + if callable(kafka_configs[key]): + callbacks[key.encode()] = kafka_configs[key] + else: + configs[key.encode()] = kafka_configs[key].encode() + if topic != b"" and partition != -1: self.c_datasource = \ - make_unique[kafka_consumer]( kafka_configs, + make_unique[kafka_consumer](configs, + callbacks, topic, partition, start_offset, @@ -33,7 +46,7 @@ cdef class KafkaDatasource(Datasource): delimiter) else: self.c_datasource = \ - make_unique[kafka_consumer]( kafka_configs) + make_unique[kafka_consumer](configs, callbacks) cdef datasource* get_datasource(self) nogil: return self.c_datasource.get() From 2499a7835fc2281f2bb087d19975678a350c9640 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 4 Nov 2021 11:32:54 -0400 Subject: [PATCH 14/70] modified to use std::function --- ci/cpu/build.sh | 2 +- ci/gpu/build.sh | 5 ++- ci/local/build.sh | 2 -- .../cpp/build.sh | 10 ------ cpp/libcudf_kafka/CMakeLists.txt | 5 ++- .../include/cudf_kafka/kafka_callback.hpp | 27 +++++++++++++-- .../include/cudf_kafka/kafka_consumer.hpp | 18 +++++++--- cpp/libcudf_kafka/src/kafka_callback.cpp | 33 +++++++++++++++---- cpp/libcudf_kafka/src/kafka_consumer.cpp | 12 +++---- cpp/libcudf_kafka/tests/CMakeLists.txt | 5 ++- python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 9 +++-- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 20 +++++++---- 12 files changed, 103 insertions(+), 45 deletions(-) delete mode 100755 ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2e10386bc4b..00dffa57683 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -58,7 +58,7 @@ conda config --show-sources conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds -#conda config --set ssl_verify False +conda config --set ssl_verify False ################################################################################ # BUILD - Conda package builds diff 
--git a/ci/gpu/build.sh b/ci/gpu/build.sh index bcc6c444c98..87205fa5cf4 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -3,7 +3,6 @@ ############################################## # cuDF GPU build and test script for CI # ############################################## -set -x set -e NUMARGS=$# ARGS=$* @@ -66,7 +65,7 @@ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" -#nvidia-smi +nvidia-smi gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh @@ -88,7 +87,7 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ #gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "librdkafka=1.7.0" +#gpuci_mamba_retry install -y "your-pkg=1.0.0" gpuci_logger "Check compiler versions" diff --git a/ci/local/build.sh b/ci/local/build.sh index 1d5c97da879..1bfb8b63fef 100755 --- a/ci/local/build.sh +++ b/ci/local/build.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -x - GIT_DESCRIBE_TAG=`git describe --tags` MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` diff --git a/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh b/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh deleted file mode 100755 index 11d1d195024..00000000000 --- a/ci/local/build_rapidsai_21.12-cuda11.0-devel-ubuntu18.04-py3.7/cpp/build.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -set -e -WORKSPACE=/rapids/local -PREBUILD_SCRIPT=/rapids/local/ci/gpu/prebuild.sh -BUILD_SCRIPT=/rapids/local/ci/gpu/build.sh -if [ -f ${PREBUILD_SCRIPT} ]; then - source ${PREBUILD_SCRIPT} -fi -yes | source ${BUILD_SCRIPT} - diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index fbc55c0e5f2..81c81903085 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -44,6 +44,9 @@ rapids_cpm_init() include(cmake/thirdparty/get_cudf.cmake) include(cmake/thirdparty/get_rdkafka.cmake) +# Locate Python 
Development headers +find_package(Python3 REQUIRED COMPONENTS Interpreter Development) + # # GTests if enabled if (BUILD_TESTS) # GoogleTest @@ -69,7 +72,7 @@ target_include_directories(cudf_kafka ################################################################################################### # - library paths --------------------------------------------------------------------------------- -target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA) +target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA Python3::Python) set_target_properties(cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN" diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 9ac8173ba9e..64a3684db04 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -15,6 +15,9 @@ */ #pragma once +#define PY_SSIZE_T_CLEAN +#include + #include #include #include @@ -26,18 +29,38 @@ namespace io { namespace external { namespace kafka { +/** + * @brief Callback function type used for Kafka OAuth events + * + * The KafkaConsumer calls the `kafka_oauth_callback_t` when the existing + * oauth token is considered expired by the KafkaConsumer. Typically that + * means this will be invoked a single time when the KafkaConsumer is created + * to get the initial token and then intermediately as the token becomes + * expired. + * + * The callback function signature is: + * `PyObject* kafka_oauth_callback_t()` + * + * The callback function returns a PyObject, Python Tuple, + * where the Tuple consists of the Oauth token and its + * linux epoch expiration time. Generally the token and expiration + * time is retrieved from an external service by the callback. + * Ex: [token, token_expiration_in_epoch] + */ +using kafka_oauth_callback_t = std::function; + /** * @brief Callback to retrieve OAuth token from external source. 
Invoked when * token refresh is required. */ class PythonOAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { public: - PythonOAuthRefreshCb(void* callback()); + PythonOAuthRefreshCb(kafka_oauth_callback_t cb); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: - PyObject (*callback)(); + kafka_oauth_callback_t oauth_callback; }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 3ccc0ef6e7e..91de2b0bec6 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -15,6 +15,9 @@ */ #pragma once +#define PY_SSIZE_T_CLEAN +#include + #include #include #include @@ -49,9 +52,12 @@ class kafka_consumer : public cudf::io::datasource { * * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client + * @param oauth_callback `kafka_oauth_callback_t` Python callable provided by the + * user to this constructor. This callback is responsible for both + * retrieving the initial OAuth token and refreshing the OAuth + * token when it expires */ - kafka_consumer(std::map configs, - std::map callbacks); + kafka_consumer(std::map configs, kafka_oauth_callback_t oauth_callback); /** * @brief Instantiate a Kafka consumer object. Documentation for librdkafka configurations can be @@ -59,6 +65,10 @@ class kafka_consumer : public cudf::io::datasource { * * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client + * @param oauth_callback `kafka_oauth_callback_t` Python callable provided by the + * user to this constructor. 
This callback is responsible for both + * retrieving the initial OAuth token and refreshing the OAuth + * token when it expires * @param topic_name name of the Kafka topic to consume from * @param partition partition index to consume from between `0` and `TOPIC_NUM_PARTITIONS - 1` * inclusive @@ -69,7 +79,7 @@ class kafka_consumer : public cudf::io::datasource { * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ kafka_consumer(std::map configs, - std::map callbacks, + kafka_oauth_callback_t oauth_callback, std::string const& topic_name, int partition, int64_t start_offset, @@ -182,7 +192,7 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr consumer; std::map configs; - std::map callbacks; + kafka_oauth_callback_t oauth_callback; std::string topic_name; int partition; diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index c2ea63ac232..b5043d9c369 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -14,24 +14,45 @@ * limitations under the License. */ #include "cudf_kafka/kafka_callback.hpp" +#include namespace cudf { namespace io { namespace external { namespace kafka { -PythonOAuthRefreshCb::PythonOAuthRefreshCb(void* callback) : callback(callback){}; +PythonOAuthRefreshCb::PythonOAuthRefreshCb(kafka_oauth_callback_t cb) : oauth_callback(cb){}; void PythonOAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) { - printf("oauthbearer_token_refresh_cb... I want this called so bad!!!\n"); + // Make sure that we own the GIL + PyGILState_STATE state = PyGILState_Ensure(); + PyObject* result = oauth_callback(); + Py_XINCREF(result); - // Since I need to get the results of the invoked Python function (PyObject) here - // I don't see how this avoids importing Python dependencies? 
- PyObject result = callback(); + // Set the token in the Kafka context + if (result) { + CUDF_EXPECTS(PyTuple_Check(result) && PyTuple_Size(result) == 2, + "cudf_kafka requires a Tuple response with 2 entries from the " + "PythonOAuthRefreshCb containing [token, token_expiration_ms_in_epoch"); - // Need to get 3 dict elements and set them here .... + // Ensure that expected keys are present from the Python callback response. + std::string token = PyUnicode_AsUTF8(PyTuple_GetItem(result, 0)); + int64_t token_lifetime_ms = PyLong_AsLongLong(PyTuple_GetItem(result, 1)); + std::list extensions; // currently not supported + std::string errstr; + + CUDF_EXPECTS( + RdKafka::ErrorCode::ERR_NO_ERROR == + handle->oauthbearer_set_token(token, token_lifetime_ms, "kafka", extensions, errstr), + "Error occurred while setting the oauthbearer token"); + } else { + handle->oauthbearer_set_token_failure("Unable to acquire oauth bearer token"); + } + + Py_XDECREF(result); + PyGILState_Release(state); } } // namespace kafka diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 29d197a5c3e..449dad8c2e4 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -24,9 +24,9 @@ namespace external { namespace kafka { kafka_consumer::kafka_consumer(std::map configs, - std::map callbacks) + kafka_oauth_callback_t oauth_callback) : configs(configs), - callbacks(callbacks), + oauth_callback(oauth_callback), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { for (auto const& key_value : configs) { @@ -38,7 +38,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... 
want to make sure this works std::string error_string; - PythonOAuthRefreshCb cb(callbacks.find("oauth_cb")->second); + PythonOAuthRefreshCb cb(oauth_callback); kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); // Kafka 0.9 > requires group.id in the configuration @@ -52,7 +52,7 @@ kafka_consumer::kafka_consumer(std::map configs, } kafka_consumer::kafka_consumer(std::map configs, - std::map callbacks, + kafka_oauth_callback_t oauth_callback, std::string const& topic_name, int partition, int64_t start_offset, @@ -60,7 +60,7 @@ kafka_consumer::kafka_consumer(std::map configs, int batch_timeout, std::string const& delimiter) : configs(configs), - callbacks(callbacks), + oauth_callback(oauth_callback), topic_name(topic_name), partition(partition), start_offset(start_offset), @@ -78,7 +78,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... want to make sure this works std::string error_string; - PythonOAuthRefreshCb cb(callbacks.find("oauth_cb")->second); + PythonOAuthRefreshCb cb(oauth_callback); CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), "Failed to set Kafka oauth callback"); diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index f0c2664cd96..609ded9df2d 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -14,6 +14,9 @@ # limitations under the License. 
#============================================================================= +# Locate Python Development headers +find_package(Python3 REQUIRED COMPONENTS Interpreter Development) + ################################################################################################### # - compiler function ----------------------------------------------------------------------------- @@ -21,7 +24,7 @@ function(ConfigureTest test_name ) add_executable(${test_name} ${ARGN}) set_target_properties(${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$") - target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) + target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka Python3::Python) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 773f4017d55..0d2696da46f 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -11,16 +11,21 @@ from cudf._lib.cpp.io.types cimport datasource from cudf._lib.io.datasource cimport Datasource +cdef extern from "kafka_callback.hpp" \ + namespace "cudf::io::external::kafka" nogil: + ctypedef object (*kafka_oauth_callback_t)() + + cdef extern from "kafka_consumer.hpp" \ namespace "cudf::io::external::kafka" nogil: cpdef cppclass kafka_consumer: kafka_consumer(map[string, string] configs, - map[string, void] callbacks) except + + kafka_oauth_callback_t oauth_callback) except + kafka_consumer(map[string, string] configs, - map[string, void] callbacks, + kafka_oauth_callback_t oauth_callback, string topic_name, int32_t partition, int64_t start_offset, diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 51c60999917..0ad5382c6fd 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -23,21 +23,27 @@ 
cdef class KafkaDatasource(Datasource): int32_t batch_timeout=10000, string delimiter=b"",): - # Convert Python-confluent-kafka configuration dict - # to map[string, string] and map[string, void] for callbacks cdef map[string, string] configs - cdef map[string, void*] callbacks + cdef kafka_oauth_callback_t cb for key in kafka_configs: - if callable(kafka_configs[key]): - callbacks[key.encode()] = kafka_configs[key] + if key == 'oauth_cb': + if callable(kafka_configs[key]): + # kafka_oauth_callback_t in C++ is + # std::function + # Takes no params and returns PyObject + # cb = kafka_configs[key] + print("^^^ is erroring...") + else: + raise TypeError("'oauth_cb' configuration must \ + be a Python callable object") else: configs[key.encode()] = kafka_configs[key].encode() if topic != b"" and partition != -1: self.c_datasource = \ make_unique[kafka_consumer](configs, - callbacks, + cb, topic, partition, start_offset, @@ -46,7 +52,7 @@ cdef class KafkaDatasource(Datasource): delimiter) else: self.c_datasource = \ - make_unique[kafka_consumer](configs, callbacks) + make_unique[kafka_consumer](configs, cb) cdef datasource* get_datasource(self) nogil: return self.c_datasource.get() From ec2cab72be247ef0bbfd24f48a8392fce93cd123 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 4 Nov 2021 11:34:22 -0400 Subject: [PATCH 15/70] added back whitespace that was removed by accident --- ci/gpu/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 87205fa5cf4..e2e95c34650 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -86,8 +86,8 @@ gpuci_mamba_retry install -y \ "ucx-py=0.23.*" # https://docs.rapids.ai/maintainers/depmgmt/ -#gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -#gpuci_mamba_retry install -y "your-pkg=1.0.0" +# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_mamba_retry install -y "your-pkg=1.0.0" gpuci_logger "Check compiler versions" From 
4672922129b1efd4ea1d57f7b11bb794161669f5 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Mon, 8 Nov 2021 10:07:27 -0500 Subject: [PATCH 16/70] temporarily update librdkakfa for CI testing --- ci/gpu/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index e2e95c34650..962b0d36e3a 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -86,8 +86,8 @@ gpuci_mamba_retry install -y \ "ucx-py=0.23.*" # https://docs.rapids.ai/maintainers/depmgmt/ -# gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env -# gpuci_mamba_retry install -y "your-pkg=1.0.0" +gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" From 6a6e629b3f1e37942806e1b682ffc7a3e7f952c0 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 8 Nov 2021 16:27:19 -0500 Subject: [PATCH 17/70] updates per review --- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 6 +++--- cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp | 4 ++-- cpp/libcudf_kafka/src/kafka_callback.cpp | 4 ++-- cpp/libcudf_kafka/src/kafka_consumer.cpp | 4 ++-- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 9 +++------ 5 files changed, 12 insertions(+), 15 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 64a3684db04..e6b2a83a1ef 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -39,7 +39,7 @@ namespace kafka { * expired. * * The callback function signature is: - * `PyObject* kafka_oauth_callback_t()` + * `PyObject* kafka_oauth_callback_t() * * The callback function returns a PyObject, Python Tuple, * where the Tuple consists of the Oauth token and its @@ -47,7 +47,7 @@ namespace kafka { * time is retrieved from an external service by the callback. 
* Ex: [token, token_expiration_in_epoch] */ -using kafka_oauth_callback_t = std::function; +using kafka_oauth_callback_t = PyObject* (*)(); /** * @brief Callback to retrieve OAuth token from external source. Invoked when @@ -60,7 +60,7 @@ class PythonOAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: - kafka_oauth_callback_t oauth_callback; + kafka_oauth_callback_t oauth_callback_; }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 91de2b0bec6..991d27d2bdd 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -55,7 +55,7 @@ class kafka_consumer : public cudf::io::datasource { * @param oauth_callback `kafka_oauth_callback_t` Python callable provided by the * user to this constructor. This callback is responsible for both * retrieving the initial OAuth token and refreshing the OAuth - * token when it expires + * token when it expireskafka_oauth_callback_t */ kafka_consumer(std::map configs, kafka_oauth_callback_t oauth_callback); @@ -192,7 +192,7 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr consumer; std::map configs; - kafka_oauth_callback_t oauth_callback; + kafka_oauth_callback_t oauth_callback_; std::string topic_name; int partition; diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index b5043d9c369..f4e58f0387f 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -21,14 +21,14 @@ namespace io { namespace external { namespace kafka { -PythonOAuthRefreshCb::PythonOAuthRefreshCb(kafka_oauth_callback_t cb) : oauth_callback(cb){}; +PythonOAuthRefreshCb::PythonOAuthRefreshCb(kafka_oauth_callback_t cb) : oauth_callback_(cb){}; void 
PythonOAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config) { // Make sure that we own the GIL PyGILState_STATE state = PyGILState_Ensure(); - PyObject* result = oauth_callback(); + PyObject* result = oauth_callback_(); Py_XINCREF(result); // Set the token in the Kafka context diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 449dad8c2e4..2b12eb8f6c9 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -26,7 +26,7 @@ namespace kafka { kafka_consumer::kafka_consumer(std::map configs, kafka_oauth_callback_t oauth_callback) : configs(configs), - oauth_callback(oauth_callback), + oauth_callback_(oauth_callback), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { for (auto const& key_value : configs) { @@ -60,7 +60,7 @@ kafka_consumer::kafka_consumer(std::map configs, int batch_timeout, std::string const& delimiter) : configs(configs), - oauth_callback(oauth_callback), + oauth_callback_(oauth_callback), topic_name(topic_name), partition(partition), start_offset(start_offset), diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 0ad5382c6fd..d5acb641ed7 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,6 +1,7 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
cimport cpython +from cpython.ref cimport PyObject from libc.stdint cimport int32_t, int64_t from libcpp cimport bool from libcpp.map cimport map @@ -24,16 +25,12 @@ cdef class KafkaDatasource(Datasource): string delimiter=b"",): cdef map[string, string] configs - cdef kafka_oauth_callback_t cb + cdef PyObject* (*cb)() for key in kafka_configs: if key == 'oauth_cb': if callable(kafka_configs[key]): - # kafka_oauth_callback_t in C++ is - # std::function - # Takes no params and returns PyObject - # cb = kafka_configs[key] - print("^^^ is erroring...") + print("oauth_cb type: " + str(type(kafka_configs[key]))) else: raise TypeError("'oauth_cb' configuration must \ be a Python callable object") From 981d44d74b85e2c88803a3386f6145a6b864d612 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 9 Nov 2021 12:48:25 -0500 Subject: [PATCH 18/70] test fixes --- .../tests/kafka_consumer_tests.cpp | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index 70652261e56..f584ed44292 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -34,22 +34,30 @@ struct KafkaDatasourceTest : public ::testing::Test { TEST_F(KafkaDatasourceTest, MissingGroupID) { // group.id is a required configuration. 
- PyObject* kafka_configs = Py_BuildValue("{s:s}", "bootstrap.servers", "localhost:9092"); + std::map kafka_configs; + kafka_configs["bootstrap.servers"] = "localhost:9092"; - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), + kafka::kafka_oauth_callback_t callback; + + EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); } TEST_F(KafkaDatasourceTest, InvalidConfigValues) { // Give a made up configuration value - PyObject* kafka_configs = Py_BuildValue("{s:s}", "completely_made_up_config", "wrong"); + std::map kafka_configs; + kafka_configs["completely_made_up_config"] = "wrong"; + + kafka::kafka_oauth_callback_t callback; - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), + EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); // Give a good config property with a bad value - kafka_configs = Py_BuildValue("{s:s}", "message.max.bytes", "his should be a number not text"); - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, "csv-topic", 0, 0, 3, 5000, "\n"), + kafka_configs.clear(); + kafka_configs["message.max.bytes"] = "this should be a number not text"; + + EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); } From 269d962da31cf8b127b0f1e93f65c586487900ac Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 9 Nov 2021 16:45:06 -0500 Subject: [PATCH 19/70] removed python dependency from tests as libcudf brings it in --- cpp/libcudf_kafka/tests/CMakeLists.txt | 7 +------ cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp | 3 --- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index 2f3846a2fbf..171682fd682 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ 
-12,9 +12,6 @@ # the License. # ============================================================================= -# Locate Python Development headers -find_package(Python3 REQUIRED COMPONENTS Interpreter Development) - # ################################################################################################## # * compiler function ----------------------------------------------------------------------------- @@ -26,9 +23,7 @@ function(ConfigureTest test_name) ${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" ) - target_link_libraries( - ${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka Python3::Python - ) + target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index f584ed44292..871b9125481 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -14,9 +14,6 @@ * limitations under the License. 
*/ -#define PY_SSIZE_T_CLEAN -#include - #include #include #include From 20ecb3fa510682df0bbf5e3a1f3f93b87d89c927 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 10 Nov 2021 15:48:04 -0500 Subject: [PATCH 20/70] update custreamz read_gdf() to check type and handle appropriately --- python/custreamz/custreamz/kafka.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index a5bfc1c506b..891bc1af4e2 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -167,7 +167,10 @@ def read_gdf( kafka_datasource.close(batch_timeout) if result is not None: - return cudf.DataFrame._from_table(result) + if isinstance(result, cudf.DataFrame): + return result + else: + return cudf.DataFrame._from_data(result) else: # empty Dataframe return cudf.DataFrame() From ac77f2ee45c9b1cdf04645e4dd2804e79bfa1f69 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 Nov 2021 14:26:55 -0500 Subject: [PATCH 21/70] modify cmake --- ci/gpu/build.sh | 2 +- cpp/libcudf_kafka/CMakeLists.txt | 2 +- cpp/libcudf_kafka/tests/CMakeLists.txt | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 962b0d36e3a..ca6f5e27ac5 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -65,7 +65,7 @@ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" -nvidia-smi +#nvidia-smi gpuci_logger "Activate conda env" . 
/opt/conda/etc/profile.d/conda.sh diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index fda6d86e9a8..d67b9cdd909 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) -file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.10/RAPIDS.cmake +file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake ) include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index 171682fd682..e1219f8cf61 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -23,7 +23,9 @@ function(ConfigureTest test_name) ${test_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$" ) - target_link_libraries(${test_name} PRIVATE GTest::gmock_main GTest::gtest_main cudf_kafka) + target_link_libraries( + ${test_name} PRIVATE GTest::gmock GTest::gmock_main GTest::gtest_main cudf_kafka + ) add_test(NAME ${test_name} COMMAND ${test_name}) endfunction() From 3c525a15a11857a33fc5861dabecc2c2cb821a85 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 Nov 2021 16:07:35 -0500 Subject: [PATCH 22/70] updated conda recipe to include python which is needed for the python development libraries --- conda/recipes/libcudf_kafka/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 0349da0a0ea..5e4aa7aa9e4 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -24,11 +24,13 @@ build: requirements: build: - cmake >=3.20.1 + - python host: - libcudf {{version}} - librdkafka >=1.7.0 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be 
automatically included here by run_exports but is not + - python test: commands: From 1d73af2c7b3cabccdef64ce263f00cc1e04402af Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 11 Nov 2021 19:56:06 -0500 Subject: [PATCH 23/70] removing all doubt --- conda/recipes/cudf_kafka/meta.yaml | 2 ++ conda/recipes/custreamz/meta.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index af27d888b46..f1ab2652291 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -25,6 +25,7 @@ build: requirements: build: - cmake >=3.20.1 + - python host: - python - cython >=0.29,<0.30 @@ -35,6 +36,7 @@ requirements: - libcudf_kafka {{ version }} - python-confluent-kafka - cudf {{ version }} + - python test: # [linux64] requires: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index dc3a17f03ab..f1f589bf727 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -27,6 +27,7 @@ requirements: - python - python-confluent-kafka - cudf_kafka {{ version }} + - cython run: - python - streamz @@ -35,6 +36,7 @@ requirements: - distributed>=2021.09.1 - python-confluent-kafka - cudf_kafka {{ version }} + - cython test: # [linux64] requires: # [linux64] From 3c44432b19b7774cdb788ee18628edcf041393d0 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 12 Nov 2021 11:00:13 -0500 Subject: [PATCH 24/70] add numpy to conda recipe build --- conda/recipes/cudf_kafka/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index f1ab2652291..5e9db533983 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,6 +26,7 @@ requirements: build: - cmake >=3.20.1 - python + - numpy host: - python - cython >=0.29,<0.30 @@ -37,6 +38,7 @@ requirements: - python-confluent-kafka - cudf {{ version }} - python + 
- numpy test: # [linux64] requires: # [linux64] From b5c3ffd96425df5f307d68ef17ea58641eb6d53f Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 12 Nov 2021 16:08:54 -0500 Subject: [PATCH 25/70] updates --- conda/recipes/cudf_kafka/meta.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 5e9db533983..52a863a1c9b 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -27,6 +27,10 @@ requirements: - cmake >=3.20.1 - python - numpy + - cython >=0.29,<0.30 + - setuptools + - libcudf_kafka {{ version }} + - cudatoolkit {{ cuda_version }} host: - python - cython >=0.29,<0.30 @@ -39,6 +43,8 @@ requirements: - cudf {{ version }} - python - numpy + - packaging + - cachetools test: # [linux64] requires: # [linux64] From 16bef4cf764ce1ec0b96698fd30d60275444ef5e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 12 Nov 2021 17:28:04 -0500 Subject: [PATCH 26/70] hopefully resolve conda errors --- conda/recipes/cudf_kafka/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 52a863a1c9b..3649e07af98 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -27,10 +27,9 @@ requirements: - cmake >=3.20.1 - python - numpy - - cython >=0.29,<0.30 + - cython - setuptools - libcudf_kafka {{ version }} - - cudatoolkit {{ cuda_version }} host: - python - cython >=0.29,<0.30 From fafc831f7cb719abbc9922e37e2e66028139125d Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 19 Nov 2021 13:27:39 -0500 Subject: [PATCH 27/70] review updates --- ci/gpu/build.sh | 2 +- conda/recipes/cudf_kafka/meta.yaml | 11 +---------- conda/recipes/libcudf_kafka/meta.yaml | 2 -- .../include/cudf_kafka/kafka_callback.hpp | 8 ++++---- .../include/cudf_kafka/kafka_consumer.hpp | 14 +++++++++----- cpp/libcudf_kafka/src/kafka_callback.cpp | 8 +++++--- 
cpp/libcudf_kafka/src/kafka_consumer.cpp | 10 ++++++---- cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp | 4 ++-- 8 files changed, 28 insertions(+), 31 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index ca6f5e27ac5..6103ace53e9 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -86,7 +86,7 @@ gpuci_mamba_retry install -y \ "ucx-py=0.23.*" # https://docs.rapids.ai/maintainers/depmgmt/ -gpuci_mamba_retry remove --force rapids-build-env rapids-notebook-env +gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env gpuci_mamba_retry install -y "librdkafka=1.7.0" diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 3649e07af98..72fa7a0fee6 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -25,11 +25,6 @@ build: requirements: build: - cmake >=3.20.1 - - python - - numpy - - cython - - setuptools - - libcudf_kafka {{ version }} host: - python - cython >=0.29,<0.30 @@ -38,12 +33,8 @@ requirements: - libcudf_kafka {{ version }} run: - libcudf_kafka {{ version }} - - python-confluent-kafka + - python-confluent-kafka=1.7.0 - cudf {{ version }} - - python - - numpy - - packaging - - cachetools test: # [linux64] requires: # [linux64] diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 5e4aa7aa9e4..0349da0a0ea 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -24,13 +24,11 @@ build: requirements: build: - cmake >=3.20.1 - - python host: - libcudf {{version}} - librdkafka >=1.7.0 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not - - python test: commands: diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index e6b2a83a1ef..885ccaf2d68 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ 
b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -47,20 +47,20 @@ namespace kafka { * time is retrieved from an external service by the callback. * Ex: [token, token_expiration_in_epoch] */ -using kafka_oauth_callback_t = PyObject* (*)(); +using kafka_oauth_callback_type = std::function; /** * @brief Callback to retrieve OAuth token from external source. Invoked when * token refresh is required. */ -class PythonOAuthRefreshCb : public RdKafka::OAuthBearerTokenRefreshCb { +class python_oauth_refresh_callback : public RdKafka::OAuthBearerTokenRefreshCb { public: - PythonOAuthRefreshCb(kafka_oauth_callback_t cb); + python_oauth_refresh_callback(kafka_oauth_callback_type cb); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: - kafka_oauth_callback_t oauth_callback_; + kafka_oauth_callback_type oauth_callback_; }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 991d27d2bdd..096b2495cb1 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -18,14 +18,17 @@ #define PY_SSIZE_T_CLEAN #include +#include "kafka_callback.hpp" + +#include + #include + #include #include -#include #include #include #include -#include "kafka_callback.hpp" namespace cudf { namespace io { @@ -57,7 +60,8 @@ class kafka_consumer : public cudf::io::datasource { * retrieving the initial OAuth token and refreshing the OAuth * token when it expireskafka_oauth_callback_t */ - kafka_consumer(std::map configs, kafka_oauth_callback_t oauth_callback); + kafka_consumer(std::map configs, + kafka_oauth_callback_type oauth_callback); /** * @brief Instantiate a Kafka consumer object. 
Documentation for librdkafka configurations can be @@ -79,7 +83,7 @@ class kafka_consumer : public cudf::io::datasource { * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ kafka_consumer(std::map configs, - kafka_oauth_callback_t oauth_callback, + kafka_oauth_callback_type oauth_callback, std::string const& topic_name, int partition, int64_t start_offset, @@ -192,7 +196,7 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr consumer; std::map configs; - kafka_oauth_callback_t oauth_callback_; + kafka_oauth_callback_type oauth_callback_; std::string topic_name; int partition; diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index f4e58f0387f..bd266b4ef9f 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ #include "cudf_kafka/kafka_callback.hpp" + #include namespace cudf { @@ -21,10 +22,11 @@ namespace io { namespace external { namespace kafka { -PythonOAuthRefreshCb::PythonOAuthRefreshCb(kafka_oauth_callback_t cb) : oauth_callback_(cb){}; +python_oauth_refresh_callback::python_oauth_refresh_callback(kafka_oauth_callback_type cb) + : oauth_callback_(cb){}; -void PythonOAuthRefreshCb::oauthbearer_token_refresh_cb(RdKafka::Handle* handle, - const std::string& oauthbearer_config) +void python_oauth_refresh_callback::oauthbearer_token_refresh_cb( + RdKafka::Handle* handle, const std::string& oauthbearer_config) { // Make sure that we own the GIL PyGILState_STATE state = PyGILState_Ensure(); diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 2b12eb8f6c9..7a9ad953e05 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -14,7 +14,9 @@ * limitations under the License. 
*/ #include "cudf_kafka/kafka_consumer.hpp" + #include + #include #include @@ -24,7 +26,7 @@ namespace external { namespace kafka { kafka_consumer::kafka_consumer(std::map configs, - kafka_oauth_callback_t oauth_callback) + kafka_oauth_callback_type oauth_callback) : configs(configs), oauth_callback_(oauth_callback), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) @@ -38,7 +40,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... want to make sure this works std::string error_string; - PythonOAuthRefreshCb cb(oauth_callback); + python_oauth_refresh_callback cb(oauth_callback); kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); // Kafka 0.9 > requires group.id in the configuration @@ -52,7 +54,7 @@ kafka_consumer::kafka_consumer(std::map configs, } kafka_consumer::kafka_consumer(std::map configs, - kafka_oauth_callback_t oauth_callback, + kafka_oauth_callback_type oauth_callback, std::string const& topic_name, int partition, int64_t start_offset, @@ -78,7 +80,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... 
want to make sure this works std::string error_string; - PythonOAuthRefreshCb cb(oauth_callback); + python_oauth_refresh_callback cb(oauth_callback); CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), "Failed to set Kafka oauth callback"); diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index 871b9125481..cf9fd8882a4 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -34,7 +34,7 @@ TEST_F(KafkaDatasourceTest, MissingGroupID) std::map kafka_configs; kafka_configs["bootstrap.servers"] = "localhost:9092"; - kafka::kafka_oauth_callback_t callback; + kafka::kafka_oauth_callback_type callback; EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); @@ -46,7 +46,7 @@ TEST_F(KafkaDatasourceTest, InvalidConfigValues) std::map kafka_configs; kafka_configs["completely_made_up_config"] = "wrong"; - kafka::kafka_oauth_callback_t callback; + kafka::kafka_oauth_callback_type callback; EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), cudf::logic_error); From 43e1aa9f35fca454451299c4ab850456880259b3 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 20 Nov 2021 14:02:19 -0500 Subject: [PATCH 28/70] add python to conda recipes since that is needed for python-dev now --- conda/recipes/cudf_kafka/meta.yaml | 2 ++ conda/recipes/libcudf_kafka/meta.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 72fa7a0fee6..333527d3614 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -25,6 +25,7 @@ build: requirements: build: - cmake >=3.20.1 + - python host: - python - cython >=0.29,<0.30 @@ -32,6 +33,7 @@ requirements: - cudf {{ version }} - libcudf_kafka {{ 
version }} run: + - python - libcudf_kafka {{ version }} - python-confluent-kafka=1.7.0 - cudf {{ version }} diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 0349da0a0ea..0a3ad37323c 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -24,9 +24,11 @@ build: requirements: build: - cmake >=3.20.1 + - python host: - libcudf {{version}} - librdkafka >=1.7.0 + - python run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not From 099ca277c144e474b17a580f4744f49797e79dd6 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 20 Nov 2021 15:44:19 -0500 Subject: [PATCH 29/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 333527d3614..6ffb6f7e584 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,6 +26,7 @@ requirements: build: - cmake >=3.20.1 - python + - numpy host: - python - cython >=0.29,<0.30 From 1d96eef63dd0c7b6a85451bc2d4aa30384b07377 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 20 Nov 2021 18:40:57 -0500 Subject: [PATCH 30/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 6ffb6f7e584..c07413ef45f 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,7 +26,6 @@ requirements: build: - cmake >=3.20.1 - python - - numpy host: - python - cython >=0.29,<0.30 @@ -35,6 +34,7 @@ requirements: - libcudf_kafka {{ version }} run: - python + - numpy - libcudf_kafka {{ version }} - python-confluent-kafka=1.7.0 - cudf {{ version }} From 5ac47367aa8237a89a4278e892a2cb15a0fe9f98 
Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 21 Nov 2021 11:00:05 -0500 Subject: [PATCH 31/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index c07413ef45f..0b9a7f76fd6 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -28,13 +28,12 @@ requirements: - python host: - python + - numpy - cython >=0.29,<0.30 - setuptools - cudf {{ version }} - libcudf_kafka {{ version }} run: - - python - - numpy - libcudf_kafka {{ version }} - python-confluent-kafka=1.7.0 - cudf {{ version }} From 793246856a2a4cde96a2e2b341226641b5961d0d Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 21 Nov 2021 15:00:55 -0500 Subject: [PATCH 32/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 1 - .../cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo | Bin 0 -> 12288 bytes python/cudf_kafka/setup.py | 2 -- 3 files changed, 3 deletions(-) create mode 100644 python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 0b9a7f76fd6..31eb6325f4c 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -28,7 +28,6 @@ requirements: - python host: - python - - numpy - cython >=0.29,<0.30 - setuptools - cudf {{ version }} diff --git a/python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo b/python/cudf_kafka/cudf_kafka/_lib/.kafka.pxd.swo new file mode 100644 index 0000000000000000000000000000000000000000..624b60798ae94ece673e4028670bd4d559d7c663 GIT binary patch literal 12288 zcmeI2%ZnUE9LHzI-CdpT zn%N!j5fsFW2mKEc<0D2q=v7Y&9z2?eiYJd=a`6#JB!2r{-TRnbL=dTh5B;cL{i^C) zk6-spH>Xa|&e0?FDS~uAAwS%F!+dbh!z0&^6LPjS;C^_2NOJpD%Q7ccg})L=Q(D5^ zxWKEC!^s)m=eF=1?#fBlYqcl6f$R!5IyY9#_KK;V8n)G(jPBOGwbsUt^oPi5E1(t73TOqi0$KsBfL1^&pcVLEDImQO@)~+{ 
zN7Ac>1fPS?fDb0XuX_l& z2rhtEz{_9(%!9|kA#md!LVf|?fXm=*;DS@&MKB5~;P%~wTm_fFD(HhMcpUt+8*K*f zfakz2@HKS%3S0yiK&az~-~%uK3>*hz-~hM|t*?Qr;1YNboCk~G5V&zCA%B9);3KdE z&VYm9Tl`4#CU_RifKgBZf8fWO-_sv#wl&8n67yQTP5Z)XQD%#PR|9Uh$EoGY!_$VO z(sH=y$r>Rgl&B?4IF2PHZ*9?%Y)G?C4jZzxhPZb_h9@0| z%=?g$D5?~WraBX&{uDZsbUmD`EmN9-==qF~MZM3jBjsw;6&=e~3M6&4xNSiue9RP8 z7rIJY-83cZ8p&iYWJelvH&u3h9=>(*Wzy6_ZnSLNqYIE z3j90E58E>drDowq&~vSGJ#Ki?U&^*gC2z<#(*?x*jMV(rzZr-y|X{wI@#nX}Qudb(4hMZ4KSJ=H&v% zbFdgtH*QNbVKBL^j$2KXjAPYuEP60bO9InuKm$b&QtD88Sq{TWk<_GgOuJPG&w1C@ zTbH8sP*XK{Z)IGJ<(pb&p&fgR;hGNL)KWCh;t7xnvnRU-Gi|$RvX$IA`qJ580uR{^ z_p9n0Plp7VZL1S(K9@GEVgt>0Tvz!qS;k_ox4IS3QkHYJgsONh$WtQU!U($R( zF59EsVHr8_c#xUr9uoiX92&;+Ju{0hV~>eNGURE!@nyln;g0YJnU6fu6s=4zGr@4H zdD;DNf#w Date: Sun, 21 Nov 2021 17:24:55 -0500 Subject: [PATCH 33/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 31eb6325f4c..db9c37a3c07 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,6 +26,7 @@ requirements: build: - cmake >=3.20.1 - python + - cython >=0.29,<0.30 host: - python - cython >=0.29,<0.30 From ab8f6a0e0336caa61504fdea244b3109c7d44077 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 22 Nov 2021 09:45:22 -0500 Subject: [PATCH 34/70] add numpy to conda recipe for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index db9c37a3c07..5cf46a9ec24 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -27,10 +27,6 @@ requirements: - cmake >=3.20.1 - python - cython >=0.29,<0.30 - host: - - python - - cython >=0.29,<0.30 - - setuptools - cudf {{ version }} - libcudf_kafka {{ version }} run: From 9bbedd477aa958ec07281aae23a890b40bf05f8d Mon Sep 17 00:00:00 2001 From: Jeremy 
Dyer Date: Mon, 22 Nov 2021 11:20:00 -0500 Subject: [PATCH 35/70] Debugging setup.py dependency issues --- conda/recipes/cudf_kafka/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 5cf46a9ec24..cb609a947ac 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %} package: name: cudf_kafka @@ -30,6 +30,7 @@ requirements: - cudf {{ version }} - libcudf_kafka {{ version }} run: + - numpy - libcudf_kafka {{ version }} - python-confluent-kafka=1.7.0 - cudf {{ version }} From b4f92327817d37f8a022da720b9eca798a6375ca Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 23 Nov 2021 10:02:00 -0500 Subject: [PATCH 36/70] Change CUDA version for debugging --- conda/recipes/cudf_kafka/meta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index cb609a947ac..6da543e9d2b 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: cudf_kafka @@ -25,6 +25,7 @@ build: requirements: build: - cmake >=3.20.1 + - numpy - python - cython >=0.29,<0.30 - cudf {{ version }} From d1f4fe5651c06283c419aa697fa02e9da13afe79 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 23 Nov 2021 11:45:11 -0500 Subject: [PATCH 37/70] debug conda build for cudf_kafka --- conda/recipes/cudf_kafka/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 6da543e9d2b..72fa7a0fee6 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -25,13 +25,13 @@ build: requirements: build: - cmake >=3.20.1 - - numpy + host: - python - cython >=0.29,<0.30 + - setuptools - cudf {{ version }} - libcudf_kafka {{ version }} run: - - numpy - libcudf_kafka {{ version }} - python-confluent-kafka=1.7.0 - cudf {{ version }} From 037b38617c11ada122fd916627cffdf5f12491a8 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sat, 27 Nov 2021 09:41:24 -0500 Subject: [PATCH 38/70] debug cudf_kafka conda --- conda/recipes/cudf_kafka/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 72fa7a0fee6..501daafe32a 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,9 +26,9 @@ requirements: build: - cmake >=3.20.1 host: + - numpy - python - cython >=0.29,<0.30 - - setuptools - cudf {{ version }} - libcudf_kafka {{ version }} run: From 2c8adced9199a79a7a32905efae897131a5ca276 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 29 Nov 2021 12:29:44 -0500 Subject: [PATCH 39/70] conda debugging --- ci/gpu/build.sh | 8 +++++++- 
conda/recipes/cudf_kafka/meta.yaml | 3 +-- conda/recipes/custreamz/meta.yaml | 6 ++---- conda/recipes/libcudf_kafka/meta.yaml | 4 +--- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 6103ace53e9..9b0d850f7be 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -65,7 +65,9 @@ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" -#nvidia-smi +set +e +nvidia-smi +set -e gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh @@ -141,7 +143,9 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then exit 0 else gpuci_logger "Check GPU usage" + set +e nvidia-smi + set -e gpuci_logger "GoogleTests" set -x @@ -164,7 +168,9 @@ else fi gpuci_logger "Check GPU usage" + set +e nvidia-smi + set -e gpuci_logger "GoogleTests" set -x diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 501daafe32a..dd78ff03f9b 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,14 +26,13 @@ requirements: build: - cmake >=3.20.1 host: - - numpy - python - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} run: - libcudf_kafka {{ version }} - - python-confluent-kafka=1.7.0 + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf {{ version }} test: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index f1f589bf727..db8af9b0bed 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -27,16 +27,14 @@ requirements: - python - python-confluent-kafka - cudf_kafka {{ version }} - - cython run: - python - streamz - cudf {{ version }} - - dask>=2021.09.1 - - distributed>=2021.09.1 + - dask>=2021.11.1,<=2021.11.2 + - distributed>=2021.11.1,<=2021.11.2 - python-confluent-kafka - cudf_kafka {{ version }} - - cython test: # [linux64] requires: # [linux64] diff --git a/conda/recipes/libcudf_kafka/meta.yaml 
b/conda/recipes/libcudf_kafka/meta.yaml index 0a3ad37323c..d82d50c62e5 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -24,11 +24,9 @@ build: requirements: build: - cmake >=3.20.1 - - python host: - libcudf {{version}} - - librdkafka >=1.7.0 - - python + - librdkafka >=1.7.0,<1.8.0a0 run: - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not From ef5e07262fe7ec10b89c4a2946551539bc93819f Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 29 Nov 2021 14:28:14 -0500 Subject: [PATCH 40/70] Add numpy include directory --- python/cudf_kafka/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf_kafka/setup.py b/python/cudf_kafka/setup.py index 9a241be2b84..824babfa10a 100644 --- a/python/cudf_kafka/setup.py +++ b/python/cudf_kafka/setup.py @@ -4,6 +4,7 @@ import sysconfig from distutils.sysconfig import get_python_lib +import numpy as np from Cython.Build import cythonize from setuptools import find_packages, setup from setuptools.extension import Extension @@ -65,6 +66,7 @@ "rapids/libcudacxx", ), os.path.dirname(sysconfig.get_path("include")), + np.get_include(), cuda_include_dir, ], library_dirs=([get_python_lib(), os.path.join(os.sys.prefix, "lib")]), From 5d57b1ae6f3aca176bb5af129a968d9a6809842e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 29 Nov 2021 17:36:34 -0500 Subject: [PATCH 41/70] Add Python has host and build requirement since libcudf_kafka uses PyObject --- conda/recipes/libcudf_kafka/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index d82d50c62e5..0ce97640b5d 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -23,8 +23,10 @@ build: requirements: build: + - python - cmake >=3.20.1 host: + - python - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: From 
0a3ee3ac9131c741aa5f4953e30d1c8574cb2578 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 29 Nov 2021 19:09:40 -0500 Subject: [PATCH 42/70] Updated conda meta.yml files --- conda/recipes/cudf_kafka/meta.yaml | 2 ++ conda/recipes/libcudf_kafka/meta.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index dd78ff03f9b..54ddcccf4d8 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -30,7 +30,9 @@ requirements: - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} + - setuptools run: + - python - libcudf_kafka {{ version }} - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf {{ version }} diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 0ce97640b5d..46daba5668b 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -23,13 +23,13 @@ build: requirements: build: - - python - cmake >=3.20.1 host: - python - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: + - python - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: From 97897ada7d8a06204668f8ecc1dc52ff1f2f56c8 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 1 Dec 2021 15:02:34 -0500 Subject: [PATCH 43/70] Testing out a theory about python versions --- conda/recipes/libcudf_kafka/meta.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 46daba5668b..d82d50c62e5 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -25,11 +25,9 @@ requirements: build: - cmake >=3.20.1 host: - - python - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: - - python - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included 
here by run_exports but is not test: From 03475408a259360688fc8e29a00816288cb17258 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Thu, 2 Dec 2021 13:18:17 -0500 Subject: [PATCH 44/70] add cmake function for setting up conda environment --- cpp/libcudf_kafka/CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 6c02060b4cd..a8f2785791c 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -22,13 +22,17 @@ include(rapids-find) project( CUDA_KAFKA - VERSION 21.12.00 + VERSION 22.02.00 LANGUAGES CXX ) # Set a default build type if none was specified rapids_cmake_build_type(Release) +# ################################################################################################## +# * conda environment ----------------------------------------------------------------------------- +rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) + # ################################################################################################## # * Build options option(BUILD_TESTS "Build tests for libcudf_kafka" ON) @@ -71,6 +75,11 @@ target_include_directories( # * library paths --------------------------------------------------------------------------------- target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA Python3::Python) +# Add Conda library, and include paths if specified +if(TARGET conda_env) + target_link_libraries(cudf_kafka PRIVATE conda_env) +endif() + set_target_properties( cudf_kafka PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" # set target compile options From e10b1e9a55beadd28172d7852aeebddee8e1454c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 09:24:23 -0500 Subject: [PATCH 45/70] Disable PROJECT_FLASH test --- ci/cpu/build.sh | 3 ++- ci/cpu/prebuild.sh | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 
00dffa57683..3422f22a3d8 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -63,7 +63,8 @@ conda config --set ssl_verify False ################################################################################ # BUILD - Conda package builds ################################################################################ - +echo "!!!! PROJECT_FLASH: ${PROJECT_FLASH}" +PROJECT_FLASH="0" if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then CONDA_BUILD_ARGS="" CONDA_CHANNEL="" diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 746c0005a47..17b7cfd8bb0 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -29,8 +29,10 @@ else export UPLOAD_LIBCUDF_KAFKA=0 fi +PROJECT_FLASH="0" if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then #If project flash is not activate, always build both + echo "!!!! PROJECT FLASH !!!!" export BUILD_LIBCUDF=1 export BUILD_CUDF=1 fi From 0047d646b0e9fc9df791bc56b6866b77781466e7 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 11:19:12 -0500 Subject: [PATCH 46/70] test manually specifying python version --- conda/recipes/cudf_kafka/meta.yaml | 6 +++--- conda/recipes/custreamz/meta.yaml | 8 ++++---- conda/recipes/libcudf_kafka/meta.yaml | 2 ++ 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 54ddcccf4d8..35d1453bbaf 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -26,15 +26,15 @@ requirements: build: - cmake >=3.20.1 host: - - python + - python=3.8 - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} - setuptools run: - - python + - python=3.8 - libcudf_kafka {{ version }} - - python-confluent-kafka >=1.7.0,<1.8.0a0 + - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf {{ version }} test: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index db8af9b0bed..597a5ae5b70 100644 --- 
a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -24,16 +24,16 @@ build: requirements: host: - - python - - python-confluent-kafka + - python=3.8 + - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf_kafka {{ version }} run: - - python + - python=3.8 - streamz - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 - distributed>=2021.11.1,<=2021.11.2 - - python-confluent-kafka + - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf_kafka {{ version }} test: # [linux64] diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index d82d50c62e5..34ca3f5cd8f 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -25,9 +25,11 @@ requirements: build: - cmake >=3.20.1 host: + - python=3.8 - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: + - python=3.8 - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: From 6af1cd1de4009a2deb428baf1bd16b4832f4728a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 12:41:12 -0500 Subject: [PATCH 47/70] Re-enable PROJECT_FLASH support --- ci/cpu/build.sh | 2 -- ci/cpu/prebuild.sh | 2 -- 2 files changed, 4 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 3422f22a3d8..03238ba84aa 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -63,8 +63,6 @@ conda config --set ssl_verify False ################################################################################ # BUILD - Conda package builds ################################################################################ -echo "!!!! 
PROJECT_FLASH: ${PROJECT_FLASH}" -PROJECT_FLASH="0" if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then CONDA_BUILD_ARGS="" CONDA_CHANNEL="" diff --git a/ci/cpu/prebuild.sh b/ci/cpu/prebuild.sh index 17b7cfd8bb0..746c0005a47 100755 --- a/ci/cpu/prebuild.sh +++ b/ci/cpu/prebuild.sh @@ -29,10 +29,8 @@ else export UPLOAD_LIBCUDF_KAFKA=0 fi -PROJECT_FLASH="0" if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then #If project flash is not activate, always build both - echo "!!!! PROJECT FLASH !!!!" export BUILD_LIBCUDF=1 export BUILD_CUDF=1 fi From 25a27ef8b4bace06d9ece8a46a295c295e2297b3 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 16:40:21 -0500 Subject: [PATCH 48/70] use environment version of python --- ci/cpu/build.sh | 1 + ci/gpu/build.sh | 6 ------ conda/recipes/cudf_kafka/meta.yaml | 8 ++++---- conda/recipes/custreamz/meta.yaml | 8 ++++---- conda/recipes/libcudf_kafka/meta.yaml | 5 +++-- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 03238ba84aa..00dffa57683 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -63,6 +63,7 @@ conda config --set ssl_verify False ################################################################################ # BUILD - Conda package builds ################################################################################ + if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then CONDA_BUILD_ARGS="" CONDA_CHANNEL="" diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 928f11f7ec3..92036793fb8 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -65,9 +65,7 @@ gpuci_logger "Check environment variables" env gpuci_logger "Check GPU usage" -set +e nvidia-smi -set -e gpuci_logger "Activate conda env" . 
/opt/conda/etc/profile.d/conda.sh @@ -143,9 +141,7 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then exit 0 else gpuci_logger "Check GPU usage" - set +e nvidia-smi - set -e gpuci_logger "GoogleTests" set -x @@ -168,9 +164,7 @@ else fi gpuci_logger "Check GPU usage" - set +e nvidia-smi - set -e gpuci_logger "GoogleTests" set -x diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 35d1453bbaf..fc3cc1d826c 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version=environ.get('CONDA_PY', 36) %} {% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version=environ.get('PYTHON', 3.8) %} package: name: cudf_kafka @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - CC - CXX @@ -26,13 +26,13 @@ requirements: build: - cmake >=3.20.1 host: - - python=3.8 + - python {{ py_version }} - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} - setuptools run: - - python=3.8 + - python {{ py_version }} - libcudf_kafka {{ version }} - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf {{ version }} diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 597a5ae5b70..c869a765d08 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set py_version=environ.get('CONDA_PY', 36) %} {% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version=environ.get('PYTHON', 3.8) %} package: name: custreamz @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version.replace(',', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - VERSION_SUFFIX - PARALLEL_LEVEL @@ -24,11 +24,11 @@ build: requirements: host: - - python=3.8 + - python {{ py_version }} - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf_kafka {{ version }} run: - - python=3.8 + - python {{ py_version }} - streamz - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 34ca3f5cd8f..1ed0d40f477 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -2,6 +2,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} +{% set py_version=environ.get('PYTHON', 3.8) %} package: name: libcudf_kafka @@ -25,11 +26,11 @@ requirements: build: - cmake >=3.20.1 host: - - python=3.8 + - python {{ py_version }} - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: - - python=3.8 + - python {{ py_version }} - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: From a867d1b716f37d8c9e187c7698bedbdcf8448de2 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 17:29:48 -0500 Subject: [PATCH 49/70] fix typo --- conda/recipes/custreamz/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index c869a765d08..a476ee470a9 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version.replace(',', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - VERSION_SUFFIX - PARALLEL_LEVEL From e5d4f59b4e1b2bd1d5b22948af4716f988aaba7c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 6 Dec 2021 22:16:50 -0500 Subject: [PATCH 50/70] change version of python --- conda/recipes/libcudf_kafka/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 1ed0d40f477..54c2f710280 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -2,7 +2,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set py_version=environ.get('PYTHON', 3.8) %} +{% set py_version='.'.join([char for char in environ.get('CONDA_PY', 36)]) %} package: name: libcudf_kafka From 2f582bb87182475f7800fe2109f0a0997c480b6a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 7 Dec 2021 10:24:05 -0500 Subject: [PATCH 51/70] libcudf_kafka is not being passed a conda python environment variable --- conda/recipes/libcudf_kafka/meta.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index 54c2f710280..bac65a42df2 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -2,7 +2,6 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set py_version='.'.join([char for char in environ.get('CONDA_PY', 36)]) %} package: name: libcudf_kafka @@ -26,11 +25,11 @@ requirements: build: - cmake >=3.20.1 host: - - python {{ py_version }} + - python 3.8 - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: - - python {{ py_version }} + - python 3.8 - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: From 33c9cbb7ea9100d06aeed1cbd163c3a08a468c45 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 7 Dec 2021 11:24:59 -0500 Subject: [PATCH 52/70] use python instead of PYTHON environment variable --- conda/recipes/cudf_kafka/meta.yaml | 4 ++-- conda/recipes/custreamz/meta.yaml | 8 ++++---- conda/recipes/libcudf_kafka/meta.yaml | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index fc3cc1d826c..ad64beec161 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -2,8 +2,8 @@ {% 
set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version=environ.get('PYTHON', 3.8) %} +{% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version = environ.get('python', 3.8) %} package: name: cudf_kafka diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index a476ee470a9..68eb17b132e 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} -{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version=environ.get('PYTHON', 3.8) %} +{% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version = environ.get('python', 3.8) %} package: name: custreamz @@ -25,7 +25,7 @@ build: requirements: host: - python {{ py_version }} - - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* + - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf_kafka {{ version }} run: - python {{ py_version }} @@ -33,7 +33,7 @@ requirements: - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 - distributed>=2021.11.1,<=2021.11.2 - - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* + - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf_kafka {{ version }} test: # [linux64] diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index bac65a42df2..db10d0b2373 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -2,6 +2,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', 
'0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ.get('python', '3.8') %} package: name: libcudf_kafka @@ -25,11 +26,11 @@ requirements: build: - cmake >=3.20.1 host: - - python 3.8 + - python {{ py_version }} - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: - - python 3.8 + - python {{ py_version }} - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: From 0cd3fe91cc2f81f79ff1876808eac7ba76f9343b Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 7 Dec 2021 13:48:52 -0500 Subject: [PATCH 53/70] make versions strings instead of floats --- conda/recipes/cudf_kafka/meta.yaml | 2 +- conda/recipes/custreamz/meta.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index ad64beec161..356c0004271 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} {% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version = environ.get('python', 3.8) %} +{% set py_version = environ.get('python', '3.8') %} package: name: cudf_kafka diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 68eb17b132e..519893b64ab 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -3,7 +3,7 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} {% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version = environ.get('python', 3.8) %} +{% set py_version = environ.get('python', '3.8') %} package: name: custreamz From 86f02c6f81abb6a4a77715cac7a238023daac440 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 8 Dec 2021 18:41:57 -0500 Subject: [PATCH 54/70] removed references in python in cpp --- conda/recipes/libcudf_kafka/meta.yaml | 3 -- .../include/cudf_kafka/kafka_callback.hpp | 5 +-- .../include/cudf_kafka/kafka_consumer.hpp | 3 -- cpp/libcudf_kafka/src/kafka_callback.cpp | 37 +++++-------------- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 4 +- 5 files changed, 13 insertions(+), 39 deletions(-) diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index db10d0b2373..d82d50c62e5 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -2,7 +2,6 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set py_version = environ.get('python', '3.8') %} package: name: libcudf_kafka @@ -26,11 +25,9 @@ requirements: build: - cmake >=3.20.1 host: - - python {{ py_version }} - libcudf {{version}} - librdkafka >=1.7.0,<1.8.0a0 run: - - python {{ py_version }} - {{ pin_compatible('librdkafka', max_pin='x.x') }} #TODO: librdkafka should be automatically included here by run_exports but is not test: diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 885ccaf2d68..eb48a89aa90 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -15,9 +15,6 @@ */ #pragma once -#define PY_SSIZE_T_CLEAN -#include - #include #include #include @@ -47,7 +44,7 @@ namespace kafka { * time is retrieved from an external service by the callback. * Ex: [token, token_expiration_in_epoch] */ -using kafka_oauth_callback_type = std::function; +using kafka_oauth_callback_type = void*; /** * @brief Callback to retrieve OAuth token from external source. 
Invoked when diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 096b2495cb1..480a6461260 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -15,9 +15,6 @@ */ #pragma once -#define PY_SSIZE_T_CLEAN -#include - #include "kafka_callback.hpp" #include diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index bd266b4ef9f..bc5498ca30d 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -28,33 +28,16 @@ python_oauth_refresh_callback::python_oauth_refresh_callback(kafka_oauth_callbac void python_oauth_refresh_callback::oauthbearer_token_refresh_cb( RdKafka::Handle* handle, const std::string& oauthbearer_config) { - // Make sure that we own the GIL - PyGILState_STATE state = PyGILState_Ensure(); - PyObject* result = oauth_callback_(); - Py_XINCREF(result); - - // Set the token in the Kafka context - if (result) { - CUDF_EXPECTS(PyTuple_Check(result) && PyTuple_Size(result) == 2, - "cudf_kafka requires a Tuple response with 2 entries from the " - "PythonOAuthRefreshCb containing [token, token_expiration_ms_in_epoch"); - - // Ensure that expected keys are present from the Python callback response. 
- std::string token = PyUnicode_AsUTF8(PyTuple_GetItem(result, 0)); - int64_t token_lifetime_ms = PyLong_AsLongLong(PyTuple_GetItem(result, 1)); - std::list extensions; // currently not supported - std::string errstr; - - CUDF_EXPECTS( - RdKafka::ErrorCode::ERR_NO_ERROR == - handle->oauthbearer_set_token(token, token_lifetime_ms, "kafka", extensions, errstr), - "Error occurred while setting the oauthbearer token"); - } else { - handle->oauthbearer_set_token_failure("Unable to acquire oauth bearer token"); - } - - Py_XDECREF(result); - PyGILState_Release(state); + printf("!!!!Invoking the python_oauth_callback!!!!\n"); + std::string token = ((std::string(*)())oauth_callback_)(); + printf("Response Token: %s\n", token); + int64_t token_lifetime_ms = 100000; + std::list extensions; // currently not supported + std::string errstr; + CUDF_EXPECTS( + RdKafka::ErrorCode::ERR_NO_ERROR == + handle->oauthbearer_set_token(token, token_lifetime_ms, "kafka", extensions, errstr), + "Error occurred while setting the oauthbearer token"); } } // namespace kafka diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index d5acb641ed7..cfffaeb952d 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -25,12 +25,12 @@ cdef class KafkaDatasource(Datasource): string delimiter=b"",): cdef map[string, string] configs - cdef PyObject* (*cb)() + cdef void *cb for key in kafka_configs: if key == 'oauth_cb': if callable(kafka_configs[key]): - print("oauth_cb type: " + str(type(kafka_configs[key]))) + cb = kafka_configs[key] else: raise TypeError("'oauth_cb' configuration must \ be a Python callable object") From c249b5fddfb83181c038b351a0849b16f2599ba7 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 12:24:52 -0500 Subject: [PATCH 55/70] introduce wrapper --- .../include/cudf_kafka/kafka_callback.hpp | 5 ++-- .../include/cudf_kafka/kafka_consumer.hpp | 3 +++ 
cpp/libcudf_kafka/src/kafka_callback.cpp | 14 +++++++---- cpp/libcudf_kafka/src/kafka_consumer.cpp | 8 ++++-- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 25 ++++++++++++++++--- 5 files changed, 42 insertions(+), 13 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index eb48a89aa90..a89f68a325a 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -44,7 +44,7 @@ namespace kafka { * time is retrieved from an external service by the callback. * Ex: [token, token_expiration_in_epoch] */ -using kafka_oauth_callback_type = void*; +using kafka_oauth_callback_type = std::map (*)(void*); /** * @brief Callback to retrieve OAuth token from external source. Invoked when @@ -52,12 +52,13 @@ using kafka_oauth_callback_type = void*; */ class python_oauth_refresh_callback : public RdKafka::OAuthBearerTokenRefreshCb { public: - python_oauth_refresh_callback(kafka_oauth_callback_type cb); + python_oauth_refresh_callback(kafka_oauth_callback_type cb, void* python_callable); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: kafka_oauth_callback_type oauth_callback_; + void* python_callable_; }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index 480a6461260..b164a54749d 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -58,6 +58,7 @@ class kafka_consumer : public cudf::io::datasource { * token when it expireskafka_oauth_callback_t */ kafka_consumer(std::map configs, + void* python_callable, kafka_oauth_callback_type oauth_callback); /** @@ -80,6 +81,7 @@ class kafka_consumer : public cudf::io::datasource { * @param delimiter optional delimiter to insert into the output 
between kafka messages, Ex: "\n" */ kafka_consumer(std::map configs, + void* python_callable, kafka_oauth_callback_type oauth_callback, std::string const& topic_name, int partition, @@ -193,6 +195,7 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr consumer; std::map configs; + void* python_callable_; kafka_oauth_callback_type oauth_callback_; std::string topic_name; diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index bc5498ca30d..df5edbf1000 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -22,16 +22,20 @@ namespace io { namespace external { namespace kafka { -python_oauth_refresh_callback::python_oauth_refresh_callback(kafka_oauth_callback_type cb) - : oauth_callback_(cb){}; +python_oauth_refresh_callback::python_oauth_refresh_callback(kafka_oauth_callback_type cb, + void* python_callable) + : oauth_callback_(cb), python_callable_(python_callable){}; void python_oauth_refresh_callback::oauthbearer_token_refresh_cb( RdKafka::Handle* handle, const std::string& oauthbearer_config) { printf("!!!!Invoking the python_oauth_callback!!!!\n"); - std::string token = ((std::string(*)())oauth_callback_)(); - printf("Response Token: %s\n", token); - int64_t token_lifetime_ms = 100000; + + std::map resp = oauth_callback_(python_callable_); + + // Build parameters to pass to librdkafka + std::string token = resp["token"]; + int64_t token_lifetime_ms = std::stoll(resp["token_expiration_in_epoch"]); std::list extensions; // currently not supported std::string errstr; CUDF_EXPECTS( diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 7a9ad953e05..8279c7c031a 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -26,8 +26,10 @@ namespace external { namespace kafka { kafka_consumer::kafka_consumer(std::map configs, + void* python_callable, 
kafka_oauth_callback_type oauth_callback) : configs(configs), + python_callable_(python_callable), oauth_callback_(oauth_callback), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { @@ -40,7 +42,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... want to make sure this works std::string error_string; - python_oauth_refresh_callback cb(oauth_callback); + python_oauth_refresh_callback cb(oauth_callback, python_callable_); kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); // Kafka 0.9 > requires group.id in the configuration @@ -54,6 +56,7 @@ kafka_consumer::kafka_consumer(std::map configs, } kafka_consumer::kafka_consumer(std::map configs, + void* python_callable, kafka_oauth_callback_type oauth_callback, std::string const& topic_name, int partition, @@ -62,6 +65,7 @@ kafka_consumer::kafka_consumer(std::map configs, int batch_timeout, std::string const& delimiter) : configs(configs), + python_callable_(python_callable), oauth_callback_(oauth_callback), topic_name(topic_name), partition(partition), @@ -80,7 +84,7 @@ kafka_consumer::kafka_consumer(std::map configs, // TODO: Just for testing ... want to make sure this works std::string error_string; - python_oauth_refresh_callback cb(oauth_callback); + python_oauth_refresh_callback cb(oauth_callback, python_callable_); CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), "Failed to set Kafka oauth callback"); diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index cfffaeb952d..aa79c11e5c2 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,7 +1,6 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
cimport cpython -from cpython.ref cimport PyObject from libc.stdint cimport int32_t, int64_t from libcpp cimport bool from libcpp.map cimport map @@ -12,6 +11,21 @@ from cudf._lib.cpp.io.types cimport datasource from cudf_kafka._lib.kafka cimport kafka_consumer +import functools + + +cdef map[string, string] oauth_callback_wrapper(void *ctx): + print("Entering oauth_callback_wrapper") + + # ctx is a functools.partial + func, args = (ctx) + + # Never makes it here, ^^ + print("Func: " + str(func)) + print("Args: " + str(args)) + ret = func(*args) + return ret + cdef class KafkaDatasource(Datasource): @@ -25,12 +39,14 @@ cdef class KafkaDatasource(Datasource): string delimiter=b"",): cdef map[string, string] configs - cdef void *cb + cdef void* python_callable + cdef map[string, string] (*cb)(void *) for key in kafka_configs: if key == 'oauth_cb': if callable(kafka_configs[key]): - cb = kafka_configs[key] + python_callable = kafka_configs[key] + cb = &oauth_callback_wrapper else: raise TypeError("'oauth_cb' configuration must \ be a Python callable object") @@ -40,6 +56,7 @@ cdef class KafkaDatasource(Datasource): if topic != b"" and partition != -1: self.c_datasource = \ make_unique[kafka_consumer](configs, + python_callable, cb, topic, partition, @@ -49,7 +66,7 @@ cdef class KafkaDatasource(Datasource): delimiter) else: self.c_datasource = \ - make_unique[kafka_consumer](configs, cb) + make_unique[kafka_consumer](configs, python_callable, cb) cdef datasource* get_datasource(self) nogil: return self.c_datasource.get() From bcfae4a1ac3aed6490f0d267d3f10118a2369585 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 15:06:01 -0500 Subject: [PATCH 56/70] Refactor to use functools.partial --- .../include/cudf_kafka/kafka_callback.hpp | 20 ++++++----- .../include/cudf_kafka/kafka_consumer.hpp | 30 ++++++++-------- cpp/libcudf_kafka/src/kafka_callback.cpp | 10 +++--- cpp/libcudf_kafka/src/kafka_consumer.cpp | 36 ++++++++++--------- 
python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 29 ++++++--------- 5 files changed, 62 insertions(+), 63 deletions(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index a89f68a325a..584f345c764 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -27,24 +27,25 @@ namespace external { namespace kafka { /** - * @brief Callback function type used for Kafka OAuth events + * @brief Python Callback function wrapper type used for Kafka OAuth events * - * The KafkaConsumer calls the `kafka_oauth_callback_t` when the existing + * The KafkaConsumer calls the `kafka_oauth_callback_wrapper_type` when the existing * oauth token is considered expired by the KafkaConsumer. Typically that * means this will be invoked a single time when the KafkaConsumer is created * to get the initial token and then intermediately as the token becomes * expired. * * The callback function signature is: - * `PyObject* kafka_oauth_callback_t() + * `std::map kafka_oauth_callback_wrapper_type(void*)` * - * The callback function returns a PyObject, Python Tuple, - * where the Tuple consists of the Oauth token and its + * The callback function returns a std::map, + * where the std::map consists of the Oauth token and its * linux epoch expiration time. Generally the token and expiration * time is retrieved from an external service by the callback. * Ex: [token, token_expiration_in_epoch] */ -using kafka_oauth_callback_type = std::map (*)(void*); +using kafka_oauth_callback_wrapper_type = std::map (*)(void*); +using python_callable_type = void*; /** * @brief Callback to retrieve OAuth token from external source. 
Invoked when @@ -52,13 +53,14 @@ using kafka_oauth_callback_type = std::map (*)(void*); */ class python_oauth_refresh_callback : public RdKafka::OAuthBearerTokenRefreshCb { public: - python_oauth_refresh_callback(kafka_oauth_callback_type cb, void* python_callable); + python_oauth_refresh_callback(kafka_oauth_callback_wrapper_type callback_wrapper, + python_callable_type python_callable); void oauthbearer_token_refresh_cb(RdKafka::Handle* handle, const std::string& oauthbearer_config); private: - kafka_oauth_callback_type oauth_callback_; - void* python_callable_; + kafka_oauth_callback_wrapper_type callback_wrapper_; + python_callable_type python_callable_; }; } // namespace kafka diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index b164a54749d..e1617baba2e 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -52,14 +52,15 @@ class kafka_consumer : public cudf::io::datasource { * * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client - * @param oauth_callback `kafka_oauth_callback_t` Python callable provided by the - * user to this constructor. This callback is responsible for both - * retrieving the initial OAuth token and refreshing the OAuth - * token when it expireskafka_oauth_callback_t + * @param python_callable `python_callable_type` pointer to a Python functools.partial object + * @param callable_wrapper `kafka_oauth_callback_wrapper_type` Cython wrapper that will + * be used to invoke the `python_callable`. This wrapper serves the purpose + * of preventing us from having to link against the Python development library + * in libcudf_kafka. 
*/ kafka_consumer(std::map configs, - void* python_callable, - kafka_oauth_callback_type oauth_callback); + python_callable_type python_callable, + kafka_oauth_callback_wrapper_type callable_wrapper); /** * @brief Instantiate a Kafka consumer object. Documentation for librdkafka configurations can be @@ -67,10 +68,11 @@ class kafka_consumer : public cudf::io::datasource { * * @param configs key/value pairs of librdkafka configurations that will be * passed to the librdkafka client - * @param oauth_callback `kafka_oauth_callback_t` Python callable provided by the - * user to this constructor. This callback is responsible for both - * retrieving the initial OAuth token and refreshing the OAuth - * token when it expires + * @param python_callable `python_callable_type` pointer to a Python functools.partial object + * @param callable_wrapper `kafka_oauth_callback_wrapper_type` Cython wrapper that will + * be used to invoke the `python_callable`. This wrapper serves the purpose + * of preventing us from having to link against the Python development library + * in libcudf_kafka. 
* @param topic_name name of the Kafka topic to consume from * @param partition partition index to consume from between `0` and `TOPIC_NUM_PARTITIONS - 1` * inclusive @@ -81,8 +83,8 @@ class kafka_consumer : public cudf::io::datasource { * @param delimiter optional delimiter to insert into the output between kafka messages, Ex: "\n" */ kafka_consumer(std::map configs, - void* python_callable, - kafka_oauth_callback_type oauth_callback, + python_callable_type python_callable, + kafka_oauth_callback_wrapper_type callable_wrapper, std::string const& topic_name, int partition, int64_t start_offset, @@ -195,8 +197,8 @@ class kafka_consumer : public cudf::io::datasource { std::unique_ptr consumer; std::map configs; - void* python_callable_; - kafka_oauth_callback_type oauth_callback_; + python_callable_type python_callable_; + kafka_oauth_callback_wrapper_type callable_wrapper_; std::string topic_name; int partition; diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index df5edbf1000..dea36c7c04b 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -22,16 +22,14 @@ namespace io { namespace external { namespace kafka { -python_oauth_refresh_callback::python_oauth_refresh_callback(kafka_oauth_callback_type cb, - void* python_callable) - : oauth_callback_(cb), python_callable_(python_callable){}; +python_oauth_refresh_callback::python_oauth_refresh_callback( + kafka_oauth_callback_wrapper_type callback_wrapper, python_callable_type python_callable) + : callback_wrapper_(callback_wrapper), python_callable_(python_callable){}; void python_oauth_refresh_callback::oauthbearer_token_refresh_cb( RdKafka::Handle* handle, const std::string& oauthbearer_config) { - printf("!!!!Invoking the python_oauth_callback!!!!\n"); - - std::map resp = oauth_callback_(python_callable_); + std::map resp = callback_wrapper_(python_callable_); // Build parameters to pass to librdkafka std::string token 
= resp["token"]; diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 8279c7c031a..183bb465741 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -26,11 +26,11 @@ namespace external { namespace kafka { kafka_consumer::kafka_consumer(std::map configs, - void* python_callable, - kafka_oauth_callback_type oauth_callback) + python_callable_type python_callable, + kafka_oauth_callback_wrapper_type callable_wrapper) : configs(configs), python_callable_(python_callable), - oauth_callback_(oauth_callback), + callable_wrapper_(callable_wrapper), kafka_conf(RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL)) { for (auto const& key_value : configs) { @@ -40,10 +40,13 @@ kafka_consumer::kafka_consumer(std::map configs, "Invalid Kafka configuration"); } - // TODO: Just for testing ... want to make sure this works - std::string error_string; - python_oauth_refresh_callback cb(oauth_callback, python_callable_); - kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string); + if (python_callable_ != nullptr) { + std::string error_string; + python_oauth_refresh_callback cb(callable_wrapper_, python_callable_); + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), + "Failed to set Kafka oauth callback"); + } // Kafka 0.9 > requires group.id in the configuration std::string conf_val; @@ -56,8 +59,8 @@ kafka_consumer::kafka_consumer(std::map configs, } kafka_consumer::kafka_consumer(std::map configs, - void* python_callable, - kafka_oauth_callback_type oauth_callback, + python_callable_type python_callable, + kafka_oauth_callback_wrapper_type callback_wrapper, std::string const& topic_name, int partition, int64_t start_offset, @@ -66,7 +69,7 @@ kafka_consumer::kafka_consumer(std::map configs, std::string const& delimiter) : configs(configs), python_callable_(python_callable), - oauth_callback_(oauth_callback), 
+ callable_wrapper_(callback_wrapper), topic_name(topic_name), partition(partition), start_offset(start_offset), @@ -82,12 +85,13 @@ kafka_consumer::kafka_consumer(std::map configs, "Invalid Kafka configuration"); } - // TODO: Just for testing ... want to make sure this works - std::string error_string; - python_oauth_refresh_callback cb(oauth_callback, python_callable_); - CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == - kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), - "Failed to set Kafka oauth callback"); + if (python_callable_ != nullptr) { + std::string error_string; + python_oauth_refresh_callback cb(callable_wrapper_, python_callable_); + CUDF_EXPECTS(RdKafka::Conf::ConfResult::CONF_OK == + kafka_conf->set("oauthbearer_token_refresh_cb", &cb, error_string), + "Failed to set Kafka oauth callback"); + } // Kafka 0.9 > requires group.id in the configuration std::string conf_val; diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index aa79c11e5c2..95fa2ce1734 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -2,7 +2,7 @@ cimport cpython from libc.stdint cimport int32_t, int64_t -from libcpp cimport bool +from libcpp cimport bool, nullptr from libcpp.map cimport map from libcpp.memory cimport make_unique, unique_ptr from libcpp.string cimport string @@ -11,20 +11,11 @@ from cudf._lib.cpp.io.types cimport datasource from cudf_kafka._lib.kafka cimport kafka_consumer -import functools - +# To avoid including in libcudf_kafka +# we introduce this wrapper in Cython cdef map[string, string] oauth_callback_wrapper(void *ctx): - print("Entering oauth_callback_wrapper") - - # ctx is a functools.partial - func, args = (ctx) - - # Never makes it here, ^^ - print("Func: " + str(func)) - print("Args: " + str(args)) - ret = func(*args) - return ret + return ((ctx))() cdef class KafkaDatasource(Datasource): @@ -39,14 +30,14 @@ cdef class 
KafkaDatasource(Datasource): string delimiter=b"",): cdef map[string, string] configs - cdef void* python_callable - cdef map[string, string] (*cb)(void *) + cdef void* python_callable = nullptr + cdef map[string, string] (*python_callable_wrapper)(void *) for key in kafka_configs: if key == 'oauth_cb': if callable(kafka_configs[key]): python_callable = kafka_configs[key] - cb = &oauth_callback_wrapper + python_callable_wrapper = &oauth_callback_wrapper else: raise TypeError("'oauth_cb' configuration must \ be a Python callable object") @@ -57,7 +48,7 @@ cdef class KafkaDatasource(Datasource): self.c_datasource = \ make_unique[kafka_consumer](configs, python_callable, - cb, + python_callable_wrapper, topic, partition, start_offset, @@ -66,7 +57,9 @@ cdef class KafkaDatasource(Datasource): delimiter) else: self.c_datasource = \ - make_unique[kafka_consumer](configs, python_callable, cb) + make_unique[kafka_consumer](configs, + python_callable, + python_callable_wrapper) cdef datasource* get_datasource(self) nogil: return self.c_datasource.get() From daca5220a306346856da837a9a6eb6dd38333d2e Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 15:09:02 -0500 Subject: [PATCH 57/70] Remove python find_package command from cmake --- cpp/libcudf_kafka/CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index a8f2785791c..7e575312fbd 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -47,9 +47,6 @@ rapids_cpm_init() include(cmake/thirdparty/get_cudf.cmake) include(cmake/thirdparty/get_rdkafka.cmake) -# Locate Python Development headers -find_package(Python3 REQUIRED COMPONENTS Interpreter Development) - # # GTests if enabled if(BUILD_TESTS) # GoogleTest @@ -73,7 +70,7 @@ target_include_directories( # ################################################################################################## # * library paths 
--------------------------------------------------------------------------------- -target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA Python3::Python) +target_link_libraries(cudf_kafka PUBLIC cudf::cudf RDKAFKA::RDKAFKA) # Add Conda library, and include paths if specified if(TARGET conda_env) From ac990199648bb91932f5b220c5ac31b0a8fdbc5a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 16:05:52 -0500 Subject: [PATCH 58/70] Update test syntax after refactoring --- .../tests/kafka_consumer_tests.cpp | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index cf9fd8882a4..38a62f10efd 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -34,10 +34,13 @@ TEST_F(KafkaDatasourceTest, MissingGroupID) std::map kafka_configs; kafka_configs["bootstrap.servers"] = "localhost:9092"; - kafka::kafka_oauth_callback_type callback; + kafka::python_callable_type python_callable; + kafka::kafka_oauth_callback_wrapper_type callback_wrapper; - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), - cudf::logic_error); + EXPECT_THROW( + kafka::kafka_consumer kc( + kafka_configs, python_callable, callback_wrapper, "csv-topic", 0, 0, 3, 5000, "\n"), + cudf::logic_error); } TEST_F(KafkaDatasourceTest, InvalidConfigValues) @@ -46,15 +49,20 @@ TEST_F(KafkaDatasourceTest, InvalidConfigValues) std::map kafka_configs; kafka_configs["completely_made_up_config"] = "wrong"; - kafka::kafka_oauth_callback_type callback; + kafka::python_callable_type python_callable; + kafka::kafka_oauth_callback_wrapper_type callback_wrapper; - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), - cudf::logic_error); + EXPECT_THROW( + kafka::kafka_consumer kc( + kafka_configs, python_callable, 
callback_wrapper, "csv-topic", 0, 0, 3, 5000, "\n"), + cudf::logic_error); // Give a good config property with a bad value kafka_configs.clear(); kafka_configs["message.max.bytes"] = "this should be a number not text"; - EXPECT_THROW(kafka::kafka_consumer kc(kafka_configs, callback, "csv-topic", 0, 0, 3, 5000, "\n"), - cudf::logic_error); + EXPECT_THROW( + kafka::kafka_consumer kc( + kafka_configs, python_callable, callback_wrapper, "csv-topic", 0, 0, 3, 5000, "\n"), + cudf::logic_error); } From cc2800051bbb4456448b00058560be7bcceddb7c Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 19:24:45 -0500 Subject: [PATCH 59/70] Remove Python versions from anaconda builds to satisfy Java build process --- conda/recipes/cudf_kafka/meta.yaml | 7 +++---- conda/recipes/custreamz/meta.yaml | 14 +++++++------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 356c0004271..54f56b6ad38 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} +{% set py_version=environ.get('CONDA_PY', 36) %} {% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version = environ.get('python', '3.8') %} package: name: cudf_kafka @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - CC - CXX @@ -26,13 +26,12 @@ requirements: build: - cmake >=3.20.1 host: - - python {{ py_version }} + - python - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} - setuptools run: - - python {{ py_version }} - libcudf_kafka {{ version }} - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* - cudf {{ version }} diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 519893b64ab..d57005da5c3 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} -{% set py_version = environ.get('python', '3.8') %} +{% set py_version=environ.get('CONDA_PY', 36) %} +{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} package: name: custreamz @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - VERSION_SUFFIX - PARALLEL_LEVEL @@ -24,16 +24,16 @@ build: requirements: host: - - python {{ py_version }} - - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* + - python + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka {{ version }} run: - - python {{ py_version }} + - python - streamz - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 - distributed>=2021.11.1,<=2021.11.2 - - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* + - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka {{ version }} test: # [linux64] From 7e59211b3afbe2d668d160051cb33fbc2784a826 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 10 Dec 2021 23:40:48 -0500 Subject: [PATCH 60/70] update conda recipes to get the correct version of python-confluent-kafka for java --- conda/recipes/cudf_kafka/meta.yaml | 9 +++++---- conda/recipes/custreamz/meta.yaml | 14 +++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 54f56b6ad38..dce203c93f9 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set py_version=environ.get('CONDA_PY', 36) %} {% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version = environ.get('python', '3.8') %} package: name: cudf_kafka @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - CC - CXX @@ -26,14 +26,15 @@ requirements: build: - cmake >=3.20.1 host: - - python + - python {{ py_version }} - cython >=0.29,<0.30 - cudf {{ version }} - libcudf_kafka {{ version }} - setuptools run: + - python {{ py_version }} - libcudf_kafka {{ version }} - - python-confluent-kafka >=1.7.0,<1.8.0a0=py38* + - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf {{ version }} test: # [linux64] diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index d57005da5c3..519893b64ab 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -2,8 +2,8 @@ {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} -{% set py_version=environ.get('CONDA_PY', 36) %} -{% set cuda_version='.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set cuda_version = '.'.join(environ.get('CUDA', '10.1').split('.')[:2]) %} +{% set py_version = environ.get('python', '3.8') %} package: name: custreamz @@ -14,7 +14,7 @@ source: build: number: {{ GIT_DESCRIBE_NUMBER }} - string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version.replace('.', '') }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} script_env: - VERSION_SUFFIX - PARALLEL_LEVEL @@ -24,16 +24,16 @@ build: requirements: host: - - python - - python-confluent-kafka >=1.7.0,<1.8.0a0 + - python {{ py_version }} + - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf_kafka {{ version }} run: - - python + - python {{ py_version }} - streamz - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 - distributed>=2021.11.1,<=2021.11.2 - - python-confluent-kafka >=1.7.0,<1.8.0a0 + - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf_kafka {{ version }} test: # [linux64] From ac46705afe577ce0579df947e9da8abae43ae43f Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 12 Dec 2021 12:06:57 -0500 Subject: [PATCH 61/70] include librdkafka 1.7.0 in Java test gpu CI script --- ci/gpu/java.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh index bada16bd40e..c20cc6e1bb7 100755 --- a/ci/gpu/java.sh +++ b/ci/gpu/java.sh @@ -87,6 +87,8 @@ gpuci_conda_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_conda_retry install -y "your-pkg=1.0.0" +gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" From 1e1dc90b268dd3a8a8c0c7ff319a21157180f472 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer 
Date: Mon, 13 Dec 2021 09:41:51 -0500 Subject: [PATCH 62/70] add back conda installs for ops test --- ci/gpu/build.sh | 2 ++ ci/gpu/java.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 08ca00ff852..4d474f964f6 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -88,6 +88,8 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_mamba_retry install -y "your-pkg=1.0.0" +gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh index bada16bd40e..c20cc6e1bb7 100755 --- a/ci/gpu/java.sh +++ b/ci/gpu/java.sh @@ -87,6 +87,8 @@ gpuci_conda_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_conda_retry install -y "your-pkg=1.0.0" +gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" From 53f5465f571c732280bb3ce837f113420d1d4625 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Mon, 13 Dec 2021 09:47:47 -0500 Subject: [PATCH 63/70] clang formatting --- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 584f345c764..1cc3672db38 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include #include From e0c70487015e017cc542eff0f59e87ec69f4ce9a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 14 Dec 2021 10:18:34 -0500 Subject: [PATCH 64/70] remove 
manual librdkafka updates now that gpuci integrations have been updated --- ci/gpu/build.sh | 2 -- ci/gpu/java.sh | 2 -- 2 files changed, 4 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4d474f964f6..08ca00ff852 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -88,8 +88,6 @@ gpuci_mamba_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_mamba_retry install -y "your-pkg=1.0.0" -gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh index c20cc6e1bb7..bada16bd40e 100755 --- a/ci/gpu/java.sh +++ b/ci/gpu/java.sh @@ -87,8 +87,6 @@ gpuci_conda_retry install -y \ # https://docs.rapids.ai/maintainers/depmgmt/ # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env # gpuci_conda_retry install -y "your-pkg=1.0.0" -gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env -gpuci_mamba_retry install -y "librdkafka=1.7.0" gpuci_logger "Check compiler versions" From eba67e23d6e53ed82473eea4385a10960af7f472 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Fri, 17 Dec 2021 19:31:12 -0500 Subject: [PATCH 65/70] Fix missed merge conflict --- conda/recipes/custreamz/meta.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 122dfe45b79..604c60d3e34 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -28,13 +28,8 @@ requirements: - python-confluent-kafka >=1.7.0,<1.8.0a0=py{{ py_version.replace('.', '') }}* - cudf_kafka {{ version }} run: -<<<<<<< HEAD - python {{ py_version }} - streamz -======= - - python - - streamz ->>>>>>> upstream/branch-22.02 - cudf {{ version }} - dask>=2021.11.1,<=2021.11.2 - distributed>=2021.11.1,<=2021.11.2 From 
f365667b205ace5d777849fbd82e86dd0ce53d43 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 22 Dec 2021 14:39:08 -0500 Subject: [PATCH 66/70] Update cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp Co-authored-by: Vyas Ramasubramani --- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 1cc3672db38..cd0411f35a3 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -16,7 +16,9 @@ #pragma once #include + #include + #include #include #include From 5437b84feae51699fe69d5dcd8eaa5fba90bfabc Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 22 Dec 2021 14:39:16 -0500 Subject: [PATCH 67/70] Update cpp/libcudf_kafka/src/kafka_callback.cpp Co-authored-by: Vyas Ramasubramani --- cpp/libcudf_kafka/src/kafka_callback.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index dea36c7c04b..fce7dfa3ee2 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -27,7 +27,7 @@ python_oauth_refresh_callback::python_oauth_refresh_callback( : callback_wrapper_(callback_wrapper), python_callable_(python_callable){}; void python_oauth_refresh_callback::oauthbearer_token_refresh_cb( - RdKafka::Handle* handle, const std::string& oauthbearer_config) + RdKafka::Handle* handle, std::string const& oauthbearer_config) { std::map resp = callback_wrapper_(python_callable_); From 831b84b26b3bf967c817a7585968b50da816a2c3 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 22 Dec 2021 14:48:47 -0500 Subject: [PATCH 68/70] Address reviewers suggestions --- python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 6 +++--- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 1 - 2 files changed, 3 
insertions(+), 4 deletions(-) diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 0d2696da46f..0f9c57e727d 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -13,7 +13,7 @@ from cudf._lib.io.datasource cimport Datasource cdef extern from "kafka_callback.hpp" \ namespace "cudf::io::external::kafka" nogil: - ctypedef object (*kafka_oauth_callback_t)() + ctypedef object (*python_callable_type)() cdef extern from "kafka_consumer.hpp" \ @@ -22,10 +22,10 @@ cdef extern from "kafka_consumer.hpp" \ cpdef cppclass kafka_consumer: kafka_consumer(map[string, string] configs, - kafka_oauth_callback_t oauth_callback) except + + python_callable_type python_callable) except + kafka_consumer(map[string, string] configs, - kafka_oauth_callback_t oauth_callback, + python_callable_type python_callable, string topic_name, int32_t partition, int64_t start_offset, diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 95fa2ce1734..0d33647f4f7 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,6 +1,5 @@ # Copyright (c) 2020, NVIDIA CORPORATION. 
-cimport cpython from libc.stdint cimport int32_t, int64_t from libcpp cimport bool, nullptr from libcpp.map cimport map From 4087acc881bb93e86aa323e3a28b132c9d831926 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 5 Jan 2022 13:35:12 -0500 Subject: [PATCH 69/70] Update source file years and also adjust import for cudf_kafka --- cpp/libcudf_kafka/CMakeLists.txt | 2 +- cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake | 2 +- cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake | 2 +- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 2 +- cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp | 2 +- cpp/libcudf_kafka/src/kafka_callback.cpp | 2 +- cpp/libcudf_kafka/src/kafka_consumer.cpp | 2 +- cpp/libcudf_kafka/tests/CMakeLists.txt | 2 +- cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp | 4 ++-- python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 2 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 2 +- python/custreamz/custreamz/kafka.py | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt index 7e575312fbd..e6abba207d9 100644 --- a/cpp/libcudf_kafka/CMakeLists.txt +++ b/cpp/libcudf_kafka/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake b/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake index 1e04d40a7d5..aa4c5b60e7a 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/get_cudf.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. 
+# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake b/cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake index 3b3342cb297..5c3c9f01f17 100644 --- a/cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake +++ b/cpp/libcudf_kafka/cmake/thirdparty/get_rdkafka.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index cd0411f35a3..4918382827f 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index e1617baba2e..a6293349da9 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index fce7dfa3ee2..13d5b1cc4ea 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 183bb465741..90c6e14f5b1 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/tests/CMakeLists.txt b/cpp/libcudf_kafka/tests/CMakeLists.txt index e1219f8cf61..db2131ba00c 100644 --- a/cpp/libcudf_kafka/tests/CMakeLists.txt +++ b/cpp/libcudf_kafka/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp index 28215c1e522..613c2435f4d 100644 --- a/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp +++ b/cpp/libcudf_kafka/tests/kafka_consumer_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "cudf_kafka/kafka_consumer.hpp" +#include #include #include #include diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 0f9c57e727d..50a8e6f23d4 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. from libc.stdint cimport int32_t, int64_t from libcpp cimport bool diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 0d33647f4f7..711cb7dbac7 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. from libc.stdint cimport int32_t, int64_t from libcpp cimport bool, nullptr diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index 891bc1af4e2..01400b37e09 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. 
import confluent_kafka as ck from cudf_kafka._lib.kafka import KafkaDatasource From 44eea02f9b5e6b26f8ecb33f421d4fd11119eefc Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 5 Jan 2022 13:52:33 -0500 Subject: [PATCH 70/70] Adjust source file years --- conda/recipes/cudf_kafka/build.sh | 2 +- conda/recipes/cudf_kafka/meta.yaml | 2 +- conda/recipes/custreamz/build.sh | 2 +- conda/recipes/custreamz/meta.yaml | 2 +- conda/recipes/libcudf_kafka/build.sh | 2 +- conda/recipes/libcudf_kafka/meta.yaml | 2 +- cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp | 2 +- cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp | 2 +- cpp/libcudf_kafka/src/kafka_callback.cpp | 2 +- cpp/libcudf_kafka/src/kafka_consumer.cpp | 2 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pxd | 2 +- python/cudf_kafka/cudf_kafka/_lib/kafka.pyx | 2 +- python/custreamz/custreamz/kafka.py | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/conda/recipes/cudf_kafka/build.sh b/conda/recipes/cudf_kafka/build.sh index 3db559c144d..5d8720f1c98 100644 --- a/conda/recipes/cudf_kafka/build.sh +++ b/conda/recipes/cudf_kafka/build.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory ./build.sh -v cudf_kafka diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 2537643e2c8..d434e53c9b1 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} diff --git a/conda/recipes/custreamz/build.sh b/conda/recipes/custreamz/build.sh index 6ce9e4f21a9..88fccf90c69 100644 --- a/conda/recipes/custreamz/build.sh +++ b/conda/recipes/custreamz/build.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. # This assumes the script is executed from the root of the repo directory ./build.sh -v custreamz diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index 604c60d3e34..73f4727b70b 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2019, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} diff --git a/conda/recipes/libcudf_kafka/build.sh b/conda/recipes/libcudf_kafka/build.sh index cbe4584cb63..b656f55a64e 100644 --- a/conda/recipes/libcudf_kafka/build.sh +++ b/conda/recipes/libcudf_kafka/build.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then # This assumes the script is executed from the root of the repo directory diff --git a/conda/recipes/libcudf_kafka/meta.yaml b/conda/recipes/libcudf_kafka/meta.yaml index d82d50c62e5..0b274f3a41d 100644 --- a/conda/recipes/libcudf_kafka/meta.yaml +++ b/conda/recipes/libcudf_kafka/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' 
+ version.split('.')[1] %} diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp index 4918382827f..a4ff18054b1 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_callback.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp index a6293349da9..c65774d2e1a 100644 --- a/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp +++ b/cpp/libcudf_kafka/include/cudf_kafka/kafka_consumer.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp index 13d5b1cc4ea..6b98747c145 100644 --- a/cpp/libcudf_kafka/src/kafka_callback.cpp +++ b/cpp/libcudf_kafka/src/kafka_callback.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp index 90c6e14f5b1..49e89a56e60 100644 --- a/cpp/libcudf_kafka/src/kafka_consumer.cpp +++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2022, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd index 50a8e6f23d4..e64d8f82739 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libc.stdint cimport int32_t, int64_t from libcpp cimport bool diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx index 711cb7dbac7..24d072c544e 100644 --- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx +++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libc.stdint cimport int32_t, int64_t from libcpp cimport bool, nullptr diff --git a/python/custreamz/custreamz/kafka.py b/python/custreamz/custreamz/kafka.py index 01400b37e09..f5d5031602f 100644 --- a/python/custreamz/custreamz/kafka.py +++ b/python/custreamz/custreamz/kafka.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import confluent_kafka as ck from cudf_kafka._lib.kafka import KafkaDatasource