diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 614ff155c44..d67e4c08e50 100755
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -1,5 +1,5 @@
 #=============================================================================
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,9 +15,35 @@
 #=============================================================================
 cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
 
+# Use GPU_ARCHS if CMAKE_CUDA_ARCHITECTURES is not defined
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND DEFINED GPU_ARCHS)
+  if(NOT "${GPU_ARCHS}" STREQUAL "ALL")
+    set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
+  endif()
+endif()
+
+# If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If
+# `CMAKE_CUDA_ARCHITECTURES` is set to an empty string (""), build for only the current
+# architecture. If `CMAKE_CUDA_ARCHITECTURES` is specified by the user, use user setting.
+
+# This needs to be run before enabling the CUDA language due to the default initialization behavior
+# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
+if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CUDF_JNI_BUILD_FOR_ALL_ARCHS TRUE)
+elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
+  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
+  set(CUDF_JNI_BUILD_FOR_DETECTED_ARCHS TRUE)
+endif()
+
 project(CUDF_JNI VERSION 0.7.0 LANGUAGES C CXX CUDA)
 
-set(CUDF_CPP_BUILD_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp/build")
+set(CUDA_DATAFRAME_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp")
+set(CUDF_CPP_BUILD_DIR "${CUDA_DATAFRAME_SOURCE_DIR}/build")
+
+set(CMAKE_MODULE_PATH
+    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
+    "${CUDA_DATAFRAME_SOURCE_DIR}/cmake/Modules/"
+    ${CMAKE_MODULE_PATH})
 
 ###################################################################################################
 # - build type ------------------------------------------------------------------------------------
@@ -45,88 +71,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CUDA_STANDARD 14)
 set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 
-if(CMAKE_COMPILER_IS_GNUCXX)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations")
-endif(CMAKE_COMPILER_IS_GNUCXX)
-
-if(CMAKE_CUDA_COMPILER_VERSION)
-  # Compute the version. from CMAKE_CUDA_COMPILER_VERSION
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
-  string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
-  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
-  mark_as_advanced(CUDA_VERSION)
-endif()
-
-message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
-message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
-message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")
-
-# Always set this convenience variable
-set(CUDA_VERSION_STRING "${CUDA_VERSION}")
-
-# Auto-detect available GPU compute architectures
-set(GPU_ARCHS "ALL" CACHE STRING
-  "List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")
-
-if("${GPU_ARCHS}" STREQUAL "")
-  include(cmake/EvalGpuArchs.cmake)
-  evaluate_gpu_archs(GPU_ARCHS)
-endif()
-
-if("${GPU_ARCHS}" STREQUAL "ALL")
-
-  # Check for embedded vs workstation architectures
-  if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-    # This is being built for Linux4Tegra or SBSA ARM64
-    set(GPU_ARCHS "62")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};72")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};75;80")
-    endif()
-
-  else()
-    # This is being built for an x86 or x86_64 architecture
-    set(GPU_ARCHS "60")
-    if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
-      set(GPU_ARCHS "${GPU_ARCHS};70")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10))
-      set(GPU_ARCHS "${GPU_ARCHS};75")
-    endif()
-    if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
-      set(GPU_ARCHS "${GPU_ARCHS};80")
-    endif()
-
-  endif()
-
-endif()
-message("GPU_ARCHS = ${GPU_ARCHS}")
-
-foreach(arch ${GPU_ARCHS})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}")
-endforeach()
-
-
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
-
-# set warnings as errors
-# TODO: remove `no-maybe-unitialized` used to suppress warnings in rmm::exec_policy
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")
-
-# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
-option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
-if (CMAKE_CUDA_LINEINFO)
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
-endif(CMAKE_CUDA_LINEINFO)
-
-# Debug options
-if(CMAKE_BUILD_TYPE MATCHES Debug)
-  message(STATUS "Building with debugging flags")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic")
-endif(CMAKE_BUILD_TYPE MATCHES Debug)
-
 option(BUILD_TESTS "Configure CMake to build tests" ON)
 
@@ -146,11 +90,7 @@ endif(CUDA_STATIC_RUNTIME)
 ###################################################################################################
 # - cmake modules ---------------------------------------------------------------------------------
 
-set(CMAKE_MODULE_PATH
-    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
-    "${PROJECT_SOURCE_DIR}/../../../../cpp/cmake/Modules/"
-    ${CMAKE_MODULE_PATH})
-
+include(ConfigureCUDA)
 include(FeatureSummary)
 include(CheckIncludeFiles)
 include(CheckLibraryExists)
diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp
index a1e8517c646..3385343c291 100644
--- a/java/src/main/native/src/ColumnVectorJni.cpp
+++ b/java/src/main/native/src/ColumnVectorJni.cpp
@@ -147,7 +147,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, j
     if (children.size() == 0) {
       // special case because cudf::interleave_columns does not support no columns
       auto offsets = cudf::make_column_from_scalar(*zero, row_count + 1);
-      cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
       cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
       auto empty_col = cudf::make_empty_column(n_data_type);
       ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(empty_col),
@@ -308,7 +307,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeEmptyCudfColumn(JNI
   try {
     cudf::jni::auto_set_device(env);
-    cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
     cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
 
     std::unique_ptr<cudf::column> column(cudf::make_empty_column(n_data_type));
 
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 82e71b04a2f..47aa30e5d31 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -303,11 +303,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_slice(JNIEnv *env, j
     std::vector<cudf::column_view> result = cudf::slice(*n_column, indices);
     cudf::jni::native_jlongArray n_result(env, result.size());
     std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].reset(new cudf::column(result[i]));
       n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
     }
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].release();
     }
     return n_result.get_jArray();
@@ -418,11 +418,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_split(JNIEnv *env, j
     std::vector<cudf::column_view> result = cudf::split(*n_column, indices);
     cudf::jni::native_jlongArray n_result(env, result.size());
     std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].reset(new cudf::column(result[i]));
       n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
     }
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       column_result[i].release();
     }
     return n_result.get_jArray();
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 96b6d1d9a74..f5140a8810b 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -86,7 +86,7 @@ class jni_writer_data_sink final : public cudf::io::data_sink {
 
   void host_write(void const *data, size_t size) override {
     JNIEnv *env = cudf::jni::get_jni_env(jvm);
-    size_t left_to_copy = size;
+    long left_to_copy = static_cast<long>(size);
     const char *copy_from = static_cast<const char *>(data);
     while (left_to_copy > 0) {
       long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -111,7 +111,7 @@
 
   void device_write(void const *gpu_data, size_t size, rmm::cuda_stream_view stream) override {
     JNIEnv *env = cudf::jni::get_jni_env(jvm);
-    size_t left_to_copy = size;
+    long left_to_copy = static_cast<long>(size);
     const char *copy_from = static_cast<const char *>(gpu_data);
     while (left_to_copy > 0) {
       long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -209,7 +209,7 @@ class native_arrow_ipc_writer_handle final {
 
   explicit native_arrow_ipc_writer_handle(const std::vector<std::string> &col_names,
                                           const std::shared_ptr<arrow::io::OutputStream> &sink)
-      : initialized(false), column_names(col_names), sink(sink), file_name("") {}
+      : initialized(false), column_names(col_names), file_name(""), sink(sink) {}
 
   bool initialized;
   std::vector<std::string> column_names;
@@ -541,7 +541,7 @@ convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &table_result
   for (int i = 0; i < table_cols; i++) {
     outcol_handles[i] = reinterpret_cast<jlong>(ret[i].release());
   }
-  for (int i = 0; i < extra_columns.size(); i++) {
+  for (size_t i = 0; i < extra_columns.size(); i++) {
     outcol_handles[i + table_cols] = reinterpret_cast<jlong>(extra_columns[i].release());
   }
   return outcol_handles.get_jArray();
@@ -553,6 +553,7 @@ jlongArray convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &t
 }
 
 namespace {
+
 // Check that window parameters are valid.
 bool valid_window_parameters(native_jintArray const &values,
                              native_jpointerArray<cudf::aggregation> const &ops,
@@ -562,14 +563,6 @@ bool valid_window_parameters(native_jintArray const &values,
          values.size() == preceding.size() && values.size() == following.size();
 }
 
-// Check that time-range window parameters are valid.
-bool valid_window_parameters(native_jintArray const &values, native_jintArray const &timestamps,
-                             native_jpointerArray<cudf::aggregation> const &ops,
-                             native_jintArray const &min_periods, native_jintArray const &preceding,
-                             native_jintArray const &following) {
-  return values.size() == timestamps.size() &&
-         valid_window_parameters(values, ops, min_periods, preceding, following);
-}
 } // namespace
 
 } // namespace jni
@@ -927,7 +920,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin(
     table_metadata_with_nullability metadata;
     metadata.column_nullable = nullability;
     metadata.column_names = col_names.as_cpp_vector();
-    for (size_t i = 0; i < meta_keys.size(); ++i) {
+    for (auto i = 0; i < meta_keys.size(); ++i) {
      metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
     }
 
@@ -977,7 +970,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin(
     table_metadata_with_nullability metadata;
     metadata.column_nullable = nullability;
     metadata.column_names = col_names.as_cpp_vector();
-    for (size_t i = 0; i < meta_keys.size(); ++i) {
+    for (int i = 0; i < meta_keys.size(); ++i) {
      metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
     }
     cudf::jni::native_jintArray precisions(env, j_precisions);
@@ -1106,7 +1099,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin(
     table_metadata_with_nullability metadata;
     metadata.column_nullable = nullability;
     metadata.column_names = col_names.as_cpp_vector();
-    for (size_t i = 0; i < meta_keys.size(); ++i) {
+    for (int i = 0; i < meta_keys.size(); ++i) {
      metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
     }
 
@@ -1149,7 +1142,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(
     table_metadata_with_nullability metadata;
     metadata.column_nullable = nullability;
    metadata.column_names = col_names.as_cpp_vector();
-    for (size_t i = 0; i < meta_keys.size(); ++i) {
+    for (int i = 0; i < meta_keys.size(); ++i) {
      metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
     }
 
@@ -1605,7 +1598,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_concatenate(JNIEnv *env,
     cudf::jni::auto_set_device(env);
     cudf::jni::native_jpointerArray<cudf::table_view> tables(env, table_handles);
-    long unsigned int num_tables = tables.size();
+    int num_tables = tables.size();
     // There are some issues with table_view and std::vector. We cannot give the
     // vector a size or it will not compile.
     std::vector<cudf::table_view> to_concat;
 
@@ -1635,7 +1628,6 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
     cudf::jni::auto_set_device(env);
     cudf::table_view *n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
     cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash);
-    int n_number_of_partitions = static_cast<int>(number_of_partitions);
     cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
     JNI_ARG_CHECK(env, n_columns_to_hash.size() > 0, "columns_to_hash is zero", NULL);
 
@@ -1648,7 +1640,7 @@
     std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::size_type>> result =
         cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions);
 
-    for (int i = 0; i < result.second.size(); i++) {
+    for (size_t i = 0; i < result.second.size(); i++) {
       n_output_offsets[i] = result.second[i];
     }
 
@@ -1668,12 +1660,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_roundRobinPartition(
   try {
     cudf::jni::auto_set_device(env);
     auto n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
-    int n_num_partitions = static_cast<int>(num_partitions);
     cudf::jni::native_jintArray n_output_offsets(env, output_offsets);
 
     auto result = cudf::round_robin_partition(*n_input_table, num_partitions, start_partition);
 
-    for (int i = 0; i < result.second.size(); i++) {
+    for (size_t i = 0; i < result.second.size(); i++) {
       n_output_offsets[i] = result.second[i];
     }
 
@@ -1859,8 +1850,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_bound(JNIEnv *env, jclass, jlo
     JNI_ARG_CHECK(env, (column_desc_flags.size() == column_null_orders.size()),
                   "null-order and sort-order size mismatch", 0);
 
-    uint32_t num_columns = column_null_orders.size();
-    for (int i = 0; i < num_columns; i++) {
+    size_t num_columns = column_null_orders.size();
+    for (size_t i = 0; i < num_columns; i++) {
       column_desc_flags[i] = n_desc_flags[i] ? cudf::order::DESCENDING : cudf::order::ASCENDING;
       column_null_orders[i] =
           n_are_nulls_smallest[i] ? cudf::null_order::BEFORE : cudf::null_order::AFTER;
@@ -1894,7 +1885,7 @@ JNIEXPORT jobjectArray JNICALL Java_ai_rapids_cudf_Table_contiguousSplit(JNIEnv
     std::vector<cudf::contiguous_split_result> result = cudf::contiguous_split(*n_table, indices);
     cudf::jni::native_jobjectArray<jobject> n_result =
         cudf::jni::contiguous_table_array(env, result.size());
-    for (int i = 0; i < result.size(); i++) {
+    for (size_t i = 0; i < result.size(); i++) {
       n_result.set(i, cudf::jni::contiguous_table_from(env, result[i].data,
                                                        result[i].table.num_rows()));
     }
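
Note on the loop-index changes in ColumnViewJni.cpp and TableJni.cpp above: std::vector::size() returns size_t, so indexing a vector result with a signed int trips -Wsign-compare once warnings are treated as errors, while the JNI wrapper arrays (native_jintArray, native_jstringArray, native_jpointerArray) report their size as a plain int, which is why the index types move in both directions in this patch. Below is a minimal standalone sketch of the pattern, assuming nothing beyond the C++ standard library; the file, function, and variable names are illustrative only and are not part of the cudf sources.

// sign_compare_demo.cpp - illustrative sketch only, not cudf code.
#include <cstddef>
#include <vector>

// Counts the positive entries of a vector; mirrors the index fix applied above.
int count_positive(const std::vector<int> &values) {
  int count = 0;
  // A signed index would warn under -Wall -Werror (-Wsign-compare):
  //   for (int i = 0; i < values.size(); i++) { ... }
  for (size_t i = 0; i < values.size(); i++) { // size_t matches vector::size()
    if (values[i] > 0) {
      count++;
    }
  }
  return count;
}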