Update JNI build to use CMAKE_CUDA_ARCHITECTURES (#7425)
This eliminates the `Policy CMP0104 is not set` warning during the JNI build by using `CMAKE_CUDA_ARCHITECTURES` to specify the targeted CUDA architectures. It also removes a lot of code replicated from the cpp build and reuses the `ConfigureCUDA` module added in #7391. The architectures targeted by the build are visible in the `mvn` output, e.g.:
```
     [exec] -- CUDF: Building CUDF for GPU architectures: 60-real;70-real;75
```
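
The essence of the `CMakeLists.txt` change is that the targeted architectures are now decided before `project()` enables the CUDA language, and a legacy `GPU_ARCHS` setting is still honored for compatibility. A condensed sketch of the new logic (a restatement of the diff below, not additional code):

```
# Condensed sketch of the new architecture handling (see the full diff below).

# Honor the legacy GPU_ARCHS variable if CMAKE_CUDA_ARCHITECTURES is not given.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND DEFINED GPU_ARCHS)
  if(NOT "${GPU_ARCHS}" STREQUAL "ALL")
    set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
  endif()
endif()

# This must run before project() enables CUDA because of the default
# initialization behavior of CMAKE_CUDA_ARCHITECTURES (CMake issue 21302).
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CUDF_JNI_BUILD_FOR_ALL_ARCHS TRUE)        # unset: build all supported archs
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
  unset(CMAKE_CUDA_ARCHITECTURES CACHE)
  set(CUDF_JNI_BUILD_FOR_DETECTED_ARCHS TRUE)   # "": build only for detected GPUs
endif()

project(CUDF_JNI VERSION 0.7.0 LANGUAGES C CXX CUDA)
```

Leaving `CMAKE_CUDA_ARCHITECTURES` unset builds for all supported architectures, while an empty string builds only for the GPUs detected on the current system.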

This also configures the CPU compiler to use the same flags as the cpp build, which required fixing a number of warnings in the JNI code (e.g. sign-mismatch comparisons, unused variables) since warnings are treated as errors in the build.
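
Since warnings are errors, the host and device warning flags are what forced these fixes. As a rough sketch only, the kind of flags involved looks like the JNI-local ones being removed below; the authoritative versions now come from the shared `ConfigureCUDA` module, whose exact contents are not part of this diff:

```
# Rough sketch: the JNI-local warning flags removed below. The shared
# ConfigureCUDA module now provides the equivalent (exact flags not shown here).
if(CMAKE_COMPILER_IS_GNUCXX)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations")
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")
```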

Authors:
  - Jason Lowe (@jlowe)

Approvers:
  - Robert (Bobby) Evans (@revans2)
  - Alessandro Bellina (@abellina)
  - MithunR (@mythrocks)

URL: #7425
jlowe authored Feb 24, 2021
1 parent b0e5aef commit 3c8b831
Showing 4 changed files with 48 additions and 119 deletions.
118 changes: 29 additions & 89 deletions java/src/main/native/CMakeLists.txt
@@ -1,5 +1,5 @@
#=============================================================================
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,9 +15,35 @@
#=============================================================================
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

# Use GPU_ARCHS if CMAKE_CUDA_ARCHITECTURES is not defined
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND DEFINED GPU_ARCHS)
if(NOT "${GPU_ARCHS}" STREQUAL "ALL")
set(CMAKE_CUDA_ARCHITECTURES "${GPU_ARCHS}")
endif()
endif()

# If `CMAKE_CUDA_ARCHITECTURES` is not defined, build for all supported architectures. If
# `CMAKE_CUDA_ARCHITECTURES` is set to an empty string (""), build for only the current
# architecture. If `CMAKE_CUDA_ARCHITECTURES` is specified by the user, use user setting.

# This needs to be run before enabling the CUDA language due to the default initialization behavior
# of `CMAKE_CUDA_ARCHITECTURES`, https://gitlab.kitware.com/cmake/cmake/-/issues/21302
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CUDF_JNI_BUILD_FOR_ALL_ARCHS TRUE)
elseif(CMAKE_CUDA_ARCHITECTURES STREQUAL "")
unset(CMAKE_CUDA_ARCHITECTURES CACHE)
set(CUDF_JNI_BUILD_FOR_DETECTED_ARCHS TRUE)
endif()

project(CUDF_JNI VERSION 0.7.0 LANGUAGES C CXX CUDA)

set(CUDF_CPP_BUILD_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp/build")
set(CUDA_DATAFRAME_SOURCE_DIR "${PROJECT_SOURCE_DIR}/../../../../cpp")
set(CUDF_CPP_BUILD_DIR "${CUDA_DATAFRAME_SOURCE_DIR}/build")

set(CMAKE_MODULE_PATH
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
"${CUDA_DATAFRAME_SOURCE_DIR}/cmake/Modules/"
${CMAKE_MODULE_PATH})

###################################################################################################
# - build type ------------------------------------------------------------------------------------
@@ -45,88 +71,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations")
endif(CMAKE_COMPILER_IS_GNUCXX)

if(CMAKE_CUDA_COMPILER_VERSION)
# Compute the version. from CMAKE_CUDA_COMPILER_VERSION
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION})
string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${CMAKE_CUDA_COMPILER_VERSION})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
mark_as_advanced(CUDA_VERSION)
endif()

message(STATUS "CUDA_VERSION_MAJOR: ${CUDA_VERSION_MAJOR}")
message(STATUS "CUDA_VERSION_MINOR: ${CUDA_VERSION_MINOR}")
message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")

# Always set this convenience variable
set(CUDA_VERSION_STRING "${CUDA_VERSION}")

# Auto-detect available GPU compute architectures
set(GPU_ARCHS "ALL" CACHE STRING
"List of GPU architectures (semicolon-separated) to be compiled for. Pass 'ALL' if you want to compile for all supported GPU architectures. Empty string means to auto-detect the GPUs on the current system")

if("${GPU_ARCHS}" STREQUAL "")
include(cmake/EvalGpuArchs.cmake)
evaluate_gpu_archs(GPU_ARCHS)
endif()

if("${GPU_ARCHS}" STREQUAL "ALL")

# Check for embedded vs workstation architectures
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
# This is being built for Linux4Tegra or SBSA ARM64
set(GPU_ARCHS "62")
if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
set(GPU_ARCHS "${GPU_ARCHS};72")
endif()
if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
set(GPU_ARCHS "${GPU_ARCHS};75;80")
endif()

else()
# This is being built for an x86 or x86_64 architecture
set(GPU_ARCHS "60")
if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9))
set(GPU_ARCHS "${GPU_ARCHS};70")
endif()
if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10))
set(GPU_ARCHS "${GPU_ARCHS};75")
endif()
if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11))
set(GPU_ARCHS "${GPU_ARCHS};80")
endif()

endif()

endif()
message("GPU_ARCHS = ${GPU_ARCHS}")

foreach(arch ${GPU_ARCHS})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_${arch},code=sm_${arch}")
endforeach()


set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")

# set warnings as errors
# TODO: remove `no-maybe-unitialized` used to suppress warnings in rmm::exec_policy
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler -Wall,-Werror,-Wno-error=deprecated-declarations")

# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
if (CMAKE_CUDA_LINEINFO)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
endif(CMAKE_CUDA_LINEINFO)

# Debug options
if(CMAKE_BUILD_TYPE MATCHES Debug)
message(STATUS "Building with debugging flags")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic")
endif(CMAKE_BUILD_TYPE MATCHES Debug)

option(BUILD_TESTS "Configure CMake to build tests"
ON)

@@ -146,11 +90,7 @@ endif(CUDA_STATIC_RUNTIME)
###################################################################################################
# - cmake modules ---------------------------------------------------------------------------------

set(CMAKE_MODULE_PATH
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/"
"${PROJECT_SOURCE_DIR}/../../../../cpp/cmake/Modules/"
${CMAKE_MODULE_PATH})

include(ConfigureCUDA)
include(FeatureSummary)
include(CheckIncludeFiles)
include(CheckLibraryExists)
2 changes: 0 additions & 2 deletions java/src/main/native/src/ColumnVectorJni.cpp
@@ -147,7 +147,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeList(JNIEnv *env, j
if (children.size() == 0) {
// special case because cudf::interleave_columns does not support no columns
auto offsets = cudf::make_column_from_scalar(*zero, row_count + 1);
cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);
auto empty_col = cudf::make_empty_column(n_data_type);
ret = cudf::make_lists_column(row_count, std::move(offsets), std::move(empty_col),
@@ -308,7 +307,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_makeEmptyCudfColumn(JNI

try {
cudf::jni::auto_set_device(env);
cudf::type_id n_type = static_cast<cudf::type_id>(j_type);
cudf::data_type n_data_type = cudf::jni::make_data_type(j_type, scale);

std::unique_ptr<cudf::column> column(cudf::make_empty_column(n_data_type));
8 changes: 4 additions & 4 deletions java/src/main/native/src/ColumnViewJni.cpp
@@ -303,11 +303,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_slice(JNIEnv *env, j
std::vector<cudf::column_view> result = cudf::slice(*n_column, indices);
cudf::jni::native_jlongArray n_result(env, result.size());
std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
for (int i = 0; i < result.size(); i++) {
for (size_t i = 0; i < result.size(); i++) {
column_result[i].reset(new cudf::column(result[i]));
n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
}
for (int i = 0; i < result.size(); i++) {
for (size_t i = 0; i < result.size(); i++) {
column_result[i].release();
}
return n_result.get_jArray();
@@ -418,11 +418,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ColumnView_split(JNIEnv *env, j
std::vector<cudf::column_view> result = cudf::split(*n_column, indices);
cudf::jni::native_jlongArray n_result(env, result.size());
std::vector<std::unique_ptr<cudf::column>> column_result(result.size());
for (int i = 0; i < result.size(); i++) {
for (size_t i = 0; i < result.size(); i++) {
column_result[i].reset(new cudf::column(result[i]));
n_result[i] = reinterpret_cast<jlong>(column_result[i].get());
}
for (int i = 0; i < result.size(); i++) {
for (size_t i = 0; i < result.size(); i++) {
column_result[i].release();
}
return n_result.get_jArray();
39 changes: 15 additions & 24 deletions java/src/main/native/src/TableJni.cpp
@@ -86,7 +86,7 @@ class jni_writer_data_sink final : public cudf::io::data_sink {

void host_write(void const *data, size_t size) override {
JNIEnv *env = cudf::jni::get_jni_env(jvm);
size_t left_to_copy = size;
long left_to_copy = static_cast<long>(size);
const char *copy_from = static_cast<const char *>(data);
while (left_to_copy > 0) {
long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -111,7 +111,7 @@

void device_write(void const *gpu_data, size_t size, rmm::cuda_stream_view stream) override {
JNIEnv *env = cudf::jni::get_jni_env(jvm);
size_t left_to_copy = size;
long left_to_copy = static_cast<long>(size);
const char *copy_from = static_cast<const char *>(gpu_data);
while (left_to_copy > 0) {
long buffer_amount_available = current_buffer_len - current_buffer_written;
@@ -209,7 +209,7 @@ class native_arrow_ipc_writer_handle final {

explicit native_arrow_ipc_writer_handle(const std::vector<std::string> &col_names,
const std::shared_ptr<arrow::io::OutputStream> &sink)
: initialized(false), column_names(col_names), sink(sink), file_name("") {}
: initialized(false), column_names(col_names), file_name(""), sink(sink) {}

bool initialized;
std::vector<std::string> column_names;
@@ -541,7 +541,7 @@ convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &table_result
for (int i = 0; i < table_cols; i++) {
outcol_handles[i] = reinterpret_cast<jlong>(ret[i].release());
}
for (int i = 0; i < extra_columns.size(); i++) {
for (size_t i = 0; i < extra_columns.size(); i++) {
outcol_handles[i + table_cols] = reinterpret_cast<jlong>(extra_columns[i].release());
}
return outcol_handles.get_jArray();
@@ -553,6 +553,7 @@ jlongArray convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &t
}

namespace {

// Check that window parameters are valid.
bool valid_window_parameters(native_jintArray const &values,
native_jpointerArray<cudf::aggregation> const &ops,
@@ -562,14 +563,6 @@ bool valid_window_parameters(native_jintArray const &values,
values.size() == preceding.size() && values.size() == following.size();
}

// Check that time-range window parameters are valid.
bool valid_window_parameters(native_jintArray const &values, native_jintArray const &timestamps,
native_jpointerArray<cudf::aggregation> const &ops,
native_jintArray const &min_periods, native_jintArray const &preceding,
native_jintArray const &following) {
return values.size() == timestamps.size() &&
valid_window_parameters(values, ops, min_periods, preceding, following);
}
} // namespace

} // namespace jni
@@ -927,7 +920,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetBufferBegin(
table_metadata_with_nullability metadata;
metadata.column_nullable = nullability;
metadata.column_names = col_names.as_cpp_vector();
for (size_t i = 0; i < meta_keys.size(); ++i) {
for (auto i = 0; i < meta_keys.size(); ++i) {
metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
}

@@ -977,7 +970,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeParquetFileBegin(
table_metadata_with_nullability metadata;
metadata.column_nullable = nullability;
metadata.column_names = col_names.as_cpp_vector();
for (size_t i = 0; i < meta_keys.size(); ++i) {
for (int i = 0; i < meta_keys.size(); ++i) {
metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
}
cudf::jni::native_jintArray precisions(env, j_precisions);
@@ -1106,7 +1099,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCBufferBegin(
table_metadata_with_nullability metadata;
metadata.column_nullable = nullability;
metadata.column_names = col_names.as_cpp_vector();
for (size_t i = 0; i < meta_keys.size(); ++i) {
for (int i = 0; i < meta_keys.size(); ++i) {
metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
}

@@ -1149,7 +1142,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(
table_metadata_with_nullability metadata;
metadata.column_nullable = nullability;
metadata.column_names = col_names.as_cpp_vector();
for (size_t i = 0; i < meta_keys.size(); ++i) {
for (int i = 0; i < meta_keys.size(); ++i) {
metadata.user_data[meta_keys[i].get()] = meta_values[i].get();
}

@@ -1605,7 +1598,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_concatenate(JNIEnv *env,
cudf::jni::auto_set_device(env);
cudf::jni::native_jpointerArray<cudf::table_view> tables(env, table_handles);

long unsigned int num_tables = tables.size();
int num_tables = tables.size();
// There are some issues with table_view and std::vector. We cannot give the
// vector a size or it will not compile.
std::vector<cudf::table_view> to_concat;
@@ -1635,7 +1628,6 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
cudf::jni::auto_set_device(env);
cudf::table_view *n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
cudf::jni::native_jintArray n_columns_to_hash(env, columns_to_hash);
int n_number_of_partitions = static_cast<int>(number_of_partitions);
cudf::jni::native_jintArray n_output_offsets(env, output_offsets);

JNI_ARG_CHECK(env, n_columns_to_hash.size() > 0, "columns_to_hash is zero", NULL);
@@ -1648,7 +1640,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_hashPartition(JNIEnv *env
std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::size_type>> result =
cudf::hash_partition(*n_input_table, columns_to_hash_vec, number_of_partitions);

for (int i = 0; i < result.second.size(); i++) {
for (size_t i = 0; i < result.second.size(); i++) {
n_output_offsets[i] = result.second[i];
}

@@ -1668,12 +1660,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_roundRobinPartition(
try {
cudf::jni::auto_set_device(env);
auto n_input_table = reinterpret_cast<cudf::table_view *>(input_table);
int n_num_partitions = static_cast<int>(num_partitions);
cudf::jni::native_jintArray n_output_offsets(env, output_offsets);

auto result = cudf::round_robin_partition(*n_input_table, num_partitions, start_partition);

for (int i = 0; i < result.second.size(); i++) {
for (size_t i = 0; i < result.second.size(); i++) {
n_output_offsets[i] = result.second[i];
}

@@ -1859,8 +1850,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_bound(JNIEnv *env, jclass, jlo

JNI_ARG_CHECK(env, (column_desc_flags.size() == column_null_orders.size()),
"null-order and sort-order size mismatch", 0);
uint32_t num_columns = column_null_orders.size();
for (int i = 0; i < num_columns; i++) {
size_t num_columns = column_null_orders.size();
for (size_t i = 0; i < num_columns; i++) {
column_desc_flags[i] = n_desc_flags[i] ? cudf::order::DESCENDING : cudf::order::ASCENDING;
column_null_orders[i] =
n_are_nulls_smallest[i] ? cudf::null_order::BEFORE : cudf::null_order::AFTER;
@@ -1894,7 +1885,7 @@ JNIEXPORT jobjectArray JNICALL Java_ai_rapids_cudf_Table_contiguousSplit(JNIEnv
std::vector<cudf::packed_table> result = cudf::contiguous_split(*n_table, indices);
cudf::jni::native_jobjectArray<jobject> n_result =
cudf::jni::contiguous_table_array(env, result.size());
for (int i = 0; i < result.size(); i++) {
for (size_t i = 0; i < result.size(); i++) {
n_result.set(i, cudf::jni::contiguous_table_from(env, result[i].data,
result[i].table.num_rows()));
}
