Skip to content

Commit

Permalink
Support cudf pin with rapids-cmake (NVIDIA#1877)
Browse files Browse the repository at this point in the history
* Use rapids-cmake to pin cudf dependencies to known working SHA1's

* Use git download rapids-cmake

So that we can get SHA
Use same format of CopyRight
Use cudf/rapids_config.cmake which is renamed from fetch_rapids.cmake
Update versions to latest

Signed-off-by: Gary Shen <[email protected]>

* Commit cudf to the latest code before mvn verify

Signed-off-by: Gary Shen <[email protected]>

* Remove the first commit of cudf

since the second one can commit it at once

Signed-off-by: Gary Shen <[email protected]>

---------

Signed-off-by: Gary Shen <[email protected]>
Co-authored-by: Robert Maynard <[email protected]>
  • Loading branch information
GaryShen2008 and robertmaynard authored Mar 21, 2024
1 parent 5f642ba commit d8771da
Show file tree
Hide file tree
Showing 7 changed files with 211 additions and 3 deletions.
4 changes: 3 additions & 1 deletion build-libcudf.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
<!--
Copyright (c) 2022, NVIDIA CORPORATION.
Copyright (c) 2022-2024, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,13 +40,15 @@
<arg value="-DBUILD_TESTS=OFF"/>
<arg value="-DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCHS}"/>
<arg value="-DCMAKE_INSTALL_PREFIX=${libcudf.install.path}"/>
<arg value="-DCUDF_DEPENDENCY_PIN_MODE=${libcudf.dependency.mode}"/>
<arg value="-DCUDA_STATIC_RUNTIME=ON"/>
<arg value="-DCUDF_ENABLE_ARROW_S3=OFF"/>
<arg value="-DCUDF_ENABLE_ARROW_PARQUET=ON"/>
<arg value="-DCUDF_USE_ARROW_STATIC=ON"/>
<arg value="-DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${CUDF_USE_PER_THREAD_DEFAULT_STREAM}" />
<arg value="-DRMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}" />
<arg value="-DUSE_GDS=${USE_GDS}" />
<arg value="-C=${cudf.pin.path}/setup.cmake"/>
</exec>

<exec dir="${libcudf.build.path}"
Expand Down
15 changes: 13 additions & 2 deletions ci/submodule-sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ fi

echo "Try update cudf submodule to ${cudf_sha}..."
git add .
git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${CUDF_TAG:-$cudf_sha}"
sha=$(git rev-parse HEAD)

echo "Test against ${cudf_sha}..."

Expand All @@ -75,6 +73,7 @@ set +e
${MVN} verify ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
-DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
-DBUILD_TESTS=ON \
-DUSE_SANITIZER=ON
Expand All @@ -89,6 +88,18 @@ else
echo "Test failed, will update the result"
fi

# Extract the rapids-cmake sha1 that we need to pin to
rapids_cmake_sha=$(git -C thirdparty/cudf/cpp/build/_deps/rapids-cmake-src/ rev-parse HEAD)
echo "Update rapids-cmake pinned SHA1 to ${rapids_cmake_sha}"
echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha

# Do the git add after the build so that we get
# the updated versions.json generated by the build
echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
git add .
git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}"
sha=$(git rev-parse HEAD)

# push the intermediate branch and create PR against REF
# if test passed, it will try auto-merge the PR
# if test failed, it will only comment the test result in the PR
Expand Down
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,14 @@
<cuda.version>cuda11</cuda.version>
<jni.classifier>${cuda.version}</jni.classifier>
<cudf.path>${project.basedir}/thirdparty/cudf</cudf.path>
<cudf.pin.path>${project.basedir}/thirdparty/cudf-pins/</cudf.pin.path>
<hadoop.version>3.2.4</hadoop.version>
<junit.version>5.8.1</junit.version>
<libcudf.build.path>${cudf.path}/cpp/build</libcudf.build.path>
<libcudf.build.configure>false</libcudf.build.configure>
<libcudf.clean.skip>true</libcudf.clean.skip>
<libcudf.install.path>${project.build.directory}/libcudf-install</libcudf.install.path>
<libcudf.dependency.mode>pinned</libcudf.dependency.mode>
<libcudfjni.build.path>${project.build.directory}/libcudfjni</libcudfjni.build.path>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
Expand Down
37 changes: 37 additions & 0 deletions thirdparty/cudf-pins/add_dependency_pins.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Bootstrap rapids-cmake when the including project has not already done so
# (signalled by `rapids-cmake-dir` being defined).
#
# The path is anchored to this file's own directory: a bare relative path given
# to include() is resolved against the current source directory of whichever
# project is including this file, not against the location of this file.
if(NOT DEFINED rapids-cmake-dir)
  include("${CMAKE_CURRENT_LIST_DIR}/../cudf/rapids_config.cmake")
endif()

# Load the rapids-cmake CPM module and initialize it so that the package
# override / pin-generation helpers used below operate on an initialized state.
include(rapids-cpm)
rapids_cpm_init()

# add_override_if_requested()
#
# Applies or records the CUDF dependency pins, depending on the value of
# CUDF_DEPENDENCY_PIN_MODE:
#   * `pinned`       - load versions.json (next to this file) as a rapids-cmake
#                      package override so dependencies resolve to the recorded values.
#   * anything else  - build with the latest dependencies and ask rapids-cmake to
#                      write the resolved versions to versions.json.
#
# Takes no arguments. Reads CUDF_DEPENDENCY_PIN_MODE and rapids-cmake-dir from the
# calling scope.
function(add_override_if_requested)
  # Compare case-insensitively. The raw value seen here may still carry the
  # user's original casing (e.g. `PINNED`); a case-sensitive comparison would
  # send such a value down the `latest` branch and overwrite versions.json.
  string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" pin_mode)
  set(pins_file "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json")

  if(pin_mode STREQUAL "pinned")
    include("${rapids-cmake-dir}/cpm/package_override.cmake")
    rapids_cpm_package_override("${pins_file}")

    message(STATUS "Pinning CUDF dependencies to values found in ${pins_file}")
  else()
    include("${rapids-cmake-dir}/cpm/generate_pinned_versions.cmake")
    rapids_cpm_generate_pinned_versions(OUTPUT "${pins_file}")

    message(STATUS "Building with latest CUDF dependencies (saving pinned versions to ${pins_file})")
  endif()
endfunction()
add_override_if_requested()
1 change: 1 addition & 0 deletions thirdparty/cudf-pins/rapids-cmake.sha
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
d0670231b614f757645e6b3d4f456fea89aa282a
45 changes: 45 additions & 0 deletions thirdparty/cudf-pins/setup.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Validate the requested dependency pin mode. The value is lowercased first so
# that the check is case-insensitive; an unset value lowercases to the empty
# string and is rejected like any other unknown mode.
string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" CUDF_DEPENDENCY_PIN_MODE)
if(NOT (CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned OR
        CUDF_DEPENDENCY_PIN_MODE STREQUAL latest))
  message(FATAL_ERROR "The CUDF_DEPENDENCY_PIN_MODE variable must be set to either `pinned` or `latest`.")
endif()

# Persist the normalized (lowercase) value in the cache. Without this, only the
# variable local to this script is lowercased, and code that later reads the
# cache entry would still see the original casing (e.g. `PINNED`) and could
# compare it case-sensitively against `pinned`.
set(CUDF_DEPENDENCY_PIN_MODE "${CUDF_DEPENDENCY_PIN_MODE}" CACHE STRING
    "CUDF dependency pin mode: pinned or latest" FORCE)

# set_rapids_cmake_pin_sha1()
#
# Logs the rapids-cmake SHA1 currently held in `rapids-cmake-sha` and re-exports
# that same value into the caller's scope. Takes no arguments.
function(set_rapids_cmake_pin_sha1)
  message(STATUS "Pinning rapids-cmake SHA1 to ${rapids-cmake-sha}")

  # PARENT_SCOPE does not alter the value visible inside this function, so the
  # order of the log line and the export is irrelevant.
  set(rapids-cmake-sha
      "${rapids-cmake-sha}"
      PARENT_SCOPE)
endfunction()

# We need to set the rapids-cmake SHA1 before any CMake code in libcudf is executed when
# we are in pin mode. Otherwise we will use the latest rapids-cmake version since that
# is what cudf does via `rapids_config.cmake`
if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
  # Extract the rapids sha1 from the file
  file(READ "${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha" rapids-cmake-sha)

  # string(STRIP <input> <output_variable>): the input string comes first. With the
  # arguments swapped, the literal text "rapids-cmake-sha" is stripped and stored in
  # a variable named after the file contents, leaving the SHA itself untouched.
  string(STRIP "${rapids-cmake-sha}" rapids-cmake-sha)
  # Drop any remaining embedded newlines as well.
  string(REPLACE "\n" "" rapids-cmake-sha "${rapids-cmake-sha}")

  # An empty SHA file would otherwise silently cache an empty pin.
  if("${rapids-cmake-sha}" STREQUAL "")
    message(FATAL_ERROR "No rapids-cmake SHA1 found in ${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha")
  endif()

  set(rapids-cmake-sha "${rapids-cmake-sha}" CACHE STRING "rapids-cmake sha to use" FORCE)
  message(STATUS "Pinning rapids-cmake SHA1 [${rapids-cmake-sha}]")
else()
  # Not pinning: request a git clone of rapids-cmake so that its HEAD SHA1 can be
  # read back from the checkout afterwards.
  set(rapids-cmake-fetch-via-git "ON" CACHE STRING "Make sure rapids-cmake is cloned so we can get SHA value" FORCE)
endif()

# rapids-cmake's cpm_init() cannot be invoked from a `-C` initial-cache script,
# so the dependency pinning logic is registered as a project() call hook:
# add_dependency_pins.cmake (located next to this file) is listed in
# CMAKE_PROJECT_TOP_LEVEL_INCLUDES.
set(CMAKE_PROJECT_TOP_LEVEL_INCLUDES
    "${CMAKE_CURRENT_LIST_DIR}/add_dependency_pins.cmake"
    CACHE FILEPATH "")
110 changes: 110 additions & 0 deletions thirdparty/cudf-pins/versions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"packages" :
{
"Arrow" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "740889f413af9b1ae1d81eb1e5a4a9fb4ce9cf97",
"git_url" : "https://github.com/apache/arrow.git",
"version" : "14.0.2"
},
"CCCL" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "36f379f29660761fe033a1306ca9dab6a88cb65c",
"git_url" : "https://github.com/NVIDIA/cccl.git",
"patches" :
[
{
"file" : "cccl/bug_fixes.diff",
"fixed_in" : "2.3",
"issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates"
},
{
"file" : "cccl/hide_kernels.diff",
"fixed_in" : "2.3",
"issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pulls/443]"
},
{
"file" : "cccl/revert_pr_211.diff",
"fixed_in" : "",
"issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue."
},
{
"file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
"fixed_in" : "",
"issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]"
},
{
"file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
"fixed_in" : "",
"issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]"
},
{
"file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
"fixed_in" : "",
"issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]"
}
],
"version" : "2.2.0"
},
"GTest" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "b796f7d44681514f58a683a3a71ff17c94edb0c1",
"git_url" : "https://github.com/google/googletest.git",
"version" : "1.13.0"
},
"KvikIO" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "30b1dc1aa9dc8694fc7babb81e33664c56a9b2ff",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.04.0"
},
"NVTX3" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "e170594ac7cf1dac584da473d4ca9301087090c1",
"git_url" : "https://github.com/NVIDIA/NVTX.git",
"version" : "3.1.0"
},
"cuco" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "56c53beb6fb0cafd265b7fcc3df78ae487811b22",
"git_url" : "https://github.com/NVIDIA/cuCollections.git",
"version" : "0.0.1"
},
"dlpack" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "365b823cedb281cd0240ca601aba9b78771f91a3",
"git_url" : "https://github.com/dmlc/dlpack.git",
"version" : "0.8"
},
"jitify" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "047df5e87d84834f8f4225898476145741acfa80",
"git_url" : "https://github.com/rapidsai/jitify.git",
"version" : "2.0.0"
},
"rmm" :
{
"always_download" : true,
"git_shallow" : false,
"git_tag" : "2c161dad4aa732e7a6901ab512aa9a0fac85afc4",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.04.0"
}
}
}

0 comments on commit d8771da

Please sign in to comment.