Pin cudf dependencies during build (#1901)

* Support cudf pin with rapids-cmake (#1877) * Use rapids-cmake to pin cudf dependencies to known working SHA1's * Use git download rapids-cmake So that we can get SHA Use same format of CopyRight Use cudf/rapids_config.cmake which is renamed from fetch_rapids.cmake Update versions to latest Signed-off-by: Gary Shen <[email protected]> * Commit cudf to the latest code before mvn verify Signed-off-by: Gary Shen <[email protected]> * Remove the first commit of cudf since the second one can commit it at once Signed-off-by: Gary Shen <[email protected]> --------- Signed-off-by: Gary Shen <[email protected]> Co-authored-by: Robert Maynard <[email protected]> * Update cudf pin versions Signed-off-by: Jason Lowe <[email protected]> * Update pin versions Signed-off-by: Jason Lowe <[email protected]> --------- Signed-off-by: Gary Shen <[email protected]> Signed-off-by: Jason Lowe <[email protected]> Co-authored-by: Gary Shen <[email protected]> Co-authored-by: Robert Maynard <[email protected]>
NVIDIA · Apr 2, 2024 · fa7c202 · fa7c202
1 parent 7559c88
commit fa7c202
Show file tree

Hide file tree

Showing 7 changed files with 255 additions and 2 deletions.
diff --git a/build-libcudf.xml b/build-libcudf.xml
@@ -40,6 +40,7 @@
       <arg value="-DBUILD_TESTS=OFF"/>
       <arg value="-DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCHS}"/>
       <arg value="-DCMAKE_INSTALL_PREFIX=${libcudf.install.path}"/>
+      <arg value="-DCUDF_DEPENDENCY_PIN_MODE=${libcudf.dependency.mode}"/>
       <arg value="-DCUDA_STATIC_RUNTIME=ON"/>
       <arg value="-DCUDF_ENABLE_ARROW_S3=OFF"/>
       <arg value="-DCUDF_ENABLE_ARROW_PARQUET=ON"/>
@@ -48,6 +49,7 @@
       <arg value="-DLIBCUDF_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}" />
       <arg value="-DRMM_LOGGING_LEVEL=${RMM_LOGGING_LEVEL}" />
       <arg value="-DUSE_GDS=${USE_GDS}" />
+      <arg value="-C=${cudf.pin.path}/setup.cmake"/>
     </exec>
 
     <exec dir="${libcudf.build.path}"

diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
@@ -65,8 +65,6 @@ fi
 
 echo "Try update cudf submodule to ${cudf_sha}..."
 git add .
-git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${CUDF_TAG:-$cudf_sha}"
-sha=$(git rev-parse HEAD)
 
 echo "Test against ${cudf_sha}..."
 
@@ -75,6 +73,7 @@ set +e
 ${MVN} verify ${MVN_MIRROR} \
   -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
   -Dlibcudf.build.configure=true \
+  -Dlibcudf.dependency.mode=latest \
   -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
   -DBUILD_TESTS=ON \
   -DUSE_SANITIZER=ON
@@ -89,6 +88,18 @@ else
   echo "Test failed, will update the result"
 fi
 
+# Extract the rapids-cmake sha1 that we need to pin too
+rapids_cmake_sha=$(git -C thirdparty/cudf/cpp/build/_deps/rapids-cmake-src/ rev-parse HEAD)
+echo "Update rapids-cmake pinned SHA1 to ${rapids_cmake_sha}"
+echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha
+
+# Do the git add after the build so that we get
+# the updated versions.json generated by the build
+echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
+git add .
+git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}"
+sha=$(git rev-parse HEAD)
+
 # push the intermediate branch and create PR against REF
 # if test passed, it will try auto-merge the PR
 # if test failed, it will only comment the test result in the PR

diff --git a/pom.xml b/pom.xml
@@ -89,12 +89,14 @@
     <cuda.version>cuda11</cuda.version>
     <jni.classifier>${cuda.version}</jni.classifier>
     <cudf.path>${project.basedir}/thirdparty/cudf</cudf.path>
+    <cudf.pin.path>${project.basedir}/thirdparty/cudf-pins/</cudf.pin.path>
     <hadoop.version>3.2.4</hadoop.version>
     <junit.version>5.8.1</junit.version>
     <libcudf.build.path>${cudf.path}/cpp/build</libcudf.build.path>
     <libcudf.build.configure>false</libcudf.build.configure>
     <libcudf.clean.skip>true</libcudf.clean.skip>
     <libcudf.install.path>${project.build.directory}/libcudf-install</libcudf.install.path>
+    <libcudf.dependency.mode>pinned</libcudf.dependency.mode>
     <libcudfjni.build.path>${project.build.directory}/libcudfjni</libcudfjni.build.path>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>

diff --git a/thirdparty/cudf-pins/add_dependency_pins.cmake b/thirdparty/cudf-pins/add_dependency_pins.cmake
@@ -0,0 +1,37 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Bootstrap rapids-cmake when `rapids-cmake-dir` has not been defined yet.
+# The path is anchored to this file's own directory: include() resolves a
+# relative path against the current source directory, which is not necessarily
+# the directory this file lives in.
+if(NOT DEFINED rapids-cmake-dir)
+  include("${CMAKE_CURRENT_LIST_DIR}/../cudf/rapids_config.cmake")
+endif()
+
+# Load the rapids-cpm module and initialize it before any override is applied.
+include(rapids-cpm)
+rapids_cpm_init()
+
+# Applies the behaviour selected by CUDF_DEPENDENCY_PIN_MODE, using the
+# versions.json file that sits next to this file:
+#   - `pinned`:        versions.json is passed to rapids_cpm_package_override()
+#   - any other value: versions.json is passed as the OUTPUT of
+#                      rapids_cpm_generate_pinned_versions()
+# Takes no arguments and is invoked once, immediately after its definition.
+function(add_override_if_requested)
+  if(NOT CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
+    include(${rapids-cmake-dir}/cpm/generate_pinned_versions.cmake)
+    rapids_cpm_generate_pinned_versions(OUTPUT ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)
+
+    message(STATUS "Building with latest CUDF dependencies (saving pinned versions to ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)")
+    return()
+  endif()
+
+  include(${rapids-cmake-dir}/cpm/package_override.cmake)
+  rapids_cpm_package_override(${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json)
+
+  message(STATUS "Pinning CUDF dependencies to values found in ${CMAKE_CURRENT_FUNCTION_LIST_DIR}/versions.json")
+endfunction()
+add_override_if_requested()
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -0,0 +1 @@
+096ae3c0a6b2c593f8fdb38468be527027bf79d7
diff --git a/thirdparty/cudf-pins/setup.cmake b/thirdparty/cudf-pins/setup.cmake
@@ -0,0 +1,45 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Normalize and validate the requested pin mode.
+string(TOLOWER "${CUDF_DEPENDENCY_PIN_MODE}" CUDF_DEPENDENCY_PIN_MODE)
+if(NOT (CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned OR
+        CUDF_DEPENDENCY_PIN_MODE STREQUAL latest))
+  message(FATAL_ERROR "The CUDF_DEPENDENCY_PIN_MODE variable must be set to either `pinned` or `latest`.")
+endif()
+# Write the lower-cased value back to the cache. Without this only the normal
+# variable of this script is normalized, and code that runs later in the
+# configure step and compares the cache entry against `pinned` would disagree
+# with the checks made here when the mode was given as e.g. `PINNED`.
+set(CUDF_DEPENDENCY_PIN_MODE "${CUDF_DEPENDENCY_PIN_MODE}" CACHE STRING "cudf dependency pin mode: pinned or latest" FORCE)
+
+# Logs the current value of `rapids-cmake-sha` and re-exports that same value
+# into the caller's scope. Takes no arguments.
+# NOTE(review): no call to this function is visible in the surrounding
+# sources - confirm whether it is still needed.
+function(set_rapids_cmake_pin_sha1)
+  message(STATUS "Pinning rapids-cmake SHA1 to ${rapids-cmake-sha}")
+
+  set(rapids-cmake-sha "${rapids-cmake-sha}" PARENT_SCOPE)
+endfunction()
+
+# We need to set the rapids-cmake SHA1 before any CMake code in libcudf is executed when
+# we are in pin mode. Otherwise we will use the latest rapids-cmake version since that
+# is what cudf does via `rapids_config.cmake`
+if(CUDF_DEPENDENCY_PIN_MODE STREQUAL pinned)
+  # Read the SHA1 stored next to this file and trim surrounding whitespace,
+  # which includes the trailing newline written with the value.
+  file(READ "${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha" rapids-cmake-sha)
+  string(STRIP "${rapids-cmake-sha}" rapids-cmake-sha)
+
+  # An empty pin file cannot pin anything, so stop instead of caching an empty SHA1.
+  string(LENGTH "${rapids-cmake-sha}" rapids_cmake_sha_length)
+  if(rapids_cmake_sha_length EQUAL 0)
+    message(FATAL_ERROR "No rapids-cmake SHA1 found in ${CMAKE_CURRENT_LIST_DIR}/rapids-cmake.sha")
+  endif()
+
+  # FORCE is intentional: the pinned value must replace any previously cached one.
+  set(rapids-cmake-sha "${rapids-cmake-sha}" CACHE STRING "rapids-cmake sha to use" FORCE)
+  message(STATUS "Pinning rapids-cmake SHA1 [${rapids-cmake-sha}]")
+else()
+  # Request a git clone of rapids-cmake so that its SHA1 can be read afterwards.
+  set(rapids-cmake-fetch-via-git "ON" CACHE STRING "Make sure rapids-cmake is cloned so we can get SHA value" FORCE)
+endif()
+
+# rapids-cmake cpm_init() can't be called from a `-C` CMake file, so register
+# add_dependency_pins.cmake as a project() call hook instead.
+set(CMAKE_PROJECT_TOP_LEVEL_INCLUDES
+    "${CMAKE_CURRENT_LIST_DIR}/add_dependency_pins.cmake"
+    CACHE FILEPATH "")
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
@@ -0,0 +1,155 @@
+{
+  "packages" : 
+  {
+    "Arrow" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "740889f413af9b1ae1d81eb1e5a4a9fb4ce9cf97",
+      "git_url" : "https://github.com/apache/arrow.git",
+      "version" : "14.0.2"
+    },
+    "CCCL" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "36f379f29660761fe033a1306ca9dab6a88cb65c",
+      "git_url" : "https://github.com/NVIDIA/cccl.git",
+      "patches" : 
+      [
+        {
+          "file" : "cccl/bug_fixes.diff",
+          "fixed_in" : "2.3",
+          "issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates"
+        },
+        {
+          "file" : "cccl/hide_kernels.diff",
+          "fixed_in" : "2.3",
+          "issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pulls/443]"
+        },
+        {
+          "file" : "cccl/revert_pr_211.diff",
+          "fixed_in" : "",
+          "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue."
+        },
+        {
+          "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
+          "fixed_in" : "",
+          "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]"
+        },
+        {
+          "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
+          "fixed_in" : "",
+          "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]"
+        },
+        {
+          "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
+          "fixed_in" : "",
+          "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]"
+        }
+      ],
+      "version" : "2.2.0"
+    },
+    "GTest" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "b796f7d44681514f58a683a3a71ff17c94edb0c1",
+      "git_url" : "https://github.com/google/googletest.git",
+      "version" : "1.13.0"
+    },
+    "KvikIO" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "c98eabbad1f60dfe640d03f03a2df58b23f3e167",
+      "git_url" : "https://github.com/rapidsai/kvikio.git",
+      "version" : "24.04"
+    },
+    "NVTX3" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "e170594ac7cf1dac584da473d4ca9301087090c1",
+      "git_url" : "https://github.com/NVIDIA/NVTX.git",
+      "version" : "3.1.0"
+    },
+    "cuco" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "56c53beb6fb0cafd265b7fcc3df78ae487811b22",
+      "git_url" : "https://github.com/NVIDIA/cuCollections.git",
+      "version" : "0.0.1"
+    },
+    "dlpack" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "365b823cedb281cd0240ca601aba9b78771f91a3",
+      "git_url" : "https://github.com/dmlc/dlpack.git",
+      "version" : "0.8"
+    },
+    "fmt" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "f5e54359df4c26b6230fc61d38aa294581393084",
+      "git_url" : "https://github.com/fmtlib/fmt.git",
+      "patches" : 
+      [
+        {
+          "file" : "fmt/fix_10_1_1_version.diff",
+          "fixed_in" : "10.2.0",
+          "issue" : "fmt 10.1.1 produces a CMake package with version 10.1.0"
+        }
+      ],
+      "version" : "10.1.1"
+    },
+    "jitify" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "047df5e87d84834f8f4225898476145741acfa80",
+      "git_url" : "https://github.com/rapidsai/jitify.git",
+      "version" : "2.0.0"
+    },
+    "nvcomp" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "v2.2.0",
+      "git_url" : "https://github.com/NVIDIA/nvcomp.git",
+      "proprietary_binary" : 
+      {
+        "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-major}.x.tgz",
+        "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-major}.x.tgz"
+      },
+      "version" : "3.0.6"
+    },
+    "rmm" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "0651edf0fce5ebf53528382b475fc29a2f3afa67",
+      "git_url" : "https://github.com/rapidsai/rmm.git",
+      "version" : "24.04"
+    },
+    "spdlog" : 
+    {
+      "always_download" : true,
+      "git_shallow" : false,
+      "git_tag" : "7e635fca68d014934b4af8a1cf874f63989352b7",
+      "git_url" : "https://github.com/gabime/spdlog.git",
+      "patches" : 
+      [
+        {
+          "file" : "spdlog/nvcc_constexpr_fix.diff",
+          "fixed_in" : "1.13",
+          "issue" : "Fix constexpr mismatch between spdlog and fmt [https://github.com/gabime/spdlog/issues/2856]"
+        }
+      ],
+      "version" : "1.12.0"
+    }
+  }
+}