Skip to content

Commit

Permalink
use libucx wheels
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed May 6, 2024
1 parent 03c864b commit 1b7399a
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 86 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ dask-worker-space
__pytestcache__
__pycache__
*.egg-info/
final_dist/
dist/
.vscode

*.sw[po]

*.whl
95 changes: 10 additions & 85 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,91 +40,16 @@ if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
fi


# Build this project's wheel into dist/ (verbose output, no wheels for its
# dependencies, no pip self-version check).
python -m pip wheel . \
    --wheel-dir dist \
    -vvv \
    --no-deps \
    --disable-pip-version-check

# Run auditwheel over everything that was built and collect the repaired
# wheels in final_dist/.
mkdir -p final_dist
python -m auditwheel repair --wheel-dir final_dist dist/*

# Auditwheel rewrites dynamic libraries that are referenced at link time in the
# package. However, UCX loads a number of sub-libraries at runtime via dlopen;
# these are not picked up by auditwheel. Since we have a priori knowledge of
# what these libraries are, we mimic the behaviour of auditwheel by using the
# same hash-based uniqueness scheme and rewriting the link paths.

# Absolute path of the repaired wheel. The variable is quoted so that only the
# intended wildcards take part in glob expansion.
WHL=$(realpath final_dist/"${underscore_package_name}"*manylinux*.whl)

#######################################
# Print the hashed file name of a library found in an `unzip -l` listing.
# Arguments: $1 - library stem, e.g. "libucm"
# Inputs:    `unzip -l` output on stdin; the entry name is read from the
#            4th whitespace-separated field
# Outputs:   for each entry containing "<stem>-<hash>.", the entry name from
#            the start of the match onwards (e.g. "libucm-<hash>.so.0.0.0");
#            nothing if there is no such entry
#######################################
_vendored_lib_name() {
  awk -v pattern="${1}-[^.]+[.]" \
    'match($4, pattern) { print substr($4, RSTART) }'
}

# first grab the auditwheel hashes for libuc{tms}
# The archive is listed once and the listing is reused for every lookup.
wheel_listing=$(unzip -l "${WHL}")
LIBUCM=$(_vendored_lib_name libucm <<<"${wheel_listing}")
LIBUCT=$(_vendored_lib_name libuct <<<"${wheel_listing}")
LIBUCS=$(_vendored_lib_name libucs <<<"${wheel_listing}")

# Extract the libraries that have already been patched in by auditwheel
mkdir -p "repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx"
# The member pattern is quoted so that unzip, not the shell, expands it.
unzip "${WHL}" "${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/

# Patch the RPATH to include ORIGIN for each library
# Stop if the directory cannot be entered; otherwise patchelf would run
# against whatever libu*.so* files happen to be in the current directory.
pushd "repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs" || exit 1
for f in libu*.so*; do
  # The -f test also skips the unexpanded pattern when nothing matches.
  if [[ -f "${f}" ]]; then
    # '$ORIGIN' is single-quoted on purpose: the literal token must reach
    # patchelf instead of being expanded by the shell.
    patchelf --add-rpath '$ORIGIN' "${f}"
  fi
done

popd

# Now copy in all the extra libraries that are only ever loaded at runtime
# The directory entered here stays on the stack; the matching popd comes
# later in the script, after the cudart handling.
pushd "repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx" || exit 1
if [[ -d /usr/lib64/ucx ]]; then
  # -P copies symlinks as symlinks instead of following them
  cp -P /usr/lib64/ucx/* .
elif [[ -d /usr/lib/ucx ]]; then
  cp -P /usr/lib/ucx/* .
else
  # Diagnostics belong on stderr
  echo "Could not find ucx libraries" >&2
  exit 1
fi

# we link against <python>/lib/site-packages/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/libuc{ptsm}
# we also amend the rpath to search one directory above to *find* libuc{tsm}
for f in libu*.so*; do
  # Avoid patching symlinks, which is redundant
  if [[ ! -L "${f}" ]]; then
    # Point each dependency at the hashed file name found in the wheel
    patchelf --replace-needed libuct.so.0 "${LIBUCT}" "${f}"
    patchelf --replace-needed libucs.so.0 "${LIBUCS}" "${f}"
    patchelf --replace-needed libucm.so.0 "${LIBUCM}" "${f}"
    # Single quotes keep $ORIGIN literal for patchelf
    patchelf --add-rpath '$ORIGIN/..' "${f}"
  fi
done

# Bring in cudart as well. To avoid symbol collision with other libraries e.g.
# cupy we mimic auditwheel by renaming the libraries to include the hashes of
# their names. Since there will typically be a chain of symlinks
# libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain
# and rename all of them.

# Copy every libcudart file and symlink into the current directory. Using
# -exec ... + avoids word-splitting of paths and does not invoke cp at all
# when nothing is found.
find /usr/local/cuda/ -name "libcudart*.so*" -exec cp -P -t . {} +
src=libcudart.so
# awk string positions start at 1. Starting at 0 can return only 7 characters
# depending on the awk implementation, so start at 1 to always get 8.
hash=$(sha256sum "${src}" | awk '{print substr($1, 1, 8)}')
# File name at the end of the symlink chain
target=$(basename "$(readlink -f "${src}")")

# Rename the real file first, then rebuild each link of the chain under the
# hashed name. The loop ends once src is no longer a symlink.
mv "${target}" "${target/libcudart/libcudart-${hash}}"
while readlink "${src}" > /dev/null; do
  target=$(readlink "${src}")
  ln -s "${target/libcudart/libcudart-${hash}}" "${src/libcudart/libcudart-${hash}}"
  rm -f "${src}"
  src=${target}
done

# Make libuct_cuda.so depend on the renamed cudart and look for it next to
# itself.
to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }')
patchelf --replace-needed "${to_rewrite}" "libcudart-${hash}.so" libuct_cuda.so
patchelf --add-rpath '$ORIGIN' libuct_cuda.so

popd

# Add the patched .libs tree to the repaired wheel. WHL was made absolute with
# realpath, so it still resolves after changing directory.
pushd repair_dist || exit 1
zip -r "${WHL}" "${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/"
popd

# Hand the contents of final_dist to the upload helper, with the wheel name
# passed through the environment for this one command.
RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
# Build one --exclude flag per shared-library name that auditwheel repair is
# told to exclude.
# NOTE(review): "libucx.so.0" does not look like a library UCX ships
# (the others are ucm/ucp/ucs/uct) - confirm whether it is intended.
exclude_flags=()
for soname in libucm.so.0 libucp.so.0 libucx.so.0 libucs.so.0 libuct.so.0; do
  exclude_flags+=(--exclude "${soname}")
done

# Repair every wheel in dist/ and write the results to final_dist/.
python -m auditwheel repair -w final_dist "${exclude_flags[@]}" dist/*

#RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
20 changes: 20 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ files:
table: project
includes:
- run
- depends_on_ucx_run
py_optional_test:
output: pyproject
pyproject_dir: .
Expand Down Expand Up @@ -116,9 +117,28 @@ dependencies:
packages:
- numpy>=1.23,<2.0a0
- pynvml>=11.4.1
# Runtime dependency on UCX: the `ucx` package for conda output, the `libucx`
# wheels for requirements/pyproject output.
depends_on_ucx_run:
common:
- output_types: conda
packages:
- ucx>=1.15.0,<1.16
- output_types: requirements
packages:
# pip recognizes the index as a global option for the requirements.txt file
- --extra-index-url=https://pypi.nvidia.com
- --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
specific:
- output_types: [requirements, pyproject]
matrices:
# One wheel name per CUDA major version; all entries share the same
# version range as the conda package above.
- matrix: {cuda: "12.*"}
packages:
- libucx-cu12>=1.15.0,<1.16
- matrix: {cuda: "11.*"}
packages:
- libucx-cu11>=1.15.0,<1.16
# NOTE(review): presumably the fallback used when no CUDA version is
# selected - confirm against the dependency file generator's docs.
- matrix: null
packages:
- libucx>=1.15.0,<1.16
test_python:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ authors = [
license = { text = "BSD-3-Clause" }
requires-python = ">=3.9"
dependencies = [
"libucx>=1.15.0,<1.16",
"numpy>=1.23,<2.0a0",
"pynvml>=11.4.1",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`.
Expand Down
11 changes: 11 additions & 0 deletions ucp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@
except ImportError:
pynvml = None

# If libucx was installed as a wheel, we must request it to load the library symbols.
# Otherwise, we assume that the library was installed in a system path that ld can find.
try:
import libucx
except ModuleNotFoundError:
# libucx is not importable: nothing to load here.
pass
else:
# Runs only when the import succeeded.
libucx.load_library()
# Remove the name from this module's namespace once it has been used.
del libucx


# Setup UCX-Py logger
logger = get_ucxpy_logger()

Expand Down

0 comments on commit 1b7399a

Please sign in to comment.