Skip to content

Commit

Permalink
Fix issues with building cudf in a non-conda environment (#7647)
Browse files Browse the repository at this point in the history
Extension to rapidsai/rmm#733

Also resolves: #7578

This PR fixes `setup.py` so that cudf can be built outside a conda environment, and adds a Dockerfile that creates a container with cudf fully built from source; Arrow and rmm are also built from source in that image. It also adds a `requirements.txt` listing the packages cudf needs that can be installed via pip.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Devavret Makkar (https://github.com/devavret)
  - Keith Kraus (https://github.com/kkraus14)
  - Christopher Harris (https://github.com/cwharris)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: #7647
  • Loading branch information
galipremsagar authored Apr 1, 2021
1 parent b9415ca commit f4d5bde
Show file tree
Hide file tree
Showing 26 changed files with 747 additions and 28 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ flag. Below is a list of the available arguments and their purpose:
| `PYARROW_VERSION` | 1.0.1 | Not supported | set pyarrow version |
| `CMAKE_VERSION` | newest | >=3.14 | set cmake version |
| `CYTHON_VERSION` | 0.29 | Not supported | set Cython version |
| `PYTHON_VERSION` | 3.6 | 3.7 | set python version |
| `PYTHON_VERSION` | 3.7 | 3.8 | set python version |

---

Expand Down
3 changes: 1 addition & 2 deletions conda/environments/cudf_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- python>=3.7,<3.9
- numba>=0.49.0,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
Expand All @@ -36,7 +36,6 @@ dependencies:
- pandoc=<2.0.0
- cudatoolkit=10.1
- pip
- partd
- flake8=3.8.3
- black=19.10
- isort=5.0.7
Expand Down
3 changes: 1 addition & 2 deletions conda/environments/cudf_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
Expand All @@ -36,7 +36,6 @@ dependencies:
- pandoc=<2.0.0
- cudatoolkit=10.2
- pip
- partd
- flake8=3.8.3
- black=19.10
- isort=5.0.7
Expand Down
3 changes: 1 addition & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies:
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
Expand All @@ -36,7 +36,6 @@ dependencies:
- pandoc=<2.0.0
- cudatoolkit=11.0
- pip
- partd
- flake8=3.8.3
- black=19.10
- isort=5.0.7
Expand Down
67 changes: 67 additions & 0 deletions conda/environments/cudf_dev_cuda11.1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# Conda environment definition for cudf development (cudatoolkit 11.1).

name: cudf_dev
channels:
  - rapidsai
  - nvidia
  - rapidsai-nightly
  - conda-forge
  - defaults
dependencies:
  - clang=8.0.1
  - clang-tools=8.0.1
  - cupy>7.1.0,<9.0.0a0
  - rmm=0.19.*
  - cmake>=3.14
  - cmake_setuptools>=0.1.3
  - python>=3.7,<3.9
  - numba>=0.49,!=0.51.0
  - numpy
  - pandas>=1.0,<1.3.0dev0
  - pyarrow=1.0.1
  - fastavro>=0.22.9
  - notebook>=0.5.0
  - cython>=0.29,<0.30
  - fsspec>=0.6.0
  - pytest
  - pytest-benchmark
  - pytest-xdist
  - sphinx
  - sphinx_rtd_theme
  - sphinxcontrib-websupport
  - nbsphinx
  - numpydoc
  - ipython
  - recommonmark
  - pandoc=<2.0.0
  # CUDA toolkit version this environment targets.
  - cudatoolkit=11.1
  - pip
  - flake8=3.8.3
  - black=19.10
  - isort=5.0.7
  - mypy=0.782
  - typing_extensions
  - pre_commit
  # dask, distributed and streamz are listed here as conda packages and again
  # in the pip section below as git installs.
  # NOTE(review): presumably the conda entries exist to pull in their
  # dependencies before pip overrides the packages themselves - confirm.
  - dask>=2021.3.1
  - distributed>=2.22.0
  - streamz
  - dlpack
  - arrow-cpp=1.0.1
  - arrow-cpp-proc * cuda
  - boost-cpp>=1.72.0
  - double-conversion
  - rapidjson
  - flatbuffers
  - hypothesis
  - sphinx-markdown-tables
  - sphinx-copybutton
  - mimesis
  - packaging
  - protobuf
  - nvtx>=0.2.1
  - cachetools
  # Requirements handed to pip; these must be nested under the `pip:` key so
  # they are not interpreted as conda package specs.
  - pip:
      - git+https://github.com/dask/dask.git@main
      - git+https://github.com/dask/distributed.git@main
      - git+https://github.com/python-streamz/streamz.git
      - pyorc
67 changes: 67 additions & 0 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# Conda environment definition for cudf development (cudatoolkit 11.2).

name: cudf_dev
channels:
  - rapidsai
  - nvidia
  - rapidsai-nightly
  - conda-forge
  - defaults
dependencies:
  - clang=8.0.1
  - clang-tools=8.0.1
  - cupy>7.1.0,<9.0.0a0
  - rmm=0.19.*
  - cmake>=3.14
  - cmake_setuptools>=0.1.3
  - python>=3.7,<3.9
  - numba>=0.49,!=0.51.0
  - numpy
  - pandas>=1.0,<1.3.0dev0
  - pyarrow=1.0.1
  - fastavro>=0.22.9
  - notebook>=0.5.0
  - cython>=0.29,<0.30
  - fsspec>=0.6.0
  - pytest
  - pytest-benchmark
  - pytest-xdist
  - sphinx
  - sphinx_rtd_theme
  - sphinxcontrib-websupport
  - nbsphinx
  - numpydoc
  - ipython
  - recommonmark
  - pandoc=<2.0.0
  # CUDA toolkit version this environment targets.
  - cudatoolkit=11.2
  - pip
  - flake8=3.8.3
  - black=19.10
  - isort=5.0.7
  - mypy=0.782
  - typing_extensions
  - pre_commit
  # dask, distributed and streamz are listed here as conda packages and again
  # in the pip section below as git installs.
  # NOTE(review): presumably the conda entries exist to pull in their
  # dependencies before pip overrides the packages themselves - confirm.
  - dask>=2021.3.1
  - distributed>=2.22.0
  - streamz
  - dlpack
  - arrow-cpp=1.0.1
  - arrow-cpp-proc * cuda
  - boost-cpp>=1.72.0
  - double-conversion
  - rapidjson
  - flatbuffers
  - hypothesis
  - sphinx-markdown-tables
  - sphinx-copybutton
  - mimesis
  - packaging
  - protobuf
  - nvtx>=0.2.1
  - cachetools
  # Requirements handed to pip; these must be nested under the `pip:` key so
  # they are not interpreted as conda package specs.
  - pip:
      - git+https://github.com/dask/dask.git@main
      - git+https://github.com/dask/distributed.git@main
      - git+https://github.com/python-streamz/streamz.git
      - pyorc
4 changes: 4 additions & 0 deletions cpp/cmake/Modules/StringifyJITHeaders.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ add_custom_command(WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/include
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/timestamps.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/fixed_point.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/durations.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/assert.cuh.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/chrono.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/climits.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/cstddef.jit
Expand Down Expand Up @@ -65,6 +66,7 @@ add_custom_command(WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/include
${CUDF_SOURCE_DIR}/include/cudf/wrappers/timestamps.hpp
${CUDF_SOURCE_DIR}/include/cudf/fixed_point/fixed_point.hpp
${CUDF_SOURCE_DIR}/include/cudf/wrappers/durations.hpp
${CUDF_SOURCE_DIR}/include/cudf/detail/utilities/assert.cuh
${LIBCUDACXX_INCLUDE_DIR}/cuda/std/chrono
${LIBCUDACXX_INCLUDE_DIR}/cuda/std/climits
${LIBCUDACXX_INCLUDE_DIR}/cuda/std/cstddef
Expand Down Expand Up @@ -99,6 +101,7 @@ add_custom_command(WORKING_DIRECTORY ${CUDF_SOURCE_DIR}/include
COMMAND ${CUDF_BINARY_DIR}/stringify cudf/wrappers/timestamps.hpp > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/timestamps.hpp.jit
COMMAND ${CUDF_BINARY_DIR}/stringify cudf/fixed_point/fixed_point.hpp > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/fixed_point.hpp.jit
COMMAND ${CUDF_BINARY_DIR}/stringify cudf/wrappers/durations.hpp > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/durations.hpp.jit
COMMAND ${CUDF_BINARY_DIR}/stringify cudf/detail/utilities/assert.cuh > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/assert.cuh.jit
COMMAND ${CUDF_BINARY_DIR}/stringify ${LIBCUDACXX_INCLUDE_DIR}/cuda/std/chrono cuda_std_chrono > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/chrono.jit
COMMAND ${CUDF_BINARY_DIR}/stringify ${LIBCUDACXX_INCLUDE_DIR}/cuda/std/climits cuda_std_climits > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/climits.jit
COMMAND ${CUDF_BINARY_DIR}/stringify ${LIBCUDACXX_INCLUDE_DIR}/cuda/std/cstddef cuda_std_cstddef > ${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/cstddef.jit
Expand Down Expand Up @@ -133,6 +136,7 @@ add_custom_target(stringify_run DEPENDS
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/timestamps.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/fixed_point.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/durations.hpp.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/assert.cuh.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/chrono.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/climits.jit
${CUDF_GENERATED_INCLUDE_DIR}/include/jit/libcudacxx/cuda/std/cstddef.jit
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <jit/parser.h>
#include <jit/type.h>

#include <jit/assert.cuh.jit>
#include <jit/bit.hpp.jit>
#include <jit/common_headers.hpp>
#include <jit/durations.hpp.jit>
Expand Down Expand Up @@ -86,7 +87,8 @@ const std::vector<std::string> header_names{"operation.h",
cudf_utilities_bit_hpp,
cudf_wrappers_timestamps_hpp,
cudf_wrappers_durations_hpp,
cudf_fixed_point_fixed_point_hpp};
cudf_fixed_point_fixed_point_hpp,
cudf_detail_utilities_assert_cuh};

std::istream* headers_code(std::string filename, std::iostream& stream)
{
Expand Down
80 changes: 80 additions & 0 deletions docker_build/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# Builds an image in which CMake, Arrow (C++ and pyarrow), rmm and cudf are all
# compiled from source, without conda.
# NOTE(review): the build context is presumably the cudf repository root
# (including .git), because "." is copied to /repos/cudf and
# `git submodule update` is run there - confirm.

ARG CUDA_VERSION=11.2.2
FROM nvidia/cuda:${CUDA_VERSION}-devel
# NOTE(review): hard-coded and not derived from CUDA_VERSION; keep the two in
# sync when overriding the build argument.
ENV CUDA_SHORT_VERSION=11.2

SHELL ["/bin/bash", "-c"]
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/lib:/repos/dist/lib

ENV DEBIAN_FRONTEND=noninteractive

ENV CUDA_HOME=/usr/local/cuda
ENV CUDA_PATH=$CUDA_HOME
ENV PATH=${CUDA_HOME}/lib64/:${PATH}:${CUDA_HOME}/bin

# Build env variables for arrow
ENV CMAKE_BUILD_TYPE=release
ENV PYARROW_WITH_PARQUET=1
ENV PYARROW_WITH_CUDA=1
ENV PYARROW_WITH_ORC=1
ENV PYARROW_WITH_DATASET=1

# Install prefix used for the Arrow build below (CMAKE_INSTALL_PREFIX).
ENV ARROW_HOME=/repos/dist

# Build env variables for rmm
ENV INSTALL_PREFIX=/usr

# System packages, Python 3.8 (deadsnakes PPA) and pip.
# The package index refresh and the installs are kept in ONE layer so that a
# cached `update` layer can never be paired with a later, changed `install`
# list. `apt-get` is used instead of `apt` because `apt` does not provide a
# stable command-line interface for scripts, and `-y` keeps
# add-apt-repository from waiting on a confirmation prompt.
RUN apt-get update -y --fix-missing && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update -y --fix-missing && \
    apt-get install -y --no-install-recommends \
        git \
        libboost-all-dev \
        python3.8-dev \
        build-essential \
        autoconf \
        bison \
        flex \
        libboost-filesystem-dev \
        libboost-system-dev \
        libboost-regex-dev \
        libjemalloc-dev \
        wget \
        libssl-dev \
        protobuf-compiler && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1 && \
    wget https://bootstrap.pypa.io/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Install cmake (built from the release tarball; the source tree is removed
# afterwards so it does not stay in the image layer).
RUN version=3.18 && build=5 && mkdir ~/temp && cd ~/temp && \
    wget https://cmake.org/files/v$version/cmake-$version.$build.tar.gz && \
    tar -xzvf cmake-$version.$build.tar.gz && cd cmake-$version.$build/ && \
    ./bootstrap && make -j$(nproc) && make install && \
    cd / && rm -rf ~/temp

# Install arrow from source: check out the apache-arrow-1.0.1 tag, build and
# install the C++ libraries into $ARROW_HOME, then build and install the
# pyarrow wheel.
RUN git clone https://github.com/apache/arrow.git /repos/arrow && mkdir /repos/dist/ && cd /repos/arrow && git checkout apache-arrow-1.0.1 && git submodule init && \
    git submodule update && export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" && export ARROW_TEST_DATA="${PWD}/testing/data" && \
    cd /repos/arrow/cpp && mkdir release && cd /repos/arrow/cpp/release && pip install -r /repos/arrow/python/requirements-build.txt && \
    cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME -DCMAKE_INSTALL_LIBDIR=lib -DARROW_FLIGHT=ON -DARROW_GANDIVA=OFF -DARROW_ORC=ON -DARROW_WITH_BZ2=ON -DARROW_WITH_ZLIB=ON -DARROW_WITH_ZSTD=ON -DARROW_WITH_LZ4=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_BROTLI=ON -DARROW_PARQUET=ON -DARROW_PYTHON=ON -DARROW_PLASMA=ON -DARROW_BUILD_TESTS=ON -DARROW_CUDA=ON -DARROW_DATASET=ON .. && \
    make -j$(nproc) && make install && cd /repos/arrow/python/ && python setup.py build_ext --build-type=release bdist_wheel && pip install /repos/arrow/python/dist/*.whl

# Install rmm from source.
# NOTE(review): this clones the repository's default branch with no tag or
# branch pin, so the rmm version depends on when the image is built - consider
# pinning it to the release that matches this cudf checkout.
RUN cd /repos/ && git clone https://github.com/rapidsai/rmm.git && cd /repos/rmm/ && ./build.sh librmm && pip install /repos/rmm/python/.

# COPY is used rather than ADD: only a plain copy of the build context is
# needed, not ADD's archive-extraction or URL-fetching behaviour.
COPY . /repos/cudf/

# Build env for CUDF build
ENV CUDF_ROOT=/repos/cudf/cpp/build/

# Install cudf from source
RUN cd /repos/cudf/ && git submodule update --init --recursive && ./build.sh libcudf && \
    pip install /repos/cudf/python/cudf/.

29 changes: 29 additions & 0 deletions python/cudf/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# Packages that must be present before the cudf Python package can be built.
[build-system]

requires = [
    "wheel",
    "setuptools",
    "cython>=0.29,<0.30",
]

# Formatter configuration for black.
[tool.black]
line-length = 79
# NOTE(review): the development environments in this change require
# Python >=3.7; consider whether "py36" should be bumped to match.
target-version = ["py36"]
# Format .py and .pyi files. The previous pattern '\.py?$' made the "y"
# optional, so it matched ".p"/".py" and never matched ".pyi" stubs.
include = '\.pyi?$'
# Directories black must not descend into.
exclude = '''
/(
    thirdparty |
    \.eggs |
    \.git |
    \.hg |
    \.mypy_cache |
    \.tox |
    \.venv |
    _build |
    buck-out |
    build |
    dist
)/
'''
41 changes: 41 additions & 0 deletions python/cudf/requirements/cuda-10.1/dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# pyarrow gpu package will have to be built from source :
# https://arrow.apache.org/docs/python/install.html#installing-from-source

# cupy wheel for CUDA 10.1, bounded to the same version range that the conda
# development environments use for cupy.
cupy-cuda101>7.1.0,<9.0.0a0
cachetools
cmake
cmake-setuptools>=0.1.3
cython>=0.29,<0.30
dlpack
fastavro>=0.22.9
flatbuffers
fsspec>=0.6.0
hypothesis
mimesis
mypy==0.782
nbsphinx
numba>=0.49.0,!=0.51.0
numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandoc==2.0a4
protobuf
pyorc
pytest
pytest-benchmark
pytest-xdist
rapidjson
recommonmark
setuptools
sphinx
sphinx-copybutton
sphinx-markdown-tables
sphinx_rtd_theme
sphinxcontrib-websupport
typing_extensions
wheel
Loading

0 comments on commit f4d5bde

Please sign in to comment.