Commit c06335f

Merge branch 'branch-0.20' of https://github.com/rapidsai/cudf into indexpress
skirui-source committed Apr 16, 2021
2 parents f00bb2f + 3327f7b commit c06335f
Showing 39 changed files with 1,078 additions and 964 deletions.
12 changes: 6 additions & 6 deletions CONTRIBUTING.md
@@ -160,7 +160,7 @@ git submodule update --init --remote --recursive
```bash
# create the conda environment (assuming in base `cudf` directory)
# note: RAPIDS currently doesn't support `channel_priority: strict`; use `channel_priority: flexible` instead
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda10.0.yml
conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.0.yml
# activate the environment
conda activate cudf_dev
```
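
A quick sanity check that the environment is active and can see a GPU (a sketch; `numba` is part of the dev environment):

```bash
conda env list                                    # cudf_dev should be listed and active
python -c "from numba import cuda; print(cuda.is_available())"
```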
@@ -281,8 +281,8 @@ A Dockerfile is provided with a preconfigured conda environment for building and
### Prerequisites

* Install [nvidia-docker2](https://github.com/nvidia/nvidia-docker/wiki/Installation-(version-2.0)) for Docker + GPU support
* Verify NVIDIA driver is `410.48` or higher
* Ensure CUDA 10.0+ is installed
* Verify NVIDIA driver is `450.80.02` or higher
* Ensure CUDA 11.0+ is installed
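
A quick way to check both requirements (exact output varies by installation):

```bash
nvidia-smi --query-gpu=driver_version --format=csv,noheader   # expect >= 450.80.02
nvcc --version                                                # expect release 11.0 or newer
```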

### Usage

@@ -309,9 +309,9 @@ flag. Below is a list of the available arguments and their purpose:

| Build Argument | Default Value | Other Value(s) | Purpose |
| --- | --- | --- | --- |
| `CUDA_VERSION` | 10.0 | 10.1, 10.2 | set CUDA version |
| `LINUX_VERSION` | ubuntu16.04 | ubuntu18.04 | set Ubuntu version |
| `CC` & `CXX` | 5 | 7 | set gcc/g++ version; **NOTE:** gcc7 requires Ubuntu 18.04 |
| `CUDA_VERSION` | 11.0 | 11.1, 11.2.2 | set CUDA version |
| `LINUX_VERSION` | ubuntu18.04 | ubuntu20.04 | set Ubuntu version |
| `CC` & `CXX` | 9 | 10 | set gcc/g++ version |
| `CUDF_REPO` | This repo | Forks of cuDF | set git URL to use for `git clone` |
| `CUDF_BRANCH` | main | Any branch name | set git branch to checkout of `CUDF_REPO` |
| `NUMBA_VERSION` | newest | >=0.40.0 | set numba version |
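
For example, a build that overrides several of these arguments might look like the following (the `cudf-dev` tag is illustrative):

```bash
docker build \
  --build-arg CUDA_VERSION=11.1 \
  --build-arg LINUX_VERSION=ubuntu20.04 \
  --build-arg CC=10 --build-arg CXX=10 \
  -t cudf-dev .
```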
30 changes: 12 additions & 18 deletions Dockerfile
@@ -1,18 +1,20 @@
# Copyright (c) 2021, NVIDIA CORPORATION.

# An integration test & dev container which builds and installs cuDF from main
ARG CUDA_VERSION=10.1
ARG CUDA_VERSION=11.0
ARG CUDA_SHORT_VERSION=${CUDA_VERSION}
ARG LINUX_VERSION=ubuntu16.04
ARG LINUX_VERSION=ubuntu18.04
FROM nvidia/cuda:${CUDA_VERSION}-devel-${LINUX_VERSION}
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib
# Needed for cudf.concat(), avoids "OSError: library nvvm not found"
ENV NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so
ENV NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice/
ENV DEBIAN_FRONTEND=noninteractive

ARG CC=5
ARG CXX=5
ARG CC=9
ARG CXX=9
RUN apt update -y --fix-missing && \
apt upgrade -y && \
apt install -y --no-install-recommends software-properties-common && \
add-apt-repository ppa:ubuntu-toolchain-r/test && \
apt update -y --fix-missing && \
apt install -y --no-install-recommends \
git \
gcc-${CC} \
@@ -66,18 +68,10 @@ RUN if [ -f /cudf/docker/package_versions.sh ]; \
conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \
fi

# libcudf build/install
ENV CC=/usr/bin/gcc-${CC}
ENV CXX=/usr/bin/g++-${CXX}
RUN source activate cudf && \
mkdir -p /cudf/cpp/build && \
cd /cudf/cpp/build && \
cmake .. -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} && \
make -j"$(nproc)" install

# cuDF build/install
# libcudf & cudf build/install
RUN source activate cudf && \
cd /cudf/python/cudf && \
python setup.py build_ext --inplace && \
python setup.py install && \
python setup.py install
cd /cudf/ && \
./build.sh libcudf cudf
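
The separate cmake/make (libcudf) and setup.py (cudf) steps above collapse into a single `build.sh` invocation that builds both targets. Running the finished image with GPU access looks roughly like this (tag assumed from a `docker build -t cudf-dev .`; `--gpus` needs Docker 19.03+, older nvidia-docker2 setups use `--runtime=nvidia` instead):

```bash
docker run --gpus all --rm -it cudf-dev bash
```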
67 changes: 0 additions & 67 deletions conda/environments/cudf_dev_cuda10.1.yml

This file was deleted.

67 changes: 0 additions & 67 deletions conda/environments/cudf_dev_cuda10.2.yml

This file was deleted.
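
With the CUDA 10.x environment files deleted, the Dockerfile's env-file lookup (`cudf_dev_cuda${CUDA_SHORT_VERSION}.yml`, see the Dockerfile hunk above) only resolves against the remaining CUDA 11 files. A sketch of a build where the patch level differs from the env file's two-part version (file name assumed from the pattern):

```bash
# CUDA_SHORT_VERSION defaults to CUDA_VERSION, so override it when passing a
# three-part version such as 11.2.2:
docker build \
  --build-arg CUDA_VERSION=11.2.2 \
  --build-arg CUDA_SHORT_VERSION=11.2 \
  -t cudf-dev .
```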

2 changes: 1 addition & 1 deletion cpp/benchmarks/string/json_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -113,7 +113,7 @@ static void BM_case(benchmark::State& state, QueryArg&&... query_arg)
std::string json_path(query_arg...);

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
cuda_event_timer raii(state, true);
auto result = cudf::strings::get_json_object(scv, json_path);
cudaStreamSynchronize(0);
}
2 changes: 1 addition & 1 deletion cpp/benchmarks/text/ngrams_benchmark.cpp
@@ -43,7 +43,7 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt)
cudf::strings_column_view input(table->view().column(0));

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
cuda_event_timer raii(state, true);
switch (nt) {
case ngrams_type::tokens: nvtext::generate_ngrams(input); break;
case ngrams_type::characters: nvtext::generate_character_ngrams(input); break;
2 changes: 1 addition & 1 deletion cpp/benchmarks/text/replace_benchmark.cpp
@@ -54,7 +54,7 @@ static void BM_replace(benchmark::State& state)
cudf::test::strings_column_wrapper replacements({"1", "2", "7", "0"});

for (auto _ : state) {
cuda_event_timer raii(state, true, 0);
cuda_event_timer raii(state, true);
nvtext::replace_tokens(
view, cudf::strings_column_view(targets), cudf::strings_column_view(replacements));
}
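
The three benchmark edits above are the same one-line change: the explicit default-stream argument `0` to `cuda_event_timer` is dropped in favor of the constructor's default. A sketch of the RAII event-timer pattern these loops rely on (assumed shape, not the exact cudf helper):

```cpp
#include <benchmark/benchmark.h>
#include <cuda_runtime.h>

// Times one benchmark iteration with a CUDA event pair and reports the GPU
// time manually; benchmarks using this pattern need ->UseManualTime().
struct scoped_cuda_event_timer {
  cudaEvent_t start{}, stop{};
  benchmark::State &state;
  cudaStream_t stream;

  explicit scoped_cuda_event_timer(benchmark::State &s, cudaStream_t strm = 0)
    : state(s), stream(strm)
  {
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start, stream);  // begin timing on entry
  }

  ~scoped_cuda_event_timer()
  {
    cudaEventRecord(stop, stream);   // end timing on scope exit
    cudaEventSynchronize(stop);
    float ms = 0.f;
    cudaEventElapsedTime(&ms, start, stop);
    state.SetIterationTime(ms / 1000.0);  // Google Benchmark expects seconds
    cudaEventDestroy(start);
    cudaEventDestroy(stop);
  }
};
```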
6 changes: 3 additions & 3 deletions cpp/docs/TESTING.md
@@ -1,7 +1,7 @@
# Unit Testing in libcudf

Unit tests in libcudf are written using
[Google Test](https://github.com/google/googletest/blob/master/googletest/docs/primer.md).
[Google Test](https://github.com/google/googletest/blob/master/docs/primer.md).

**Important:** Instead of including `gtest/gtest.h` directly, use
`#include <cudf_test/cudf_gtest.hpp>`.
@@ -59,7 +59,7 @@ files, and are therefore preferred in test code over `thrust::device_vector`.

## Base Fixture

All libcudf unit tests should make use of a GTest ["Test Fixture"](https://github.com/google/googletest/blob/master/googletest/docs/primer.md#test-fixtures-using-the-same-data-configuration-for-multiple-tests-same-data-multiple-tests).
All libcudf unit tests should make use of a GTest ["Test Fixture"](https://github.com/google/googletest/blob/master/docs/primer.md#test-fixtures-using-the-same-data-configuration-for-multiple-tests-same-data-multiple-tests).
Even if the fixture is empty, it should inherit from the base fixture `cudf::test::BaseFixture`
found in `include/cudf_test/base_fixture.hpp`. This ensures that RMM is properly initialized and
finalized. `cudf::test::BaseFixture` already inherits from `::testing::Test` and therefore it is
@@ -75,7 +75,7 @@ class MyTestFixture : public cudf::test::BaseFixture {...};
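
A minimal test against such a fixture, as a sketch (the test body is illustrative):

```cpp
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/cudf_gtest.hpp>

#include <rmm/device_buffer.hpp>

struct MyTestFixture : public cudf::test::BaseFixture {};

TEST_F(MyTestFixture, DeviceAllocationIsSafe)
{
  // BaseFixture guarantees RMM is initialized before the body runs, so
  // creating device-side objects here is well-defined.
  EXPECT_NO_THROW(rmm::device_buffer{});
}
```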
In general, libcudf features must work across all of the supported types (there are exceptions e.g.
not all binary operations are supported for all types). In order to automate the process of running
the same tests across multiple types, we use GTest's
[Typed Tests](https://github.com/google/googletest/blob/master/googletest/docs/advanced.md#typed-tests).
[Typed Tests](https://github.com/google/googletest/blob/master/docs/advanced.md#typed-tests).
Typed tests allow you to write a test once and run it across a list of types.
For example:
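
The file's own example is collapsed in this view; a minimal typed test in the same style looks roughly like this (fixture name and type list are illustrative):

```cpp
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/cudf_gtest.hpp>

template <typename T>
struct TypedColumnTest : public cudf::test::BaseFixture {};

// The test below is instantiated once per type in this list.
using TestTypes = ::testing::Types<int8_t, int32_t, float, double>;
TYPED_TEST_SUITE(TypedColumnTest, TestTypes);

TYPED_TEST(TypedColumnTest, RunsForEveryType)
{
  using T = TypeParam;  // the type for the current instantiation
  EXPECT_GE(sizeof(T), 1u);
}
```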
3 changes: 2 additions & 1 deletion cpp/include/cudf/io/parquet.hpp
@@ -389,9 +389,10 @@ class column_in_metadata {
bool _use_int96_timestamp = false;
// bool _output_as_binary = false;
thrust::optional<uint8_t> _decimal_precision;
std::vector<column_in_metadata> children;

public:
std::vector<column_in_metadata> children;

/**
* @brief Set the name of this column
*
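
Moving `children` into the public section lets callers address nested columns' metadata directly. A sketch of the intended use (assuming the `table_input_metadata` wrapper that holds these objects; names are illustrative):

```cpp
#include <cudf/io/parquet.hpp>
#include <cudf/table/table_view.hpp>

void name_nested_columns(cudf::table_view const &table)
{
  cudf::io::table_input_metadata metadata(table);
  metadata.column_metadata[0].set_name("parent");
  // With `children` public, per-child metadata is reachable directly:
  metadata.column_metadata[0].children[0].set_name("child_0");
}
```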
8 changes: 2 additions & 6 deletions cpp/src/io/comp/debrotli.cu
@@ -357,8 +357,6 @@ static __device__ uint8_t *ext_heap_alloc(uint32_t bytes,
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
if (first_free_block == ~0 || first_free_block >= ext_heap_size) {
// Some other block is holding the heap or there are no free blocks: try again later
// Wait a bit in an attempt to make the spin less resource-hungry
nanosleep(100);
continue;
}
if (first_free_block == 0) {
@@ -408,8 +406,7 @@
}
} while (blk_next != 0 && blk_next < ext_heap_size);
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
// Wait a while since reaching here means the heap is full
nanosleep(10000);
// Reaching here means the heap is full
// Just in case we're trying to allocate more than the entire heap
if (len > ext_heap_size - 4 * sizeof(uint32_t)) { break; }
}
@@ -429,8 +426,7 @@ static __device__ void ext_heap_free(void *ptr,
for (;;) {
first_free_block = atomicExch((unsigned int *)heap_ptr, first_free_block);
if (first_free_block != ~0) { break; }
// Some other block is holding the heap: wait
nanosleep(50);
// Some other block is holding the heap
}
if (first_free_block >= ext_heap_size) {
// Heap is currently empty
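
Both deletions above strip the `nanosleep` backoff out of the heap lock's spin loops, leaving plain retries (the gpuinflate.cu hunks below make the same change to their busy-waits). An illustrative CUDA sketch of the pattern, not the exact cudf code:

```cuda
// Spin until the lock word is acquired; the optional backoff trades a little
// latency for less memory-system pressure while spinning.
__device__ void heap_lock_acquire(unsigned int *lock)
{
  while (atomicExch(lock, 1u) != 0u) {
#if __CUDA_ARCH__ >= 700
    __nanosleep(100);  // backoff; __nanosleep requires Volta (sm_70) or newer
#endif
  }
}

__device__ void heap_lock_release(unsigned int *lock)
{
  __threadfence();  // make prior writes visible before releasing the lock
  atomicExch(lock, 0u);
}
```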
13 changes: 4 additions & 9 deletions cpp/src/io/comp/gpuinflate.cu
@@ -512,13 +512,10 @@ __device__ void decode_symbols(inflate_state_s *s)
#if ENABLE_PREFETCH
// Wait for prefetcher to fetch a worst-case of 48 bits per symbol
while ((*(volatile int32_t *)&s->pref.cur_p - (int32_t)(size_t)cur < batch_size * 6) ||
(s->x.batch_len[batch] != 0))
(s->x.batch_len[batch] != 0)) {}
#else
while (s->x.batch_len[batch] != 0)
while (s->x.batch_len[batch] != 0) {}
#endif
{
nanosleep(100);
}
batch_len = 0;
#if ENABLE_PREFETCH
if (cur + (bitpos >> 3) >= end) {
@@ -662,7 +659,7 @@ __device__ void decode_symbols(inflate_state_s *s)
if (batch_len != 0) batch = (batch + 1) & (batch_count - 1);
} while (sym != 256);

while (s->x.batch_len[batch] != 0) { nanosleep(150); }
while (s->x.batch_len[batch] != 0) {}
s->x.batch_len[batch] = -1;
s->bitbuf = bitbuf;
s->bitpos = bitpos;
@@ -779,7 +776,7 @@ __device__ void process_symbols(inflate_state_s *s, int t)
uint32_t lit_mask;

if (t == 0) {
while ((batch_len = s->x.batch_len[batch]) == 0) { nanosleep(100); }
while ((batch_len = s->x.batch_len[batch]) == 0) {}
} else {
batch_len = 0;
}
@@ -962,8 +959,6 @@ __device__ void prefetch_warp(volatile inflate_state_s *s, int t)
s->pref.cur_p = cur_p;
__threadfence_block();
}
} else if (t == 0) {
nanosleep(150);
}
}
}
