Merge remote-tracking branch 'upstream/branch-22.06' into misc_doc

rapidsai · Apr 14, 2022 · 43c7cc3 · 43c7cc3
2 parents 4f3b09a + 14a3261
commit 43c7cc3
Show file tree

Hide file tree

Showing 22 changed files with 260 additions and 118 deletions.
diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "cudf/io/types.hpp"
 #include <benchmark/benchmark.h>
 
 #include <benchmarks/common/generate_input.hpp>
@@ -23,6 +22,7 @@
 #include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/io/orc.hpp>
+#include <cudf/io/types.hpp>
 
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 

diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "cudf/column/column_view.hpp"
+#include <cudf/column/column_view.hpp>
 #include <cudf/sorting.hpp>
 
 #include <cudf_test/base_fixture.hpp>

diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp
@@ -13,25 +13,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#include <benchmark/benchmark.h>
-
+#include <cudf/column/column_view.hpp>
 #include <cudf/strings/convert/convert_durations.hpp>
 #include <cudf/types.hpp>
+#include <cudf/wrappers/durations.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 
+#include <benchmark/benchmark.h>
+
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <algorithm>
 #include <random>
 
-#include "../fixture/benchmark_fixture.hpp"
-#include "../synchronization/synchronization.hpp"
-#include "cudf/column/column_view.hpp"
-#include "cudf/wrappers/durations.hpp"
-
 class DurationsToString : public cudf::benchmark {
 };
 template <class TypeParam>

diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -17,9 +17,9 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/lists/lists_column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
 
-#include "cudf/lists/lists_column_view.hpp"
 #include <rmm/cuda_stream_view.hpp>
 
 namespace cudf {

diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "cudf_kafka/kafka_callback.hpp"
+#include <cudf_kafka/kafka_callback.hpp>
 
 #include <librdkafka/rdkafkacpp.h>
 

diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "cudf_kafka/kafka_consumer.hpp"
+#include <cudf_kafka/kafka_consumer.hpp>
 
 #include <librdkafka/rdkafkacpp.h>
 

diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
@@ -26,6 +26,7 @@
 #include <cudf/structs/structs_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_device_view.cuh>
+#include <cudf/utilities/traits.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
@@ -38,7 +39,6 @@
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
-#include "cudf/utilities/traits.hpp"
 #include <queue>
 #include <vector>
 

diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "cudf/utilities/error.hpp"
 #include <cudf/column/column.hpp>
 #include <cudf/structs/structs_column_view.hpp>
+#include <cudf/utilities/error.hpp>
 
 namespace cudf {
 

diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp
@@ -20,13 +20,13 @@
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/types.hpp>
 #include <cudf/unary.hpp>
+#include <cudf/utilities/error.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include "cudf/utilities/error.hpp"
 #include <tests/binaryop/assert-binops.h>
 #include <tests/binaryop/binop-fixture.hpp>
 

diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
@@ -23,12 +23,12 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/logical.h>
 #include <thrust/pair.h>
 #include <thrust/tabulate.h>
 
-#include "rmm/exec_policy.hpp"
 #include <cstdlib>
 #include <iostream>
 #include <limits>

diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu
@@ -12,10 +12,12 @@
  * or implied. See the License for the specific language governing permissions and limitations under
  * the License.
  */
-#include "cudf/detail/utilities/vector_factories.hpp"
-#include "rmm/cuda_stream_view.hpp"
-#include "rmm/device_uvector.hpp"
-#include <tests/iterator/iterator_tests.cuh>
+#include "iterator_tests.cuh"
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 
 #include <thrust/host_vector.h>
 #include <thrust/iterator/counting_iterator.h>

diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,16 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <cudf/copying.hpp>
-#include <cudf/partitioning.hpp>
-#include <cudf/table/table.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include "cudf/sorting.hpp"
+#include <cudf/copying.hpp>
+#include <cudf/partitioning.hpp>
+#include <cudf/sorting.hpp>
+#include <cudf/table/table.hpp>
 
 template <typename T>
 class PartitionTest : public cudf::test::BaseFixture {

diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst
@@ -15,36 +15,40 @@ The following table lists all of cudf types. For methods requiring dtype argumen
 .. rst-class:: special-table
 .. table::
 
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Kind of Data           | Data Type        | Scalar                                                                              | String Aliases                              |
-    +========================+==================+=====================================================================================+=============================================+
-    | Integer                |                  | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_,                   | ``'int8'``, ``'int16'``, ``'int32'``,       |
-    |                        |                  | np.uint32_, np.uint64_                                                              | ``'int64'``, ``'uint8'``, ``'uint16'``,     |
-    |                        |                  |                                                                                     | ``'uint32'``, ``'uint64'``                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Float                  |                  | np.float32_, np.float64_                                                            | ``'float32'``, ``'float64'``                |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Strings                |                  | `str <https://docs.python.org/3/library/stdtypes.html#str>`_                        | ``'string'``, ``'object'``                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Datetime               |                  | np.datetime64_                                                                      | ``'datetime64[s]'``, ``'datetime64[ms]'``,  |
-    |                        |                  |                                                                                     | ``'datetime64[us]'``, ``'datetime64[ns]'``  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Timedelta              |                  | np.timedelta64_                                                                     | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,|
-    | (duration type)        |                  |                                                                                     | ``'timedelta64[us]'``, ``'timedelta64[ns]'``|
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Categorical            | CategoricalDtype | (none)                                                                              | ``'category'``                              |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Boolean                |                  | np.bool_                                                                            | ``'bool'``                                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Decimal                | Decimal32Dtype,  | (none)                                                                              | (none)                                      |
-    |                        | Decimal64Dtype,  |                                                                                     |                                             |
-    |                        | Decimal128Dtype  |                                                                                     |                                             |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Kind of Data    | Data Type        | Scalar                                                       | String Aliases                               |
+    +=================+==================+==============================================================+==============================================+
+    | Integer         |                  | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_,        | ``'int8'``, ``'int16'``, ``'int32'``,        |
+    |                 |                  | np.uint16_, np.uint32_, np.uint64_                           | ``'int64'``, ``'uint8'``, ``'uint16'``,      |
+    |                 |                  |                                                              | ``'uint32'``, ``'uint64'``                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Float           |                  | np.float32_, np.float64_                                     | ``'float32'``, ``'float64'``                 |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Strings         |                  | `str <https://docs.python.org/3/library/stdtypes.html#str>`_ | ``'string'``, ``'object'``                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Datetime        |                  | np.datetime64_                                               | ``'datetime64[s]'``, ``'datetime64[ms]'``,   |
+    |                 |                  |                                                              | ``'datetime64[us]'``, ``'datetime64[ns]'``   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Timedelta       |                  | np.timedelta64_                                              | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, |
+    | (duration type) |                  |                                                              | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Categorical     | CategoricalDtype | (none)                                                       | ``'category'``                               |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Boolean         |                  | np.bool_                                                     | ``'bool'``                                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Decimal         | Decimal32Dtype,  | (none)                                                       | (none)                                       |
+    |                 | Decimal64Dtype,  |                                                              |                                              |
+    |                 | Decimal128Dtype  |                                                              |                                              |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Lists           | ListDtype        | list                                                         | ``'list'``                                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Structs         | StructDtype      | dict                                                         | ``'struct'``                                 |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
 
 **Note: All dtypes above are Nullable**
 
-.. _np.int8: 
-.. _np.int16: 
+.. _np.int8:
+.. _np.int16:
 .. _np.int32:
 .. _np.int64:
 .. _np.uint8:

diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst
@@ -54,7 +54,7 @@ As another example, the ``StringColumn`` backing the Series
 2. No mask buffer as there are no nulls in the Series
 3. Two children columns:
 
-    -  A column of 8-bit characters
+    -  A column of UTF-8 characters
        ``['d', 'o', 'y', 'o', 'u', 'h' ... '?']``
     -  A column of "offsets" to the characters column (in this case,
        ``[0, 2, 5, 9, 12, 19]``)
@@ -172,7 +172,7 @@ Selecting columns by index:
     >>> ca.select_by_index(1)
     ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index([0, 1])
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))    
+    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index(slice(1, 3))
     ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
 

diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst
@@ -1,14 +1,14 @@
 GPUDirect Storage Integration
 =============================
 
-Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. 
-GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. 
-GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. 
+Many IO APIs can use the GPUDirect Storage (GDS) library to optimize IO operations.
+GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU.
+GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer.
 The SDK is available for download `here <https://developer.nvidia.com/gpudirect-storage>`_.
 GDS is also included in CUDA Toolkit 11.4 and higher.
 
-Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. 
-This variable also controls the GDS compatibility mode. 
+Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``.
+This variable also controls the GDS compatibility mode.
 
 There are three valid values for the environment variable:
 
@@ -20,17 +20,17 @@ If no value is set, behavior will be the same as the "GDS" option.
 
 This environment variable also affects how cuDF treats GDS errors.
 When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), 
+When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on),
 cuDF throws an exception to propagate the error to the user.
 
 Operations that support the use of GPUDirect Storage:
 
-- `read_avro`
-- `read_parquet`
-- `read_orc`
-- `to_csv`
-- `to_parquet`
-- `to_orc`
+- :py:func:`cudf.read_avro`
+- :py:func:`cudf.read_parquet`
+- :py:func:`cudf.read_orc`
+- :py:meth:`cudf.DataFrame.to_csv`
+- :py:meth:`cudf.DataFrame.to_parquet`
+- :py:meth:`cudf.DataFrame.to_orc`
 
 Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables:
 

diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst
@@ -1,14 +1,14 @@
 nvCOMP Integration
 =============================
 
-Some types of compression/decompression can be performed using either `nvCOMP library <https://github.com/NVIDIA/nvcomp>`_ or the internal implementation. 
+Some types of compression/decompression can be performed using either the `nvCOMP library <https://github.com/NVIDIA/nvcomp>`_ or the internal implementation.
 
 Which implementation is used by default depends on the data format and the compression type.
 Behavior can be influenced through the environment variable ``LIBCUDF_NVCOMP_POLICY``.
 
 There are three valid values for the environment variable:
 
-- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. 
+- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use.
 - "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations.
 - "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead.