diff --git a/ci/checks/style.sh b/ci/checks/style.sh
index 67e926a0768..13f7f0e6267 100755
--- a/ci/checks/style.sh
+++ b/ci/checks/style.sh
@@ -14,7 +14,7 @@ LANG=C.UTF-8
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/main/cmake-format-rapids-cmake.json
+FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-21.12/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
diff --git a/cpp/cmake/thirdparty/get_dlpack.cmake b/cpp/cmake/thirdparty/get_dlpack.cmake
index aeffd64f371..252d50c7af8 100644
--- a/cpp/cmake/thirdparty/get_dlpack.cmake
+++ b/cpp/cmake/thirdparty/get_dlpack.cmake
@@ -21,7 +21,8 @@ function(find_and_configure_dlpack VERSION)
dlpack ${VERSION}
GIT_REPOSITORY https://github.com/dmlc/dlpack.git
GIT_TAG v${VERSION}
- GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE
+ GIT_SHALLOW TRUE
+ DOWNLOAD_ONLY TRUE
OPTIONS "BUILD_MOCK OFF"
)
diff --git a/cpp/cmake/thirdparty/get_jitify.cmake b/cpp/cmake/thirdparty/get_jitify.cmake
index 7c4526107a3..51bd41ea079 100644
--- a/cpp/cmake/thirdparty/get_jitify.cmake
+++ b/cpp/cmake/thirdparty/get_jitify.cmake
@@ -20,7 +20,8 @@ function(find_and_configure_jitify)
jitify 2.0.0
GIT_REPOSITORY https://github.com/rapidsai/jitify.git
GIT_TAG cudf_0.19
- GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE
+ GIT_SHALLOW TRUE
+ DOWNLOAD_ONLY TRUE
)
set(JITIFY_INCLUDE_DIR
"${jitify_SOURCE_DIR}"
diff --git a/cpp/cmake/thirdparty/get_libcudacxx.cmake b/cpp/cmake/thirdparty/get_libcudacxx.cmake
index 290c4f61e41..0917adcd764 100644
--- a/cpp/cmake/thirdparty/get_libcudacxx.cmake
+++ b/cpp/cmake/thirdparty/get_libcudacxx.cmake
@@ -17,8 +17,9 @@ function(find_and_configure_libcudacxx)
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)
rapids_cpm_libcudacxx(
- BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports PATCH_COMMAND patch
- --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/libcudacxx.patch || true
+ BUILD_EXPORT_SET cudf-exports
+ INSTALL_EXPORT_SET cudf-exports PATCH_COMMAND patch --reject-file=- -p1 -N <
+ ${CUDF_SOURCE_DIR}/cmake/libcudacxx.patch || true
)
set(LIBCUDACXX_INCLUDE_DIR
diff --git a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
index 0985291f3c2..169eec07914 100644
--- a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
+++ b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
@@ -45,9 +45,23 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "44.1 µs ± 689 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
- "209 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n",
- "208 µs ± 3.14 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ "158 µs ± 306 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
+ "419 µs ± 149 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/envs/rapids/lib/python3.7/site-packages/cudf/core/dataframe.py:3044: FutureWarning: The as_gpu_matrix method will be removed in a future cuDF release. Consider using `to_cupy` instead.\n",
+ " FutureWarning,\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "339 µs ± 282 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n"
]
}
],
@@ -117,9 +131,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "22.1 µs ± 518 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
- "58.3 µs ± 647 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
- "80.2 µs ± 647 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+ "45.4 µs ± 63.9 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
+ "127 µs ± 351 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n",
+ "135 µs ± 5.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
]
}
],
@@ -256,7 +270,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "13.1 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ "15.5 ms ± 7.55 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -510,7 +524,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "4.9 ms ± 26.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ "7.26 ms ± 3.32 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -530,7 +544,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "5.1 ms ± 23.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
+ "4.87 ms ± 2.08 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
@@ -1139,135 +1153,135 @@
"
0 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
+ " 4.704433 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.00000 | \n",
" 0.0 | \n",
- " 16.822959 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
+ " -1.162275 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" \n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.00000 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
+ " 11.460403 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 6.618972 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 2.25678 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.407392 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.000000 | \n",
- " 0.00000 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
- " 2.715802 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
+ " 8.299425 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 4.296568 | \n",
- " 0.00000 | \n",
+ " 2.096401 | \n",
" 0.0 | \n",
- " 0.000000 | \n",
" 0.0 | \n",
" 0.000000 | \n",
" 0.0 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
" 0.0 | \n",
" 0.0 | \n",
- " 4.865495 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 \\\n",
- "0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n",
- "1 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n",
- "2 0.0 0.0 6.618972 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 2.25678 \n",
- "3 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n",
- "4 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.296568 0.00000 \n",
+ " a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 \\\n",
+ "0 0.0 0.0 0.0 0.0 0.0 4.704433 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
+ "1 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
+ "2 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
+ "3 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
+ "4 0.0 0.0 0.0 0.0 0.0 0.000000 0.0 8.299425 0.0 0.0 0.0 2.096401 \n",
"\n",
- " a12 a13 a14 a15 a16 a17 a18 a19 \n",
- "0 0.0 16.822959 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
- "1 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
- "2 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n",
- "3 0.0 0.000000 0.0 2.715802 0.0 0.0 0.0 0.000000 \n",
- "4 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 4.865495 "
+ " a12 a13 a14 a15 a16 a17 a18 a19 \n",
+ "0 0.0 0.0 0.000000 0.0 -1.162275 0.000000 0.0 0.0 \n",
+ "1 0.0 0.0 11.460403 0.0 0.000000 0.000000 0.0 0.0 \n",
+ "2 0.0 0.0 0.000000 0.0 0.000000 0.407392 0.0 0.0 \n",
+ "3 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.0 \n",
+ "4 0.0 0.0 0.000000 0.0 0.000000 0.000000 0.0 0.0 "
]
},
"execution_count": 20,
@@ -1285,19 +1299,66 @@
"metadata": {},
"outputs": [
{
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " (41, 0)\t8.237732918475851\n",
+ " (49, 0)\t-4.161219849238402\n",
+ " (70, 0)\t-1.646588718395583\n",
+ " (80, 0)\t11.607048248828713\n",
+ " (81, 0)\t11.387095517746493\n",
+ " (105, 0)\t4.059008225609349\n",
+ " (107, 0)\t9.299030876304984\n",
+ " (108, 0)\t10.652087054434446\n",
+ " (127, 0)\t2.442578989241219\n",
+ " (133, 0)\t-0.7674141633646347\n",
+ " (135, 0)\t-6.091151515788713\n",
+ " (145, 0)\t2.968949150266586\n",
+ " (148, 0)\t5.649147779687932\n",
+ " (158, 0)\t7.7809955768930745\n",
+ " (166, 0)\t5.801884262747882\n",
+ " (175, 0)\t7.3205065025042\n",
+ " (181, 0)\t13.704683370645277\n",
+ " (204, 0)\t15.915619596241733\n",
+ " (207, 0)\t-0.2205888963107494\n",
+ " (209, 0)\t3.565578265020142\n",
+ " (215, 0)\t4.1493767841754154\n",
+ " (231, 0)\t3.4286524053271803\n",
+ " (233, 0)\t6.021200022977307\n",
+ " (241, 0)\t4.247163658236771\n",
+ " (249, 0)\t1.8502158424149273\n",
+ " :\t:\n",
+ " (9729, 19)\t7.226429647432215\n",
+ " (9762, 19)\t-0.6042314722021014\n",
+ " (9764, 19)\t-1.4827372788735615\n",
+ " (9769, 19)\t4.140245505599609\n",
+ " (9776, 19)\t-0.3441145182655059\n",
+ " (9781, 19)\t-0.235562982602191\n",
+ " (9782, 19)\t2.1458765970993223\n",
+ " (9791, 19)\t7.219427633840467\n",
+ " (9803, 19)\t6.6874487362355115\n",
+ " (9807, 19)\t5.1769501512294465\n",
+ " (9823, 19)\t-1.1040045399744103\n",
+ " (9828, 19)\t3.074156937033751\n",
+ " (9849, 19)\t0.4663962936122451\n",
+ " (9851, 19)\t10.302861735090476\n",
+ " (9862, 19)\t1.9377857550195872\n",
+ " (9893, 19)\t8.991541850619656\n",
+ " (9896, 19)\t-0.9003118390325282\n",
+ " (9919, 19)\t2.4984693551284587\n",
+ " (9934, 19)\t1.6161057487404191\n",
+ " (9944, 19)\t6.063387997554039\n",
+ " (9945, 19)\t11.038782286791717\n",
+ " (9954, 19)\t13.750186699958661\n",
+ " (9979, 19)\t0.9225731640357893\n",
+ " (9995, 19)\t-1.775155437069923\n",
+ " (9998, 19)\t12.265785237649636\n"
+ ]
}
],
"source": [
"sparse_data = cudf_to_cupy_sparse_matrix(df)\n",
- "sparse_data"
+ "print(sparse_data)"
]
},
{
@@ -1326,7 +1387,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.6"
+ "version": "3.7.12"
}
},
"nbformat": 4,
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index e0c68e56f63..65c79b4cf59 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -658,11 +658,11 @@ def _compute_levels_and_codes(self):
def _compute_validity_mask(self, index, row_tuple, max_length):
"""Computes the valid set of indices of values in the lookup"""
lookup = cudf.DataFrame()
- for name, row in zip(index.names, row_tuple):
+ for i, row in enumerate(row_tuple):
if isinstance(row, slice) and row == slice(None):
continue
- lookup[name] = cudf.Series(row)
- frame = index.to_frame(index=False)
+ lookup[i] = cudf.Series(row)
+ frame = cudf.DataFrame(dict(enumerate(index._data.columns)))
data_table = cudf.concat(
[
frame,
@@ -729,16 +729,26 @@ def _index_and_downcast(self, result, index, index_key):
for k in range(size, len(index._data)):
out_index.insert(
out_index._num_columns,
- k if index.names is None else index.names[k],
+ k,
cudf.Series._from_data({None: index._data.columns[k]}),
)
- if len(result) == 1 and size == 0 and not slice_access:
- # If the final result is one row and it was not mapped into
- # directly, return a Series with a tuple as name.
+ # determine if we should downcast from a DataFrame to a Series
+ need_downcast = (
+ isinstance(result, cudf.DataFrame)
+ and len(result) == 1 # only downcast if we have a single row
+ and not slice_access # never downcast if we sliced
+ and (
+ size == 0 # index_key was an integer
+ # we indexed into a single row directly, using its label:
+ or len(index_key) == self.nlevels
+ )
+ )
+ if need_downcast:
result = result.T
- result = result[result._data.names[0]]
- elif len(result) == 0 and not slice_access:
+ return result[result._data.names[0]]
+
+ if len(result) == 0 and not slice_access:
# Pandas returns an empty Series with a tuple as name
# the one expected result column
result = cudf.Series._from_data(
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index 07407b8d359..eaef002f37d 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -830,6 +830,19 @@ def test_multiindex_iloc(pdf, gdf, pdfIndex, iloc_rows, iloc_columns):
assert_eq(presult, gresult, check_index_type=False, check_dtype=False)
+def test_multiindex_iloc_scalar():
+ arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]]
+ tuples = list(zip(*arrays))
+ idx = cudf.MultiIndex.from_tuples(tuples)
+ gdf = cudf.DataFrame(
+ {"first": cp.random.rand(4), "second": cp.random.rand(4)}
+ )
+ gdf.index = idx
+
+ pdf = gdf.to_pandas()
+ assert_eq(pdf.iloc[3], gdf.iloc[3])
+
+
@pytest.mark.parametrize(
"iloc_rows",
[
@@ -1742,3 +1755,15 @@ def test_multiIndex_type_methods(pidx, func):
assert_eq(False, actual)
else:
assert_eq(expected, actual)
+
+
+def test_multiindex_index_single_row():
+ arrays = [["a", "a", "b", "b"], [1, 2, 3, 4]]
+ tuples = list(zip(*arrays))
+ idx = cudf.MultiIndex.from_tuples(tuples)
+ gdf = cudf.DataFrame(
+ {"first": cp.random.rand(4), "second": cp.random.rand(4)}
+ )
+ gdf.index = idx
+ pdf = gdf.to_pandas()
+ assert_eq(pdf.loc[("b", 3)], gdf.loc[("b", 3)])