diff --git a/README.md b/README.md index a3255d9d68..e6b1792719 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ repo](https://github.com/rapidsai/notebooks-contrib). | [Based on SHAP](https://shap.readthedocs.io/en/latest/) | | | SHAP Permutation Explainer | [Based on SHAP](https://shap.readthedocs.io/en/latest/) | +| **Execution device interoperability** | | Run estimators interchangeably on host/CPU or device/GPU with minimal code changes; see the [demo](https://docs.rapids.ai/api/cuml/stable/execution_device_interoperability.html) | | **Other** | K-Nearest Neighbors (KNN) Search | Multi-node multi-GPU via Dask+[UCX](https://github.com/rapidsai/ucx-py), uses [Faiss](https://github.com/facebookresearch/faiss) for Nearest Neighbors Query. | --- diff --git a/docs/source/execution_device_interoperability.ipynb b/docs/source/execution_device_interoperability.ipynb new file mode 100644 index 0000000000..64e519ddb3 --- /dev/null +++ b/docs/source/execution_device_interoperability.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Using cuML on CPU, GPU, or both\n", + "\n", + "This notebook demonstrates the CPU/GPU interoperability feature." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "import pickle\n", + "import cuml\n", + "from cuml.common.device_selection import using_device_type\n", + "from cuml.common.device_selection import set_global_device_type, get_global_device_type\n", + "from cuml.neighbors import NearestNeighbors\n", + "from cuml.manifold import UMAP\n", + "from cuml.linear_model import LinearRegression\n", + "from cuml.datasets import make_regression, make_blobs\n", + "from cuml.model_selection import train_test_split\n", + "\n", + "X_blobs, y_blobs = make_blobs(n_samples=2000, n_features=20)\n", + "X_train_blobs, X_test_blobs, y_train_blobs, y_test_blobs = train_test_split(X_blobs, y_blobs, test_size=0.2, shuffle=True)\n", + "\n", + "X_reg, y_reg = make_regression(n_samples=2000, n_features=20)\n", + "X_train_reg, X_test_reg, y_train_reg, y_tes_reg = train_test_split(X_reg, y_reg, test_size=0.2, shuffle=True)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Don't have a GPU at your disposal at the moment? You can work on prototyping and run estimators in CPU-mode." + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "nn = NearestNeighbors()\n", + "with using_device_type('cpu'):\n", + " nn.fit(X_train_blobs)\n", + " nearest_neighbors = nn.kneighbors(X_test_blobs)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Need to train your estimator with a special feature or hyperparameter only available in the paired CPU library? Initialize the cuML model with it and train on CPU." 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "umap_model = UMAP(angular_rp_forest=True) # `angular_rp_forest` hyperparameter only available in UMAP library\n", + "with using_device_type('cpu'):\n", + " umap_model.fit(X_train_blobs) # will run the UMAP library with the hyperparameter\n", + "with using_device_type('gpu'):\n", + " transformed = umap_model.transform(X_test_blobs) # will run the cuML implementation of UMAP" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[I] [14:46:20.500110] Unused keyword parameter: angular_rp_forest during cuML estimator initialization\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/home/vic/mambaforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "While ML training workflows almost always benefit from the superior speed of GPUs, small-scale applications with limited traffic and loose latency requirements may be able to perform inference on CPU. 
Please note that this feature only works with models that implement pickle serialization and GPU-to-CPU transfers.\n", + "\n", + "To train a model on GPU but deploy it on CPU, first train the estimator on the device and save it to disk:" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "lin_reg = LinearRegression()\n", + "with using_device_type('gpu'):\n", + " lin_reg.fit(X_train_reg, y_train_reg)\n", + "\n", + "pickle.dump(lin_reg, open(\"lin_reg.pkl\", \"wb\"))\n", + "del lin_reg" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Then, on the server, recover the estimator and run inference on the host." + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "recovered_lin_reg = pickle.load(open(\"lin_reg.pkl\", \"rb\"))\n", + "with using_device_type('cpu'):\n", + " predictions = recovered_lin_reg.predict(X_test_reg)" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "The GPU/device is the default execution platform:" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "initial_device_type = get_global_device_type()\n", + "print('default execution device:', initial_device_type)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "default execution device: DeviceType.device\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "Estimator training and inference inside a `using_device_type` context are executed on the selected execution platform:" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "for param in ['cpu', 'host', 'gpu', 'device']:\n", + " with using_device_type(param):\n", + " print('using_device_type({}):'.format(param), get_global_device_type())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + 
"text": [ + "using_device_type(cpu): DeviceType.host\n", + "using_device_type(host): DeviceType.host\n", + "using_device_type(gpu): DeviceType.device\n", + "using_device_type(device): DeviceType.device\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "The execution platform can also be set at the global level from the `set_global_device_type` function." + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "set_global_device_type('gpu')\n", + "print('new device type:', get_global_device_type())" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "new device type: DeviceType.device\n" + ] + } + ], + "metadata": {} + } + ], + "metadata": { + "kernelspec": { + "name": "python3", + "display_name": "Python 3.9.15 64-bit ('all_cuda-115_arch-x86_64': conda)" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + }, + "interpreter": { + "hash": "35840739db47a5016f18b089945bf3e154a2dca6d71cfb13687d370b69a146e3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/source/user_guide.rst b/docs/source/user_guide.rst index b22677e812..73f0f1d057 100644 --- a/docs/source/user_guide.rst +++ b/docs/source/user_guide.rst @@ -6,4 +6,5 @@ User Guide estimator_intro.ipynb pickling_cuml_models.ipynb + execution_device_interoperability.ipynb diff --git a/python/cuml/common/device_selection.py b/python/cuml/common/device_selection.py index 8d6b5ecd1c..8005f4983a 100644 --- a/python/cuml/common/device_selection.py +++ b/python/cuml/common/device_selection.py @@ -23,6 +23,10 @@ def set_global_device_type(device_type): 
GlobalSettings().device_type = DeviceType.from_str(device_type) +def get_global_device_type(): + return GlobalSettings().device_type + + class using_device_type: def __init__(self, device_type): self.device_type = device_type