Skip to content

Commit

Permalink
Merge branch 'branch-0.16' of https://github.com/rapidsai/cuml into f…
Browse files Browse the repository at this point in the history
…ea-ext-lobpcg
  • Loading branch information
venkywonka committed Sep 10, 2020
2 parents 9836fdd + d84283e commit e19f1b6
Show file tree
Hide file tree
Showing 100 changed files with 595 additions and 443 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

## New Features
- PR #2698: Distributed TF-IDF transformer
- PR #2638: Improve cython build with custom `build_ext`
- PR #2772: Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) Solver

## Improvements
- PR #2796: Remove tokens of length 1 by default for text vectorizers
- PR #2741: Use rapids build packages in conda environments
- PR #2735: Update seed to random_state in random forest and associated tests
- PR #2739: Use cusparse_wrappers.h from RAFT
Expand All @@ -13,10 +15,18 @@
- PR #2762: Fix broken links and provide minor edits to docs
- PR #2723: Support and enable convert_dtype in estimator predict
- PR #2758: Match sklearn's default n_components behavior for PCA
- PR #2770: Fix doxygen version during cmake
- PR #2766: Update default RandomForestRegressor score function to use r2
- PR #2783: Add pytest that will fail when GPU IDs in Dask cluster are not unique
- PR #2785: Add in cuML-specific dev conda dependencies
- PR #2778: Add README for FIL
- PR #2799: Reenable lightgbm test with lower (1%) proba accuracy
- PR #2800: Align cuML's spdlog version with RMM's

## Bug Fixes
- PR #2744: Supporting larger number of classes in KNeighborsClassifier
- PR #2769: Remove outdated doxygen options for 1.8.20
- PR #2787: Skip lightgbm test for version 3 and above temporarily

# cuML 0.15.0 (Date TBD)

Expand Down Expand Up @@ -111,6 +121,7 @@
- PR #2623: Fixing kmeans score() API to be compatible with Scikit-learn
- PR #2629: Add naive_bayes api docs
- PR #2643: 'dense' and 'sparse' values of `storage_type` for FIL
- PR #2691: Generic Base class attribute setter
- PR #2666: Update MBSGD documentation to mention that the model is experimental
- PR #2687: Update xgboost version to 1.2.0dev.rapidsai0.15
- PR #2684: CUDA 11 conda development environment yml and faiss patch
Expand Down Expand Up @@ -366,6 +377,7 @@
- PR #2305: Fixed race condition in DBScan
- PR #2354: Fix broken links in README
- PR #2619: Explicitly skip raft test folder for pytest 6.0.0
- PR #2788: Set the minimum number of columns that can be sampled to 1 to fix 0 mem allocation error

# cuML 0.13.0 (31 Mar 2020)

Expand Down
5 changes: 2 additions & 3 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,9 @@ fi
if completeBuild || hasArg cuml || hasArg pydocs; then
cd ${REPODIR}/python
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py build_ext -j${PARALLEL_LEVEL:-1} --inplace ${SINGLEGPU_PYTHON_FLAG}
python setup.py install --single-version-externally-managed --record=record.txt ${SINGLEGPU_PYTHON_FLAG}
python setup.py build_ext -j${PARALLEL_LEVEL:-1} ${SINGLEGPU_PYTHON_FLAG} install --single-version-externally-managed --record=record.txt
else
python setup.py build_ext -j${PARALLEL_LEVEL:-1} --inplace --library-dir=${LIBCUML_BUILD_DIR} ${SINGLEGPU_PYTHON_FLAG}
python setup.py build_ext -j${PARALLEL_LEVEL:-1} --library-dir=${LIBCUML_BUILD_DIR} ${SINGLEGPU_PYTHON_FLAG}
fi

if hasArg pydocs; then
Expand Down
7 changes: 7 additions & 0 deletions conda/environments/cuml_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ dependencies:
- dask-cudf=0.16.*
- dask-cuda=0.16.*
- ucx-py=0.16.*
- dask-ml
- doxygen>=1.8.20
- libfaiss>=1.6.3
- faiss-proc=*=cuda
- umap-learn
- scikit-learn=0.23.1
- treelite=0.92
- pip
- pip:
- sphinx_markdown_tables
Expand Down
7 changes: 7 additions & 0 deletions conda/environments/cuml_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ dependencies:
- dask-cudf=0.16.*
- dask-cuda=0.16.*
- ucx-py=0.16.*
- dask-ml
- doxygen>=1.8.20
- libfaiss>=1.6.3
- faiss-proc=*=cuda
- umap-learn
- scikit-learn=0.23.1
- treelite=0.92
- pip
- pip:
- sphinx_markdown_tables
Expand Down
7 changes: 7 additions & 0 deletions conda/environments/cuml_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ dependencies:
- dask-cudf=0.16.*
- dask-cuda=0.16.*
- ucx-py=0.16.*
- dask-ml
- doxygen>=1.8.20
- libfaiss>=1.6.3
- faiss-proc=*=cuda
- umap-learn
- scikit-learn=0.23.1
- treelite=0.92
- pip
- pip:
- sphinx_markdown_tables
Expand Down
3 changes: 1 addition & 2 deletions cpp/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ set(SPDLOG_DIR ${CMAKE_CURRENT_BINARY_DIR}/spdlog CACHE STRING
"Path to spdlog install directory")
ExternalProject_Add(spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git
GIT_TAG v1.x
GIT_TAG v1.7.0
PREFIX ${SPDLOG_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
Expand Down Expand Up @@ -254,4 +254,3 @@ add_dependencies(GTest::GTest spdlog)
add_dependencies(benchmark GTest::GTest)
add_dependencies(FAISS::FAISS benchmark)
add_dependencies(FAISS::FAISS faiss)

4 changes: 2 additions & 2 deletions cpp/cmake/doxygen.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
# limitations under the License.
#

find_package(Doxygen 1.8.11)
find_package(Doxygen 1.8.20 REQUIRED)

function(add_doxygen_target)
if(Doxygen_FOUND)
Expand Down
1 change: 1 addition & 0 deletions cpp/src/decisiontree/memory.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ void TemporaryMemory<T, L>::LevelMemAllocator(
}
size_t maxnodes = max_nodes_per_level;
size_t ncols_sampled = (size_t)(ncols * tree_params.max_features);
ncols_sampled = ncols_sampled > 0 ? ncols_sampled : 1;
if (depth < 64) {
gather_max_nodes = std::min((size_t)(nrows + 1),
(size_t)(pow((size_t)2, (size_t)depth) + 1));
Expand Down
6 changes: 1 addition & 5 deletions python/cuml/cluster/dbscan.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import ctypes
import cudf
Expand Down Expand Up @@ -217,8 +214,7 @@ class DBSCAN(Base):
"int64", np.int64}.
"""
self._set_n_features_in(X)
self._set_output_type(X)
self._set_base_attributes(output_type=X, n_features=X)

if self._labels_ is not None:
del self._labels_
Expand Down
6 changes: 1 addition & 5 deletions python/cuml/cluster/kmeans.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import ctypes
import cudf
Expand Down Expand Up @@ -313,8 +310,7 @@ class KMeans(Base):
Compute k-means clustering with X.
"""
self._set_n_features_in(X)
self._set_output_type(X)
self._set_base_attributes(output_type=X, n_features=X)

if self.init == 'preset':
check_cols = self.n_cols
Expand Down
5 changes: 1 addition & 4 deletions python/cuml/cluster/kmeans_mg.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import ctypes
import cudf
Expand Down Expand Up @@ -87,7 +84,7 @@ class KMeansMG(KMeans):
ndarray, cuda array interface compliant array like CuPy
"""
self._set_n_features_in(X)
self._set_base_attributes(n_features=X)

X_m, self.n_rows, self.n_cols, self.dtype = \
input_to_cuml_array(X, order='C')
Expand Down
5 changes: 0 additions & 5 deletions python/cuml/cluster/kmeans_utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,6 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import ctypes
from libcpp cimport bool

Expand Down
56 changes: 41 additions & 15 deletions python/cuml/common/base.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import cuml
import cuml.common.cuda
Expand Down Expand Up @@ -282,12 +279,49 @@ class Base:
else:
raise AttributeError

def _set_output_type(self, input):
def _set_base_attributes(self,
output_type=None,
target_dtype=None,
n_features=None):
"""
Method to be called by fit methods of inheriting classes
to correctly set the output type depending on the type of inputs,
class output type and global output type
Method to set the base class attributes - output type,
target dtype and n_features. It combines the three different
function calls. It's called in fit function from estimators.
Parameters
--------
output_type : DataFrame (default = None)
If output_type is passed, sets the output type based on the
dataframe passed
target_dtype : Target column (default = None)
If target_dtype is passed, we call _set_target_dtype
on it
n_features: int or DataFrame (default=None)
If an int is passed, we set it to the number passed
If dataframe, we set it based on the passed df.
Examples
--------
.. code-block:: python
# To set output_type and n_features based on X
self._set_base_attributes(output_type=X, n_features=X)
# To set output_type on X and n_features to 10
self._set_base_attributes(output_type=X, n_features=10)
# To set output_type based on X and target_dtype based on y
self._set_base_attributes(output_type=X, target_dtype=y)
"""
if output_type is not None:
self._set_output_type(output_type)
if target_dtype is not None:
self._set_target_dtype(target_dtype)
if n_features is not None:
self._set_n_features_in(n_features)

def _set_output_type(self, input):
if self.output_type == 'input' or self._mirror_input:
self.output_type = _input_to_type(input)

Expand All @@ -303,11 +337,6 @@ class Base:
return self.output_type

def _set_target_dtype(self, target):
"""
Method to be called by fit methods of inheriting classifier
classes to correctly set the output dtype depending on the dtype of
the target.
"""
self.target_dtype = _input_target_to_dtype(target)

def _get_target_dtype(self):
Expand All @@ -323,9 +352,6 @@ class Base:
return out_dtype

def _set_n_features_in(self, X):
"""Method to be called by the fit method of the inheriting class.
Sets the n_features_in_ attribute based on the data passed to fit.
"""
if isinstance(X, int):
self.n_features_in_ = X
else:
Expand Down
6 changes: 0 additions & 6 deletions python/cuml/common/cuda.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3


# Populate this with more typedef's (eg: events) as and when needed
cdef extern from * nogil:
ctypedef void* _Stream "cudaStream_t"
Expand Down
3 changes: 0 additions & 3 deletions python/cuml/common/cuda.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

import functools
from libcpp.string cimport string
Expand Down
6 changes: 0 additions & 6 deletions python/cuml/common/handle.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3


from libcpp.memory cimport shared_ptr
cimport cuml.common.cuda

Expand Down
3 changes: 0 additions & 3 deletions python/cuml/common/handle.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3


import cuml
Expand Down
3 changes: 0 additions & 3 deletions python/cuml/common/logger.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3


from libcpp.string cimport string
Expand Down
4 changes: 0 additions & 4 deletions python/cuml/common/opg_data_utils_mg.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

# Util functions, will be moved to their own file as the other methods are
# refactored
Expand Down
3 changes: 0 additions & 3 deletions python/cuml/common/pointer_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
# limitations under the License.
#

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from libc.stdint cimport uintptr_t

Expand Down
3 changes: 0 additions & 3 deletions python/cuml/dask/common/comms_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from libc.stdlib cimport malloc, free
from cython.operator cimport dereference as deref
Expand Down
Loading

0 comments on commit e19f1b6

Please sign in to comment.