diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4459b8a678..37678df443 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@
 - PR #2810: Update Rf MNMG threshold to prevent sporadic test failure
 - PR #2808: Relax Doxygen version required in CMake to coincide with integration repo
 - PR #2818: Fix parsing of singlegpu option in build command
+- PR #2829: Fixing description for labels in docs and removing row number constraint from PCA xform/inverse_xform
 - PR #2832: Updating stress tests that fail with OOM
 - PR #2831: Removing repeated capture and parameter in lambda function
 
diff --git a/cpp/src/pca/pca.cuh b/cpp/src/pca/pca.cuh
index fc7285ae89..d3206dcaf6 100644
--- a/cpp/src/pca/pca.cuh
+++ b/cpp/src/pca/pca.cuh
@@ -167,14 +167,15 @@ void pcaInverseTransform(const raft::handle_t &handle, math_t *trans_input,
                          cudaStream_t stream) {
   ASSERT(prms.n_cols > 1,
          "Parameter n_cols: number of columns cannot be less than two");
-  ASSERT(prms.n_rows > 1,
-         "Parameter n_rows: number of rows cannot be less than two");
+  ASSERT(prms.n_rows > 0,
+         "Parameter n_rows: number of rows cannot be less than one");
   ASSERT(
     prms.n_components > 0,
     "Parameter n_components: number of components cannot be less than one");
 
   if (prms.whiten) {
-    math_t scalar = math_t(1 / sqrt(prms.n_rows - 1));
+    math_t sqrt_n_samples = sqrt(prms.n_rows - 1);
+    math_t scalar = prms.n_rows - 1 > 0 ? math_t(1 / sqrt_n_samples) : 0;
     LinAlg::scalarMultiply(components, components, scalar,
                            prms.n_rows * prms.n_components, stream);
     Matrix::matrixVectorBinaryMultSkipZero(components, singular_vals,
@@ -190,7 +191,8 @@ void pcaInverseTransform(const raft::handle_t &handle, math_t *trans_input,
     Matrix::matrixVectorBinaryDivSkipZero(components, singular_vals,
                                           prms.n_rows, prms.n_components, true,
                                           true, stream);
-    math_t scalar = math_t(sqrt(prms.n_rows - 1));
+    // Undo the 1/sqrt(n_rows - 1) pre-scaling; no division, so no zero guard.
+    math_t scalar = math_t(sqrt(prms.n_rows - 1));
     LinAlg::scalarMultiply(components, components, scalar,
                            prms.n_rows * prms.n_components, stream);
   }
@@ -226,8 +228,8 @@ void pcaTransform(const raft::handle_t &handle, math_t *input,
                   cudaStream_t stream) {
   ASSERT(prms.n_cols > 1,
          "Parameter n_cols: number of columns cannot be less than two");
-  ASSERT(prms.n_rows > 1,
-         "Parameter n_rows: number of rows cannot be less than two");
+  ASSERT(prms.n_rows > 0,
+         "Parameter n_rows: number of rows cannot be less than one");
   ASSERT(
     prms.n_components > 0,
     "Parameter n_components: number of components cannot be less than one");
@@ -251,7 +253,8 @@ void pcaTransform(const raft::handle_t &handle, math_t *input,
     Matrix::matrixVectorBinaryMultSkipZero(components, singular_vals,
                                            prms.n_rows, prms.n_components, true,
                                            true, stream);
-    math_t scalar = math_t(1 / sqrt(prms.n_rows - 1));
+    math_t sqrt_n_samples = sqrt(prms.n_rows - 1);
+    math_t scalar = prms.n_rows - 1 > 0 ? math_t(1 / sqrt_n_samples) : 0;
     LinAlg::scalarMultiply(components, components, scalar,
                            prms.n_rows * prms.n_components, stream);
   }
diff --git a/cpp/src/tsvd/tsvd.cuh b/cpp/src/tsvd/tsvd.cuh
index ff478c3c5e..d18c2f1715 100644
--- a/cpp/src/tsvd/tsvd.cuh
+++ b/cpp/src/tsvd/tsvd.cuh
@@ -273,8 +273,8 @@ void tsvdTransform(const raft::handle_t &handle, math_t *input,
 
   ASSERT(prms.n_cols > 1,
          "Parameter n_cols: number of columns cannot be less than two");
-  ASSERT(prms.n_rows > 1,
-         "Parameter n_rows: number of rows cannot be less than two");
+  ASSERT(prms.n_rows > 0,
+         "Parameter n_rows: number of rows cannot be less than one");
   ASSERT(
     prms.n_components > 0,
     "Parameter n_components: number of components cannot be less than one");
@@ -303,7 +303,7 @@ void tsvdInverseTransform(const raft::handle_t &handle, math_t *trans_input,
 
   ASSERT(prms.n_cols > 1,
          "Parameter n_cols: number of columns cannot be less than one");
-  ASSERT(prms.n_rows > 1,
+  ASSERT(prms.n_rows > 0,
          "Parameter n_rows: number of rows cannot be less than one");
   ASSERT(
     prms.n_components > 0,
diff --git a/python/cuml/common/doc_utils.py b/python/cuml/common/doc_utils.py
index 1b8bbf85d9..f452e2c66b 100644
--- a/python/cuml/common/doc_utils.py
+++ b/python/cuml/common/doc_utils.py
@@ -268,10 +268,14 @@ def deco(func):
                 pass
 
             # X and y are the most common
-            elif par in ['X', 'y'] and par not in skip_parameters:
+            elif par == 'X' and par not in skip_parameters:
                 func.__doc__ += \
                     _parameters_docstrings[X].format(name=par,
                                                      shape=X_shape)
+            elif par == 'y' and par not in skip_parameters:
+                func.__doc__ += \
+                    _parameters_docstrings[y].format(name=par,
+                                                     shape=y_shape)
 
             # convert_dtype requires some magic to distinguish
             # whether we use the fit version or the version
diff --git a/python/cuml/preprocessing/model_selection.py b/python/cuml/preprocessing/model_selection.py
index dacc0eab8a..57eead444c 100644
--- a/python/cuml/preprocessing/model_selection.py
+++ b/python/cuml/preprocessing/model_selection.py
@@ -240,7 +240,7 @@ def train_test_split(X,
 
         # Alternatively, if our labels are stored separately
         labels = df['y']
-        df = df.drop(['y'])
+        df = df.drop(['y'], axis=1)
 
         # we can also do
         X_train, X_test, y_train, y_test = train_test_split(df, labels,
@@ -271,7 +271,7 @@ def train_test_split(X,
         if isinstance(X, cudf.DataFrame):
             name = y
             y = X[name]
-            X = X.drop(name)
+            X = X.drop(name, axis=1)
         else:
             raise TypeError("X needs to be a cuDF Dataframe when y is a \
                             string")