Skip to content

Commit

Permalink
docstring for KMeans
Browse files Browse the repository at this point in the history
  • Loading branch information
SylvanBrocard committed Nov 10, 2021
1 parent 92e0e53 commit d14194d
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/dpu_kmeans/_dimm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ class DIMM_data:
The path to the data file, or a numeric iterable containing the data.
For best performance, provide a contiguous float32 numpy array.
is_binary_file : bool
True if the data is in binary format, False otherwise.
Unused if type is "array".
Attributes
----------
data_id : str or int
Expand All @@ -50,10 +54,6 @@ class DIMM_data:
X : numpy.ndarray[np.float32]
Data as a numpy array usable by the compiled library.
is_binary_file : bool
True if the data is in binary format, False otherwise.
Unused if type is "array".
"""

def __init__(self, data, is_binary_file=False):
Expand Down
57 changes: 57 additions & 0 deletions src/dpu_kmeans/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,45 @@


class KMeans(BaseEstimator):
"""KMeans estimator object
Parameters
----------
n_clusters : int, default=8
The number of clusters to form as well as the number of
centroids to generate.
n_init : int, default=10
Number of times the k-means algorithm will be run with different
centroid seeds. The final results will be the best output of
n_init consecutive runs in terms of inertia.
max_iter : int, default=300
Maximum number of iterations of the k-means algorithm for a
single run.
tol : float, default=1e-4
Relative tolerance with regard to the Frobenius norm of the difference
in the cluster centers of two consecutive iterations to declare
convergence.
verbose : int, default=0
Verbosity mode.
Examples
--------
>>> import numpy as np
>>> from dpu_kmeans import DIMM_data, KMeans
>>> X = np.array([[1, 2], [1, 4], [1, 0],[10, 2], [10, 4], [10, 0]])
>>> dimm_data = DIMM_data(X)
>>> kmeans = KMeans(2)
>>> centroids, iterations, time = kmeans.fit(dimm_data)
>>> print(centroids)
[[ 0.9998627 2. ]
[10.000137 2. ]]
"""

def __init__(
self,
n_clusters: int = 8,
Expand All @@ -30,6 +69,24 @@ def __init__(
_dimm.load_kernel("kmeans", self.verbose)

def fit(self, X: DIMM_data):
"""Compute k-means clustering.
Parameters
----------
X : DIMM_data
Training instances to cluster. Note that the data
will be converted to C ordering, which will cause a memory
copy if the given data is not C-contiguous.
Returns
-------
result : ndarray
The centroids found by the clustering algorithm.
iterations : int
Number of iterations performed during the best run.
time : float
Total clustering time.
"""
_dimm.load_kernel("kmeans", self.verbose)
_dimm.load_data(X, self.tol, self.verbose)
result, iterations, time = self._kmeans()
Expand Down

0 comments on commit d14194d

Please sign in to comment.