docstring for KMeans

upmem · Nov 10, 2021 · d14194d · d14194d
1 parent 92e0e53
commit d14194d
Show file tree

Hide file tree

Showing 2 changed files with 61 additions and 4 deletions.
diff --git a/src/dpu_kmeans/_dimm.py b/src/dpu_kmeans/_dimm.py
@@ -34,6 +34,10 @@ class DIMM_data:
         The path to the data file, or a numeric iterable containing the data.
         For best performance, provide a contiguous float32 numpy array.
 
+    is_binary_file : bool
+        True if the data is in binary format, False otherwise.
+        Unused if type is "array".
+
     Attributes
     ----------
     data_id : str or int
@@ -50,10 +54,6 @@ class DIMM_data:
 
     X : numpy.ndarray[np.float32]
         Data as a numpy array usable by the compiled library.
-
-    is_binary_file : bool
-        True if the data is in binary format, False otherwise.
-        Unused if type is "array".
     """
 
     def __init__(self, data, is_binary_file=False):

diff --git a/src/dpu_kmeans/_kmeans.py b/src/dpu_kmeans/_kmeans.py
@@ -12,6 +12,45 @@
 
 
 class KMeans(BaseEstimator):
+    """KMeans estimator object.
+
+    Parameters
+    ----------
+    n_clusters : int, default=8
+        The number of clusters to form as well as the number of
+        centroids to generate.
+
+    n_init : int, default=10
+        Number of times the k-means algorithm will be run with different
+        centroid seeds. The final results will be the best output of
+        n_init consecutive runs in terms of inertia.
+
+    max_iter : int, default=300
+        Maximum number of iterations of the k-means algorithm for a
+        single run.
+
+    tol : float, default=1e-4
+        Relative tolerance with regard to the Frobenius norm of the
+        difference in the cluster centers of two consecutive iterations to
+        declare convergence.
+
+    verbose : int, default=0
+        Verbosity mode.
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> from dpu_kmeans import DIMM_data, KMeans
+    >>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
+    >>> dimm_data = DIMM_data(X)
+    >>> kmeans = KMeans(2)
+    >>> centroids, iterations, time = kmeans.fit(dimm_data)
+    >>> print(centroids)
+    [[ 0.9998627  2.       ]
+     [10.000137   2.       ]]
+    """
+
     def __init__(
         self,
         n_clusters: int = 8,
@@ -30,6 +69,24 @@ def __init__(
         _dimm.load_kernel("kmeans", self.verbose)
 
     def fit(self, X: DIMM_data):
+        """Compute k-means clustering.
+
+        Parameters
+        ----------
+        X : DIMM_data
+            Training instances to cluster. It must be noted that the data
+            will be converted to C ordering, which will cause a memory
+            copy if the given data is not C-contiguous.
+
+        Returns
+        -------
+        result : ndarray
+            The centroids found by the clustering algorithm.
+        iterations : int
+            Number of iterations performed during the best run.
+        time : float
+            Total clustering time.
+        """
         _dimm.load_kernel("kmeans", self.verbose)
         _dimm.load_data(X, self.tol, self.verbose)
         result, iterations, time = self._kmeans()