From 4a70dedf513249beeea28e5d3d584886edbf576c Mon Sep 17 00:00:00 2001
From: Karthikeyan Natarajan <karthikeyann@users.noreply.github.com>
Date: Mon, 15 Nov 2021 11:33:52 +0530
Subject: [PATCH] spell check fix python/

---
 python/cudf/cudf/core/column/column.py              |  4 ++--
 python/cudf/cudf/core/column/datetime.py            |  2 +-
 python/cudf/cudf/core/column/decimal.py             |  2 +-
 python/cudf/cudf/core/dataframe.py                  |  2 +-
 python/cudf/cudf/core/groupby/groupby.py            |  2 +-
 python/cudf/cudf/core/index.py                      |  2 +-
 python/cudf/cudf/core/multiindex.py                 |  2 +-
 python/cudf/cudf/core/series.py                     |  8 ++++----
 python/cudf/cudf/core/udf/pipeline.py               |  2 +-
 python/cudf/cudf/core/udf/typing.py                 |  4 ++--
 python/cudf/cudf/testing/testing.py                 |  2 +-
 python/cudf/cudf/tests/test_binops.py               |  2 +-
 python/cudf/cudf/tests/test_custom_accessor.py      |  2 +-
 python/cudf/cudf/tests/test_datetime.py             |  2 +-
 python/cudf/cudf/tests/test_multiindex.py           | 10 +++++-----
 python/cudf/cudf/tests/test_orc.py                  |  4 ++--
 python/cudf/cudf/utils/gpu_utils.py                 |  2 +-
 python/cudf/cudf/utils/ioutils.py                   |  4 ++--
 python/cudf/cudf/utils/utils.py                     |  4 ++--
 python/dask_cudf/dask_cudf/_version.py              |  2 +-
 python/dask_cudf/dask_cudf/backends.py              |  2 +-
 python/dask_cudf/dask_cudf/io/parquet.py            |  4 ++--
 python/dask_cudf/dask_cudf/io/tests/test_parquet.py |  2 +-
 23 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 5f9104263b1..cfff2d3e267 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -337,7 +337,7 @@ def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray":
         else:
             return self.dropna(drop_nan=False).data_array_view
 
-    # TODO: This method is decpreated and can be removed when the associated
+    # TODO: This method is deprecated and can be removed when the associated
     # Frame methods are removed.
     def to_array(self, fillna=None) -> np.ndarray:
         """Get a dense numpy array for the data.
@@ -1851,7 +1851,7 @@ def as_column(
 
         arbitrary = np.asarray(arbitrary)
 
-        # Handle case that `arbitary` elements are cupy arrays
+        # Handle case that `arbitrary` elements are cupy arrays
         if (
             shape
             and shape[0]
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 68379002e6b..d1b4266b80b 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -540,7 +540,7 @@ def infer_format(element: str, **kwargs) -> str:
     if len(second_parts) > 1:
         # "Z" indicates Zulu time(widely used in aviation) - Which is
         # UTC timezone that currently cudf only supports. Having any other
-        # unsuppported timezone will let the code fail below
+        # unsupported timezone will let the code fail below
         # with a ValueError.
         second_parts.remove("Z")
         second_part = "".join(second_parts[1:])
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 6409a9f9196..7037b8e6f36 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -160,7 +160,7 @@ def binary_operator(self, op, other, reflect=False):
         if reflect:
             self, other = other, self
 
-        # Binary Arithmatics between decimal columns. `Scale` and `precision`
+        # Binary arithmetic between decimal columns. `Scale` and `precision`
         # are computed outside of libcudf
         if op in ("add", "sub", "mul", "div"):
             scale = _binop_scale(self.dtype, other.dtype, op)
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b2e6588edb2..6734566b731 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6368,7 +6368,7 @@ def wrapper(self, other, axis="columns", level=None, fill_value=None):
     # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature
     # string of a function by recursively delving into __wrapped__ until
     # it hits the first function that has __signature__ attribute set. To make
-    # the signature stirng of `wrapper` matches with its actual parameter list,
+    # the signature string of `wrapper` match its actual parameter list,
     # we directly set the __signature__ attribute of `wrapper` below.
 
     new_sig = inspect.signature(
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index ba69e42674a..13e7d0897f4 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -18,7 +18,7 @@
 
 
 # The three functions below return the quantiles [25%, 50%, 75%]
-# respectively, which are called in the describe() method to ouput
+# respectively, which are called in the describe() method to output
 # the summary stats of a GroupBy object
 def _quantile_25(x):
     return x.quantile(0.25)
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 35b80715cca..d187c0dc2d0 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -623,7 +623,7 @@ def _union(self, other, sort=None):
                 else:
                     return result
 
-        # If all the above optimizations don't cater to the inpputs,
+        # If all the above optimizations don't cater to the inputs,
         # we materialize RangeIndex's into `Int64Index` and
         # then perform `union`.
         return Int64Index(self._values)._union(other, sort=sort)
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 7c132e3fb71..d82d5dd6e26 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -970,7 +970,7 @@ def _concat(cls, objs):
 
         source_data = [o.to_frame(index=False) for o in objs]
 
-        # TODO: Verify if this is really necesary or if we can rely on
+        # TODO: Verify if this is really necessary or if we can rely on
         # DataFrame._concat.
         if len(source_data) > 1:
             colnames = source_data[0].columns
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 00a8ebabe34..5c271bdc4bb 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2916,7 +2916,7 @@ def unique(self):
 
     def nunique(self, method="sort", dropna=True):
         """Returns the number of unique values of the Series: approximate version,
-        and exact version to be moved to libgdf
+        and exact version to be moved to libcudf
 
         Excludes NA values by default.
 
@@ -2985,7 +2985,7 @@ def value_counts(
 
         Returns
         -------
-        result : Series contanining counts of unique values.
+        result : Series containing counts of unique values.
 
         See also
         --------
@@ -3802,7 +3802,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0):
     # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature
     # string of a function by recursively delving into __wrapped__ until
     # it hits the first function that has __signature__ attribute set. To make
-    # the signature stirng of `wrapper` matches with its actual parameter list,
+    # the signature string of `wrapper` match its actual parameter list,
     # we directly set the __signature__ attribute of `wrapper` below.
 
     new_sig = inspect.signature(
@@ -4989,7 +4989,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False):
 def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
     """Returns a boolean array where two arrays are equal within a tolerance.
 
-    Two values in ``a`` and ``b`` are  considiered equal when the following
+    Two values in ``a`` and ``b`` are  considered equal when the following
     equation is satisfied.
 
     .. math::
diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py
index deb4546e8b8..2464906be04 100644
--- a/python/cudf/cudf/core/udf/pipeline.py
+++ b/python/cudf/cudf/core/udf/pipeline.py
@@ -316,7 +316,7 @@ def compile_or_get(frame, func, args):
     Return a compiled kernel in terms of MaskedTypes that launches a
     kernel equivalent of `f` for the dtypes of `df`. The kernel uses
     a thread for each row and calls `f` using that rows data / mask
-    to produce an output value and output valdity for each row.
+    to produce an output value and output validity for each row.
 
     If the UDF has already been compiled for this requested dtypes,
     a cached version will be returned instead of running compilation.
diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py
index 4b0f0bf1283..da7ff4c0e32 100644
--- a/python/cudf/cudf/core/udf/typing.py
+++ b/python/cudf/cudf/core/udf/typing.py
@@ -67,7 +67,7 @@ def unify(self, context, other):
         """
         Often within a UDF an instance arises where a variable could
         be a `MaskedType`, an `NAType`, or a literal based off
-        the data at runtime, for examplem the variable `ret` here:
+        the data at runtime, for example, the variable `ret` here:
 
         def f(x):
             if x == 1:
@@ -185,7 +185,7 @@ class NAType(types.Type):
     """
     A type for handling ops against nulls
     Exists so we can:
-    1. Teach numba that all occurances of `cudf.NA` are
+    1. Teach numba that all occurrences of `cudf.NA` are
        to be read as instances of this type instead
     2. Define ops like `if x is cudf.NA` where `x` is of
        type `Masked` to mean `if x.valid is False`
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index 9562fca7399..59c291eea0b 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -410,7 +410,7 @@ def assert_series_equal(
         Whether to check the Index class, dtype and inferred_type
         are identical.
     check_series_type : bool, default True
-        Whether to check the seires class, dtype and
+        Whether to check the series class, dtype and
         inferred_type are identical. Currently it is idle,
         and similar to pandas.
     check_less_precise : bool or int, default False
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 542dcd9301c..ba2a6dce369 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -1173,7 +1173,7 @@ def make_scalar_product_data():
         )
     )
 
-    # we can muliply any timedelta by any int, or bool
+    # we can multiply any timedelta by any int, or bool
     valid |= set(product(TIMEDELTA_TYPES, INTEGER_TYPES | BOOL_TYPES))
 
     # we can multiply a float by any int, float, or bool
diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py
index 16e5b345ce2..bfd2ccbccef 100644
--- a/python/cudf/cudf/tests/test_custom_accessor.py
+++ b/python/cudf/cudf/tests/test_custom_accessor.py
@@ -44,7 +44,7 @@ def test_dataframe_accessor(gdf):
     "gdf2", [gd.datasets.randomdata(nrows=1, dtypes={"x": int, "y": int})]
 )
 def test_dataframe_accessor_idendity(gdf1, gdf2):
-    """Test for accessor idendities
+    """Test for accessor identities
     - An object should hold persistent reference to the same accessor
     - Different objects should hold difference instances of the accessor
     """
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index d666dfc0ec1..1768947ab8d 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -171,7 +171,7 @@ def test_dt_ops(data):
     assert_eq(pd_data > pd_data, gdf_data > gdf_data)
 
 
-# libgdf doesn't respect timezones
+# libcudf doesn't respect timezones
 @pytest.mark.parametrize("data", [data1()])
 @pytest.mark.parametrize("field", fields)
 def test_dt_series(data, field):
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index d409a099806..07407b8d359 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -738,9 +738,9 @@ def test_multiindex_copy_sem(data, levels, codes, names):
 )
 @pytest.mark.parametrize("deep", [True, False])
 def test_multiindex_copy_deep(data, deep):
-    """Test memory idendity for deep copy
+    """Test memory identity for deep copy
     Case1: Constructed from GroupBy, StringColumns
-    Case2: Constrcuted from MultiIndex, NumericColumns
+    Case2: Constructed from MultiIndex, NumericColumns
     """
     same_ref = not deep
 
@@ -768,19 +768,19 @@ def test_multiindex_copy_deep(data, deep):
         mi1 = data
         mi2 = mi1.copy(deep=deep)
 
-        # Assert ._levels idendity
+        # Assert ._levels identity
         lptrs = [lv._data._data[None].base_data.ptr for lv in mi1._levels]
         rptrs = [lv._data._data[None].base_data.ptr for lv in mi2._levels]
 
         assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
 
-        # Assert ._codes idendity
+        # Assert ._codes identity
         lptrs = [c.base_data.ptr for _, c in mi1._codes._data.items()]
         rptrs = [c.base_data.ptr for _, c in mi2._codes._data.items()]
 
         assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)])
 
-        # Assert ._data idendity
+        # Assert ._data identity
         lptrs = [d.base_data.ptr for _, d in mi1._data.items()]
         rptrs = [d.base_data.ptr for _, d in mi2._data.items()]
 
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 99b5652110b..6b02874146e 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -502,7 +502,7 @@ def test_orc_writer_sliced(tmpdir):
         "TestOrcFile.decimal.orc",
         "TestOrcFile.decimal.same.values.orc",
         "TestOrcFile.decimal.multiple.values.orc",
-        # For addional information take look at PR 7034
+        # For additional information take a look at PR 7034
         "TestOrcFile.decimal.runpos.issue.orc",
     ],
 )
@@ -541,7 +541,7 @@ def test_orc_decimal_precision_fail(datadir):
     assert_eq(pdf, gdf)
 
 
-# For addional information take look at PR 6636 and 6702
+# For additional information take a look at PR 6636 and 6702
 @pytest.mark.parametrize(
     "orc_file",
     [
diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py
index 77963f8bcc1..dbdd68f2df8 100644
--- a/python/cudf/cudf/utils/gpu_utils.py
+++ b/python/cudf/cudf/utils/gpu_utils.py
@@ -143,7 +143,7 @@ def _try_get_old_or_new_symbols():
             cuda_driver_supported_rt_version >= 11000
             and cuda_runtime_version >= 11000
         ):
-            # With cuda enhanced compatibitlity any code compiled
+            # With cuda enhanced compatibility any code compiled
             # with 11.x version of cuda can now run on any
             # driver >= 450.80.02. 11000 is the minimum cuda
             # version 450.80.02 supports.
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 6746753249c..a7891957102 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1032,7 +1032,7 @@
     should consume messages from. Valid values are 0 - (N-1)
 start_offset : int, Kafka Topic/Partition offset that consumption
     should begin at. Inclusive.
-end_offset : int, Kafka Topic/Parition offset that consumption
+end_offset : int, Kafka Topic/Partition offset that consumption
     should end at. Inclusive.
 batch_timeout : int, default 10000
     Maximum number of milliseconds that will be spent trying to
@@ -1055,7 +1055,7 @@
     or any object with a `read()` method (such as builtin `open()` file handler
     function or `StringIO`).
 delimiter : string, default None, The delimiter that should be used
-    for splitting text chunks into seperate cudf column rows. Currently
+    for splitting text chunks into separate cudf column rows. Currently
     only a single delimiter is supported.
 
 Returns
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 4f9b23bf6fe..a9611a91554 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -353,7 +353,7 @@ def get_appropriate_dispatched_func(
 
         elif hasattr(cupy_submodule, fname):
             cupy_func = getattr(cupy_submodule, fname)
-            # Handle case if cupy impliments it as a numpy function
+            # Handle case if cupy implements it as a numpy function
             # Unsure if needed
             if cupy_func is func:
                 return NotImplemented
@@ -374,7 +374,7 @@ def _cast_to_appropriate_cudf_type(val, index=None):
     elif (val.ndim == 1) or (val.ndim == 2 and val.shape[1] == 1):
         # if index is not None and is of a different length
         # than the index, cupy dispatching behaviour is undefined
-        # so we dont impliment it
+        # so we don't implement it
         if (index is None) or (len(index) == len(val)):
             return cudf.Series(val, index=index)
 
diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py
index eb7457f3465..8ca2cf98381 100644
--- a/python/dask_cudf/dask_cudf/_version.py
+++ b/python/dask_cudf/dask_cudf/_version.py
@@ -417,7 +417,7 @@ def render_pep440_old(pieces):
 
     The ".dev0" means dirty.
 
-    Eexceptions:
+    Exceptions:
     1: no tags. 0.postDISTANCE[.dev0]
     """
     if pieces["closest-tag"]:
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index f81a4743a4a..89b5301ee83 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -196,7 +196,7 @@ def make_meta_object_cudf(x, index=None):
         )
     elif not hasattr(x, "dtype") and x is not None:
         # could be a string, a dtype object, or a python type. Skip `None`,
-        # because it is implictly converted to `dtype('f8')`, which we don't
+        # because it is implicitly converted to `dtype('f8')`, which we don't
         # want here.
         try:
             dtype = np.dtype(x)
diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py
index 2e5d55e92d2..b47a5e78095 100644
--- a/python/dask_cudf/dask_cudf/io/parquet.py
+++ b/python/dask_cudf/dask_cudf/io/parquet.py
@@ -111,7 +111,7 @@ def _read_paths(
             frag = next(ds.get_fragments())
             if frag:
                 # Extract hive-partition keys, and make sure they
-                # are orderd the same as they are in `partitions`
+                # are ordered the same as they are in `partitions`
                 raw_keys = pa_ds._get_partition_keys(frag.partition_expression)
                 partition_keys = [
                     (hive_part.name, raw_keys[hive_part.name])
@@ -173,7 +173,7 @@ def read_partition(
 
         strings_to_cats = kwargs.get("strings_to_categorical", False)
 
-        # Assume multi-peice read
+        # Assume multi-piece read
         paths = []
         rgs = []
         last_partition_keys = None
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
index d93037b3802..706b0e272ea 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
@@ -378,7 +378,7 @@ def test_chunksize(tmpdir, chunksize, metadata):
             # one output partition
             assert ddf3.npartitions == 1
         else:
-            # Files can be aggregateed together, but
+            # Files can be aggregated together, but
             # chunksize is not large enough to produce
             # a single output partition
             assert ddf3.npartitions < num_row_groups