From 4a70dedf513249beeea28e5d3d584886edbf576c Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 15 Nov 2021 11:33:52 +0530 Subject: [PATCH] spell check fix python/ --- python/cudf/cudf/core/column/column.py | 4 ++-- python/cudf/cudf/core/column/datetime.py | 2 +- python/cudf/cudf/core/column/decimal.py | 2 +- python/cudf/cudf/core/dataframe.py | 2 +- python/cudf/cudf/core/groupby/groupby.py | 2 +- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/multiindex.py | 2 +- python/cudf/cudf/core/series.py | 8 ++++---- python/cudf/cudf/core/udf/pipeline.py | 2 +- python/cudf/cudf/core/udf/typing.py | 4 ++-- python/cudf/cudf/testing/testing.py | 2 +- python/cudf/cudf/tests/test_binops.py | 2 +- python/cudf/cudf/tests/test_custom_accessor.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 2 +- python/cudf/cudf/tests/test_multiindex.py | 10 +++++----- python/cudf/cudf/tests/test_orc.py | 4 ++-- python/cudf/cudf/utils/gpu_utils.py | 2 +- python/cudf/cudf/utils/ioutils.py | 4 ++-- python/cudf/cudf/utils/utils.py | 4 ++-- python/dask_cudf/dask_cudf/_version.py | 2 +- python/dask_cudf/dask_cudf/backends.py | 2 +- python/dask_cudf/dask_cudf/io/parquet.py | 4 ++-- python/dask_cudf/dask_cudf/io/tests/test_parquet.py | 2 +- 23 files changed, 36 insertions(+), 36 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 5f9104263b1..cfff2d3e267 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -337,7 +337,7 @@ def to_gpu_array(self, fillna=None) -> "cuda.devicearray.DeviceNDArray": else: return self.dropna(drop_nan=False).data_array_view - # TODO: This method is decpreated and can be removed when the associated + # TODO: This method is deprecated and can be removed when the associated # Frame methods are removed. def to_array(self, fillna=None) -> np.ndarray: """Get a dense numpy array for the data. 
@@ -1851,7 +1851,7 @@ def as_column( arbitrary = np.asarray(arbitrary) - # Handle case that `arbitary` elements are cupy arrays + # Handle case that `arbitrary` elements are cupy arrays if ( shape and shape[0] diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 68379002e6b..d1b4266b80b 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -540,7 +540,7 @@ def infer_format(element: str, **kwargs) -> str: if len(second_parts) > 1: # "Z" indicates Zulu time(widely used in aviation) - Which is # UTC timezone that currently cudf only supports. Having any other - # unsuppported timezone will let the code fail below + # unsupported timezone will let the code fail below # with a ValueError. second_parts.remove("Z") second_part = "".join(second_parts[1:]) diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py index 6409a9f9196..7037b8e6f36 100644 --- a/python/cudf/cudf/core/column/decimal.py +++ b/python/cudf/cudf/core/column/decimal.py @@ -160,7 +160,7 @@ def binary_operator(self, op, other, reflect=False): if reflect: self, other = other, self - # Binary Arithmatics between decimal columns. `Scale` and `precision` + # Binary Arithmetics between decimal columns. `Scale` and `precision` # are computed outside of libcudf if op in ("add", "sub", "mul", "div"): scale = _binop_scale(self.dtype, other.dtype, op) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b2e6588edb2..6734566b731 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6368,7 +6368,7 @@ def wrapper(self, other, axis="columns", level=None, fill_value=None): # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature # string of a function by recursively delving into __wrapped__ until # it hits the first function that has __signature__ attribute set. 
To make - # the signature stirng of `wrapper` matches with its actual parameter list, + # the signature string of `wrapper` matches with its actual parameter list, # we directly set the __signature__ attribute of `wrapper` below. new_sig = inspect.signature( diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index ba69e42674a..13e7d0897f4 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -18,7 +18,7 @@ # The three functions below return the quantiles [25%, 50%, 75%] -# respectively, which are called in the describe() method to ouput +# respectively, which are called in the describe() method to output # the summary stats of a GroupBy object def _quantile_25(x): return x.quantile(0.25) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 35b80715cca..d187c0dc2d0 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -623,7 +623,7 @@ def _union(self, other, sort=None): else: return result - # If all the above optimizations don't cater to the inpputs, + # If all the above optimizations don't cater to the inputs, # we materialize RangeIndex's into `Int64Index` and # then perform `union`. return Int64Index(self._values)._union(other, sort=sort) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 7c132e3fb71..d82d5dd6e26 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -970,7 +970,7 @@ def _concat(cls, objs): source_data = [o.to_frame(index=False) for o in objs] - # TODO: Verify if this is really necesary or if we can rely on + # TODO: Verify if this is really necessary or if we can rely on # DataFrame._concat. 
if len(source_data) > 1: colnames = source_data[0].columns diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 00a8ebabe34..5c271bdc4bb 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2916,7 +2916,7 @@ def unique(self): def nunique(self, method="sort", dropna=True): """Returns the number of unique values of the Series: approximate version, - and exact version to be moved to libgdf + and exact version to be moved to libcudf Excludes NA values by default. @@ -2985,7 +2985,7 @@ def value_counts( Returns ------- - result : Series contanining counts of unique values. + result : Series containing counts of unique values. See also -------- @@ -3802,7 +3802,7 @@ def wrapper(self, other, level=None, fill_value=None, axis=0): # __wrapped__ attributes to `wrapped_func`. Cpython looks up the signature # string of a function by recursively delving into __wrapped__ until # it hits the first function that has __signature__ attribute set. To make - # the signature stirng of `wrapper` matches with its actual parameter list, + # the signature string of `wrapper` matches with its actual parameter list, # we directly set the __signature__ attribute of `wrapper` below. new_sig = inspect.signature( @@ -4989,7 +4989,7 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): """Returns a boolean array where two arrays are equal within a tolerance. - Two values in ``a`` and ``b`` are considiered equal when the following + Two values in ``a`` and ``b`` are considered equal when the following equation is satisfied. .. 
math:: diff --git a/python/cudf/cudf/core/udf/pipeline.py b/python/cudf/cudf/core/udf/pipeline.py index deb4546e8b8..2464906be04 100644 --- a/python/cudf/cudf/core/udf/pipeline.py +++ b/python/cudf/cudf/core/udf/pipeline.py @@ -316,7 +316,7 @@ def compile_or_get(frame, func, args): Return a compiled kernel in terms of MaskedTypes that launches a kernel equivalent of `f` for the dtypes of `df`. The kernel uses a thread for each row and calls `f` using that rows data / mask - to produce an output value and output valdity for each row. + to produce an output value and output validity for each row. If the UDF has already been compiled for this requested dtypes, a cached version will be returned instead of running compilation. diff --git a/python/cudf/cudf/core/udf/typing.py b/python/cudf/cudf/core/udf/typing.py index 4b0f0bf1283..da7ff4c0e32 100644 --- a/python/cudf/cudf/core/udf/typing.py +++ b/python/cudf/cudf/core/udf/typing.py @@ -67,7 +67,7 @@ def unify(self, context, other): """ Often within a UDF an instance arises where a variable could be a `MaskedType`, an `NAType`, or a literal based off - the data at runtime, for examplem the variable `ret` here: + the data at runtime, for example the variable `ret` here: def f(x): if x == 1: @@ -185,7 +185,7 @@ class NAType(types.Type): """ A type for handling ops against nulls Exists so we can: - 1. Teach numba that all occurances of `cudf.NA` are + 1. Teach numba that all occurrences of `cudf.NA` are to be read as instances of this type instead 2. Define ops like `if x is cudf.NA` where `x` is of type `Masked` to mean `if x.valid is False` diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 9562fca7399..59c291eea0b 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -410,7 +410,7 @@ def assert_series_equal( Whether to check the Index class, dtype and inferred_type are identical. 
check_series_type : bool, default True - Whether to check the seires class, dtype and + Whether to check the series class, dtype and inferred_type are identical. Currently it is idle, and similar to pandas. check_less_precise : bool or int, default False diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 542dcd9301c..ba2a6dce369 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -1173,7 +1173,7 @@ def make_scalar_product_data(): ) ) - # we can muliply any timedelta by any int, or bool + # we can multiply any timedelta by any int, or bool valid |= set(product(TIMEDELTA_TYPES, INTEGER_TYPES | BOOL_TYPES)) # we can multiply a float by any int, float, or bool diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py index 16e5b345ce2..bfd2ccbccef 100644 --- a/python/cudf/cudf/tests/test_custom_accessor.py +++ b/python/cudf/cudf/tests/test_custom_accessor.py @@ -44,7 +44,7 @@ def test_dataframe_accessor(gdf): "gdf2", [gd.datasets.randomdata(nrows=1, dtypes={"x": int, "y": int})] ) def test_dataframe_accessor_idendity(gdf1, gdf2): - """Test for accessor idendities + """Test for accessor identities - An object should hold persistent reference to the same accessor - Different objects should hold difference instances of the accessor """ diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index d666dfc0ec1..1768947ab8d 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -171,7 +171,7 @@ def test_dt_ops(data): assert_eq(pd_data > pd_data, gdf_data > gdf_data) -# libgdf doesn't respect timezones +# libcudf doesn't respect timezones @pytest.mark.parametrize("data", [data1()]) @pytest.mark.parametrize("field", fields) def test_dt_series(data, field): diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index d409a099806..07407b8d359 100644 --- a/python/cudf/cudf/tests/test_multiindex.py +++ b/python/cudf/cudf/tests/test_multiindex.py @@ -738,9 +738,9 @@ def test_multiindex_copy_sem(data, levels, codes, names): ) @pytest.mark.parametrize("deep", [True, False]) def test_multiindex_copy_deep(data, deep): - """Test memory idendity for deep copy + """Test memory identity for deep copy Case1: Constructed from GroupBy, StringColumns - Case2: Constrcuted from MultiIndex, NumericColumns + Case2: Constructed from MultiIndex, NumericColumns """ same_ref = not deep @@ -768,19 +768,19 @@ def test_multiindex_copy_deep(data, deep): mi1 = data mi2 = mi1.copy(deep=deep) - # Assert ._levels idendity + # Assert ._levels identity lptrs = [lv._data._data[None].base_data.ptr for lv in mi1._levels] rptrs = [lv._data._data[None].base_data.ptr for lv in mi2._levels] assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)]) - # Assert ._codes idendity + # Assert ._codes identity lptrs = [c.base_data.ptr for _, c in mi1._codes._data.items()] rptrs = [c.base_data.ptr for _, c in mi2._codes._data.items()] assert all([(x == y) is same_ref for x, y in zip(lptrs, rptrs)]) - # Assert ._data idendity + # Assert ._data identity lptrs = [d.base_data.ptr for _, d in mi1._data.items()] rptrs = [d.base_data.ptr for _, d in mi2._data.items()] diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 99b5652110b..6b02874146e 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -502,7 +502,7 @@ def test_orc_writer_sliced(tmpdir): "TestOrcFile.decimal.orc", "TestOrcFile.decimal.same.values.orc", "TestOrcFile.decimal.multiple.values.orc", - # For addional information take look at PR 7034 + # For additional information take look at PR 7034 "TestOrcFile.decimal.runpos.issue.orc", ], ) @@ -541,7 +541,7 @@ def test_orc_decimal_precision_fail(datadir): assert_eq(pdf, gdf) -# For addional information take look at PR 6636 and 6702 +# 
For additional information take look at PR 6636 and 6702 @pytest.mark.parametrize( "orc_file", [ diff --git a/python/cudf/cudf/utils/gpu_utils.py b/python/cudf/cudf/utils/gpu_utils.py index 77963f8bcc1..dbdd68f2df8 100644 --- a/python/cudf/cudf/utils/gpu_utils.py +++ b/python/cudf/cudf/utils/gpu_utils.py @@ -143,7 +143,7 @@ def _try_get_old_or_new_symbols(): cuda_driver_supported_rt_version >= 11000 and cuda_runtime_version >= 11000 ): - # With cuda enhanced compatibitlity any code compiled + # With cuda enhanced compatibility any code compiled # with 11.x version of cuda can now run on any # driver >= 450.80.02. 11000 is the minimum cuda # version 450.80.02 supports. diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 6746753249c..a7891957102 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1032,7 +1032,7 @@ should consume messages from. Valid values are 0 - (N-1) start_offset : int, Kafka Topic/Partition offset that consumption should begin at. Inclusive. -end_offset : int, Kafka Topic/Parition offset that consumption +end_offset : int, Kafka Topic/Partition offset that consumption should end at. Inclusive. batch_timeout : int, default 10000 Maximum number of milliseconds that will be spent trying to @@ -1055,7 +1055,7 @@ or any object with a `read()` method (such as builtin `open()` file handler function or `StringIO`). delimiter : string, default None, The delimiter that should be used - for splitting text chunks into seperate cudf column rows. Currently + for splitting text chunks into separate cudf column rows. Currently only a single delimiter is supported. 
Returns diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py index 4f9b23bf6fe..a9611a91554 100644 --- a/python/cudf/cudf/utils/utils.py +++ b/python/cudf/cudf/utils/utils.py @@ -353,7 +353,7 @@ def get_appropriate_dispatched_func( elif hasattr(cupy_submodule, fname): cupy_func = getattr(cupy_submodule, fname) - # Handle case if cupy impliments it as a numpy function + # Handle case if cupy implements it as a numpy function # Unsure if needed if cupy_func is func: return NotImplemented @@ -374,7 +374,7 @@ def _cast_to_appropriate_cudf_type(val, index=None): elif (val.ndim == 1) or (val.ndim == 2 and val.shape[1] == 1): # if index is not None and is of a different length # than the index, cupy dispatching behaviour is undefined - # so we dont impliment it + # so we don't implement it if (index is None) or (len(index) == len(val)): return cudf.Series(val, index=index) diff --git a/python/dask_cudf/dask_cudf/_version.py b/python/dask_cudf/dask_cudf/_version.py index eb7457f3465..8ca2cf98381 100644 --- a/python/dask_cudf/dask_cudf/_version.py +++ b/python/dask_cudf/dask_cudf/_version.py @@ -417,7 +417,7 @@ def render_pep440_old(pieces): The ".dev0" means dirty. - Eexceptions: + Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index f81a4743a4a..89b5301ee83 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -196,7 +196,7 @@ def make_meta_object_cudf(x, index=None): ) elif not hasattr(x, "dtype") and x is not None: # could be a string, a dtype object, or a python type. Skip `None`, - # because it is implictly converted to `dtype('f8')`, which we don't + # because it is implicitly converted to `dtype('f8')`, which we don't # want here. 
try: dtype = np.dtype(x) diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py index 2e5d55e92d2..b47a5e78095 100644 --- a/python/dask_cudf/dask_cudf/io/parquet.py +++ b/python/dask_cudf/dask_cudf/io/parquet.py @@ -111,7 +111,7 @@ def _read_paths( frag = next(ds.get_fragments()) if frag: # Extract hive-partition keys, and make sure they - # are orderd the same as they are in `partitions` + # are ordered the same as they are in `partitions` raw_keys = pa_ds._get_partition_keys(frag.partition_expression) partition_keys = [ (hive_part.name, raw_keys[hive_part.name]) @@ -173,7 +173,7 @@ def read_partition( strings_to_cats = kwargs.get("strings_to_categorical", False) - # Assume multi-peice read + # Assume multi-piece read paths = [] rgs = [] last_partition_keys = None diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py index d93037b3802..706b0e272ea 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py @@ -378,7 +378,7 @@ def test_chunksize(tmpdir, chunksize, metadata): # one output partition assert ddf3.npartitions == 1 else: - # Files can be aggregateed together, but + # Files can be aggregated together, but # chunksize is not large enough to produce # a single output partition assert ddf3.npartitions < num_row_groups