diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 06a69719517..2b52b040672 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,6 +52,16 @@ repos: - id: clang-format types_or: [c, c++, cuda] args: ["-fallback-style=none", "-style=file", "-i"] + - repo: https://github.com/sirosen/texthooks + rev: 0.4.0 + hooks: + - id: fix-smartquotes + exclude: | + (?x)^( + ^cpp/include/cudf_test/cxxopts.hpp| + ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*| + ^python/cudf/cudf/tests/test_text.py + ) - repo: local hooks: - id: no-deprecationwarning diff --git a/README.md b/README.md index 641ce1316b3..a013d3a9ea4 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ For additional examples, browse our complete [API documentation](https://docs.ra ## Quick Start -Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize cuDF. +Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you're running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize cuDF. ## Installation diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index ce6c55fe134..870e334c216 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -15,7 +15,7 @@ "\n", "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. 
On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n", "\n", - "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. For instance, when you call dask_cudf.read_csv(...), your cluster’s GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n", + "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. For instance, when you call dask_cudf.read_csv(...), your cluster's GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n", "\n", "\n", "### When to use cuDF and Dask-cuDF\n", diff --git a/docs/cudf/source/user_guide/missing-data.ipynb b/docs/cudf/source/user_guide/missing-data.ipynb index ad12c675373..ac5bddd34cf 100644 --- a/docs/cudf/source/user_guide/missing-data.ipynb +++ b/docs/cudf/source/user_guide/missing-data.ipynb @@ -229,7 +229,7 @@ "id": "acdf29d7", "metadata": {}, "source": [ - "One has to be mindful that in Python (and NumPy), the nan's don’t compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`." + "One has to be mindful that in Python (and NumPy), the nan's don't compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`." ] }, { @@ -279,7 +279,7 @@ "id": "4fdb8bc7", "metadata": {}, "source": [ - "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information." + "So as compared to above, a scalar equality comparison versus a None/np.nan doesn't provide useful information." 
] }, { diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx index d5568f53231..b8abe3d0dab 100644 --- a/python/cudf/cudf/_lib/search.pyx +++ b/python/cudf/cudf/_lib/search.pyx @@ -24,9 +24,9 @@ def search_sorted( List of columns to search in values : List of columns List of value columns to search for - side : str {‘left’, ‘right’} optional - If ‘left’, the index of the first suitable location is given. - If ‘right’, return the last such index + side : str {'left', 'right'} optional + If 'left', the index of the first suitable location is given. + If 'right', return the last such index """ cdef unique_ptr[column] c_result cdef vector[libcudf_types.order] c_column_order diff --git a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx index c391719e853..8d673de12b8 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -41,7 +41,7 @@ def url_encode(Column source_strings): """ Encode each string in column. No format checking is performed. All characters are encoded except for ASCII letters, digits, - and these characters: ‘.’,’_’,’-‘,’~’. Encoding converts to + and these characters: '.','_','-','~'. Encoding converts to hex using UTF-8 encoded bytes. Parameters diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx index 99270b340eb..f53feab7936 100644 --- a/python/cudf/cudf/_lib/strings/padding.pyx +++ b/python/cudf/cudf/_lib/strings/padding.pyx @@ -59,7 +59,7 @@ def zfill(Column source_strings, size_type width): """ Returns a Column by prepending strings in `source_strings` - with ‘0’ characters up to the given `width`. + with '0' characters up to the given `width`. 
""" cdef unique_ptr[column] c_result cdef column_view source_view = source_strings.view() diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index c84e4ff4adb..625a9c70873 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -116,8 +116,8 @@ class StringMethods(ColumnMethods): This mimics pandas ``df.str`` interface. nulls stay null unless handled otherwise by a particular method. - Patterned after Python’s string methods, with some - inspiration from R’s stringr package. + Patterned after Python's string methods, with some + inspiration from R's stringr package. """ _column: StringColumn @@ -709,7 +709,7 @@ def contains( >>> idx.str.contains('23', regex=False) GenericIndex([False, False, False, True, ], dtype='bool') - Returning ‘house’ or ‘dog’ when either expression occurs in a string. + Returning 'house' or 'dog' when either expression occurs in a string. >>> s1.str.contains('house|dog', regex=True) 0 False @@ -732,7 +732,7 @@ def contains( Ensure ``pat`` is a not a literal pattern when ``regex`` is set to True. Note in the following example one might expect only `s2[1]` and `s2[3]` to return True. However, - ‘.0’ as a regex matches any character followed by a 0. + '.0' as a regex matches any character followed by a 0. >>> s2 = cudf.Series(['40', '40.0', '41', '41.0', '35']) >>> s2.str.contains('.0', regex=True) @@ -2903,7 +2903,7 @@ def pad( additional characters will be filled with character defined in fillchar. - side : {‘left’, ‘right’, ‘both’}, default ‘left’ + side : {'left', 'right', 'both'}, default 'left' Side from which to fill resulting string. fillchar : str, default ' ' (whitespace) @@ -2930,7 +2930,7 @@ def pad( Equivalent to ``Series.str.pad(side='both')``. zfill - Pad strings in the Series/Index by prepending ‘0’ character. + Pad strings in the Series/Index by prepending '0' character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``. 
Examples @@ -2970,7 +2970,7 @@ def pad( side = libstrings.SideType[side.upper()] except KeyError: raise ValueError( - "side has to be either one of {‘left’, ‘right’, ‘both’}" + "side has to be either one of {'left', 'right', 'both'}" ) return self._return_or_inplace( @@ -2979,9 +2979,9 @@ def pad( def zfill(self, width: int) -> SeriesOrIndex: """ - Pad strings in the Series/Index by prepending ‘0’ characters. + Pad strings in the Series/Index by prepending '0' characters. - Strings in the Series/Index are padded with ‘0’ characters + Strings in the Series/Index are padded with '0' characters on the left of the string to reach a total string length width. Strings in the Series/Index with length greater or equal to width are unchanged. @@ -2994,12 +2994,12 @@ def zfill(self, width: int) -> SeriesOrIndex: width : int Minimum length of resulting string; strings with length less than width - be prepended with ‘0’ characters. + be prepended with '0' characters. Returns ------- Series/Index of str dtype - Returns Series or Index with prepended ‘0’ characters. + Returns Series or Index with prepended '0' characters. See Also -------- @@ -3405,7 +3405,7 @@ def wrap(self, width: int, **kwargs) -> SeriesOrIndex: `expand_tabsbool` are not yet supported and will raise a NotImplementedError if they are set to any value. - This method currently achieves behavior matching R’s + This method currently achieves behavior matching R's stringr library ``str_wrap`` function, the equivalent pandas implementation can be obtained using the following parameter setting: @@ -3576,7 +3576,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: >>> import cudf >>> s = cudf.Series(['Lion', 'Monkey', 'Rabbit']) - The search for the pattern ‘Monkey’ returns one match: + The search for the pattern 'Monkey' returns one match: >>> s.str.findall('Monkey') 0 [] @@ -3595,7 +3595,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: Regular expressions are supported too. 
For instance, the search for all the strings ending with - the word ‘on’ is shown next: + the word 'on' is shown next: >>> s.str.findall('on$') 0 [on] @@ -4228,7 +4228,7 @@ def url_encode(self) -> SeriesOrIndex: Returns a URL-encoded format of each string. No format checking is performed. All characters are encoded except for ASCII letters, - digits, and these characters: ``‘.’,’_’,’-‘,’~’``. + digits, and these characters: ``'.','_','-','~'``. Encoding converts to hex using UTF-8 encoded bytes. Returns diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 82a4a4a8b65..5c24b222a1b 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2293,7 +2293,7 @@ def reindex( Return a new object, even if the passed indexes are the same. level : Not supported fill_value : Value to use for missing values. - Defaults to ``NA``, but can be any “compatible” value. + Defaults to ``NA``, but can be any "compatible" value. limit : Not supported tolerance : Not supported @@ -2358,7 +2358,7 @@ def reindex( IE10 404 Konqueror 301 - Or we can use “axis-style” keyword arguments + Or we can use "axis-style" keyword arguments >>> df.reindex(columns=['http_status', 'user_agent']) http_status user_agent Firefox 200 @@ -3028,7 +3028,7 @@ def rename( """Alter column and index labels. Function / dict values must be unique (1-to-1). Labels not contained in - a dict / Series will be left as-is. Extra labels listed don’t throw an + a dict / Series will be left as-is. Extra labels listed don't throw an error. ``DataFrame.rename`` supports two calling conventions: @@ -3635,8 +3635,8 @@ def merge( If on is None and not merging on indexes then this defaults to the intersection of the columns in both DataFrames. - how : {‘left’, ‘outer’, ‘inner’, 'leftsemi', 'leftanti'}, \ - default ‘inner’ + how : {'left', 'outer', 'inner', 'leftsemi', 'leftanti'}, \ + default 'inner' Type of merge to be performed. 
- left : use only keys from left frame, similar to a SQL left @@ -5363,7 +5363,7 @@ def isin(self, values): ---------- values : iterable, Series, DataFrame or dict The result will only be true at a location if all - the labels match. If values is a Series, that’s the index. + the labels match. If values is a Series, that's the index. If values is a dict, the keys must be the column names, which must match. If values is a DataFrame, then both the index and column labels must match. diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 12c53ae258d..29d5c9ae26d 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1363,12 +1363,12 @@ def searchsorted( ---------- value : Frame (Shape must be consistent with self) Values to be hypothetically inserted into Self - side : str {‘left’, ‘right’} optional, default ‘left‘ - If ‘left’, the index of the first suitable location found is given - If ‘right’, return the last such index + side : str {'left', 'right'} optional, default 'left' + If 'left', the index of the first suitable location found is given + If 'right', return the last such index ascending : bool optional, default True Sorted Frame is in ascending order (otherwise descending) - na_position : str {‘last’, ‘first’} optional, default ‘last‘ + na_position : str {'last', 'first'} optional, default 'last' Position of null values in sorted order Returns @@ -1476,8 +1476,8 @@ def argsort( Has no effect but is accepted for compatibility with numpy. ascending : bool or list of bool, default True If True, sort values in ascending order, otherwise descending. - na_position : {‘first’ or ‘last’}, default ‘last’ - Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. 
Returns diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 0ab64bd985a..e4ea59c1f15 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -52,9 +52,9 @@ def _quantile_75(x): ---------- by : mapping, function, label, or list of labels Used to determine the groups for the groupby. If by is a - function, it’s called on each value of the object’s index. + function, it's called on each value of the object's index. If a dict or Series is passed, the Series or dict VALUES will - be used to determine the groups (the Series’ values are first + be used to determine the groups (the Series' values are first aligned; see .align() method). If an cupy array is passed, the values are used as-is determine the groups. A label or list of labels may be passed to group by the columns in self. @@ -65,7 +65,7 @@ def _quantile_75(x): as_index : bool, default True For aggregated output, return object with group labels as the index. Only relevant for DataFrame input. - as_index=False is effectively “SQL-style” grouped output. + as_index=False is effectively "SQL-style" grouped output. sort : bool, default False Sort result by group key. Differ from Pandas, cudf defaults to ``False`` for better performance. Note this does not influence @@ -717,7 +717,7 @@ def _normalize_aggs( def pipe(self, func, *args, **kwargs): """ Apply a function `func` with arguments to this GroupBy - object and return the function’s result. + object and return the function's result. Parameters ---------- @@ -1103,13 +1103,13 @@ def func(x): def describe(self, include=None, exclude=None): """ Generate descriptive statistics that summarizes the central tendency, - dispersion and shape of a dataset’s distribution, excluding NaN values. + dispersion and shape of a dataset's distribution, excluding NaN values. 
Analyzes numeric DataFrames only Parameters ---------- - include: ‘all’, list-like of dtypes or None (default), optional + include: 'all', list-like of dtypes or None (default), optional list of data types to include in the result. Ignored for Series. diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 0628497fc29..fbaa95763a1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1062,7 +1062,7 @@ def equals(self, other, **kwargs): Returns ------- out: bool - True if “other” is an Index and it has the same elements + True if "other" is an Index and it has the same elements as calling index; False otherwise. """ if ( @@ -1414,8 +1414,8 @@ def argsort( Has no effect but is accepted for compatibility with numpy. ascending : bool or list of bool, default True If True, sort values in ascending order, otherwise descending. - na_position : {‘first’ or ‘last’}, default ‘last’ - Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. Returns @@ -1853,7 +1853,7 @@ class DatetimeIndex(GenericIndex): This is not yet supported tz : pytz.timezone or dateutil.tz.tzfile This is not yet supported - ambiguous : ‘infer’, bool-ndarray, ‘NaT’, default ‘raise’ + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' This is not yet supported name : object Name to be stored in the index. @@ -2547,7 +2547,7 @@ class CategoricalIndex(GenericIndex): Whether or not this categorical is treated as an ordered categorical. If not given here or in dtype, the resulting categorical will be unordered. - dtype : CategoricalDtype or “category”, optional + dtype : CategoricalDtype or "category", optional If CategoricalDtype, cannot be used together with categories or ordered. 
copy : bool, default False diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index bbb1c95bef6..57469c0ff72 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -562,8 +562,8 @@ def replace( * dict: - Dicts can be used to specify different replacement values for different existing values. For example, {'a': 'b', - 'y': 'z'} replaces the value ‘a’ with ‘b’ and - ‘y’ with ‘z’. + 'y': 'z'} replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the ``value`` parameter should be ``None``. value : scalar, dict, list-like, str, default None @@ -1865,7 +1865,7 @@ def sort_values( Sort ascending vs. descending. Specify list for multiple sort orders. If this is a list of bools, must match the length of the by. - na_position : {‘first’, ‘last’}, default ‘last’ + na_position : {'first', 'last'}, default 'last' 'first' puts nulls at the beginning, 'last' puts nulls at the end ignore_index : bool, default False If True, index will not be sorted. diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 8e5d0ece729..ba9da2bcb0c 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -484,7 +484,7 @@ def melt( 4 b C 4 5 c C 6 - The names of ‘variable’ and ‘value’ columns can be customized: + The names of 'variable' and 'value' columns can be customized: >>> cudf.melt(df, id_vars=['A'], value_vars=['B'], ... var_name='myVarname', value_name='myValname') diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 07e1782d788..f9600c84f5e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -815,7 +815,7 @@ def reindex(self, *args, **kwargs): copy : boolean, default True level: Not Supported fill_value : Value to use for missing values. - Defaults to ``NA``, but can be any “compatible” value. + Defaults to ``NA``, but can be any "compatible" value. 
limit: Not Supported tolerance: Not Supported @@ -1605,7 +1605,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False): Name: animal, dtype: object The value `False` for parameter `keep` discards all sets - of duplicated entries. Setting the value of ‘inplace’ to + of duplicated entries. Setting the value of 'inplace' to `True` performs the operation inplace and returns `None`. >>> s.drop_duplicates(keep=False, inplace=True) @@ -1881,7 +1881,7 @@ def sort_values( Sort ascending vs. descending. Specify list for multiple sort orders. If this is a list of bools, must match the length of the by. - na_position : {‘first’, ‘last’}, default ‘last’ + na_position : {'first', 'last'}, default 'last' 'first' puts nulls at the beginning, 'last' puts nulls at the end ignore_index : bool, default False If True, index will not be sorted. @@ -2763,7 +2763,7 @@ def value_counts( only works with numeric data. dropna : bool, default True - Don’t include counts of NaN and None. + Don't include counts of NaN and None. Returns ------- @@ -2886,7 +2886,7 @@ def quantile( ---------- q : float or array-like, default 0.5 (50% quantile) 0 <= q <= 1, the quantile(s) to compute - interpolation : {’linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’} + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points i and j: columns : list of str @@ -4352,7 +4352,7 @@ def strftime(self, date_format, *args, **kwargs): Parameters ---------- date_format : str - Date format string (e.g. “%Y-%m-%d”). + Date format string (e.g. "%Y-%m-%d"). 
Returns ------- diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py index 070e4649c7b..a8428c2647b 100644 --- a/python/cudf/cudf/testing/testing.py +++ b/python/cudf/cudf/testing/testing.py @@ -134,7 +134,7 @@ def assert_column_equal( right Column to compare check_dtype : bool, default True Whether to check the Column dtype is identical. - check_column_type : bool or {‘equiv’}, default ‘equiv’ + check_column_type : bool or {'equiv'}, default 'equiv' Whether to check the columns class, dtype and inferred_type are identical. Currently it is idle, and similar to pandas. @@ -152,7 +152,7 @@ def assert_column_equal( Relative tolerance. Only used when `check_exact` is False. atol : float, default 1e-8 Absolute tolerance. Only used when `check_exact` is False. - obj : str, default ‘ColumnBase’ + obj : str, default 'ColumnBase' Specify object name being compared, internally used to show appropriate assertion message. """ @@ -322,9 +322,9 @@ def assert_index_equal( left Index to compare right : Index right Index to compare - exact : bool or {‘equiv’}, default ‘equiv’ + exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type - are identical. If ‘equiv’, then RangeIndex can be substituted + are identical. If 'equiv', then RangeIndex can be substituted for Int8Index, Int16Index, Int32Index, Int64Index as well. check_names : bool, default True Whether to check the names attribute. @@ -345,7 +345,7 @@ def assert_index_equal( Relative tolerance. Only used when `check_exact` is False. atol : float, default 1e-8 Absolute tolerance. Only used when `check_exact` is False. - obj : str, default ‘Index’ + obj : str, default 'Index' Specify object name being compared, internally used to show appropriate assertion message. @@ -467,7 +467,7 @@ def assert_series_equal( right Series to compare check_dtype : bool, default True Whether to check the Series dtype is identical. 
- check_index_type : bool or {‘equiv’}, default ‘equiv’ + check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. check_series_type : bool, default True @@ -491,7 +491,7 @@ def assert_series_equal( Relative tolerance. Only used when `check_exact` is False. atol : float, default 1e-8 Absolute tolerance. Only used when `check_exact` is False. - obj : str, default ‘Series’ + obj : str, default 'Series' Specify object name being compared, internally used to show appropriate assertion message. @@ -600,7 +600,7 @@ def assert_frame_equal( right DataFrame to compare check_dtype : bool, default True Whether to check the DataFrame dtype is identical. - check_index_type : bool or {‘equiv’}, default ‘equiv’ + check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. check_column_type : bool, default True @@ -630,7 +630,7 @@ def assert_frame_equal( Relative tolerance. Only used when `check_exact` is False. atol : float, default 1e-8 Absolute tolerance. Only used when `check_exact` is False. - obj : str, default ‘DataFrame’ + obj : str, default 'DataFrame' Specify object name being compared, internally used to show appropriate assertion message. diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py index 9f04e30fb28..09f0eb05eb6 100644 --- a/python/cudf/cudf/utils/docutils.py +++ b/python/cudf/cudf/utils/docutils.py @@ -83,7 +83,7 @@ def wrapper(func): Generate descriptive statistics. Descriptive statistics include those that summarize the - central tendency, dispersion and shape of a dataset’s + central tendency, dispersion and shape of a dataset's distribution, excluding ``NaN`` values. 
Analyzes both numeric and object series, as well as diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index ebb73ba0ca6..9146405c6ed 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -515,7 +515,7 @@ If ``None``, similar to ``True`` the dataframe's index(es) will be saved, however, instead of being saved as values any ``RangeIndex`` will be stored as a range in the metadata so it - doesn’t require much space and is faster. Other indexes will + doesn't require much space and is faster. Other indexes will be included as columns in the file output. See Also @@ -1046,7 +1046,7 @@ are mapped to the particular type passed. If list, types are applied in the same order as the column names. If dict, types are mapped to the column names. - E.g. {{‘a’: np.float64, ‘b’: int32, ‘c’: ‘float’}} + E.g. {{'a': np.float64, 'b': int32, 'c': 'float'}} If `None`, dtypes are inferred from the dataset. Use `str` to preserve data and not infer or interpret to dtype. true_values : list, default None @@ -1084,9 +1084,9 @@ dayfirst : bool, default False DD/MM format dates, international and European format. compression : {{'infer', 'gzip', 'zip', None}}, default 'infer' - For on-the-fly decompression of on-disk data. If ‘infer’, then detect - compression from the following extensions: ‘.gz’,‘.zip’ (otherwise no - decompression). If using ‘zip’, the ZIP file must contain only one + For on-the-fly decompression of on-disk data. If 'infer', then detect + compression from the following extensions: '.gz','.zip' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data file to be read in, otherwise the first non-zero-sized file will be used. Set to None for no decompression. 
thousands : char, default None @@ -1210,7 +1210,7 @@ Write out the index as a column encoding : str, default 'utf-8' A string representing the encoding to use in the output file - Only ‘utf-8’ is currently supported + Only 'utf-8' is currently supported compression : str, None A string representing the compression scheme to use in the the output file Compression while writing csv is not supported currently diff --git a/python/custreamz/README.md b/python/custreamz/README.md index 0bddc6473a8..6b105c9ea4a 100644 --- a/python/custreamz/README.md +++ b/python/custreamz/README.md @@ -41,7 +41,7 @@ A more detailed example of [parsing haproxy logs](https://github.com/rapidsai-co ## Quick Start -Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with cuStreamz already installed. +Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you're running. This provides a ready-to-run Docker container with cuStreamz already installed. ## Installation