From a75cf995d1836102624ee4a1c5a4f8159dc3986b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Apr 2022 06:22:12 -0700 Subject: [PATCH 1/3] Update config files with proper includes/ignores. --- .pre-commit-config.yaml | 13 +++++++++++++ python/.flake8 | 12 ++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f690f5f827..f793bc23289 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,6 +56,19 @@ repos: hooks: - id: pydocstyle args: ["--config=python/.flake8"] + exclude: | + (?x)^( + ci| + cpp| + conda| + docs| + java| + notebooks| + python/dask_cudf| + python/cudf_kafka| + python/custreamz| + python/cudf/cudf/tests + ) - repo: https://github.com/pre-commit/mirrors-clang-format rev: v11.1.0 hooks: diff --git a/python/.flake8 b/python/.flake8 index c645c46a216..c13c61526b1 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -9,10 +9,14 @@ ignore = E203 [pydocstyle] -match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.py)$ -# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather than include using match-dir. -match-dir = ^(?!ci|cpp|python/dask_cudf|python/cudf_kafka|python/custreamz).*$ -# In addition to numpy style, we additionally ignore: +match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.py|.*series\.py)$ +# Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather +# than include using match-dir. Note that as discussed in +# https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, +# unlike the match option above this match-dir will have no effect when +# pydocstyle is invoked from pre-commit. Therefore this exclusion list must +# also be maintained in the pre-commit config file. 
+match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$ add-ignore = # magic methods D105, From 1f275be2420d7d99986cbd69e475e9f42c14110f Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Apr 2022 07:00:43 -0700 Subject: [PATCH 2/3] Apply D3* pydocstyle rules. --- .pre-commit-config.yaml | 2 + python/.flake8 | 12 ++--- python/cudf/cudf/comm/gpuarrow.py | 4 +- python/cudf/cudf/core/column/string.py | 66 +++++++++++++------------- python/cudf/cudf/core/frame.py | 4 +- python/cudf/cudf/core/series.py | 4 +- 6 files changed, 44 insertions(+), 48 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f793bc23289..cd7b8aea6d7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,5 @@ +# Copyright (c) 2019-2022, NVIDIA CORPORATION. + repos: - repo: https://github.com/PyCQA/isort rev: 5.6.4 diff --git a/python/.flake8 b/python/.flake8 index c13c61526b1..d4d762dd50f 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. [flake8] exclude = __init__.py @@ -9,7 +9,6 @@ ignore = E203 [pydocstyle] -match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.py|.*series\.py)$ # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather # than include using match-dir. Note that as discussed in # https://stackoverflow.com/questions/65478393/how-to-filter-directories-using-the-match-dir-flag-for-pydocstyle, @@ -17,10 +16,5 @@ match = ^(.*abc\.py|.*api/types\.py|.*single_column_frame\.py|.*indexed_frame\.p # pydocstyle is invoked from pre-commit. Therefore this exclusion list must # also be maintained in the pre-commit config file. 
match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$ -add-ignore = - # magic methods - D105, - # no docstring in __init__ - D107, - # newlines before docstrings - D204 +select = + D30 diff --git a/python/cudf/cudf/comm/gpuarrow.py b/python/cudf/cudf/comm/gpuarrow.py index 09b4cc5ffba..0c4d9d7f77e 100644 --- a/python/cudf/cudf/comm/gpuarrow.py +++ b/python/cudf/cudf/comm/gpuarrow.py @@ -119,12 +119,12 @@ def null(self): @property def data_raw(self): - "Accessor for the data buffer as a device array" + """Accessor for the data buffer as a device array""" return self._series._column.data_array_view @property def null_raw(self): - "Accessor for the null buffer as a device array" + """Accessor for the null buffer as a device array""" return self._series._column.mask_array_view def make_series(self): diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 1d836d9b759..0db7e7d9a27 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -201,7 +201,7 @@ def __getitem__(self, key): return self.get(key) def len(self) -> SeriesOrIndex: - """ + r""" Computes the length of each element in the Series/Index. Returns @@ -213,7 +213,7 @@ def len(self) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(["dog", "", "\\n", None]) + >>> s = cudf.Series(["dog", "", "\n", None]) >>> s.str.len() 0 3 1 0 @@ -960,7 +960,7 @@ def replace( ) def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: - """ + r""" Use the ``repl`` back-ref template to create a new string with the extracted elements found using the ``pat`` expression. 
@@ -980,7 +980,7 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex: -------- >>> import cudf >>> s = cudf.Series(["A543","Z756"]) - >>> s.str.replace_with_backrefs('(\\\\d)(\\\\d)', 'V\\\\2\\\\1') + >>> s.str.replace_with_backrefs('(\\d)(\\d)', 'V\\2\\1') 0 AV453 1 ZV576 dtype: object @@ -1195,7 +1195,7 @@ def istimestamp(self, format: str) -> SeriesOrIndex: ) def isfloat(self) -> SeriesOrIndex: - """ + r""" Check whether all characters in each string form floating value. If a string has zero characters, False is returned for @@ -1249,7 +1249,7 @@ def isfloat(self) -> SeriesOrIndex: 4 True 5 False dtype: bool - >>> s = cudf.Series(["this is plain text", "\\t\\n", "9.9", "9.9.9"]) + >>> s = cudf.Series(["this is plain text", "\t\n", "9.9", "9.9.9"]) >>> s.str.isfloat() 0 False 1 False @@ -2239,7 +2239,7 @@ def get(self, i: int = 0) -> SeriesOrIndex: return self._return_or_inplace(libstrings.get(self._column, i)) def get_json_object(self, json_path): - """ + r""" Applies a JSONPath string to an input strings column where each row in the column is a valid json string @@ -2258,7 +2258,7 @@ def get_json_object(self, json_path): >>> import cudf >>> s = cudf.Series( [ - \\"\\"\\" + \"\"\" { "store":{ "book":[ @@ -2277,13 +2277,13 @@ def get_json_object(self, json_path): ] } } - \\"\\"\\" + \"\"\" ]) >>> s - 0 {"store": {\\n "book": [\\n { "cat... + 0 {"store": {\n "book": [\n { "cat... dtype: object >>> s.str.get_json_object("$.store.book") - 0 [\\n { "category": "reference",\\n ... + 0 [\n { "category": "reference",\n ... dtype: object """ @@ -3138,7 +3138,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex: ) def strip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) or a set of @@ -3169,11 +3169,11 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. 
Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s 0 1. Ant. - 1 2. Bee!\\n - 2 3. Cat?\\t + 1 2. Bee!\n + 2 3. Cat?\t 3 dtype: object >>> s.str.strip() @@ -3182,7 +3182,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: 2 3. Cat? 3 dtype: object - >>> s.str.strip('123.!? \\n\\t') + >>> s.str.strip('123.!? \n\t') 0 Ant 1 Bee 2 Cat @@ -3197,7 +3197,7 @@ def strip(self, to_strip: str = None) -> SeriesOrIndex: ) def lstrip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) @@ -3228,11 +3228,11 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s.str.lstrip('123.') 0 Ant. - 1 Bee!\\n - 2 Cat?\\t + 1 Bee!\n + 2 Cat?\t 3 dtype: object """ @@ -3244,7 +3244,7 @@ def lstrip(self, to_strip: str = None) -> SeriesOrIndex: ) def rstrip(self, to_strip: str = None) -> SeriesOrIndex: - """ + r""" Remove leading and trailing characters. Strip whitespaces (including newlines) @@ -3277,14 +3277,14 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series(['1. Ant. ', '2. Bee!\\n', '3. Cat?\\t', None]) + >>> s = cudf.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t', None]) >>> s 0 1. Ant. - 1 2. Bee!\\n - 2 3. Cat?\\t + 1 2. Bee!\n + 2 3. Cat?\t 3 dtype: object - >>> s.str.rstrip('.!? \\n\\t') + >>> s.str.rstrip('.!? \n\t') 0 1. Ant 1 2. Bee 2 3. Cat @@ -3299,7 +3299,7 @@ def rstrip(self, to_strip: str = None) -> SeriesOrIndex: ) def wrap(self, width: int, **kwargs) -> SeriesOrIndex: - """ + r""" Wrap long strings in the Series/Index to be formatted in paragraphs with length less than a given width. 
@@ -3340,8 +3340,8 @@ def wrap(self, width: int, **kwargs) -> SeriesOrIndex: >>> data = ['line to be wrapped', 'another line to be wrapped'] >>> s = cudf.Series(data) >>> s.str.wrap(12) - 0 line to be\\nwrapped - 1 another line\\nto be\\nwrapped + 0 line to be\nwrapped + 1 another line\nto be\nwrapped dtype: object """ if not is_integer(width): @@ -3575,7 +3575,7 @@ def isempty(self) -> SeriesOrIndex: return self._return_or_inplace((self._column == "").fillna(False)) def isspace(self) -> SeriesOrIndex: - """ + r""" Check whether all characters in each string are whitespace. This is equivalent to running the Python string method @@ -3623,7 +3623,7 @@ def isspace(self) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> s = cudf.Series([' ', '\\t\\r\\n ', '']) + >>> s = cudf.Series([' ', '\t\r\n ', '']) >>> s.str.isspace() 0 True 1 True @@ -4271,7 +4271,7 @@ def normalize_spaces(self) -> SeriesOrIndex: ) def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: - """ + r""" Normalizes strings characters for tokenizing. This uses the normalizer that is built into the @@ -4280,7 +4280,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: - adding padding around punctuation (unicode category starts with "P") as well as certain ASCII symbols like "^" and "$" - adding padding around the CJK Unicode block characters - - changing whitespace (e.g. ``\\t``, ``\\n``, ``\\r``) to space + - changing whitespace (e.g. ``\t``, ``\n``, ``\r``) to space - removing control characters (unicode categories "Cc" and "Cf") If `do_lower_case = true`, lower-casing also removes the accents. 
@@ -4303,7 +4303,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: Examples -------- >>> import cudf - >>> ser = cudf.Series(["héllo, \\tworld","ĂĆCĖÑTED","$99"]) + >>> ser = cudf.Series(["héllo, \tworld","ĂĆCĖÑTED","$99"]) >>> ser.str.normalize_characters() 0 hello , world 1 accented diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 104ed3eeb67..d0e9e6d94c1 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3356,7 +3356,7 @@ def to_dlpack(self): @_cudf_nvtx_annotate def to_string(self): - """ + r""" Convert to string cuDF uses Pandas internals for efficient string formatting. @@ -3373,7 +3373,7 @@ def to_string(self): >>> df['key'] = [0, 1, 2] >>> df['val'] = [float(i + 10) for i in range(3)] >>> df.to_string() - ' key val\\n0 0 10.0\\n1 1 11.0\\n2 2 12.0' + ' key val\n0 0 10.0\n1 1 11.0\n2 2 12.0' """ return repr(self) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 4ff671509a0..d813db58d1e 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4614,13 +4614,13 @@ def _align_indices(series_list, how="outer", allow_non_unique=False): @_cudf_nvtx_annotate def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): - """Returns a boolean array where two arrays are equal within a tolerance. + r"""Returns a boolean array where two arrays are equal within a tolerance. Two values in ``a`` and ``b`` are considered equal when the following equation is satisfied. .. math:: - |a - b| \\le \\mathrm{atol} + \\mathrm{rtol} |b| + |a - b| \le \mathrm{atol} + \mathrm{rtol} |b| Parameters ---------- From 13f6a07a4ea7823572a37a7d07ae9504b2b0b81b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 27 Apr 2022 07:48:03 -0700 Subject: [PATCH 3/3] Add ignore-decorators to simplify future PRs. 
--- python/.flake8 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/.flake8 b/python/.flake8 index d4d762dd50f..667875030cc 100644 --- a/python/.flake8 +++ b/python/.flake8 @@ -16,5 +16,7 @@ ignore = # pydocstyle is invoked from pre-commit. Therefore this exclusion list must # also be maintained in the pre-commit config file. match-dir = ^(?!(ci|cpp|conda|docs|java|notebooks|dask_cudf|cudf_kafka|custreamz|tests)).*$ +# Skip functions whose decorator name contains docutils, doc_apply, or copy_docstring. +ignore-decorators = .*(docutils|doc_apply|copy_docstring).* select = D30