diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1046f4ebe6f..b4e57947cf9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,12 +18,14 @@ repos: # Explicitly specify the pyproject.toml at the repo root, not per-project. args: ["--config", "pyproject.toml"] - repo: https://github.com/PyCQA/flake8 - rev: 3.8.3 + rev: 5.0.4 hooks: - id: flake8 args: ["--config=setup.cfg"] - files: python/.*\.(py|pyx|pxd)$ + files: python/.*$ types: [file] + types_or: [python, cython] + additional_dependencies: ["flake8-force"] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v0.971' hooks: diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 142d3c7d9cb..1cad2356906 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -38,15 +38,9 @@ dependencies: - ipython - pandoc<=2.0.0 - cudatoolkit=11.5 - - cuda-python >=11.5,<11.7.1 + - cuda-python>=11.5,<11.7.1 - pip - - flake8=3.8.3 - - black=22.3.0 - - isort=5.10.1 - - mypy=0.971 - - types-cachetools - doxygen=1.8.20 - - pydocstyle=6.1.1 - typing_extensions - pre-commit - dask>=2022.9.2 diff --git a/python/cudf/cudf/_lib/cpp/io/avro.pxd b/python/cudf/cudf/_lib/cpp/io/avro.pxd index 6efe42e5208..9b683e5bce3 100644 --- a/python/cudf/cudf/_lib/cpp/io/avro.pxd +++ b/python/cudf/cudf/_lib/cpp/io/avro.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libcpp.string cimport string from libcpp.vector cimport vector @@ -11,17 +11,17 @@ cdef extern from "cudf/io/avro.hpp" \ namespace "cudf::io" nogil: cdef cppclass avro_reader_options: - avro_reader_options() except+ - cudf_io_types.source_info get_source() except+ - vector[string] get_columns() except+ - size_type get_skip_rows() except+ - size_type get_num_rows() except+ + avro_reader_options() except + + cudf_io_types.source_info get_source() except + + vector[string] get_columns() except + + size_type get_skip_rows() except + + size_type get_num_rows() except + # setters - void set_columns(vector[string] col_names) except+ - void set_skip_rows(size_type val) except+ - void set_num_rows(size_type val) except+ + void set_columns(vector[string] col_names) except + + void set_skip_rows(size_type val) except + + void set_num_rows(size_type val) except + @staticmethod avro_reader_options_builder builder( @@ -29,13 +29,13 @@ cdef extern from "cudf/io/avro.hpp" \ ) except + cdef cppclass avro_reader_options_builder: - avro_reader_options_builder() except+ + avro_reader_options_builder() except + avro_reader_options_builder( cudf_io_types.source_info src ) except + - avro_reader_options_builder& columns(vector[string] col_names) except+ - avro_reader_options_builder& skip_rows(size_type val) except+ - avro_reader_options_builder& num_rows(size_type val) except+ + avro_reader_options_builder& columns(vector[string] col_names) except + + avro_reader_options_builder& skip_rows(size_type val) except + + avro_reader_options_builder& num_rows(size_type val) except + avro_reader_options build() except + diff --git a/python/cudf/cudf/_lib/cpp/io/csv.pxd b/python/cudf/cudf/_lib/cpp/io/csv.pxd index 4afd8732320..e8064557592 100644 --- a/python/cudf/cudf/_lib/cpp/io/csv.pxd +++ b/python/cudf/cudf/_lib/cpp/io/csv.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
from libc.stdint cimport uint8_t from libcpp cimport bool @@ -20,96 +20,96 @@ cdef extern from "cudf/io/csv.hpp" \ # Getter - cudf_io_types.source_info get_source() except+ + cudf_io_types.source_info get_source() except + # Reader settings - cudf_io_types.compression_type get_compression() except+ - size_t get_byte_range_offset() except+ - size_t get_byte_range_size() except+ - vector[string] get_names() except+ - string get_prefix() except+ - bool is_enabled_mangle_dupe_cols() except+ + cudf_io_types.compression_type get_compression() except + + size_t get_byte_range_offset() except + + size_t get_byte_range_size() except + + vector[string] get_names() except + + string get_prefix() except + + bool is_enabled_mangle_dupe_cols() except + # Filter settings - vector[string] get_use_cols_names() except+ - vector[int] get_use_cols_indexes() except+ - size_type get_nrows() except+ - size_type get_skiprows() except+ - size_type get_skipfooter() except+ - size_type get_header() except+ + vector[string] get_use_cols_names() except + + vector[int] get_use_cols_indexes() except + + size_type get_nrows() except + + size_type get_skiprows() except + + size_type get_skipfooter() except + + size_type get_header() except + # Parsing settings - char get_lineterminator() except+ - char get_delimiter() except+ - char get_thousands() except+ - char get_decimal() except+ - char get_comment() except+ - bool is_enabled_windowslinetermination() except+ - bool is_enabled_delim_whitespace() except+ - bool is_enabled_skipinitialspace() except+ - bool is_enabled_skip_blank_lines() except+ - cudf_io_types.quote_style get_quoting() except+ - char get_quotechar() except+ - bool is_enabled_doublequote() except+ - vector[string] get_parse_dates_names() except+ - vector[int] get_parse_dates_indexes() except+ - vector[string] get_parse_hex_names() except+ - vector[int] get_parse_hex_indexes() except+ + char get_lineterminator() except + + char get_delimiter() except + + char get_thousands() except 
+ + char get_decimal() except + + char get_comment() except + + bool is_enabled_windowslinetermination() except + + bool is_enabled_delim_whitespace() except + + bool is_enabled_skipinitialspace() except + + bool is_enabled_skip_blank_lines() except + + cudf_io_types.quote_style get_quoting() except + + char get_quotechar() except + + bool is_enabled_doublequote() except + + vector[string] get_parse_dates_names() except + + vector[int] get_parse_dates_indexes() except + + vector[string] get_parse_hex_names() except + + vector[int] get_parse_hex_indexes() except + # Conversion settings - vector[string] get_dtype() except+ - vector[string] get_true_values() except+ - vector[string] get_false_values() except+ - vector[string] get_na_values() except+ - bool is_enabled_keep_default_na() except+ - bool is_enabled_na_filter() except+ - bool is_enabled_dayfirst() except+ + vector[string] get_dtype() except + + vector[string] get_true_values() except + + vector[string] get_false_values() except + + vector[string] get_na_values() except + + bool is_enabled_keep_default_na() except + + bool is_enabled_na_filter() except + + bool is_enabled_dayfirst() except + # setter # Reader settings - void set_compression(cudf_io_types.compression_type comp) except+ - void set_byte_range_offset(size_t val) except+ - void set_byte_range_size(size_t val) except+ - void set_names(vector[string] val) except+ - void set_prefix(string pfx) except+ - void set_mangle_dupe_cols(bool val) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void set_byte_range_offset(size_t val) except + + void set_byte_range_size(size_t val) except + + void set_names(vector[string] val) except + + void set_prefix(string pfx) except + + void set_mangle_dupe_cols(bool val) except + # Filter settings - void set_use_cols_names(vector[string] col_names) except+ - void set_use_cols_indexes(vector[int] col_ind) except+ - void set_nrows(size_type n_rows) except+ - void set_skiprows(size_type val) 
except+ - void set_skipfooter(size_type val) except+ - void set_header(size_type hdr) except+ + void set_use_cols_names(vector[string] col_names) except + + void set_use_cols_indexes(vector[int] col_ind) except + + void set_nrows(size_type n_rows) except + + void set_skiprows(size_type val) except + + void set_skipfooter(size_type val) except + + void set_header(size_type hdr) except + # Parsing settings - void set_lineterminator(char val) except+ - void set_delimiter(char val) except+ - void set_thousands(char val) except+ - void set_decimal(char val) except+ - void set_comment(char val) except+ - void enable_windowslinetermination(bool val) except+ - void enable_delim_whitespace(bool val) except+ - void enable_skipinitialspace(bool val) except+ - void enable_skip_blank_lines(bool val) except+ - void set_quoting(cudf_io_types.quote_style style) except+ - void set_quotechar(char val) except+ - void set_doublequote(bool val) except+ - void set_parse_dates(vector[string]) except+ - void set_parse_dates(vector[int]) except+ - void set_parse_hex(vector[string]) except+ - void set_parse_hex(vector[int]) except+ + void set_lineterminator(char val) except + + void set_delimiter(char val) except + + void set_thousands(char val) except + + void set_decimal(char val) except + + void set_comment(char val) except + + void enable_windowslinetermination(bool val) except + + void enable_delim_whitespace(bool val) except + + void enable_skipinitialspace(bool val) except + + void enable_skip_blank_lines(bool val) except + + void set_quoting(cudf_io_types.quote_style style) except + + void set_quotechar(char val) except + + void set_doublequote(bool val) except + + void set_parse_dates(vector[string]) except + + void set_parse_dates(vector[int]) except + + void set_parse_hex(vector[string]) except + + void set_parse_hex(vector[int]) except + # Conversion settings - void set_dtypes(vector[data_type] types) except+ - void set_dtypes(map[string, data_type] types) except+ - void 
set_true_values(vector[string] vals) except+ - void set_false_values(vector[string] vals) except+ - void set_na_values(vector[string] vals) except+ - void enable_keep_default_na(bool val) except+ - void enable_na_filter(bool val) except+ - void enable_dayfirst(bool val) except+ - void set_timestamp_type(data_type type) except+ + void set_dtypes(vector[data_type] types) except + + void set_dtypes(map[string, data_type] types) except + + void set_true_values(vector[string] vals) except + + void set_false_values(vector[string] vals) except + + void set_na_values(vector[string] vals) except + + void enable_keep_default_na(bool val) except + + void enable_na_filter(bool val) except + + void enable_dayfirst(bool val) except + + void set_timestamp_type(data_type type) except + @staticmethod csv_reader_options_builder builder( @@ -125,115 +125,115 @@ cdef extern from "cudf/io/csv.hpp" \ csv_reader_options_builder& source( cudf_io_types.source_info info - ) except+ + ) except + # Reader settings csv_reader_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - csv_reader_options_builder& byte_range_offset(size_t val) except+ - csv_reader_options_builder& byte_range_size(size_t val) except+ - csv_reader_options_builder& names(vector[string] val) except+ - csv_reader_options_builder& prefix(string pfx) except+ - csv_reader_options_builder& mangle_dupe_cols(bool val) except+ + ) except + + csv_reader_options_builder& byte_range_offset(size_t val) except + + csv_reader_options_builder& byte_range_size(size_t val) except + + csv_reader_options_builder& names(vector[string] val) except + + csv_reader_options_builder& prefix(string pfx) except + + csv_reader_options_builder& mangle_dupe_cols(bool val) except + # Filter settings csv_reader_options_builder& use_cols_names( vector[string] col_names - ) except+ + ) except + csv_reader_options_builder& use_cols_indexes( vector[int] col_ind - ) except+ - csv_reader_options_builder& nrows(size_type n_rows) 
except+ - csv_reader_options_builder& skiprows(size_type val) except+ - csv_reader_options_builder& skipfooter(size_type val) except+ - csv_reader_options_builder& header(size_type hdr) except+ + ) except + + csv_reader_options_builder& nrows(size_type n_rows) except + + csv_reader_options_builder& skiprows(size_type val) except + + csv_reader_options_builder& skipfooter(size_type val) except + + csv_reader_options_builder& header(size_type hdr) except + # Parsing settings - csv_reader_options_builder& lineterminator(char val) except+ - csv_reader_options_builder& delimiter(char val) except+ - csv_reader_options_builder& thousands(char val) except+ - csv_reader_options_builder& decimal(char val) except+ - csv_reader_options_builder& comment(char val) except+ - csv_reader_options_builder& windowslinetermination(bool val) except+ - csv_reader_options_builder& delim_whitespace(bool val) except+ - csv_reader_options_builder& skipinitialspace(bool val) except+ - csv_reader_options_builder& skip_blank_lines(bool val) except+ + csv_reader_options_builder& lineterminator(char val) except + + csv_reader_options_builder& delimiter(char val) except + + csv_reader_options_builder& thousands(char val) except + + csv_reader_options_builder& decimal(char val) except + + csv_reader_options_builder& comment(char val) except + + csv_reader_options_builder& windowslinetermination(bool val) except + + csv_reader_options_builder& delim_whitespace(bool val) except + + csv_reader_options_builder& skipinitialspace(bool val) except + + csv_reader_options_builder& skip_blank_lines(bool val) except + csv_reader_options_builder& quoting( cudf_io_types.quote_style style - ) except+ - csv_reader_options_builder& quotechar(char val) except+ - csv_reader_options_builder& doublequote(bool val) except+ - csv_reader_options_builder& parse_dates(vector[string]) except+ - csv_reader_options_builder& parse_dates(vector[int]) except+ + ) except + + csv_reader_options_builder& quotechar(char val) except 
+ + csv_reader_options_builder& doublequote(bool val) except + + csv_reader_options_builder& parse_dates(vector[string]) except + + csv_reader_options_builder& parse_dates(vector[int]) except + # Conversion settings - csv_reader_options_builder& dtypes(vector[string] types) except+ - csv_reader_options_builder& dtypes(vector[data_type] types) except+ + csv_reader_options_builder& dtypes(vector[string] types) except + + csv_reader_options_builder& dtypes(vector[data_type] types) except + csv_reader_options_builder& dtypes( map[string, data_type] types - ) except+ - csv_reader_options_builder& true_values(vector[string] vals) except+ - csv_reader_options_builder& false_values(vector[string] vals) except+ - csv_reader_options_builder& na_values(vector[string] vals) except+ - csv_reader_options_builder& keep_default_na(bool val) except+ - csv_reader_options_builder& na_filter(bool val) except+ - csv_reader_options_builder& dayfirst(bool val) except+ - csv_reader_options_builder& timestamp_type(data_type type) except+ + ) except + + csv_reader_options_builder& true_values(vector[string] vals) except + + csv_reader_options_builder& false_values(vector[string] vals) except + + csv_reader_options_builder& na_values(vector[string] vals) except + + csv_reader_options_builder& keep_default_na(bool val) except + + csv_reader_options_builder& na_filter(bool val) except + + csv_reader_options_builder& dayfirst(bool val) except + + csv_reader_options_builder& timestamp_type(data_type type) except + - csv_reader_options build() except+ + csv_reader_options build() except + cdef cudf_io_types.table_with_metadata read_csv( csv_reader_options &options ) except + cdef cppclass csv_writer_options: - csv_writer_options() except+ - - cudf_io_types.sink_info get_sink() except+ - cudf_table_view.table_view get_table() except+ - cudf_io_types.table_metadata get_metadata() except+ - string get_na_rep() except+ - bool is_enabled_include_header() except+ - size_type get_rows_per_chunk() 
except+ - string get_line_terminator() except+ - char get_inter_column_delimiter() except+ - string get_true_value() except+ - string get_false_value() except+ + csv_writer_options() except + + + cudf_io_types.sink_info get_sink() except + + cudf_table_view.table_view get_table() except + + cudf_io_types.table_metadata get_metadata() except + + string get_na_rep() except + + bool is_enabled_include_header() except + + size_type get_rows_per_chunk() except + + string get_line_terminator() except + + char get_inter_column_delimiter() except + + string get_true_value() except + + string get_false_value() except + # setter - void set_metadata(cudf_io_types.table_metadata* val) except+ - void set_na_rep(string val) except+ - void enable_include_header(bool val) except+ - void set_rows_per_chunk(size_type val) except+ - void set_line_terminator(string term) except+ - void set_inter_column_delimiter(char delim) except+ - void set__true_value(string val) except+ - void set_false_value(string val) except+ + void set_metadata(cudf_io_types.table_metadata* val) except + + void set_na_rep(string val) except + + void enable_include_header(bool val) except + + void set_rows_per_chunk(size_type val) except + + void set_line_terminator(string term) except + + void set_inter_column_delimiter(char delim) except + + void set__true_value(string val) except + + void set_false_value(string val) except + @staticmethod csv_writer_options_builder builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except+ + ) except + cdef cppclass csv_writer_options_builder: - csv_writer_options_builder() except+ + csv_writer_options_builder() except + csv_writer_options_builder( cudf_io_types.sink_info sink, cudf_table_view.table_view table - ) except+ + ) except + csv_writer_options_builder& metadata( cudf_io_types.table_metadata* val - ) except+ - csv_writer_options_builder& na_rep(string val) except+ - csv_writer_options_builder& include_header(bool val) except+ - 
csv_writer_options_builder& rows_per_chunk(size_type val) except+ - csv_writer_options_builder& line_terminator(string term) except+ - csv_writer_options_builder& inter_column_delimiter(char delim) except+ - csv_writer_options_builder& true_value(string val) except+ - csv_writer_options_builder& false_value(string val) except+ - - csv_writer_options build() except+ + ) except + + csv_writer_options_builder& na_rep(string val) except + + csv_writer_options_builder& include_header(bool val) except + + csv_writer_options_builder& rows_per_chunk(size_type val) except + + csv_writer_options_builder& line_terminator(string term) except + + csv_writer_options_builder& inter_column_delimiter(char delim) except + + csv_writer_options_builder& true_value(string val) except + + csv_writer_options_builder& false_value(string val) except + + + csv_writer_options build() except + cdef void write_csv(csv_writer_options args) except + diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd index 7333aad7ddf..ab87e2cbb4b 100644 --- a/python/cudf/cudf/_lib/cpp/io/json.pxd +++ b/python/cudf/cudf/_lib/cpp/io/json.pxd @@ -20,71 +20,71 @@ cdef extern from "cudf/io/json.hpp" \ map[string, schema_element] child_types cdef cppclass json_reader_options: - json_reader_options() except+ - cudf_io_types.source_info get_source() except+ - vector[string] get_dtypes() except+ + json_reader_options() except + + cudf_io_types.source_info get_source() except + + vector[string] get_dtypes() except + cudf_io_types.compression_type get_compression() except + - size_type get_byte_range_offset() except+ - size_type get_byte_range_size() except+ - bool is_enabled_lines() except+ - bool is_enabled_dayfirst() except+ - bool is_enabled_experimental() except+ + size_type get_byte_range_offset() except + + size_type get_byte_range_size() except + + bool is_enabled_lines() except + + bool is_enabled_dayfirst() except + + bool is_enabled_experimental() except + # setter - void 
set_dtypes(vector[data_type] types) except+ - void set_dtypes(map[string, schema_element] types) except+ + void set_dtypes(vector[data_type] types) except + + void set_dtypes(map[string, schema_element] types) except + void set_compression( cudf_io_types.compression_type compression - ) except+ - void set_byte_range_offset(size_type offset) except+ - void set_byte_range_size(size_type size) except+ - void enable_lines(bool val) except+ - void enable_dayfirst(bool val) except+ - void enable_experimental(bool val) except+ - void enable_keep_quotes(bool val) except+ + ) except + + void set_byte_range_offset(size_type offset) except + + void set_byte_range_size(size_type size) except + + void enable_lines(bool val) except + + void enable_dayfirst(bool val) except + + void enable_experimental(bool val) except + + void enable_keep_quotes(bool val) except + @staticmethod json_reader_options_builder builder( cudf_io_types.source_info src - ) except+ + ) except + cdef cppclass json_reader_options_builder: - json_reader_options_builder() except+ + json_reader_options_builder() except + json_reader_options_builder( cudf_io_types.source_info src - ) except+ + ) except + json_reader_options_builder& dtypes( vector[string] types - ) except+ + ) except + json_reader_options_builder& dtypes( vector[data_type] types - ) except+ + ) except + json_reader_options_builder& dtypes( map[string, schema_element] types - ) except+ + ) except + json_reader_options_builder& compression( cudf_io_types.compression_type compression - ) except+ + ) except + json_reader_options_builder& byte_range_offset( size_type offset - ) except+ + ) except + json_reader_options_builder& byte_range_size( size_type size - ) except+ + ) except + json_reader_options_builder& lines( bool val - ) except+ + ) except + json_reader_options_builder& dayfirst( bool val - ) except+ + ) except + json_reader_options_builder& experimental( bool val - ) except+ + ) except + json_reader_options_builder& keep_quotes( bool val 
- ) except+ + ) except + - json_reader_options build() except+ + json_reader_options build() except + cdef cudf_io_types.table_with_metadata read_json( - json_reader_options &options) except+ + json_reader_options &options) except + diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd index 3e44ef98348..ec26fff3779 100644 --- a/python/cudf/cudf/_lib/cpp/io/orc.pxd +++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd @@ -16,45 +16,45 @@ cdef extern from "cudf/io/orc.hpp" \ namespace "cudf::io" nogil: cdef cppclass orc_reader_options: - orc_reader_options() except+ - - cudf_io_types.source_info get_source() except+ - vector[vector[size_type]] get_stripes() except+ - size_type get_skip_rows() except+ - size_type get_num_rows() except+ - bool is_enabled_use_index() except+ - bool is_enabled_use_np_dtypes() except+ - data_type get_timestamp_type() except+ - bool is_enabled_decimals_as_float64() except+ - int get_forced_decimals_scale() except+ - - void set_columns(vector[string] col_names) except+ - void set_stripes(vector[vector[size_type]] strps) except+ - void set_skip_rows(size_type rows) except+ - void set_num_rows(size_type nrows) except+ - void enable_use_index(bool val) except+ - void enable_use_np_dtypes(bool val) except+ - void set_timestamp_type(data_type type) except+ + orc_reader_options() except + + + cudf_io_types.source_info get_source() except + + vector[vector[size_type]] get_stripes() except + + size_type get_skip_rows() except + + size_type get_num_rows() except + + bool is_enabled_use_index() except + + bool is_enabled_use_np_dtypes() except + + data_type get_timestamp_type() except + + bool is_enabled_decimals_as_float64() except + + int get_forced_decimals_scale() except + + + void set_columns(vector[string] col_names) except + + void set_stripes(vector[vector[size_type]] strps) except + + void set_skip_rows(size_type rows) except + + void set_num_rows(size_type nrows) except + + void enable_use_index(bool val) except + + 
void enable_use_np_dtypes(bool val) except + + void set_timestamp_type(data_type type) except + @staticmethod orc_reader_options_builder builder( cudf_io_types.source_info src - ) except+ + ) except + cdef cppclass orc_reader_options_builder: - orc_reader_options_builder() except+ - orc_reader_options_builder(cudf_io_types.source_info &src) except+ + orc_reader_options_builder() except + + orc_reader_options_builder(cudf_io_types.source_info &src) except + - orc_reader_options_builder& columns(vector[string] col_names) except+ + orc_reader_options_builder& columns(vector[string] col_names) except + orc_reader_options_builder& \ - stripes(vector[vector[size_type]] strps) except+ - orc_reader_options_builder& skip_rows(size_type rows) except+ - orc_reader_options_builder& num_rows(size_type nrows) except+ - orc_reader_options_builder& use_index(bool val) except+ - orc_reader_options_builder& use_np_dtypes(bool val) except+ - orc_reader_options_builder& timestamp_type(data_type type) except+ + stripes(vector[vector[size_type]] strps) except + + orc_reader_options_builder& skip_rows(size_type rows) except + + orc_reader_options_builder& num_rows(size_type nrows) except + + orc_reader_options_builder& use_index(bool val) except + + orc_reader_options_builder& use_np_dtypes(bool val) except + + orc_reader_options_builder& timestamp_type(data_type type) except + - orc_reader_options build() except+ + orc_reader_options build() except + cdef cudf_io_types.table_with_metadata read_orc( orc_reader_options opts @@ -62,108 +62,110 @@ cdef extern from "cudf/io/orc.hpp" \ cdef cppclass orc_writer_options: orc_writer_options() - cudf_io_types.sink_info get_sink() except+ - cudf_io_types.compression_type get_compression() except+ - bool is_enabled_statistics() except+ - size_t get_stripe_size_bytes() except+ - size_type get_stripe_size_rows() except+ - size_type get_row_index_stride() except+ - cudf_table_view.table_view get_table() except+ - const 
cudf_io_types.table_input_metadata *get_metadata() except+ + cudf_io_types.sink_info get_sink() except + + cudf_io_types.compression_type get_compression() except + + bool is_enabled_statistics() except + + size_t get_stripe_size_bytes() except + + size_type get_stripe_size_rows() except + + size_type get_row_index_stride() except + + cudf_table_view.table_view get_table() except + + const cudf_io_types.table_input_metadata *get_metadata() except + # setter - void set_compression(cudf_io_types.compression_type comp) except+ - void enable_statistics(bool val) except+ - void set_stripe_size_bytes(size_t val) except+ - void set_stripe_size_rows(size_type val) except+ - void set_row_index_stride(size_type val) except+ - void set_table(cudf_table_view.table_view tbl) except+ - void set_metadata(cudf_io_types.table_input_metadata* meta) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void enable_statistics(bool val) except + + void set_stripe_size_bytes(size_t val) except + + void set_stripe_size_rows(size_type val) except + + void set_row_index_stride(size_type val) except + + void set_table(cudf_table_view.table_view tbl) except + + void set_metadata(cudf_io_types.table_input_metadata* meta) except + void set_key_value_metadata(map[string, string] kvm) except + @staticmethod orc_writer_options_builder builder( cudf_io_types.sink_info &sink, cudf_table_view.table_view &tbl - ) except+ + ) except + cdef cppclass orc_writer_options_builder: # setter orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - orc_writer_options_builder& enable_statistics(bool val) except+ - orc_writer_options_builder& stripe_size_bytes(size_t val) except+ - orc_writer_options_builder& stripe_size_rows(size_type val) except+ - orc_writer_options_builder& row_index_stride(size_type val) except+ + ) except + + orc_writer_options_builder& enable_statistics(bool val) except + + orc_writer_options_builder& stripe_size_bytes(size_t 
val) except + + orc_writer_options_builder& stripe_size_rows(size_type val) except + + orc_writer_options_builder& row_index_stride(size_type val) except + orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except+ + ) except + orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata *meta - ) except+ + ) except + orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except+ + ) except + - orc_writer_options build() except+ + orc_writer_options build() except + cdef void write_orc(orc_writer_options options) except + cdef cppclass chunked_orc_writer_options: - chunked_orc_writer_options() except+ - cudf_io_types.sink_info get_sink() except+ - cudf_io_types.compression_type get_compression() except+ - bool enable_statistics() except+ - size_t stripe_size_bytes() except+ - size_type stripe_size_rows() except+ - size_type row_index_stride() except+ - cudf_table_view.table_view get_table() except+ + chunked_orc_writer_options() except + + cudf_io_types.sink_info get_sink() except + + cudf_io_types.compression_type get_compression() except + + bool enable_statistics() except + + size_t stripe_size_bytes() except + + size_type stripe_size_rows() except + + size_type row_index_stride() except + + cudf_table_view.table_view get_table() except + const cudf_io_types.table_input_metadata *get_metadata( - ) except+ + ) except + # setter - void set_compression(cudf_io_types.compression_type comp) except+ - void enable_statistics(bool val) except+ - void set_stripe_size_bytes(size_t val) except+ - void set_stripe_size_rows(size_type val) except+ - void set_row_index_stride(size_type val) except+ - void set_table(cudf_table_view.table_view tbl) except+ + void set_compression(cudf_io_types.compression_type comp) except + + void enable_statistics(bool val) except + + void set_stripe_size_bytes(size_t val) except + + void set_stripe_size_rows(size_type val) except + + void set_row_index_stride(size_type val) except + + void 
set_table(cudf_table_view.table_view tbl) except + void set_metadata( cudf_io_types.table_input_metadata* meta - ) except+ + ) except + void set_key_value_metadata(map[string, string] kvm) except + @staticmethod chunked_orc_writer_options_builder builder( cudf_io_types.sink_info &sink - ) except+ + ) except + cdef cppclass chunked_orc_writer_options_builder: # setter chunked_orc_writer_options_builder& compression( cudf_io_types.compression_type comp - ) except+ - chunked_orc_writer_options_builder& enable_statistics(bool val) except+ - orc_writer_options_builder& stripe_size_bytes(size_t val) except+ - orc_writer_options_builder& stripe_size_rows(size_type val) except+ - orc_writer_options_builder& row_index_stride(size_type val) except+ + ) except + + chunked_orc_writer_options_builder& enable_statistics( + bool val + ) except + + orc_writer_options_builder& stripe_size_bytes(size_t val) except + + orc_writer_options_builder& stripe_size_rows(size_type val) except + + orc_writer_options_builder& row_index_stride(size_type val) except + chunked_orc_writer_options_builder& table( cudf_table_view.table_view tbl - ) except+ + ) except + chunked_orc_writer_options_builder& metadata( cudf_io_types.table_input_metadata *meta - ) except+ + ) except + chunked_orc_writer_options_builder& key_value_metadata( map[string, string] kvm - ) except+ + ) except + - chunked_orc_writer_options build() except+ + chunked_orc_writer_options build() except + cdef cppclass orc_chunked_writer: - orc_chunked_writer() except+ - orc_chunked_writer(chunked_orc_writer_options args) except+ + orc_chunked_writer() except + + orc_chunked_writer(chunked_orc_writer_options args) except + orc_chunked_writer& write( cudf_table_view.table_view table_, - ) except+ - void close() except+ + ) except + + void close() except + diff --git a/python/cudf/cudf/_lib/cpp/io/parquet.pxd b/python/cudf/cudf/_lib/cpp/io/parquet.pxd index f388fff3beb..98b839ba9b8 100644 --- a/python/cudf/cudf/_lib/cpp/io/parquet.pxd 
+++ b/python/cudf/cudf/_lib/cpp/io/parquet.pxd @@ -66,11 +66,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cudf_io_types.statistics_freq get_stats_level() except + cudf_table_view.table_view get_table() except + const cudf_io_types.table_input_metadata get_metadata() except + - string get_column_chunks_file_paths() except+ - size_t get_row_group_size_bytes() except+ - size_type get_row_group_size_rows() except+ - size_t get_max_page_size_bytes() except+ - size_type get_max_page_size_rows() except+ + string get_column_chunks_file_paths() except + + size_t get_row_group_size_bytes() except + + size_type get_row_group_size_rows() except + + size_t get_max_page_size_bytes() except + + size_type get_max_page_size_rows() except + void set_partitions( vector[cudf_io_types.partition_info] partitions @@ -90,10 +90,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: void set_column_chunks_file_paths( vector[string] column_chunks_file_paths ) except + - void set_row_group_size_bytes(size_t val) except+ - void set_row_group_size_rows(size_type val) except+ - void set_max_page_size_bytes(size_t val) except+ - void set_max_page_size_rows(size_type val) except+ + void set_row_group_size_bytes(size_t val) except + + void set_row_group_size_rows(size_type val) except + + void set_max_page_size_bytes(size_t val) except + + void set_max_page_size_rows(size_type val) except + @staticmethod parquet_writer_options_builder builder( @@ -131,16 +131,16 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ) except + parquet_writer_options_builder& row_group_size_bytes( size_t val - ) except+ + ) except + parquet_writer_options_builder& row_group_size_rows( size_type val - ) except+ + ) except + parquet_writer_options_builder& max_page_size_bytes( size_t val - ) except+ + ) except + parquet_writer_options_builder& max_page_size_rows( size_type val - ) except+ + ) except + parquet_writer_options build() except + @@ -154,11 
+154,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: cudf_io_types.compression_type get_compression() except + cudf_io_types.statistics_freq get_stats_level() except + cudf_io_types.table_input_metadata* get_metadata( - ) except+ - size_t get_row_group_size_bytes() except+ - size_type get_row_group_size_rows() except+ - size_t get_max_page_size_bytes() except+ - size_type get_max_page_size_rows() except+ + ) except + + size_t get_row_group_size_bytes() except + + size_type get_row_group_size_rows() except + + size_t get_max_page_size_bytes() except + + size_type get_max_page_size_rows() except + void set_metadata( cudf_io_types.table_input_metadata *m @@ -172,10 +172,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: void set_compression( cudf_io_types.compression_type compression ) except + - void set_row_group_size_bytes(size_t val) except+ - void set_row_group_size_rows(size_type val) except+ - void set_max_page_size_bytes(size_t val) except+ - void set_max_page_size_rows(size_type val) except+ + void set_row_group_size_bytes(size_t val) except + + void set_row_group_size_rows(size_type val) except + + void set_max_page_size_bytes(size_t val) except + + void set_max_page_size_rows(size_type val) except + @staticmethod chunked_parquet_writer_options_builder builder( @@ -201,32 +201,32 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil: ) except + chunked_parquet_writer_options_builder& row_group_size_bytes( size_t val - ) except+ + ) except + chunked_parquet_writer_options_builder& row_group_size_rows( size_type val - ) except+ + ) except + chunked_parquet_writer_options_builder& max_page_size_bytes( size_t val - ) except+ + ) except + chunked_parquet_writer_options_builder& max_page_size_rows( size_type val - ) except+ + ) except + chunked_parquet_writer_options build() except + cdef cppclass parquet_chunked_writer: - parquet_chunked_writer() except+ - 
parquet_chunked_writer(chunked_parquet_writer_options args) except+ + parquet_chunked_writer() except + + parquet_chunked_writer(chunked_parquet_writer_options args) except + parquet_chunked_writer& write( cudf_table_view.table_view table_, - ) except+ + ) except + parquet_chunked_writer& write( const cudf_table_view.table_view& table_, const vector[cudf_io_types.partition_info]& partitions, - ) except+ + ) except + unique_ptr[vector[uint8_t]] close( vector[string] column_chunks_file_paths, - ) except+ + ) except + cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata( const vector[unique_ptr[vector[uint8_t]]]& metadata_list diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f00c7d1f2b5..126da0f883a 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3333,7 +3333,11 @@ def agg(self, aggs, axis=None): @_cudf_nvtx_annotate def nlargest(self, n, columns, keep="first"): - """Get the rows of the DataFrame sorted by the n largest value of *columns* + """Return the first *n* rows ordered by *columns* in descending order. + + Return the first *n* rows with the largest values in *columns*, in + descending order. The columns that are not specified are returned as + well, but not used for ordering. Parameters ---------- @@ -3396,7 +3400,11 @@ def nlargest(self, n, columns, keep="first"): return self._n_largest_or_smallest(True, n, columns, keep) def nsmallest(self, n, columns, keep="first"): - """Get the rows of the DataFrame sorted by the n smallest value of *columns* + """Return the first *n* rows ordered by *columns* in ascending order. + + Return the first *n* rows with the smallest values in *columns*, in + ascending order. The columns that are not specified are returned as + well, but not used for ordering. 
Parameters ---------- @@ -5879,7 +5887,7 @@ def _columns_view(self, columns): @_cudf_nvtx_annotate def select_dtypes(self, include=None, exclude=None): - """Return a subset of the DataFrame’s columns based on the column dtypes. + """Return a subset of the DataFrame's columns based on the column dtypes. Parameters ---------- @@ -5938,7 +5946,7 @@ def select_dtypes(self, include=None, exclude=None): 3 False 2.0 4 True 1.0 5 False 2.0 - """ + """ # noqa: E501 # code modified from: # https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196 diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0acacc798a1..bbb1c95bef6 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4719,10 +4719,12 @@ def _drop_rows_by_labels( level: Union[int, str], errors: str, ) -> DataFrameOrSeries: - """Remove rows specified by `labels`. If `errors="raise"`, an error is raised - if some items in `labels` do not exist in `obj._index`. + """Remove rows specified by `labels`. - Will raise if level(int) is greater or equal to index nlevels + If `errors="raise"`, an error is raised if some items in `labels` do not + exist in `obj._index`. + + Will raise if level(int) is greater or equal to index nlevels. """ if isinstance(level, int) and level >= obj.index.nlevels: raise ValueError("Param level out of bounds.") diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 7493202a3d1..07e1782d788 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -889,7 +889,7 @@ def reindex(self, *args, **kwargs): DataFrame, followed by the original Series values. When `drop` is True, a `Series` is returned. In either case, if ``inplace=True``, no value is returned. 
-""", +""", # noqa: E501 example=""" >>> series = cudf.Series(['a', 'b', 'c', 'd'], index=[10, 11, 12, 13]) >>> series @@ -2998,7 +2998,7 @@ def describe( @_cudf_nvtx_annotate def digitize(self, bins, right=False): - """Return the indices of the bins to which each value in series belongs. + """Return the indices of the bins to which each value belongs. Notes ----- diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py index cecf0c36bc2..a0915951240 100644 --- a/python/cudf/cudf/utils/hash_vocab_utils.py +++ b/python/cudf/cudf/utils/hash_vocab_utils.py @@ -253,9 +253,10 @@ def hash_vocab( hashed_vocab = {_sdbm_hash(key): value for key, value in vocab.items()} - error_message = """Collision occurred and only sdbm token hash current supported :( - Can be extended to use random hashes if needed""" - + error_message = ( + "A collision occurred and only sdbm token hash is currently " + "supported. This can be extended to use random hashes if needed." + ) assert len(hashed_vocab) == len(vocab), error_message ( diff --git a/setup.cfg b/setup.cfg index d196e8605b2..d810178c44b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,9 @@ # Copyright (c) 2017-2022, NVIDIA CORPORATION. [flake8] -filename = *.py, *.pyx, *.pxd +filename = *.py, *.pyx, *.pxd, *.pxi exclude = __init__.py, *.egg, build, docs, .git +force-check = True ignore = # line break before binary operator W503, @@ -14,11 +15,13 @@ per-file-ignores = # E225: Missing whitespace around operators (breaks cython casting syntax like <int>) # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*) # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax) + # E275: Missing whitespace after keyword (Doesn't work with Cython except?) 
# E402: invalid syntax (works for Python, not Cython) # E999: invalid syntax (works for Python, not Cython) # W504: line break after binary operator (breaks lines that end with a pointer) - *.pyx: E211, E225, E226, E227, E402, E999, W504 - *.pxd: E211, E225, E226, E227, E402, E999, W504 + *.pyx: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxd: E211, E225, E226, E227, E275, E402, E999, W504 + *.pxi: E211, E225, E226, E227, E275, E402, E999, W504 [pydocstyle] # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather