From e1f1f62aefb1e3f08b09ee7c1f913417741f4b53 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Thu, 17 Aug 2023 14:03:33 -0700 Subject: [PATCH] use empty --- cpp/src/io/avro/reader_impl.cu | 2 +- cpp/src/io/csv/reader_impl.cu | 4 ++-- cpp/src/io/json/json_column.cu | 2 +- cpp/src/io/json/legacy/reader_impl.cu | 6 +++--- cpp/src/io/json/nested_json_gpu.cu | 4 ++-- cpp/src/io/orc/reader_impl.cu | 6 +++--- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/parquet/compact_protocol_writer.cpp | 16 ++++++++-------- cpp/src/io/parquet/reader_impl.cpp | 4 ++-- cpp/src/io/parquet/writer_impl.cu | 16 ++++++++-------- cpp/src/io/utilities/column_buffer.cpp | 2 +- cpp/src/io/utilities/hostdevice_vector.hpp | 1 + 12 files changed, 33 insertions(+), 32 deletions(-) diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index c25010c4e5f..f73e1db91c3 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -499,7 +499,7 @@ table_with_metadata read_avro(std::unique_ptr&& source, // Select only columns required by the options auto selected_columns = meta.select_columns(options.get_columns()); - if (selected_columns.size() != 0) { + if (not selected_columns.empty()) { // Get a list of column data types std::vector column_types; for (auto const& col : selected_columns) { diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 16667e4394d..ac50e8a79a7 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -973,14 +973,14 @@ parse_options make_parse_options(csv_reader_options const& reader_opts, // Handle user-defined true values, whereby field data is substituted with a // boolean true or numeric `1` value - if (reader_opts.get_true_values().size() != 0) { + if (not reader_opts.get_true_values().empty()) { parse_opts.trie_true = cudf::detail::create_serialized_trie(reader_opts.get_true_values(), stream); } // Handle user-defined false values, whereby field data is substituted with a // boolean false or numeric `0` value - if (reader_opts.get_false_values().size() != 0) { + if (not reader_opts.get_false_values().empty()) { parse_opts.trie_false = cudf::detail::create_serialized_trie(reader_opts.get_false_values(), stream); } diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 0cd8edaf78c..487a4bc4068 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -958,7 +958,7 @@ table_with_metadata device_parse_nested_json(device_span d_input, options.is_enabled_lines() ? root_column : root_column.child_columns.begin()->second; // Zero row entries - if (data_root.type == json_col_t::ListColumn && data_root.child_columns.size() == 0) { + if (data_root.type == json_col_t::ListColumn && data_root.child_columns.empty()) { return table_with_metadata{std::make_unique(std::vector>{})}; } diff --git a/cpp/src/io/json/legacy/reader_impl.cu b/cpp/src/io/json/legacy/reader_impl.cu index c524c041df7..1ae7ccf71c1 100644 --- a/cpp/src/io/json/legacy/reader_impl.cu +++ b/cpp/src/io/json/legacy/reader_impl.cu @@ -438,7 +438,7 @@ std::vector get_data_types(json_reader_options const& reader_opts, }}, reader_opts.get_dtypes()); } else { - CUDF_EXPECTS(rec_starts.size() != 0, "No data available for data type inference.\n"); + CUDF_EXPECTS(not rec_starts.empty(), "No data available for data type inference.\n"); auto const num_columns = column_names.size(); auto const do_set_null_count = column_map->capacity() > 0; @@ -612,7 +612,7 @@ table_with_metadata read_json(host_span> sources, sources, reader_opts.get_compression(), range_offset, range_size, range_size_padded); host_span h_data{reinterpret_cast(h_raw_data.data()), h_raw_data.size()}; - CUDF_EXPECTS(h_data.size() != 0, "Ingest failed: uncompressed input data has zero size.\n"); + CUDF_EXPECTS(not h_data.empty(), "Ingest failed: uncompressed input data has zero size.\n"); auto d_data = rmm::device_uvector(0, stream); @@ -629,7 +629,7 @@ table_with_metadata read_json(host_span> sources, d_data = upload_data_to_device(reader_opts, h_data, rec_starts, stream); } - CUDF_EXPECTS(d_data.size() != 0, "Error uploading input data to the GPU.\n"); + CUDF_EXPECTS(not d_data.is_empty(), "Error uploading input data to the GPU.\n"); auto column_names_and_map = get_column_names_and_map(parse_opts.view(), h_data, rec_starts, d_data, stream); diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 8552db9a719..9a08b5f9353 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -1647,7 +1647,7 @@ void make_json_column(json_column& root_column, CUDF_EXPECTS(current_data_path.top().column->child_columns.size() <= 1, "Encountered a list column with more than a single child column"); // The child column has yet to be created - if (current_data_path.top().column->child_columns.size() == 0) { + if (current_data_path.top().column->child_columns.empty()) { current_data_path.top().column->child_columns.emplace(std::string{list_child_name}, json_column{json_col_t::Unknown}); current_data_path.top().column->column_order.push_back(list_child_name); @@ -2119,7 +2119,7 @@ table_with_metadata host_parse_nested_json(device_span d_input, new_line_delimited_json ? root_column : root_column.child_columns.begin()->second; // Zero row entries - if (data_root.type == json_col_t::ListColumn && data_root.child_columns.size() == 0) { + if (data_root.type == json_col_t::ListColumn && data_root.child_columns.empty()) { return table_with_metadata{std::make_unique
(std::vector>{})}; } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 0f1b72d6126..157269cf52e 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -149,7 +149,7 @@ std::size_t gather_stream_info(std::size_t stripe_index, // for each of its fields. There is only a PRESENT stream, which // needs to be included for the reader. auto const schema_type = types[column_id]; - if (schema_type.subtypes.size() != 0) { + if (not schema_type.subtypes.empty()) { if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) { for (auto const& idx : schema_type.subtypes) { auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1; @@ -249,7 +249,7 @@ rmm::device_buffer decompress_stripe_data( // Required by `gpuDecodeOrcColumnData`. rmm::device_buffer decomp_data( cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream); - if (decomp_data.size() == 0) { return decomp_data; } + if (decomp_data.is_empty()) { return decomp_data; } rmm::device_uvector> inflate_in( num_compressed_blocks + num_uncompressed_blocks, stream); @@ -1232,7 +1232,7 @@ table_with_metadata reader::impl::read(uint64_t skip_rows, CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read."); } - if (stripe_data.size() == 0) { continue; } + if (stripe_data.empty()) { continue; } // Process dataset chunk pages into output columns auto row_groups = diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 881fc3b5caf..6a3c5f0134d 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -2625,7 +2625,7 @@ void writer::impl::close() }); // Write statistics metadata - if (_orc_meta.stripeStats.size() != 0) { + if (not _orc_meta.stripeStats.empty()) { ProtobufWriter pbw((_compression_kind != NONE) ? 3 : 0); pbw.write(_orc_meta); add_uncompressed_block_headers(_compression_kind, _compression_blocksize, pbw.buffer()); diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index 8aaff77d0dd..b2a89129645 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -31,8 +31,8 @@ size_t CompactProtocolWriter::write(FileMetaData const& f) c.field_struct_list(2, f.schema); c.field_int(3, f.num_rows); c.field_struct_list(4, f.row_groups); - if (f.key_value_metadata.size() != 0) { c.field_struct_list(5, f.key_value_metadata); } - if (f.created_by.size() != 0) { c.field_string(6, f.created_by); } + if (not f.key_value_metadata.empty()) { c.field_struct_list(5, f.key_value_metadata); } + if (not f.created_by.empty()) { c.field_string(6, f.created_by); } if (f.column_order_listsize != 0) { // Dummy list of struct containing an empty field1 struct c.put_field_header(7, c.current_field(), ST_FLD_LIST); @@ -167,14 +167,14 @@ size_t CompactProtocolWriter::write(KeyValue const& k) { CompactProtocolFieldWriter c(*this); c.field_string(1, k.key); - if (k.value.size() != 0) { c.field_string(2, k.value); } + if (not k.value.empty()) { c.field_string(2, k.value); } return c.value(); } size_t CompactProtocolWriter::write(ColumnChunk const& s) { CompactProtocolFieldWriter c(*this); - if (s.file_path.size() != 0) { c.field_string(1, s.file_path); } + if (not s.file_path.empty()) { c.field_string(1, s.file_path); } c.field_int(2, s.file_offset); c.field_struct(3, s.meta_data); if (s.offset_index_length != 0) { @@ -208,12 +208,12 @@ size_t CompactProtocolWriter::write(ColumnChunkMetaData const& s) size_t CompactProtocolWriter::write(Statistics const& s) { CompactProtocolFieldWriter c(*this); - if (s.max.size() != 0) { c.field_binary(1, s.max); } - if (s.min.size() != 0) { c.field_binary(2, s.min); } + if (not s.max.empty()) { c.field_binary(1, s.max); } + if (not s.min.empty()) { c.field_binary(2, s.min); } if (s.null_count != -1) { c.field_int(3, s.null_count); } if (s.distinct_count != -1) { c.field_int(4, s.distinct_count); } - if (s.max_value.size() != 0) { c.field_binary(5, s.max_value); } - if (s.min_value.size() != 0) { c.field_binary(6, s.min_value); } + if (not s.max_value.empty()) { c.field_binary(5, s.max_value); } + if (not s.min_value.empty()) { c.field_binary(6, s.min_value); } return c.value(); } diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 88e520c99a4..5a44eb6baa0 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -328,7 +328,7 @@ void reader::impl::prepare_data(int64_t skip_rows, auto const [skip_rows_corrected, num_rows_corrected, row_groups_info] = _metadata->select_row_groups(row_group_indices, skip_rows, num_rows, output_types, filter); - if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) { + if (num_rows_corrected > 0 && not row_groups_info.empty() && not _input_columns.empty()) { load_and_decompress_data(row_groups_info, num_rows_corrected); preprocess_pages( skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit); @@ -368,7 +368,7 @@ table_with_metadata reader::impl::read_chunk_internal( auto out_columns = std::vector>{}; out_columns.reserve(_output_buffers.size()); - if (!has_next() || _chunk_read_info.size() == 0) { + if (!has_next() || _chunk_read_info.empty()) { return finalize_output(out_metadata, out_columns, filter); } diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 021b6cffa5a..c5fc852d20b 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -202,7 +202,7 @@ parquet::Compression to_parquet_compression(compression_type compression) */ size_t column_size(column_view const& column, rmm::cuda_stream_view stream) { - if (column.size() == 0) { return 0; } + if (column.is_empty()) { return 0; } if (is_fixed_width(column.type())) { return size_of(column.type()) * column.size(); @@ -573,7 +573,7 @@ std::vector construct_schema_tree( CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0, "Binary column's corresponding metadata should have zero or two children!"); if (col_meta.num_children() > 0) { - CUDF_EXPECTS(col->children[lists_column_view::child_column_index]->children.size() == 0, + CUDF_EXPECTS(col->children[lists_column_view::child_column_index]->children.empty(), "Binary column must not be nested!"); } @@ -859,7 +859,7 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, _is_list = (_max_rep_level > 0); - if (cudf_col.size() == 0) { return; } + if (cudf_col.is_empty()) { return; } if (_is_list) { // Top level column's offsets are not applied to all children. Get the effective offset and @@ -1103,7 +1103,7 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, std::vector> dict_data; std::vector> dict_index; - if (h_chunks.size() == 0) { return std::pair(std::move(dict_data), std::move(dict_index)); } + if (h_chunks.empty()) { return std::pair(std::move(dict_data), std::move(dict_index)); } if (dict_policy == dictionary_policy::NEVER) { thrust::for_each( @@ -2369,11 +2369,11 @@ std::unique_ptr> writer::merge_row_group_metadata( } } // Reader doesn't currently populate column_order, so infer it here - if (md.row_groups.size() != 0) { + if (not md.row_groups.empty()) { auto const is_valid_stats = [](auto const& stats) { - return stats.max.size() != 0 || stats.min.size() != 0 || stats.null_count != -1 || - stats.distinct_count != -1 || stats.max_value.size() != 0 || - stats.min_value.size() != 0; + return not stats.max.empty() || not stats.min.empty() || stats.null_count != -1 || + stats.distinct_count != -1 || not stats.max_value.empty() || + not stats.min_value.empty(); }; uint32_t num_columns = static_cast(md.row_groups[0].columns.size()); diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp index 9b8754d6318..3248d94d60a 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -68,7 +68,7 @@ std::unique_ptr inline_column_buffer::make_string_column_impl(rmm::cuda_ // no need for copies, just transfer ownership of the data_buffers to the columns auto const state = mask_state::UNALLOCATED; auto str_col = - _string_data.size() == 0 + _string_data.is_empty() ? make_empty_column(data_type{type_id::INT8}) : std::make_unique(data_type{type_id::INT8}, string_size(), diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp index c8655cdcc7c..a6a93c41472 100644 --- a/cpp/src/io/utilities/hostdevice_vector.hpp +++ b/cpp/src/io/utilities/hostdevice_vector.hpp @@ -94,6 +94,7 @@ class hostdevice_vector { [[nodiscard]] size_t capacity() const noexcept { return d_data.size(); } [[nodiscard]] size_t size() const noexcept { return current_size; } [[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); } + [[nodiscard]] bool empty() const noexcept { return size() == 0; } [[nodiscard]] T& operator[](size_t i) { return host_data[i]; } [[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }