Skip to content

Commit

Permalink
Use empty() instead of size() where possible (#13908)
Browse files Browse the repository at this point in the history
Adds `empty()` to `hostdevice_vector`.
Check `empty()` instead of checking the column/buffer/vector `size()`.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/13908
  • Loading branch information
vuule authored Aug 18, 2023
1 parent 28b5b6e commit f233422
Show file tree
Hide file tree
Showing 12 changed files with 33 additions and 32 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/avro/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ table_with_metadata read_avro(std::unique_ptr<cudf::io::datasource>&& source,

// Select only columns required by the options
auto selected_columns = meta.select_columns(options.get_columns());
if (selected_columns.size() != 0) {
if (not selected_columns.empty()) {
// Get a list of column data types
std::vector<data_type> column_types;
for (auto const& col : selected_columns) {
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/csv/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -972,14 +972,14 @@ parse_options make_parse_options(csv_reader_options const& reader_opts,

// Handle user-defined true values, whereby field data is substituted with a
// boolean true or numeric `1` value
if (reader_opts.get_true_values().size() != 0) {
if (not reader_opts.get_true_values().empty()) {
parse_opts.trie_true =
cudf::detail::create_serialized_trie(reader_opts.get_true_values(), stream);
}

// Handle user-defined false values, whereby field data is substituted with a
// boolean false or numeric `0` value
if (reader_opts.get_false_values().size() != 0) {
if (not reader_opts.get_false_values().empty()) {
parse_opts.trie_false =
cudf::detail::create_serialized_trie(reader_opts.get_false_values(), stream);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/json/json_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input,
options.is_enabled_lines() ? root_column : root_column.child_columns.begin()->second;

// Zero row entries
if (data_root.type == json_col_t::ListColumn && data_root.child_columns.size() == 0) {
if (data_root.type == json_col_t::ListColumn && data_root.child_columns.empty()) {
return table_with_metadata{std::make_unique<table>(std::vector<std::unique_ptr<column>>{})};
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/json/legacy/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ std::vector<data_type> get_data_types(json_reader_options const& reader_opts,
}},
reader_opts.get_dtypes());
} else {
CUDF_EXPECTS(rec_starts.size() != 0, "No data available for data type inference.\n");
CUDF_EXPECTS(not rec_starts.empty(), "No data available for data type inference.\n");
auto const num_columns = column_names.size();
auto const do_set_null_count = column_map->capacity() > 0;

Expand Down Expand Up @@ -612,7 +612,7 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
sources, reader_opts.get_compression(), range_offset, range_size, range_size_padded);
host_span<char const> h_data{reinterpret_cast<char const*>(h_raw_data.data()), h_raw_data.size()};

CUDF_EXPECTS(h_data.size() != 0, "Ingest failed: uncompressed input data has zero size.\n");
CUDF_EXPECTS(not h_data.empty(), "Ingest failed: uncompressed input data has zero size.\n");

auto d_data = rmm::device_uvector<char>(0, stream);

Expand All @@ -629,7 +629,7 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
d_data = upload_data_to_device(reader_opts, h_data, rec_starts, stream);
}

CUDF_EXPECTS(d_data.size() != 0, "Error uploading input data to the GPU.\n");
CUDF_EXPECTS(not d_data.is_empty(), "Error uploading input data to the GPU.\n");

auto column_names_and_map =
get_column_names_and_map(parse_opts.view(), h_data, rec_starts, d_data, stream);
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/json/nested_json_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1647,7 +1647,7 @@ void make_json_column(json_column& root_column,
CUDF_EXPECTS(current_data_path.top().column->child_columns.size() <= 1,
"Encountered a list column with more than a single child column");
// The child column has yet to be created
if (current_data_path.top().column->child_columns.size() == 0) {
if (current_data_path.top().column->child_columns.empty()) {
current_data_path.top().column->child_columns.emplace(std::string{list_child_name},
json_column{json_col_t::Unknown});
current_data_path.top().column->column_order.push_back(list_child_name);
Expand Down Expand Up @@ -2119,7 +2119,7 @@ table_with_metadata host_parse_nested_json(device_span<SymbolT const> d_input,
new_line_delimited_json ? root_column : root_column.child_columns.begin()->second;

// Zero row entries
if (data_root.type == json_col_t::ListColumn && data_root.child_columns.size() == 0) {
if (data_root.type == json_col_t::ListColumn && data_root.child_columns.empty()) {
return table_with_metadata{std::make_unique<table>(std::vector<std::unique_ptr<column>>{})};
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/orc/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ std::size_t gather_stream_info(std::size_t stripe_index,
// for each of its fields. There is only a PRESENT stream, which
// needs to be included for the reader.
auto const schema_type = types[column_id];
if (schema_type.subtypes.size() != 0) {
if (not schema_type.subtypes.empty()) {
if (schema_type.kind == orc::STRUCT && stream.kind == orc::PRESENT) {
for (auto const& idx : schema_type.subtypes) {
auto child_idx = (idx < orc2gdf.size()) ? orc2gdf[idx] : -1;
Expand Down Expand Up @@ -249,7 +249,7 @@ rmm::device_buffer decompress_stripe_data(
// Required by `gpuDecodeOrcColumnData`.
rmm::device_buffer decomp_data(
cudf::util::round_up_safe(total_decomp_size, BUFFER_PADDING_MULTIPLE), stream);
if (decomp_data.size() == 0) { return decomp_data; }
if (decomp_data.is_empty()) { return decomp_data; }

rmm::device_uvector<device_span<uint8_t const>> inflate_in(
num_compressed_blocks + num_uncompressed_blocks, stream);
Expand Down Expand Up @@ -1232,7 +1232,7 @@ table_with_metadata reader::impl::read(uint64_t skip_rows,
CUDF_EXPECTS(task.first.get() == task.second, "Unexpected discrepancy in bytes read.");
}

if (stripe_data.size() == 0) { continue; }
if (stripe_data.empty()) { continue; }

// Process dataset chunk pages into output columns
auto row_groups =
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2625,7 +2625,7 @@ void writer::impl::close()
});

// Write statistics metadata
if (_orc_meta.stripeStats.size() != 0) {
if (not _orc_meta.stripeStats.empty()) {
ProtobufWriter pbw((_compression_kind != NONE) ? 3 : 0);
pbw.write(_orc_meta);
add_uncompressed_block_headers(_compression_kind, _compression_blocksize, pbw.buffer());
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/io/parquet/compact_protocol_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ size_t CompactProtocolWriter::write(FileMetaData const& f)
c.field_struct_list(2, f.schema);
c.field_int(3, f.num_rows);
c.field_struct_list(4, f.row_groups);
if (f.key_value_metadata.size() != 0) { c.field_struct_list(5, f.key_value_metadata); }
if (f.created_by.size() != 0) { c.field_string(6, f.created_by); }
if (not f.key_value_metadata.empty()) { c.field_struct_list(5, f.key_value_metadata); }
if (not f.created_by.empty()) { c.field_string(6, f.created_by); }
if (f.column_order_listsize != 0) {
// Dummy list of struct containing an empty field1 struct
c.put_field_header(7, c.current_field(), ST_FLD_LIST);
Expand Down Expand Up @@ -167,14 +167,14 @@ size_t CompactProtocolWriter::write(KeyValue const& k)
{
CompactProtocolFieldWriter c(*this);
c.field_string(1, k.key);
if (k.value.size() != 0) { c.field_string(2, k.value); }
if (not k.value.empty()) { c.field_string(2, k.value); }
return c.value();
}

size_t CompactProtocolWriter::write(ColumnChunk const& s)
{
CompactProtocolFieldWriter c(*this);
if (s.file_path.size() != 0) { c.field_string(1, s.file_path); }
if (not s.file_path.empty()) { c.field_string(1, s.file_path); }
c.field_int(2, s.file_offset);
c.field_struct(3, s.meta_data);
if (s.offset_index_length != 0) {
Expand Down Expand Up @@ -208,12 +208,12 @@ size_t CompactProtocolWriter::write(ColumnChunkMetaData const& s)
size_t CompactProtocolWriter::write(Statistics const& s)
{
CompactProtocolFieldWriter c(*this);
if (s.max.size() != 0) { c.field_binary(1, s.max); }
if (s.min.size() != 0) { c.field_binary(2, s.min); }
if (not s.max.empty()) { c.field_binary(1, s.max); }
if (not s.min.empty()) { c.field_binary(2, s.min); }
if (s.null_count != -1) { c.field_int(3, s.null_count); }
if (s.distinct_count != -1) { c.field_int(4, s.distinct_count); }
if (s.max_value.size() != 0) { c.field_binary(5, s.max_value); }
if (s.min_value.size() != 0) { c.field_binary(6, s.min_value); }
if (not s.max_value.empty()) { c.field_binary(5, s.max_value); }
if (not s.min_value.empty()) { c.field_binary(6, s.min_value); }
return c.value();
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/reader_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ void reader::impl::prepare_data(int64_t skip_rows,
auto const [skip_rows_corrected, num_rows_corrected, row_groups_info] =
_metadata->select_row_groups(row_group_indices, skip_rows, num_rows, output_types, filter);

if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) {
if (num_rows_corrected > 0 && not row_groups_info.empty() && not _input_columns.empty()) {
load_and_decompress_data(row_groups_info, num_rows_corrected);
preprocess_pages(
skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit);
Expand Down Expand Up @@ -368,7 +368,7 @@ table_with_metadata reader::impl::read_chunk_internal(
auto out_columns = std::vector<std::unique_ptr<column>>{};
out_columns.reserve(_output_buffers.size());

if (!has_next() || _chunk_read_info.size() == 0) {
if (!has_next() || _chunk_read_info.empty()) {
return finalize_output(out_metadata, out_columns, filter);
}

Expand Down
16 changes: 8 additions & 8 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ parquet::Compression to_parquet_compression(compression_type compression)
*/
size_t column_size(column_view const& column, rmm::cuda_stream_view stream)
{
if (column.size() == 0) { return 0; }
if (column.is_empty()) { return 0; }

if (is_fixed_width(column.type())) {
return size_of(column.type()) * column.size();
Expand Down Expand Up @@ -573,7 +573,7 @@ std::vector<schema_tree_node> construct_schema_tree(
CUDF_EXPECTS(col_meta.num_children() == 2 or col_meta.num_children() == 0,
"Binary column's corresponding metadata should have zero or two children!");
if (col_meta.num_children() > 0) {
CUDF_EXPECTS(col->children[lists_column_view::child_column_index]->children.size() == 0,
CUDF_EXPECTS(col->children[lists_column_view::child_column_index]->children.empty(),
"Binary column must not be nested!");
}

Expand Down Expand Up @@ -859,7 +859,7 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node,

_is_list = (_max_rep_level > 0);

if (cudf_col.size() == 0) { return; }
if (cudf_col.is_empty()) { return; }

if (_is_list) {
// Top level column's offsets are not applied to all children. Get the effective offset and
Expand Down Expand Up @@ -1103,7 +1103,7 @@ build_chunk_dictionaries(hostdevice_2dvector<gpu::EncColumnChunk>& chunks,
std::vector<rmm::device_uvector<size_type>> dict_data;
std::vector<rmm::device_uvector<size_type>> dict_index;

if (h_chunks.size() == 0) { return std::pair(std::move(dict_data), std::move(dict_index)); }
if (h_chunks.empty()) { return std::pair(std::move(dict_data), std::move(dict_index)); }

if (dict_policy == dictionary_policy::NEVER) {
thrust::for_each(
Expand Down Expand Up @@ -2369,11 +2369,11 @@ std::unique_ptr<std::vector<uint8_t>> writer::merge_row_group_metadata(
}
}
// Reader doesn't currently populate column_order, so infer it here
if (md.row_groups.size() != 0) {
if (not md.row_groups.empty()) {
auto const is_valid_stats = [](auto const& stats) {
return stats.max.size() != 0 || stats.min.size() != 0 || stats.null_count != -1 ||
stats.distinct_count != -1 || stats.max_value.size() != 0 ||
stats.min_value.size() != 0;
return not stats.max.empty() || not stats.min.empty() || stats.null_count != -1 ||
stats.distinct_count != -1 || not stats.max_value.empty() ||
not stats.min_value.empty();
};

uint32_t num_columns = static_cast<uint32_t>(md.row_groups[0].columns.size());
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/utilities/column_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ std::unique_ptr<column> inline_column_buffer::make_string_column_impl(rmm::cuda_
// no need for copies, just transfer ownership of the data_buffers to the columns
auto const state = mask_state::UNALLOCATED;
auto str_col =
_string_data.size() == 0
_string_data.is_empty()
? make_empty_column(data_type{type_id::INT8})
: std::make_unique<column>(data_type{type_id::INT8},
string_size(),
Expand Down
1 change: 1 addition & 0 deletions cpp/src/io/utilities/hostdevice_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class hostdevice_vector {
[[nodiscard]] size_t capacity() const noexcept { return d_data.size(); }
[[nodiscard]] size_t size() const noexcept { return current_size; }
[[nodiscard]] size_t size_bytes() const noexcept { return sizeof(T) * size(); }
[[nodiscard]] bool empty() const noexcept { return size() == 0; }

[[nodiscard]] T& operator[](size_t i) { return host_data[i]; }
[[nodiscard]] T const& operator[](size_t i) const { return host_data[i]; }
Expand Down

0 comments on commit f233422

Please sign in to comment.