From ed5e4aa3923279965f733a263d92f4dabf9b434d Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:44:26 -0400 Subject: [PATCH] Fix parquet_field_list read_func lambda capture invalid this pointer (#16440) ## Description Fixes internal parquet_field_list subclass constructors capturing invalid this pointer when passing objects to std::make_tuple. The std::make_tuple usage creates a parameter object that is constructed, moved, and destroyed. The this pointer is captured during constructor call. The move constructor is called which creates its own separate this pointer (all member data is moved/copied appropriately). The original this pointer is invalidated by the following destructor. The lambda that was captured in the constructor no longer contains a valid this value in the final moved object. This PR removes the dependency on the this pointer in the lambda and captures the vector reference instead which is preserved correctly in the object move. The ctor, move, dtor pattern occurs because of how std::make_tuple is implemented by the standard library. Closes https://github.com/rapidsai/cudf/issues/16408 ## Checklist - [x] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md). - [x] New or existing tests cover these changes. - [x] The documentation is up to date with these changes. --- .../io/parquet/compact_protocol_reader.cpp | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 192833507b0..e13ed5e85e5 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -137,10 +137,10 @@ class parquet_field_bool : public parquet_field { struct parquet_field_bool_list : public parquet_field_list { parquet_field_bool_list(int f, std::vector& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { auto const current_byte = cpr->getb(); assert_bool_field_type(current_byte); - this->val[i] = current_byte == static_cast(FieldType::BOOLEAN_TRUE); + val[i] = current_byte == static_cast(FieldType::BOOLEAN_TRUE); }; bind_read_func(read_value); } @@ -188,8 +188,8 @@ template struct parquet_field_int_list : public parquet_field_list { parquet_field_int_list(int f, std::vector& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { - this->val[i] = cpr->get_zigzag(); + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + val[i] = cpr->get_zigzag(); }; this->bind_read_func(read_value); } @@ -229,11 +229,11 @@ class parquet_field_string : public parquet_field { struct parquet_field_string_list : public parquet_field_list { parquet_field_string_list(int f, std::vector& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { auto const l = cpr->get_u32(); CUDF_EXPECTS(l < static_cast(cpr->m_end - cpr->m_cur), "string length mismatch"); - this->val[i].assign(reinterpret_cast(cpr->m_cur), l); + val[i].assign(reinterpret_cast(cpr->m_cur), l); cpr->m_cur += l; }; bind_read_func(read_value); @@ -269,8 +269,8 @@ struct parquet_field_enum_list : public parquet_field_list parquet_field_enum_list(int f, std::vector& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { - this->val[i] = static_cast(cpr->get_i32()); + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + val[i] = static_cast(cpr->get_i32()); }; this->bind_read_func(read_value); } @@ -354,8 +354,8 @@ struct parquet_field_struct_list : public parquet_field_list& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { - cpr->read(&this->val[i]); + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + cpr->read(&val[i]); }; this->bind_read_func(read_value); } @@ -395,7 +395,7 @@ struct parquet_field_binary_list : public parquet_field_list, FieldType::BINARY> { parquet_field_binary_list(int f, std::vector>& v) : parquet_field_list(f, v) { - auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) { + auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { auto const l = cpr->get_u32(); CUDF_EXPECTS(l <= static_cast(cpr->m_end - cpr->m_cur), "binary length mismatch"); @@ -482,9 +482,7 @@ void CompactProtocolReader::skip_struct_field(int t, int depth) skip_struct_field(t, depth + 1); } break; - default: - // printf("unsupported skip for type %d\n", t); - break; + default: break; } }