Skip to content

Commit

Permalink
Fix parquet_field_list read_func lambda capture invalid this pointer (r…
Browse files Browse the repository at this point in the history
…apidsai#16440)

## Description
Fixes internal parquet_field_list subclass constructors capturing
invalid this pointer when passing objects to std::make_tuple. The
std::make_tuple usage creates a parameter object that is constructed,
moved, and destroyed. The this pointer is captured during constructor
call. The move constructor is called which creates its own separate this
pointer (all member data is moved/copied appropriately). The original
this pointer is invalidated by the following destructor. The lambda that
was captured in the constructor no longer contains a valid this value in
the final moved object.

This PR removes the dependency on the this pointer in the lambda and
captures the vector reference instead which is preserved correctly in
the object move. The ctor, move, dtor pattern occurs because of how
std::make_tuple is implemented by the standard library.

Closes rapidsai#16408

## Checklist
- [x] I am familiar with the [Contributing
Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md).
- [x] New or existing tests cover these changes.
- [x] The documentation is up to date with these changes.
  • Loading branch information
davidwendt authored Jul 31, 2024
1 parent 9f5e4a3 commit ed5e4aa
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions cpp/src/io/parquet/compact_protocol_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,10 @@ class parquet_field_bool : public parquet_field {
struct parquet_field_bool_list : public parquet_field_list<bool, FieldType::BOOLEAN_TRUE> {
parquet_field_bool_list(int f, std::vector<bool>& v) : parquet_field_list(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
auto const current_byte = cpr->getb();
assert_bool_field_type(current_byte);
this->val[i] = current_byte == static_cast<int>(FieldType::BOOLEAN_TRUE);
val[i] = current_byte == static_cast<int>(FieldType::BOOLEAN_TRUE);
};
bind_read_func(read_value);
}
Expand Down Expand Up @@ -188,8 +188,8 @@ template <typename T, FieldType EXPECTED_TYPE>
struct parquet_field_int_list : public parquet_field_list<T, EXPECTED_TYPE> {
parquet_field_int_list(int f, std::vector<T>& v) : parquet_field_list<T, EXPECTED_TYPE>(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
this->val[i] = cpr->get_zigzag<T>();
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
val[i] = cpr->get_zigzag<T>();
};
this->bind_read_func(read_value);
}
Expand Down Expand Up @@ -229,11 +229,11 @@ class parquet_field_string : public parquet_field {
struct parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
parquet_field_string_list(int f, std::vector<std::string>& v) : parquet_field_list(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
auto const l = cpr->get_u32();
CUDF_EXPECTS(l < static_cast<size_t>(cpr->m_end - cpr->m_cur), "string length mismatch");

this->val[i].assign(reinterpret_cast<char const*>(cpr->m_cur), l);
val[i].assign(reinterpret_cast<char const*>(cpr->m_cur), l);
cpr->m_cur += l;
};
bind_read_func(read_value);
Expand Down Expand Up @@ -269,8 +269,8 @@ struct parquet_field_enum_list : public parquet_field_list<Enum, FieldType::I32>
parquet_field_enum_list(int f, std::vector<Enum>& v)
: parquet_field_list<Enum, FieldType::I32>(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
this->val[i] = static_cast<Enum>(cpr->get_i32());
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
val[i] = static_cast<Enum>(cpr->get_i32());
};
this->bind_read_func(read_value);
}
Expand Down Expand Up @@ -354,8 +354,8 @@ struct parquet_field_struct_list : public parquet_field_list<T, FieldType::STRUC
parquet_field_struct_list(int f, std::vector<T>& v)
: parquet_field_list<T, FieldType::STRUCT>(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
cpr->read(&this->val[i]);
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
cpr->read(&val[i]);
};
this->bind_read_func(read_value);
}
Expand Down Expand Up @@ -395,7 +395,7 @@ struct parquet_field_binary_list
: public parquet_field_list<std::vector<uint8_t>, FieldType::BINARY> {
parquet_field_binary_list(int f, std::vector<std::vector<uint8_t>>& v) : parquet_field_list(f, v)
{
auto const read_value = [this](uint32_t i, CompactProtocolReader* cpr) {
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
auto const l = cpr->get_u32();
CUDF_EXPECTS(l <= static_cast<size_t>(cpr->m_end - cpr->m_cur), "binary length mismatch");

Expand Down Expand Up @@ -482,9 +482,7 @@ void CompactProtocolReader::skip_struct_field(int t, int depth)
skip_struct_field(t, depth + 1);
}
break;
default:
// printf("unsupported skip for type %d\n", t);
break;
default: break;
}
}

Expand Down

0 comments on commit ed5e4aa

Please sign in to comment.