From d696b00f6e0b19f0505ad95fdcf4469f5d79533d Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot
Date: Fri, 9 Aug 2024 15:55:05 -0700
Subject: [PATCH] Add v2 FieldEntry #1.

PiperOrigin-RevId: 661433949
---
 src/google/protobuf/generated_message_table.h | 248 ++++++++++++++++++
 .../protobuf/generated_message_table_gen.cc   | 103 ++++++++
 .../protobuf/generated_message_table_gen.h    |  30 +++
 .../generated_message_table_gen_test.cc       | 183 +++++++++++++
 src/google/protobuf/message.h                 |   5 +
 5 files changed, 569 insertions(+)
 create mode 100644 src/google/protobuf/generated_message_table.h
 create mode 100644 src/google/protobuf/generated_message_table_gen.cc
 create mode 100644 src/google/protobuf/generated_message_table_gen.h
 create mode 100644 src/google/protobuf/generated_message_table_gen_test.cc

diff --git a/src/google/protobuf/generated_message_table.h b/src/google/protobuf/generated_message_table.h
new file mode 100644
index 0000000000000..f9b2126c03c33
--- /dev/null
+++ b/src/google/protobuf/generated_message_table.h
@@ -0,0 +1,248 @@
+#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
+#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
+
+#include <cstdint>
+#include <limits>
+
+#include "absl/log/absl_check.h"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace v2 {
+
+// Field layout enums.
+//
+// Structural information about fields is packed into an 8-bit value. The enum
+// types below represent bitwise fields, along with their respective widths,
+// shifts, and masks. To pack into one byte, some mutually exclusive types share
+// bits in [5, 7].
+//
+// <<Numeric Fields>>
+//     Bit:
+//     +---------------+---------------+
+//     |7     ...     4|3     ...     0|
+//     +---------------+---------------+
+//     : . : . : . : . :  3|===========| [3] FieldKind
+//     : . : . :  5|=======| . : . : . : [2] Cardinality
+//     : . :  6|===| . : . : . : . : . : [1] NumericKind
+//     +---------------+---------------+
+//
+// <<Message Fields>>
+//     Bit:
+//     +---------------+---------------+
+//     |7     ...     4|3     ...     0|
+//     +---------------+---------------+
+//     : . : . : . : . :  3|===========| [3] FieldKind
+//     : . : . :  5|=======| . : . : . : [2] Cardinality
+//     :  7|=======| . : . : . : . : . : [2] MessageKind
+//     +---------------+---------------+
+//
+// <<String Fields>>
+//     Bit:
+//     +---------------+---------------+
+//     |7     ...     4|3     ...     0|
+//     +---------------+---------------+
+//     : . : . : . : . :  3|===========| [3] FieldKind
+//     : . : . :  5|=======| . : . : . : [2] Cardinality
+//     |===========| . : . : . : . : . : [3] StringKind
+//     +---------------+---------------+
+//
+
+// clang-format off
+
+// FieldKind (3 bits):
+// These values broadly represent a wire type and an in-memory storage class.
+namespace FieldKind {
+inline constexpr int kShift = 0;
+inline constexpr int kBits = 3;
+inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
+
+enum Kinds : uint8_t {
+  kFixed8 = 0,  // bool
+  kFixed16,     // place holder
+  kFixed32,     // (s|u)?int32, (s)?fixed32, float, enum
+  kFixed64,     // (s|u)?int64, (s)?fixed64, double
+  kBytes,       // bytes
+  kString,      // string
+  kMessage,     // group, message
+  kMap,         // map<...>
+};
+
+static_assert(kMap < (1 << kBits), "too many types");
+}  // namespace FieldKind
+
+// Cardinality (2 bits):
+// These values determine how many values a field can have and its presence.
+namespace Cardinality {
+inline constexpr int kShift = FieldKind::kShift + FieldKind::kBits;
+inline constexpr int kBits = 2;
+inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
+
+enum Kinds : uint8_t {
+  kSingular = 0,
+  kOptional = 1 << kShift,
+  kRepeated = 2 << kShift,
+  kOneof    = 3 << kShift,
+};
+}  // namespace Cardinality
+
+// NumericKind, MessageKind, StringKind are mutually exclusive and share the
+// same bit-space (i.e. the same shift).
+
+// NumericKind (1 bit):
+// Indicates whether a numeric is signed.
+namespace NumericKind {
+inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
+inline constexpr int kBits = 1;
+inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
+
+enum Kinds : uint8_t {
+  kUnsigned = 0,
+  kSigned   = 1 << kShift,
+};
+}  // namespace NumericKind
+
+// MessageKind (2 bits):
+// Indicates if it's LazyField or eager message / group.
+namespace MessageKind {
+inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
+inline constexpr int kBits = 2;
+inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
+
+enum Kinds : uint8_t {
+  kEager = 0,
+  kLazy  = 1 << kShift,
+  kGroup = 2 << kShift,
+};
+}  // namespace MessageKind
+
+// StringKind (3 bits):
+// Indicates the in-memory representation of a string or bytes field.
+namespace StringKind {
+inline constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
+inline constexpr int kBits = 3;
+inline constexpr int kMask = ((1 << kBits) - 1) << kShift;
+
+enum Kinds : uint8_t {
+  kArenaPtr    = 0,
+  kInlined     = 1 << kShift,
+  kView        = 2 << kShift,
+  kCord        = 3 << kShift,
+  kStringPiece = 4 << kShift,
+  kStringPtr   = 5 << kShift,
+};
+}  // namespace StringKind
+
+// Convenience aliases except cardinality (8 bits, with format):
+enum FieldType : uint8_t {
+  // Numeric types:
+  kBool     = 0 | FieldKind::kFixed8  | NumericKind::kUnsigned,
+
+  kInt32    = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
+  kSInt32   = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
+  kSFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
+  kUInt32   = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
+  kFixed32  = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
+  kFloat    = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
+  kEnum     = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
+
+  kInt64    = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
+  kSInt64   = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
+  kSFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
+  kUInt64   = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
+  kFixed64  = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
+  kDouble   = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
+
+  // String types:
+  kBytes  = FieldKind::kBytes,
+  kString = FieldKind::kString,
+
+  // Message types:
+  kMessage     = 0 | FieldKind::kMessage | MessageKind::kEager,
+  kLazyMessage = 0 | FieldKind::kMessage | MessageKind::kLazy,
+  kGroup       = 0 | FieldKind::kMessage | MessageKind::kGroup,
+
+  // Map types:
+  kMap = FieldKind::kMap,
+};
+// clang-format on
+
+struct FieldEntry {
+  // Constructors without aux index. (Should be common cases.)
+  constexpr FieldEntry(uint8_t type, uint8_t hasbit_index, uint16_t offset,
+                       uint16_t number)
+      : field_type(type),
+        hasbit_index(hasbit_index),
+        offset(offset),
+        field_number(number),
+        aux_index(kNoAuxIdx) {}
+
+  // If any of hasbit_index, offset, field_number is too big to fit, fallback to
+  // aux entry for all.
+  constexpr FieldEntry(uint8_t type, uint16_t aux_index)
+      : field_type(type),
+        hasbit_index(kHasbitFallbackToAux),
+        offset(kFallbackToAux),
+        field_number(kFallbackToAux),
+        aux_index(aux_index) {}
+
+  constexpr bool ShouldLookupAuxEntry() const { return aux_index != kNoAuxIdx; }
+
+  uint8_t GetFieldKind() const { return field_type & FieldKind::kMask; }
+  uint8_t GetCardinality() const { return field_type & Cardinality::kMask; }
+  uint8_t GetNumericKind() const {
+    ABSL_DCHECK_LT(GetFieldKind(), FieldKind::kBytes);
+    return field_type & NumericKind::kMask;
+  }
+  uint8_t GetMessageKind() const {
+    ABSL_DCHECK_EQ(GetFieldKind(), FieldKind::kMessage);
+    return field_type & MessageKind::kMask;
+  }
+  uint8_t GetStringKind() const {
+    ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
+                GetFieldKind() == FieldKind::kString);
+    return field_type & StringKind::kMask;
+  }
+
+  bool IsSigned() const { return GetNumericKind() == NumericKind::kSigned; }
+  bool IsUTF8() const {
+    ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
+                GetFieldKind() == FieldKind::kString);
+    return GetFieldKind() == FieldKind::kString;
+  }
+
+  bool IsRepeated() const { return GetCardinality() == Cardinality::kRepeated; }
+
+  // Field type consists of FieldKind, Cardinality and type-specific Kind.
+  uint8_t field_type;
+  // Covers up to 256 fields. Fallback to aux if 0xFF.
+  uint8_t hasbit_index;
+  // Covers sizeof(Message) up to 64 KiB. Fallback to aux if 0xFFFF.
+  uint16_t offset;
+  // Most field numbers should fit 16 bits. Fallback to aux if 0xFFFF.
+  uint16_t field_number;
+  // Only up to 2^16 fallback cases are supported.
+  uint16_t aux_index;
+
+  static constexpr uint16_t kHasbitFallbackToAux = 0xFF;
+  static constexpr uint16_t kFallbackToAux = 0xFFFF;
+  static constexpr uint16_t kNoAuxIdx = 0xFFFF;
+
+  // These constants are same as the above but compared against values from
+  // reflection or protoc (hence different types) to determine whether to use
+  // aux entries.
+  static constexpr uint32_t kHasbitIdxLimit =
+      std::numeric_limits<uint8_t>::max();
+  static constexpr uint32_t kOffsetLimit = std::numeric_limits<uint16_t>::max();
+  static constexpr int kFieldNumberLimit = std::numeric_limits<uint16_t>::max();
+};
+
+static_assert(sizeof(FieldEntry) == sizeof(uint64_t), "");
+
+}  // namespace v2
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
diff --git a/src/google/protobuf/generated_message_table_gen.cc b/src/google/protobuf/generated_message_table_gen.cc
new file mode 100644
index 0000000000000..71413992cdabe
--- /dev/null
+++ b/src/google/protobuf/generated_message_table_gen.cc
@@ -0,0 +1,103 @@
+#include "google/protobuf/generated_message_table_gen.h"
+
+#include <cstdint>
+
+#include "absl/log/absl_check.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/generated_message_table.h"
+#include "google/protobuf/port.h"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace v2 {
+
+using CppStringType = FieldDescriptor::CppStringType;
+
+namespace {
+
+uint8_t GenerateStringKind(const FieldDescriptor* field, bool is_inlined) {
+  switch (field->cpp_string_type()) {
+    // VIEW fields are treated as strings for now.
+    case CppStringType::kView:
+    case CppStringType::kString:
+      return field->is_repeated() ? StringKind::kStringPtr
+             : is_inlined         ? StringKind::kInlined
+                                  : StringKind::kArenaPtr;
+    case CppStringType::kCord:
+      ABSL_CHECK(!is_inlined);
+      return StringKind::kCord;
+    case CppStringType::kStringPiece:
+      ABSL_CHECK(!is_inlined);
+      return StringKind::kStringPiece;
+    default:
+      Unreachable();
+      break;
+  }
+}
+
+}  // namespace
+
+uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info) {
+  constexpr uint8_t field_type_to_type_card[] = {
+      0,                     // placeholder as type starts from 1.
+      FieldType::kDouble,    // TYPE_DOUBLE
+      FieldType::kFloat,     // TYPE_FLOAT
+      FieldType::kInt64,     // TYPE_INT64
+      FieldType::kUInt64,    // TYPE_UINT64
+      FieldType::kInt32,     // TYPE_INT32
+      FieldType::kFixed64,   // TYPE_FIXED64
+      FieldType::kFixed32,   // TYPE_FIXED32
+      FieldType::kBool,      // TYPE_BOOL
+      FieldType::kBytes,     // TYPE_STRING
+      FieldType::kGroup,     // TYPE_GROUP
+      FieldType::kMessage,   // TYPE_MESSAGE
+      FieldType::kBytes,     // TYPE_BYTES
+      FieldType::kUInt32,    // TYPE_UINT32
+      FieldType::kEnum,      // TYPE_ENUM
+      FieldType::kSFixed32,  // TYPE_SFIXED32
+      FieldType::kSFixed64,  // TYPE_SFIXED64
+      FieldType::kSInt32,    // TYPE_SINT32
+      FieldType::kSInt64,    // TYPE_SINT64
+  };
+  static_assert(
+      sizeof(field_type_to_type_card) == (FieldDescriptor::MAX_TYPE + 1), "");
+
+  if (field->is_map()) return FieldType::kMap;
+
+  auto field_type = field->type();
+  uint8_t type_card = field_type_to_type_card[field_type];
+  // Override previously set type for lazy message and UTF8 strings.
+  switch (field_type) {
+    case FieldDescriptor::TYPE_MESSAGE:
+      if (info.is_lazy) type_card = FieldType::kLazyMessage;
+      break;
+    case FieldDescriptor::TYPE_STRING:
+      if (field->requires_utf8_validation()) type_card = FieldType::kString;
+      break;
+    default:
+      break;
+  }
+
+  // Set cardinality.
+  if (field->is_repeated()) {
+    type_card |= Cardinality::kRepeated;
+  } else if (field->real_containing_oneof()) {
+    type_card |= Cardinality::kOneof;
+  } else if (field->has_presence()) {
+    type_card |= Cardinality::kOptional;
+  } else {
+    type_card |= Cardinality::kSingular;
+  }
+
+  // Set StringKind for string fields. Note that numerics (signedness) and
+  // messages (lazy) are already specified.
+  return field->cpp_type() != FieldDescriptor::CPPTYPE_STRING
+             ? type_card
+             : type_card | GenerateStringKind(field, info.is_inlined);
+}
+
+}  // namespace v2
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
diff --git a/src/google/protobuf/generated_message_table_gen.h b/src/google/protobuf/generated_message_table_gen.h
new file mode 100644
index 0000000000000..cbb58d7c662fe
--- /dev/null
+++ b/src/google/protobuf/generated_message_table_gen.h
@@ -0,0 +1,30 @@
+#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
+#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
+
+#include <cstdint>
+
+#include "google/protobuf/descriptor.h"
+
+// This file contains types and APIs to generate tables for v2 wireformat.
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace v2 {
+
+struct FieldTypeInfo {
+  bool is_inlined;
+  bool is_lazy;
+};
+
+// Returns 8 bit type card for a given field. Type cards contain information
+// about field types and cardinality that are needed to iterate fields per
+// message.
+uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info);
+
+}  // namespace v2
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_GEN_H__
diff --git a/src/google/protobuf/generated_message_table_gen_test.cc b/src/google/protobuf/generated_message_table_gen_test.cc
new file mode 100644
index 0000000000000..3ce80f24add60
--- /dev/null
+++ b/src/google/protobuf/generated_message_table_gen_test.cc
@@ -0,0 +1,183 @@
+#include "google/protobuf/generated_message_table_gen.h"
+
+#include <cctype>
+#include <cstdint>
+#include <string>
+
+#include <gtest/gtest.h>
+#include "absl/algorithm/container.h"
+#include "absl/log/absl_check.h"
+#include "google/protobuf/generated_message_table.h"
+#include "google/protobuf/port.h"
+#include "google/protobuf/unittest.pb.h"
+#include "google/protobuf/unittest_mset.pb.h"
+
+namespace google {
+namespace protobuf {
+namespace internal {
+namespace v2 {
+
+class V2TableGenTester {
+ public:
+  static uint32_t HasBitIndex(const Reflection* reflection,
+                              const FieldDescriptor* field) {
+    return reflection->schema_.HasBitIndex(field);
+  }
+  static uint32_t GetFieldOffset(const Reflection* reflection,
+                                 const FieldDescriptor* field) {
+    return reflection->schema_.GetFieldOffset(field);
+  }
+  static bool IsLazyField(const Reflection* reflection,
+                          const FieldDescriptor* field) {
+    ABSL_CHECK(!field->is_extension());
+    return reflection->IsLazyField(field);
+  }
+  static bool IsInlined(const Reflection* reflection,
+                        const FieldDescriptor* field) {
+    return reflection->schema_.IsFieldInlined(field);
+  }
+};
+
+namespace {
+
+using ::protobuf_unittest::TestAllTypes;
+using ::protobuf_unittest::TestMessageSetExtension1;
+
+// Creates FieldEntry that won't require AuxEntry, which requires all fields to
+// fit into smaller (but common) limit. Specifically, hasbit_index for 1B,
+// offset and field number for 2B.
+FieldEntry CreateFieldEntryWithoutAux(const Reflection* reflection,
+                                      const Message* message,
+                                      const FieldDescriptor* field) {
+  ABSL_CHECK_EQ(reflection, message->GetReflection());
+
+  uint32_t hasbit_index = V2TableGenTester::HasBitIndex(reflection, field);
+  uint32_t offset = V2TableGenTester::GetFieldOffset(reflection, field);
+
+  // CHECK if "field" cannot fit into FieldEntry alone and require AuxEntry.
+  static constexpr uint32_t kNoHasbit = static_cast<uint32_t>(-1);
+  ABSL_CHECK(hasbit_index == kNoHasbit ||
+             hasbit_index < FieldEntry::kHasbitIdxLimit);
+  ABSL_CHECK_LT(offset, FieldEntry::kOffsetLimit);
+  ABSL_CHECK_LT(field->number(), FieldEntry::kFieldNumberLimit);
+
+  bool is_lazy = V2TableGenTester::IsLazyField(reflection, field);
+  bool is_inlined = V2TableGenTester::IsInlined(reflection, field);
+
+  return FieldEntry(MakeTypeCardForField(
+                        field, {.is_inlined = is_inlined, .is_lazy = is_lazy}),
+                    hasbit_index, offset, field->number());
+}
+
+class TableGenTest : public testing::TestWithParam<const Message*> {
+ public:
+  TableGenTest()
+      : message_(GetParam()), reflection_(message_->GetReflection()) {}
+
+ protected:
+  const Message* message_;
+  const Reflection* reflection_;
+};
+
+TEST_P(TableGenTest, ValidateTypeCardForField) {
+  const Descriptor* desc = message_->GetDescriptor();
+  for (int i = 0, count = desc->field_count(); i < count; ++i) {
+    const FieldDescriptor* field = desc->field(i);
+    auto field_entry = CreateFieldEntryWithoutAux(reflection_, message_, field);
+
+    // Validate cardinality.
+    EXPECT_EQ(field->is_repeated(), field_entry.IsRepeated());
+    uint8_t cardinality = field_entry.GetCardinality();
+    switch (cardinality) {
+      case Cardinality::kRepeated:
+        EXPECT_TRUE(field->is_repeated());
+        break;
+      case Cardinality::kOptional:
+        EXPECT_FALSE(field->is_repeated());
+        EXPECT_TRUE(field->has_presence());
+        break;
+      case Cardinality::kSingular:
+        EXPECT_FALSE(field->is_repeated());
+        EXPECT_FALSE(field->has_presence());
+        break;
+      case Cardinality::kOneof:
+        EXPECT_FALSE(field->is_repeated());
+        EXPECT_TRUE(field->real_containing_oneof());
+        break;
+      default:
+        Unreachable();
+        break;
+    }
+
+    // Validate field types, etc.
+    switch (field->cpp_type()) {
+      case FieldDescriptor::CPPTYPE_ENUM:
+      case FieldDescriptor::CPPTYPE_INT32:
+        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed32);
+        EXPECT_TRUE(field_entry.IsSigned());
+        break;
+      case FieldDescriptor::CPPTYPE_INT64:
+        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed64);
+        EXPECT_TRUE(field_entry.IsSigned());
+        break;
+      case FieldDescriptor::CPPTYPE_FLOAT:
+      case FieldDescriptor::CPPTYPE_UINT32:
+        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed32);
+        EXPECT_FALSE(field_entry.IsSigned());
+        break;
+      case FieldDescriptor::CPPTYPE_DOUBLE:
+      case FieldDescriptor::CPPTYPE_UINT64:
+        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed64);
+        EXPECT_FALSE(field_entry.IsSigned());
+        break;
+      case FieldDescriptor::CPPTYPE_BOOL:
+        EXPECT_EQ(field_entry.GetFieldKind(), FieldKind::kFixed8);
+        EXPECT_FALSE(field_entry.IsSigned());
+        break;
+      case FieldDescriptor::CPPTYPE_STRING:
+        EXPECT_EQ(field->requires_utf8_validation(), field_entry.IsUTF8())
+            << field->full_name();
+
+        switch (field->cpp_string_type()) {
+          case FieldDescriptor::CppStringType::kCord:
+            EXPECT_EQ(field_entry.GetStringKind(), StringKind::kCord);
+            break;
+          // MakeTypeCardForField() treats VIEW fields as plain strings for
+          // now, so they share the kString expectations.
+          case FieldDescriptor::CppStringType::kView:
+          case FieldDescriptor::CppStringType::kString:
+            if (field->is_repeated()) {
+              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kStringPtr);
+            } else if (V2TableGenTester::IsInlined(reflection_, field)) {
+              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kInlined);
+            } else {
+              EXPECT_EQ(field_entry.GetStringKind(), StringKind::kArenaPtr);
+            }
+            break;
+        }
+        break;
+      case FieldDescriptor::CPPTYPE_MESSAGE:
+        break;
+      default:
+        Unreachable();
+        break;
+    }
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    V2, TableGenTest,
+    testing::Values(&TestAllTypes::default_instance(),
+                    &TestMessageSetExtension1::default_instance()),
+    [](const testing::TestParamInfo<const Message*>& info) {
+      std::string name = info.param->GetTypeName();
+      absl::c_replace_if(
+          name, [](unsigned char c) { return !std::isalnum(c); }, '_');
+      return name;
+    });
+
+}  // namespace
+}  // namespace v2
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google
diff --git a/src/google/protobuf/message.h b/src/google/protobuf/message.h
index 85bb049a30f72..22a5f209340f4 100644
--- a/src/google/protobuf/message.h
+++ b/src/google/protobuf/message.h
@@ -154,6 +154,9 @@ namespace field_layout {
 enum TransformValidation : uint16_t;
 }  // namespace field_layout
 
+namespace v2 {
+class V2TableGenTester;
+}  // namespace v2
 }  // namespace internal
 class UnknownFieldSet;  // unknown_field_set.h
 namespace io {
@@ -1133,6 +1136,8 @@ class PROTOBUF_EXPORT Reflection final {
                              bool is_string) const;
 
   friend class MapReflectionTester;
+  friend class internal::v2::V2TableGenTester;
+
   // Returns true if key is in map. Returns false if key is not in map field.
   bool ContainsMapKey(const Message& message, const FieldDescriptor* field,
                       const MapKey& key) const;