Skip to content

Commit

Permalink
Add v2 FieldEntry #1.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 661433949
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Aug 16, 2024
1 parent 546c8e0 commit e9b87ca
Show file tree
Hide file tree
Showing 6 changed files with 583 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/google/protobuf/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ cc_library(
"extension_set.h",
"extension_set_inl.h",
"generated_enum_util.h",
"generated_message_table.h",
"generated_message_tctable_decl.h",
"generated_message_tctable_impl.h",
"generated_message_util.h",
Expand Down Expand Up @@ -573,6 +574,7 @@ PROTOBUF_HEADERS = [
"generated_enum_reflection.h",
"generated_message_bases.h",
"generated_message_reflection.h",
"generated_message_table_gen.h",
"generated_message_tctable_gen.h",
"map_entry.h",
"map_field.h",
Expand Down Expand Up @@ -604,6 +606,7 @@ cc_library(
"feature_resolver.cc",
"generated_message_bases.cc",
"generated_message_reflection.cc",
"generated_message_table_gen.cc",
"generated_message_tctable_full.cc",
"generated_message_tctable_gen.cc",
"map_field.cc",
Expand Down Expand Up @@ -1468,6 +1471,20 @@ cc_test(
],
)

# Test target compiled from generated_message_table_gen_test.cc.
# NOTE(review): presumably covers generated_message_table_gen.{h,cc}; confirm
# against the test source.
cc_test(
name = "generated_message_table_gen_test",
srcs = ["generated_message_table_gen_test.cc"],
deps = [
":cc_test_protos",
":port",
":protobuf",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/log:absl_check",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)

cc_test(
name = "inlined_string_field_unittest",
srcs = ["inlined_string_field_unittest.cc"],
Expand Down
248 changes: 248 additions & 0 deletions src/google/protobuf/generated_message_table.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__

#include <cstdint>
#include <limits>

#include "absl/log/absl_check.h"

namespace google {
namespace protobuf {
namespace internal {
namespace v2 {

// Field layout enums.
//
// Structural information about fields is packed into an 8-bit value. The enum
// types below represent bitwise fields, along with their respective widths,
// shifts, and masks. To pack into one byte, some mutually exclusive types share
// bits in [5, 7].
//
// <<Numeric Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : . : 6|===| . : . : . : . : . : [1] NumericKind
// +---------------+---------------+
//
// <<Message Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : 7|=======| . : . : . : . : . : [2] MessageKind
// +---------------+---------------+
//
// <<String Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// |===========| . : . : . : . : . : [3] StringKind
// +---------------+---------------+
//

// clang-format off

// FieldKind (3 bits):
// These values broadly represent a wire type and an in-memory storage class.
namespace FieldKind {
// FieldKind sits in the lowest kBits bits of the packed field type.
constexpr int kShift = 0;
constexpr int kBits = 3;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

// The trailing comment on each enumerator lists the proto field types that
// are represented by that kind.
enum Kinds : uint8_t {
  kFixed8 = 0,   // bool
  kFixed16 = 1,  // placeholder
  kFixed32 = 2,  // (s|u)?int32, (s)?fixed32, float, enum
  kFixed64 = 3,  // (s|u)?int64, (s)?fixed64, double
  kBytes = 4,    // bytes
  kString = 5,   // string
  kMessage = 6,  // group, message
  kMap = 7,      // map<...>
};

// The largest enumerator must still be representable in kBits bits.
static_assert(kMap < (1 << kBits), "too many types");
}  // namespace FieldKind

// Cardinality (2 bits):
// These values determine how many values a field can have and its presence.
namespace Cardinality {
constexpr int kShift = FieldKind::kShift + FieldKind::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kSingular = 0,
kOptional = 1 << kShift,
kRepeated = 2 << kShift,
kOneof = 3 << kShift,
};
} // namespace Cardinality

// NumericKind, MessageKind, StringKind are mutually exclusive and share the
// same bit-space (i.e. the same shift).

// NumericKind (1 bit):
// Indicates whether a numeric is signed.
namespace NumericKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 1;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kUnsigned = 0,
kSigned = 1 << kShift,
};
} // namespace NumericKind

// MessageKind (2 bits):
// Indicates if it's LazyField or eager message / group.
namespace MessageKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kEager = 0,
kLazy = 1 << kShift,
kGroup = 2 << kShift,
};
} // namespace MessageKind

// StringKind (3 bits):
// Indicates which in-memory string representation the field uses.
namespace StringKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 3;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kArenaPtr = 0,
kInlined = 1 << kShift,
kView = 2 << kShift,
kCord = 3 << kShift,
kStringPiece = 4 << kShift,
kStringPtr = 5 << kShift,
};
} // namespace StringKind

// Convenience aliases except cardinality (8 bits, with format):
// Ready-made field type values: a FieldKind combined with its type-specific
// kind bits, with the Cardinality bits left clear. Only storage width and
// signedness are encoded for numerics, so several names share one value.
// The leading `0 |` turns each expression into int arithmetic before the
// second enumerator is OR-ed in.
enum FieldType : uint8_t {
  // 8-bit numeric.
  kBool = 0 | FieldKind::kFixed8 | NumericKind::kUnsigned,

  // 32-bit numerics, signed.
  kInt32    = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
  kSInt32   = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
  kSFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
  kEnum     = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
  // 32-bit numerics, unsigned.
  kUInt32   = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
  kFixed32  = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
  kFloat    = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,

  // 64-bit numerics, signed.
  kInt64    = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
  kSInt64   = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
  kSFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
  // 64-bit numerics, unsigned.
  kUInt64   = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
  kFixed64  = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
  kDouble   = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,

  // Strings and bytes: no StringKind bits are set here.
  kBytes  = FieldKind::kBytes,
  kString = FieldKind::kString,

  // Messages and groups.
  kMessage     = 0 | FieldKind::kMessage | MessageKind::kEager,
  kLazyMessage = 0 | FieldKind::kMessage | MessageKind::kLazy,
  kGroup       = 0 | FieldKind::kMessage | MessageKind::kGroup,

  // Maps.
  kMap = FieldKind::kMap,
};
// clang-format on
// clang-format on

// Per-field entry whose size is asserted below to equal sizeof(uint64_t).
// Values that do not fit their inline slot are replaced by a sentinel and
// looked up through `aux_index` instead.
struct FieldEntry {
  // Sentinel values stored in the inline slots.
  static constexpr uint16_t kHasbitFallbackToAux = 0xFF;
  static constexpr uint16_t kFallbackToAux = 0xFFFF;
  static constexpr uint16_t kNoAuxIdx = 0xFFFF;

  // The same bounds as the sentinels above, typed to match the values they
  // are compared against (coming from reflection or protoc) when deciding
  // whether an aux entry is needed.
  static constexpr uint32_t kHasbitIdxLimit =
      std::numeric_limits<uint8_t>::max();
  static constexpr uint32_t kOffsetLimit =
      std::numeric_limits<uint16_t>::max();
  static constexpr int kFieldNumberLimit =
      std::numeric_limits<uint16_t>::max();

  // Common case: every value fits inline, so no aux entry is referenced
  // (aux_index is set to kNoAuxIdx).
  constexpr FieldEntry(uint8_t type, uint8_t hasbit_index, uint16_t offset,
                       uint16_t number)
      : field_type(type),
        hasbit_index(hasbit_index),
        offset(offset),
        field_number(number),
        aux_index(kNoAuxIdx) {}

  // Fallback case: if any of hasbit_index, offset or field_number is too big
  // for its slot, all three are set to their sentinel and only `aux_index`
  // is meaningful.
  constexpr FieldEntry(uint8_t type, uint16_t aux_index)
      : field_type(type),
        hasbit_index(kHasbitFallbackToAux),
        offset(kFallbackToAux),
        field_number(kFallbackToAux),
        aux_index(aux_index) {}

  // True when this entry refers to an aux entry.
  constexpr bool ShouldLookupAuxEntry() const {
    return aux_index != kNoAuxIdx;
  }

  // Masked views of `field_type`. The results keep their original bit
  // position (they are not shifted down), matching the pre-shifted
  // enumerators of the corresponding Kinds enums.
  uint8_t GetFieldKind() const {
    return static_cast<uint8_t>(field_type & FieldKind::kMask);
  }
  uint8_t GetCardinality() const {
    return static_cast<uint8_t>(field_type & Cardinality::kMask);
  }
  // Only valid for numeric kinds, i.e. kinds ordered before kBytes.
  uint8_t GetNumericKind() const {
    ABSL_DCHECK_LT(GetFieldKind(), FieldKind::kBytes);
    return static_cast<uint8_t>(field_type & NumericKind::kMask);
  }
  // Only valid for kMessage.
  uint8_t GetMessageKind() const {
    ABSL_DCHECK_EQ(GetFieldKind(), FieldKind::kMessage);
    return static_cast<uint8_t>(field_type & MessageKind::kMask);
  }
  // Only valid for kBytes / kString.
  uint8_t GetStringKind() const {
    const uint8_t kind = GetFieldKind();
    ABSL_DCHECK(kind == FieldKind::kBytes || kind == FieldKind::kString);
    return static_cast<uint8_t>(field_type & StringKind::kMask);
  }

  bool IsSigned() const { return GetNumericKind() == NumericKind::kSigned; }

  // Only valid for kBytes / kString; true for kString.
  bool IsUTF8() const {
    const uint8_t kind = GetFieldKind();
    ABSL_DCHECK(kind == FieldKind::kBytes || kind == FieldKind::kString);
    return kind == FieldKind::kString;
  }

  bool IsRepeated() const {
    return GetCardinality() == Cardinality::kRepeated;
  }

  // FieldKind | Cardinality | type-specific kind bits.
  uint8_t field_type;
  // Inline hasbit index; kHasbitFallbackToAux (0xFF) means use the aux entry.
  uint8_t hasbit_index;
  // Inline field offset, enough for sizeof(Message) up to 64 KiB;
  // kFallbackToAux (0xFFFF) means use the aux entry.
  uint16_t offset;
  // Inline field number (most fit in 16 bits); kFallbackToAux (0xFFFF) means
  // use the aux entry.
  uint16_t field_number;
  // Index of the aux entry, or kNoAuxIdx. Only up to 2^16 fallback cases are
  // supported.
  uint16_t aux_index;
};

static_assert(sizeof(FieldEntry) == sizeof(uint64_t), "");

} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google

#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
100 changes: 100 additions & 0 deletions src/google/protobuf/generated_message_table_gen.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include "google/protobuf/generated_message_table_gen.h"

#include <cstdint>

#include "absl/log/absl_check.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/generated_message_table.h"
#include "google/protobuf/port.h"

namespace google {
namespace protobuf {
namespace internal {
namespace v2 {

// Local shorthand for the nested FieldDescriptor::CppStringType type.
using CppStringType = FieldDescriptor::CppStringType;

namespace {

// Picks the StringKind bits for a string/bytes `field`.
//
// - Cord fields map to kCord and must not be inlined (enforced by
//   ABSL_CHECK).
// - String fields, and VIEW fields (treated as strings for now), map to
//   kStringPtr when repeated; otherwise to kInlined if `is_inlined`, else
//   kArenaPtr.
// Any other cpp_string_type() is treated as unreachable.
uint8_t GenerateStringKind(const FieldDescriptor* field, bool is_inlined) {
  switch (field->cpp_string_type()) {
    case CppStringType::kCord:
      ABSL_CHECK(!is_inlined);
      return StringKind::kCord;

    case CppStringType::kString:
    case CppStringType::kView:
      if (field->is_repeated()) return StringKind::kStringPtr;
      if (is_inlined) return StringKind::kInlined;
      return StringKind::kArenaPtr;

    default:
      break;
  }
  Unreachable();
}

} // namespace

// Builds the packed 8-bit field type ("type card") for `field`.
//
// Map fields return FieldType::kMap immediately, with no cardinality bits.
// For every other field the result is the base FieldType for the declared
// type, OR-ed with the Cardinality bits and, for fields whose cpp_type() is
// CPPTYPE_STRING, the StringKind bits. `info.is_lazy` selects kLazyMessage
// for TYPE_MESSAGE fields; `info.is_inlined` is forwarded to
// GenerateStringKind().
uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info) {
  // Base card indexed by FieldDescriptor type; slot 0 is unused because the
  // type values start at 1.
  constexpr uint8_t kBaseCardByType[] = {
      0,                     // placeholder as type starts from 1.
      FieldType::kDouble,    // TYPE_DOUBLE
      FieldType::kFloat,     // TYPE_FLOAT
      FieldType::kInt64,     // TYPE_INT64
      FieldType::kUInt64,    // TYPE_UINT64
      FieldType::kInt32,     // TYPE_INT32
      FieldType::kFixed64,   // TYPE_FIXED64
      FieldType::kFixed32,   // TYPE_FIXED32
      FieldType::kBool,      // TYPE_BOOL
      FieldType::kBytes,     // TYPE_STRING
      FieldType::kGroup,     // TYPE_GROUP
      FieldType::kMessage,   // TYPE_MESSAGE
      FieldType::kBytes,     // TYPE_BYTES
      FieldType::kUInt32,    // TYPE_UINT32
      FieldType::kEnum,      // TYPE_ENUM
      FieldType::kSFixed32,  // TYPE_SFIXED32
      FieldType::kSFixed64,  // TYPE_SFIXED64
      FieldType::kSInt32,    // TYPE_SINT32
      FieldType::kSInt64,    // TYPE_SINT64
  };
  static_assert(sizeof(kBaseCardByType) == (FieldDescriptor::MAX_TYPE + 1),
                "");

  if (field->is_map()) return FieldType::kMap;

  const auto declared_type = field->type();
  uint8_t card = kBaseCardByType[declared_type];

  // The table cannot express lazy messages or UTF-8 validated strings, so
  // patch those two cases up here.
  if (declared_type == FieldDescriptor::TYPE_MESSAGE) {
    if (info.is_lazy) card = FieldType::kLazyMessage;
  } else if (declared_type == FieldDescriptor::TYPE_STRING) {
    if (field->requires_utf8_validation()) card = FieldType::kString;
  }

  // Cardinality, checked in priority order: repeated, real oneof member,
  // explicit presence, then plain singular.
  const uint8_t cardinality =
      field->is_repeated()                        ? Cardinality::kRepeated
      : field->real_containing_oneof() != nullptr ? Cardinality::kOneof
      : field->has_presence()                     ? Cardinality::kOptional
                                                  : Cardinality::kSingular;
  card |= cardinality;

  // Only string fields still need their type-specific kind bits; numerics
  // (signedness) and messages (lazy) already carry theirs from the steps
  // above.
  if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
    card |= GenerateStringKind(field, info.is_inlined);
  }
  return card;
}

} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google
Loading

0 comments on commit e9b87ca

Please sign in to comment.