Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add v2 FieldEntry #1. #17790

Merged
merged 1 commit into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/google/protobuf/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ cc_library(
"extension_set.h",
"extension_set_inl.h",
"generated_enum_util.h",
"generated_message_table.h",
"generated_message_tctable_decl.h",
"generated_message_tctable_impl.h",
"generated_message_util.h",
Expand Down Expand Up @@ -573,6 +574,7 @@ PROTOBUF_HEADERS = [
"generated_enum_reflection.h",
"generated_message_bases.h",
"generated_message_reflection.h",
"generated_message_table_gen.h",
"generated_message_tctable_gen.h",
"map_entry.h",
"map_field.h",
Expand Down Expand Up @@ -604,6 +606,7 @@ cc_library(
"feature_resolver.cc",
"generated_message_bases.cc",
"generated_message_reflection.cc",
"generated_message_table_gen.cc",
"generated_message_tctable_full.cc",
"generated_message_tctable_gen.cc",
"map_field.cc",
Expand Down Expand Up @@ -1468,6 +1471,21 @@ cc_test(
],
)

# Test target built from generated_message_table_gen_test.cc. It links against
# the full and lite protobuf runtimes plus the C++ test protos, and uses
# gtest_main for its entry point.
cc_test(
name = "generated_message_table_gen_test",
srcs = ["generated_message_table_gen_test.cc"],
deps = [
":cc_test_protos",
":port",
":protobuf",
":protobuf_lite",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/log:absl_check",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
)

cc_test(
name = "inlined_string_field_unittest",
srcs = ["inlined_string_field_unittest.cc"],
Expand Down
248 changes: 248 additions & 0 deletions src/google/protobuf/generated_message_table.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
#ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__

#include <cstdint>
#include <limits>

#include "absl/log/absl_check.h"

namespace google {
namespace protobuf {
namespace internal {
namespace v2 {

// Field layout enums.
//
// Structural information about fields is packed into an 8-bit value. The enum
// types below represent bitwise fields, along with their respective widths,
// shifts, and masks. To pack into one byte, some mutually exclusive types share
// bits in [5, 7].
//
// <<Numeric Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : . : 6|===| . : . : . : . : . : [1] NumericKind
// +---------------+---------------+
//
// <<Message Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// : 7|=======| . : . : . : . : . : [2] MessageKind
// +---------------+---------------+
//
// <<String Fields>>
// Bit:
// +---------------+---------------+
// |7 ... 4|3 ... 0|
// +---------------+---------------+
// : . : . : . : . : 3|===========| [3] FieldKind
// : . : . : 5|=======| . : . : . : [2] Cardinality
// |===========| . : . : . : . : . : [3] StringKind
// +---------------+---------------+
//

// clang-format off

// FieldKind (3 bits):
// The lowest bits of the packed field type. Each value broadly stands for a
// wire type together with an in-memory storage class.
namespace FieldKind {
constexpr int kBits = 3;
constexpr int kShift = 0;
// All kBits bits set, positioned at kShift.
constexpr int kMask = (1 << (kShift + kBits)) - (1 << kShift);

enum Kinds : uint8_t {
  kFixed8 = 0,   // bool
  kFixed16 = 1,  // place holder
  kFixed32 = 2,  // (s|u)?int32, (s)?fixed32, float, enum
  kFixed64 = 3,  // (s|u)?int64, (s)?fixed64, double
  kBytes = 4,    // bytes
  kString = 5,   // string
  kMessage = 6,  // group, message
  kMap = 7,      // map<...>
};

// The last enumerator must still fit in kBits bits.
static_assert(kMap < (1 << kBits), "too many types");
}  // namespace FieldKind

// Cardinality (2 bits):
// These values determine how many values a field can have and its presence.
namespace Cardinality {
constexpr int kShift = FieldKind::kShift + FieldKind::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kSingular = 0,
kOptional = 1 << kShift,
kRepeated = 2 << kShift,
kOneof = 3 << kShift,
};
} // namespace Cardinality

// NumericKind, MessageKind, StringKind are mutually exclusive and share the
// same bit-space (i.e. the same shift).

// NumericKind (1 bit):
// Indicates whether a numeric is signed.
namespace NumericKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 1;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kUnsigned = 0,
kSigned = 1 << kShift,
};
} // namespace NumericKind

// MessageKind (2 bits):
// Indicates if it's LazyField or eager message / group.
namespace MessageKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 2;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kEager = 0,
kLazy = 1 << kShift,
kGroup = 2 << kShift,
};
} // namespace MessageKind

// StringKind (3 bits):
// Indicates if it's LazyField or eager message / group.
namespace StringKind {
constexpr int kShift = Cardinality::kShift + Cardinality::kBits;
constexpr int kBits = 3;
constexpr int kMask = ((1 << kBits) - 1) << kShift;

enum Kinds : uint8_t {
kArenaPtr = 0,
kInlined = 1 << kShift,
kView = 2 << kShift,
kCord = 3 << kShift,
kStringPiece = 4 << kShift,
kStringPtr = 5 << kShift,
};
} // namespace StringKind

// Convenience aliases except cardinality (8 bits, with format):
//
// Each alias pre-combines a FieldKind with the matching type-specific kind
// (NumericKind or MessageKind). No Cardinality bits are set here, and the
// string aliases carry no StringKind bits; those are OR-ed in separately when
// a full type byte is assembled.
//
// Several aliases intentionally share one value (e.g. kInt32, kSInt32,
// kSFixed32 and kEnum are all "signed kFixed32"), so a FieldType value alone
// does not identify the original proto type.
//
// The leading `0 |` makes every `|` an int-with-enumerator operation instead
// of an operation between enumerators of two different enum types, which
// C++20 deprecates. NOTE(review): presumably that is why the prefix is there;
// confirm before removing it.
enum FieldType : uint8_t {
// Numeric types:
kBool = 0 | FieldKind::kFixed8 | NumericKind::kUnsigned,

// 32-bit storage; the signed / unsigned variants differ only in NumericKind.
kInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kSInt32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kSFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kSigned,
kUInt32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kFixed32 = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kFloat = 0 | FieldKind::kFixed32 | NumericKind::kUnsigned,
kEnum = 0 | FieldKind::kFixed32 | NumericKind::kSigned,

// 64-bit storage; the signed / unsigned variants differ only in NumericKind.
kInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kSInt64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kSFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kSigned,
kUInt64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
kFixed64 = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,
kDouble = 0 | FieldKind::kFixed64 | NumericKind::kUnsigned,

// String types:
// Only the FieldKind is set; the StringKind bits are left as zero.
kBytes = FieldKind::kBytes,
kString = FieldKind::kString,

// Message types:
kMessage = 0 | FieldKind::kMessage | MessageKind::kEager,
kLazyMessage = 0 | FieldKind::kMessage | MessageKind::kLazy,
kGroup = 0 | FieldKind::kMessage | MessageKind::kGroup,

// Map types:
kMap = FieldKind::kMap,
};
// clang-format on

// Describes one field in 8 bytes: the packed type byte plus either the inline
// hasbit index / offset / field number, or an index of an aux entry that holds
// them when they do not fit.
struct FieldEntry {
  // Sentinel values stored inside an entry.
  static constexpr uint16_t kHasbitFallbackToAux = 0xFF;
  static constexpr uint16_t kFallbackToAux = 0xFFFF;
  static constexpr uint16_t kNoAuxIdx = 0xFFFF;

  // The same limits as the sentinels above, but typed to match the values
  // coming from reflection or protoc that they are compared against when
  // deciding whether an aux entry is needed.
  static constexpr uint32_t kHasbitIdxLimit =
      std::numeric_limits<uint8_t>::max();
  static constexpr uint32_t kOffsetLimit =
      std::numeric_limits<uint16_t>::max();
  static constexpr int kFieldNumberLimit =
      std::numeric_limits<uint16_t>::max();

  // Inline form (expected to be the common case): every value fits, so no aux
  // entry is referenced.
  constexpr FieldEntry(uint8_t type, uint8_t hasbit_index, uint16_t offset,
                       uint16_t number)
      : field_type(type),
        hasbit_index(hasbit_index),
        offset(offset),
        field_number(number),
        aux_index(kNoAuxIdx) {}

  // Aux form: used when any of hasbit_index, offset or field_number is too big
  // to fit. All three are set to their fallback sentinel and the real values
  // live in the aux entry at `aux_index`.
  constexpr FieldEntry(uint8_t type, uint16_t aux_index)
      : field_type(type),
        hasbit_index(kHasbitFallbackToAux),
        offset(kFallbackToAux),
        field_number(kFallbackToAux),
        aux_index(aux_index) {}

  // True when this entry refers to an aux entry.
  constexpr bool ShouldLookupAuxEntry() const {
    return kNoAuxIdx != aux_index;
  }

  // Accessors for the bit-fields packed into field_type. Each returns the
  // masked bits in place (not shifted down), so results compare directly
  // against the corresponding pre-shifted enumerators.
  uint8_t GetFieldKind() const { return field_type & FieldKind::kMask; }

  uint8_t GetCardinality() const { return field_type & Cardinality::kMask; }

  // Only meaningful for numeric kinds (those ordered before kBytes).
  uint8_t GetNumericKind() const {
    ABSL_DCHECK_LT(GetFieldKind(), FieldKind::kBytes);
    return field_type & NumericKind::kMask;
  }

  // Only meaningful for message fields.
  uint8_t GetMessageKind() const {
    ABSL_DCHECK_EQ(GetFieldKind(), FieldKind::kMessage);
    return field_type & MessageKind::kMask;
  }

  // Only meaningful for bytes / string fields.
  uint8_t GetStringKind() const {
    ABSL_DCHECK(GetFieldKind() == FieldKind::kBytes ||
                GetFieldKind() == FieldKind::kString);
    return field_type & StringKind::kMask;
  }

  bool IsSigned() const { return GetNumericKind() == NumericKind::kSigned; }

  // For bytes / string fields only: true when the kind is kString.
  bool IsUTF8() const {
    const uint8_t kind = GetFieldKind();
    ABSL_DCHECK(kind == FieldKind::kBytes || kind == FieldKind::kString);
    return kind == FieldKind::kString;
  }

  bool IsRepeated() const {
    return GetCardinality() == Cardinality::kRepeated;
  }

  // Data members. Their order and widths determine the 8-byte layout.

  // FieldKind, Cardinality and the type-specific kind, packed into one byte.
  uint8_t field_type;
  // Covers up to 256 fields. 0xFF means "look in the aux entry".
  uint8_t hasbit_index;
  // Covers sizeof(Message) up to 64 KiB. 0xFFFF means "look in the aux entry".
  uint16_t offset;
  // Most field numbers should fit in 16 bits. 0xFFFF means "look in the aux
  // entry".
  uint16_t field_number;
  // Only up to 2^16 fallback cases are supported.
  uint16_t aux_index;
};

static_assert(sizeof(FieldEntry) == sizeof(uint64_t), "");

} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google

#endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DECL_H__
100 changes: 100 additions & 0 deletions src/google/protobuf/generated_message_table_gen.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include "google/protobuf/generated_message_table_gen.h"

#include <cstdint>

#include "absl/log/absl_check.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/generated_message_table.h"
#include "google/protobuf/port.h"

namespace google {
namespace protobuf {
namespace internal {
namespace v2 {

using CppStringType = FieldDescriptor::CppStringType;

namespace {

// Picks the StringKind bits for a string-typed `field`. `is_inlined` selects
// the inlined representation for singular fields and must be false for Cord
// fields.
uint8_t GenerateStringKind(const FieldDescriptor* field, bool is_inlined) {
  switch (field->cpp_string_type()) {
    case CppStringType::kCord:
      ABSL_CHECK(!is_inlined);
      return StringKind::kCord;

    // VIEW fields are treated as strings for now.
    case CppStringType::kView:
    case CppStringType::kString:
      // Repeated fields take precedence over the inlined flag.
      if (field->is_repeated()) return StringKind::kStringPtr;
      if (is_inlined) return StringKind::kInlined;
      return StringKind::kArenaPtr;

    default:
      break;
  }
  // Any other string type is not expected here.
  Unreachable();
}

} // namespace

// Assembles the 8-bit type card for `field`: the FieldType alias for its proto
// type, then the cardinality bits, then (for string-typed fields) the
// StringKind bits. `info.is_lazy` and `info.is_inlined` carry the caller's
// decisions about lazy messages and inlined strings.
uint8_t MakeTypeCardForField(const FieldDescriptor* field, FieldTypeInfo info) {
  // Map fields get FieldType::kMap as-is; no further bits are added.
  if (field->is_map()) return FieldType::kMap;

  // Indexed by FieldDescriptor::Type.
  constexpr uint8_t kTypeCardByFieldType[] = {
      0,                     // placeholder as type starts from 1.
      FieldType::kDouble,    // TYPE_DOUBLE
      FieldType::kFloat,     // TYPE_FLOAT
      FieldType::kInt64,     // TYPE_INT64
      FieldType::kUInt64,    // TYPE_UINT64
      FieldType::kInt32,     // TYPE_INT32
      FieldType::kFixed64,   // TYPE_FIXED64
      FieldType::kFixed32,   // TYPE_FIXED32
      FieldType::kBool,      // TYPE_BOOL
      FieldType::kBytes,     // TYPE_STRING (may become kString below)
      FieldType::kGroup,     // TYPE_GROUP
      FieldType::kMessage,   // TYPE_MESSAGE (may become kLazyMessage below)
      FieldType::kBytes,     // TYPE_BYTES
      FieldType::kUInt32,    // TYPE_UINT32
      FieldType::kEnum,      // TYPE_ENUM
      FieldType::kSFixed32,  // TYPE_SFIXED32
      FieldType::kSFixed64,  // TYPE_SFIXED64
      FieldType::kSInt32,    // TYPE_SINT32
      FieldType::kSInt64,    // TYPE_SINT64
  };
  static_assert(
      sizeof(kTypeCardByFieldType) == (FieldDescriptor::MAX_TYPE + 1), "");

  const auto declared_type = field->type();
  uint8_t type_card = kTypeCardByFieldType[declared_type];

  // Replace the table value for lazy messages and for strings that need UTF-8
  // validation.
  if (declared_type == FieldDescriptor::TYPE_MESSAGE) {
    if (info.is_lazy) type_card = FieldType::kLazyMessage;
  } else if (declared_type == FieldDescriptor::TYPE_STRING) {
    if (field->requires_utf8_validation()) type_card = FieldType::kString;
  }

  // Cardinality: repeated wins over oneof membership, which wins over plain
  // presence; everything else is singular.
  type_card |= field->is_repeated()             ? Cardinality::kRepeated
               : field->real_containing_oneof() ? Cardinality::kOneof
               : field->has_presence()          ? Cardinality::kOptional
                                                : Cardinality::kSingular;

  // Only string-typed fields still need their StringKind; signedness for
  // numerics and laziness for messages are already part of the type card.
  if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING) {
    type_card |= GenerateStringKind(field, info.is_inlined);
  }
  return type_card;
}

} // namespace v2
} // namespace internal
} // namespace protobuf
} // namespace google
Loading
Loading