Skip to content

Commit

Permalink
#3365: [Colocation] Use 4 byte PG table ID as dockey prefix
Browse files Browse the repository at this point in the history
Summary:
Currently, colocated system tables use a 16-byte cotable ID as the doc key prefix. For colocated user tables, we want to optimize storage space and use a 4-byte PG table ID instead.
This diff includes:
1) Colocated user tables will use a 4-byte PG table ID. This uses a new ValueTypeChar for PG table OIDs.
2) System tables will continue to use the 16-byte cotable ID for backward compatibility.

Test Plan:
Jenkins
Added pgtable ID tests to doc_key-test.

Reviewers: mikhail, jason

Reviewed By: jason

Subscribers: yql, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D7817
  • Loading branch information
ndeodhar committed Jan 29, 2020
1 parent 44f2aee commit f4d4ea7
Show file tree
Hide file tree
Showing 19 changed files with 352 additions and 77 deletions.
28 changes: 22 additions & 6 deletions src/yb/common/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,10 @@ void Schema::CopyFrom(const Schema& other) {
has_statics_ = other.has_statics_;
table_properties_ = other.table_properties_;
cotable_id_ = other.cotable_id_;
pgtable_id_ = other.pgtable_id_;

// A schema cannot have both a cotable ID and a pgtable ID.
DCHECK(cotable_id_.IsNil() || pgtable_id_ == 0);
}

void Schema::swap(Schema& other) {
Expand All @@ -242,18 +246,24 @@ void Schema::swap(Schema& other) {
std::swap(has_statics_, other.has_statics_);
std::swap(table_properties_, other.table_properties_);
std::swap(cotable_id_, other.cotable_id_);
std::swap(pgtable_id_, other.pgtable_id_);

// A schema cannot have both a cotable ID and a pgtable ID.
DCHECK(cotable_id_.IsNil() || pgtable_id_ == 0);
}

Status Schema::Reset(const vector<ColumnSchema>& cols,
const vector<ColumnId>& ids,
int key_columns,
const TableProperties& table_properties,
const Uuid& cotable_id) {
const Uuid& cotable_id,
const PgTableOid pgtable_id) {
cols_ = cols;
num_key_columns_ = key_columns;
num_hash_key_columns_ = 0;
table_properties_ = table_properties;
cotable_id_ = cotable_id;
pgtable_id_ = pgtable_id;

// Determine whether any column is nullable or static, and count number of hash columns.
has_nullables_ = false;
Expand Down Expand Up @@ -285,6 +295,11 @@ Status Schema::Reset(const vector<ColumnSchema>& cols,
"The number of ids does not match with the number of columns");
}

if (PREDICT_FALSE(!cotable_id.IsNil() && pgtable_id > 0)) {
return STATUS(InvalidArgument,
"Bad schema", "Cannot have both cotable ID and pgtable ID");
}

// Verify that the key columns are not nullable nor static
for (int i = 0; i < key_columns; ++i) {
if (PREDICT_FALSE(cols_[i].is_nullable())) {
Expand Down Expand Up @@ -359,7 +374,7 @@ Status Schema::CreateProjectionByNames(const std::vector<GStringPiece>& col_name
}
cols.push_back(column(idx));
}
return out->Reset(cols, ids, num_key_columns, TableProperties(), cotable_id_);
return out->Reset(cols, ids, num_key_columns, TableProperties(), cotable_id_, pgtable_id_);
}

Status Schema::CreateProjectionByIdsIgnoreMissing(const std::vector<ColumnId>& col_ids,
Expand All @@ -374,7 +389,7 @@ Status Schema::CreateProjectionByIdsIgnoreMissing(const std::vector<ColumnId>& c
cols.push_back(column(idx));
filtered_col_ids.push_back(id);
}
return out->Reset(cols, filtered_col_ids, 0, TableProperties(), cotable_id_);
return out->Reset(cols, filtered_col_ids, 0, TableProperties(), cotable_id_, pgtable_id_);
}

Schema Schema::CopyWithColumnIds() const {
Expand All @@ -383,12 +398,12 @@ Schema Schema::CopyWithColumnIds() const {
for (int32_t i = 0; i < num_columns(); i++) {
ids.push_back(ColumnId(kFirstColumnId + i));
}
return Schema(cols_, ids, num_key_columns_, table_properties_, cotable_id_);
return Schema(cols_, ids, num_key_columns_, table_properties_, cotable_id_, pgtable_id_);
}

Schema Schema::CopyWithoutColumnIds() const {
CHECK(has_column_ids());
return Schema(cols_, num_key_columns_, table_properties_, cotable_id_);
return Schema(cols_, num_key_columns_, table_properties_, cotable_id_, pgtable_id_);
}

Status Schema::VerifyProjectionCompatibility(const Schema& projection) const {
Expand Down Expand Up @@ -462,7 +477,8 @@ string Schema::ToString() const {
JoinStrings(col_strs, ",\n\t"),
"\n]\nproperties: ",
tablet_properties_pb.ShortDebugString(),
cotable_id_.IsNil() ? "" : ("\ncotable_id: " + cotable_id_.ToString()));
cotable_id_.IsNil() ? "" : ("\ncotable_id: " + cotable_id_.ToString()),
pgtable_id_ == 0 ? "" : ("\npgtable_id: " + std::to_string(pgtable_id_)));
}

Status Schema::DecodeRowKey(Slice encoded_key,
Expand Down
51 changes: 43 additions & 8 deletions src/yb/common/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ class TableProperties {
bool is_ysql_catalog_table_ = false;
};

// 32-bit (4-byte) identifier type used for PG table OIDs stored in a Schema.
using PgTableOid = uint32_t;

// The schema for a set of rows.
//
// A Schema is simply a set of columns, along with information about
Expand All @@ -507,7 +509,8 @@ class Schema {
NameToIndexMap::key_equal(),
NameToIndexMapAllocator(&name_to_index_bytes_)),
has_nullables_(false),
cotable_id_(boost::uuids::nil_uuid()) {
cotable_id_(boost::uuids::nil_uuid()),
pgtable_id_(0) {
}

Schema(const Schema& other);
Expand All @@ -526,14 +529,15 @@ class Schema {
Schema(const vector<ColumnSchema>& cols,
int key_columns,
const TableProperties& table_properties = TableProperties(),
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()))
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()),
const PgTableOid pgtable_id = 0)
: name_to_index_bytes_(0),
// TODO: C++11 provides a single-arg constructor
name_to_index_(10,
NameToIndexMap::hasher(),
NameToIndexMap::key_equal(),
NameToIndexMapAllocator(&name_to_index_bytes_)) {
CHECK_OK(Reset(cols, key_columns, table_properties, cotable_id));
CHECK_OK(Reset(cols, key_columns, table_properties, cotable_id, pgtable_id));
}

// Construct a schema with the given information.
Expand All @@ -546,24 +550,26 @@ class Schema {
const vector<ColumnId>& ids,
int key_columns,
const TableProperties& table_properties = TableProperties(),
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()))
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()),
const PgTableOid pgtable_id = 0)
: name_to_index_bytes_(0),
// TODO: C++11 provides a single-arg constructor
name_to_index_(10,
NameToIndexMap::hasher(),
NameToIndexMap::key_equal(),
NameToIndexMapAllocator(&name_to_index_bytes_)) {
CHECK_OK(Reset(cols, ids, key_columns, table_properties, cotable_id));
CHECK_OK(Reset(cols, ids, key_columns, table_properties, cotable_id, pgtable_id));
}

// Reset this Schema object to the given schema.
// If this fails, the Schema object is left in an inconsistent
// state and may not be used.
CHECKED_STATUS Reset(const vector<ColumnSchema>& cols, int key_columns,
const TableProperties& table_properties = TableProperties(),
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid())) {
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()),
const PgTableOid pgtable_id = 0) {
std::vector<ColumnId> ids;
return Reset(cols, ids, key_columns, table_properties, cotable_id);
return Reset(cols, ids, key_columns, table_properties, cotable_id, pgtable_id);
}

// Reset this Schema object to the given schema.
Expand All @@ -573,7 +579,8 @@ class Schema {
const vector<ColumnId>& ids,
int key_columns,
const TableProperties& table_properties = TableProperties(),
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()));
const Uuid& cotable_id = Uuid(boost::uuids::nil_uuid()),
const PgTableOid pgtable_id = 0);

// Return the number of bytes needed to represent a single row of this schema.
//
Expand Down Expand Up @@ -762,10 +769,34 @@ class Schema {
return cotable_id_;
}

// Returns true if a cotable ID is set on this schema, i.e. cotable_id_ is not the nil UUID.
bool has_cotable_id() const {
return !cotable_id_.IsNil();
}

// Sets the cotable ID of this schema.
//
// A schema cannot have both a cotable ID and a pgtable ID, so when a non-nil cotable ID is
// supplied this checks (via DCHECK_EQ) that no pgtable ID is currently set.
void set_cotable_id(const Uuid& cotable_id) {
if (!cotable_id.IsNil()) {
// Only a non-nil cotable ID can conflict with a pgtable ID; a nil ID is not checked.
DCHECK_EQ(pgtable_id_, 0);
}
cotable_id_ = cotable_id;
}

// Gets and sets the PG table OID of the non-primary table this schema belongs to in a tablet
// with colocated tables. A value of 0 means no PG table OID is set (see has_pgtable_id()).
//
// The OID is returned by value. The return type is deliberately not const-qualified: a
// top-level const on a scalar returned by value has no effect for callers.
PgTableOid pgtable_id() const {
  return pgtable_id_;
}

// Returns true if a PG table OID is set on this schema. PgTableOid is an unsigned integer
// type, so "set" simply means the stored value is non-zero.
bool has_pgtable_id() const {
  return pgtable_id_ != 0;
}

// Sets the PG table OID of this schema.
//
// A schema cannot have both a cotable ID and a pgtable ID, so when a non-zero pgtable ID is
// supplied this checks (via DCHECK) that the cotable ID is nil.
void set_pgtable_id(const PgTableOid pgtable_id) {
if (pgtable_id > 0) {
// Only a non-zero pgtable ID can conflict with a cotable ID; 0 is not checked.
DCHECK(cotable_id_.IsNil());
}
pgtable_id_ = pgtable_id;
}

// Extract a given column from a row where the type is
// known at compile-time. The type is checked with a debug
// assertion -- but if the wrong type is used and these assertions
Expand Down Expand Up @@ -1090,6 +1121,10 @@ class Schema {
// primary or single-tenant table.
Uuid cotable_id_;

// PG table OID of the non-primary table this schema belongs to in a tablet with colocated
// tables. 0 (unset) for the primary or single-tenant table.
PgTableOid pgtable_id_;

// NOTE: if you add more members, make sure to add the appropriate
// code to swap() and CopyFrom() as well to prevent subtle bugs.
};
Expand Down
62 changes: 45 additions & 17 deletions src/yb/docdb/doc_key-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,25 @@ class DocKeyTest : public YBTest {
vector<SubDocKey> sub_doc_keys;
Uuid cotable_id;
EXPECT_OK(cotable_id.FromHexString("0123456789abcdef0123456789abcdef"));
for (bool has_cotable_id : {false, true}) {

std::vector<std::pair<Uuid, PgTableOid>> table_id_pairs;
table_id_pairs.emplace_back(cotable_id, 0);
table_id_pairs.emplace_back(Uuid(boost::uuids::nil_uuid()), 9911);
table_id_pairs.emplace_back(Uuid(boost::uuids::nil_uuid()), 0);

for (const auto& table_id_pair : table_id_pairs) {
for (int num_hash_keys = 0; num_hash_keys <= kMaxNumHashKeys; ++num_hash_keys) {
for (int num_range_keys = 0; num_range_keys <= kMaxNumRangeKeys; ++num_range_keys) {
for (int num_sub_keys = 0; num_sub_keys <= kMaxNumSubKeys; ++num_sub_keys) {
for (int has_hybrid_time = 0; has_hybrid_time <= 1; ++has_hybrid_time) {
SubDocKey sub_doc_key;

if (has_cotable_id) {
if (!table_id_pair.first.IsNil()) {
sub_doc_key.doc_key().set_cotable_id(cotable_id);
} else if (table_id_pair.second > 0) {
sub_doc_key.doc_key().set_pgtable_id(table_id_pair.second);
}

if (num_hash_keys > 0) {
sub_doc_key.doc_key().set_hash(kAsciiFriendlyHash);
}
Expand Down Expand Up @@ -464,13 +473,19 @@ TEST_F(DocKeyTest, TestDecodePrefixLengths) {
size_t expected_hash_enc_size = 0;
const DocKey& doc_key = sub_doc_key.doc_key();
DocKey hash_only_key;
if (!doc_key.hashed_group().empty() || doc_key.has_cotable_id()) {
if (!doc_key.hashed_group().empty() || doc_key.has_cotable_id() || doc_key.has_pgtable_id()) {
if (doc_key.has_cotable_id()) {
if (doc_key.hashed_group().empty()) {
hash_only_key = DocKey(doc_key.cotable_id());
} else {
hash_only_key = DocKey(doc_key.cotable_id(), doc_key.hash(), doc_key.hashed_group());
}
} else if (doc_key.has_pgtable_id()) {
if (doc_key.hashed_group().empty()) {
hash_only_key = DocKey(doc_key.pgtable_id());
} else {
hash_only_key = DocKey(doc_key.pgtable_id(), doc_key.hash(), doc_key.hashed_group());
}
} else {
hash_only_key = DocKey(doc_key.hash(), doc_key.hashed_group());
}
Expand All @@ -484,14 +499,16 @@ TEST_F(DocKeyTest, TestDecodePrefixLengths) {
SubDocKey cur_key;
boost::container::small_vector<size_t, 8> prefix_lengths;
std::vector<size_t> expected_prefix_lengths;
if (doc_key.has_hash() || doc_key.has_cotable_id()) {
if (doc_key.has_hash() && doc_key.has_cotable_id()) {
cur_key.doc_key() = DocKey(doc_key.cotable_id(), doc_key.hash(), doc_key.hashed_group());
} else if (doc_key.has_hash()) {
if (doc_key.has_hash() || doc_key.has_cotable_id() || doc_key.has_pgtable_id()) {
if (doc_key.has_hash()) {
cur_key.doc_key() = DocKey(doc_key.hash(), doc_key.hashed_group());
} else {
cur_key.doc_key() = DocKey(doc_key.cotable_id());
}
if (doc_key.has_cotable_id()) {
cur_key.doc_key().set_cotable_id(doc_key.cotable_id());
} else if (doc_key.has_pgtable_id()) {
cur_key.doc_key().set_pgtable_id(doc_key.pgtable_id());
}

// Subtract one to avoid counting the final kGroupEnd, unless this is the entire key.
if (doc_key.range_group().empty()) {
expected_prefix_lengths.push_back(cur_key.Encode().size());
Expand Down Expand Up @@ -567,20 +584,31 @@ TEST_F(DocKeyTest, TestEnumerateIntents) {
std::vector<SubDocKey> expected_intents;
SubDocKey current_expected_intent;

if (sub_doc_key.doc_key().has_cotable_id()) {
DocKey cotable_id_only_doc_key;
cotable_id_only_doc_key.set_cotable_id(sub_doc_key.doc_key().cotable_id());
current_expected_intent = SubDocKey(cotable_id_only_doc_key);
if (sub_doc_key.doc_key().has_cotable_id() || sub_doc_key.doc_key().has_pgtable_id()) {
DocKey table_id_only_doc_key;
if (sub_doc_key.doc_key().has_cotable_id()) {
table_id_only_doc_key.set_cotable_id(sub_doc_key.doc_key().cotable_id());
} else {
table_id_only_doc_key.set_pgtable_id(sub_doc_key.doc_key().pgtable_id());
}
current_expected_intent = SubDocKey(table_id_only_doc_key);
expected_intents.push_back(current_expected_intent);
} else {
expected_intents.push_back(SubDocKey());
}

if (!sub_doc_key.doc_key().hashed_group().empty()) {
current_expected_intent = SubDocKey(DocKey(
sub_doc_key.doc_key().cotable_id(),
sub_doc_key.doc_key().hash(),
sub_doc_key.doc_key().hashed_group()));
if (sub_doc_key.doc_key().has_cotable_id()) {
current_expected_intent = SubDocKey(DocKey(
sub_doc_key.doc_key().cotable_id(),
sub_doc_key.doc_key().hash(),
sub_doc_key.doc_key().hashed_group()));
} else {
current_expected_intent = SubDocKey(DocKey(
sub_doc_key.doc_key().pgtable_id(),
sub_doc_key.doc_key().hash(),
sub_doc_key.doc_key().hashed_group()));
}
expected_intents.push_back(current_expected_intent);
}

Expand Down
Loading

0 comments on commit f4d4ea7

Please sign in to comment.