Skip to content

Commit

Permalink
[#9343] ybase: PITR Fix VerifyRestoredObjects logic
Browse files Browse the repository at this point in the history
Summary:
After restoration is complete, when we verify if Restoration completed properly, we currently match the RUNNING tables (and
corresponding tablets/namespaces) against all the objects that should have been restored. This is
incorrect and leads to failures for instance for objects that were deleted even before the restoration time. There
are several scenarios that can cause this. For e.g.

1. Create a table
2. Drop the table
3. Create another table
4. Restore before 4 and after 3

Another scenario:

1. Create a table
2. Drop the table
3. Wait for snapshot schedule retention time so that the table gets deleted
4. Create another table
5. Restore before 4 and after 3

Another scenario:

1. Create table
2. Restore before create table
3. Create another table
4. Restore before 3 and after 2

Test Plan: ybd --cxx_test yb-admin-snapshot-schedule-test YbAdminSnapshotScheduleTest.TestVerifyRestorationLogic

Reviewers: bogdan, sergei

Reviewed By: sergei

Subscribers: ybase

Differential Revision: https://phabricator.dev.yugabyte.com/D12556
  • Loading branch information
sanketkedia committed Aug 12, 2021
1 parent 5ad090f commit 68dcd38
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 2 deletions.
36 changes: 34 additions & 2 deletions ent/src/yb/master/restore_sys_catalog_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ CHECKED_STATUS ApplyWriteRequest(
return operation.Apply(apply_data);
}

bool TableDeleted(const SysTablesEntryPB& table) {
return table.state() == SysTablesEntryPB::DELETED ||
table.state() == SysTablesEntryPB::DELETING ||
table.hide_state() == SysTablesEntryPB::HIDING ||
table.hide_state() == SysTablesEntryPB::HIDDEN;
}

template <class PB>
struct GetEntryType;

Expand Down Expand Up @@ -97,7 +104,6 @@ void RestoreSysCatalogState::AddRestoringEntry(
entry.set_id(id);
pb_util::SerializeToString(pb, buffer);
entry.set_data(buffer->data(), buffer->size());
restoration_.non_system_objects_to_restore.emplace(id, type);
}

Status RestoreSysCatalogState::Process() {
Expand Down Expand Up @@ -153,13 +159,15 @@ template <class ProcessEntry>
Status RestoreSysCatalogState::DetermineEntries(
const Objects& objects, bool is_restoration_objects, const ProcessEntry& process_entry) {
std::unordered_set<NamespaceId> restored_namespaces;
std::unordered_set<NamespaceId> non_deleted_restored_namespaces;
std::unordered_set<TableId> restored_tables;
std::unordered_set<TableId> non_deleted_restored_tables;
faststring buffer;
for (const auto& id_and_metadata : objects.tables) {
VLOG_WITH_FUNC(3) << "Checking: " << id_and_metadata.first << ", "
<< id_and_metadata.second.ShortDebugString();

bool match;
bool match, deleted;
if (id_and_metadata.second.has_index_info()) {
auto it = objects.tables.find(id_and_metadata.second.index_info().indexed_table_id());
if (it == objects.tables.end()) {
Expand All @@ -169,9 +177,11 @@ Status RestoreSysCatalogState::DetermineEntries(
id_and_metadata.second.name());
}
match = VERIFY_RESULT(objects.MatchTable(restoration_.filter, it->first, it->second));
deleted = TableDeleted(it->second);
} else {
match = VERIFY_RESULT(objects.MatchTable(
restoration_.filter, id_and_metadata.first, id_and_metadata.second));
deleted = TableDeleted(id_and_metadata.second);
}
if (!match) {
continue;
Expand All @@ -192,6 +202,24 @@ Status RestoreSysCatalogState::DetermineEntries(
}
continue;
}
if (is_restoration_objects && !deleted) {
auto type = GetEntryType<SysTablesEntryPB>::value;
restoration_.non_system_objects_to_restore.emplace(id_and_metadata.first, type);

if (non_deleted_restored_namespaces.insert(id_and_metadata.second.namespace_id()).second) {
auto namespace_it = objects.namespaces.find(id_and_metadata.second.namespace_id());
if (namespace_it == objects.namespaces.end()) {
return STATUS_FORMAT(
NotFound, "Namespace $0 not found for table $1",
id_and_metadata.second.namespace_id(),
id_and_metadata.first, id_and_metadata.second.name());
}
auto type = GetEntryType<SysNamespaceEntryPB>::value;
restoration_.non_system_objects_to_restore.emplace(namespace_it->first, type);
}

non_deleted_restored_tables.insert(id_and_metadata.first);
}
if (restored_namespaces.insert(id_and_metadata.second.namespace_id()).second) {
auto namespace_it = objects.namespaces.find(id_and_metadata.second.namespace_id());
if (namespace_it == objects.namespaces.end()) {
Expand All @@ -210,6 +238,10 @@ Status RestoreSysCatalogState::DetermineEntries(
if (restored_tables.count(id_and_metadata.second.table_id()) == 0) {
continue;
}
if (non_deleted_restored_tables.count(id_and_metadata.second.table_id()) != 0) {
auto type = GetEntryType<SysTabletsEntryPB>::value;
restoration_.non_system_objects_to_restore.emplace(id_and_metadata.first, type);
}
process_entry(id_and_metadata, &buffer);
VLOG(2) << "Tablet to restore: " << id_and_metadata.first << ", "
<< id_and_metadata.second.ShortDebugString();
Expand Down
43 changes: 43 additions & 0 deletions src/yb/tools/yb-admin-snapshot-schedule-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,49 @@ TEST_F(YbAdminSnapshotScheduleTest, AlterTable) {
}
}

TEST_F(YbAdminSnapshotScheduleTest, TestVerifyRestorationLogic) {
const auto kKeys = Range(10);

auto schedule_id = ASSERT_RESULT(PrepareCql());

auto conn = ASSERT_RESULT(CqlConnect(client::kTableName.namespace_name()));

ASSERT_OK(conn.ExecuteQuery(
"CREATE TABLE test_table (key INT PRIMARY KEY, value TEXT)"));

for (auto key : kKeys) {
ASSERT_OK(conn.ExecuteQuery(Format(
"INSERT INTO test_table (key, value) VALUES ($0, 'A')", key)));
}

ASSERT_OK(conn.ExecuteQuery("DROP TABLE test_table"));

Timestamp time(ASSERT_RESULT(WallClock()->Now()).time_point);

ASSERT_OK(conn.ExecuteQuery(
"CREATE TABLE test_table1 (key INT PRIMARY KEY)"));

for (auto key : kKeys) {
ASSERT_OK(conn.ExecuteQuery(Format(
"INSERT INTO test_table1 (key) VALUES ($0)", key)));
}

ASSERT_OK(RestoreSnapshotSchedule(schedule_id, time));

// Reconnect because of caching issues with YCQL.
conn = ASSERT_RESULT(CqlConnect(client::kTableName.namespace_name()));

ASSERT_NOK(conn.ExecuteQuery("SELECT * from test_table"));

ASSERT_OK(WaitFor([&conn]() -> Result<bool> {
auto res = conn.ExecuteQuery("SELECT * from test_table1");
if (res.ok()) {
return false;
}
return true;
}, 30s * kTimeMultiplier, "Wait for table to be deleted"));
}

class YbAdminSnapshotConsistentRestoreTest : public YbAdminSnapshotScheduleTest {
public:
virtual std::vector<std::string> ExtraTSFlags() {
Expand Down

0 comments on commit 68dcd38

Please sign in to comment.