Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a Postgres REPLICA IDENTITY to tables that do not have an implicit one. This should allow use of Postgres logical replication. #16456

Merged
merged 12 commits into from
Nov 13, 2023
Merged
1 change: 1 addition & 0 deletions changelog.d/16456.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a Postgres `REPLICA IDENTITY` to tables that do not have an implicit one. This should allow use of Postgres logical replication.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I think these will all go away in #16522, but no reason not to have it be correct for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ohhhh that's why these were there a while ago, went away and now came back

Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- Annotate some tables in Postgres with a REPLICA IDENTITY.
-- Any table that doesn't have a primary key should be annotated explicitly with
-- a REPLICA IDENTITY so that logical replication can be used.
-- If this is not done, then UPDATE and DELETE statements on those tables
-- will fail if logical replication is in use.


-- Where possible, re-use unique indices already defined on tables as a replica
-- identity.
ALTER TABLE appservice_room_list REPLICA IDENTITY USING INDEX appservice_room_list_idx;
ALTER TABLE batch_events REPLICA IDENTITY USING INDEX chunk_events_event_id;
ALTER TABLE blocked_rooms REPLICA IDENTITY USING INDEX blocked_rooms_idx;
ALTER TABLE cache_invalidation_stream_by_instance REPLICA IDENTITY USING INDEX cache_invalidation_stream_by_instance_id;
ALTER TABLE device_lists_changes_in_room REPLICA IDENTITY USING INDEX device_lists_changes_in_stream_id;
ALTER TABLE device_lists_outbound_last_success REPLICA IDENTITY USING INDEX device_lists_outbound_last_success_unique_idx;
ALTER TABLE device_lists_remote_cache REPLICA IDENTITY USING INDEX device_lists_remote_cache_unique_id;
ALTER TABLE device_lists_remote_extremeties REPLICA IDENTITY USING INDEX device_lists_remote_extremeties_unique_idx;
ALTER TABLE device_lists_remote_resync REPLICA IDENTITY USING INDEX device_lists_remote_resync_idx;
ALTER TABLE e2e_cross_signing_keys REPLICA IDENTITY USING INDEX e2e_cross_signing_keys_stream_idx;
ALTER TABLE e2e_room_keys REPLICA IDENTITY USING INDEX e2e_room_keys_with_version_idx;
ALTER TABLE e2e_room_keys_versions REPLICA IDENTITY USING INDEX e2e_room_keys_versions_idx;
ALTER TABLE erased_users REPLICA IDENTITY USING INDEX erased_users_user;
ALTER TABLE event_relations REPLICA IDENTITY USING INDEX event_relations_id;
ALTER TABLE federation_inbound_events_staging REPLICA IDENTITY USING INDEX federation_inbound_events_staging_instance_event;
ALTER TABLE federation_stream_position REPLICA IDENTITY USING INDEX federation_stream_position_instance;
ALTER TABLE ignored_users REPLICA IDENTITY USING INDEX ignored_users_uniqueness;
ALTER TABLE insertion_events REPLICA IDENTITY USING INDEX insertion_events_event_id;
ALTER TABLE insertion_event_extremities REPLICA IDENTITY USING INDEX insertion_event_extremities_event_id;
ALTER TABLE monthly_active_users REPLICA IDENTITY USING INDEX monthly_active_users_users;
ALTER TABLE ratelimit_override REPLICA IDENTITY USING INDEX ratelimit_override_idx;
ALTER TABLE room_stats_earliest_token REPLICA IDENTITY USING INDEX room_stats_earliest_token_idx;
ALTER TABLE room_stats_state REPLICA IDENTITY USING INDEX room_stats_state_room;
ALTER TABLE stream_positions REPLICA IDENTITY USING INDEX stream_positions_idx;
ALTER TABLE user_directory REPLICA IDENTITY USING INDEX user_directory_user_idx;
ALTER TABLE user_directory_search REPLICA IDENTITY USING INDEX user_directory_search_user_idx;
ALTER TABLE user_ips REPLICA IDENTITY USING INDEX user_ips_user_token_ip_unique_index;
ALTER TABLE user_signature_stream REPLICA IDENTITY USING INDEX user_signature_stream_idx;
ALTER TABLE users_in_public_rooms REPLICA IDENTITY USING INDEX users_in_public_rooms_u_idx;
ALTER TABLE users_who_share_private_rooms REPLICA IDENTITY USING INDEX users_who_share_private_rooms_u_idx;
ALTER TABLE user_threepid_id_server REPLICA IDENTITY USING INDEX user_threepid_id_server_idx;
ALTER TABLE worker_locks REPLICA IDENTITY USING INDEX worker_locks_key;


-- Where there are no unique indices, use the entire rows as replica identities.
ALTER TABLE current_state_delta_stream REPLICA IDENTITY FULL;
ALTER TABLE deleted_pushers REPLICA IDENTITY FULL;
ALTER TABLE device_auth_providers REPLICA IDENTITY FULL;
ALTER TABLE device_federation_inbox REPLICA IDENTITY FULL;
ALTER TABLE device_federation_outbox REPLICA IDENTITY FULL;
ALTER TABLE device_inbox REPLICA IDENTITY FULL;
ALTER TABLE device_lists_outbound_pokes REPLICA IDENTITY FULL;
ALTER TABLE device_lists_stream REPLICA IDENTITY FULL;
ALTER TABLE e2e_cross_signing_signatures REPLICA IDENTITY FULL;
ALTER TABLE event_auth_chain_links REPLICA IDENTITY FULL;
ALTER TABLE event_auth REPLICA IDENTITY FULL;
ALTER TABLE event_push_actions_staging REPLICA IDENTITY FULL;
ALTER TABLE insertion_event_edges REPLICA IDENTITY FULL;
ALTER TABLE local_media_repository_url_cache REPLICA IDENTITY FULL;
ALTER TABLE presence_stream REPLICA IDENTITY FULL;
ALTER TABLE push_rules_stream REPLICA IDENTITY FULL;
ALTER TABLE room_alias_servers REPLICA IDENTITY FULL;
ALTER TABLE stream_ordering_to_exterm REPLICA IDENTITY FULL;
ALTER TABLE timeline_gaps REPLICA IDENTITY FULL;
ALTER TABLE user_daily_visits REPLICA IDENTITY FULL;
ALTER TABLE users_pending_deactivation REPLICA IDENTITY FULL;

-- special cases: unique indices on nullable columns can't be used
ALTER TABLE event_push_summary REPLICA IDENTITY FULL;
ALTER TABLE event_search REPLICA IDENTITY FULL;
ALTER TABLE local_media_repository_thumbnails REPLICA IDENTITY FULL;
ALTER TABLE remote_media_cache_thumbnails REPLICA IDENTITY FULL;
ALTER TABLE threepid_guest_access_tokens REPLICA IDENTITY FULL;
ALTER TABLE user_filters REPLICA IDENTITY FULL; -- sadly the `CHECK` constraint is not enough here
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* Copyright 2023 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

-- Annotate some tables in Postgres with a REPLICA IDENTITY.
-- Any table that doesn't have a primary key should be annotated explicitly with
-- a REPLICA IDENTITY so that logical replication can be used.
-- If this is not done, then UPDATE and DELETE statements on those tables
-- will fail if logical replication is in use.
-- See also: 82/04_replica_identities.sql.postgres on the main database


-- Where possible, re-use unique indices already defined on tables as a replica
-- identity.
ALTER TABLE state_group_edges REPLICA IDENTITY USING INDEX state_group_edges_unique_idx;


-- Where there are no unique indices, use the entire rows as replica identities.
ALTER TABLE state_groups_state REPLICA IDENTITY FULL;
84 changes: 83 additions & 1 deletion tests/storage/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Tuple
from typing import Callable, List, Tuple
from unittest.mock import Mock, call

from twisted.internet import defer
Expand All @@ -29,6 +29,7 @@
from synapse.util import Clock

from tests import unittest
from tests.utils import USE_POSTGRES_FOR_TESTS


class TupleComparisonClauseTestCase(unittest.TestCase):
Expand Down Expand Up @@ -278,3 +279,84 @@ def _test_txn(txn: LoggingTransaction) -> None:
]
)
self.assertEqual(exception_callback.call_count, 6) # no additional calls


class PostgresReplicaIdentityTestCase(unittest.HomeserverTestCase):
if not USE_POSTGRES_FOR_TESTS:
skip = "Requires Postgres"

def prepare(
self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
) -> None:
self.db_pools = homeserver.get_datastores().databases

def test_all_tables_have_postgres_replica_identity(self) -> None:
"""
Tests that all tables have a Postgres REPLICA IDENTITY.
(See #16224).

Tables with a PRIMARY KEY have an implied REPLICA IDENTITY and are fine.
Other tables need them to be set with `ALTER TABLE`.

A REPLICA IDENTITY is required for Postgres logical replication to work
properly without blocking updates and deletes.
"""

sql = """
-- Select tables that have no primary key and use the default replica identity rule
-- (the default is to use the primary key)
WITH tables_no_pkey AS (
SELECT tbl.table_schema, tbl.table_name
FROM information_schema.tables tbl
WHERE table_type = 'BASE TABLE'
AND table_schema not in ('pg_catalog', 'information_schema')
AND NOT EXISTS (
SELECT 1
FROM information_schema.key_column_usage kcu
WHERE kcu.table_name = tbl.table_name
AND kcu.table_schema = tbl.table_schema
)
)
SELECT pg_class.oid::regclass FROM tables_no_pkey INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
WHERE relreplident = 'd'

UNION

-- Also select tables that use an index as a replica identity
-- but where the index doesn't exist
-- (e.g. it could have been deleted)
SELECT pg_class.oid::regclass
FROM information_schema.tables tbl
INNER JOIN pg_class ON pg_class.oid::regclass = table_name::regclass
WHERE table_type = 'BASE TABLE'
AND table_schema not in ('pg_catalog', 'information_schema')

-- 'i' means an index is used as the replica identity
AND relreplident = 'i'

-- look for indices that are marked as the replica identity
AND NOT EXISTS (
SELECT indexrelid::regclass
FROM pg_index
WHERE indrelid = pg_class.oid::regclass AND indisreplident
)
"""

def _list_tables_with_missing_replica_identities_txn(
txn: LoggingTransaction,
) -> List[str]:
txn.execute(sql)
return [table_name for table_name, in txn]

for pool in self.db_pools:
missing = self.get_success(
pool.runInteraction(
"test_list_missing_replica_identities",
_list_tables_with_missing_replica_identities_txn,
)
)
self.assertEqual(
len(missing),
0,
f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.",
)
Loading