diff --git a/changelog.d/14917.misc b/changelog.d/14917.misc new file mode 100644 index 0000000000..4d1dd2639a --- /dev/null +++ b/changelog.d/14917.misc @@ -0,0 +1 @@ +Faster joins: Improve performance of looking up partial-state status of rooms. diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ee11764567..5ebd3ea855 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1383,16 +1383,21 @@ class SyncHandler: if not sync_config.filter_collection.lazy_load_members(): # Non-lazy syncs should never include partially stated rooms. # Exclude all partially stated rooms from this sync. - for room_id in mutable_joined_room_ids: - if await self.store.is_partial_state_room(room_id): - mutable_rooms_to_exclude.add(room_id) + results = await self.store.is_partial_state_room_batched( + mutable_joined_room_ids + ) + mutable_rooms_to_exclude.update( + room_id + for room_id, is_partial_state in results.items() + if is_partial_state + ) # Incremental eager syncs should additionally include rooms that # - we are joined to # - are full-stated # - became fully-stated at some point during the sync period # (These rooms will have been omitted during a previous eager sync.) - forced_newly_joined_room_ids = set() + forced_newly_joined_room_ids: Set[str] = set() if since_token and not sync_config.filter_collection.lazy_load_members(): un_partial_stated_rooms = ( await self.store.get_un_partial_stated_rooms_between( @@ -1401,9 +1406,14 @@ class SyncHandler: mutable_joined_room_ids, ) ) - for room_id in un_partial_stated_rooms: - if not await self.store.is_partial_state_room(room_id): - forced_newly_joined_room_ids.add(room_id) + results = await self.store.is_partial_state_room_batched( + un_partial_stated_rooms + ) + forced_newly_joined_room_ids.update( + room_id + for room_id, is_partial_state in results.items() + if not is_partial_state + ) # Now we have our list of joined room IDs, exclude as configured and freeze joined_room_ids = frozenset( diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 88479a16db..e20c5c5302 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -1819,7 +1819,7 @@ class DatabasePool: keyvalues: Optional[Dict[str, Any]] = None, desc: str = "simple_select_many_batch", batch_size: int = 100, - ) -> List[Any]: + ) -> List[Dict[str, Any]]: """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 3aa7b94560..fbbc018887 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import ( MultiWriterIdGenerator, StreamIdGenerator, ) -from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID +from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID from synapse.util import json_encoder -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import cached, cachedList from synapse.util.stringutils import MXC_REGEX if TYPE_CHECKING: @@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): return room_servers - @cached() + @cached(max_entries=10000) async def is_partial_state_room(self, room_id: str) -> bool: """Checks if this room has partial state. @@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): return entry is not None + @cachedList(cached_method_name="is_partial_state_room", list_name="room_ids") + async def is_partial_state_room_batched( + self, room_ids: StrCollection + ) -> Mapping[str, bool]: + """Checks if the given rooms have partial state. + + Returns true for "partial-state" rooms, which means that the state + at events in the room, and `current_state_events`, may not yet be + complete. + """ + + rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch( + table="partial_state_rooms", + column="room_id", + iterable=room_ids, + retcols=("room_id",), + desc="is_partial_state_room_batched", + ) + partial_state_rooms = {row_dict["room_id"] for row_dict in rows} + return {room_id: room_id in partial_state_rooms for room_id in room_ids} + async def get_join_event_id_and_device_lists_stream_id_for_partial_state( self, room_id: str ) -> Tuple[str, int]: