* Batch look-ups to see if rooms are partially stated. * Fix issues found in linting. * Fix typo. * Apply suggestions from code review Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Clarify comments. Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> * Also improve the cache size while we're at it * is_partial_state_rooms -> is_partial_state_room_batched * Run `black` * Improve annotation for `simple_select_many_batch` * Fix is_partial_state_room_batched impl * Okay, _actually_ fix impl * Update description. * Update synapse/storage/databases/main/room.py Co-authored-by: Patrick Cloke <clokep@users.noreply.github.com> * Run black. Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> Co-authored-by: David Robertson <davidr@element.io> (tags/v1.76.0rc2)
@@ -0,0 +1 @@ | |||||
Faster joins: Improve performance of looking up partial-state status of rooms. |
@@ -1383,16 +1383,21 @@ class SyncHandler: | |||||
if not sync_config.filter_collection.lazy_load_members(): | if not sync_config.filter_collection.lazy_load_members(): | ||||
# Non-lazy syncs should never include partially stated rooms. | # Non-lazy syncs should never include partially stated rooms. | ||||
# Exclude all partially stated rooms from this sync. | # Exclude all partially stated rooms from this sync. | ||||
for room_id in mutable_joined_room_ids: | |||||
if await self.store.is_partial_state_room(room_id): | |||||
mutable_rooms_to_exclude.add(room_id) | |||||
results = await self.store.is_partial_state_room_batched( | |||||
mutable_joined_room_ids | |||||
) | |||||
mutable_rooms_to_exclude.update( | |||||
room_id | |||||
for room_id, is_partial_state in results.items() | |||||
if is_partial_state | |||||
) | |||||
# Incremental eager syncs should additionally include rooms that | # Incremental eager syncs should additionally include rooms that | ||||
# - we are joined to | # - we are joined to | ||||
# - are full-stated | # - are full-stated | ||||
# - became fully-stated at some point during the sync period | # - became fully-stated at some point during the sync period | ||||
# (These rooms will have been omitted during a previous eager sync.) | # (These rooms will have been omitted during a previous eager sync.) | ||||
forced_newly_joined_room_ids = set() | |||||
forced_newly_joined_room_ids: Set[str] = set() | |||||
if since_token and not sync_config.filter_collection.lazy_load_members(): | if since_token and not sync_config.filter_collection.lazy_load_members(): | ||||
un_partial_stated_rooms = ( | un_partial_stated_rooms = ( | ||||
await self.store.get_un_partial_stated_rooms_between( | await self.store.get_un_partial_stated_rooms_between( | ||||
@@ -1401,9 +1406,14 @@ class SyncHandler: | |||||
mutable_joined_room_ids, | mutable_joined_room_ids, | ||||
) | ) | ||||
) | ) | ||||
for room_id in un_partial_stated_rooms: | |||||
if not await self.store.is_partial_state_room(room_id): | |||||
forced_newly_joined_room_ids.add(room_id) | |||||
results = await self.store.is_partial_state_room_batched( | |||||
un_partial_stated_rooms | |||||
) | |||||
forced_newly_joined_room_ids.update( | |||||
room_id | |||||
for room_id, is_partial_state in results.items() | |||||
if not is_partial_state | |||||
) | |||||
# Now we have our list of joined room IDs, exclude as configured and freeze | # Now we have our list of joined room IDs, exclude as configured and freeze | ||||
joined_room_ids = frozenset( | joined_room_ids = frozenset( | ||||
@@ -1819,7 +1819,7 @@ class DatabasePool: | |||||
keyvalues: Optional[Dict[str, Any]] = None, | keyvalues: Optional[Dict[str, Any]] = None, | ||||
desc: str = "simple_select_many_batch", | desc: str = "simple_select_many_batch", | ||||
batch_size: int = 100, | batch_size: int = 100, | ||||
) -> List[Any]: | |||||
) -> List[Dict[str, Any]]: | |||||
"""Executes a SELECT query on the named table, which may return zero or | """Executes a SELECT query on the named table, which may return zero or | ||||
more rows, returning the result as a list of dicts. | more rows, returning the result as a list of dicts. | ||||
@@ -60,9 +60,9 @@ from synapse.storage.util.id_generators import ( | |||||
MultiWriterIdGenerator, | MultiWriterIdGenerator, | ||||
StreamIdGenerator, | StreamIdGenerator, | ||||
) | ) | ||||
from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID | |||||
from synapse.types import JsonDict, RetentionPolicy, StrCollection, ThirdPartyInstanceID | |||||
from synapse.util import json_encoder | from synapse.util import json_encoder | ||||
from synapse.util.caches.descriptors import cached | |||||
from synapse.util.caches.descriptors import cached, cachedList | |||||
from synapse.util.stringutils import MXC_REGEX | from synapse.util.stringutils import MXC_REGEX | ||||
if TYPE_CHECKING: | if TYPE_CHECKING: | ||||
@@ -1255,7 +1255,7 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): | |||||
return room_servers | return room_servers | ||||
@cached() | |||||
@cached(max_entries=10000) | |||||
async def is_partial_state_room(self, room_id: str) -> bool: | async def is_partial_state_room(self, room_id: str) -> bool: | ||||
"""Checks if this room has partial state. | """Checks if this room has partial state. | ||||
@@ -1274,6 +1274,27 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): | |||||
return entry is not None | return entry is not None | ||||
@cachedList(cached_method_name="is_partial_state_room", list_name="room_ids")
async def is_partial_state_room_batched(
    self, room_ids: StrCollection
) -> Mapping[str, bool]:
    """Look up the partial-state status of several rooms at once.

    A room is "partial-state" when the state at events in the room, and
    `current_state_events`, may not yet be complete.

    Args:
        room_ids: The IDs of the rooms to check.

    Returns:
        A mapping with one entry per requested room ID: True if the room
        has a row in the `partial_state_rooms` table, False otherwise.
    """
    # Fetch only the rows for the requested rooms; a room with no row in
    # `partial_state_rooms` simply does not appear in the result.
    matching_rows: List[Dict[str, str]] = await self.db_pool.simple_select_many_batch(
        table="partial_state_rooms",
        column="room_id",
        iterable=room_ids,
        retcols=("room_id",),
        desc="is_partial_state_room_batched",
    )

    rooms_with_partial_state = {row["room_id"] for row in matching_rows}

    # Every requested room gets an explicit answer, including the ones the
    # query returned nothing for.
    return {
        room_id: room_id in rooms_with_partial_state
        for room_id in room_ids
    }
async def get_join_event_id_and_device_lists_stream_id_for_partial_state( | async def get_join_event_id_and_device_lists_stream_id_for_partial_state( | ||||
self, room_id: str | self, room_id: str | ||||
) -> Tuple[str, int]: | ) -> Tuple[str, int]: | ||||