* Enable Complement tests for Faster Remote Room Joins on worker-mode * (dangerous) Add an override to allow Complement to use FRRJ under workers * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org> * Fix race where we didn't send out replication notification * MORE HACKS * Fix get_un_partial_stated_rooms_token to take instance_name * Fix bad merge * Remove warning * Correctly advance un_partial_stated_room_stream * Fix merge * Add another notify_replication * Fixups * Create a separate ReplicationNotifier * Fix test * Fix portdb * Create a separate ReplicationNotifier * Fix test * Fix portdb * Fix presence test * Newsfile * Apply suggestions from code review * Update changelog.d/14752.misc Co-authored-by: Erik Johnston <erik@matrix.org> * lint Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org> Co-authored-by: Erik Johnston <erik@matrix.org>

tags/v1.76.0rc1
@@ -0,0 +1 @@ | |||||
Enable Complement tests for Faster Remote Room Joins against worker-mode Synapse. |
@@ -94,10 +94,8 @@ allow_device_name_lookup_over_federation: true | |||||
experimental_features: | experimental_features: | ||||
# Enable history backfilling support | # Enable history backfilling support | ||||
msc2716_enabled: true | msc2716_enabled: true | ||||
{% if not workers_in_use %} | |||||
# client-side support for partial state in /send_join responses | # client-side support for partial state in /send_join responses | ||||
faster_joins: true | faster_joins: true | ||||
{% endif %} | |||||
# Enable support for polls | # Enable support for polls | ||||
msc3381_polls_enabled: true | msc3381_polls_enabled: true | ||||
# Enable deleting device-specific notification settings stored in account data | # Enable deleting device-specific notification settings stored in account data | ||||
@@ -190,7 +190,7 @@ fi | |||||
extra_test_args=() | extra_test_args=() | ||||
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930" | |||||
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930,faster_joins" | |||||
# All environment variables starting with PASS_ will be shared. | # All environment variables starting with PASS_ will be shared. | ||||
# (The prefix is stripped off before reaching the container.) | # (The prefix is stripped off before reaching the container.) | ||||
@@ -223,12 +223,9 @@ else | |||||
export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite | export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite | ||||
fi | fi | ||||
# We only test faster room joins on monoliths, because they are purposefully | |||||
# being developed without worker support to start with. | |||||
# | |||||
# The tests for importing historical messages (MSC2716) also only pass with monoliths, | |||||
# currently. | |||||
test_tags="$test_tags,faster_joins,msc2716" | |||||
# The tests for importing historical messages (MSC2716) | |||||
# only pass with monoliths, currently. | |||||
test_tags="$test_tags,msc2716" | |||||
fi | fi | ||||
@@ -282,13 +282,6 @@ def start(config_options: List[str]) -> None: | |||||
"synapse.app.user_dir", | "synapse.app.user_dir", | ||||
) | ) | ||||
if config.experimental.faster_joins_enabled: | |||||
raise ConfigError( | |||||
"You have enabled the experimental `faster_joins` config option, but it is " | |||||
"not compatible with worker deployments yet. Please disable `faster_joins` " | |||||
"or run Synapse as a single process deployment instead." | |||||
) | |||||
synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts | synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts | ||||
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage | synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage | ||||
@@ -974,6 +974,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): | |||||
self.federation = hs.get_federation_client() | self.federation = hs.get_federation_client() | ||||
self.clock = hs.get_clock() | self.clock = hs.get_clock() | ||||
self.device_handler = device_handler | self.device_handler = device_handler | ||||
self._notifier = hs.get_notifier() | |||||
self._remote_edu_linearizer = Linearizer(name="remote_device_list") | self._remote_edu_linearizer = Linearizer(name="remote_device_list") | ||||
@@ -1054,6 +1055,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater): | |||||
user_id, | user_id, | ||||
device_id, | device_id, | ||||
) | ) | ||||
self._notifier.notify_replication() | |||||
room_ids = await self.store.get_rooms_for_user(user_id) | room_ids = await self.store.get_rooms_for_user(user_id) | ||||
if not room_ids: | if not room_ids: | ||||
@@ -1870,14 +1870,15 @@ class FederationHandler: | |||||
logger.info("Clearing partial-state flag for %s", room_id) | logger.info("Clearing partial-state flag for %s", room_id) | ||||
success = await self.store.clear_partial_state_room(room_id) | success = await self.store.clear_partial_state_room(room_id) | ||||
# Poke the notifier so that other workers see the write to | |||||
# the un-partial-stated rooms stream. | |||||
self._notifier.notify_replication() | |||||
if success: | if success: | ||||
logger.info("State resync complete for %s", room_id) | logger.info("State resync complete for %s", room_id) | ||||
self._storage_controllers.state.notify_room_un_partial_stated( | self._storage_controllers.state.notify_room_un_partial_stated( | ||||
room_id | room_id | ||||
) | ) | ||||
# Poke the notifier so that other workers see the write to | |||||
# the un-partial-stated rooms stream. | |||||
self._notifier.notify_replication() | |||||
# TODO(faster_joins) update room stats and user directory? | # TODO(faster_joins) update room stats and user directory? | ||||
# https://github.com/matrix-org/synapse/issues/12814 | # https://github.com/matrix-org/synapse/issues/12814 | ||||
@@ -16,7 +16,6 @@ from typing import TYPE_CHECKING | |||||
import attr | import attr | ||||
from synapse.replication.tcp.streams import Stream | from synapse.replication.tcp.streams import Stream | ||||
from synapse.replication.tcp.streams._base import current_token_without_instance | |||||
if TYPE_CHECKING: | if TYPE_CHECKING: | ||||
from synapse.server import HomeServer | from synapse.server import HomeServer | ||||
@@ -42,8 +41,7 @@ class UnPartialStatedRoomStream(Stream): | |||||
store = hs.get_datastores().main | store = hs.get_datastores().main | ||||
super().__init__( | super().__init__( | ||||
hs.get_instance_name(), | hs.get_instance_name(), | ||||
# TODO(faster_joins, multiple writers): we need to account for instance names | |||||
current_token_without_instance(store.get_un_partial_stated_rooms_token), | |||||
store.get_un_partial_stated_rooms_token, | |||||
store.get_un_partial_stated_rooms_from_stream, | store.get_un_partial_stated_rooms_from_stream, | ||||
) | ) | ||||
@@ -70,7 +68,6 @@ class UnPartialStatedEventStream(Stream): | |||||
store = hs.get_datastores().main | store = hs.get_datastores().main | ||||
super().__init__( | super().__init__( | ||||
hs.get_instance_name(), | hs.get_instance_name(), | ||||
# TODO(faster_joins, multiple writers): we need to account for instance names | |||||
current_token_without_instance(store.get_un_partial_stated_events_token), | |||||
store.get_un_partial_stated_events_token, | |||||
store.get_un_partial_stated_events_from_stream, | store.get_un_partial_stated_events_from_stream, | ||||
) | ) |
@@ -322,11 +322,12 @@ class EventsWorkerStore(SQLBaseStore): | |||||
"stream_id", | "stream_id", | ||||
) | ) | ||||
def get_un_partial_stated_events_token(self) -> int: | |||||
# TODO(faster_joins, multiple writers): This is inappropriate if there are multiple | |||||
# writers because workers that don't write often will hold all | |||||
# readers up. | |||||
return self._un_partial_stated_events_stream_id_gen.get_current_token() | |||||
def get_un_partial_stated_events_token(self, instance_name: str) -> int: | |||||
return ( | |||||
self._un_partial_stated_events_stream_id_gen.get_current_token_for_writer( | |||||
instance_name | |||||
) | |||||
) | |||||
async def get_un_partial_stated_events_from_stream( | async def get_un_partial_stated_events_from_stream( | ||||
self, instance_name: str, last_id: int, current_id: int, limit: int | self, instance_name: str, last_id: int, current_id: int, limit: int | ||||
@@ -416,6 +417,8 @@ class EventsWorkerStore(SQLBaseStore): | |||||
self._stream_id_gen.advance(instance_name, token) | self._stream_id_gen.advance(instance_name, token) | ||||
elif stream_name == BackfillStream.NAME: | elif stream_name == BackfillStream.NAME: | ||||
self._backfill_id_gen.advance(instance_name, -token) | self._backfill_id_gen.advance(instance_name, -token) | ||||
elif stream_name == UnPartialStatedEventStream.NAME: | |||||
self._un_partial_stated_events_stream_id_gen.advance(instance_name, token) | |||||
super().process_replication_position(stream_name, instance_name, token) | super().process_replication_position(stream_name, instance_name, token) | ||||
async def have_censored_event(self, event_id: str) -> bool: | async def have_censored_event(self, event_id: str) -> bool: | ||||
@@ -43,6 +43,7 @@ from synapse.api.errors import StoreError | |||||
from synapse.api.room_versions import RoomVersion, RoomVersions | from synapse.api.room_versions import RoomVersion, RoomVersions | ||||
from synapse.config.homeserver import HomeServerConfig | from synapse.config.homeserver import HomeServerConfig | ||||
from synapse.events import EventBase | from synapse.events import EventBase | ||||
from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream | |||||
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause | from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause | ||||
from synapse.storage.database import ( | from synapse.storage.database import ( | ||||
DatabasePool, | DatabasePool, | ||||
@@ -144,6 +145,13 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): | |||||
"stream_id", | "stream_id", | ||||
) | ) | ||||
def process_replication_position( | |||||
self, stream_name: str, instance_name: str, token: int | |||||
) -> None: | |||||
if stream_name == UnPartialStatedRoomStream.NAME: | |||||
self._un_partial_stated_rooms_stream_id_gen.advance(instance_name, token) | |||||
return super().process_replication_position(stream_name, instance_name, token) | |||||
async def store_room( | async def store_room( | ||||
self, | self, | ||||
room_id: str, | room_id: str, | ||||
@@ -1281,13 +1289,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore): | |||||
) | ) | ||||
return result["join_event_id"], result["device_lists_stream_id"] | return result["join_event_id"], result["device_lists_stream_id"] | ||||
def get_un_partial_stated_rooms_token(self) -> int: | |||||
# TODO(faster_joins, multiple writers): This is inappropriate if there | |||||
# are multiple writers because workers that don't write often will | |||||
# hold all readers up. | |||||
# (See `MultiWriterIdGenerator.get_persisted_upto_position` for an | |||||
# explanation.) | |||||
return self._un_partial_stated_rooms_stream_id_gen.get_current_token() | |||||
def get_un_partial_stated_rooms_token(self, instance_name: str) -> int: | |||||
return self._un_partial_stated_rooms_stream_id_gen.get_current_token_for_writer( | |||||
instance_name | |||||
) | |||||
async def get_un_partial_stated_rooms_from_stream( | async def get_un_partial_stated_rooms_from_stream( | ||||
self, instance_name: str, last_id: int, current_id: int, limit: int | self, instance_name: str, last_id: int, current_id: int, limit: int | ||||
@@ -95,6 +95,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): | |||||
for row in rows: | for row in rows: | ||||
assert isinstance(row, UnPartialStatedEventStreamRow) | assert isinstance(row, UnPartialStatedEventStreamRow) | ||||
self._get_state_group_for_event.invalidate((row.event_id,)) | self._get_state_group_for_event.invalidate((row.event_id,)) | ||||
self.is_partial_state_event.invalidate((row.event_id,)) | |||||
super().process_replication_rows(stream_name, instance_name, token, rows) | super().process_replication_rows(stream_name, instance_name, token, rows) | ||||
@@ -485,6 +486,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): | |||||
"rejection_status_changed": rejection_status_changed, | "rejection_status_changed": rejection_status_changed, | ||||
}, | }, | ||||
) | ) | ||||
txn.call_after(self.hs.get_notifier().on_new_replication_data) | |||||
class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): | class MainStateBackgroundUpdateStore(RoomMemberWorkerStore): | ||||