You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

1142 lines
47 KiB

  1. # Copyright 2022 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Optional
  15. from unittest import mock
  16. from twisted.test.proto_helpers import MemoryReactor
  17. from synapse.api.errors import AuthError, StoreError
  18. from synapse.api.room_versions import RoomVersion
  19. from synapse.event_auth import (
  20. check_state_dependent_auth_rules,
  21. check_state_independent_auth_rules,
  22. )
  23. from synapse.events import make_event_from_dict
  24. from synapse.events.snapshot import EventContext
  25. from synapse.federation.transport.client import StateRequestResponse
  26. from synapse.logging.context import LoggingContext
  27. from synapse.rest import admin
  28. from synapse.rest.client import login, room
  29. from synapse.server import HomeServer
  30. from synapse.state import StateResolutionStore
  31. from synapse.state.v2 import _mainline_sort, _reverse_topological_power_sort
  32. from synapse.types import JsonDict
  33. from synapse.util import Clock
  34. from tests import unittest
  35. from tests.test_utils import event_injection
  36. class FederationEventHandlerTests(unittest.FederatingHomeserverTestCase):
  37. servlets = [
  38. admin.register_servlets,
  39. login.register_servlets,
  40. room.register_servlets,
  41. ]
  def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer:
      """Create the test homeserver with a mocked federation transport client.

      The mock exposes only the endpoints named below, each replaced by an
      `AsyncMock`, and is stored on `self` so that individual tests can stub
      responses and inspect calls.
      """
      endpoints = ["get_room_state_ids", "get_room_state", "get_event", "backfill"]

      # Restrict the mock to the listed endpoints, then make each awaitable.
      transport_client = mock.Mock(spec=endpoints)
      for endpoint in endpoints:
          setattr(transport_client, endpoint, mock.AsyncMock())

      self.mock_federation_transport_client = transport_client
      return super().setup_test_homeserver(
          federation_transport_client=transport_client
      )
  def test_process_pulled_event_with_missing_state(self) -> None:
      """Check that a pulled event with lots of missing state is handled correctly.

      We pretend to process a "pulled" event (e.g. one received via backfill or
      get_missing_events) whose prev_event we have never seen. The server
      therefore has to ask for the state at that prev_event and, because so much
      of that state is unknown to us, we expect it to make a /state request.

      The pulled event must end up persisted with the expected state.
      """
      self._test_process_pulled_event_with_missing_state(
          prev_exists_as_outlier=False
      )
  def test_process_pulled_event_with_missing_state_where_prev_is_outlier(
      self,
  ) -> None:
      """Check that a pulled event with lots of missing state is handled correctly.

      A variation on test_process_pulled_event_with_missing_state: the "pulled"
      event still refers to a prev_event with lots of state, but this time we
      already hold that prev_event. It is held as an outlier (if it were a
      regular event there would be no need to request the state).
      """
      self._test_process_pulled_event_with_missing_state(
          prev_exists_as_outlier=True
      )
  def _test_process_pulled_event_with_missing_state(
      self, prev_exists_as_outlier: bool
  ) -> None:
      """Shared body of the "pulled event with missing state" tests.

      Creates a room, joins a remote user, and mocks up nine remote state events
      plus a `prev_event` that the local server has not processed. A pulled
      event pointing at that `prev_event` is then fed into
      `_process_pulled_event`, with the `/state_ids` and `/state` endpoints
      stubbed to return the mocked state.

      Afterwards we check that the pulled event is persisted as a non-outlier
      and that the state at the pulled event matches the state we claimed for
      the `prev_event`.

      Args:
          prev_exists_as_outlier: if True, the `prev_event` is persisted up front
              as an outlier and we assert that no `get_event` request was made.
              If False, the mocked `get_event` endpoint serves the `prev_event`.
      """
      OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
      main_store = self.hs.get_datastores().main
      state_storage_controller = self.hs.get_storage_controllers().state

      # create the room
      user_id = self.register_user("kermit", "test")
      tok = self.login("kermit", "test")
      room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
      room_version = self.get_success(main_store.get_room_version(room_id))

      # allow the remote user to send state events
      self.helper.send_state(
          room_id,
          "m.room.power_levels",
          {"events_default": 0, "state_default": 0},
          tok=tok,
      )

      # add the remote user to the room
      member_event = self.get_success(
          event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
      )

      initial_state_map = self.get_success(
          main_store.get_partial_current_state_ids(room_id)
      )

      auth_event_ids = [
          initial_state_map[("m.room.create", "")],
          initial_state_map[("m.room.power_levels", "")],
          member_event.event_id,
      ]

      # mock up a load of state events which we are missing
      state_events = [
          make_event_from_dict(
              self.add_hashes_and_signatures_from_other_server(
                  {
                      "type": "test_state_type",
                      "state_key": f"state_{i}",
                      "room_id": room_id,
                      "sender": OTHER_USER,
                      "prev_events": [member_event.event_id],
                      "auth_events": auth_event_ids,
                      "origin_server_ts": 1,
                      "depth": 10,
                      "content": {"body": f"state_{i}"},
                  }
              ),
              room_version,
          )
          for i in range(1, 10)
      ]

      # this is the state that we are going to claim is active at the prev_event.
      state_at_prev_event = state_events + self.get_success(
          main_store.get_events_as_list(initial_state_map.values())
      )

      # mock up a prev event.
      # Depending on the test, we either persist this upfront (as an outlier),
      # or let the server request it.
      prev_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(
              {
                  "type": "test_regular_type",
                  "room_id": room_id,
                  "sender": OTHER_USER,
                  "prev_events": [],
                  "auth_events": auth_event_ids,
                  "origin_server_ts": 1,
                  "depth": 11,
                  "content": {"body": "missing_prev"},
              }
          ),
          room_version,
      )

      if prev_exists_as_outlier:
          prev_event.internal_metadata.outlier = True
          persistence = self.hs.get_storage_controllers().persistence
          assert persistence is not None
          self.get_success(
              persistence.persist_event(
                  prev_event,
                  EventContext.for_outlier(self.hs.get_storage_controllers()),
              )
          )
      else:

          async def get_event(
              destination: str, event_id: str, timeout: Optional[int] = None
          ) -> JsonDict:
              # Only the prev_event, requested from the other server, is expected.
              self.assertEqual(destination, self.OTHER_SERVER_NAME)
              self.assertEqual(event_id, prev_event.event_id)
              return {"pdus": [prev_event.get_pdu_json()]}

          self.mock_federation_transport_client.get_event.side_effect = get_event

      # mock up a regular event to pass into _process_pulled_event
      pulled_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(
              {
                  "type": "test_regular_type",
                  "room_id": room_id,
                  "sender": OTHER_USER,
                  "prev_events": [prev_event.event_id],
                  "auth_events": auth_event_ids,
                  "origin_server_ts": 1,
                  "depth": 12,
                  "content": {"body": "pulled"},
              }
          ),
          room_version,
      )

      # we expect an outbound request to /state_ids, so stub that out
      self.mock_federation_transport_client.get_room_state_ids.return_value = {
          "pdu_ids": [e.event_id for e in state_at_prev_event],
          "auth_chain_ids": [],
      }

      # we also expect an outbound request to /state
      self.mock_federation_transport_client.get_room_state.return_value = (
          StateRequestResponse(auth_events=[], state=state_at_prev_event)
      )

      # we have to bump the clock a bit, to keep the retry logic in
      # FederationClient.get_pdu happy
      self.reactor.advance(60000)

      # Finally, the call under test: send the pulled event into _process_pulled_event
      with LoggingContext("test"):
          self.get_success(
              self.hs.get_federation_event_handler()._process_pulled_event(
                  self.OTHER_SERVER_NAME, pulled_event, backfilled=False
              )
          )

      # check that the event is correctly persisted
      #
      # `allow_none=True` lets a missing event come back as `None`, so that the
      # assertion below (and its message) is what reports the failure, rather
      # than an error raised from inside `get_event`.
      persisted = self.get_success(
          main_store.get_event(pulled_event.event_id, allow_none=True)
      )
      self.assertIsNotNone(persisted, "pulled event was not persisted at all")
      # Narrow the Optional for the type checker; already verified just above.
      assert persisted is not None
      self.assertFalse(
          persisted.internal_metadata.is_outlier(), "pulled event was an outlier"
      )

      # check that the state at that event is as expected
      state = self.get_success(
          state_storage_controller.get_state_ids_for_event(pulled_event.event_id)
      )
      expected_state = {
          (e.type, e.state_key): e.event_id for e in state_at_prev_event
      }
      self.assertEqual(state, expected_state)

      # If we already had the prev_event, there was no reason to fetch it again.
      if prev_exists_as_outlier:
          self.mock_federation_transport_client.get_event.assert_not_called()
  def test_process_pulled_event_records_failed_backfill_attempts(
      self,
  ) -> None:
      """
      Check that failed backfill attempts for an event are recorded in the
      `event_failed_pull_attempts` table.

      We pretend to process a "pulled" event via backfill. Its only
      `prev_event` is fake, so our server has never seen it and tries to
      request the state at that `prev_event`. The stubbed remote returns no
      state at all, so the "pulled" event fails the history check and fails to
      process.

      We process the event twice and check that the number of failed pull
      attempts goes 1, then 2. As a sanity check, the "pulled" event must not
      have been persisted.
      """
      store = self.hs.get_datastores().main
      remote_user = f"@user:{self.OTHER_SERVER_NAME}"
      transport = self.mock_federation_transport_client

      # Create the room
      creator = self.register_user("kermit", "test")
      creator_token = self.login("kermit", "test")
      room_id = self.helper.create_room_as(room_creator=creator, tok=creator_token)
      room_version = self.get_success(store.get_room_version(room_id))

      # We expect outbound requests to /state_ids and /state, so stub both out.
      # Each mimics the other server not knowing about the state at all, which
      # should make Synapse throw an error (`Unable to get missing prev_event
      # $fake_prev_event`) and fail to backfill the pulled event.
      transport.get_room_state_ids.return_value = {
          "pdu_ids": [],
          "auth_chain_ids": [],
      }
      transport.get_room_state.return_value = StateRequestResponse(
          auth_events=[],
          state=[],
      )

      unsigned_pdu: JsonDict = {
          "type": "test_regular_type",
          "room_id": room_id,
          "sender": remote_user,
          # The fake prev event will make the pulled event fail the history
          # check (`Unable to get missing prev_event $fake_prev_event`)
          "prev_events": ["$fake_prev_event"],
          "auth_events": [],
          "origin_server_ts": 1,
          "depth": 12,
          "content": {"body": "pulled"},
      }
      pulled_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(unsigned_pdu),
          room_version,
      )

      def recorded_attempts() -> int:
          """Read `num_attempts` for the pulled event from the database."""
          return self.get_success(
              store.db_pool.simple_select_one_onecol(
                  table="event_failed_pull_attempts",
                  keyvalues={"event_id": pulled_event.event_id},
                  retcol="num_attempts",
              )
          )

      # The function under test: try to process the pulled event, twice. Each
      # failed attempt should bump `num_attempts` by one.
      for expected_attempts in (1, 2):
          with LoggingContext("test"):
              self.get_success(
                  self.hs.get_federation_event_handler()._process_pulled_event(
                      self.OTHER_SERVER_NAME, pulled_event, backfilled=True
                  )
              )
          self.assertEqual(recorded_attempts(), expected_attempts)

      # And as a sanity check, make sure the event was not persisted through
      # all of this.
      self.assertIsNone(
          self.get_success(store.get_event(pulled_event.event_id, allow_none=True)),
          "pulled event that fails the history check should not be persisted at all",
      )
  def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted(
      self,
  ) -> None:
      """
      Check that failed pull attempts (`event_failed_pull_attempts` table) for
      an event are cleared once the event is successfully persisted.

      We record one fake failed pull attempt for a "pulled" event and then
      process that event via backfill. The event processes successfully, and
      afterwards there must be no failed pull attempt row left for it.
      """
      store = self.hs.get_datastores().main
      remote_user = f"@user:{self.OTHER_SERVER_NAME}"

      # Create the room
      creator = self.register_user("kermit", "test")
      creator_token = self.login("kermit", "test")
      room_id = self.helper.create_room_as(room_creator=creator, tok=creator_token)
      room_version = self.get_success(store.get_room_version(room_id))

      # Allow the remote user to send state events
      permissive_power_levels = {"events_default": 0, "state_default": 0}
      self.helper.send_state(
          room_id, "m.room.power_levels", permissive_power_levels, tok=creator_token
      )

      # Add the remote user to the room
      remote_join = self.get_success(
          event_injection.inject_member_event(self.hs, room_id, remote_user, "join")
      )

      current_state = self.get_success(store.get_partial_current_state_ids(room_id))
      auth_event_ids = [
          current_state[("m.room.create", "")],
          current_state[("m.room.power_levels", "")],
          remote_join.event_id,
      ]

      unsigned_pdu: JsonDict = {
          "type": "test_regular_type",
          "room_id": room_id,
          "sender": remote_user,
          "prev_events": [remote_join.event_id],
          "auth_events": auth_event_ids,
          "origin_server_ts": 1,
          "depth": 12,
          "content": {"body": "pulled"},
      }
      pulled_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(unsigned_pdu),
          room_version,
      )

      # Fake the "pulled" event failing to backfill once so we can test
      # whether it is cleared out later on.
      self.get_success(
          store.record_event_failed_pull_attempt(
              pulled_event.room_id, pulled_event.event_id, "fake cause"
          )
      )

      # Make sure we have a failed pull attempt recorded for the pulled event
      attempts_before = self.get_success(
          store.db_pool.simple_select_one_onecol(
              table="event_failed_pull_attempts",
              keyvalues={"event_id": pulled_event.event_id},
              retcol="num_attempts",
          )
      )
      self.assertEqual(attempts_before, 1)

      # The function under test: try to process the pulled event
      with LoggingContext("test"):
          self.get_success(
              self.hs.get_federation_event_handler()._process_pulled_event(
                  self.OTHER_SERVER_NAME, pulled_event, backfilled=True
              )
          )

      # Make sure the failed pull attempts for the pulled event are cleared.
      # `allow_none=True` turns "no row" into `None` instead of an error.
      attempts_after = self.get_success(
          store.db_pool.simple_select_one_onecol(
              table="event_failed_pull_attempts",
              keyvalues={"event_id": pulled_event.event_id},
              retcol="num_attempts",
              allow_none=True,
          )
      )
      self.assertIsNone(attempts_after)

      # And as a sanity check, make sure the "pulled" event was persisted.
      self.assertIsNotNone(
          self.get_success(store.get_event(pulled_event.event_id, allow_none=True)),
          "pulled event was not persisted at all",
      )
  def test_backfill_signature_failure_does_not_fetch_same_prev_event_later(
      self,
  ) -> None:
      """
      Check that we back off and don't try to fetch a missing prev_event when
      we already know it has an invalid signature, having checked the
      signatures of all of the events in the backfill response.
      """
      store = self.hs.get_datastores().main
      remote_user = f"@user:{self.OTHER_SERVER_NAME}"
      transport = self.mock_federation_transport_client

      # Create the room
      creator = self.register_user("kermit", "test")
      creator_token = self.login("kermit", "test")
      room_id = self.helper.create_room_as(room_creator=creator, tok=creator_token)
      room_version = self.get_success(store.get_room_version(room_id))

      # Allow the remote user to send state events
      self.helper.send_state(
          room_id,
          "m.room.power_levels",
          {"events_default": 0, "state_default": 0},
          tok=creator_token,
      )

      # Add the remote user to the room
      remote_join = self.get_success(
          event_injection.inject_member_event(self.hs, room_id, remote_user, "join")
      )

      current_state = self.get_success(store.get_partial_current_state_ids(room_id))
      auth_event_ids = [
          current_state[("m.room.create", "")],
          current_state[("m.room.power_levels", "")],
          remote_join.event_id,
      ]

      # We purposely don't run `add_hashes_and_signatures_from_other_server`
      # over this because we want the signature check to fail.
      unsigned_pdu: JsonDict = {
          "type": "test_regular_type",
          "room_id": room_id,
          "sender": remote_user,
          "prev_events": [remote_join.event_id],
          "auth_events": auth_event_ids,
          "origin_server_ts": 1,
          "depth": 12,
          "content": {"body": "pulled_event_without_signatures"},
      }
      pulled_event_without_signatures = make_event_from_dict(
          unsigned_pdu, room_version
      )

      # Create a regular event that should pass except for the
      # `pulled_event_without_signatures` in the `prev_event`.
      valid_pdu: JsonDict = {
          "type": "test_regular_type",
          "room_id": room_id,
          "sender": remote_user,
          "prev_events": [
              remote_join.event_id,
              pulled_event_without_signatures.event_id,
          ],
          "auth_events": auth_event_ids,
          "origin_server_ts": 1,
          "depth": 12,
          "content": {"body": "pulled_event"},
      }
      pulled_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(valid_pdu),
          room_version,
      )

      # We expect an outbound request to /backfill, so stub that out.
      #
      # The order of the PDUs is one of the important aspects of this test:
      #  * `pulled_event_without_signatures` comes first so it fails the
      #    signature check when the backfill response is filtered down to events
      #    which have valid signatures in
      #    `_check_sigs_and_hash_for_pulled_events_and_fetch`.
      #  * Later, when the validly signed event is processed and its missing
      #    `prev_event`s are fetched, we want to back off and not try to fetch
      #    `pulled_event_without_signatures` again, since we know it just had
      #    an invalid signature.
      transport.backfill.return_value = {
          "origin": self.OTHER_SERVER_NAME,
          "origin_server_ts": 123,
          "pdus": [
              pulled_event_without_signatures.get_pdu_json(),
              pulled_event.get_pdu_json(),
          ],
      }

      # Keep track of how often each endpoint is hit, so we can make sure we
      # don't make any of these requests.
      unexpected_requests = {"event": 0, "state_ids": 0, "state": 0}

      async def get_event(
          destination: str, event_id: str, timeout: Optional[int] = None
      ) -> None:
          unexpected_requests["event"] += 1

      async def get_room_state_ids(
          destination: str, room_id: str, event_id: str
      ) -> None:
          unexpected_requests["state_ids"] += 1

      async def get_room_state(
          room_version: RoomVersion, destination: str, room_id: str, event_id: str
      ) -> None:
          unexpected_requests["state"] += 1

      # We don't expect an outbound request to `/event`, `/state_ids`, or
      # `/state` in the happy path but if the logic is sneaking around what we
      # expect, stub them out so we can detect that failure.
      transport.get_event.side_effect = get_event
      transport.get_room_state_ids.side_effect = get_room_state_ids
      transport.get_room_state.side_effect = get_room_state

      # The function under test: try to backfill and process the pulled event
      with LoggingContext("test"):
          self.get_success(
              self.hs.get_federation_event_handler().backfill(
                  self.OTHER_SERVER_NAME,
                  room_id,
                  limit=1,
                  extremities=["$some_extremity"],
              )
          )

      if unexpected_requests["event"] > 0:
          self.fail(
              "We don't expect an outbound request to /event in the happy path but if "
              "the logic is sneaking around what we expect, make sure to fail the test. "
              "We don't expect it because the signature failure should cause us to backoff "
              "and not asking about pulled_event_without_signatures="
              f"{pulled_event_without_signatures.event_id} again"
          )

      if unexpected_requests["state_ids"] > 0:
          self.fail(
              "We don't expect an outbound request to /state_ids in the happy path but if "
              "the logic is sneaking around what we expect, make sure to fail the test. "
              "We don't expect it because the signature failure should cause us to backoff "
              "and not asking about pulled_event_without_signatures="
              f"{pulled_event_without_signatures.event_id} again"
          )

      if unexpected_requests["state"] > 0:
          self.fail(
              "We don't expect an outbound request to /state in the happy path but if "
              "the logic is sneaking around what we expect, make sure to fail the test. "
              "We don't expect it because the signature failure should cause us to backoff "
              "and not asking about pulled_event_without_signatures="
              f"{pulled_event_without_signatures.event_id} again"
          )

      # Make sure we only recorded a single failure, which corresponds to the
      # initial signature failure in
      # `_check_sigs_and_hash_for_pulled_events_and_fetch`, before we process
      # all of the pulled events.
      attempts_for_unsigned_event = self.get_success(
          store.db_pool.simple_select_one_onecol(
              table="event_failed_pull_attempts",
              keyvalues={"event_id": pulled_event_without_signatures.event_id},
              retcol="num_attempts",
          )
      )
      self.assertEqual(attempts_for_unsigned_event, 1)

      # And make sure we didn't record a failure for the event that has the
      # missing prev_event, because we don't want to cause a cascade of
      # failures. Not being able to fetch the `prev_events` just means we won't
      # be able to de-outlier the pulled event. But we can still use an
      # `outlier` in the state/auth chain for another event, so we shouldn't
      # stop a downstream event from trying to pull it.
      self.get_failure(
          store.db_pool.simple_select_one_onecol(
              table="event_failed_pull_attempts",
              keyvalues={"event_id": pulled_event.event_id},
              retcol="num_attempts",
          ),
          # StoreError: 404: No row found
          StoreError,
      )
  def test_backfill_process_previously_failed_pull_attempt_event_in_the_background(
      self,
  ) -> None:
      """
      Sanity check that an event which already has failed pull attempts is
      still processed, even though that happens in the background.
      """
      store = self.hs.get_datastores().main
      remote_user = f"@user:{self.OTHER_SERVER_NAME}"

      # Create the room
      creator = self.register_user("kermit", "test")
      creator_token = self.login("kermit", "test")
      room_id = self.helper.create_room_as(room_creator=creator, tok=creator_token)
      room_version = self.get_success(store.get_room_version(room_id))

      # Allow the remote user to send state events
      self.helper.send_state(
          room_id,
          "m.room.power_levels",
          {"events_default": 0, "state_default": 0},
          tok=creator_token,
      )

      # Add the remote user to the room
      remote_join = self.get_success(
          event_injection.inject_member_event(self.hs, room_id, remote_user, "join")
      )

      current_state = self.get_success(store.get_partial_current_state_ids(room_id))
      auth_event_ids = [
          current_state[("m.room.create", "")],
          current_state[("m.room.power_levels", "")],
          remote_join.event_id,
      ]

      # Create a regular event that should process
      unsigned_pdu: JsonDict = {
          "type": "test_regular_type",
          "room_id": room_id,
          "sender": remote_user,
          "prev_events": [remote_join.event_id],
          "auth_events": auth_event_ids,
          "origin_server_ts": 1,
          "depth": 12,
          "content": {"body": "pulled_event"},
      }
      pulled_event = make_event_from_dict(
          self.add_hashes_and_signatures_from_other_server(unsigned_pdu),
          room_version,
      )

      # Record a failed pull attempt for this event, which will cause us to
      # backfill it in the background from here on out.
      self.get_success(
          store.record_event_failed_pull_attempt(
              room_id, pulled_event.event_id, "fake cause"
          )
      )

      # We expect an outbound request to /backfill, so stub that out
      self.mock_federation_transport_client.backfill.return_value = {
          "origin": self.OTHER_SERVER_NAME,
          "origin_server_ts": 123,
          "pdus": [pulled_event.get_pdu_json()],
      }

      # The function under test: try to backfill and process the pulled event
      with LoggingContext("test"):
          self.get_success(
              self.hs.get_federation_event_handler().backfill(
                  self.OTHER_SERVER_NAME,
                  room_id,
                  limit=1,
                  extremities=["$some_extremity"],
              )
          )

      # Ensure `run_as_background_process(...)` has a chance to run
      # (essentially `wait_for_background_processes()`)
      self.reactor.pump((0.1,))

      # Make sure we processed and persisted the pulled event. With
      # `allow_none=False` the lookup itself fails if the event is missing.
      self.get_success(store.get_event(pulled_event.event_id, allow_none=False))
  665. def test_process_pulled_event_with_rejected_missing_state(self) -> None:
  666. """Ensure that we correctly handle pulled events with missing state containing a
  667. rejected state event
  668. In this test, we pretend we are processing a "pulled" event (eg, via backfill
  669. or get_missing_events). The pulled event has a prev_event we haven't previously
  670. seen, so the server requests the state at that prev_event. We expect the server
  671. to make a /state request.
  672. We simulate a remote server whose /state includes a rejected kick event for a
  673. local user. Notably, the kick event is rejected only because it cites a rejected
  674. auth event and would otherwise be accepted based on the room state. During state
  675. resolution, we re-run auth and can potentially introduce such rejected events
  676. into the state if we are not careful.
  677. We check that the pulled event is correctly persisted, and that the state
  678. afterwards does not include the rejected kick.
  679. """
  680. # The DAG we are testing looks like:
  681. #
  682. # ...
  683. # |
  684. # v
  685. # remote admin user joins
  686. # | |
  687. # +-------+ +-------+
  688. # | |
  689. # | rejected power levels
  690. # | from remote server
  691. # | |
  692. # | v
  693. # | rejected kick of local user
  694. # v from remote server
  695. # new power levels |
  696. # | v
  697. # | missing event
  698. # | from remote server
  699. # | |
  700. # +-------+ +-------+
  701. # | |
  702. # v v
  703. # pulled event
  704. # from remote server
  705. #
  706. # (arrows are in the opposite direction to prev_events.)
  707. OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
  708. main_store = self.hs.get_datastores().main
  709. # Create the room.
  710. kermit_user_id = self.register_user("kermit", "test")
  711. kermit_tok = self.login("kermit", "test")
  712. room_id = self.helper.create_room_as(
  713. room_creator=kermit_user_id, tok=kermit_tok
  714. )
  715. room_version = self.get_success(main_store.get_room_version(room_id))
  716. # Add another local user to the room. This user is going to be kicked in a
  717. # rejected event.
  718. bert_user_id = self.register_user("bert", "test")
  719. bert_tok = self.login("bert", "test")
  720. self.helper.join(room_id, user=bert_user_id, tok=bert_tok)
  721. # Allow the remote user to kick bert.
  722. # The remote user is going to send a rejected power levels event later on and we
  723. # need state resolution to order it before another power levels event kermit is
  724. # going to send later on. Hence we give both users the same power level, so that
  725. # ties are broken by `origin_server_ts`.
  726. self.helper.send_state(
  727. room_id,
  728. "m.room.power_levels",
  729. {"users": {kermit_user_id: 100, OTHER_USER: 100}},
  730. tok=kermit_tok,
  731. )
  732. # Add the remote user to the room.
  733. other_member_event = self.get_success(
  734. event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
  735. )
  736. initial_state_map = self.get_success(
  737. main_store.get_partial_current_state_ids(room_id)
  738. )
  739. create_event = self.get_success(
  740. main_store.get_event(initial_state_map[("m.room.create", "")])
  741. )
  742. bert_member_event = self.get_success(
  743. main_store.get_event(initial_state_map[("m.room.member", bert_user_id)])
  744. )
  745. power_levels_event = self.get_success(
  746. main_store.get_event(initial_state_map[("m.room.power_levels", "")])
  747. )
  748. # We now need a rejected state event that will fail
  749. # `check_state_independent_auth_rules` but pass
  750. # `check_state_dependent_auth_rules`.
  751. # First, we create a power levels event that we pretend the remote server has
  752. # accepted, but the local homeserver will reject.
  753. next_depth = 100
  754. next_timestamp = other_member_event.origin_server_ts + 100
  755. rejected_power_levels_event = make_event_from_dict(
  756. self.add_hashes_and_signatures_from_other_server(
  757. {
  758. "type": "m.room.power_levels",
  759. "state_key": "",
  760. "room_id": room_id,
  761. "sender": OTHER_USER,
  762. "prev_events": [other_member_event.event_id],
  763. "auth_events": [
  764. initial_state_map[("m.room.create", "")],
  765. initial_state_map[("m.room.power_levels", "")],
  766. # The event will be rejected because of the duplicated auth
  767. # event.
  768. other_member_event.event_id,
  769. other_member_event.event_id,
  770. ],
  771. "origin_server_ts": next_timestamp,
  772. "depth": next_depth,
  773. "content": power_levels_event.content,
  774. }
  775. ),
  776. room_version,
  777. )
  778. next_depth += 1
  779. next_timestamp += 100
  780. with LoggingContext("send_rejected_power_levels_event"):
  781. self.get_success(
  782. self.hs.get_federation_event_handler()._process_pulled_event(
  783. self.OTHER_SERVER_NAME,
  784. rejected_power_levels_event,
  785. backfilled=False,
  786. )
  787. )
  788. self.assertEqual(
  789. self.get_success(
  790. main_store.get_rejection_reason(
  791. rejected_power_levels_event.event_id
  792. )
  793. ),
  794. "auth_error",
  795. )
  796. # Then we create a kick event for a local user that cites the rejected power
  797. # levels event in its auth events. The kick event will be rejected solely
  798. # because of the rejected auth event and would otherwise be accepted.
  799. rejected_kick_event = make_event_from_dict(
  800. self.add_hashes_and_signatures_from_other_server(
  801. {
  802. "type": "m.room.member",
  803. "state_key": bert_user_id,
  804. "room_id": room_id,
  805. "sender": OTHER_USER,
  806. "prev_events": [rejected_power_levels_event.event_id],
  807. "auth_events": [
  808. initial_state_map[("m.room.create", "")],
  809. rejected_power_levels_event.event_id,
  810. initial_state_map[("m.room.member", bert_user_id)],
  811. initial_state_map[("m.room.member", OTHER_USER)],
  812. ],
  813. "origin_server_ts": next_timestamp,
  814. "depth": next_depth,
  815. "content": {"membership": "leave"},
  816. }
  817. ),
  818. room_version,
  819. )
  820. next_depth += 1
  821. next_timestamp += 100
  822. # The kick event must fail the state-independent auth rules, but pass the
  823. # state-dependent auth rules, so that it has a chance of making it through state
  824. # resolution.
  825. self.get_failure(
  826. check_state_independent_auth_rules(main_store, rejected_kick_event),
  827. AuthError,
  828. )
  829. check_state_dependent_auth_rules(
  830. rejected_kick_event,
  831. [create_event, power_levels_event, other_member_event, bert_member_event],
  832. )
  833. # The kick event must also win over the original member event during state
  834. # resolution.
  835. self.assertEqual(
  836. self.get_success(
  837. _mainline_sort(
  838. self.clock,
  839. room_id,
  840. event_ids=[
  841. bert_member_event.event_id,
  842. rejected_kick_event.event_id,
  843. ],
  844. resolved_power_event_id=power_levels_event.event_id,
  845. event_map={
  846. bert_member_event.event_id: bert_member_event,
  847. rejected_kick_event.event_id: rejected_kick_event,
  848. },
  849. state_res_store=StateResolutionStore(main_store),
  850. )
  851. ),
  852. [bert_member_event.event_id, rejected_kick_event.event_id],
  853. "The rejected kick event will not be applied after bert's join event "
  854. "during state resolution. The test setup is incorrect.",
  855. )
  856. with LoggingContext("send_rejected_kick_event"):
  857. self.get_success(
  858. self.hs.get_federation_event_handler()._process_pulled_event(
  859. self.OTHER_SERVER_NAME, rejected_kick_event, backfilled=False
  860. )
  861. )
  862. self.assertEqual(
  863. self.get_success(
  864. main_store.get_rejection_reason(rejected_kick_event.event_id)
  865. ),
  866. "auth_error",
  867. )
  868. # We need another power levels event which will win over the rejected one during
  869. # state resolution, otherwise we hit other issues where we end up with a
  870. # rejected power levels event during state resolution.
  871. self.reactor.advance(100) # ensure the `origin_server_ts` is larger
  872. new_power_levels_event = self.get_success(
  873. main_store.get_event(
  874. self.helper.send_state(
  875. room_id,
  876. "m.room.power_levels",
  877. {"users": {kermit_user_id: 100, OTHER_USER: 100, bert_user_id: 1}},
  878. tok=kermit_tok,
  879. )["event_id"]
  880. )
  881. )
  882. self.assertEqual(
  883. self.get_success(
  884. _reverse_topological_power_sort(
  885. self.clock,
  886. room_id,
  887. event_ids=[
  888. new_power_levels_event.event_id,
  889. rejected_power_levels_event.event_id,
  890. ],
  891. event_map={},
  892. state_res_store=StateResolutionStore(main_store),
  893. full_conflicted_set=set(),
  894. )
  895. ),
  896. [rejected_power_levels_event.event_id, new_power_levels_event.event_id],
  897. "The power levels events will not have the desired ordering during state "
  898. "resolution. The test setup is incorrect.",
  899. )
  900. # Create a missing event, so that the local homeserver has to do a `/state` or
  901. # `/state_ids` request to pull state from the remote homeserver.
  902. missing_event = make_event_from_dict(
  903. self.add_hashes_and_signatures_from_other_server(
  904. {
  905. "type": "m.room.message",
  906. "room_id": room_id,
  907. "sender": OTHER_USER,
  908. "prev_events": [rejected_kick_event.event_id],
  909. "auth_events": [
  910. initial_state_map[("m.room.create", "")],
  911. initial_state_map[("m.room.power_levels", "")],
  912. initial_state_map[("m.room.member", OTHER_USER)],
  913. ],
  914. "origin_server_ts": next_timestamp,
  915. "depth": next_depth,
  916. "content": {"msgtype": "m.text", "body": "foo"},
  917. }
  918. ),
  919. room_version,
  920. )
  921. next_depth += 1
  922. next_timestamp += 100
  923. # The pulled event has two prev events, one of which is missing. We will make a
  924. # `/state` or `/state_ids` request to the remote homeserver to ask it for the
  925. # state before the missing prev event.
  926. pulled_event = make_event_from_dict(
  927. self.add_hashes_and_signatures_from_other_server(
  928. {
  929. "type": "m.room.message",
  930. "room_id": room_id,
  931. "sender": OTHER_USER,
  932. "prev_events": [
  933. new_power_levels_event.event_id,
  934. missing_event.event_id,
  935. ],
  936. "auth_events": [
  937. initial_state_map[("m.room.create", "")],
  938. new_power_levels_event.event_id,
  939. initial_state_map[("m.room.member", OTHER_USER)],
  940. ],
  941. "origin_server_ts": next_timestamp,
  942. "depth": next_depth,
  943. "content": {"msgtype": "m.text", "body": "bar"},
  944. }
  945. ),
  946. room_version,
  947. )
  948. next_depth += 1
  949. next_timestamp += 100
  950. # Prepare the response for the `/state` or `/state_ids` request.
  951. # The remote server believes bert has been kicked, while the local server does
  952. # not.
  953. state_before_missing_event = self.get_success(
  954. main_store.get_events_as_list(initial_state_map.values())
  955. )
  956. state_before_missing_event = [
  957. event
  958. for event in state_before_missing_event
  959. if event.event_id != bert_member_event.event_id
  960. ]
  961. state_before_missing_event.append(rejected_kick_event)
  962. # We have to bump the clock a bit, to keep the retry logic in
  963. # `FederationClient.get_pdu` happy
  964. self.reactor.advance(60000)
  965. with LoggingContext("send_pulled_event"):
  async def get_event(
      destination: str, event_id: str, timeout: Optional[int] = None
  ) -> JsonDict:
      """Stand-in for the mocked federation transport client's `get_event`.

      Verifies that the request is addressed to the other server and asks for
      the missing event, then answers with that event's PDU JSON wrapped in a
      single-element `pdus` list. `timeout` is accepted but not used.
      """
      # Only the missing event is expected to be requested here.
      self.assertEqual(destination, self.OTHER_SERVER_NAME)
      self.assertEqual(event_id, missing_event.event_id)

      missing_pdu = missing_event.get_pdu_json()
      response: JsonDict = {"pdus": [missing_pdu]}
      return response
  async def get_room_state_ids(
      destination: str, room_id: str, event_id: str
  ) -> JsonDict:
      """Stand-in for the mocked federation transport client's `get_room_state_ids`.

      Verifies that the request is addressed to the other server and concerns
      the missing event, then answers with the event IDs of
      `state_before_missing_event` and an empty auth chain. `room_id` is
      accepted but not checked.
      """
      self.assertEqual(destination, self.OTHER_SERVER_NAME)
      self.assertEqual(event_id, missing_event.event_id)

      # Collect the IDs in the same order as the prepared state list.
      state_event_ids = []
      for state_event in state_before_missing_event:
          state_event_ids.append(state_event.event_id)

      response: JsonDict = {"pdu_ids": state_event_ids, "auth_chain_ids": []}
      return response
  async def get_room_state(
      room_version: RoomVersion, destination: str, room_id: str, event_id: str
  ) -> StateRequestResponse:
      """Stand-in for the mocked federation transport client's `get_room_state`.

      Verifies that the request is addressed to the other server and concerns
      the missing event, then answers with `state_before_missing_event` as the
      state and no auth events. `room_version` and `room_id` are accepted but
      not checked.
      """
      self.assertEqual(destination, self.OTHER_SERVER_NAME)
      self.assertEqual(event_id, missing_event.event_id)

      # Full-event counterpart of the `/state_ids` stub: same state, empty
      # auth events.
      response = StateRequestResponse(
          state=state_before_missing_event,
          auth_events=[],
      )
      return response
  990. self.mock_federation_transport_client.get_event.side_effect = get_event
  991. self.mock_federation_transport_client.get_room_state_ids.side_effect = (
  992. get_room_state_ids
  993. )
  994. self.mock_federation_transport_client.get_room_state.side_effect = (
  995. get_room_state
  996. )
  997. self.get_success(
  998. self.hs.get_federation_event_handler()._process_pulled_event(
  999. self.OTHER_SERVER_NAME, pulled_event, backfilled=False
  1000. )
  1001. )
  1002. self.assertIsNone(
  1003. self.get_success(
  1004. main_store.get_rejection_reason(pulled_event.event_id)
  1005. ),
  1006. "Pulled event was unexpectedly rejected, likely due to a problem with "
  1007. "the test setup.",
  1008. )
  1009. self.assertEqual(
  1010. {pulled_event.event_id},
  1011. self.get_success(
  1012. main_store.have_events_in_timeline([pulled_event.event_id])
  1013. ),
  1014. "Pulled event was not persisted, likely due to a problem with the test "
  1015. "setup.",
  1016. )
  1017. # We must not accept rejected events into the room state, so we expect bert
  1018. # to not be kicked, even if the remote server believes so.
  1019. new_state_map = self.get_success(
  1020. main_store.get_partial_current_state_ids(room_id)
  1021. )
  1022. self.assertEqual(
  1023. new_state_map[("m.room.member", bert_user_id)],
  1024. bert_member_event.event_id,
  1025. "Rejected kick event unexpectedly became part of room state.",
  1026. )