You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1213 lines
42 KiB

  1. # Copyright 2018 New Vector Ltd
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the 'License');
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an 'AS IS' BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import datetime
  15. from typing import (
  16. Collection,
  17. Dict,
  18. FrozenSet,
  19. Iterable,
  20. List,
  21. Mapping,
  22. Set,
  23. Tuple,
  24. TypeVar,
  25. Union,
  26. cast,
  27. )
  28. import attr
  29. from parameterized import parameterized
  30. from twisted.test.proto_helpers import MemoryReactor
  31. from synapse.api.constants import EventTypes
  32. from synapse.api.room_versions import (
  33. KNOWN_ROOM_VERSIONS,
  34. EventFormatVersions,
  35. RoomVersion,
  36. )
  37. from synapse.events import EventBase, _EventInternalMetadata
  38. from synapse.rest import admin
  39. from synapse.rest.client import login, room
  40. from synapse.server import HomeServer
  41. from synapse.storage.database import LoggingTransaction
  42. from synapse.storage.types import Cursor
  43. from synapse.types import JsonDict
  44. from synapse.util import Clock, json_encoder
  45. import tests.unittest
  46. import tests.utils
  47. # The silly auth graph we use to test the auth difference algorithm,
  48. # where the top are the most recent events.
  49. #
  50. # A B
  51. # \ /
  52. # D E
  53. # \ |
  54. # ` F C
  55. # | /|
  56. # G ´ |
  57. # | \ |
  58. # H I
  59. # | |
  60. # K J
  61. AUTH_GRAPH: Dict[str, List[str]] = {
  62. "a": ["e"],
  63. "b": ["e"],
  64. "c": ["g", "i"],
  65. "d": ["f"],
  66. "e": ["f"],
  67. "f": ["g"],
  68. "g": ["h", "i"],
  69. "h": ["k"],
  70. "i": ["j"],
  71. "k": [],
  72. "j": [],
  73. }
  74. DEPTH_GRAPH = {
  75. "a": 7,
  76. "b": 7,
  77. "c": 4,
  78. "d": 6,
  79. "e": 6,
  80. "f": 5,
  81. "g": 3,
  82. "h": 2,
  83. "i": 2,
  84. "k": 1,
  85. "j": 1,
  86. }
  87. T = TypeVar("T")
  88. def get_all_topologically_sorted_orders(
  89. nodes: Iterable[T],
  90. graph: Mapping[T, Collection[T]],
  91. ) -> List[List[T]]:
  92. """Given a set of nodes and a graph, return all possible topological
  93. orderings.
  94. """
  95. # This is implemented by Kahn's algorithm, and forking execution each time
  96. # we have a choice over which node to consider next.
  97. degree_map = {node: 0 for node in nodes}
  98. reverse_graph: Dict[T, Set[T]] = {}
  99. for node, edges in graph.items():
  100. if node not in degree_map:
  101. continue
  102. for edge in set(edges):
  103. if edge in degree_map:
  104. degree_map[node] += 1
  105. reverse_graph.setdefault(edge, set()).add(node)
  106. reverse_graph.setdefault(node, set())
  107. zero_degree = [node for node, degree in degree_map.items() if degree == 0]
  108. return _get_all_topologically_sorted_orders_inner(
  109. reverse_graph, zero_degree, degree_map
  110. )
  111. def _get_all_topologically_sorted_orders_inner(
  112. reverse_graph: Dict[T, Set[T]],
  113. zero_degree: List[T],
  114. degree_map: Dict[T, int],
  115. ) -> List[List[T]]:
  116. new_paths = []
  117. # Rather than only choosing *one* item from the list of nodes with zero
  118. # degree, we "fork" execution and run the algorithm for each node in the
  119. # zero degree.
  120. for node in zero_degree:
  121. new_degree_map = degree_map.copy()
  122. new_zero_degree = zero_degree.copy()
  123. new_zero_degree.remove(node)
  124. for edge in reverse_graph.get(node, []):
  125. if edge in new_degree_map:
  126. new_degree_map[edge] -= 1
  127. if new_degree_map[edge] == 0:
  128. new_zero_degree.append(edge)
  129. paths = _get_all_topologically_sorted_orders_inner(
  130. reverse_graph, new_zero_degree, new_degree_map
  131. )
  132. for path in paths:
  133. path.insert(0, node)
  134. new_paths.extend(paths)
  135. if not new_paths:
  136. return [[]]
  137. return new_paths
  138. def get_all_topologically_consistent_subsets(
  139. nodes: Iterable[T],
  140. graph: Mapping[T, Collection[T]],
  141. ) -> Set[FrozenSet[T]]:
  142. """Get all subsets of the graph where if node N is in the subgraph, then all
  143. nodes that can reach that node (i.e. for all X there exists a path X -> N)
  144. are in the subgraph.
  145. """
  146. all_topological_orderings = get_all_topologically_sorted_orders(nodes, graph)
  147. graph_subsets = set()
  148. for ordering in all_topological_orderings:
  149. ordering.reverse()
  150. for idx in range(len(ordering)):
  151. graph_subsets.add(frozenset(ordering[:idx]))
  152. return graph_subsets
  153. @attr.s(auto_attribs=True, frozen=True, slots=True)
  154. class _BackfillSetupInfo:
  155. room_id: str
  156. depth_map: Dict[str, int]
  157. class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase):
  158. servlets = [
  159. admin.register_servlets,
  160. room.register_servlets,
  161. login.register_servlets,
  162. ]
  163. def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
  164. self.store = hs.get_datastores().main
  165. persist_events = hs.get_datastores().persist_events
  166. assert persist_events is not None
  167. self.persist_events = persist_events
  168. def test_get_prev_events_for_room(self) -> None:
  169. room_id = "@ROOM:local"
  170. # add a bunch of events and hashes to act as forward extremities
  171. def insert_event(txn: Cursor, i: int) -> None:
  172. event_id = "$event_%i:local" % i
  173. txn.execute(
  174. (
  175. "INSERT INTO events ("
  176. " room_id, event_id, type, depth, topological_ordering,"
  177. " content, processed, outlier, stream_ordering) "
  178. "VALUES (?, ?, 'm.test', ?, ?, 'test', ?, ?, ?)"
  179. ),
  180. (room_id, event_id, i, i, True, False, i),
  181. )
  182. txn.execute(
  183. (
  184. "INSERT INTO event_forward_extremities (room_id, event_id) "
  185. "VALUES (?, ?)"
  186. ),
  187. (room_id, event_id),
  188. )
  189. for i in range(20):
  190. self.get_success(
  191. self.store.db_pool.runInteraction("insert", insert_event, i)
  192. )
  193. # this should get the last ten
  194. r = self.get_success(self.store.get_prev_events_for_room(room_id))
  195. self.assertEqual(10, len(r))
  196. for i in range(10):
  197. self.assertEqual("$event_%i:local" % (19 - i), r[i])
  198. def test_get_rooms_with_many_extremities(self) -> None:
  199. room1 = "#room1"
  200. room2 = "#room2"
  201. room3 = "#room3"
  202. def insert_event(txn: LoggingTransaction, i: int, room_id: str) -> None:
  203. event_id = "$event_%i:local" % i
  204. # We need to insert into events table to get around the foreign key constraint.
  205. self.store.db_pool.simple_insert_txn(
  206. txn,
  207. table="events",
  208. values={
  209. "instance_name": "master",
  210. "stream_ordering": self.store._stream_id_gen.get_next_txn(txn),
  211. "topological_ordering": 1,
  212. "depth": 1,
  213. "event_id": event_id,
  214. "room_id": room_id,
  215. "type": EventTypes.Message,
  216. "processed": True,
  217. "outlier": False,
  218. "origin_server_ts": 0,
  219. "received_ts": 0,
  220. "sender": "@user:local",
  221. "contains_url": False,
  222. "state_key": None,
  223. "rejection_reason": None,
  224. },
  225. )
  226. txn.execute(
  227. (
  228. "INSERT INTO event_forward_extremities (room_id, event_id) "
  229. "VALUES (?, ?)"
  230. ),
  231. (room_id, event_id),
  232. )
  233. for i in range(20):
  234. self.get_success(
  235. self.store.db_pool.runInteraction("insert", insert_event, i, room1)
  236. )
  237. self.get_success(
  238. self.store.db_pool.runInteraction(
  239. "insert", insert_event, i + 100, room2
  240. )
  241. )
  242. self.get_success(
  243. self.store.db_pool.runInteraction(
  244. "insert", insert_event, i + 200, room3
  245. )
  246. )
  247. # Test simple case
  248. r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, []))
  249. self.assertEqual(len(r), 3)
  250. # Does filter work?
  251. r = self.get_success(self.store.get_rooms_with_many_extremities(5, 5, [room1]))
  252. self.assertTrue(room2 in r)
  253. self.assertTrue(room3 in r)
  254. self.assertEqual(len(r), 2)
  255. r = self.get_success(
  256. self.store.get_rooms_with_many_extremities(5, 5, [room1, room2])
  257. )
  258. self.assertEqual(r, [room3])
  259. # Does filter and limit work?
  260. r = self.get_success(self.store.get_rooms_with_many_extremities(5, 1, [room1]))
  261. self.assertTrue(r == [room2] or r == [room3])
  262. def _setup_auth_chain(self, use_chain_cover_index: bool) -> str:
  263. room_id = "@ROOM:local"
  264. # Mark the room as maybe having a cover index.
  265. def store_room(txn: LoggingTransaction) -> None:
  266. self.store.db_pool.simple_insert_txn(
  267. txn,
  268. "rooms",
  269. {
  270. "room_id": room_id,
  271. "creator": "room_creator_user_id",
  272. "is_public": True,
  273. "room_version": "6",
  274. "has_auth_chain_index": use_chain_cover_index,
  275. },
  276. )
  277. self.get_success(self.store.db_pool.runInteraction("store_room", store_room))
  278. # We rudely fiddle with the appropriate tables directly, as that's much
  279. # easier than constructing events properly.
  280. def insert_event(txn: LoggingTransaction) -> None:
  281. stream_ordering = 0
  282. for event_id in AUTH_GRAPH:
  283. stream_ordering += 1
  284. depth = DEPTH_GRAPH[event_id]
  285. self.store.db_pool.simple_insert_txn(
  286. txn,
  287. table="events",
  288. values={
  289. "event_id": event_id,
  290. "room_id": room_id,
  291. "depth": depth,
  292. "topological_ordering": depth,
  293. "type": "m.test",
  294. "processed": True,
  295. "outlier": False,
  296. "stream_ordering": stream_ordering,
  297. },
  298. )
  299. self.persist_events._persist_event_auth_chain_txn(
  300. txn,
  301. [
  302. cast(EventBase, FakeEvent(event_id, room_id, AUTH_GRAPH[event_id]))
  303. for event_id in AUTH_GRAPH
  304. ],
  305. )
  306. self.get_success(
  307. self.store.db_pool.runInteraction(
  308. "insert",
  309. insert_event,
  310. )
  311. )
  312. return room_id
  313. @parameterized.expand([(True,), (False,)])
  314. def test_auth_chain_ids(self, use_chain_cover_index: bool) -> None:
  315. room_id = self._setup_auth_chain(use_chain_cover_index)
  316. # a and b have the same auth chain.
  317. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["a"]))
  318. self.assertCountEqual(auth_chain_ids, ["e", "f", "g", "h", "i", "j", "k"])
  319. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["b"]))
  320. self.assertCountEqual(auth_chain_ids, ["e", "f", "g", "h", "i", "j", "k"])
  321. auth_chain_ids = self.get_success(
  322. self.store.get_auth_chain_ids(room_id, ["a", "b"])
  323. )
  324. self.assertCountEqual(auth_chain_ids, ["e", "f", "g", "h", "i", "j", "k"])
  325. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["c"]))
  326. self.assertCountEqual(auth_chain_ids, ["g", "h", "i", "j", "k"])
  327. # d and e have the same auth chain.
  328. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["d"]))
  329. self.assertCountEqual(auth_chain_ids, ["f", "g", "h", "i", "j", "k"])
  330. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["e"]))
  331. self.assertCountEqual(auth_chain_ids, ["f", "g", "h", "i", "j", "k"])
  332. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["f"]))
  333. self.assertCountEqual(auth_chain_ids, ["g", "h", "i", "j", "k"])
  334. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["g"]))
  335. self.assertCountEqual(auth_chain_ids, ["h", "i", "j", "k"])
  336. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["h"]))
  337. self.assertEqual(auth_chain_ids, {"k"})
  338. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["i"]))
  339. self.assertEqual(auth_chain_ids, {"j"})
  340. # j and k have no parents.
  341. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["j"]))
  342. self.assertEqual(auth_chain_ids, set())
  343. auth_chain_ids = self.get_success(self.store.get_auth_chain_ids(room_id, ["k"]))
  344. self.assertEqual(auth_chain_ids, set())
  345. # More complex input sequences.
  346. auth_chain_ids = self.get_success(
  347. self.store.get_auth_chain_ids(room_id, ["b", "c", "d"])
  348. )
  349. self.assertCountEqual(auth_chain_ids, ["e", "f", "g", "h", "i", "j", "k"])
  350. auth_chain_ids = self.get_success(
  351. self.store.get_auth_chain_ids(room_id, ["h", "i"])
  352. )
  353. self.assertCountEqual(auth_chain_ids, ["k", "j"])
  354. # e gets returned even though include_given is false, but it is in the
  355. # auth chain of b.
  356. auth_chain_ids = self.get_success(
  357. self.store.get_auth_chain_ids(room_id, ["b", "e"])
  358. )
  359. self.assertCountEqual(auth_chain_ids, ["e", "f", "g", "h", "i", "j", "k"])
  360. # Test include_given.
  361. auth_chain_ids = self.get_success(
  362. self.store.get_auth_chain_ids(room_id, ["i"], include_given=True)
  363. )
  364. self.assertCountEqual(auth_chain_ids, ["i", "j"])
  365. @parameterized.expand([(True,), (False,)])
  366. def test_auth_difference(self, use_chain_cover_index: bool) -> None:
  367. room_id = self._setup_auth_chain(use_chain_cover_index)
  368. # Now actually test that various combinations give the right result:
  369. self.assert_auth_diff_is_expected(room_id)
  370. @parameterized.expand(
  371. [
  372. [graph_subset]
  373. for graph_subset in get_all_topologically_consistent_subsets(
  374. AUTH_GRAPH, AUTH_GRAPH
  375. )
  376. ]
  377. )
  378. def test_auth_difference_partial(self, graph_subset: Collection[str]) -> None:
  379. """Test that if we only have a chain cover index on a partial subset of
  380. the room we still get the correct auth chain difference.
  381. We do this by removing the chain cover index for every valid subset of the
  382. graph.
  383. """
  384. room_id = self._setup_auth_chain(True)
  385. for event_id in graph_subset:
  386. # Remove chain cover from that event.
  387. self.get_success(
  388. self.store.db_pool.simple_delete(
  389. table="event_auth_chains",
  390. keyvalues={"event_id": event_id},
  391. desc="test_auth_difference_partial_remove",
  392. )
  393. )
  394. self.get_success(
  395. self.store.db_pool.simple_insert(
  396. table="event_auth_chain_to_calculate",
  397. values={
  398. "event_id": event_id,
  399. "room_id": room_id,
  400. "type": "",
  401. "state_key": "",
  402. },
  403. desc="test_auth_difference_partial_remove",
  404. )
  405. )
  406. self.assert_auth_diff_is_expected(room_id)
  407. def assert_auth_diff_is_expected(self, room_id: str) -> None:
  408. """Assert the auth chain difference returns the correct answers."""
  409. difference = self.get_success(
  410. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}])
  411. )
  412. self.assertSetEqual(difference, {"a", "b"})
  413. difference = self.get_success(
  414. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}])
  415. )
  416. self.assertSetEqual(difference, {"a", "b", "c", "e", "f"})
  417. difference = self.get_success(
  418. self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b"}])
  419. )
  420. self.assertSetEqual(difference, {"a", "b", "c"})
  421. difference = self.get_success(
  422. self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b", "c"}])
  423. )
  424. self.assertSetEqual(difference, {"a", "b"})
  425. difference = self.get_success(
  426. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"d"}])
  427. )
  428. self.assertSetEqual(difference, {"a", "b", "d", "e"})
  429. difference = self.get_success(
  430. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}, {"d"}])
  431. )
  432. self.assertSetEqual(difference, {"a", "b", "c", "d", "e", "f"})
  433. difference = self.get_success(
  434. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"e"}])
  435. )
  436. self.assertSetEqual(difference, {"a", "b"})
  437. difference = self.get_success(
  438. self.store.get_auth_chain_difference(room_id, [{"a"}])
  439. )
  440. self.assertSetEqual(difference, set())
  441. def test_auth_difference_partial_cover(self) -> None:
  442. """Test that we correctly handle rooms where not all events have a chain
  443. cover calculated. This can happen in some obscure edge cases, including
  444. during the background update that calculates the chain cover for old
  445. rooms.
  446. """
  447. room_id = "@ROOM:local"
  448. # The silly auth graph we use to test the auth difference algorithm,
  449. # where the top are the most recent events.
  450. #
  451. # A B
  452. # \ /
  453. # D E
  454. # \ |
  455. # ` F C
  456. # | /|
  457. # G ´ |
  458. # | \ |
  459. # H I
  460. # | |
  461. # K J
  462. auth_graph: Dict[str, List[str]] = {
  463. "a": ["e"],
  464. "b": ["e"],
  465. "c": ["g", "i"],
  466. "d": ["f"],
  467. "e": ["f"],
  468. "f": ["g"],
  469. "g": ["h", "i"],
  470. "h": ["k"],
  471. "i": ["j"],
  472. "k": [],
  473. "j": [],
  474. }
  475. depth_map = {
  476. "a": 7,
  477. "b": 7,
  478. "c": 4,
  479. "d": 6,
  480. "e": 6,
  481. "f": 5,
  482. "g": 3,
  483. "h": 2,
  484. "i": 2,
  485. "k": 1,
  486. "j": 1,
  487. }
  488. # We rudely fiddle with the appropriate tables directly, as that's much
  489. # easier than constructing events properly.
  490. def insert_event(txn: LoggingTransaction) -> None:
  491. # First insert the room and mark it as having a chain cover.
  492. self.store.db_pool.simple_insert_txn(
  493. txn,
  494. "rooms",
  495. {
  496. "room_id": room_id,
  497. "creator": "room_creator_user_id",
  498. "is_public": True,
  499. "room_version": "6",
  500. "has_auth_chain_index": True,
  501. },
  502. )
  503. stream_ordering = 0
  504. for event_id in auth_graph:
  505. stream_ordering += 1
  506. depth = depth_map[event_id]
  507. self.store.db_pool.simple_insert_txn(
  508. txn,
  509. table="events",
  510. values={
  511. "event_id": event_id,
  512. "room_id": room_id,
  513. "depth": depth,
  514. "topological_ordering": depth,
  515. "type": "m.test",
  516. "processed": True,
  517. "outlier": False,
  518. "stream_ordering": stream_ordering,
  519. },
  520. )
  521. # Insert all events apart from 'B'
  522. self.persist_events._persist_event_auth_chain_txn(
  523. txn,
  524. [
  525. cast(EventBase, FakeEvent(event_id, room_id, auth_graph[event_id]))
  526. for event_id in auth_graph
  527. if event_id != "b"
  528. ],
  529. )
  530. # Now we insert the event 'B' without a chain cover, by temporarily
  531. # pretending the room doesn't have a chain cover.
  532. self.store.db_pool.simple_update_txn(
  533. txn,
  534. table="rooms",
  535. keyvalues={"room_id": room_id},
  536. updatevalues={"has_auth_chain_index": False},
  537. )
  538. self.persist_events._persist_event_auth_chain_txn(
  539. txn,
  540. [cast(EventBase, FakeEvent("b", room_id, auth_graph["b"]))],
  541. )
  542. self.store.db_pool.simple_update_txn(
  543. txn,
  544. table="rooms",
  545. keyvalues={"room_id": room_id},
  546. updatevalues={"has_auth_chain_index": True},
  547. )
  548. self.get_success(
  549. self.store.db_pool.runInteraction(
  550. "insert",
  551. insert_event,
  552. )
  553. )
  554. # Now actually test that various combinations give the right result:
  555. difference = self.get_success(
  556. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}])
  557. )
  558. self.assertSetEqual(difference, {"a", "b"})
  559. difference = self.get_success(
  560. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}])
  561. )
  562. self.assertSetEqual(difference, {"a", "b", "c", "e", "f"})
  563. difference = self.get_success(
  564. self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b"}])
  565. )
  566. self.assertSetEqual(difference, {"a", "b", "c"})
  567. difference = self.get_success(
  568. self.store.get_auth_chain_difference(room_id, [{"a", "c"}, {"b", "c"}])
  569. )
  570. self.assertSetEqual(difference, {"a", "b"})
  571. difference = self.get_success(
  572. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"d"}])
  573. )
  574. self.assertSetEqual(difference, {"a", "b", "d", "e"})
  575. difference = self.get_success(
  576. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"c"}, {"d"}])
  577. )
  578. self.assertSetEqual(difference, {"a", "b", "c", "d", "e", "f"})
  579. difference = self.get_success(
  580. self.store.get_auth_chain_difference(room_id, [{"a"}, {"b"}, {"e"}])
  581. )
  582. self.assertSetEqual(difference, {"a", "b"})
  583. difference = self.get_success(
  584. self.store.get_auth_chain_difference(room_id, [{"a"}])
  585. )
  586. self.assertSetEqual(difference, set())
  587. @parameterized.expand(
  588. [(room_version,) for room_version in KNOWN_ROOM_VERSIONS.values()]
  589. )
  590. def test_prune_inbound_federation_queue(self, room_version: RoomVersion) -> None:
  591. """Test that pruning of inbound federation queues work"""
  592. room_id = "some_room_id"
  593. def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]:
  594. """Account for differences in prev_events format across room versions"""
  595. if room_version.event_format == EventFormatVersions.ROOM_V1_V2:
  596. return prev_event_id, {}
  597. return prev_event_id
  598. # Insert a bunch of events that all reference the previous one.
  599. self.get_success(
  600. self.store.db_pool.simple_insert_many(
  601. table="federation_inbound_events_staging",
  602. keys=(
  603. "origin",
  604. "room_id",
  605. "received_ts",
  606. "event_id",
  607. "event_json",
  608. "internal_metadata",
  609. ),
  610. values=[
  611. (
  612. "some_origin",
  613. room_id,
  614. 0,
  615. f"$fake_event_id_{i + 1}",
  616. json_encoder.encode(
  617. {"prev_events": [prev_event_format(f"$fake_event_id_{i}")]}
  618. ),
  619. "{}",
  620. )
  621. for i in range(500)
  622. ],
  623. desc="test_prune_inbound_federation_queue",
  624. )
  625. )
  626. # Calling prune once should return True, i.e. a prune happen. The second
  627. # time it shouldn't.
  628. pruned = self.get_success(
  629. self.store.prune_staged_events_in_room(room_id, room_version)
  630. )
  631. self.assertTrue(pruned)
  632. pruned = self.get_success(
  633. self.store.prune_staged_events_in_room(room_id, room_version)
  634. )
  635. self.assertFalse(pruned)
  636. # Assert that we only have a single event left in the queue, and that it
  637. # is the last one.
  638. count = self.get_success(
  639. self.store.db_pool.simple_select_one_onecol(
  640. table="federation_inbound_events_staging",
  641. keyvalues={"room_id": room_id},
  642. retcol="COUNT(*)",
  643. desc="test_prune_inbound_federation_queue",
  644. )
  645. )
  646. self.assertEqual(count, 1)
  647. next_staged_event_info = self.get_success(
  648. self.store.get_next_staged_event_id_for_room(room_id)
  649. )
  650. assert next_staged_event_info
  651. _, event_id = next_staged_event_info
  652. self.assertEqual(event_id, "$fake_event_id_500")
  653. def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo:
  654. """
  655. Sets up a room with various events and backward extremities to test
  656. backfill functions against.
  657. Returns:
  658. _BackfillSetupInfo including the `room_id` to test against and
  659. `depth_map` of events in the room
  660. """
  661. room_id = "!backfill-room-test:some-host"
  662. # The silly graph we use to test grabbing backward extremities,
  663. # where the top is the oldest events.
  664. # 1 (oldest)
  665. # |
  666. # 2 ⹁
  667. # | \
  668. # | [b1, b2, b3]
  669. # | |
  670. # | A
  671. # | /
  672. # 3 {
  673. # | \
  674. # | [b4, b5, b6]
  675. # | |
  676. # | B
  677. # | /
  678. # 4 ´
  679. # |
  680. # 5 (newest)
  681. event_graph: Dict[str, List[str]] = {
  682. "1": [],
  683. "2": ["1"],
  684. "3": ["2", "A"],
  685. "4": ["3", "B"],
  686. "5": ["4"],
  687. "A": ["b1", "b2", "b3"],
  688. "b1": ["2"],
  689. "b2": ["2"],
  690. "b3": ["2"],
  691. "B": ["b4", "b5", "b6"],
  692. "b4": ["3"],
  693. "b5": ["3"],
  694. "b6": ["3"],
  695. }
  696. depth_map: Dict[str, int] = {
  697. "1": 1,
  698. "2": 2,
  699. "b1": 3,
  700. "b2": 3,
  701. "b3": 3,
  702. "A": 4,
  703. "3": 5,
  704. "b4": 6,
  705. "b5": 6,
  706. "b6": 6,
  707. "B": 7,
  708. "4": 8,
  709. "5": 9,
  710. }
  711. # The events we have persisted on our server.
  712. # The rest are events in the room but not backfilled tet.
  713. our_server_events = {"5", "4", "B", "3", "A"}
  714. complete_event_dict_map: Dict[str, JsonDict] = {}
  715. stream_ordering = 0
  716. for event_id, prev_event_ids in event_graph.items():
  717. depth = depth_map[event_id]
  718. complete_event_dict_map[event_id] = {
  719. "event_id": event_id,
  720. "type": "test_regular_type",
  721. "room_id": room_id,
  722. "sender": "@sender",
  723. "prev_event_ids": prev_event_ids,
  724. "auth_event_ids": [],
  725. "origin_server_ts": stream_ordering,
  726. "depth": depth,
  727. "stream_ordering": stream_ordering,
  728. "content": {"body": "event" + event_id},
  729. }
  730. stream_ordering += 1
  731. def populate_db(txn: LoggingTransaction) -> None:
  732. # Insert the room to satisfy the foreign key constraint of
  733. # `event_failed_pull_attempts`
  734. self.store.db_pool.simple_insert_txn(
  735. txn,
  736. "rooms",
  737. {
  738. "room_id": room_id,
  739. "creator": "room_creator_user_id",
  740. "is_public": True,
  741. "room_version": "6",
  742. },
  743. )
  744. # Insert our server events
  745. for event_id in our_server_events:
  746. event_dict = complete_event_dict_map[event_id]
  747. self.store.db_pool.simple_insert_txn(
  748. txn,
  749. table="events",
  750. values={
  751. "event_id": event_dict.get("event_id"),
  752. "type": event_dict.get("type"),
  753. "room_id": event_dict.get("room_id"),
  754. "depth": event_dict.get("depth"),
  755. "topological_ordering": event_dict.get("depth"),
  756. "stream_ordering": event_dict.get("stream_ordering"),
  757. "processed": True,
  758. "outlier": False,
  759. },
  760. )
  761. # Insert the event edges
  762. for event_id in our_server_events:
  763. for prev_event_id in event_graph[event_id]:
  764. self.store.db_pool.simple_insert_txn(
  765. txn,
  766. table="event_edges",
  767. values={
  768. "event_id": event_id,
  769. "prev_event_id": prev_event_id,
  770. "room_id": room_id,
  771. },
  772. )
  773. # Insert the backward extremities
  774. prev_events_of_our_events = {
  775. prev_event_id
  776. for our_server_event in our_server_events
  777. for prev_event_id in complete_event_dict_map[our_server_event][
  778. "prev_event_ids"
  779. ]
  780. }
  781. backward_extremities = prev_events_of_our_events - our_server_events
  782. for backward_extremity in backward_extremities:
  783. self.store.db_pool.simple_insert_txn(
  784. txn,
  785. table="event_backward_extremities",
  786. values={
  787. "event_id": backward_extremity,
  788. "room_id": room_id,
  789. },
  790. )
  791. self.get_success(
  792. self.store.db_pool.runInteraction(
  793. "_setup_room_for_backfill_tests_populate_db",
  794. populate_db,
  795. )
  796. )
  797. return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
  798. def test_get_backfill_points_in_room(self) -> None:
  799. """
  800. Test to make sure only backfill points that are older and come before
  801. the `current_depth` are returned.
  802. """
  803. setup_info = self._setup_room_for_backfill_tests()
  804. room_id = setup_info.room_id
  805. depth_map = setup_info.depth_map
  806. # Try at "B"
  807. backfill_points = self.get_success(
  808. self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
  809. )
  810. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  811. self.assertEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"])
  812. # Try at "A"
  813. backfill_points = self.get_success(
  814. self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
  815. )
  816. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  817. # Event "2" has a depth of 2 but is not included here because we only
  818. # know the approximate depth of 5 from our event "3".
  819. self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
  820. def test_get_backfill_points_in_room_excludes_events_we_have_attempted(
  821. self,
  822. ) -> None:
  823. """
  824. Test to make sure that events we have attempted to backfill (and within
  825. backoff timeout duration) do not show up as an event to backfill again.
  826. """
  827. setup_info = self._setup_room_for_backfill_tests()
  828. room_id = setup_info.room_id
  829. depth_map = setup_info.depth_map
  830. # Record some attempts to backfill these events which will make
  831. # `get_backfill_points_in_room` exclude them because we
  832. # haven't passed the backoff interval.
  833. self.get_success(
  834. self.store.record_event_failed_pull_attempt(room_id, "b5", "fake cause")
  835. )
  836. self.get_success(
  837. self.store.record_event_failed_pull_attempt(room_id, "b4", "fake cause")
  838. )
  839. self.get_success(
  840. self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause")
  841. )
  842. self.get_success(
  843. self.store.record_event_failed_pull_attempt(room_id, "b2", "fake cause")
  844. )
  845. # No time has passed since we attempted to backfill ^
  846. # Try at "B"
  847. backfill_points = self.get_success(
  848. self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100)
  849. )
  850. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  851. # Only the backfill points that we didn't record earlier exist here.
  852. self.assertEqual(backfill_event_ids, ["b6", "2", "b1"])
  853. def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration(
  854. self,
  855. ) -> None:
  856. """
  857. Test to make sure after we fake attempt to backfill event "b3" many times,
  858. we can see retry and see the "b3" again after the backoff timeout duration
  859. has exceeded.
  860. """
  861. setup_info = self._setup_room_for_backfill_tests()
  862. room_id = setup_info.room_id
  863. depth_map = setup_info.depth_map
  864. # Record some attempts to backfill these events which will make
  865. # `get_backfill_points_in_room` exclude them because we
  866. # haven't passed the backoff interval.
  867. self.get_success(
  868. self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause")
  869. )
  870. self.get_success(
  871. self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
  872. )
  873. self.get_success(
  874. self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
  875. )
  876. self.get_success(
  877. self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
  878. )
  879. self.get_success(
  880. self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
  881. )
  882. # Now advance time by 2 hours and we should only be able to see "b3"
  883. # because we have waited long enough for the single attempt (2^1 hours)
  884. # but we still shouldn't see "b1" because we haven't waited long enough
  885. # for this many attempts. We didn't do anything to "b2" so it should be
  886. # visible regardless.
  887. self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
  888. # Try at "A" and make sure that "b1" is not in the list because we've
  889. # already attempted many times
  890. backfill_points = self.get_success(
  891. self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
  892. )
  893. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  894. self.assertEqual(backfill_event_ids, ["b3", "b2"])
  895. # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and
  896. # see if we can now backfill it
  897. self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
  898. # Try at "A" again after we advanced enough time and we should see "b3" again
  899. backfill_points = self.get_success(
  900. self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
  901. )
  902. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  903. self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
  904. def test_get_backfill_points_in_room_works_after_many_failed_pull_attempts_that_could_naively_overflow(
  905. self,
  906. ) -> None:
  907. """
  908. A test that reproduces https://github.com/matrix-org/synapse/issues/13929 (Postgres only).
  909. Test to make sure we can still get backfill points after many failed pull
  910. attempts that cause us to backoff to the limit. Even if the backoff formula
  911. would tell us to wait for more seconds than can be expressed in a 32 bit
  912. signed int.
  913. """
  914. setup_info = self._setup_room_for_backfill_tests()
  915. room_id = setup_info.room_id
  916. depth_map = setup_info.depth_map
  917. # Pretend that we have tried and failed 10 times to backfill event b1.
  918. for _ in range(10):
  919. self.get_success(
  920. self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause")
  921. )
  922. # If the backoff periods grow without limit:
  923. # After the first failed attempt, we would have backed off for 1 << 1 = 2 hours.
  924. # After the second failed attempt we would have backed off for 1 << 2 = 4 hours,
  925. # so after the 10th failed attempt we should backoff for 1 << 10 == 1024 hours.
  926. # Wait 1100 hours just so we have a nice round number.
  927. self.reactor.advance(datetime.timedelta(hours=1100).total_seconds())
  928. # 1024 hours in milliseconds is 1024 * 3600000, which exceeds the largest 32 bit
  929. # signed integer. The bug we're reproducing is that this overflow causes an
  930. # error in postgres preventing us from fetching a set of backwards extremities
  931. # to retry fetching.
  932. backfill_points = self.get_success(
  933. self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100)
  934. )
  935. # We should aim to fetch all backoff points: b1's latest backoff period has
  936. # expired, and we haven't tried the rest.
  937. backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
  938. self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"])
  939. def test_get_event_ids_with_failed_pull_attempts(self) -> None:
  940. """
  941. Test to make sure we properly get event_ids based on whether they have any
  942. failed pull attempts.
  943. """
  944. # Create the room
  945. user_id = self.register_user("alice", "test")
  946. tok = self.login("alice", "test")
  947. room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
  948. self.get_success(
  949. self.store.record_event_failed_pull_attempt(
  950. room_id, "$failed_event_id1", "fake cause"
  951. )
  952. )
  953. self.get_success(
  954. self.store.record_event_failed_pull_attempt(
  955. room_id, "$failed_event_id2", "fake cause"
  956. )
  957. )
  958. event_ids_with_failed_pull_attempts = self.get_success(
  959. self.store.get_event_ids_with_failed_pull_attempts(
  960. event_ids=[
  961. "$failed_event_id1",
  962. "$fresh_event_id1",
  963. "$failed_event_id2",
  964. "$fresh_event_id2",
  965. ]
  966. )
  967. )
  968. self.assertEqual(
  969. event_ids_with_failed_pull_attempts,
  970. {"$failed_event_id1", "$failed_event_id2"},
  971. )
  972. def test_get_event_ids_to_not_pull_from_backoff(self) -> None:
  973. """
  974. Test to make sure only event IDs we should backoff from are returned.
  975. """
  976. # Create the room
  977. user_id = self.register_user("alice", "test")
  978. tok = self.login("alice", "test")
  979. room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
  980. failure_time = self.clock.time_msec()
  981. self.get_success(
  982. self.store.record_event_failed_pull_attempt(
  983. room_id, "$failed_event_id", "fake cause"
  984. )
  985. )
  986. event_ids_with_backoff = self.get_success(
  987. self.store.get_event_ids_to_not_pull_from_backoff(
  988. room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
  989. )
  990. )
  991. self.assertEqual(
  992. event_ids_with_backoff,
  993. # We expect a 2^1 hour backoff after a single failed attempt.
  994. {"$failed_event_id": failure_time + 2 * 60 * 60 * 1000},
  995. )
  996. def test_get_event_ids_to_not_pull_from_backoff_retry_after_backoff_duration(
  997. self,
  998. ) -> None:
  999. """
  1000. Test to make sure no event IDs are returned after the backoff duration has
  1001. elapsed.
  1002. """
  1003. # Create the room
  1004. user_id = self.register_user("alice", "test")
  1005. tok = self.login("alice", "test")
  1006. room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
  1007. self.get_success(
  1008. self.store.record_event_failed_pull_attempt(
  1009. room_id, "$failed_event_id", "fake cause"
  1010. )
  1011. )
  1012. # Now advance time by 2 hours so we wait long enough for the single failed
  1013. # attempt (2^1 hours).
  1014. self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
  1015. event_ids_with_backoff = self.get_success(
  1016. self.store.get_event_ids_to_not_pull_from_backoff(
  1017. room_id=room_id, event_ids=["$failed_event_id", "$normal_event_id"]
  1018. )
  1019. )
  1020. # Since this function only returns events we should backoff from, time has
  1021. # elapsed past the backoff range so there is no events to backoff from.
  1022. self.assertEqual(event_ids_with_backoff, {})
  1023. @attr.s(auto_attribs=True)
  1024. class FakeEvent:
  1025. event_id: str
  1026. room_id: str
  1027. auth_events: List[str]
  1028. type = "foo"
  1029. state_key = "foo"
  1030. internal_metadata = _EventInternalMetadata({})
  1031. def auth_event_ids(self) -> List[str]:
  1032. return self.auth_events
  1033. def is_state(self) -> bool:
  1034. return True