# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2017 Vector Creations Ltd
# Copyright 2018-2019 New Vector Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" This module is responsible for getting events from the DB for pagination
and event streaming.

The order it returns events in depends on whether we are streaming forwards or
are paginating backwards. We do this because we want to handle out-of-order
messages nicely, while still returning them in the correct order when we
paginate backwards.

This is implemented by keeping two ordering columns: stream_ordering and
topological_ordering. Stream ordering is basically insertion/received order
(except for events from backfill requests). The topological_ordering is a
weak ordering of events based on the pdu graph.

This means that we have to have two different types of tokens, depending on
what sort order was used:
    - stream tokens are of the form: "s%d", which maps directly to the column
    - topological tokens: "t%d-%d", where the integers map to the topological
      and stream ordering columns respectively.
"""

import logging
from typing import (
    TYPE_CHECKING,
    Any,
    Collection,
    Dict,
    List,
    Optional,
    Set,
    Tuple,
    cast,
    overload,
)

import attr
from frozendict import frozendict
from typing_extensions import Literal

from twisted.internet import defer

from synapse.api.filtering import Filter
from synapse.events import EventBase
from synapse.logging.context import make_deferred_yieldable, run_in_background
from synapse.logging.opentracing import trace
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
    DatabasePool,
    LoggingDatabaseConnection,
    LoggingTransaction,
    make_in_list_sql_clause,
)
from synapse.storage.databases.main.events_worker import EventsWorkerStore
from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine
from synapse.storage.util.id_generators import MultiWriterIdGenerator
from synapse.types import PersistedEventPosition, RoomStreamToken
from synapse.util.caches.descriptors import cached
from synapse.util.caches.stream_change_cache import StreamChangeCache
from synapse.util.cancellation import cancellable

if TYPE_CHECKING:
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)

MAX_STREAM_SIZE = 1000

_STREAM_TOKEN = "stream"
_TOPOLOGICAL_TOKEN = "topological"


# Used as return values for pagination APIs
@attr.s(slots=True, frozen=True, auto_attribs=True)
class _EventDictReturn:
    event_id: str
    topological_ordering: Optional[int]
    stream_ordering: int


@attr.s(slots=True, frozen=True, auto_attribs=True)
class _EventsAround:
    events_before: List[EventBase]
    events_after: List[EventBase]
    start: RoomStreamToken
    end: RoomStreamToken


def generate_pagination_where_clause(
    direction: str,
    column_names: Tuple[str, str],
    from_token: Optional[Tuple[Optional[int], int]],
    to_token: Optional[Tuple[Optional[int], int]],
    engine: BaseDatabaseEngine,
) -> str:
    """Creates an SQL expression to bound the columns by the pagination
    tokens.

    For example creates an SQL expression like:

        (6, 7) >= (topological_ordering, stream_ordering)
        AND (5, 3) < (topological_ordering, stream_ordering)

    would be generated for dir=b, from_token=(6, 7) and to_token=(5, 3).

    Note that tokens are considered to be after the row they are in, e.g. if
    a row A has a token T, then we consider A to be before T. This convention
    is important when figuring out inequalities for the generated SQL, and
    produces the following result:
        - If paginating forwards then we exclude any rows matching the from
          token, but include those that match the to token.
        - If paginating backwards then we include any rows matching the from
          token, but exclude those that match the to token.

    Args:
        direction: Whether we're paginating backwards ("b") or forwards ("f").
        column_names: The column names to bound. Must *not* be user defined as
            these get inserted directly into the SQL statement without escapes.
        from_token: The start point for the pagination. This is an exclusive
            minimum bound if direction is "f", and an inclusive maximum bound if
            direction is "b".
        to_token: The end point for the pagination. This is an inclusive
            maximum bound if direction is "f", and an exclusive minimum bound if
            direction is "b".
        engine: The database engine to generate the clauses for

    Returns:
        The sql expression
    """
    assert direction in ("b", "f")

    where_clause = []
    if from_token:
        where_clause.append(
            _make_generic_sql_bound(
                bound=">=" if direction == "b" else "<",
                column_names=column_names,
                values=from_token,
                engine=engine,
            )
        )

    if to_token:
        where_clause.append(
            _make_generic_sql_bound(
                bound="<" if direction == "b" else ">=",
                column_names=column_names,
                values=to_token,
                engine=engine,
            )
        )

    return " AND ".join(where_clause)


def _make_generic_sql_bound(
    bound: str,
    column_names: Tuple[str, str],
    values: Tuple[Optional[int], int],
    engine: BaseDatabaseEngine,
) -> str:
    """Create an SQL expression that bounds the given column names by the
    values, e.g. create the equivalent of `(1, 2) < (col1, col2)`.

    Only works with two columns.

    Older versions of SQLite don't support that syntax so we have to expand it
    out manually.

    Args:
        bound: The comparison operator to use. One of ">", "<", ">=",
            "<=", where the values are on the left and columns on the right.
        column_names: The column names. Must *not* be user defined
            as these get inserted directly into the SQL statement without
            escapes.
        values: The values to bound the columns by. If
            the first value is None then only creates a bound on the second
            column.
        engine: The database engine to generate the SQL for

    Returns:
        The SQL statement
    """
    assert bound in (">", "<", ">=", "<=")

    name1, name2 = column_names
    val1, val2 = values

    if val1 is None:
        val2 = int(val2)
        return "(%d %s %s)" % (val2, bound, name2)

    val1 = int(val1)
    val2 = int(val2)

    if isinstance(engine, PostgresEngine):
        # Postgres doesn't optimise ``(x < a) OR (x=a AND y<b)`` as well
        # as it optimises ``(x,y) < (a,b)`` on multicolumn indexes. So we
        # use the latter form when running against postgres.
        return "((%d,%d) %s (%s,%s))" % (val1, val2, bound, name1, name2)

    # We want to generate queries of e.g. the form:
    #
    #   (val1 < name1 OR (val1 = name1 AND val2 <= name2))
    #
    # which is equivalent to (val1, val2) < (name1, name2)

    return """(
        {val1:d} {strict_bound} {name1}
        OR ({val1:d} = {name1} AND {val2:d} {bound} {name2})
    )""".format(
        name1=name1,
        val1=val1,
        name2=name2,
        val2=val2,
        strict_bound=bound[0],  # The first bound must always be strict equality here
        bound=bound,
    )
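

# Illustrative sketch: on SQLite, _make_generic_sql_bound("<", ("c1", "c2"),
# (1, 2), engine) expands to (modulo whitespace):
#
#     (1 < c1 OR (1 = c1 AND 2 < c2))
#
# which is equivalent to the row comparison (1, 2) < (c1, c2).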


def _filter_results(
    lower_token: Optional[RoomStreamToken],
    upper_token: Optional[RoomStreamToken],
    instance_name: str,
    topological_ordering: int,
    stream_ordering: int,
) -> bool:
    """Returns True if the event persisted by the given instance at the given
    topological/stream_ordering falls between the two tokens (taking a None
    token to mean unbounded).

    Used to filter results from fetching events in the DB against the given
    tokens. This is necessary to handle the case where the tokens include
    position maps, which we handle by fetching more than necessary from the DB
    and then filtering (rather than attempting to construct a complicated SQL
    query).
    """
    event_historical_tuple = (
        topological_ordering,
        stream_ordering,
    )

    if lower_token:
        if lower_token.topological is not None:
            # If these are historical tokens we compare the `(topological, stream)`
            # tuples.
            if event_historical_tuple <= lower_token.as_historical_tuple():
                return False
        else:
            # If these are live tokens we compare the stream ordering against the
            # writer's stream position.
            if stream_ordering <= lower_token.get_stream_pos_for_instance(
                instance_name
            ):
                return False

    if upper_token:
        if upper_token.topological is not None:
            if upper_token.as_historical_tuple() < event_historical_tuple:
                return False
        else:
            if upper_token.get_stream_pos_for_instance(instance_name) < stream_ordering:
                return False

    return True
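

# Illustrative sketch: for a "live" lower token whose position map says
# instance "worker1" is at stream position 10, an event persisted by
# "worker1" at stream_ordering 10 is filtered out (the lower bound is
# exclusive), while one at stream_ordering 11 is kept.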


def filter_to_clause(event_filter: Optional[Filter]) -> Tuple[str, List[str]]:
    # NB: This may create SQL clauses that don't optimise well (and we don't
    # have indices on all possible clauses). E.g. it may create
    # "room_id == X AND room_id != X", which postgres doesn't optimise.

    if not event_filter:
        return "", []

    clauses = []
    args = []

    if event_filter.types:
        clauses.append(
            "(%s)" % " OR ".join("event.type = ?" for _ in event_filter.types)
        )
        args.extend(event_filter.types)

    for typ in event_filter.not_types:
        clauses.append("event.type != ?")
        args.append(typ)

    if event_filter.senders:
        clauses.append(
            "(%s)" % " OR ".join("event.sender = ?" for _ in event_filter.senders)
        )
        args.extend(event_filter.senders)

    for sender in event_filter.not_senders:
        clauses.append("event.sender != ?")
        args.append(sender)

    if event_filter.rooms:
        clauses.append(
            "(%s)" % " OR ".join("event.room_id = ?" for _ in event_filter.rooms)
        )
        args.extend(event_filter.rooms)

    for room_id in event_filter.not_rooms:
        clauses.append("event.room_id != ?")
        args.append(room_id)

    if event_filter.contains_url:
        clauses.append("event.contains_url = ?")
        args.append(event_filter.contains_url)

    # We're only applying the "labels" filter on the database query, because applying the
    # "not_labels" filter via a SQL query is non-trivial. Instead, we let
    # event_filter.check_fields apply it, which is not as efficient but makes the
    # implementation simpler.
    if event_filter.labels:
        clauses.append("(%s)" % " OR ".join("label = ?" for _ in event_filter.labels))
        args.extend(event_filter.labels)

    # Filter on relation_senders / relation types from the joined tables.
    if event_filter.related_by_senders:
        clauses.append(
            "(%s)"
            % " OR ".join(
                "related_event.sender = ?" for _ in event_filter.related_by_senders
            )
        )
        args.extend(event_filter.related_by_senders)

    if event_filter.related_by_rel_types:
        clauses.append(
            "(%s)"
            % " OR ".join(
                "relation_type = ?" for _ in event_filter.related_by_rel_types
            )
        )
        args.extend(event_filter.related_by_rel_types)

    return " AND ".join(clauses), args


class StreamWorkerStore(EventsWorkerStore, SQLBaseStore):
    def __init__(
        self,
        database: DatabasePool,
        db_conn: LoggingDatabaseConnection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        self._instance_name = hs.get_instance_name()
        self._send_federation = hs.should_send_federation()
        self._federation_shard_config = hs.config.worker.federation_shard_config

        # If we're a process that sends federation we may need to reset the
        # `federation_stream_position` table to match the current sharding
        # config. We don't do this now as otherwise two processes could conflict
        # during startup which would cause one to die.
        self._need_to_reset_federation_stream_positions = self._send_federation

        events_max = self.get_room_max_stream_ordering()
        event_cache_prefill, min_event_val = self.db_pool.get_cache_dict(
            db_conn,
            "events",
            entity_column="room_id",
            stream_column="stream_ordering",
            max_value=events_max,
        )
        self._events_stream_cache = StreamChangeCache(
            "EventsRoomStreamChangeCache",
            min_event_val,
            prefilled_cache=event_cache_prefill,
        )
        self._membership_stream_cache = StreamChangeCache(
            "MembershipStreamChangeCache", events_max
        )

        self._stream_order_on_start = self.get_room_max_stream_ordering()

    def get_room_max_stream_ordering(self) -> int:
        """Get the stream_ordering of regular events that we have committed up to

        Returns the maximum stream id such that all stream ids less than or
        equal to it have been successfully persisted.
        """
        return self._stream_id_gen.get_current_token()

    def get_room_min_stream_ordering(self) -> int:
        """Get the stream_ordering of backfilled events that we have committed up to

        Backfilled events use *negative* stream orderings, so this returns the
        minimum negative stream id such that all stream ids greater than or
        equal to it have been successfully persisted.
        """
        return self._backfill_id_gen.get_current_token()

    def get_room_max_token(self) -> RoomStreamToken:
        """Get a `RoomStreamToken` that marks the current maximum persisted
        position of the events stream. Useful to get a token that represents
        "now".

        The token returned is a "live" token that may have an instance_map
        component.
        """
        min_pos = self._stream_id_gen.get_current_token()

        positions = {}
        if isinstance(self._stream_id_gen, MultiWriterIdGenerator):
            # The `min_pos` is the minimum position that we know all instances
            # have finished persisting to, so we only care about instances whose
            # positions are ahead of that. (Instance positions can be behind the
            # min position as there are times we can work out that the minimum
            # position is ahead of the naive minimum across all current
            # positions. See MultiWriterIdGenerator for details)
            positions = {
                i: p
                for i, p in self._stream_id_gen.get_positions().items()
                if p > min_pos
            }

        return RoomStreamToken(None, min_pos, frozendict(positions))
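
    # Illustrative sketch: if the minimum fully-persisted position is 510 and
    # get_positions() reports {"writer1": 512, "writer2": 508} (hypothetical
    # values), the token returned is
    # RoomStreamToken(None, 510, frozendict({"writer1": 512})): only positions
    # ahead of the minimum are kept in the instance map.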

    async def get_room_events_stream_for_rooms(
        self,
        room_ids: Collection[str],
        from_key: RoomStreamToken,
        to_key: RoomStreamToken,
        limit: int = 0,
        order: str = "DESC",
    ) -> Dict[str, Tuple[List[EventBase], RoomStreamToken]]:
        """Get new room events in stream ordering since `from_key`.

        Args:
            room_ids
            from_key: Token from which no events are returned before
            to_key: Token from which no events are returned after. (This
                is typically the current stream token)
            limit: Maximum number of events to return
            order: Either "DESC" or "ASC". Determines which events are
                returned when the result is limited. If "DESC" then the most
                recent `limit` events are returned, otherwise returns the
                oldest `limit` events.

        Returns:
            A map from room id to a tuple containing:
                - list of recent events in the room
                - stream ordering key for the start of the chunk of events returned.
        """
        room_ids = self._events_stream_cache.get_entities_changed(
            room_ids, from_key.stream
        )

        if not room_ids:
            return {}

        results = {}
        room_ids = list(room_ids)
        for rm_ids in (room_ids[i : i + 20] for i in range(0, len(room_ids), 20)):
            res = await make_deferred_yieldable(
                defer.gatherResults(
                    [
                        run_in_background(
                            self.get_room_events_stream_for_room,
                            room_id,
                            from_key,
                            to_key,
                            limit,
                            order=order,
                        )
                        for room_id in rm_ids
                    ],
                    consumeErrors=True,
                )
            )

            results.update(dict(zip(rm_ids, res)))

        return results

    def get_rooms_that_changed(
        self, room_ids: Collection[str], from_key: RoomStreamToken
    ) -> Set[str]:
        """Given a list of rooms and a token, return rooms where there may have
        been changes.
        """
        from_id = from_key.stream
        return {
            room_id
            for room_id in room_ids
            if self._events_stream_cache.has_entity_changed(room_id, from_id)
        }

    async def get_room_events_stream_for_room(
        self,
        room_id: str,
        from_key: RoomStreamToken,
        to_key: RoomStreamToken,
        limit: int = 0,
        order: str = "DESC",
    ) -> Tuple[List[EventBase], RoomStreamToken]:
        """Get new room events in stream ordering since `from_key`.

        Args:
            room_id
            from_key: Token from which no events are returned before
            to_key: Token from which no events are returned after. (This
                is typically the current stream token)
            limit: Maximum number of events to return
            order: Either "DESC" or "ASC". Determines which events are
                returned when the result is limited. If "DESC" then the most
                recent `limit` events are returned, otherwise returns the
                oldest `limit` events.

        Returns:
            The list of events (in ascending stream order) and the token from the start
            of the chunk of events returned.
        """
        if from_key == to_key:
            return [], from_key

        has_changed = self._events_stream_cache.has_entity_changed(
            room_id, from_key.stream
        )

        if not has_changed:
            return [], from_key

        def f(txn: LoggingTransaction) -> List[_EventDictReturn]:
            # To handle tokens with a non-empty instance_map we fetch more
            # results than necessary and then filter down
            min_from_id = from_key.stream
            max_to_id = to_key.get_max_stream_pos()

            sql = """
                SELECT event_id, instance_name, topological_ordering, stream_ordering
                FROM events
                WHERE
                    room_id = ?
                    AND not outlier
                    AND stream_ordering > ? AND stream_ordering <= ?
                ORDER BY stream_ordering %s LIMIT ?
            """ % (
                order,
            )
            txn.execute(sql, (room_id, min_from_id, max_to_id, 2 * limit))

            rows = [
                _EventDictReturn(event_id, None, stream_ordering)
                for event_id, instance_name, topological_ordering, stream_ordering in txn
                if _filter_results(
                    from_key,
                    to_key,
                    instance_name,
                    topological_ordering,
                    stream_ordering,
                )
            ][:limit]
            return rows

        rows = await self.db_pool.runInteraction("get_room_events_stream_for_room", f)

        ret = await self.get_events_as_list(
            [r.event_id for r in rows], get_prev_content=True
        )

        self._set_before_and_after(ret, rows, topo_order=False)

        if order.lower() == "desc":
            ret.reverse()

        if rows:
            key = RoomStreamToken(None, min(r.stream_ordering for r in rows))
        else:
            # Assume we didn't get anything because there was nothing to
            # get.
            key = from_key

        return ret, key

    @cancellable
    async def get_membership_changes_for_user(
        self,
        user_id: str,
        from_key: RoomStreamToken,
        to_key: RoomStreamToken,
        excluded_rooms: Optional[List[str]] = None,
    ) -> List[EventBase]:
        """Fetch membership events for a given user.

        All such events whose stream ordering `s` lies in the range
        `from_key < s <= to_key` are returned. Events are ordered by ascending stream
        order.
        """
        # Start by ruling out cases where a DB query is not necessary.
        if from_key == to_key:
            return []

        if from_key:
            has_changed = self._membership_stream_cache.has_entity_changed(
                user_id, int(from_key.stream)
            )
            if not has_changed:
                return []

        def f(txn: LoggingTransaction) -> List[_EventDictReturn]:
            # To handle tokens with a non-empty instance_map we fetch more
            # results than necessary and then filter down
            min_from_id = from_key.stream
            max_to_id = to_key.get_max_stream_pos()

            args: List[Any] = [user_id, min_from_id, max_to_id]

            ignore_room_clause = ""
            if excluded_rooms is not None and len(excluded_rooms) > 0:
                ignore_room_clause = "AND e.room_id NOT IN (%s)" % ",".join(
                    "?" for _ in excluded_rooms
                )
                args = args + excluded_rooms

            sql = """
                SELECT m.event_id, instance_name, topological_ordering, stream_ordering
                FROM events AS e, room_memberships AS m
                WHERE e.event_id = m.event_id
                    AND m.user_id = ?
                    AND e.stream_ordering > ? AND e.stream_ordering <= ?
                    %s
                ORDER BY e.stream_ordering ASC
            """ % (
                ignore_room_clause,
            )
            txn.execute(sql, args)

            rows = [
                _EventDictReturn(event_id, None, stream_ordering)
                for event_id, instance_name, topological_ordering, stream_ordering in txn
                if _filter_results(
                    from_key,
                    to_key,
                    instance_name,
                    topological_ordering,
                    stream_ordering,
                )
            ]

            return rows

        rows = await self.db_pool.runInteraction("get_membership_changes_for_user", f)

        ret = await self.get_events_as_list(
            [r.event_id for r in rows], get_prev_content=True
        )

        self._set_before_and_after(ret, rows, topo_order=False)

        return ret

    async def get_recent_events_for_room(
        self, room_id: str, limit: int, end_token: RoomStreamToken
    ) -> Tuple[List[EventBase], RoomStreamToken]:
        """Get the most recent events in the room in topological ordering.

        Args:
            room_id
            limit
            end_token: The stream token representing now.

        Returns:
            A list of events and a token pointing to the start of the returned
            events. The events returned are in ascending topological order.
        """

        rows, token = await self.get_recent_event_ids_for_room(
            room_id, limit, end_token
        )

        events = await self.get_events_as_list(
            [r.event_id for r in rows], get_prev_content=True
        )

        self._set_before_and_after(events, rows)

        return events, token

    async def get_recent_event_ids_for_room(
        self, room_id: str, limit: int, end_token: RoomStreamToken
    ) -> Tuple[List[_EventDictReturn], RoomStreamToken]:
        """Get the most recent events in the room in topological ordering.

        Args:
            room_id
            limit
            end_token: The stream token representing now.

        Returns:
            A list of _EventDictReturn and a token pointing to the start of the
            returned events. The events returned are in ascending order.
        """
        # Allow a zero limit here, and no-op.
        if limit == 0:
            return [], end_token

        rows, token = await self.db_pool.runInteraction(
            "get_recent_event_ids_for_room",
            self._paginate_room_events_txn,
            room_id,
            from_token=end_token,
            limit=limit,
        )

        # We want to return the results in ascending order.
        rows.reverse()

        return rows, token

    async def get_room_event_before_stream_ordering(
        self, room_id: str, stream_ordering: int
    ) -> Optional[Tuple[int, int, str]]:
        """Gets details of the first event in a room at or before a stream ordering

        Args:
            room_id:
            stream_ordering:

        Returns:
            A tuple of (stream ordering, topological ordering, event_id)
        """

        def _f(txn: LoggingTransaction) -> Optional[Tuple[int, int, str]]:
            sql = """
                SELECT stream_ordering, topological_ordering, event_id
                FROM events
                LEFT JOIN rejections USING (event_id)
                WHERE room_id = ?
                    AND stream_ordering <= ?
                    AND NOT outlier
                    AND rejections.event_id IS NULL
                ORDER BY stream_ordering DESC
                LIMIT 1
            """
            txn.execute(sql, (room_id, stream_ordering))
            return cast(Optional[Tuple[int, int, str]], txn.fetchone())

        return await self.db_pool.runInteraction(
            "get_room_event_before_stream_ordering", _f
        )

    async def get_last_event_in_room_before_stream_ordering(
        self,
        room_id: str,
        end_token: RoomStreamToken,
    ) -> Optional[str]:
        """Returns the ID of the last event in a room at or before a stream ordering

        Args:
            room_id
            end_token: The token used to stream from

        Returns:
            The ID of the most recent event, or None if there are no events in the room
            before this stream ordering.
        """
        last_row = await self.get_room_event_before_stream_ordering(
            room_id=room_id,
            stream_ordering=end_token.stream,
        )
        if last_row:
            return last_row[2]
        return None

    async def get_current_room_stream_token_for_room_id(
        self, room_id: str
    ) -> RoomStreamToken:
        """Returns the current position of the rooms stream (historic token)."""
        stream_ordering = self.get_room_max_stream_ordering()
        topo = await self.db_pool.runInteraction(
            "_get_max_topological_txn", self._get_max_topological_txn, room_id
        )
        return RoomStreamToken(topo, stream_ordering)

    @overload
    def get_stream_id_for_event_txn(
        self,
        txn: LoggingTransaction,
        event_id: str,
        allow_none: Literal[False] = False,
    ) -> int:
        ...

    @overload
    def get_stream_id_for_event_txn(
        self,
        txn: LoggingTransaction,
        event_id: str,
        allow_none: bool = False,
    ) -> Optional[int]:
        ...

    def get_stream_id_for_event_txn(
        self,
        txn: LoggingTransaction,
        event_id: str,
        allow_none: bool = False,
    ) -> Optional[int]:
        # Type ignore: we pass keyvalues a Dict[str, str]; the function wants
        # Dict[str, Any]. I think mypy is unhappy because Dict is invariant?
        return self.db_pool.simple_select_one_onecol_txn(  # type: ignore[call-overload]
            txn=txn,
            table="events",
            keyvalues={"event_id": event_id},
            retcol="stream_ordering",
            allow_none=allow_none,
        )

    async def get_position_for_event(self, event_id: str) -> PersistedEventPosition:
        """Get the persisted position for an event"""
        row = await self.db_pool.simple_select_one(
            table="events",
            keyvalues={"event_id": event_id},
            retcols=("stream_ordering", "instance_name"),
            desc="get_position_for_event",
        )

        return PersistedEventPosition(
            row["instance_name"] or "master", row["stream_ordering"]
        )

    async def get_topological_token_for_event(self, event_id: str) -> RoomStreamToken:
        """The stream token for an event

        Args:
            event_id: The id of the event to look up a stream token for.

        Raises:
            StoreError if the event wasn't in the database.

        Returns:
            A `RoomStreamToken` topological token.
        """
        row = await self.db_pool.simple_select_one(
            table="events",
            keyvalues={"event_id": event_id},
            retcols=("stream_ordering", "topological_ordering"),
            desc="get_topological_token_for_event",
        )
        return RoomStreamToken(row["topological_ordering"], row["stream_ordering"])

    async def get_current_topological_token(self, room_id: str, stream_key: int) -> int:
        """Gets the topological token in a room after or at the given stream
        ordering.

        Args:
            room_id
            stream_key
        """
        sql = (
            "SELECT coalesce(MIN(topological_ordering), 0) FROM events"
            " WHERE room_id = ? AND stream_ordering >= ?"
        )
        row = await self.db_pool.execute(
            "get_current_topological_token", None, sql, room_id, stream_key
        )
        return row[0][0] if row else 0

    def _get_max_topological_txn(self, txn: LoggingTransaction, room_id: str) -> int:
        txn.execute(
            "SELECT MAX(topological_ordering) FROM events WHERE room_id = ?",
            (room_id,),
        )

        rows = txn.fetchall()
        # An aggregate function like MAX() will always return one row per group
        # so we can safely rely on the lookup here. For example, when we
        # look up a `room_id` which does not exist, `rows` will look like
        # `[(None,)]`
        return rows[0][0] if rows[0][0] is not None else 0

    @staticmethod
    def _set_before_and_after(
        events: List[EventBase], rows: List[_EventDictReturn], topo_order: bool = True
    ) -> None:
        """Inserts ordering information to events' internal metadata from
        the DB rows.

        Args:
            events
            rows
            topo_order: Whether the events were ordered topologically or by stream
                ordering. If true then all rows should have a non null
                topological_ordering.
        """
        for event, row in zip(events, rows):
            stream = row.stream_ordering
            if topo_order and row.topological_ordering:
                topo: Optional[int] = row.topological_ordering
            else:
                topo = None
            internal = event.internal_metadata
            internal.before = RoomStreamToken(topo, stream - 1)
            internal.after = RoomStreamToken(topo, stream)
            internal.order = (int(topo) if topo else 0, int(stream))

    async def get_events_around(
        self,
        room_id: str,
        event_id: str,
        before_limit: int,
        after_limit: int,
        event_filter: Optional[Filter] = None,
    ) -> _EventsAround:
        """Retrieve events and pagination tokens around a given event in a
        room.
        """
        results = await self.db_pool.runInteraction(
            "get_events_around",
            self._get_events_around_txn,
            room_id,
            event_id,
            before_limit,
            after_limit,
            event_filter,
        )

        events_before = await self.get_events_as_list(
            list(results["before"]["event_ids"]), get_prev_content=True
        )

        events_after = await self.get_events_as_list(
            list(results["after"]["event_ids"]), get_prev_content=True
        )

        return _EventsAround(
            events_before=events_before,
            events_after=events_after,
            start=results["before"]["token"],
            end=results["after"]["token"],
        )

    def _get_events_around_txn(
        self,
        txn: LoggingTransaction,
        room_id: str,
        event_id: str,
        before_limit: int,
        after_limit: int,
        event_filter: Optional[Filter],
    ) -> dict:
        """Retrieves event_ids and pagination tokens around a given event in a
        room.

        Args:
            room_id
            event_id
            before_limit
            after_limit
            event_filter

        Returns:
            dict
        """
        results = self.db_pool.simple_select_one_txn(
            txn,
            "events",
            keyvalues={"event_id": event_id, "room_id": room_id},
            retcols=["stream_ordering", "topological_ordering"],
        )
        # This cannot happen as `allow_none=False`.
        assert results is not None

        # Paginating backwards includes the event at the token, but paginating
        # forward doesn't.
        before_token = RoomStreamToken(
            results["topological_ordering"] - 1, results["stream_ordering"]
        )

        after_token = RoomStreamToken(
            results["topological_ordering"], results["stream_ordering"]
        )

        rows, start_token = self._paginate_room_events_txn(
            txn,
            room_id,
            before_token,
            direction="b",
            limit=before_limit,
            event_filter=event_filter,
        )
        events_before = [r.event_id for r in rows]

        rows, end_token = self._paginate_room_events_txn(
            txn,
            room_id,
            after_token,
            direction="f",
            limit=after_limit,
            event_filter=event_filter,
        )
        events_after = [r.event_id for r in rows]

        return {
            "before": {"event_ids": events_before, "token": start_token},
            "after": {"event_ids": events_after, "token": end_token},
        }

    async def get_all_new_events_stream(
        self, from_id: int, current_id: int, limit: int, get_prev_content: bool = False
    ) -> Tuple[int, List[EventBase], Dict[str, Optional[int]]]:
        """Get all new events

        Returns all events with from_id < stream_ordering <= current_id.

        Args:
            from_id: the stream_ordering of the last event we processed
            current_id: the stream_ordering of the most recently processed event
            limit: the maximum number of events to return
            get_prev_content: whether to fetch previous event content

        Returns:
            A tuple of (next_id, events, event_to_received_ts), where `next_id`
            is the next value to pass as `from_id` (it will either be the
            stream_ordering of the last returned event, or, if fewer than `limit`
            events were found, the `current_id`). The `event_to_received_ts` is
            a dictionary mapping event ID to the event `received_ts`.
        """

        def get_all_new_events_stream_txn(
            txn: LoggingTransaction,
        ) -> Tuple[int, Dict[str, Optional[int]]]:
            sql = (
                "SELECT e.stream_ordering, e.event_id, e.received_ts"
                " FROM events AS e"
                " WHERE"
                " ? < e.stream_ordering AND e.stream_ordering <= ?"
                " ORDER BY e.stream_ordering ASC"
                " LIMIT ?"
            )

            txn.execute(sql, (from_id, current_id, limit))
            rows = txn.fetchall()

            upper_bound = current_id
            if len(rows) == limit:
                upper_bound = rows[-1][0]

            event_to_received_ts: Dict[str, Optional[int]] = {
                row[1]: row[2] for row in rows
            }
            return upper_bound, event_to_received_ts

        upper_bound, event_to_received_ts = await self.db_pool.runInteraction(
            "get_all_new_events_stream", get_all_new_events_stream_txn
        )

        events = await self.get_events_as_list(
            event_to_received_ts.keys(),
            get_prev_content=get_prev_content,
        )

        return upper_bound, events, event_to_received_ts

    async def get_federation_out_pos(self, typ: str) -> int:
        if self._need_to_reset_federation_stream_positions:
            await self.db_pool.runInteraction(
                "_reset_federation_positions_txn", self._reset_federation_positions_txn
            )
            self._need_to_reset_federation_stream_positions = False

        return await self.db_pool.simple_select_one_onecol(
            table="federation_stream_position",
            retcol="stream_id",
            keyvalues={"type": typ, "instance_name": self._instance_name},
            desc="get_federation_out_pos",
        )

    async def update_federation_out_pos(self, typ: str, stream_id: int) -> None:
        if self._need_to_reset_federation_stream_positions:
            await self.db_pool.runInteraction(
                "_reset_federation_positions_txn", self._reset_federation_positions_txn
            )
            self._need_to_reset_federation_stream_positions = False

        await self.db_pool.simple_update_one(
            table="federation_stream_position",
            keyvalues={"type": typ, "instance_name": self._instance_name},
            updatevalues={"stream_id": stream_id},
            desc="update_federation_out_pos",
        )

    def _reset_federation_positions_txn(self, txn: LoggingTransaction) -> None:
        """Fiddles with the `federation_stream_position` table to make it match
        the configured federation sender instances during start up.
        """

        # The federation sender instances may have changed, so we need to
        # massage the `federation_stream_position` table to have a row per type
        # per instance sending federation. If there is a mismatch we update the
        # table with the correct rows using the *minimum* stream ID seen. This
        # may result in resending of events/EDUs to remote servers, but that is
        # preferable to dropping them.
        if not self._send_federation:
            return

        # Pull out the configured instances. If we don't have a shard config then
        # we assume that we're the only instance sending.
        configured_instances = self._federation_shard_config.instances
        if not configured_instances:
            configured_instances = [self._instance_name]
        elif self._instance_name not in configured_instances:
            return

        instances_in_table = self.db_pool.simple_select_onecol_txn(
            txn,
            table="federation_stream_position",
            keyvalues={},
            retcol="instance_name",
        )
        if set(instances_in_table) == set(configured_instances):
            # Nothing to do
            return

        sql = """
            SELECT type, MIN(stream_id) FROM federation_stream_position
            GROUP BY type
        """
        txn.execute(sql)
        min_positions = {typ: pos for typ, pos in txn}  # Map from type -> min position

        # Ensure we do actually have some values here
        assert set(min_positions) == {"federation", "events"}

        sql = """
            DELETE FROM federation_stream_position
            WHERE NOT (%s)
        """
        clause, args = make_in_list_sql_clause(
            txn.database_engine, "instance_name", configured_instances
        )
        txn.execute(sql % (clause,), args)

        for typ, stream_id in min_positions.items():
            self.db_pool.simple_upsert_txn(
                txn,
                table="federation_stream_position",
                keyvalues={"type": typ, "instance_name": self._instance_name},
                values={"stream_id": stream_id},
            )

    def has_room_changed_since(self, room_id: str, stream_id: int) -> bool:
        return self._events_stream_cache.has_entity_changed(room_id, stream_id)

    def _paginate_room_events_txn(
        self,
        txn: LoggingTransaction,
        room_id: str,
        from_token: RoomStreamToken,
        to_token: Optional[RoomStreamToken] = None,
        direction: str = "b",
        limit: int = -1,
        event_filter: Optional[Filter] = None,
    ) -> Tuple[List[_EventDictReturn], RoomStreamToken]:
        """Returns list of events before or after a given token.

        Args:
            txn
            room_id
            from_token: The token used to stream from
            to_token: A token which if given limits the results to only those before
            direction: Either 'b' or 'f' to indicate whether we are paginating
                forwards or backwards from `from_key`.
            limit: The maximum number of events to return.
            event_filter: If provided filters the events to
                those that match the filter.

        Returns:
            A list of _EventDictReturn and a token that points to the end of the
            result set. If no events are returned then the end of the stream has
            been reached (i.e. there are no events between `from_token` and
            `to_token`), or `limit` is zero.
        """

        assert int(limit) >= 0

        # Tokens really represent positions between elements, but we use
        # the convention of pointing to the event before the gap. Hence
        # we have a bit of asymmetry when it comes to equalities.
        args = [False, room_id]
        if direction == "b":
            order = "DESC"
        else:
            order = "ASC"

        # The bounds for the stream tokens are complicated by the fact
        # that we need to handle the instance_map part of the tokens. We do this
        # by fetching all events between the min stream token and the maximum
        # stream token (as returned by `RoomStreamToken.get_max_stream_pos`) and
        # then filtering the results.
        if from_token.topological is not None:
            from_bound: Tuple[Optional[int], int] = from_token.as_historical_tuple()
        elif direction == "b":
            from_bound = (
                None,
                from_token.get_max_stream_pos(),
            )
        else:
            from_bound = (
                None,
                from_token.stream,
            )

        to_bound: Optional[Tuple[Optional[int], int]] = None
        if to_token:
            if to_token.topological is not None:
                to_bound = to_token.as_historical_tuple()
            elif direction == "b":
                to_bound = (
                    None,
                    to_token.stream,
                )
            else:
                to_bound = (
                    None,
                    to_token.get_max_stream_pos(),
                )

        bounds = generate_pagination_where_clause(
            direction=direction,
            column_names=("event.topological_ordering", "event.stream_ordering"),
            from_token=from_bound,
            to_token=to_bound,
            engine=self.database_engine,
        )

        filter_clause, filter_args = filter_to_clause(event_filter)

        if filter_clause:
            bounds += " AND " + filter_clause
            args.extend(filter_args)

        # We fetch more events as we'll filter the result set
        args.append(int(limit) * 2)

        select_keywords = "SELECT"
        join_clause = ""

        # Using DISTINCT in this SELECT query is quite expensive, because it
        # requires the engine to sort on the entire (not limited) result set,
        # i.e. the entire events table. Only use it in scenarios that could result
        # in the same event ID occurring multiple times in the results.
        needs_distinct = False
        if event_filter and event_filter.labels:
            # If we're not filtering on a label, then joining on event_labels will
            # return as many rows for a single event as the number of labels it has. To
            # avoid this, only join if we're filtering on at least one label.
            join_clause += """
                LEFT JOIN event_labels
                USING (event_id, room_id, topological_ordering)
            """
            if len(event_filter.labels) > 1:
                # Multiple labels could cause the same event to appear multiple times.
                needs_distinct = True

        # If there is a filter on relation_senders and relation_types join to the
        # relations table.
        if event_filter and (
            event_filter.related_by_senders or event_filter.related_by_rel_types
        ):
            # Filtering by relations could cause the same event to appear multiple
            # times (since there's no limit on the number of relations to an event).
            needs_distinct = True
            join_clause += """
                LEFT JOIN event_relations AS relation ON (event.event_id = relation.relates_to_id)
            """

            if event_filter.related_by_senders:
                join_clause += """
                    LEFT JOIN events AS related_event ON (relation.event_id = related_event.event_id)
                """

        if needs_distinct:
            select_keywords += " DISTINCT"

        sql = """
            %(select_keywords)s
                event.event_id, event.instance_name,
                event.topological_ordering, event.stream_ordering
            FROM events AS event
            %(join_clause)s
            WHERE event.outlier = ? AND event.room_id = ? AND %(bounds)s
            ORDER BY event.topological_ordering %(order)s,
            event.stream_ordering %(order)s LIMIT ?
        """ % {
            "select_keywords": select_keywords,
            "join_clause": join_clause,
            "bounds": bounds,
            "order": order,
        }

        txn.execute(sql, args)

        # Filter the result set.
        rows = [
            _EventDictReturn(event_id, topological_ordering, stream_ordering)
            for event_id, instance_name, topological_ordering, stream_ordering in txn
            if _filter_results(
                lower_token=to_token if direction == "b" else from_token,
                upper_token=from_token if direction == "b" else to_token,
                instance_name=instance_name,
                topological_ordering=topological_ordering,
                stream_ordering=stream_ordering,
            )
        ][:limit]

        if rows:
            topo = rows[-1].topological_ordering
            token = rows[-1].stream_ordering
            if direction == "b":
                # Tokens are positions between events.
                # This token points *after* the last event in the chunk.
                # We need it to point to the event before it in the chunk
                # when we are going backwards so we subtract one from the
                # stream part.
                token -= 1
            next_token = RoomStreamToken(topo, token)
        else:
            # TODO (erikj): We should work out what to do here instead.
            next_token = to_token if to_token else from_token

        return rows, next_token
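
    # Illustrative sketch of the token arithmetic above: paginating backwards,
    # if the last row returned has (topological_ordering=10, stream_ordering=250)
    # (hypothetical values), the next token is RoomStreamToken(10, 249), i.e. it
    # points just *before* that event so the next backwards page resumes without
    # repeating it.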

    @trace
    async def paginate_room_events(
        self,
        room_id: str,
        from_key: RoomStreamToken,
        to_key: Optional[RoomStreamToken] = None,
        direction: str = "b",
        limit: int = -1,
        event_filter: Optional[Filter] = None,
    ) -> Tuple[List[EventBase], RoomStreamToken]:
        """Returns list of events before or after a given token.

        Args:
            room_id
            from_key: The token used to stream from
            to_key: A token which if given limits the results to only those before
            direction: Either 'b' or 'f' to indicate whether we are paginating
                forwards or backwards from `from_key`.
            limit: The maximum number of events to return.
            event_filter: If provided filters the events to those that match the filter.

        Returns:
            The results as a list of events and a token that points to the end
            of the result set. If no events are returned then the end of the
            stream has been reached (i.e. there are no events between `from_key`
            and `to_key`).
        """

        rows, token = await self.db_pool.runInteraction(
            "paginate_room_events",
            self._paginate_room_events_txn,
            room_id,
            from_key,
            to_key,
            direction,
            limit,
            event_filter,
        )

        events = await self.get_events_as_list(
            [r.event_id for r in rows], get_prev_content=True
        )

        self._set_before_and_after(events, rows)

        return events, token
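
    # A minimal usage sketch (hypothetical names and values): fetch the 10
    # events before a token in a room, then continue paging from the returned
    # token:
    #
    #     events, next_token = await store.paginate_room_events(
    #         "!room:example.com", from_key=token, direction="b", limit=10
    #     )
    #     more, _ = await store.paginate_room_events(
    #         "!room:example.com", from_key=next_token, direction="b", limit=10
    #     )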

    @cached()
    async def get_id_for_instance(self, instance_name: str) -> int:
        """Get a unique, immutable ID that corresponds to the given Synapse worker instance."""

        def _get_id_for_instance_txn(txn: LoggingTransaction) -> int:
            instance_id = self.db_pool.simple_select_one_onecol_txn(
                txn,
                table="instance_map",
                keyvalues={"instance_name": instance_name},
                retcol="instance_id",
                allow_none=True,
            )
            if instance_id is not None:
                return instance_id

            # If we don't have an entry upsert one.
            #
            # We could do this before the first check, and rely on the cache for
            # efficiency, but each UPSERT causes the next ID to increment which
            # can quickly bloat the size of the generated IDs for new instances.
            self.db_pool.simple_upsert_txn(
                txn,
                table="instance_map",
                keyvalues={"instance_name": instance_name},
                values={},
            )

            return self.db_pool.simple_select_one_onecol_txn(
                txn,
                table="instance_map",
                keyvalues={"instance_name": instance_name},
                retcol="instance_id",
            )

        return await self.db_pool.runInteraction(
            "get_id_for_instance", _get_id_for_instance_txn
        )

    @cached()
    async def get_name_from_instance_id(self, instance_id: int) -> str:
        """Get the instance name from an ID previously returned by
        `get_id_for_instance`.
        """
        return await self.db_pool.simple_select_one_onecol(
            table="instance_map",
            keyvalues={"instance_id": instance_id},
            retcol="instance_name",
            desc="get_name_from_instance_id",
        )