# Copyright 2017 Vector Creations Ltd
# Copyright 2019 New Vector Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import heapq
from typing import TYPE_CHECKING, Iterable, Optional, Tuple, Type, TypeVar, cast

import attr

from synapse.replication.tcp.streams._base import (
    Stream,
    StreamRow,
    StreamUpdateResult,
    Token,
)

if TYPE_CHECKING:
    from synapse.server import HomeServer

"""Handling of the 'events' replication stream

This stream contains rows of various types. Each row therefore contains a 'type'
identifier before the real data. For example::

    RDATA events batch ["state", ["!room:id", "m.type", "", "$event:id"]]
    RDATA events 12345 ["ev", ["$event:id", "!room:id", "m.type", null, null]]

An "ev" row is sent for each new event. The fields in the data part are:

 * The new event id
 * The room id for the event
 * The type of the new event
 * The state key of the event, for state events
 * The event id of an event which is redacted by this event.

A "state" row is sent whenever the "current state" in a room changes. The fields in the
data part are:

 * The room id for the state change
 * The event type of the state which has changed
 * The state_key of the state which has changed
 * The event id of the new state
"""


@attr.s(slots=True, frozen=True, auto_attribs=True)
class EventsStreamRow:
    """A parsed row from the events replication stream"""

    type: str  # the TypeId of one of the *EventsStreamRows
    data: "BaseEventsStreamRow"


T = TypeVar("T", bound="BaseEventsStreamRow")


class BaseEventsStreamRow:
    """Base class for rows to be sent in the events stream.

    Specifies how to identify, serialize and deserialize the different types.
    """

    # Unique string that ids the type. Must be overridden in sub classes.
    TypeId: str

    @classmethod
    def from_data(cls: Type[T], data: Iterable[Optional[str]]) -> T:
        """Parse the data from the replication stream into a row.

        By default we just call the constructor with the data list as arguments

        Args:
            data: The value of the data object from the replication stream
        """
        return cls(*data)


@attr.s(slots=True, frozen=True, auto_attribs=True)
class EventsStreamEventRow(BaseEventsStreamRow):
    TypeId = "ev"

    event_id: str
    room_id: str
    type: str
    state_key: Optional[str]
    redacts: Optional[str]
    relates_to: Optional[str]
    membership: Optional[str]
    rejected: bool
    outlier: bool


@attr.s(slots=True, frozen=True, auto_attribs=True)
class EventsStreamCurrentStateRow(BaseEventsStreamRow):
    TypeId = "state"

    room_id: str
    type: str
    state_key: str
    event_id: Optional[str]


_EventRows: Tuple[Type[BaseEventsStreamRow], ...] = (
    EventsStreamEventRow,
    EventsStreamCurrentStateRow,
)

TypeToRow = {Row.TypeId: Row for Row in _EventRows}
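

# Illustrative sketch, not part of the original module: TypeToRow maps the
# wire-format type identifier to its row class, and from_data() simply splats
# the JSON data list into the attrs constructor in field-declaration order.
# The helper below and its example values are hypothetical, added purely to
# show the dispatch.
def _example_parse_ev_data() -> BaseEventsStreamRow:
    row_cls = TypeToRow["ev"]  # -> EventsStreamEventRow
    # One value per field of EventsStreamEventRow, in declaration order.
    return row_cls.from_data(
        ["$event:id", "!room:id", "m.type", None, None, None, None, False, False]
    )
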

class EventsStream(Stream):
    """We received a new event, or an event went from being an outlier to not"""

    NAME = "events"

    def __init__(self, hs: "HomeServer"):
        self._store = hs.get_datastores().main
        super().__init__(
            hs.get_instance_name(),
            self._store._stream_id_gen.get_current_token_for_writer,
            self._update_function,
        )

    async def _update_function(
        self,
        instance_name: str,
        from_token: Token,
        current_token: Token,
        target_row_count: int,
    ) -> StreamUpdateResult:
        # the events stream merges together three separate sources:
        #  * new events
        #  * current_state changes
        #  * events which were previously outliers, but have now been de-outliered.
        #
        # The merge operation is complicated by the fact that we only have a single
        # "stream token" which is supposed to indicate how far we have got through
        # all three streams. It's therefore no good to return rows 1-1000 from the
        # "new events" table if the state_deltas are limited to rows 1-100 by the
        # target_row_count.
        #
        # In other words: we must pick a new upper limit, and must return *all* rows
        # up to that point for each of the three sources.
        #
        # Start by trying to split the target_row_count up. We expect to have a
        # negligible number of ex-outliers, and a rough approximation based on recent
        # traffic on sw1v.org shows that there are approximately the same number of
        # event rows between a given pair of stream ids as there are state updates,
        # so let's split our target_row_count among those two types. The target is
        # only an approximation - it doesn't matter if we end up going a bit over it.

        target_row_count //= 2

        # now we fetch up to that many rows from the events table

        event_rows = await self._store.get_all_new_forward_event_rows(
            instance_name, from_token, current_token, target_row_count
        )

        # we rely on get_all_new_forward_event_rows strictly honouring the limit, so
        # that we know it is safe to just take upper_limit = event_rows[-1][0].
        assert (
            len(event_rows) <= target_row_count
        ), "get_all_new_forward_event_rows did not honour row limit"

        # if we hit the limit on event_updates, there's no point in going beyond the
        # last stream_id in the batch for the other sources.
        if len(event_rows) == target_row_count:
            limited = True
            upper_limit: int = event_rows[-1][0]
        else:
            limited = False
            upper_limit = current_token

        # next up is the state delta table.
        (
            state_rows,
            upper_limit,
            state_rows_limited,
        ) = await self._store.get_all_updated_current_state_deltas(
            instance_name, from_token, upper_limit, target_row_count
        )

        limited = limited or state_rows_limited

        # finally, fetch the ex-outliers rows. We assume there are few enough of these
        # not to bother with the limit.
        ex_outliers_rows = await self._store.get_ex_outlier_stream_rows(
            instance_name, from_token, upper_limit
        )

        # we now need to turn the raw database rows returned into tuples suitable
        # for the replication protocol (basically, we add an identifier to
        # distinguish the row type). At the same time, we can limit the event_rows
        # to the max stream_id from state_rows.
        event_updates: Iterable[Tuple[int, Tuple]] = (
            (stream_id, (EventsStreamEventRow.TypeId, rest))
            for (stream_id, *rest) in event_rows
            if stream_id <= upper_limit
        )

        state_updates: Iterable[Tuple[int, Tuple]] = (
            (stream_id, (EventsStreamCurrentStateRow.TypeId, rest))
            for (stream_id, *rest) in state_rows
        )

        ex_outliers_updates: Iterable[Tuple[int, Tuple]] = (
            (stream_id, (EventsStreamEventRow.TypeId, rest))
            for (stream_id, *rest) in ex_outliers_rows
        )

        # we need to return a sorted list, so merge them together.
        updates = list(heapq.merge(event_updates, state_updates, ex_outliers_updates))
        return updates, upper_limit, limited

    @classmethod
    def parse_row(cls, row: StreamRow) -> "EventsStreamRow":
        (typ, data) = cast(Tuple[str, Iterable[Optional[str]]], row)
        event_stream_row_data = TypeToRow[typ].from_data(data)
        return EventsStreamRow(typ, event_stream_row_data)
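

# Illustrative sketch, not part of the original module: parse_row() converts a
# raw (type identifier, data list) tuple, as received over the replication
# connection, back into a typed EventsStreamRow - the inverse of the tuples
# built in _update_function above. The helper and its values are made up.
def _example_parse_state_row() -> EventsStreamRow:
    raw = ("state", ["!room:id", "m.type", "", "$event:id"])
    parsed = EventsStream.parse_row(raw)
    assert parsed.type == "state"
    assert isinstance(parsed.data, EventsStreamCurrentStateRow)
    assert parsed.data.event_id == "$event:id"
    return parsed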