# Copyright 2020 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from inspect import isawaitable
from typing import TYPE_CHECKING, Any, Generic, List, Optional, Type, TypeVar, cast

import attr
import txredisapi
from zope.interface import implementer

from twisted.internet.address import IPv4Address, IPv6Address
from twisted.internet.interfaces import IAddress, IConnector
from twisted.python.failure import Failure

from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
from synapse.metrics.background_process_metrics import (
    BackgroundProcessLoggingContext,
    run_as_background_process,
    wrap_as_background_process,
)
from synapse.replication.tcp.commands import (
    Command,
    ReplicateCommand,
    parse_command_from_line,
)
from synapse.replication.tcp.protocol import (
    IReplicationConnection,
    tcp_inbound_commands_counter,
    tcp_outbound_commands_counter,
)

if TYPE_CHECKING:
    from synapse.replication.tcp.handler import ReplicationCommandHandler
    from synapse.server import HomeServer

logger = logging.getLogger(__name__)

T = TypeVar("T")
V = TypeVar("V")


@attr.s
class ConstantProperty(Generic[T, V]):
    """A descriptor that returns the given constant, ignoring attempts to set
    it.
    """

    constant: V = attr.ib()

    def __get__(self, obj: Optional[T], objtype: Optional[Type[T]] = None) -> V:
        return self.constant

    def __set__(self, obj: Optional[T], value: V) -> None:
        pass


@implementer(IReplicationConnection)
class RedisSubscriber(txredisapi.SubscriberProtocol):
    """Connection to redis subscribed to replication stream.

    This class fulfils two functions:

    (a) it implements the twisted Protocol API, where it handles the SUBSCRIBEd redis
    connection, parsing *incoming* messages into replication commands, and passing them
    to `ReplicationCommandHandler`

    (b) it implements the IReplicationConnection API, where it sends *outgoing* commands
    onto outbound_redis_connection.

    Due to the vagaries of `txredisapi` we don't want to have a custom
    constructor, so instead we expect the defined attributes below to be set
    immediately after initialisation.

    Attributes:
        synapse_handler: The command handler to handle incoming commands.
        synapse_stream_prefix: The *redis* stream name to subscribe to and publish
            from (not anything to do with Synapse replication streams).
        synapse_channel_names: A list of channel names to append to the stream
            prefix, giving additional channels to subscribe to.
        synapse_outbound_redis_connection: The connection to redis to use to send
            commands.
    """

    synapse_handler: "ReplicationCommandHandler"
    synapse_stream_prefix: str
    synapse_channel_names: List[str]
    synapse_outbound_redis_connection: txredisapi.ConnectionHandler

    def __init__(self, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)

        # a logcontext which we use for processing incoming commands. We declare it as a
        # background process so that the CPU stats get reported to prometheus.
        with PreserveLoggingContext():
            # thanks to `PreserveLoggingContext()`, the new logcontext is guaranteed to
            # capture the sentinel context as its containing context and won't prevent
            # GC of / unintentionally reactivate what would be the current context.
            self._logging_context = BackgroundProcessLoggingContext(
                "replication_command_handler"
            )

    def connectionMade(self) -> None:
        logger.info("Connected to redis")
        super().connectionMade()
        run_as_background_process("subscribe-replication", self._send_subscribe)

    async def _send_subscribe(self) -> None:
        # it's important to make sure that we only send the REPLICATE command once we
        # have successfully subscribed to the stream - otherwise we might miss the
        # POSITION response sent back by the other end.
        fully_qualified_stream_names = [
            f"{self.synapse_stream_prefix}/{stream_suffix}"
            for stream_suffix in self.synapse_channel_names
        ] + [self.synapse_stream_prefix]
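        # e.g. with stream prefix "example.com" and channel names ["ABC", "DEF"],
        # this subscribes to "example.com/ABC", "example.com/DEF" and "example.com".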
        logger.info("Sending redis SUBSCRIBE for %r", fully_qualified_stream_names)
        await make_deferred_yieldable(self.subscribe(fully_qualified_stream_names))

        logger.info(
            "Successfully subscribed to redis stream, sending REPLICATE command"
        )
        self.synapse_handler.new_connection(self)
        await self._async_send_command(ReplicateCommand())
        logger.info("REPLICATE successfully sent")

        # We send out our positions when there is a new connection in case the
        # other side missed updates. We do this for Redis connections as the
        # other side won't know we've connected and so won't issue a REPLICATE.
        self.synapse_handler.send_positions_to_connection(self)

    def messageReceived(self, pattern: str, channel: str, message: str) -> None:
        """Received a message from redis."""
        with PreserveLoggingContext(self._logging_context):
            self._parse_and_dispatch_message(message)

    def _parse_and_dispatch_message(self, message: str) -> None:
        if message.strip() == "":
            # Ignore blank lines
            return

        try:
            cmd = parse_command_from_line(message)
        except Exception:
            logger.exception(
                "Failed to parse replication line: %r",
                message,
            )
            return

        # We use "redis" as the name here as we don't have 1:1 connections to
        # remote instances.
        tcp_inbound_commands_counter.labels(cmd.NAME, "redis").inc()

        self.handle_command(cmd)

    def handle_command(self, cmd: Command) -> None:
        """Handle a command we have received over the replication stream.

        Delegates to `self.synapse_handler.on_<COMMAND>` (which can optionally return
        an Awaitable).

        Args:
            cmd: received command
        """

        cmd_func = getattr(self.synapse_handler, "on_%s" % (cmd.NAME,), None)
        if not cmd_func:
            logger.warning("Unhandled command: %r", cmd)
            return

        res = cmd_func(self, cmd)

        # the handler might be a coroutine: fire it off as a background process
        # if so.

        if isawaitable(res):
            run_as_background_process(
                "replication-" + cmd.get_logcontext_id(), lambda: res
            )

    def connectionLost(self, reason: Failure) -> None:  # type: ignore[override]
        logger.info("Lost connection to redis")
        super().connectionLost(reason)
        self.synapse_handler.lost_connection(self)

        # mark the logging context as finished by triggering `__exit__()`
        with PreserveLoggingContext():
            with self._logging_context:
                pass

            # the sentinel context is now active, which may not be correct.
            # PreserveLoggingContext() will restore the correct logging context.

    def send_command(self, cmd: Command) -> None:
        """Send a command if connection has been established.

        Args:
            cmd: The command to send
        """
        run_as_background_process(
            "send-cmd", self._async_send_command, cmd, bg_start_span=False
        )

    async def _async_send_command(self, cmd: Command) -> None:
        """Encode a replication command and send it over our outbound connection"""
        string = "%s %s" % (cmd.NAME, cmd.to_line())
        if "\n" in string:
            raise Exception("Unexpected newline in command: %r" % (string,))

        encoded_string = string.encode("utf-8")

        # We use "redis" as the name here as we don't have 1:1 connections to
        # remote instances.
        tcp_outbound_commands_counter.labels(cmd.NAME, "redis").inc()

        channel_name = cmd.redis_channel_name(self.synapse_stream_prefix)
        await make_deferred_yieldable(
            self.synapse_outbound_redis_connection.publish(channel_name, encoded_string)
        )


class SynapseRedisFactory(txredisapi.RedisFactory):
    """A subclass of RedisFactory that periodically sends pings to ensure that
    we detect dead connections.
    """

    # We want to *always* retry connecting: txredisapi will stop if there is a
    # failure during certain operations, e.g. during AUTH.
    continueTrying = cast(bool, ConstantProperty(True))

    def __init__(
        self,
        hs: "HomeServer",
        uuid: str,
        dbid: Optional[int],
        poolsize: int,
        isLazy: bool = False,
        handler: Type = txredisapi.ConnectionHandler,
        charset: str = "utf-8",
        password: Optional[str] = None,
        replyTimeout: int = 30,
        convertNumbers: Optional[int] = True,
    ):
        super().__init__(
            uuid=uuid,
            dbid=dbid,
            poolsize=poolsize,
            isLazy=isLazy,
            handler=handler,
            charset=charset,
            password=password,
            replyTimeout=replyTimeout,
            convertNumbers=convertNumbers,
        )

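        # Ping all connections in the pool every 30 seconds so that dead
        # connections are noticed promptly.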
        hs.get_clock().looping_call(self._send_ping, 30 * 1000)

    @wrap_as_background_process("redis_ping")
    async def _send_ping(self) -> None:
        for connection in self.pool:
            try:
                await make_deferred_yieldable(connection.ping())
            except Exception:
                logger.warning("Failed to send ping to a redis connection")

    # ReconnectingClientFactory has some logging (if you enable `self.noisy`), but
    # it's rubbish. We add our own here.

    def startedConnecting(self, connector: IConnector) -> None:
        logger.info(
            "Connecting to redis server %s", format_address(connector.getDestination())
        )
        super().startedConnecting(connector)

    def clientConnectionFailed(self, connector: IConnector, reason: Failure) -> None:
        logger.info(
            "Connection to redis server %s failed: %s",
            format_address(connector.getDestination()),
            reason.value,
        )
        super().clientConnectionFailed(connector, reason)

    def clientConnectionLost(self, connector: IConnector, reason: Failure) -> None:
        logger.info(
            "Connection to redis server %s lost: %s",
            format_address(connector.getDestination()),
            reason.value,
        )
        super().clientConnectionLost(connector, reason)


def format_address(address: IAddress) -> str:
    if isinstance(address, (IPv4Address, IPv6Address)):
        return "%s:%i" % (address.host, address.port)
    return str(address)


class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory):
    """This is a reconnecting factory that connects to redis and immediately
    subscribes to some streams.

    Args:
        hs: The HomeServer instance.
        outbound_redis_connection: A connection to redis that will be used to
            send outbound commands (this is separate to the redis connection
            used to subscribe).
        channel_names: A list of channel names to append to the base channel name
            to additionally subscribe to.
            e.g. if ['ABC', 'DEF'] is specified then we'll listen to:
            example.com; example.com/ABC; and example.com/DEF.
    """

    maxDelay = 5
    protocol = RedisSubscriber

    def __init__(
        self,
        hs: "HomeServer",
        outbound_redis_connection: txredisapi.ConnectionHandler,
        channel_names: List[str],
    ):
        super().__init__(
            hs,
            uuid="subscriber",
            dbid=None,
            poolsize=1,
            replyTimeout=30,
            password=hs.config.redis.redis_password,
        )

        self.synapse_handler = hs.get_replication_command_handler()
        self.synapse_stream_prefix = hs.hostname
        self.synapse_channel_names = channel_names
        self.synapse_outbound_redis_connection = outbound_redis_connection

    def buildProtocol(self, addr: IAddress) -> RedisSubscriber:
        p = super().buildProtocol(addr)
        p = cast(RedisSubscriber, p)

        # We do this here rather than add to the constructor of `RedisSubscriber`
        # as to do so would involve overriding `buildProtocol` entirely, however
        # the base method does some other things than just instantiating the
        # protocol.
        p.synapse_handler = self.synapse_handler
        p.synapse_outbound_redis_connection = self.synapse_outbound_redis_connection
        p.synapse_stream_prefix = self.synapse_stream_prefix
        p.synapse_channel_names = self.synapse_channel_names

        return p


def lazyConnection(
    hs: "HomeServer",
    host: str = "localhost",
    port: int = 6379,
    dbid: Optional[int] = None,
    reconnect: bool = True,
    password: Optional[str] = None,
    replyTimeout: int = 30,
) -> txredisapi.ConnectionHandler:
    """Creates a connection to Redis that is lazily set up and reconnects if the
    connection is lost.
    """

    uuid = "%s:%d" % (host, port)
    factory = SynapseRedisFactory(
        hs,
        uuid=uuid,
        dbid=dbid,
        poolsize=1,
        isLazy=True,
        handler=txredisapi.ConnectionHandler,
        password=password,
        replyTimeout=replyTimeout,
    )
    factory.continueTrying = reconnect

    reactor = hs.get_reactor()
    reactor.connectTCP(
        host,
        port,
        factory,
        timeout=30,
        bindAddress=None,
    )

    return factory.handler
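
# A rough usage sketch (illustrative only, not part of this module): a worker
# typically creates the outbound connection with `lazyConnection(...)` and then
# connects the subscriber factory to the same redis server, e.g.:
#
#     outbound_redis = lazyConnection(hs, host="localhost", port=6379)
#     factory = RedisDirectTcpReplicationClientFactory(
#         hs, outbound_redis, channel_names=["USER_IP"]
#     )
#     hs.get_reactor().connectTCP("localhost", 6379, factory)
#
# The host, port and channel names above are illustrative assumptions; the actual
# wiring in Synapse is done by the replication command handler.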