You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

562 lines
22 KiB

  1. # Copyright 2022 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unles4s required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import inspect
  15. import itertools
  16. import logging
  17. from typing import (
  18. Any,
  19. Callable,
  20. ContextManager,
  21. Dict,
  22. List,
  23. Optional,
  24. Set,
  25. Tuple,
  26. TypeVar,
  27. Union,
  28. )
  29. from unittest import mock
  30. from unittest.mock import Mock
  31. from twisted.internet.defer import Deferred
  32. from twisted.internet.error import ConnectionDone
  33. from twisted.python.failure import Failure
  34. from twisted.test.proto_helpers import MemoryReactorClock
  35. from twisted.web.server import Site
  36. from synapse.http.server import (
  37. HTTP_STATUS_REQUEST_CANCELLED,
  38. respond_with_html_bytes,
  39. respond_with_json,
  40. )
  41. from synapse.http.site import SynapseRequest
  42. from synapse.logging.context import LoggingContext, make_deferred_yieldable
  43. from synapse.types import JsonDict
  44. from tests.server import FakeChannel, make_request
  45. from tests.unittest import logcontext_clean
  46. logger = logging.getLogger(__name__)
  47. T = TypeVar("T")
  48. def test_disconnect(
  49. reactor: MemoryReactorClock,
  50. channel: FakeChannel,
  51. expect_cancellation: bool,
  52. expected_body: Union[bytes, JsonDict],
  53. expected_code: Optional[int] = None,
  54. ) -> None:
  55. """Disconnects an in-flight request and checks the response.
  56. Args:
  57. reactor: The twisted reactor running the request handler.
  58. channel: The `FakeChannel` for the request.
  59. expect_cancellation: `True` if request processing is expected to be cancelled,
  60. `False` if the request should run to completion.
  61. expected_body: The expected response for the request.
  62. expected_code: The expected status code for the request. Defaults to `200` or
  63. `499` depending on `expect_cancellation`.
  64. """
  65. # Determine the expected status code.
  66. if expected_code is None:
  67. if expect_cancellation:
  68. expected_code = HTTP_STATUS_REQUEST_CANCELLED
  69. else:
  70. expected_code = 200
  71. request = channel.request
  72. if channel.is_finished():
  73. raise AssertionError(
  74. "Request finished before we could disconnect - "
  75. "ensure `await_result=False` is passed to `make_request`.",
  76. )
  77. # We're about to disconnect the request. This also disconnects the channel, so we
  78. # have to rely on mocks to extract the response.
  79. respond_method: Callable[..., Any]
  80. if isinstance(expected_body, bytes):
  81. respond_method = respond_with_html_bytes
  82. else:
  83. respond_method = respond_with_json
  84. with mock.patch(
  85. f"synapse.http.server.{respond_method.__name__}", wraps=respond_method
  86. ) as respond_mock:
  87. # Disconnect the request.
  88. request.connectionLost(reason=ConnectionDone())
  89. if expect_cancellation:
  90. # An immediate cancellation is expected.
  91. respond_mock.assert_called_once()
  92. else:
  93. respond_mock.assert_not_called()
  94. # The handler is expected to run to completion.
  95. reactor.advance(1.0)
  96. respond_mock.assert_called_once()
  97. args, _kwargs = respond_mock.call_args
  98. code, body = args[1], args[2]
  99. if code != expected_code:
  100. raise AssertionError(
  101. f"{code} != {expected_code} : "
  102. "Request did not finish with the expected status code."
  103. )
  104. if request.code != expected_code:
  105. raise AssertionError(
  106. f"{request.code} != {expected_code} : "
  107. "Request did not finish with the expected status code."
  108. )
  109. if body != expected_body:
  110. raise AssertionError(
  111. f"{body!r} != {expected_body!r} : "
  112. "Request did not finish with the expected status code."
  113. )
  114. @logcontext_clean
  115. def make_request_with_cancellation_test(
  116. test_name: str,
  117. reactor: MemoryReactorClock,
  118. site: Site,
  119. method: str,
  120. path: str,
  121. content: Union[bytes, str, JsonDict] = b"",
  122. *,
  123. token: Optional[str] = None,
  124. ) -> FakeChannel:
  125. """Performs a request repeatedly, disconnecting at successive `await`s, until
  126. one completes.
  127. Fails if:
  128. * A logging context is lost during cancellation.
  129. * A logging context get restarted after it is marked as finished, eg. if
  130. a request's logging context is used by some processing started by the
  131. request, but the request neglects to cancel that processing or wait for it
  132. to complete.
  133. Note that "Re-starting finished log context" errors get raised within the
  134. request handling code and may or may not get caught. These errors will
  135. likely manifest as a different logging context error at a later point. When
  136. debugging logging context failures, setting a breakpoint in
  137. `logcontext_error` can prove useful.
  138. * A request gets stuck, possibly due to a previous cancellation.
  139. * The request does not return a 499 when the client disconnects.
  140. This implies that a `CancelledError` was swallowed somewhere.
  141. It is up to the caller to verify that the request returns the correct data when
  142. it finally runs to completion.
  143. Note that this function can only cover a single code path and does not guarantee
  144. that an endpoint is compatible with cancellation on every code path.
  145. To allow inspection of the code path that is being tested, this function will
  146. log the stack trace at every `await` that gets cancelled. To view these log
  147. lines, `trial` can be run with the `SYNAPSE_TEST_LOG_LEVEL=INFO` environment
  148. variable, which will include the log lines in `_trial_temp/test.log`.
  149. Alternatively, `_log_for_request` can be modified to write to `sys.stdout`.
  150. Args:
  151. test_name: The name of the test, which will be logged.
  152. reactor: The twisted reactor running the request handler.
  153. site: The twisted `Site` to use to render the request.
  154. method: The HTTP request method ("verb").
  155. path: The HTTP path, suitably URL encoded (e.g. escaped UTF-8 & spaces and
  156. such).
  157. content: The body of the request.
  158. Returns:
  159. The `FakeChannel` object which stores the result of the final request that
  160. runs to completion.
  161. """
  162. # To process a request, a coroutine run is created for the async method handling
  163. # the request. That method may then start other coroutine runs, wrapped in
  164. # `Deferred`s.
  165. #
  166. # We would like to trigger a cancellation at the first `await`, re-run the
  167. # request and cancel at the second `await`, and so on. By patching
  168. # `Deferred.__next__`, we can intercept `await`s, track which ones we have or
  169. # have not seen, and force them to block when they wouldn't have.
  170. # The set of previously seen `await`s.
  171. # Each element is a stringified stack trace.
  172. seen_awaits: Set[Tuple[str, ...]] = set()
  173. _log_for_request(
  174. 0, f"Running make_request_with_cancellation_test for {test_name}..."
  175. )
  176. for request_number in itertools.count(1):
  177. deferred_patch = Deferred__next__Patch(seen_awaits, request_number)
  178. try:
  179. with mock.patch(
  180. "synapse.http.server.respond_with_json", wraps=respond_with_json
  181. ) as respond_mock:
  182. with deferred_patch.patch():
  183. # Start the request.
  184. channel = make_request(
  185. reactor,
  186. site,
  187. method,
  188. path,
  189. content,
  190. await_result=False,
  191. access_token=token,
  192. )
  193. request = channel.request
  194. # Run the request until we see a new `await` which we have not
  195. # yet cancelled at, or it completes.
  196. while not respond_mock.called and not deferred_patch.new_await_seen:
  197. previous_awaits_seen = deferred_patch.awaits_seen
  198. reactor.advance(0.0)
  199. if deferred_patch.awaits_seen == previous_awaits_seen:
  200. # We didn't see any progress. Try advancing the clock.
  201. reactor.advance(1.0)
  202. if deferred_patch.awaits_seen == previous_awaits_seen:
  203. # We still didn't see any progress. The request might be
  204. # stuck.
  205. raise AssertionError(
  206. "Request appears to be stuck, possibly due to a "
  207. "previous cancelled request"
  208. )
  209. if respond_mock.called:
  210. # The request ran to completion and we are done with testing it.
  211. # `respond_with_json` writes the response asynchronously, so we
  212. # might have to give the reactor a kick before the channel gets
  213. # the response.
  214. deferred_patch.unblock_awaits()
  215. channel.await_result()
  216. return channel
  217. # Disconnect the client and wait for the response.
  218. request.connectionLost(reason=ConnectionDone())
  219. _log_for_request(request_number, "--- disconnected ---")
  220. # Advance the reactor just enough to get a response.
  221. # We don't want to advance the reactor too far, because we can only
  222. # detect re-starts of finished logging contexts after we set the
  223. # finished flag below.
  224. for _ in range(2):
  225. # We may need to pump the reactor to allow `delay_cancellation`s to
  226. # finish.
  227. if not respond_mock.called:
  228. reactor.advance(0.0)
  229. # Try advancing the clock if that didn't work.
  230. if not respond_mock.called:
  231. reactor.advance(1.0)
  232. # `delay_cancellation`s may be waiting for processing that we've
  233. # forced to block. Try unblocking them, followed by another round of
  234. # pumping the reactor.
  235. if not respond_mock.called:
  236. deferred_patch.unblock_awaits()
  237. # Mark the request's logging context as finished. If it gets
  238. # activated again, an `AssertionError` will be raised and bubble up
  239. # through request handling code. This `AssertionError` may or may not be
  240. # caught. Eventually some other code will deactivate the logging
  241. # context which will raise a different `AssertionError` because
  242. # resource usage won't have been correctly tracked.
  243. if isinstance(request, SynapseRequest) and request.logcontext:
  244. request.logcontext.finished = True
  245. # Check that the request finished with a 499,
  246. # ie. the `CancelledError` wasn't swallowed.
  247. respond_mock.assert_called_once()
  248. if request.code != HTTP_STATUS_REQUEST_CANCELLED:
  249. raise AssertionError(
  250. f"{request.code} != {HTTP_STATUS_REQUEST_CANCELLED} : "
  251. "Cancelled request did not finish with the correct status code."
  252. )
  253. finally:
  254. # Unblock any processing that might be shared between requests, if we
  255. # haven't already done so.
  256. deferred_patch.unblock_awaits()
  257. assert False, "unreachable" # noqa: B011
  258. class Deferred__next__Patch:
  259. """A `Deferred.__next__` patch that will intercept `await`s and force them
  260. to block once it sees a new `await`.
  261. When done with the patch, `unblock_awaits()` must be called to clean up after any
  262. `await`s that were forced to block, otherwise processing shared between multiple
  263. requests, such as database queries started by `@cached`, will become permanently
  264. stuck.
  265. Usage:
  266. seen_awaits = set()
  267. deferred_patch = Deferred__next__Patch(seen_awaits, 1)
  268. try:
  269. with deferred_patch.patch():
  270. # do things
  271. ...
  272. finally:
  273. deferred_patch.unblock_awaits()
  274. """
  275. def __init__(self, seen_awaits: Set[Tuple[str, ...]], request_number: int):
  276. """
  277. Args:
  278. seen_awaits: The set of stack traces of `await`s that have been previously
  279. seen. When the `Deferred.__next__` patch sees a new `await`, it will add
  280. it to the set.
  281. request_number: The request number to log against.
  282. """
  283. self._request_number = request_number
  284. self._seen_awaits = seen_awaits
  285. self._original_Deferred___next__ = Deferred.__next__ # type: ignore[misc,unused-ignore]
  286. # The number of `await`s on `Deferred`s we have seen so far.
  287. self.awaits_seen = 0
  288. # Whether we have seen a new `await` not in `seen_awaits`.
  289. self.new_await_seen = False
  290. # To force `await`s on resolved `Deferred`s to block, we make up a new
  291. # unresolved `Deferred` and return it out of `Deferred.__next__` /
  292. # `coroutine.send()`. We have to resolve it later, in case the `await`ing
  293. # coroutine is part of some shared processing, such as `@cached`.
  294. self._to_unblock: Dict[Deferred, Union[object, Failure]] = {}
  295. # The last stack we logged.
  296. self._previous_stack: List[inspect.FrameInfo] = []
  297. def patch(self) -> ContextManager[Mock]:
  298. """Returns a context manager which patches `Deferred.__next__`."""
  299. def Deferred___next__(
  300. deferred: "Deferred[T]", value: object = None
  301. ) -> "Deferred[T]":
  302. """Intercepts `await`s on `Deferred`s and rigs them to block once we have
  303. seen enough of them.
  304. `Deferred.__next__` will normally:
  305. * return `self` if the `Deferred` is unresolved, in which case
  306. `coroutine.send()` will return the `Deferred`, and
  307. `_defer.inlineCallbacks` will stop running the coroutine until the
  308. `Deferred` is resolved.
  309. * raise a `StopIteration(result)`, containing the result of the `await`.
  310. * raise another exception, which will come out of the `await`.
  311. """
  312. self.awaits_seen += 1
  313. stack = _get_stack(skip_frames=1)
  314. stack_hash = _hash_stack(stack)
  315. if stack_hash not in self._seen_awaits:
  316. # Block at the current `await` onwards.
  317. self._seen_awaits.add(stack_hash)
  318. self.new_await_seen = True
  319. if not self.new_await_seen:
  320. # This `await` isn't interesting. Let it proceed normally.
  321. # Don't log the stack. It's been seen before in a previous run.
  322. self._previous_stack = stack
  323. return self._original_Deferred___next__(deferred, value)
  324. # We want to block at the current `await`.
  325. if deferred.called and not deferred.paused:
  326. # This `Deferred` already has a result.
  327. # We return a new, unresolved, `Deferred` for `_inlineCallbacks` to wait
  328. # on. This blocks the coroutine that did this `await`.
  329. # We queue it up for unblocking later.
  330. new_deferred: "Deferred[T]" = Deferred()
  331. self._to_unblock[new_deferred] = deferred.result
  332. _log_await_stack(
  333. stack,
  334. self._previous_stack,
  335. self._request_number,
  336. "force-blocked await",
  337. )
  338. self._previous_stack = stack
  339. return make_deferred_yieldable(new_deferred)
  340. # This `Deferred` does not have a result yet.
  341. # The `await` will block normally, so we don't have to do anything.
  342. _log_await_stack(
  343. stack,
  344. self._previous_stack,
  345. self._request_number,
  346. "blocking await",
  347. )
  348. self._previous_stack = stack
  349. return self._original_Deferred___next__(deferred, value)
  350. return mock.patch.object(Deferred, "__next__", new=Deferred___next__)
  351. def unblock_awaits(self) -> None:
  352. """Unblocks any shared processing that we forced to block.
  353. Must be called when done, otherwise processing shared between multiple requests,
  354. such as database queries started by `@cached`, will become permanently stuck.
  355. """
  356. to_unblock = self._to_unblock
  357. self._to_unblock = {}
  358. for deferred, result in to_unblock.items():
  359. deferred.callback(result)
  360. def _log_for_request(request_number: int, message: str) -> None:
  361. """Logs a message for an iteration of `make_request_with_cancellation_test`."""
  362. # We want consistent alignment when logging stack traces, so ensure the logging
  363. # context has a fixed width name.
  364. with LoggingContext(name=f"request-{request_number:<2}"):
  365. logger.info(message)
  366. def _log_await_stack(
  367. stack: List[inspect.FrameInfo],
  368. previous_stack: List[inspect.FrameInfo],
  369. request_number: int,
  370. note: str,
  371. ) -> None:
  372. """Logs the stack for an `await` in `make_request_with_cancellation_test`.
  373. Only logs the part of the stack that has changed since the previous call.
  374. Example output looks like:
  375. ```
  376. delay_cancellation:750 (synapse/util/async_helpers.py:750)
  377. DatabasePool._runInteraction:768 (synapse/storage/database.py:768)
  378. > *blocked on await* at DatabasePool.runWithConnection:891 (synapse/storage/database.py:891)
  379. ```
  380. Args:
  381. stack: The stack to log, as returned by `_get_stack()`.
  382. previous_stack: The previous stack logged, with callers appearing before
  383. callees.
  384. request_number: The request number to log against.
  385. note: A note to attach to the last stack frame, eg. "blocked on await".
  386. """
  387. for i, frame_info in enumerate(stack[:-1]):
  388. # Skip any frames in common with the previous logging.
  389. if i < len(previous_stack) and frame_info == previous_stack[i]:
  390. continue
  391. frame = _format_stack_frame(frame_info)
  392. message = f"{' ' * i}{frame}"
  393. _log_for_request(request_number, message)
  394. # Always print the final frame with the `await`.
  395. # If the frame with the `await` started another coroutine run, we may have already
  396. # printed a deeper stack which includes our final frame. We want to log where all
  397. # `await`s happen, so we reprint the frame in this case.
  398. i = len(stack) - 1
  399. frame_info = stack[i]
  400. frame = _format_stack_frame(frame_info)
  401. message = f"{' ' * i}> *{note}* at {frame}"
  402. _log_for_request(request_number, message)
  403. def _format_stack_frame(frame_info: inspect.FrameInfo) -> str:
  404. """Returns a string representation of a stack frame.
  405. Used for debug logging.
  406. Returns:
  407. A string, formatted like
  408. "JsonResource._async_render:559 (synapse/http/server.py:559)".
  409. """
  410. method_name = _get_stack_frame_method_name(frame_info)
  411. return (
  412. f"{method_name}:{frame_info.lineno} ({frame_info.filename}:{frame_info.lineno})"
  413. )
  414. def _get_stack(skip_frames: int) -> List[inspect.FrameInfo]:
  415. """Captures the stack for a request.
  416. Skips any twisted frames and stops at `JsonResource.wrapped_async_request_handler`.
  417. Used for debug logging.
  418. Returns:
  419. A list of `inspect.FrameInfo`s, with callers appearing before callees.
  420. """
  421. stack = []
  422. skip_frames += 1 # Also skip `get_stack` itself.
  423. for frame_info in inspect.stack()[skip_frames:]:
  424. # Skip any twisted `inlineCallbacks` gunk.
  425. if "/twisted/" in frame_info.filename:
  426. continue
  427. # Exclude the reactor frame, upwards.
  428. method_name = _get_stack_frame_method_name(frame_info)
  429. if method_name == "ThreadedMemoryReactorClock.advance":
  430. break
  431. stack.append(frame_info)
  432. # Stop at `JsonResource`'s `wrapped_async_request_handler`, which is the entry
  433. # point for request handling.
  434. if frame_info.function == "wrapped_async_request_handler":
  435. break
  436. return stack[::-1]
  437. def _get_stack_frame_method_name(frame_info: inspect.FrameInfo) -> str:
  438. """Returns the name of a stack frame's method.
  439. eg. "JsonResource._async_render".
  440. """
  441. method_name = frame_info.function
  442. # Prefix the class name for instance methods.
  443. frame_self = frame_info.frame.f_locals.get("self")
  444. if frame_self:
  445. method = getattr(frame_self, method_name, None)
  446. if method:
  447. method_name = method.__qualname__
  448. else:
  449. # We couldn't find the method on `self`.
  450. # Make something up. It's useful to know which class "contains" a
  451. # function anyway.
  452. method_name = f"{type(frame_self).__name__} {method_name}"
  453. return method_name
  454. def _hash_stack(stack: List[inspect.FrameInfo]) -> Tuple[str, ...]:
  455. """Turns a stack into a hashable value that can be put into a set."""
  456. return tuple(_format_stack_frame(frame) for frame in stack)