Nevar pievienot vairāk kā 25 tēmas. Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.
 
 
 
 
 
 

1155 rindas
39 KiB

  1. # Copyright 2014-2016 OpenMarket Ltd
  2. # Copyright 2018 New Vector Ltd
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import logging
  16. import urllib.parse
  17. from http import HTTPStatus
  18. from io import BytesIO
  19. from typing import (
  20. TYPE_CHECKING,
  21. Any,
  22. BinaryIO,
  23. Callable,
  24. Dict,
  25. List,
  26. Mapping,
  27. Optional,
  28. Tuple,
  29. Union,
  30. )
  31. import treq
  32. from canonicaljson import encode_canonical_json
  33. from netaddr import AddrFormatError, IPAddress, IPSet
  34. from prometheus_client import Counter
  35. from typing_extensions import Protocol
  36. from zope.interface import implementer, provider
  37. from OpenSSL import SSL
  38. from OpenSSL.SSL import VERIFY_NONE
  39. from twisted.internet import defer, error as twisted_error, protocol, ssl
  40. from twisted.internet.address import IPv4Address, IPv6Address
  41. from twisted.internet.interfaces import (
  42. IAddress,
  43. IDelayedCall,
  44. IHostResolution,
  45. IOpenSSLContextFactory,
  46. IReactorCore,
  47. IReactorPluggableNameResolver,
  48. IReactorTime,
  49. IResolutionReceiver,
  50. ITCPTransport,
  51. )
  52. from twisted.internet.protocol import connectionDone
  53. from twisted.internet.task import Cooperator
  54. from twisted.python.failure import Failure
  55. from twisted.web._newclient import ResponseDone
  56. from twisted.web.client import (
  57. Agent,
  58. HTTPConnectionPool,
  59. ResponseNeverReceived,
  60. readBody,
  61. )
  62. from twisted.web.http import PotentialDataLoss
  63. from twisted.web.http_headers import Headers
  64. from twisted.web.iweb import (
  65. UNKNOWN_LENGTH,
  66. IAgent,
  67. IBodyProducer,
  68. IPolicyForHTTPS,
  69. IResponse,
  70. )
  71. from synapse.api.errors import Codes, HttpResponseException, SynapseError
  72. from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri
  73. from synapse.http.proxyagent import ProxyAgent
  74. from synapse.http.replicationagent import ReplicationAgent
  75. from synapse.http.types import QueryParams
  76. from synapse.logging.context import make_deferred_yieldable, run_in_background
  77. from synapse.logging.opentracing import set_tag, start_active_span, tags
  78. from synapse.types import ISynapseReactor, StrSequence
  79. from synapse.util import json_decoder
  80. from synapse.util.async_helpers import timeout_deferred
  81. if TYPE_CHECKING:
  82. from synapse.server import HomeServer
  83. logger = logging.getLogger(__name__)
  84. outgoing_requests_counter = Counter("synapse_http_client_requests", "", ["method"])
  85. incoming_responses_counter = Counter(
  86. "synapse_http_client_responses", "", ["method", "code"]
  87. )
  88. # the type of the headers map, to be passed to the t.w.h.Headers.
  89. #
  90. # The actual type accepted by Twisted is
  91. # Mapping[Union[str, bytes], Sequence[Union[str, bytes]] ,
  92. # allowing us to mix and match str and bytes freely. However: any str is also a
  93. # Sequence[str]; passing a header string value which is a
  94. # standalone str is interpreted as a sequence of 1-codepoint strings. This is a disastrous footgun.
  95. # We use a narrower value type (RawHeaderValue) to avoid this footgun.
  96. #
  97. # We also simplify the keys to be either all str or all bytes. This helps because
  98. # Dict[K, V] is invariant in K (and indeed V).
  99. RawHeaders = Union[Mapping[str, "RawHeaderValue"], Mapping[bytes, "RawHeaderValue"]]
  100. # the value actually has to be a List, but List is invariant so we can't specify that
  101. # the entries can either be Lists or bytes.
  102. RawHeaderValue = Union[
  103. StrSequence,
  104. List[bytes],
  105. List[Union[str, bytes]],
  106. Tuple[bytes, ...],
  107. Tuple[Union[str, bytes], ...],
  108. ]
  109. def _is_ip_blocked(
  110. ip_address: IPAddress, allowlist: Optional[IPSet], blocklist: IPSet
  111. ) -> bool:
  112. """
  113. Compares an IP address to allowed and disallowed IP sets.
  114. Args:
  115. ip_address: The IP address to check
  116. allowlist: Allowed IP addresses.
  117. blocklist: Disallowed IP addresses.
  118. Returns:
  119. True if the IP address is in the blocklist and not in the allowlist.
  120. """
  121. if ip_address in blocklist:
  122. if allowlist is None or ip_address not in allowlist:
  123. return True
  124. return False
  125. _EPSILON = 0.00000001
  126. def _make_scheduler(
  127. reactor: IReactorTime,
  128. ) -> Callable[[Callable[[], object]], IDelayedCall]:
  129. """Makes a schedular suitable for a Cooperator using the given reactor.
  130. (This is effectively just a copy from `twisted.internet.task`)
  131. """
  132. def _scheduler(x: Callable[[], object]) -> IDelayedCall:
  133. return reactor.callLater(_EPSILON, x)
  134. return _scheduler
  135. class _IPBlockingResolver:
  136. """
  137. A proxy for reactor.nameResolver which only produces non-blocklisted IP
  138. addresses, preventing DNS rebinding attacks.
  139. """
  140. def __init__(
  141. self,
  142. reactor: IReactorPluggableNameResolver,
  143. ip_allowlist: Optional[IPSet],
  144. ip_blocklist: IPSet,
  145. ):
  146. """
  147. Args:
  148. reactor: The twisted reactor.
  149. ip_allowlist: IP addresses to allow.
  150. ip_blocklist: IP addresses to disallow.
  151. """
  152. self._reactor = reactor
  153. self._ip_allowlist = ip_allowlist
  154. self._ip_blocklist = ip_blocklist
  155. def resolveHostName(
  156. self, recv: IResolutionReceiver, hostname: str, portNumber: int = 0
  157. ) -> IResolutionReceiver:
  158. addresses: List[IAddress] = []
  159. def _callback() -> None:
  160. has_bad_ip = False
  161. for address in addresses:
  162. # We only expect IPv4 and IPv6 addresses since only A/AAAA lookups
  163. # should go through this path.
  164. if not isinstance(address, (IPv4Address, IPv6Address)):
  165. continue
  166. ip_address = IPAddress(address.host)
  167. if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist):
  168. logger.info(
  169. "Blocked %s from DNS resolution to %s" % (ip_address, hostname)
  170. )
  171. has_bad_ip = True
  172. # if we have a blocked IP, we'd like to raise an error to block the
  173. # request, but all we can really do from here is claim that there were no
  174. # valid results.
  175. if not has_bad_ip:
  176. for address in addresses:
  177. recv.addressResolved(address)
  178. recv.resolutionComplete()
  179. @provider(IResolutionReceiver)
  180. class EndpointReceiver:
  181. @staticmethod
  182. def resolutionBegan(resolutionInProgress: IHostResolution) -> None:
  183. recv.resolutionBegan(resolutionInProgress)
  184. @staticmethod
  185. def addressResolved(address: IAddress) -> None:
  186. addresses.append(address)
  187. @staticmethod
  188. def resolutionComplete() -> None:
  189. _callback()
  190. self._reactor.nameResolver.resolveHostName(
  191. EndpointReceiver, hostname, portNumber=portNumber
  192. )
  193. return recv
  194. # ISynapseReactor implies IReactorCore, but explicitly marking it this as an implementer
  195. # of IReactorCore seems to keep mypy-zope happier.
  196. @implementer(IReactorCore, ISynapseReactor)
  197. class BlocklistingReactorWrapper:
  198. """
  199. A Reactor wrapper which will prevent DNS resolution to blocked IP
  200. addresses, to prevent DNS rebinding.
  201. """
  202. def __init__(
  203. self,
  204. reactor: IReactorPluggableNameResolver,
  205. ip_allowlist: Optional[IPSet],
  206. ip_blocklist: IPSet,
  207. ):
  208. self._reactor = reactor
  209. # We need to use a DNS resolver which filters out blocked IP
  210. # addresses, to prevent DNS rebinding.
  211. self._nameResolver = _IPBlockingResolver(
  212. self._reactor, ip_allowlist, ip_blocklist
  213. )
  214. def __getattr__(self, attr: str) -> Any:
  215. # Passthrough to the real reactor except for the DNS resolver.
  216. if attr == "nameResolver":
  217. return self._nameResolver
  218. else:
  219. return getattr(self._reactor, attr)
  220. class BlocklistingAgentWrapper(Agent):
  221. """
  222. An Agent wrapper which will prevent access to IP addresses being accessed
  223. directly (without an IP address lookup).
  224. """
  225. def __init__(
  226. self,
  227. agent: IAgent,
  228. ip_blocklist: IPSet,
  229. ip_allowlist: Optional[IPSet] = None,
  230. ):
  231. """
  232. Args:
  233. agent: The Agent to wrap.
  234. ip_allowlist: IP addresses to allow.
  235. ip_blocklist: IP addresses to disallow.
  236. """
  237. self._agent = agent
  238. self._ip_allowlist = ip_allowlist
  239. self._ip_blocklist = ip_blocklist
  240. def request(
  241. self,
  242. method: bytes,
  243. uri: bytes,
  244. headers: Optional[Headers] = None,
  245. bodyProducer: Optional[IBodyProducer] = None,
  246. ) -> defer.Deferred:
  247. h = urllib.parse.urlparse(uri.decode("ascii"))
  248. try:
  249. # h.hostname is Optional[str], None raises an AddrFormatError, so
  250. # this is safe even though IPAddress requires a str.
  251. ip_address = IPAddress(h.hostname) # type: ignore[arg-type]
  252. except AddrFormatError:
  253. # Not an IP
  254. pass
  255. else:
  256. if _is_ip_blocked(ip_address, self._ip_allowlist, self._ip_blocklist):
  257. logger.info("Blocking access to %s" % (ip_address,))
  258. e = SynapseError(HTTPStatus.FORBIDDEN, "IP address blocked")
  259. return defer.fail(Failure(e))
  260. return self._agent.request(
  261. method, uri, headers=headers, bodyProducer=bodyProducer
  262. )
  263. class BaseHttpClient:
  264. """
  265. A simple, no-frills HTTP client with methods that wrap up common ways of
  266. using HTTP in Matrix. Does not come with a default Agent, subclasses will need to
  267. define their own.
  268. Args:
  269. hs: The HomeServer instance to pass in
  270. treq_args: Extra keyword arguments to be given to treq.request.
  271. """
  272. agent: IAgent
  273. def __init__(
  274. self,
  275. hs: "HomeServer",
  276. treq_args: Optional[Dict[str, Any]] = None,
  277. ):
  278. self.hs = hs
  279. self.reactor = hs.get_reactor()
  280. self._extra_treq_args = treq_args or {}
  281. self.clock = hs.get_clock()
  282. user_agent = hs.version_string
  283. if hs.config.server.user_agent_suffix:
  284. user_agent = "%s %s" % (
  285. user_agent,
  286. hs.config.server.user_agent_suffix,
  287. )
  288. self.user_agent = user_agent.encode("ascii")
  289. # We use this for our body producers to ensure that they use the correct
  290. # reactor.
  291. self._cooperator = Cooperator(scheduler=_make_scheduler(hs.get_reactor()))
  292. async def request(
  293. self,
  294. method: str,
  295. uri: str,
  296. data: Optional[bytes] = None,
  297. headers: Optional[Headers] = None,
  298. ) -> IResponse:
  299. """
  300. Args:
  301. method: HTTP method to use.
  302. uri: URI to query.
  303. data: Data to send in the request body, if applicable.
  304. headers: Request headers.
  305. Returns:
  306. Response object, once the headers have been read.
  307. Raises:
  308. RequestTimedOutError if the request times out before the headers are read
  309. """
  310. outgoing_requests_counter.labels(method).inc()
  311. # log request but strip `access_token` (AS requests for example include this)
  312. logger.debug("Sending request %s %s", method, redact_uri(uri))
  313. with start_active_span(
  314. "outgoing-client-request",
  315. tags={
  316. tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT,
  317. tags.HTTP_METHOD: method,
  318. tags.HTTP_URL: uri,
  319. },
  320. finish_on_close=True,
  321. ):
  322. try:
  323. body_producer = None
  324. if data is not None:
  325. body_producer = QuieterFileBodyProducer(
  326. BytesIO(data),
  327. cooperator=self._cooperator,
  328. )
  329. request_deferred: defer.Deferred = treq.request(
  330. method,
  331. uri,
  332. agent=self.agent,
  333. data=body_producer,
  334. headers=headers,
  335. # Avoid buffering the body in treq since we do not reuse
  336. # response bodies.
  337. unbuffered=True,
  338. **self._extra_treq_args,
  339. )
  340. # we use our own timeout mechanism rather than treq's as a workaround
  341. # for https://twistedmatrix.com/trac/ticket/9534.
  342. request_deferred = timeout_deferred(
  343. request_deferred,
  344. 60,
  345. self.hs.get_reactor(),
  346. )
  347. # turn timeouts into RequestTimedOutErrors
  348. request_deferred.addErrback(_timeout_to_request_timed_out_error)
  349. response = await make_deferred_yieldable(request_deferred)
  350. incoming_responses_counter.labels(method, response.code).inc()
  351. logger.info(
  352. "Received response to %s %s: %s",
  353. method,
  354. redact_uri(uri),
  355. response.code,
  356. )
  357. return response
  358. except Exception as e:
  359. incoming_responses_counter.labels(method, "ERR").inc()
  360. logger.info(
  361. "Error sending request to %s %s: %s %s",
  362. method,
  363. redact_uri(uri),
  364. type(e).__name__,
  365. e.args[0],
  366. )
  367. set_tag(tags.ERROR, True)
  368. set_tag("error_reason", e.args[0])
  369. raise
  370. async def post_urlencoded_get_json(
  371. self,
  372. uri: str,
  373. args: Optional[Mapping[str, Union[str, List[str]]]] = None,
  374. headers: Optional[RawHeaders] = None,
  375. ) -> Any:
  376. """
  377. Args:
  378. uri: uri to query
  379. args: parameters to be url-encoded in the body
  380. headers: a map from header name to a list of values for that header
  381. Returns:
  382. parsed json
  383. Raises:
  384. RequestTimedOutError: if there is a timeout before the response headers
  385. are received. Note there is currently no timeout on reading the response
  386. body.
  387. HttpResponseException: On a non-2xx HTTP response.
  388. ValueError: if the response was not JSON
  389. """
  390. # TODO: Do we ever want to log message contents?
  391. logger.debug("post_urlencoded_get_json args: %s", args)
  392. query_bytes = encode_query_args(args)
  393. actual_headers = {
  394. b"Content-Type": [b"application/x-www-form-urlencoded"],
  395. b"User-Agent": [self.user_agent],
  396. b"Accept": [b"application/json"],
  397. }
  398. if headers:
  399. actual_headers.update(headers) # type: ignore
  400. response = await self.request(
  401. "POST", uri, headers=Headers(actual_headers), data=query_bytes
  402. )
  403. body = await make_deferred_yieldable(readBody(response))
  404. if 200 <= response.code < 300:
  405. return json_decoder.decode(body.decode("utf-8"))
  406. else:
  407. raise HttpResponseException(
  408. response.code, response.phrase.decode("ascii", errors="replace"), body
  409. )
  410. async def post_json_get_json(
  411. self, uri: str, post_json: Any, headers: Optional[RawHeaders] = None
  412. ) -> Any:
  413. """
  414. Args:
  415. uri: URI to query.
  416. post_json: request body, to be encoded as json
  417. headers: a map from header name to a list of values for that header
  418. Returns:
  419. parsed json
  420. Raises:
  421. RequestTimedOutError: if there is a timeout before the response headers
  422. are received. Note there is currently no timeout on reading the response
  423. body.
  424. HttpResponseException: On a non-2xx HTTP response.
  425. ValueError: if the response was not JSON
  426. """
  427. json_str = encode_canonical_json(post_json)
  428. logger.debug("HTTP POST %s -> %s", json_str, uri)
  429. actual_headers = {
  430. b"Content-Type": [b"application/json"],
  431. b"User-Agent": [self.user_agent],
  432. b"Accept": [b"application/json"],
  433. }
  434. if headers:
  435. actual_headers.update(headers) # type: ignore
  436. response = await self.request(
  437. "POST", uri, headers=Headers(actual_headers), data=json_str
  438. )
  439. body = await make_deferred_yieldable(readBody(response))
  440. if 200 <= response.code < 300:
  441. return json_decoder.decode(body.decode("utf-8"))
  442. else:
  443. raise HttpResponseException(
  444. response.code, response.phrase.decode("ascii", errors="replace"), body
  445. )
  446. async def get_json(
  447. self,
  448. uri: str,
  449. args: Optional[QueryParams] = None,
  450. headers: Optional[RawHeaders] = None,
  451. ) -> Any:
  452. """Gets some json from the given URI.
  453. Args:
  454. uri: The URI to request, not including query parameters
  455. args: A dictionary used to create query string
  456. headers: a map from header name to a list of values for that header
  457. Returns:
  458. Succeeds when we get a 2xx HTTP response, with the HTTP body as JSON.
  459. Raises:
  460. RequestTimedOutError: if there is a timeout before the response headers
  461. are received. Note there is currently no timeout on reading the response
  462. body.
  463. HttpResponseException On a non-2xx HTTP response.
  464. ValueError: if the response was not JSON
  465. """
  466. actual_headers = {b"Accept": [b"application/json"]}
  467. if headers:
  468. actual_headers.update(headers) # type: ignore
  469. body = await self.get_raw(uri, args, headers=actual_headers)
  470. return json_decoder.decode(body.decode("utf-8"))
  471. async def put_json(
  472. self,
  473. uri: str,
  474. json_body: Any,
  475. args: Optional[QueryParams] = None,
  476. headers: Optional[RawHeaders] = None,
  477. ) -> Any:
  478. """Puts some json to the given URI.
  479. Args:
  480. uri: The URI to request, not including query parameters
  481. json_body: The JSON to put in the HTTP body,
  482. args: A dictionary used to create query strings
  483. headers: a map from header name to a list of values for that header
  484. Returns:
  485. Succeeds when we get a 2xx HTTP response, with the HTTP body as JSON.
  486. Raises:
  487. RequestTimedOutError: if there is a timeout before the response headers
  488. are received. Note there is currently no timeout on reading the response
  489. body.
  490. HttpResponseException On a non-2xx HTTP response.
  491. ValueError: if the response was not JSON
  492. """
  493. if args:
  494. query_str = urllib.parse.urlencode(args, True)
  495. uri = "%s?%s" % (uri, query_str)
  496. json_str = encode_canonical_json(json_body)
  497. actual_headers = {
  498. b"Content-Type": [b"application/json"],
  499. b"User-Agent": [self.user_agent],
  500. b"Accept": [b"application/json"],
  501. }
  502. if headers:
  503. actual_headers.update(headers) # type: ignore
  504. response = await self.request(
  505. "PUT", uri, headers=Headers(actual_headers), data=json_str
  506. )
  507. body = await make_deferred_yieldable(readBody(response))
  508. if 200 <= response.code < 300:
  509. return json_decoder.decode(body.decode("utf-8"))
  510. else:
  511. raise HttpResponseException(
  512. response.code, response.phrase.decode("ascii", errors="replace"), body
  513. )
  514. async def get_raw(
  515. self,
  516. uri: str,
  517. args: Optional[QueryParams] = None,
  518. headers: Optional[RawHeaders] = None,
  519. ) -> bytes:
  520. """Gets raw text from the given URI.
  521. Args:
  522. uri: The URI to request, not including query parameters
  523. args: A dictionary used to create query strings
  524. headers: a map from header name to a list of values for that header
  525. Returns:
  526. Succeeds when we get a 2xx HTTP response, with the
  527. HTTP body as bytes.
  528. Raises:
  529. RequestTimedOutError: if there is a timeout before the response headers
  530. are received. Note there is currently no timeout on reading the response
  531. body.
  532. HttpResponseException on a non-2xx HTTP response.
  533. """
  534. if args:
  535. query_str = urllib.parse.urlencode(args, True)
  536. uri = "%s?%s" % (uri, query_str)
  537. actual_headers = {b"User-Agent": [self.user_agent]}
  538. if headers:
  539. actual_headers.update(headers) # type: ignore
  540. response = await self.request("GET", uri, headers=Headers(actual_headers))
  541. body = await make_deferred_yieldable(readBody(response))
  542. if 200 <= response.code < 300:
  543. return body
  544. else:
  545. raise HttpResponseException(
  546. response.code, response.phrase.decode("ascii", errors="replace"), body
  547. )
  548. # XXX: FIXME: This is horribly copy-pasted from matrixfederationclient.
  549. # The two should be factored out.
  550. async def get_file(
  551. self,
  552. url: str,
  553. output_stream: BinaryIO,
  554. max_size: Optional[int] = None,
  555. headers: Optional[RawHeaders] = None,
  556. is_allowed_content_type: Optional[Callable[[str], bool]] = None,
  557. ) -> Tuple[int, Dict[bytes, List[bytes]], str, int]:
  558. """GETs a file from a given URL
  559. Args:
  560. url: The URL to GET
  561. output_stream: File to write the response body to.
  562. headers: A map from header name to a list of values for that header
  563. is_allowed_content_type: A predicate to determine whether the
  564. content type of the file we're downloading is allowed. If set and
  565. it evaluates to False when called with the content type, the
  566. request will be terminated before completing the download by
  567. raising SynapseError.
  568. Returns:
  569. A tuple of the file length, dict of the response
  570. headers, absolute URI of the response and HTTP response code.
  571. Raises:
  572. RequestTimedOutError: if there is a timeout before the response headers
  573. are received. Note there is currently no timeout on reading the response
  574. body.
  575. SynapseError: if the response is not a 2xx, the remote file is too large, or
  576. another exception happens during the download.
  577. """
  578. actual_headers = {b"User-Agent": [self.user_agent]}
  579. if headers:
  580. actual_headers.update(headers) # type: ignore
  581. response = await self.request("GET", url, headers=Headers(actual_headers))
  582. resp_headers = dict(response.headers.getAllRawHeaders())
  583. if response.code > 299:
  584. logger.warning("Got %d when downloading %s" % (response.code, url))
  585. raise SynapseError(
  586. HTTPStatus.BAD_GATEWAY, "Got error %d" % (response.code,), Codes.UNKNOWN
  587. )
  588. if is_allowed_content_type and b"Content-Type" in resp_headers:
  589. content_type = resp_headers[b"Content-Type"][0].decode("ascii")
  590. if not is_allowed_content_type(content_type):
  591. raise SynapseError(
  592. HTTPStatus.BAD_GATEWAY,
  593. (
  594. "Requested file's content type not allowed for this operation: %s"
  595. % content_type
  596. ),
  597. )
  598. # TODO: if our Content-Type is HTML or something, just read the first
  599. # N bytes into RAM rather than saving it all to disk only to read it
  600. # straight back in again
  601. try:
  602. d = read_body_with_max_size(response, output_stream, max_size)
  603. # Ensure that the body is not read forever.
  604. d = timeout_deferred(d, 30, self.hs.get_reactor())
  605. length = await make_deferred_yieldable(d)
  606. except BodyExceededMaxSize:
  607. raise SynapseError(
  608. HTTPStatus.BAD_GATEWAY,
  609. "Requested file is too large > %r bytes" % (max_size,),
  610. Codes.TOO_LARGE,
  611. )
  612. except defer.TimeoutError:
  613. raise SynapseError(
  614. HTTPStatus.BAD_GATEWAY,
  615. "Requested file took too long to download",
  616. Codes.TOO_LARGE,
  617. )
  618. except Exception as e:
  619. raise SynapseError(
  620. HTTPStatus.BAD_GATEWAY, ("Failed to download remote body: %s" % e)
  621. ) from e
  622. return (
  623. length,
  624. resp_headers,
  625. response.request.absoluteURI.decode("ascii"),
  626. response.code,
  627. )
  628. class SimpleHttpClient(BaseHttpClient):
  629. """
  630. An HTTP client capable of crossing a proxy and respecting a block/allow list.
  631. This also configures a larger / longer lasting HTTP connection pool.
  632. Args:
  633. hs: The HomeServer instance to pass in
  634. treq_args: Extra keyword arguments to be given to treq.request.
  635. ip_blocklist: The IP addresses that we may not request.
  636. ip_allowlist: The allowed IP addresses, that we can
  637. request if it were otherwise caught in a blocklist.
  638. use_proxy: Whether proxy settings should be discovered and used
  639. from conventional environment variables.
  640. """
  641. def __init__(
  642. self,
  643. hs: "HomeServer",
  644. treq_args: Optional[Dict[str, Any]] = None,
  645. ip_allowlist: Optional[IPSet] = None,
  646. ip_blocklist: Optional[IPSet] = None,
  647. use_proxy: bool = False,
  648. ):
  649. super().__init__(hs, treq_args=treq_args)
  650. self._ip_allowlist = ip_allowlist
  651. self._ip_blocklist = ip_blocklist
  652. if self._ip_blocklist:
  653. # If we have an IP blocklist, we need to use a DNS resolver which
  654. # filters out blocked IP addresses, to prevent DNS rebinding.
  655. self.reactor: ISynapseReactor = BlocklistingReactorWrapper(
  656. self.reactor, self._ip_allowlist, self._ip_blocklist
  657. )
  658. # the pusher makes lots of concurrent SSL connections to Sygnal, and tends to
  659. # do so in batches, so we need to allow the pool to keep lots of idle
  660. # connections around.
  661. pool = HTTPConnectionPool(self.reactor)
  662. # XXX: The justification for using the cache factor here is that larger
  663. # instances will need both more cache and more connections.
  664. # Still, this should probably be a separate dial
  665. pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
  666. pool.cachedConnectionTimeout = 2 * 60
  667. self.agent: IAgent = ProxyAgent(
  668. self.reactor,
  669. hs.get_reactor(),
  670. connectTimeout=15,
  671. contextFactory=self.hs.get_http_client_context_factory(),
  672. pool=pool,
  673. use_proxy=use_proxy,
  674. )
  675. if self._ip_blocklist:
  676. # If we have an IP blocklist, we then install the Agent which prevents
  677. # direct access to IP addresses, that are not caught by the DNS resolution.
  678. self.agent = BlocklistingAgentWrapper(
  679. self.agent,
  680. ip_blocklist=self._ip_blocklist,
  681. ip_allowlist=self._ip_allowlist,
  682. )
  683. class ReplicationClient(BaseHttpClient):
  684. """Client for connecting to replication endpoints via HTTP and HTTPS.
  685. Attributes:
  686. agent: The custom Twisted Agent used for constructing the connection.
  687. """
  688. def __init__(
  689. self,
  690. hs: "HomeServer",
  691. ):
  692. """
  693. Args:
  694. hs: The HomeServer instance to pass in
  695. """
  696. super().__init__(hs)
  697. # Use a pool, but a very small one.
  698. pool = HTTPConnectionPool(self.reactor)
  699. pool.maxPersistentPerHost = 5
  700. pool.cachedConnectionTimeout = 2 * 60
  701. self.agent: IAgent = ReplicationAgent(
  702. hs.get_reactor(),
  703. hs.config.worker.instance_map,
  704. contextFactory=hs.get_http_client_context_factory(),
  705. pool=pool,
  706. )
  707. async def request(
  708. self,
  709. method: str,
  710. uri: str,
  711. data: Optional[bytes] = None,
  712. headers: Optional[Headers] = None,
  713. ) -> IResponse:
  714. """
  715. Make a request, differs from BaseHttpClient.request in that it does not use treq.
  716. Args:
  717. method: HTTP method to use.
  718. uri: URI to query.
  719. data: Data to send in the request body, if applicable.
  720. headers: Request headers.
  721. Returns:
  722. Response object, once the headers have been read.
  723. Raises:
  724. RequestTimedOutError if the request times out before the headers are read
  725. """
  726. outgoing_requests_counter.labels(method).inc()
  727. logger.debug("Sending request %s %s", method, uri)
  728. with start_active_span(
  729. "outgoing-replication-request",
  730. tags={
  731. tags.SPAN_KIND: tags.SPAN_KIND_RPC_CLIENT,
  732. tags.HTTP_METHOD: method,
  733. tags.HTTP_URL: uri,
  734. },
  735. finish_on_close=True,
  736. ):
  737. try:
  738. body_producer = None
  739. if data is not None:
  740. body_producer = QuieterFileBodyProducer(
  741. BytesIO(data),
  742. cooperator=self._cooperator,
  743. )
  744. # Skip the fancy treq stuff, we don't need cookie handling, redirects,
  745. # or buffered response bodies.
  746. method_bytes = method.encode("ascii")
  747. uri_bytes = uri.encode("ascii")
  748. # To preserve the logging context, the timeout is treated
  749. # in a similar way to `defer.gatherResults`:
  750. # * Each logging context-preserving fork is wrapped in
  751. # `run_in_background`. In this case there is only one,
  752. # since the timeout fork is not logging-context aware.
  753. # * The `Deferred` that joins the forks back together is
  754. # wrapped in `make_deferred_yieldable` to restore the
  755. # logging context regardless of the path taken.
  756. # (The logic/comments for this came from MatrixFederationHttpClient)
  757. request_deferred = run_in_background(
  758. self.agent.request,
  759. method_bytes,
  760. uri_bytes,
  761. headers,
  762. bodyProducer=body_producer,
  763. )
  764. # we use our own timeout mechanism rather than twisted's as a workaround
  765. # for https://twistedmatrix.com/trac/ticket/9534.
  766. # (Updated url https://github.com/twisted/twisted/issues/9534)
  767. request_deferred = timeout_deferred(
  768. request_deferred,
  769. 60,
  770. self.hs.get_reactor(),
  771. )
  772. # turn timeouts into RequestTimedOutErrors
  773. request_deferred.addErrback(_timeout_to_request_timed_out_error)
  774. response = await make_deferred_yieldable(request_deferred)
  775. incoming_responses_counter.labels(method, response.code).inc()
  776. logger.info(
  777. "Received response to %s %s: %s",
  778. method,
  779. uri,
  780. response.code,
  781. )
  782. return response
  783. except Exception as e:
  784. incoming_responses_counter.labels(method, "ERR").inc()
  785. logger.info(
  786. "Error sending request to %s %s: %s %s",
  787. method,
  788. uri,
  789. type(e).__name__,
  790. e.args[0],
  791. )
  792. set_tag(tags.ERROR, True)
  793. set_tag("error_reason", e.args[0])
  794. raise
  795. def _timeout_to_request_timed_out_error(f: Failure) -> Failure:
  796. if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError):
  797. # The TCP connection has its own timeout (set by the 'connectTimeout' param
  798. # on the Agent), which raises twisted_error.TimeoutError exception.
  799. raise RequestTimedOutError("Timeout connecting to remote server")
  800. elif f.check(defer.TimeoutError, ResponseNeverReceived):
  801. # this one means that we hit our overall timeout on the request
  802. raise RequestTimedOutError("Timeout waiting for response from remote server")
  803. return f
  804. class ByteWriteable(Protocol):
  805. """The type of object which must be passed into read_body_with_max_size.
  806. Typically this is a file object.
  807. """
  808. def write(self, data: bytes) -> int:
  809. pass
  810. class BodyExceededMaxSize(Exception):
  811. """The maximum allowed size of the HTTP body was exceeded."""
  812. class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol):
  813. """A protocol which immediately errors upon receiving data."""
  814. transport: Optional[ITCPTransport] = None
  815. def __init__(self, deferred: defer.Deferred):
  816. self.deferred = deferred
  817. def _maybe_fail(self) -> None:
  818. """
  819. Report a max size exceed error and disconnect the first time this is called.
  820. """
  821. if not self.deferred.called:
  822. self.deferred.errback(BodyExceededMaxSize())
  823. # Close the connection (forcefully) since all the data will get
  824. # discarded anyway.
  825. assert self.transport is not None
  826. self.transport.abortConnection()
  827. def dataReceived(self, data: bytes) -> None:
  828. self._maybe_fail()
  829. def connectionLost(self, reason: Failure = connectionDone) -> None:
  830. self._maybe_fail()
  831. class _ReadBodyWithMaxSizeProtocol(protocol.Protocol):
  832. """A protocol which reads body to a stream, erroring if the body exceeds a maximum size."""
  833. transport: Optional[ITCPTransport] = None
  834. def __init__(
  835. self, stream: ByteWriteable, deferred: defer.Deferred, max_size: Optional[int]
  836. ):
  837. self.stream = stream
  838. self.deferred = deferred
  839. self.length = 0
  840. self.max_size = max_size
  841. def dataReceived(self, data: bytes) -> None:
  842. # If the deferred was called, bail early.
  843. if self.deferred.called:
  844. return
  845. try:
  846. self.stream.write(data)
  847. except Exception:
  848. self.deferred.errback()
  849. return
  850. self.length += len(data)
  851. # The first time the maximum size is exceeded, error and cancel the
  852. # connection. dataReceived might be called again if data was received
  853. # in the meantime.
  854. if self.max_size is not None and self.length >= self.max_size:
  855. self.deferred.errback(BodyExceededMaxSize())
  856. # Close the connection (forcefully) since all the data will get
  857. # discarded anyway.
  858. assert self.transport is not None
  859. self.transport.abortConnection()
  860. def connectionLost(self, reason: Failure = connectionDone) -> None:
  861. # If the maximum size was already exceeded, there's nothing to do.
  862. if self.deferred.called:
  863. return
  864. if reason.check(ResponseDone):
  865. self.deferred.callback(self.length)
  866. elif reason.check(PotentialDataLoss):
  867. # This applies to requests which don't set `Content-Length` or a
  868. # `Transfer-Encoding` in the response because in this case the end of the
  869. # response is indicated by the connection being closed, an event which may
  870. # also be due to a transient network problem or other error. But since this
  871. # behavior is expected of some servers (like YouTube), let's ignore it.
  872. # Stolen from https://github.com/twisted/treq/pull/49/files
  873. # http://twistedmatrix.com/trac/ticket/4840
  874. self.deferred.callback(self.length)
  875. else:
  876. self.deferred.errback(reason)
  877. def read_body_with_max_size(
  878. response: IResponse, stream: ByteWriteable, max_size: Optional[int]
  879. ) -> "defer.Deferred[int]":
  880. """
  881. Read a HTTP response body to a file-object. Optionally enforcing a maximum file size.
  882. If the maximum file size is reached, the returned Deferred will resolve to a
  883. Failure with a BodyExceededMaxSize exception.
  884. Args:
  885. response: The HTTP response to read from.
  886. stream: The file-object to write to.
  887. max_size: The maximum file size to allow.
  888. Returns:
  889. A Deferred which resolves to the length of the read body.
  890. """
  891. d: "defer.Deferred[int]" = defer.Deferred()
  892. # If the Content-Length header gives a size larger than the maximum allowed
  893. # size, do not bother downloading the body.
  894. if max_size is not None and response.length != UNKNOWN_LENGTH:
  895. if response.length > max_size:
  896. response.deliverBody(_DiscardBodyWithMaxSizeProtocol(d))
  897. return d
  898. response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size))
  899. return d
  900. def encode_query_args(args: Optional[QueryParams]) -> bytes:
  901. """
  902. Encodes a map of query arguments to bytes which can be appended to a URL.
  903. Args:
  904. args: The query arguments, a mapping of string to string or list of strings.
  905. Returns:
  906. The query arguments encoded as bytes.
  907. """
  908. if args is None:
  909. return b""
  910. query_str = urllib.parse.urlencode(args, True)
  911. return query_str.encode("utf8")
  912. @implementer(IPolicyForHTTPS)
  913. class InsecureInterceptableContextFactory(ssl.ContextFactory):
  914. """
  915. Factory for PyOpenSSL SSL contexts which accepts any certificate for any domain.
  916. Do not use this since it allows an attacker to intercept your communications.
  917. """
  918. def __init__(self) -> None:
  919. self._context = SSL.Context(SSL.SSLv23_METHOD)
  920. self._context.set_verify(VERIFY_NONE, lambda *_: False)
  921. def getContext(self) -> SSL.Context:
  922. return self._context
  923. def creatorForNetloc(self, hostname: bytes, port: int) -> IOpenSSLContextFactory:
  924. return self
  925. def is_unknown_endpoint(
  926. e: HttpResponseException, synapse_error: Optional[SynapseError] = None
  927. ) -> bool:
  928. """
  929. Returns true if the response was due to an endpoint being unimplemented.
  930. Args:
  931. e: The error response received from the remote server.
  932. synapse_error: The above error converted to a SynapseError. This is
  933. automatically generated if not provided.
  934. """
  935. if synapse_error is None:
  936. synapse_error = e.to_synapse_error()
  937. # Matrix v1.6 specifies that servers should return a 404 or 405 with an errcode
  938. # of M_UNRECOGNIZED when they receive a request to an unknown endpoint or
  939. # to an unknown method, respectively.
  940. #
  941. # Older versions of servers don't return proper errors, so be graceful. But,
  942. # also handle that some endpoints truly do return 404 errors.
  943. return (
  944. # 404 is an unknown endpoint, 405 is a known endpoint, but unknown method.
  945. (e.code == 404 or e.code == 405)
  946. and (
  947. # Consider empty body or non-JSON bodies to be unrecognised (matches
  948. # older Dendrites & Conduits).
  949. not e.response
  950. or not e.response.startswith(b"{")
  951. # The proper response JSON with M_UNRECOGNIZED errcode.
  952. or synapse_error.errcode == Codes.UNRECOGNIZED
  953. )
  954. ) or (
  955. # Older Synapses returned a 400 error.
  956. e.code == 400
  957. and synapse_error.errcode == Codes.UNRECOGNIZED
  958. )