diff --git a/changelog.d/16137.feature b/changelog.d/16137.feature new file mode 100644 index 0000000000..bba6f161cd --- /dev/null +++ b/changelog.d/16137.feature @@ -0,0 +1 @@ +Support resolving homeservers using `matrix-fed` DNS SRV records from [MSC4040](https://github.com/matrix-org/matrix-spec-proposals/pull/4040). diff --git a/scripts-dev/federation_client.py b/scripts-dev/federation_client.py index 5ad334b4d8..e8baeac5e2 100755 --- a/scripts-dev/federation_client.py +++ b/scripts-dev/federation_client.py @@ -329,6 +329,17 @@ class MatrixConnectionAdapter(HTTPAdapter): raise ValueError("Invalid host:port '%s'" % (server_name,)) return out[0], port, out[0] + # Look up SRV for Matrix 1.8 `matrix-fed` service first + try: + srv = srvlookup.lookup("matrix-fed", "tcp", server_name)[0] + print( + f"SRV lookup on _matrix-fed._tcp.{server_name} gave {srv}", + file=sys.stderr, + ) + return srv.host, srv.port, server_name + except Exception: + pass + # Fall back to deprecated `matrix` service try: srv = srvlookup.lookup("matrix", "tcp", server_name)[0] print( @@ -337,6 +348,7 @@ class MatrixConnectionAdapter(HTTPAdapter): ) return srv.host, srv.port, server_name except Exception: + # Fall even further back to just port 8448 return server_name, 8448, server_name @staticmethod diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 91a24efcd0..a3a396bb37 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -399,15 +399,34 @@ class MatrixHostnameEndpoint: if port or _is_ip_literal(host): return [Server(host, port or 8448)] + # Check _matrix-fed._tcp SRV record. logger.debug("Looking up SRV record for %s", host.decode(errors="replace")) + server_list = await self._srv_resolver.resolve_service( + b"_matrix-fed._tcp." + host + ) + + if server_list: + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Got %s from SRV lookup for %s", + ", ".join(map(str, server_list)), + host.decode(errors="replace"), + ) + return server_list + + # No _matrix-fed._tcp SRV record, fallback to legacy _matrix._tcp SRV record. + logger.debug( + "Looking up deprecated SRV record for %s", host.decode(errors="replace") + ) server_list = await self._srv_resolver.resolve_service(b"_matrix._tcp." + host) if server_list: - logger.debug( - "Got %s from SRV lookup for %s", - ", ".join(map(str, server_list)), - host.decode(errors="replace"), - ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Got %s from deprecated SRV lookup for %s", + ", ".join(map(str, server_list)), + host.decode(errors="replace"), + ) return server_list # No SRV records, so we fallback to host and 8448 diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index 0d17f2fe5b..9f63fa6fa8 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -15,7 +15,7 @@ import base64 import logging import os from typing import Generator, List, Optional, cast -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, call, patch import treq from netaddr import IPSet @@ -651,9 +651,9 @@ class MatrixFederationAgentTests(unittest.TestCase): # .well-known request fails. self.reactor.pump((0.4,)) - # now there should be a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv1" + # now there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv1"), call(b"_matrix._tcp.testserv1")] ) # we should fall back to a direct connection @@ -737,9 +737,9 @@ class MatrixFederationAgentTests(unittest.TestCase): # .well-known request fails. self.reactor.pump((0.4,)) - # now there should be a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv" + # now there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")] ) # we should fall back to a direct connection @@ -788,9 +788,12 @@ class MatrixFederationAgentTests(unittest.TestCase): content=b'{ "m.server": "target-server" }', ) - # there should be a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.target-server" + # there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [ + call(b"_matrix-fed._tcp.target-server"), + call(b"_matrix._tcp.target-server"), + ] ) # now we should get a connection to the target server @@ -878,9 +881,12 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) - # there should be a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.target-server" + # there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [ + call(b"_matrix-fed._tcp.target-server"), + call(b"_matrix._tcp.target-server"), + ] ) # now we should get a connection to the target server @@ -942,9 +948,9 @@ class MatrixFederationAgentTests(unittest.TestCase): client_factory, expected_sni=b"testserv", content=b"NOT JSON" ) - # now there should be a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv" + # now there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")] ) # we should fall back to a direct connection @@ -1016,14 +1022,14 @@ class MatrixFederationAgentTests(unittest.TestCase): # there should be no requests self.assertEqual(len(http_proto.requests), 0) - # and there should be a SRV lookup instead - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv" + # and there should be two SRV lookups instead + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")] ) def test_get_hostname_srv(self) -> None: """ - Test the behaviour when there is a single SRV record + Test the behaviour when there is a single SRV record for _matrix-fed. """ self.agent = self._make_agent() @@ -1039,7 +1045,51 @@ class MatrixFederationAgentTests(unittest.TestCase): # the request for a .well-known will have failed with a DNS lookup error. self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv" + b"_matrix-fed._tcp.testserv" + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection(client_factory, expected_sni=b"testserv") + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/foo/bar") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"]) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_get_hostname_srv_legacy(self) -> None: + """ + Test the behaviour when there is a single SRV record for _matrix. + """ + self.agent = self._make_agent() + + # Return no entries for the _matrix-fed lookup, and a response for _matrix. + self.mock_resolver.resolve_service.side_effect = [ + [], + [Server(host=b"srvtarget", port=8443)], + ] + self.reactor.lookups["srvtarget"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # the request for a .well-known will have failed with a DNS lookup error. + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")] ) # Make sure treq is trying to connect @@ -1065,7 +1115,7 @@ class MatrixFederationAgentTests(unittest.TestCase): def test_get_well_known_srv(self) -> None: """Test the behaviour when the .well-known redirects to a place where there - is a SRV. + is a _matrix-fed SRV record. """ self.agent = self._make_agent() @@ -1096,7 +1146,72 @@ class MatrixFederationAgentTests(unittest.TestCase): # there should be a SRV lookup self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.target-server" + b"_matrix-fed._tcp.target-server" + ) + + # now we should get a connection to the target of the SRV record + self.assertEqual(len(clients), 2) + (host, port, client_factory, _timeout, _bindAddress) = clients[1] + self.assertEqual(host, "5.6.7.8") + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, expected_sni=b"target-server" + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/foo/bar") + self.assertEqual( + request.requestHeaders.getRawHeaders(b"host"), [b"target-server"] + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_get_well_known_srv_legacy(self) -> None: + """Test the behaviour when the .well-known redirects to a place where there + is a _matrix SRV record. + """ + self.agent = self._make_agent() + + self.reactor.lookups["testserv"] = "1.2.3.4" + self.reactor.lookups["srvtarget"] = "5.6.7.8" + + test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # there should be an attempt to connect on port 443 for the .well-known + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 443) + + # Return no entries for the _matrix-fed lookup, and a response for _matrix. + self.mock_resolver.resolve_service.side_effect = [ + [], + [Server(host=b"srvtarget", port=8443)], + ] + + self._handle_well_known_connection( + client_factory, + expected_sni=b"testserv", + content=b'{ "m.server": "target-server" }', + ) + + # there should be two SRV lookups + self.mock_resolver.resolve_service.assert_has_calls( + [ + call(b"_matrix-fed._tcp.target-server"), + call(b"_matrix._tcp.target-server"), + ] ) # now we should get a connection to the target of the SRV record @@ -1158,8 +1273,11 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.4,)) # now there should have been a SRV lookup - self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.xn--bcher-kva.com" + self.mock_resolver.resolve_service.assert_has_calls( + [ + call(b"_matrix-fed._tcp.xn--bcher-kva.com"), + call(b"_matrix._tcp.xn--bcher-kva.com"), + ] ) # We should fall back to port 8448 @@ -1188,7 +1306,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.successResultOf(test_d) def test_idna_srv_target(self) -> None: - """test the behaviour when the target of a SRV record has idna chars""" + """test the behaviour when the target of a _matrix-fed SRV record has idna chars""" self.agent = self._make_agent() self.mock_resolver.resolve_service.return_value = [ @@ -1204,7 +1322,57 @@ class MatrixFederationAgentTests(unittest.TestCase): self.assertNoResult(test_d) self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.xn--bcher-kva.com" + b"_matrix-fed._tcp.xn--bcher-kva.com" + ) + + # Make sure treq is trying to connect + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients[0] + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8443) + + # make a test server, and wire up the client + http_server = self._make_connection( + client_factory, expected_sni=b"xn--bcher-kva.com" + ) + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/foo/bar") + self.assertEqual( + request.requestHeaders.getRawHeaders(b"host"), [b"xn--bcher-kva.com"] + ) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_idna_srv_target_legacy(self) -> None: + """test the behaviour when the target of a _matrix SRV record has idna chars""" + self.agent = self._make_agent() + + # Return no entries for the _matrix-fed lookup, and a response for _matrix. + self.mock_resolver.resolve_service.side_effect = [ + [], + [Server(host=b"xn--trget-3qa.com", port=8443)], + ] # târget.com + self.reactor.lookups["xn--trget-3qa.com"] = "1.2.3.4" + + test_d = self._make_get_request( + b"matrix-federation://xn--bcher-kva.com/foo/bar" + ) + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_has_calls( + [ + call(b"_matrix-fed._tcp.xn--bcher-kva.com"), + call(b"_matrix._tcp.xn--bcher-kva.com"), + ] ) # Make sure treq is trying to connect @@ -1394,7 +1562,7 @@ class MatrixFederationAgentTests(unittest.TestCase): self.assertIsNone(r.delegated_server) def test_srv_fallbacks(self) -> None: - """Test that other SRV results are tried if the first one fails.""" + """Test that other SRV results are tried if the first one fails for _matrix-fed SRV.""" self.agent = self._make_agent() self.mock_resolver.resolve_service.return_value = [ @@ -1409,7 +1577,67 @@ class MatrixFederationAgentTests(unittest.TestCase): self.assertNoResult(test_d) self.mock_resolver.resolve_service.assert_called_once_with( - b"_matrix._tcp.testserv" + b"_matrix-fed._tcp.testserv" + ) + + # We should see an attempt to connect to the first server + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8443) + + # Fonx the connection + client_factory.clientConnectionFailed(None, Exception("nope")) + + # There's a 300ms delay in HostnameEndpoint + self.reactor.pump((0.4,)) + + # Hasn't failed yet + self.assertNoResult(test_d) + + # We shouldnow see an attempt to connect to the second server + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8444) + + # make a test server, and wire up the client + http_server = self._make_connection(client_factory, expected_sni=b"testserv") + + self.assertEqual(len(http_server.requests), 1) + request = http_server.requests[0] + self.assertEqual(request.method, b"GET") + self.assertEqual(request.path, b"/foo/bar") + self.assertEqual(request.requestHeaders.getRawHeaders(b"host"), [b"testserv"]) + + # finish the request + request.finish() + self.reactor.pump((0.1,)) + self.successResultOf(test_d) + + def test_srv_fallbacks_legacy(self) -> None: + """Test that other SRV results are tried if the first one fails for _matrix SRV.""" + self.agent = self._make_agent() + + # Return no entries for the _matrix-fed lookup, and a response for _matrix. + self.mock_resolver.resolve_service.side_effect = [ + [], + [ + Server(host=b"target.com", port=8443), + Server(host=b"target.com", port=8444), + ], + ] + self.reactor.lookups["target.com"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + self.mock_resolver.resolve_service.assert_has_calls( + [call(b"_matrix-fed._tcp.testserv"), call(b"_matrix._tcp.testserv")] ) # We should see an attempt to connect to the first server @@ -1449,6 +1677,43 @@ class MatrixFederationAgentTests(unittest.TestCase): self.reactor.pump((0.1,)) self.successResultOf(test_d) + def test_srv_no_fallback_to_legacy(self) -> None: + """Test that _matrix SRV results are not tried if the _matrix-fed one fails.""" + self.agent = self._make_agent() + + # Return a failing entry for _matrix-fed. + self.mock_resolver.resolve_service.side_effect = [ + [Server(host=b"target.com", port=8443)], + [], + ] + self.reactor.lookups["target.com"] = "1.2.3.4" + + test_d = self._make_get_request(b"matrix-federation://testserv/foo/bar") + + # Nothing happened yet + self.assertNoResult(test_d) + + # Only the _matrix-fed is checked, _matrix is ignored. + self.mock_resolver.resolve_service.assert_called_once_with( + b"_matrix-fed._tcp.testserv" + ) + + # We should see an attempt to connect to the first server + clients = self.reactor.tcpClients + self.assertEqual(len(clients), 1) + (host, port, client_factory, _timeout, _bindAddress) = clients.pop(0) + self.assertEqual(host, "1.2.3.4") + self.assertEqual(port, 8443) + + # Fonx the connection + client_factory.clientConnectionFailed(None, Exception("nope")) + + # There's a 300ms delay in HostnameEndpoint + self.reactor.pump((0.4,)) + + # Failed to resolve a server. + self.assertFailure(test_d, Exception) + class TestCachePeriodFromHeaders(unittest.TestCase): def test_cache_control(self) -> None: