# -*- coding: utf-8 -*-
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2019 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import re
from abc import abstractmethod
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple

from canonicaljson import json

from synapse.api.constants import EventTypes
from synapse.api.errors import StoreError
from synapse.api.room_versions import RoomVersion, RoomVersions
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.search import SearchStore
from synapse.types import ThirdPartyInstanceID
from synapse.util.caches.descriptors import cached

logger = logging.getLogger(__name__)
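
# Lightweight value containers used by this store: OpsLevel bundles the ban,
# kick and redact power levels of a room, and RatelimitOverride carries a
# per-user message-rate override (messages per second and burst count).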
OpsLevel = collections.namedtuple(
    "OpsLevel", ("ban_level", "kick_level", "redact_level")
)

RatelimitOverride = collections.namedtuple(
    "RatelimitOverride", ("messages_per_second", "burst_count")
)

class RoomSortOrder(Enum):
    """
    Enum to define the sorting method used when returning rooms with get_rooms_paginate

    NAME = sort rooms alphabetically by name
    JOINED_MEMBERS = sort rooms by membership size, highest to lowest
    """

    # ALPHABETICAL and SIZE are deprecated.
    # ALPHABETICAL is the same as NAME.
    ALPHABETICAL = "alphabetical"
    # SIZE is the same as JOINED_MEMBERS.
    SIZE = "size"
    NAME = "name"
    CANONICAL_ALIAS = "canonical_alias"
    JOINED_MEMBERS = "joined_members"
    JOINED_LOCAL_MEMBERS = "joined_local_members"
    VERSION = "version"
    CREATOR = "creator"
    ENCRYPTION = "encryption"
    FEDERATABLE = "federatable"
    PUBLIC = "public"
    JOIN_RULES = "join_rules"
    GUEST_ACCESS = "guest_access"
    HISTORY_VISIBILITY = "history_visibility"
    STATE_EVENTS = "state_events"

class RoomWorkerStore(SQLBaseStore):
    def __init__(self, database: DatabasePool, db_conn, hs):
        super(RoomWorkerStore, self).__init__(database, db_conn, hs)

        self.config = hs.config

    def get_room(self, room_id):
        """Retrieve a room.

        Args:
            room_id (str): The ID of the room to retrieve.
        Returns:
            A dict containing the room information, or None if the room is unknown.
        """
        return self.db_pool.simple_select_one(
            table="rooms",
            keyvalues={"room_id": room_id},
            retcols=("room_id", "is_public", "creator"),
            desc="get_room",
            allow_none=True,
        )
    def get_room_with_stats(self, room_id: str):
        """Retrieve room with statistics.

        Args:
            room_id: The ID of the room to retrieve.
        Returns:
            A dict containing the room information, or None if the room is unknown.
        """

        def get_room_with_stats_txn(txn, room_id):
            sql = """
                SELECT room_id, state.name, state.canonical_alias, curr.joined_members,
                  curr.local_users_in_room AS joined_local_members, rooms.room_version AS version,
                  rooms.creator, state.encryption, state.is_federatable AS federatable,
                  rooms.is_public AS public, state.join_rules, state.guest_access,
                  state.history_visibility, curr.current_state_events AS state_events
                FROM rooms
                LEFT JOIN room_stats_state state USING (room_id)
                LEFT JOIN room_stats_current curr USING (room_id)
                WHERE room_id = ?
                """
            txn.execute(sql, [room_id])
            # Catch error if sql returns empty result to return "None" instead of an error
            try:
                res = self.db_pool.cursor_to_dict(txn)[0]
            except IndexError:
                return None

            res["federatable"] = bool(res["federatable"])
            res["public"] = bool(res["public"])
            return res

        return self.db_pool.runInteraction(
            "get_room_with_stats", get_room_with_stats_txn, room_id
        )
    def get_public_room_ids(self):
        return self.db_pool.simple_select_onecol(
            table="rooms",
            keyvalues={"is_public": True},
            retcol="room_id",
            desc="get_public_room_ids",
        )
    def count_public_rooms(self, network_tuple, ignore_non_federatable):
        """Counts the number of public rooms as tracked in the room_stats_current
        and room_stats_state tables.

        Args:
            network_tuple (ThirdPartyInstanceID|None)
            ignore_non_federatable (bool): If true filters out non-federatable rooms
        """

        def _count_public_rooms_txn(txn):
            query_args = []

            if network_tuple:
                if network_tuple.appservice_id:
                    published_sql = """
                        SELECT room_id from appservice_room_list
                        WHERE appservice_id = ? AND network_id = ?
                    """
                    query_args.append(network_tuple.appservice_id)
                    query_args.append(network_tuple.network_id)
                else:
                    published_sql = """
                        SELECT room_id FROM rooms WHERE is_public
                    """
            else:
                published_sql = """
                    SELECT room_id FROM rooms WHERE is_public
                    UNION SELECT room_id from appservice_room_list
                """

            sql = """
                SELECT
                    COALESCE(COUNT(*), 0)
                FROM (
                    %(published_sql)s
                ) published
                INNER JOIN room_stats_state USING (room_id)
                INNER JOIN room_stats_current USING (room_id)
                WHERE
                    (
                        join_rules = 'public' OR history_visibility = 'world_readable'
                    )
                    AND joined_members > 0
            """ % {
                "published_sql": published_sql
            }

            txn.execute(sql, query_args)
            return txn.fetchone()[0]

        return self.db_pool.runInteraction(
            "count_public_rooms", _count_public_rooms_txn
        )
    async def get_largest_public_rooms(
        self,
        network_tuple: Optional[ThirdPartyInstanceID],
        search_filter: Optional[dict],
        limit: Optional[int],
        bounds: Optional[Tuple[int, str]],
        forwards: bool,
        ignore_non_federatable: bool = False,
    ):
        """Gets the largest public rooms (where largest is in terms of joined
        members, as tracked in the statistics table).

        Args:
            network_tuple
            search_filter
            limit: Maximum number of rows to return, unlimited otherwise.
            bounds: An upper or lower bound to apply to the result set if given,
                consisting of a joined member count and room_id (these are
                excluded from the result set).
            forwards: true iff going forwards, false if going backwards
            ignore_non_federatable: If true filters out non-federatable rooms.

        Returns:
            Rooms in order: biggest number of joined users first.
            We then arbitrarily use the room_id as a tie breaker.
        """

        where_clauses = []
        query_args = []

        if network_tuple:
            if network_tuple.appservice_id:
                published_sql = """
                    SELECT room_id from appservice_room_list
                    WHERE appservice_id = ? AND network_id = ?
                """
                query_args.append(network_tuple.appservice_id)
                query_args.append(network_tuple.network_id)
            else:
                published_sql = """
                    SELECT room_id FROM rooms WHERE is_public
                """
        else:
            published_sql = """
                SELECT room_id FROM rooms WHERE is_public
                UNION SELECT room_id from appservice_room_list
            """

        # Work out the bounds if we're given them. These bounds look slightly
        # odd, but are designed to help the query planner use indices by pulling
        # out a common bound.
        if bounds:
            last_joined_members, last_room_id = bounds
            if forwards:
                where_clauses.append(
                    """
                        joined_members <= ? AND (
                            joined_members < ? OR room_id < ?
                        )
                    """
                )
            else:
                where_clauses.append(
                    """
                        joined_members >= ? AND (
                            joined_members > ? OR room_id > ?
                        )
                    """
                )

            query_args += [last_joined_members, last_joined_members, last_room_id]

        if ignore_non_federatable:
            where_clauses.append("is_federatable")

        if search_filter and search_filter.get("generic_search_term", None):
            search_term = "%" + search_filter["generic_search_term"] + "%"

            where_clauses.append(
                """
                    (
                        LOWER(name) LIKE ?
                        OR LOWER(topic) LIKE ?
                        OR LOWER(canonical_alias) LIKE ?
                    )
                """
            )
            query_args += [
                search_term.lower(),
                search_term.lower(),
                search_term.lower(),
            ]

        where_clause = ""
        if where_clauses:
            where_clause = " AND " + " AND ".join(where_clauses)

        sql = """
            SELECT
                room_id, name, topic, canonical_alias, joined_members,
                avatar, history_visibility, joined_members, guest_access
            FROM (
                %(published_sql)s
            ) published
            INNER JOIN room_stats_state USING (room_id)
            INNER JOIN room_stats_current USING (room_id)
            WHERE
                (
                    join_rules = 'public' OR history_visibility = 'world_readable'
                )
                AND joined_members > 0
                %(where_clause)s
            ORDER BY joined_members %(dir)s, room_id %(dir)s
        """ % {
            "published_sql": published_sql,
            "where_clause": where_clause,
            "dir": "DESC" if forwards else "ASC",
        }

        if limit is not None:
            query_args.append(limit)

            sql += """
                LIMIT ?
            """

        def _get_largest_public_rooms_txn(txn):
            txn.execute(sql, query_args)

            results = self.db_pool.cursor_to_dict(txn)

            if not forwards:
                results.reverse()

            return results

        ret_val = await self.db_pool.runInteraction(
            "get_largest_public_rooms", _get_largest_public_rooms_txn
        )
        return ret_val
    @cached(max_entries=10000)
    def is_room_blocked(self, room_id):
        return self.db_pool.simple_select_one_onecol(
            table="blocked_rooms",
            keyvalues={"room_id": room_id},
            retcol="1",
            allow_none=True,
            desc="is_room_blocked",
        )
    async def get_rooms_paginate(
        self,
        start: int,
        limit: int,
        order_by: RoomSortOrder,
        reverse_order: bool,
        search_term: Optional[str],
    ) -> Tuple[List[Dict[str, Any]], int]:
        """Function to retrieve a paginated list of rooms as json.

        Args:
            start: offset in the list
            limit: maximum number of rooms to retrieve
            order_by: the sort order of the returned list
            reverse_order: whether to reverse the room list
            search_term: a string to filter room names by
        Returns:
            A list of room dicts and an integer representing the total number of
            rooms that exist given this query
        """
        # Filter room names by a string
        where_statement = ""
        if search_term:
            where_statement = "WHERE state.name LIKE ?"

            # Our postgres db driver converts ? -> %s in SQL strings as that's the
            # placeholder for postgres.
            # HOWEVER, if you put a % into your SQL then everything goes wibbly.
            # To get around this, we're going to surround search_term with %'s
            # before giving it to the database in python instead
            search_term = "%" + search_term + "%"

        # Set ordering
        if RoomSortOrder(order_by) == RoomSortOrder.SIZE:
            # Deprecated in favour of RoomSortOrder.JOINED_MEMBERS
            order_by_column = "curr.joined_members"
            order_by_asc = False
        elif RoomSortOrder(order_by) == RoomSortOrder.ALPHABETICAL:
            # Deprecated in favour of RoomSortOrder.NAME
            order_by_column = "state.name"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.NAME:
            order_by_column = "state.name"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.CANONICAL_ALIAS:
            order_by_column = "state.canonical_alias"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.JOINED_MEMBERS:
            order_by_column = "curr.joined_members"
            order_by_asc = False
        elif RoomSortOrder(order_by) == RoomSortOrder.JOINED_LOCAL_MEMBERS:
            order_by_column = "curr.local_users_in_room"
            order_by_asc = False
        elif RoomSortOrder(order_by) == RoomSortOrder.VERSION:
            order_by_column = "rooms.room_version"
            order_by_asc = False
        elif RoomSortOrder(order_by) == RoomSortOrder.CREATOR:
            order_by_column = "rooms.creator"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.ENCRYPTION:
            order_by_column = "state.encryption"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.FEDERATABLE:
            order_by_column = "state.is_federatable"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.PUBLIC:
            order_by_column = "rooms.is_public"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.JOIN_RULES:
            order_by_column = "state.join_rules"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.GUEST_ACCESS:
            order_by_column = "state.guest_access"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.HISTORY_VISIBILITY:
            order_by_column = "state.history_visibility"
            order_by_asc = True
        elif RoomSortOrder(order_by) == RoomSortOrder.STATE_EVENTS:
            order_by_column = "curr.current_state_events"
            order_by_asc = False
        else:
            raise StoreError(
                500, "Incorrect value for order_by provided: %s" % order_by
            )

        # Whether to return the list in reverse order
        if reverse_order:
            # Flip the boolean
            order_by_asc = not order_by_asc

        # Create one query for getting the limited number of events that the user asked
        # for, and another query for getting the total number of events that could be
        # returned. Thus allowing us to see if there are more events to paginate through
        info_sql = """
            SELECT state.room_id, state.name, state.canonical_alias, curr.joined_members,
              curr.local_users_in_room, rooms.room_version, rooms.creator,
              state.encryption, state.is_federatable, rooms.is_public, state.join_rules,
              state.guest_access, state.history_visibility, curr.current_state_events
            FROM room_stats_state state
            INNER JOIN room_stats_current curr USING (room_id)
            INNER JOIN rooms USING (room_id)
            %s
            ORDER BY %s %s
            LIMIT ?
            OFFSET ?
        """ % (
            where_statement,
            order_by_column,
            "ASC" if order_by_asc else "DESC",
        )

        # Use a nested SELECT statement as SQL can't count(*) with an OFFSET
        count_sql = """
            SELECT count(*) FROM (
              SELECT room_id FROM room_stats_state state
              %s
            ) AS get_room_ids
        """ % (
            where_statement,
        )

        def _get_rooms_paginate_txn(txn):
            # Execute the data query
            sql_values = (limit, start)
            if search_term:
                # Add the search term into the WHERE clause
                sql_values = (search_term,) + sql_values
            txn.execute(info_sql, sql_values)

            # Refactor room query data into a structured dictionary
            rooms = []
            for room in txn:
                rooms.append(
                    {
                        "room_id": room[0],
                        "name": room[1],
                        "canonical_alias": room[2],
                        "joined_members": room[3],
                        "joined_local_members": room[4],
                        "version": room[5],
                        "creator": room[6],
                        "encryption": room[7],
                        "federatable": room[8],
                        "public": room[9],
                        "join_rules": room[10],
                        "guest_access": room[11],
                        "history_visibility": room[12],
                        "state_events": room[13],
                    }
                )

            # Execute the count query
            # Add the search term into the WHERE clause if present
            sql_values = (search_term,) if search_term else ()
            txn.execute(count_sql, sql_values)

            room_count = txn.fetchone()
            return rooms, room_count[0]

        return await self.db_pool.runInteraction(
            "get_rooms_paginate", _get_rooms_paginate_txn,
        )
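    # Illustrative call (hypothetical admin-API caller, not part of this module):
    #
    #     rooms, total = await store.get_rooms_paginate(
    #         start=0,
    #         limit=10,
    #         order_by=RoomSortOrder.NAME,
    #         reverse_order=False,
    #         search_term=None,
    #     )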
    @cached(max_entries=10000)
    async def get_ratelimit_for_user(self, user_id):
        """Check if there are any overrides for ratelimiting for the given
        user

        Args:
            user_id (str)

        Returns:
            RatelimitOverride if there is an override, else None. If the contents
            of RatelimitOverride are None or 0 then ratelimiting has been
            disabled for that user entirely.
        """
        row = await self.db_pool.simple_select_one(
            table="ratelimit_override",
            keyvalues={"user_id": user_id},
            retcols=("messages_per_second", "burst_count"),
            allow_none=True,
            desc="get_ratelimit_for_user",
        )

        if row:
            return RatelimitOverride(
                messages_per_second=row["messages_per_second"],
                burst_count=row["burst_count"],
            )
        else:
            return None
    @cached()
    async def get_retention_policy_for_room(self, room_id):
        """Get the retention policy for a given room.

        If no retention policy has been found for this room, returns a policy defined
        by the configured default policy (which has None as both the 'min_lifetime' and
        the 'max_lifetime' if no default policy has been defined in the server's
        configuration).

        Args:
            room_id (str): The ID of the room to get the retention policy of.

        Returns:
            dict[str, int|None]: "min_lifetime" and "max_lifetime" for this room.
        """

        def get_retention_policy_for_room_txn(txn):
            txn.execute(
                """
                SELECT min_lifetime, max_lifetime FROM room_retention
                INNER JOIN current_state_events USING (event_id, room_id)
                WHERE room_id = ?;
                """,
                (room_id,),
            )

            return self.db_pool.cursor_to_dict(txn)

        ret = await self.db_pool.runInteraction(
            "get_retention_policy_for_room", get_retention_policy_for_room_txn,
        )

        # If we don't know this room ID, ret will be empty, in which case return the
        # default policy.
        if not ret:
            return {
                "min_lifetime": self.config.retention_default_min_lifetime,
                "max_lifetime": self.config.retention_default_max_lifetime,
            }

        row = ret[0]

        # If one of the room's policy's attributes isn't defined, use the matching
        # attribute from the default policy.
        # The default values will be None if no default policy has been defined, or if one
        # of the attributes is missing from the default policy.
        if row["min_lifetime"] is None:
            row["min_lifetime"] = self.config.retention_default_min_lifetime

        if row["max_lifetime"] is None:
            row["max_lifetime"] = self.config.retention_default_max_lifetime

        return row
    def get_media_mxcs_in_room(self, room_id):
        """Retrieves all the local and remote media MXC URIs in a given room

        Args:
            room_id (str)

        Returns:
            The local and remote media MXC URIs, as two lists of strings.
        """

        def _get_media_mxcs_in_room_txn(txn):
            local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
            local_media_mxcs = []
            remote_media_mxcs = []

            # Convert the IDs to MXC URIs
            for media_id in local_mxcs:
                local_media_mxcs.append("mxc://%s/%s" % (self.hs.hostname, media_id))
            for hostname, media_id in remote_mxcs:
                remote_media_mxcs.append("mxc://%s/%s" % (hostname, media_id))

            return local_media_mxcs, remote_media_mxcs

        return self.db_pool.runInteraction(
            "get_media_ids_in_room", _get_media_mxcs_in_room_txn
        )
    def quarantine_media_ids_in_room(self, room_id, quarantined_by):
        """Loops through all events with media in a room and quarantines the
        associated media.
        """
        logger.info("Quarantining media in room: %s", room_id)

        def _quarantine_media_in_room_txn(txn):
            local_mxcs, remote_mxcs = self._get_media_mxcs_in_room_txn(txn, room_id)
            return self._quarantine_media_txn(
                txn, local_mxcs, remote_mxcs, quarantined_by
            )

        return self.db_pool.runInteraction(
            "quarantine_media_in_room", _quarantine_media_in_room_txn
        )
    def _get_media_mxcs_in_room_txn(self, txn, room_id):
        """Retrieves all the local and remote media MXC URIs in a given room

        Args:
            txn (cursor)
            room_id (str)

        Returns:
            A list of local media IDs and a list of (hostname, media ID) tuples
            for remote media.
        """
        mxc_re = re.compile("^mxc://([^/]+)/([^/#?]+)")

        sql = """
            SELECT stream_ordering, json FROM events
            JOIN event_json USING (room_id, event_id)
            WHERE room_id = ?
                %(where_clause)s
                AND contains_url = ? AND outlier = ?
            ORDER BY stream_ordering DESC
            LIMIT ?
        """
        txn.execute(sql % {"where_clause": ""}, (room_id, True, False, 100))

        local_media_mxcs = []
        remote_media_mxcs = []
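        # Page backwards through the room's events in batches of 100, using the
        # last stream_ordering seen as the cursor for the next batch.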
        while True:
            next_token = None
            for stream_ordering, content_json in txn:
                next_token = stream_ordering
                event_json = db_to_json(content_json)
                content = event_json["content"]
                content_url = content.get("url")
                thumbnail_url = content.get("info", {}).get("thumbnail_url")

                for url in (content_url, thumbnail_url):
                    if not url:
                        continue
                    matches = mxc_re.match(url)
                    if matches:
                        hostname = matches.group(1)
                        media_id = matches.group(2)
                        if hostname == self.hs.hostname:
                            local_media_mxcs.append(media_id)
                        else:
                            remote_media_mxcs.append((hostname, media_id))

            if next_token is None:
                # We've gone through the whole room, so we're finished.
                break

            txn.execute(
                sql % {"where_clause": "AND stream_ordering < ?"},
                (room_id, next_token, True, False, 100),
            )

        return local_media_mxcs, remote_media_mxcs
    def quarantine_media_by_id(
        self, server_name: str, media_id: str, quarantined_by: str,
    ):
        """Quarantines a single local or remote media ID.

        Args:
            server_name: The name of the server that holds this media
            media_id: The ID of the media to be quarantined
            quarantined_by: The user ID that initiated the quarantine request
        """
        logger.info("Quarantining media: %s/%s", server_name, media_id)
        is_local = server_name == self.config.server_name

        def _quarantine_media_by_id_txn(txn):
            local_mxcs = [media_id] if is_local else []
            remote_mxcs = [(server_name, media_id)] if not is_local else []

            return self._quarantine_media_txn(
                txn, local_mxcs, remote_mxcs, quarantined_by
            )

        return self.db_pool.runInteraction(
            "quarantine_media_by_user", _quarantine_media_by_id_txn
        )
    def quarantine_media_ids_by_user(self, user_id: str, quarantined_by: str):
        """Quarantines all local media associated with a single user.

        Args:
            user_id: The ID of the user to quarantine media of
            quarantined_by: The ID of the user who made the quarantine request
        """

        def _quarantine_media_by_user_txn(txn):
            local_media_ids = self._get_media_ids_by_user_txn(txn, user_id)
            return self._quarantine_media_txn(txn, local_media_ids, [], quarantined_by)

        return self.db_pool.runInteraction(
            "quarantine_media_by_user", _quarantine_media_by_user_txn
        )
    def _get_media_ids_by_user_txn(self, txn, user_id: str, filter_quarantined=True):
        """Retrieves local media IDs by a given user

        Args:
            txn (cursor)
            user_id: The ID of the user to retrieve media IDs of

        Returns:
            A list of media IDs uploaded by the given user.
        """
        # Local media
        sql = """
            SELECT media_id
            FROM local_media_repository
            WHERE user_id = ?
            """
        if filter_quarantined:
            sql += "AND quarantined_by IS NULL"
        txn.execute(sql, (user_id,))

        local_media_ids = [row[0] for row in txn]

        # TODO: Figure out all remote media a user has referenced in a message

        return local_media_ids
    def _quarantine_media_txn(
        self,
        txn,
        local_mxcs: List[str],
        remote_mxcs: List[Tuple[str, str]],
        quarantined_by: str,
    ) -> int:
        """Quarantine local and remote media items

        Args:
            txn (cursor)
            local_mxcs: A list of local media IDs
            remote_mxcs: A list of (remote server, media id) tuples representing
                remote mxc URLs
            quarantined_by: The ID of the user who initiated the quarantine request
        Returns:
            The total number of media items quarantined
        """
        # Update all the tables to set the quarantined_by flag
        txn.executemany(
            """
            UPDATE local_media_repository
            SET quarantined_by = ?
            WHERE media_id = ? AND safe_from_quarantine = ?
            """,
            ((quarantined_by, media_id, False) for media_id in local_mxcs),
        )
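        # Local media explicitly marked safe_from_quarantine is deliberately
        # left untouched by the WHERE clause above.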
        # Note that a rowcount of -1 can be used to indicate no rows were affected.
        total_media_quarantined = txn.rowcount if txn.rowcount > 0 else 0

        txn.executemany(
            """
            UPDATE remote_media_cache
            SET quarantined_by = ?
            WHERE media_origin = ? AND media_id = ?
            """,
            ((quarantined_by, origin, media_id) for origin, media_id in remote_mxcs),
        )
        total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0

        return total_media_quarantined
    async def get_all_new_public_rooms(
        self, instance_name: str, last_id: int, current_id: int, limit: int
    ) -> Tuple[List[Tuple[int, tuple]], int, bool]:
        """Get updates for public rooms replication stream.

        Args:
            instance_name: The writer we want to fetch updates from. Unused
                here since there is only ever one writer.
            last_id: The token to fetch updates from. Exclusive.
            current_id: The token to fetch updates up to. Inclusive.
            limit: The requested limit for the number of rows to return. The
                function may return more or fewer rows.

        Returns:
            A tuple consisting of: the updates, a token to use to fetch
            subsequent updates, and whether we returned fewer rows than exists
            between the requested tokens due to the limit.

            The token returned can be used in a subsequent call to this
            function to get further updates.

            The updates are a list of 2-tuples of stream ID and the row data
        """
        if last_id == current_id:
            return [], current_id, False

        def get_all_new_public_rooms(txn):
            sql = """
                SELECT stream_id, room_id, visibility, appservice_id, network_id
                FROM public_room_list_stream
                WHERE stream_id > ? AND stream_id <= ?
                ORDER BY stream_id ASC
                LIMIT ?
            """

            txn.execute(sql, (last_id, current_id, limit))
            updates = [(row[0], row[1:]) for row in txn]
            limited = False
            upto_token = current_id
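            # A full page means there may be more rows to fetch: report the
            # stream ID of the last row returned as the next token.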
            if len(updates) >= limit:
                upto_token = updates[-1][0]
                limited = True

            return updates, upto_token, limited

        return await self.db_pool.runInteraction(
            "get_all_new_public_rooms", get_all_new_public_rooms
        )

class RoomBackgroundUpdateStore(SQLBaseStore):
    REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
    ADD_ROOMS_ROOM_VERSION_COLUMN = "add_rooms_room_version_column"

    def __init__(self, database: DatabasePool, db_conn, hs):
        super(RoomBackgroundUpdateStore, self).__init__(database, db_conn, hs)

        self.config = hs.config

        self.db_pool.updates.register_background_update_handler(
            "insert_room_retention", self._background_insert_retention,
        )

        self.db_pool.updates.register_background_update_handler(
            self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE,
            self._remove_tombstoned_rooms_from_directory,
        )

        self.db_pool.updates.register_background_update_handler(
            self.ADD_ROOMS_ROOM_VERSION_COLUMN,
            self._background_add_rooms_room_version_column,
        )
    async def _background_insert_retention(self, progress, batch_size):
        """Retrieves a list of all rooms within a range and inserts an entry for each of
        them into the room_retention table.

        NULLs the property's columns if missing from the retention event in the room's
        state (or NULLs all of them if there's no retention event in the room's state),
        so that we fall back to the server's retention policy.
        """
        last_room = progress.get("room_id", "")

        def _background_insert_retention_txn(txn):
            txn.execute(
                """
                SELECT state.room_id, state.event_id, events.json
                FROM current_state_events as state
                LEFT JOIN event_json AS events ON (state.event_id = events.event_id)
                WHERE state.room_id > ? AND state.type = '%s'
                ORDER BY state.room_id ASC
                LIMIT ?;
                """
                % EventTypes.Retention,
                (last_room, batch_size),
            )

            rows = self.db_pool.cursor_to_dict(txn)

            if not rows:
                return True

            for row in rows:
                if not row["json"]:
                    retention_policy = {}
                else:
                    ev = db_to_json(row["json"])
                    retention_policy = ev["content"]

                self.db_pool.simple_insert_txn(
                    txn=txn,
                    table="room_retention",
                    values={
                        "room_id": row["room_id"],
                        "event_id": row["event_id"],
                        "min_lifetime": retention_policy.get("min_lifetime"),
                        "max_lifetime": retention_policy.get("max_lifetime"),
                    },
                )

            logger.info("Inserted %d rows into room_retention", len(rows))

            self.db_pool.updates._background_update_progress_txn(
                txn, "insert_room_retention", {"room_id": rows[-1]["room_id"]}
            )
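            # Fewer rows than the batch size means we have reached the end of
            # the table and the background update is complete.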
            if batch_size > len(rows):
                return True
            else:
                return False

        end = await self.db_pool.runInteraction(
            "insert_room_retention", _background_insert_retention_txn,
        )

        if end:
            await self.db_pool.updates._end_background_update("insert_room_retention")

        return batch_size
    async def _background_add_rooms_room_version_column(
        self, progress: dict, batch_size: int
    ):
        """Background update to go and add room version information to `rooms`
        table from `current_state_events` table.
        """

        last_room_id = progress.get("room_id", "")

        def _background_add_rooms_room_version_column_txn(txn: LoggingTransaction):
            sql = """
                SELECT room_id, json FROM current_state_events
                INNER JOIN event_json USING (room_id, event_id)
                WHERE room_id > ? AND type = 'm.room.create' AND state_key = ''
                ORDER BY room_id
                LIMIT ?
            """

            txn.execute(sql, (last_room_id, batch_size))

            updates = []
            for room_id, event_json in txn:
                event_dict = db_to_json(event_json)
                room_version_id = event_dict.get("content", {}).get(
                    "room_version", RoomVersions.V1.identifier
                )

                creator = event_dict.get("content").get("creator")

                updates.append((room_id, creator, room_version_id))

            if not updates:
                return True

            new_last_room_id = ""
            for room_id, creator, room_version_id in updates:
                # We upsert here just in case we don't already have a row,
                # mainly for paranoia as much badness would happen if we don't
                # insert the row and then try and get the room version for the
                # room.
                self.db_pool.simple_upsert_txn(
                    txn,
                    table="rooms",
                    keyvalues={"room_id": room_id},
                    values={"room_version": room_version_id},
                    insertion_values={"is_public": False, "creator": creator},
                )
                new_last_room_id = room_id

            self.db_pool.updates._background_update_progress_txn(
                txn, self.ADD_ROOMS_ROOM_VERSION_COLUMN, {"room_id": new_last_room_id}
            )

            return False

        end = await self.db_pool.runInteraction(
            "_background_add_rooms_room_version_column",
            _background_add_rooms_room_version_column_txn,
        )

        if end:
            await self.db_pool.updates._end_background_update(
                self.ADD_ROOMS_ROOM_VERSION_COLUMN
            )

        return batch_size
    async def _remove_tombstoned_rooms_from_directory(
        self, progress, batch_size
    ) -> int:
        """Removes any rooms with tombstone events from the room directory

        Nowadays this is handled by the room upgrade handler, but we may have some
        that got left behind
        """

        last_room = progress.get("room_id", "")

        def _get_rooms(txn):
            txn.execute(
                """
                SELECT room_id
                FROM rooms r
                INNER JOIN current_state_events cse USING (room_id)
                WHERE room_id > ? AND r.is_public
                AND cse.type = '%s' AND cse.state_key = ''
                ORDER BY room_id ASC
                LIMIT ?;
                """
                % EventTypes.Tombstone,
                (last_room, batch_size),
            )

            return [row[0] for row in txn]

        rooms = await self.db_pool.runInteraction(
            "get_tombstoned_directory_rooms", _get_rooms
        )

        if not rooms:
            await self.db_pool.updates._end_background_update(
                self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE
            )
            return 0

        for room_id in rooms:
            logger.info("Removing tombstoned room %s from the directory", room_id)
            await self.set_room_is_public(room_id, False)

        await self.db_pool.updates._background_update_progress(
            self.REMOVE_TOMESTONED_ROOMS_BG_UPDATE, {"room_id": rooms[-1]}
        )

        return len(rooms)

    @abstractmethod
    def set_room_is_public(self, room_id, is_public):
        # this will need to be implemented if a background update is performed with
        # existing (tombstoned, public) rooms in the database.
        #
        # It's overridden by RoomStore for the synapse master.
        raise NotImplementedError()

class RoomStore(RoomBackgroundUpdateStore, RoomWorkerStore, SearchStore):
    def __init__(self, database: DatabasePool, db_conn, hs):
        super(RoomStore, self).__init__(database, db_conn, hs)

        self.config = hs.config

    async def upsert_room_on_join(self, room_id: str, room_version: RoomVersion):
        """Ensure that the room is stored in the table

        Called when we join a room over federation, and overwrites any room version
        currently in the table.
        """
        await self.db_pool.simple_upsert(
            desc="upsert_room_on_join",
            table="rooms",
            keyvalues={"room_id": room_id},
            values={"room_version": room_version.identifier},
            insertion_values={"is_public": False, "creator": ""},
            # rooms has a unique constraint on room_id, so no need to lock when doing an
            # emulated upsert.
            lock=False,
        )
    async def store_room(
        self,
        room_id: str,
        room_creator_user_id: str,
        is_public: bool,
        room_version: RoomVersion,
    ):
        """Stores a room.

        Args:
            room_id: The desired room ID, can be None.
            room_creator_user_id: The user ID of the room creator.
            is_public: True to indicate that this room should appear in
                public room lists.
            room_version: The version of the room
        Raises:
            StoreError if the room could not be stored.
        """
        try:

            def store_room_txn(txn, next_id):
                self.db_pool.simple_insert_txn(
                    txn,
                    "rooms",
                    {
                        "room_id": room_id,
                        "creator": room_creator_user_id,
                        "is_public": is_public,
                        "room_version": room_version.identifier,
                    },
                )
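                # A newly published room also needs a row in the public room list
                # stream so that the change is replicated to workers and clients.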
                if is_public:
                    self.db_pool.simple_insert_txn(
                        txn,
                        table="public_room_list_stream",
                        values={
                            "stream_id": next_id,
                            "room_id": room_id,
                            "visibility": is_public,
                        },
                    )

            with self._public_room_id_gen.get_next() as next_id:
                await self.db_pool.runInteraction(
                    "store_room_txn", store_room_txn, next_id
                )
        except Exception as e:
            logger.error("store_room with room_id=%s failed: %s", room_id, e)
            raise StoreError(500, "Problem creating room.")
    async def maybe_store_room_on_invite(self, room_id: str, room_version: RoomVersion):
        """
        When we receive an invite over federation, store the version of the room if we
        don't already know the room version.
        """
        await self.db_pool.simple_upsert(
            desc="maybe_store_room_on_invite",
            table="rooms",
            keyvalues={"room_id": room_id},
            values={},
            insertion_values={
                "room_version": room_version.identifier,
                "is_public": False,
                "creator": "",
            },
            # rooms has a unique constraint on room_id, so no need to lock when doing an
            # emulated upsert.
            lock=False,
        )
    async def set_room_is_public(self, room_id, is_public):
        def set_room_is_public_txn(txn, next_id):
            self.db_pool.simple_update_one_txn(
                txn,
                table="rooms",
                keyvalues={"room_id": room_id},
                updatevalues={"is_public": is_public},
            )

            entries = self.db_pool.simple_select_list_txn(
                txn,
                table="public_room_list_stream",
                keyvalues={
                    "room_id": room_id,
                    "appservice_id": None,
                    "network_id": None,
                },
                retcols=("stream_id", "visibility"),
            )

            entries.sort(key=lambda r: r["stream_id"])
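            # Only append a new entry to the public room stream if the room's
            # visibility has actually changed since the most recent entry.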
            add_to_stream = True
            if entries:
                add_to_stream = bool(entries[-1]["visibility"]) != is_public

            if add_to_stream:
                self.db_pool.simple_insert_txn(
                    txn,
                    table="public_room_list_stream",
                    values={
                        "stream_id": next_id,
                        "room_id": room_id,
                        "visibility": is_public,
                        "appservice_id": None,
                        "network_id": None,
                    },
                )

        with self._public_room_id_gen.get_next() as next_id:
            await self.db_pool.runInteraction(
                "set_room_is_public", set_room_is_public_txn, next_id
            )
        self.hs.get_notifier().on_new_replication_data()
    async def set_room_is_public_appservice(
        self, room_id, appservice_id, network_id, is_public
    ):
        """Edit the appservice/network specific public room list.

        Each appservice can have a number of published room lists associated
        with them, keyed off of an appservice defined `network_id`, which
        basically represents a single instance of a bridge to a third party
        network.

        Args:
            room_id (str)
            appservice_id (str)
            network_id (str)
            is_public (bool): Whether to publish or unpublish the room from the
                list.
        """

        def set_room_is_public_appservice_txn(txn, next_id):
            if is_public:
                try:
                    self.db_pool.simple_insert_txn(
                        txn,
                        table="appservice_room_list",
                        values={
                            "appservice_id": appservice_id,
                            "network_id": network_id,
                            "room_id": room_id,
                        },
                    )
                except self.database_engine.module.IntegrityError:
                    # We've already inserted, nothing to do.
                    return
            else:
                self.db_pool.simple_delete_txn(
                    txn,
                    table="appservice_room_list",
                    keyvalues={
                        "appservice_id": appservice_id,
                        "network_id": network_id,
                        "room_id": room_id,
                    },
                )

            entries = self.db_pool.simple_select_list_txn(
                txn,
                table="public_room_list_stream",
                keyvalues={
                    "room_id": room_id,
                    "appservice_id": appservice_id,
                    "network_id": network_id,
                },
                retcols=("stream_id", "visibility"),
            )

            entries.sort(key=lambda r: r["stream_id"])

            add_to_stream = True
            if entries:
                add_to_stream = bool(entries[-1]["visibility"]) != is_public

            if add_to_stream:
                self.db_pool.simple_insert_txn(
                    txn,
                    table="public_room_list_stream",
                    values={
                        "stream_id": next_id,
                        "room_id": room_id,
                        "visibility": is_public,
                        "appservice_id": appservice_id,
                        "network_id": network_id,
                    },
                )

        with self._public_room_id_gen.get_next() as next_id:
            await self.db_pool.runInteraction(
                "set_room_is_public_appservice",
                set_room_is_public_appservice_txn,
                next_id,
            )
        self.hs.get_notifier().on_new_replication_data()
    def get_room_count(self):
        """Retrieve the total number of rooms.
        """

        def f(txn):
            sql = "SELECT count(*) FROM rooms"
            txn.execute(sql)
            row = txn.fetchone()
            return row[0] or 0

        return self.db_pool.runInteraction("get_rooms", f)
    def add_event_report(
        self, room_id, event_id, user_id, reason, content, received_ts
    ):
        next_id = self._event_reports_id_gen.get_next()
        return self.db_pool.simple_insert(
            table="event_reports",
            values={
                "id": next_id,
                "received_ts": received_ts,
                "room_id": room_id,
                "event_id": event_id,
                "user_id": user_id,
                "reason": reason,
                "content": json.dumps(content),
            },
            desc="add_event_report",
        )

    def get_current_public_room_stream_id(self):
        return self._public_room_id_gen.get_current_token()
    async def block_room(self, room_id: str, user_id: str) -> None:
        """Marks the room as blocked. Can be called multiple times.

        Args:
            room_id: Room to block
            user_id: Who blocked it
        """
        await self.db_pool.simple_upsert(
            table="blocked_rooms",
            keyvalues={"room_id": room_id},
            values={},
            insertion_values={"user_id": user_id},
            desc="block_room",
        )
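        # Evict the cached is_room_blocked entry on this worker and stream the
        # invalidation to the other workers.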
        await self.db_pool.runInteraction(
            "block_room_invalidation",
            self._invalidate_cache_and_stream,
            self.is_room_blocked,
            (room_id,),
        )
    async def get_rooms_for_retention_period_in_range(
        self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False
    ) -> Dict[str, dict]:
        """Retrieves all of the rooms within the given retention range.

        Optionally includes the rooms which don't have a retention policy.

        Args:
            min_ms: Duration in milliseconds that defines the lower limit of
                the range to handle (exclusive). If None, doesn't set a lower limit.
            max_ms: Duration in milliseconds that defines the upper limit of
                the range to handle (inclusive). If None, doesn't set an upper limit.
            include_null: Whether to include rooms whose retention policy is NULL
                in the returned set.

        Returns:
            The rooms within this range, along with their retention
            policy. The key is "room_id", and maps to a dict describing the retention
            policy associated with this room ID. The keys for this nested dict are
            "min_lifetime" (int|None), and "max_lifetime" (int|None).
        """

        def get_rooms_for_retention_period_in_range_txn(txn):
            range_conditions = []
            args = []

            if min_ms is not None:
                range_conditions.append("max_lifetime > ?")
                args.append(min_ms)

            if max_ms is not None:
                range_conditions.append("max_lifetime <= ?")
                args.append(max_ms)

            # Do a first query which will retrieve the rooms that have a retention policy
            # in their current state.
            sql = """
                SELECT room_id, min_lifetime, max_lifetime FROM room_retention
                INNER JOIN current_state_events USING (event_id, room_id)
                """

            if len(range_conditions):
                sql += " WHERE (" + " AND ".join(range_conditions) + ")"

                if include_null:
                    sql += " OR max_lifetime IS NULL"

            txn.execute(sql, args)

            rows = self.db_pool.cursor_to_dict(txn)
            rooms_dict = {}

            for row in rows:
                rooms_dict[row["room_id"]] = {
                    "min_lifetime": row["min_lifetime"],
                    "max_lifetime": row["max_lifetime"],
                }

            if include_null:
                # If required, do a second query that retrieves all of the rooms we know
                # of so we can handle rooms with no retention policy.
                sql = "SELECT DISTINCT room_id FROM current_state_events"

                txn.execute(sql)

                rows = self.db_pool.cursor_to_dict(txn)

                # If a room isn't already in the dict (i.e. it doesn't have a retention
                # policy in its state), add it with a null policy.
                for row in rows:
                    if row["room_id"] not in rooms_dict:
                        rooms_dict[row["room_id"]] = {
                            "min_lifetime": None,
                            "max_lifetime": None,
                        }

            return rooms_dict

        rooms = await self.db_pool.runInteraction(
            "get_rooms_for_retention_period_in_range",
            get_rooms_for_retention_period_in_range_txn,
        )

        return rooms