You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

219 lines
8.2 KiB

  1. # Copyright 2020 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import logging
  15. from typing import TYPE_CHECKING, Optional
  16. from synapse.events.utils import prune_event_dict
  17. from synapse.metrics.background_process_metrics import wrap_as_background_process
  18. from synapse.storage._base import SQLBaseStore
  19. from synapse.storage.database import (
  20. DatabasePool,
  21. LoggingDatabaseConnection,
  22. LoggingTransaction,
  23. )
  24. from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore
  25. from synapse.storage.databases.main.events_worker import EventsWorkerStore
  26. from synapse.util import json_encoder
  27. if TYPE_CHECKING:
  28. from synapse.server import HomeServer
  29. logger = logging.getLogger(__name__)
  class CensorEventsStore(EventsWorkerStore, CacheInvalidationWorkerStore, SQLBaseStore):
      """Storage methods that replace an event's stored JSON with its pruned form.

      Two entry points are provided:

      * `_censor_redactions`, a periodic background job that censors events
        whose redaction is older than the configured retention period.
      * `expire_event`, which prunes a single event on request and removes its
        `event_expiry` row.
      """

      def __init__(
          self,
          database: DatabasePool,
          db_conn: LoggingDatabaseConnection,
          hs: "HomeServer",
      ):
          super().__init__(database, db_conn, hs)

          # Only schedule the censoring job when this process is configured to
          # run background tasks and a redaction retention period is set.
          if (
              hs.config.worker.run_background_tasks
              and self.hs.config.server.redaction_retention_period is not None
          ):
              # NOTE(review): 5 * 60 * 1000 is presumably five minutes expressed
              # in milliseconds -- confirm against the clock's looping_call.
              hs.get_clock().looping_call(self._censor_redactions, 5 * 60 * 1000)

      @wrap_as_background_process("_censor_redactions")
      async def _censor_redactions(self) -> None:
          """Censors all redactions older than the configured period that haven't
          been censored yet.

          By censor we mean update the event_json table with the redacted event.

          At most 100 redactions are processed per call. Every redaction that is
          examined is marked `have_censored`, whether or not the target event's
          JSON was actually replaced.
          """
          if self.hs.config.server.redaction_retention_period is None:
              return

          if not (
              await self.db_pool.updates.has_completed_background_update(
                  "redactions_have_censored_ts_idx"
              )
          ):
              # We don't want to run this until the appropriate index has been
              # created.
              return

          # Redactions received at or before this timestamp are due for censoring.
          before_ts = (
              self._clock.time_msec() - self.hs.config.server.redaction_retention_period
          )

          # We fetch all redactions that:
          #   1. have a received_ts from before the cut off, and
          #   2. we haven't yet censored.
          #
          # NOTE(review): the query LEFT JOINs against `events`, so redactions
          # whose target event we do not have are *also* returned; they are
          # handled by the "redaction wasn't allowed" branch below.
          #
          # This is limited to 100 events to ensure that we don't try and do too
          # much at once. We'll get called again so this should eventually catch
          # up.
          sql = """
              SELECT redactions.event_id, redacts FROM redactions
              LEFT JOIN events AS original_event ON (
                  redacts = original_event.event_id
              )
              WHERE NOT have_censored
              AND redactions.received_ts <= ?
              ORDER BY redactions.received_ts ASC
              LIMIT ?
          """

          rows = await self.db_pool.execute(
              "_censor_redactions_fetch", None, sql, before_ts, 100
          )

          # Tuples of (redaction event ID, redacted event ID, pruned JSON or None).
          updates = []

          for redaction_id, event_id in rows:
              redaction_event = await self.get_event(redaction_id, allow_none=True)
              original_event = await self.get_event(
                  event_id, allow_rejected=True, allow_none=True
              )

              # If either `get_event` call returns None, or the original event is
              # not flagged as redacted, we treat the redaction as not applied.
              # Either way we know that the result won't change so we mark the
              # fact that we've checked.
              if (
                  redaction_event
                  and original_event
                  and original_event.internal_metadata.is_redacted()
              ):
                  # Redaction was allowed
                  pruned_json: Optional[str] = json_encoder.encode(
                      prune_event_dict(
                          original_event.room_version, original_event.get_dict()
                      )
                  )
              else:
                  # Redaction wasn't allowed
                  pruned_json = None

              updates.append((redaction_id, event_id, pruned_json))

          def _update_censor_txn(txn: LoggingTransaction) -> None:
              # Apply every censoring and every `have_censored` flag within a
              # single database interaction.
              for redaction_id, event_id, pruned_json in updates:
                  if pruned_json:
                      self._censor_event_txn(txn, event_id, pruned_json)

                  self.db_pool.simple_update_one_txn(
                      txn,
                      table="redactions",
                      keyvalues={"event_id": redaction_id},
                      updatevalues={"have_censored": True},
                  )

          await self.db_pool.runInteraction("_update_censor_txn", _update_censor_txn)

      def _censor_event_txn(
          self, txn: LoggingTransaction, event_id: str, pruned_json: str
      ) -> None:
          """Censor an event by replacing its JSON in the event_json table with the
          provided pruned JSON.

          Args:
              txn: The database transaction.
              event_id: The ID of the event to censor.
              pruned_json: The pruned JSON
          """
          self.db_pool.simple_update_one_txn(
              txn,
              table="event_json",
              keyvalues={"event_id": event_id},
              updatevalues={"json": pruned_json},
          )

      async def expire_event(self, event_id: str) -> None:
          """Retrieve and expire an event that has expired, and delete its associated
          expiry timestamp. If the event can't be retrieved, delete its associated
          timestamp so we don't try to expire it again in the future.

          Args:
              event_id: The ID of the event to delete.
          """
          # Try to retrieve the event's content from the database or the event cache.
          #
          # `allow_none=True` is passed, as in `_censor_redactions`, so that an
          # event we can't retrieve yields None and reaches the missing-event
          # branch below.
          # NOTE(review): without it `get_event` is expected to raise for an
          # unknown event, which would leave the expiry row in place -- confirm
          # against EventsWorkerStore.get_event.
          event = await self.get_event(event_id, allow_none=True)

          def delete_expired_event_txn(txn: LoggingTransaction) -> None:
              # Delete the expiry timestamp associated with this event from the database.
              self._delete_event_expiry_txn(txn, event_id)

              if not event:
                  # If we can't find the event, log a warning and delete the expiry date
                  # from the database so that we don't try to expire it again in the
                  # future.
                  logger.warning(
                      "Can't expire event %s because we don't have it.", event_id
                  )
                  return

              # Prune the event's dict then convert it to JSON.
              pruned_json = json_encoder.encode(
                  prune_event_dict(event.room_version, event.get_dict())
              )

              # Update the event_json table to replace the event's JSON with the pruned
              # JSON.
              self._censor_event_txn(txn, event.event_id, pruned_json)

              # We need to invalidate the event cache entry for this event because we
              # changed its content in the database. We can't call
              # self._invalidate_cache_and_stream because self.get_event_cache isn't of the
              # right type.
              self.invalidate_get_event_cache_after_txn(txn, event.event_id)
              # Send that invalidation to replication so that other workers also invalidate
              # the event cache.
              self._send_invalidation_to_replication(
                  txn, "_get_event_cache", (event.event_id,)
              )

          await self.db_pool.runInteraction(
              "delete_expired_event", delete_expired_event_txn
          )

      def _delete_event_expiry_txn(self, txn: LoggingTransaction, event_id: str) -> None:
          """Delete the expiry timestamp associated with an event ID without deleting the
          actual event.

          Args:
              txn: The transaction to use to perform the deletion.
              event_id: The event ID to delete the associated expiry timestamp of.
          """
          self.db_pool.simple_delete_txn(
              txn=txn, table="event_expiry", keyvalues={"event_id": event_id}
          )