Remove functionality associated with unused historical stats tables (#9721)

Fixes #9602
il y a 2 ans · f6767abc05
--- a/changelog.d/9721.removal
+++ b/changelog.d/9721.removal
@@ -0,0 +1 @@
 Remove functionality associated with the unused `room_stats_historical` and `user_stats_historical` tables. Contributed by @xmunoz.
--- a/docs/room_and_user_statistics.md
+++ b/docs/room_and_user_statistics.md
@@ -1,9 +1,9 @@
 Room and User Statistics
 ========================
 Synapse maintains room and user statistics (as well as a cache of room state),
 in various tables. These can be used for administrative purposes but are also
 used when generating the public room directory.
 Synapse maintains room and user statistics in various tables. These can be used
 for administrative purposes but are also used when generating the public room
 directory.
 # Synapse Developer Documentation
@@ -15,48 +15,8 @@ used when generating the public room directory.
 * **subject**: Something we are tracking stats about – currently a room or user.
 * **current row**: An entry for a subject in the appropriate current statistics
    table. Each subject can have only one.
 * **historical row**: An entry for a subject in the appropriate historical
    statistics table. Each subject can have any number of these.
 ### Overview
 Stats are maintained as time series. There are two kinds of column:
 * absolute columns – where the value is correct for the time given by `end_ts`
    in the stats row. (Imagine a line graph for these values)
    * They can also be thought of as 'gauges' in Prometheus, if you are familiar.
 * per-slice columns – where the value corresponds to how many of the occurrences
    occurred within the time slice given by `(end_ts − bucket_size)…end_ts`
    or `start_ts…end_ts`. (Imagine a histogram for these values)
 Stats are maintained in two tables (for each type): current and historical.
 Current stats correspond to the present values. Each subject can only have one
 entry.
 Historical stats correspond to values in the past. Subjects may have multiple
 entries.
 ## Concepts around the management of stats
 ### Current rows
 Current rows contain the most up-to-date statistics for a room.
 They only contain absolute columns
 ### Historical rows
 Historical rows can always be considered to be valid for the time slice and
 end time specified.
 * historical rows will not exist for every time slice – they will be omitted
    if there were no changes. In this case, the following assumptions can be
    made to interpolate/recreate missing rows:
    - absolute fields have the same values as in the preceding row
    - per-slice fields are zero (`0`)
 * historical rows will not be retained forever – rows older than a configurable
    time will be purged.
 #### Purge
 The purging of historical rows is not yet implemented.
 Stats correspond to the present values. Current rows contain the most up-to-date
 statistics for a room. Each subject can only have one entry.
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -2652,11 +2652,6 @@ stats:
  #
  #enabled: false
  # The size of each timeslice in the room_stats_historical and
  # user_stats_historical tables, as a time period. Defaults to "1d".
  #
  #bucket_size: 1h
 # Server Notices room configuration
 #
--- a/synapse/config/stats.py
+++ b/synapse/config/stats.py
@@ -38,13 +38,9 @@ class StatsConfig(Config):
    def read_config(self, config, **kwargs):
        self.stats_enabled = True
        self.stats_bucket_size = 86400 * 1000
        stats_config = config.get("stats", None)
        if stats_config:
            self.stats_enabled = stats_config.get("enabled", self.stats_enabled)
            self.stats_bucket_size = self.parse_duration(
                stats_config.get("bucket_size", "1d")
            )
        if not self.stats_enabled:
            logger.warning(ROOM_STATS_DISABLED_WARN)
@@ -59,9 +55,4 @@ class StatsConfig(Config):
          # correctly.
          #
          #enabled: false
          # The size of each timeslice in the room_stats_historical and
          # user_stats_historical tables, as a time period. Defaults to "1d".
          #
          #bucket_size: 1h
        """
--- a/synapse/handlers/stats.py
+++ b/synapse/handlers/stats.py
@@ -45,7 +45,6 @@ class StatsHandler:
        self.clock = hs.get_clock()
        self.notifier = hs.get_notifier()
        self.is_mine_id = hs.is_mine_id
        self.stats_bucket_size = hs.config.stats_bucket_size
        self.stats_enabled = hs.config.stats_enabled
@@ -106,20 +105,6 @@ class StatsHandler:
                room_deltas = {}
                user_deltas = {}
            # Then count deltas for total_events and total_event_bytes.
            (
                room_count,
                user_count,
            ) = await self.store.get_changes_room_total_events_and_bytes(
                self.pos, max_pos
            )
            for room_id, fields in room_count.items():
                room_deltas.setdefault(room_id, Counter()).update(fields)
            for user_id, fields in user_count.items():
                user_deltas.setdefault(user_id, Counter()).update(fields)
            logger.debug("room_deltas: %s", room_deltas)
            logger.debug("user_deltas: %s", user_deltas)
@@ -181,12 +166,10 @@ class StatsHandler:
            event_content = {}  # type: JsonDict
            sender = None
            if event_id is not None:
                event = await self.store.get_event(event_id, allow_none=True)
                if event:
                    event_content = event.content or {}
                    sender = event.sender
            # All the values in this dict are deltas (RELATIVE changes)
            room_stats_delta = room_to_stats_deltas.setdefault(room_id, Counter())
@@ -244,12 +227,6 @@ class StatsHandler:
                    room_stats_delta["joined_members"] += 1
                elif membership == Membership.INVITE:
                    room_stats_delta["invited_members"] += 1
                    if sender and self.is_mine_id(sender):
                        user_to_stats_deltas.setdefault(sender, Counter())[
                            "invites_sent"
                        ] += 1
                elif membership == Membership.LEAVE:
                    room_stats_delta["left_members"] += 1
                elif membership == Membership.BAN:
@@ -279,10 +256,6 @@ class StatsHandler:
                room_state["is_federatable"] = (
                    event_content.get("m.federate", True) is True
                )
                if sender and self.is_mine_id(sender):
                    user_to_stats_deltas.setdefault(sender, Counter())[
                        "rooms_created"
                    ] += 1
            elif typ == EventTypes.JoinRules:
                room_state["join_rules"] = event_content.get("join_rule")
            elif typ == EventTypes.RoomHistoryVisibility:
--- a/synapse/storage/databases/main/purge_events.py
+++ b/synapse/storage/databases/main/purge_events.py
@@ -392,7 +392,6 @@ class PurgeEventsStore(StateGroupWorkerStore, CacheInvalidationWorkerStore):
            "room_memberships",
            "room_stats_state",
            "room_stats_current",
            "room_stats_historical",
            "room_stats_earliest_token",
            "rooms",
            "stream_ordering_to_exterm",
--- a/synapse/storage/databases/main/stats.py
+++ b/synapse/storage/databases/main/stats.py
@@ -26,7 +26,6 @@ from synapse.api.constants import EventTypes, Membership
 from synapse.api.errors import StoreError
 from synapse.storage.database import DatabasePool
 from synapse.storage.databases.main.state_deltas import StateDeltasStore
 from synapse.storage.engines import PostgresEngine
 from synapse.types import JsonDict
 from synapse.util.caches.descriptors import cached
@@ -49,14 +48,6 @@ ABSOLUTE_STATS_FIELDS = {
    "user": ("joined_rooms",),
 }
 # these fields are per-timeslice and so should be reset to 0 upon a new slice
 # You can draw these stats on a histogram.
 # Example: number of events sent locally during a time slice
 PER_SLICE_FIELDS = {
    "room": ("total_events", "total_event_bytes"),
    "user": ("invites_sent", "rooms_created", "total_events", "total_event_bytes"),
 }
 TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user_id")}
 # these are the tables (& ID columns) which contain our actual subjects
@@ -106,7 +97,6 @@ class StatsStore(StateDeltasStore):
        self.server_name = hs.hostname
        self.clock = self.hs.get_clock()
        self.stats_enabled = hs.config.stats_enabled
        self.stats_bucket_size = hs.config.stats_bucket_size
        self.stats_delta_processing_lock = DeferredLock()
@@ -122,22 +112,6 @@ class StatsStore(StateDeltasStore):
        self.db_pool.updates.register_noop_background_update("populate_stats_cleanup")
        self.db_pool.updates.register_noop_background_update("populate_stats_prepare")
    def quantise_stats_time(self, ts):
        """
        Quantises a timestamp to be a multiple of the bucket size.
        Args:
            ts (int): the timestamp to quantise, in milliseconds since the Unix
                Epoch
        Returns:
            int: a timestamp which
              - is divisible by the bucket size;
              - is no later than `ts`; and
              - is the largest such timestamp.
        """
        return (ts // self.stats_bucket_size) * self.stats_bucket_size
    async def _populate_stats_process_users(self, progress, batch_size):
        """
        This is a background update which regenerates statistics for users.
@@ -288,56 +262,6 @@ class StatsStore(StateDeltasStore):
            desc="update_room_state",
        )
    async def get_statistics_for_subject(
        self, stats_type: str, stats_id: str, start: str, size: int = 100
    ) -> List[dict]:
        """
        Get statistics for a given subject.
        Args:
            stats_type: The type of subject
            stats_id: The ID of the subject (e.g. room_id or user_id)
            start: Pagination start. Number of entries, not timestamp.
            size: How many entries to return.
        Returns:
            A list of dicts, where the dict has the keys of
            ABSOLUTE_STATS_FIELDS[stats_type],  and "bucket_size" and "end_ts".
        """
        return await self.db_pool.runInteraction(
            "get_statistics_for_subject",
            self._get_statistics_for_subject_txn,
            stats_type,
            stats_id,
            start,
            size,
        )
    def _get_statistics_for_subject_txn(
        self, txn, stats_type, stats_id, start, size=100
    ):
        """
        Transaction-bound version of L{get_statistics_for_subject}.
        """
        table, id_col = TYPE_TO_TABLE[stats_type]
        selected_columns = list(
            ABSOLUTE_STATS_FIELDS[stats_type] + PER_SLICE_FIELDS[stats_type]
        )
        slice_list = self.db_pool.simple_select_list_paginate_txn(
            txn,
            table + "_historical",
            "end_ts",
            start,
            size,
            retcols=selected_columns + ["bucket_size", "end_ts"],
            keyvalues={id_col: stats_id},
            order_direction="DESC",
        )
        return slice_list
    @cached()
    async def get_earliest_token_for_stats(
        self, stats_type: str, id: str
@@ -451,14 +375,10 @@ class StatsStore(StateDeltasStore):
        table, id_col = TYPE_TO_TABLE[stats_type]
        quantised_ts = self.quantise_stats_time(int(ts))
        end_ts = quantised_ts + self.stats_bucket_size
        # Lets be paranoid and check that all the given field names are known
        abs_field_names = ABSOLUTE_STATS_FIELDS[stats_type]
        slice_field_names = PER_SLICE_FIELDS[stats_type]
        for field in chain(fields.keys(), absolute_field_overrides.keys()):
            if field not in abs_field_names and field not in slice_field_names:
            if field not in abs_field_names:
                # guard against potential SQL injection dodginess
                raise ValueError(
                    "%s is not a recognised field"
@@ -491,20 +411,6 @@ class StatsStore(StateDeltasStore):
            additive_relatives=deltas_of_absolute_fields,
        )
        per_slice_additive_relatives = {
            key: fields.get(key, 0) for key in slice_field_names
        }
        self._upsert_copy_from_table_with_additive_relatives_txn(
            txn=txn,
            into_table=table + "_historical",
            keyvalues={id_col: stats_id},
            extra_dst_insvalues={"bucket_size": self.stats_bucket_size},
            extra_dst_keyvalues={"end_ts": end_ts},
            additive_relatives=per_slice_additive_relatives,
            src_table=table + "_current",
            copy_columns=abs_field_names,
        )
    def _upsert_with_additive_relatives_txn(
        self, txn, table, keyvalues, absolutes, additive_relatives
    ):
@@ -572,201 +478,6 @@ class StatsStore(StateDeltasStore):
                current_row.update(absolutes)
                self.db_pool.simple_update_one_txn(txn, table, keyvalues, current_row)
    def _upsert_copy_from_table_with_additive_relatives_txn(
        self,
        txn,
        into_table,
        keyvalues,
        extra_dst_keyvalues,
        extra_dst_insvalues,
        additive_relatives,
        src_table,
        copy_columns,
    ):
        """Updates the historic stats table with latest updates.
        This involves copying "absolute" fields from the `_current` table, and
        adding relative fields to any existing values.
        Args:
             txn: Transaction
             into_table (str): The destination table to UPSERT the row into
             keyvalues (dict[str, any]): Row-identifying key values
             extra_dst_keyvalues (dict[str, any]): Additional keyvalues
                for `into_table`.
             extra_dst_insvalues (dict[str, any]): Additional values to insert
                on new row creation for `into_table`.
             additive_relatives (dict[str, any]): Fields that will be added onto
                if existing row present. (Must be disjoint from copy_columns.)
             src_table (str): The source table to copy from
             copy_columns (iterable[str]): The list of columns to copy
        """
        if self.database_engine.can_native_upsert:
            ins_columns = chain(
                keyvalues,
                copy_columns,
                additive_relatives,
                extra_dst_keyvalues,
                extra_dst_insvalues,
            )
            sel_exprs = chain(
                keyvalues,
                copy_columns,
                (
                    "?"
                    for _ in chain(
                        additive_relatives, extra_dst_keyvalues, extra_dst_insvalues
                    )
                ),
            )
            keyvalues_where = ("%s = ?" % f for f in keyvalues)
            sets_cc = ("%s = EXCLUDED.%s" % (f, f) for f in copy_columns)
            sets_ar = (
                "%s = EXCLUDED.%s + %s.%s" % (f, f, into_table, f)
                for f in additive_relatives
            )
            sql = """
                INSERT INTO %(into_table)s (%(ins_columns)s)
                SELECT %(sel_exprs)s
                FROM %(src_table)s
                WHERE %(keyvalues_where)s
                ON CONFLICT (%(keyvalues)s)
                DO UPDATE SET %(sets)s
            """ % {
                "into_table": into_table,
                "ins_columns": ", ".join(ins_columns),
                "sel_exprs": ", ".join(sel_exprs),
                "keyvalues_where": " AND ".join(keyvalues_where),
                "src_table": src_table,
                "keyvalues": ", ".join(
                    chain(keyvalues.keys(), extra_dst_keyvalues.keys())
                ),
                "sets": ", ".join(chain(sets_cc, sets_ar)),
            }
            qargs = list(
                chain(
                    additive_relatives.values(),
                    extra_dst_keyvalues.values(),
                    extra_dst_insvalues.values(),
                    keyvalues.values(),
                )
            )
            txn.execute(sql, qargs)
        else:
            self.database_engine.lock_table(txn, into_table)
            src_row = self.db_pool.simple_select_one_txn(
                txn, src_table, keyvalues, copy_columns
            )
            all_dest_keyvalues = {**keyvalues, **extra_dst_keyvalues}
            dest_current_row = self.db_pool.simple_select_one_txn(
                txn,
                into_table,
                keyvalues=all_dest_keyvalues,
                retcols=list(chain(additive_relatives.keys(), copy_columns)),
                allow_none=True,
            )
            if dest_current_row is None:
                merged_dict = {
                    **keyvalues,
                    **extra_dst_keyvalues,
                    **extra_dst_insvalues,
                    **src_row,
                    **additive_relatives,
                }
                self.db_pool.simple_insert_txn(txn, into_table, merged_dict)
            else:
                for (key, val) in additive_relatives.items():
                    src_row[key] = dest_current_row[key] + val
                self.db_pool.simple_update_txn(
                    txn, into_table, all_dest_keyvalues, src_row
                )
    async def get_changes_room_total_events_and_bytes(
        self, min_pos: int, max_pos: int
    ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]:
        """Fetches the counts of events in the given range of stream IDs.
        Args:
            min_pos
            max_pos
        Returns:
            Mapping of room ID to field changes.
        """
        return await self.db_pool.runInteraction(
            "stats_incremental_total_events_and_bytes",
            self.get_changes_room_total_events_and_bytes_txn,
            min_pos,
            max_pos,
        )
    def get_changes_room_total_events_and_bytes_txn(
        self, txn, low_pos: int, high_pos: int
    ) -> Tuple[Dict[str, Dict[str, int]], Dict[str, Dict[str, int]]]:
        """Gets the total_events and total_event_bytes counts for rooms and
        senders, in a range of stream_orderings (including backfilled events).
        Args:
            txn
            low_pos: Low stream ordering
            high_pos: High stream ordering
        Returns:
            The room and user deltas for total_events/total_event_bytes in the
            format of `stats_id` -> fields
        """
        if low_pos >= high_pos:
            # nothing to do here.
            return {}, {}
        if isinstance(self.database_engine, PostgresEngine):
            new_bytes_expression = "OCTET_LENGTH(json)"
        else:
            new_bytes_expression = "LENGTH(CAST(json AS BLOB))"
        sql = """
            SELECT events.room_id, COUNT(*) AS new_events, SUM(%s) AS new_bytes
            FROM events INNER JOIN event_json USING (event_id)
            WHERE (? < stream_ordering AND stream_ordering <= ?)
                OR (? <= stream_ordering AND stream_ordering <= ?)
            GROUP BY events.room_id
        """ % (
            new_bytes_expression,
        )
        txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
        room_deltas = {
            room_id: {"total_events": new_events, "total_event_bytes": new_bytes}
            for room_id, new_events, new_bytes in txn
        }
        sql = """
            SELECT events.sender, COUNT(*) AS new_events, SUM(%s) AS new_bytes
            FROM events INNER JOIN event_json USING (event_id)
            WHERE (? < stream_ordering AND stream_ordering <= ?)
                OR (? <= stream_ordering AND stream_ordering <= ?)
            GROUP BY events.sender
        """ % (
            new_bytes_expression,
        )
        txn.execute(sql, (low_pos, high_pos, -high_pos, -low_pos))
        user_deltas = {
            user_id: {"total_events": new_events, "total_event_bytes": new_bytes}
            for user_id, new_events, new_bytes in txn
            if self.hs.is_mine_id(user_id)
        }
        return room_deltas, user_deltas
    async def _calculate_and_set_initial_state_for_room(
        self, room_id: str
    ) -> Tuple[dict, dict, int]:
--- a/synapse/storage/schema/init.py
+++ b/synapse/storage/schema/init.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 SCHEMA_VERSION = 60
 SCHEMA_VERSION = 61
 """Represents the expectations made by the codebase about the database schema
 This should be incremented whenever the codebase changes its requirements on the
@@ -21,6 +21,10 @@ older versions of Synapse).
 See `README.md <synapse/storage/schema/README.md>`_  for more information on how this
 works.
 Changes in SCHEMA_VERSION = 61:
    - The `user_stats_historical` and `room_stats_historical` tables are not written and
      are not read (previously, they were written but not read).
 """
--- a/tests/handlers/test_stats.py
+++ b/tests/handlers/test_stats.py
@@ -88,16 +88,12 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def _get_current_stats(self, stats_type, stat_id):
        table, id_col = stats.TYPE_TO_TABLE[stats_type]
        cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type]) + list(
            stats.PER_SLICE_FIELDS[stats_type]
        )
        end_ts = self.store.quantise_stats_time(self.reactor.seconds() * 1000)
        cols = list(stats.ABSOLUTE_STATS_FIELDS[stats_type])
        return self.get_success(
            self.store.db_pool.simple_select_one(
                table + "_historical",
                {id_col: stat_id, end_ts: end_ts},
                table + "_current",
                {id_col: stat_id},
                cols,
                allow_none=True,
            )
@@ -156,115 +152,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        self.assertEqual(len(r), 1)
        self.assertEqual(r[0]["topic"], "foo")
    def test_initial_earliest_token(self):
        """
        Ingestion via notify_new_event will ignore tokens that the background
        update have already processed.
        """
        self.reactor.advance(86401)
        self.hs.config.stats_enabled = False
        self.handler.stats_enabled = False
        u1 = self.register_user("u1", "pass")
        u1_token = self.login("u1", "pass")
        u2 = self.register_user("u2", "pass")
        u2_token = self.login("u2", "pass")
        u3 = self.register_user("u3", "pass")
        u3_token = self.login("u3", "pass")
        room_1 = self.helper.create_room_as(u1, tok=u1_token)
        self.helper.send_state(
            room_1, event_type="m.room.topic", body={"topic": "foo"}, tok=u1_token
        )
        # Begin the ingestion by creating the temp tables. This will also store
        # the position that the deltas should begin at, once they take over.
        self.hs.config.stats_enabled = True
        self.handler.stats_enabled = True
        self.store.db_pool.updates._all_done = False
        self.get_success(
            self.store.db_pool.simple_update_one(
                table="stats_incremental_position",
                keyvalues={},
                updatevalues={"stream_id": 0},
            )
        )
        self.get_success(
            self.store.db_pool.simple_insert(
                "background_updates",
                {"update_name": "populate_stats_prepare", "progress_json": "{}"},
            )
        )
        while not self.get_success(
            self.store.db_pool.updates.has_completed_background_updates()
        ):
            self.get_success(
                self.store.db_pool.updates.do_next_background_update(100), by=0.1
            )
        # Now, before the table is actually ingested, add some more events.
        self.helper.invite(room=room_1, src=u1, targ=u2, tok=u1_token)
        self.helper.join(room=room_1, user=u2, tok=u2_token)
        # orig_delta_processor = self.store.
        # Now do the initial ingestion.
        self.get_success(
            self.store.db_pool.simple_insert(
                "background_updates",
                {"update_name": "populate_stats_process_rooms", "progress_json": "{}"},
            )
        )
        self.get_success(
            self.store.db_pool.simple_insert(
                "background_updates",
                {
                    "update_name": "populate_stats_cleanup",
                    "progress_json": "{}",
                    "depends_on": "populate_stats_process_rooms",
                },
            )
        )
        self.store.db_pool.updates._all_done = False
        while not self.get_success(
            self.store.db_pool.updates.has_completed_background_updates()
        ):
            self.get_success(
                self.store.db_pool.updates.do_next_background_update(100), by=0.1
            )
        self.reactor.advance(86401)
        # Now add some more events, triggering ingestion. Because of the stream
        # position being set to before the events sent in the middle, a simpler
        # implementation would reprocess those events, and say there were four
        # users, not three.
        self.helper.invite(room=room_1, src=u1, targ=u3, tok=u1_token)
        self.helper.join(room=room_1, user=u3, tok=u3_token)
        # self.handler.notify_new_event()
        # We need to let the delta processor advance…
        self.reactor.advance(10 * 60)
        # Get the slices! There should be two -- day 1, and day 2.
        r = self.get_success(self.store.get_statistics_for_subject("room", room_1, 0))
        self.assertEqual(len(r), 2)
        # The oldest has 2 joined members
        self.assertEqual(r[-1]["joined_members"], 2)
        # The newest has 3
        self.assertEqual(r[0]["joined_members"], 3)
    def test_create_user(self):
        """
        When we create a user, it should have statistics already ready.
@@ -296,22 +183,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        self.assertIsNotNone(r1stats)
        self.assertIsNotNone(r2stats)
        # contains the default things you'd expect in a fresh room
        self.assertEqual(
            r1stats["total_events"],
            EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM,
            "Wrong number of total_events in new room's stats!"
            " You may need to update this if more state events are added to"
            " the room creation process.",
        )
        self.assertEqual(
            r2stats["total_events"],
            EXPT_NUM_STATE_EVTS_IN_FRESH_PRIVATE_ROOM,
            "Wrong number of total_events in new room's stats!"
            " You may need to update this if more state events are added to"
            " the room creation process.",
        )
        self.assertEqual(
            r1stats["current_state_events"], EXPT_NUM_STATE_EVTS_IN_FRESH_PUBLIC_ROOM
        )
@@ -327,24 +198,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        self.assertEqual(r2stats["invited_members"], 0)
        self.assertEqual(r2stats["banned_members"], 0)
    def test_send_message_increments_total_events(self):
        """
        When we send a message, it increments total_events.
        """
        self._perform_background_initial_update()
        u1 = self.register_user("u1", "pass")
        u1token = self.login("u1", "pass")
        r1 = self.helper.create_room_as(u1, tok=u1token)
        r1stats_ante = self._get_current_stats("room", r1)
        self.helper.send(r1, "hiss", tok=u1token)
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
    def test_updating_profile_information_does_not_increase_joined_members_count(self):
        """
        Check that the joined_members count does not increase when a user changes their
@@ -378,7 +231,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_send_state_event_nonoverwriting(self):
        """
        When we send a non-overwriting state event, it increments total_events AND current_state_events
        When we send a non-overwriting state event, it increments current_state_events
        """
        self._perform_background_initial_update()
@@ -399,44 +252,14 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            1,
        )
    def test_send_state_event_overwriting(self):
        """
        When we send an overwriting state event, it increments total_events ONLY
        """
        self._perform_background_initial_update()
        u1 = self.register_user("u1", "pass")
        u1token = self.login("u1", "pass")
        r1 = self.helper.create_room_as(u1, tok=u1token)
        self.helper.send_state(
            r1, "cat.hissing", {"value": True}, tok=u1token, state_key="tabby"
        )
        r1stats_ante = self._get_current_stats("room", r1)
        self.helper.send_state(
            r1, "cat.hissing", {"value": False}, tok=u1token, state_key="tabby"
        )
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            0,
        )
    def test_join_first_time(self):
        """
        When a user joins a room for the first time, total_events, current_state_events and
        When a user joins a room for the first time, current_state_events and
        joined_members should increase by exactly 1.
        """
@@ -455,7 +278,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            1,
@@ -466,7 +288,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_join_after_leave(self):
        """
        When a user joins a room after being previously left, total_events and
        When a user joins a room after being previously left,
        joined_members should increase by exactly 1.
        current_state_events should not increase.
        left_members should decrease by exactly 1.
@@ -490,7 +312,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            0,
@@ -504,7 +325,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_invited(self):
        """
        When a user invites another user, current_state_events, total_events and
        When a user invites another user, current_state_events and
        invited_members should increase by exactly 1.
        """
@@ -522,7 +343,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            1,
@@ -533,7 +353,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_join_after_invite(self):
        """
        When a user joins a room after being invited, total_events and
        When a user joins a room after being invited and
        joined_members should increase by exactly 1.
        current_state_events should not increase.
        invited_members should decrease by exactly 1.
@@ -556,7 +376,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            0,
@@ -570,7 +389,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_left(self):
        """
        When a user leaves a room after joining, total_events and
        When a user leaves a room after joining and
        left_members should increase by exactly 1.
        current_state_events should not increase.
        joined_members should decrease by exactly 1.
@@ -593,7 +412,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            0,
@@ -607,7 +425,7 @@ class StatsRoomTests(unittest.HomeserverTestCase):
    def test_banned(self):
        """
        When a user is banned from a room after joining, total_events and
        When a user is banned from a room after joining and
        left_members should increase by exactly 1.
        current_state_events should not increase.
        banned_members should decrease by exactly 1.
@@ -630,7 +448,6 @@ class StatsRoomTests(unittest.HomeserverTestCase):
        r1stats_post = self._get_current_stats("room", r1)
        self.assertEqual(r1stats_post["total_events"] - r1stats_ante["total_events"], 1)
        self.assertEqual(
            r1stats_post["current_state_events"] - r1stats_ante["current_state_events"],
            0,
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -1753,7 +1753,6 @@ PURGE_TABLES = [
    "room_memberships",
    "room_stats_state",
    "room_stats_current",
    "room_stats_historical",
    "room_stats_earliest_token",
    "rooms",
    "stream_ordering_to_exterm",