|
- # Copyright 2015-2022 The Matrix.org Foundation C.I.C.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
-
- import gc
- import logging
- import platform
- import time
- from typing import Iterable
-
- from prometheus_client.core import (
- REGISTRY,
- CounterMetricFamily,
- Gauge,
- GaugeMetricFamily,
- Histogram,
- Metric,
- )
-
- from twisted.internet import task
-
- from synapse.metrics._types import Collector
-
- """Prometheus metrics for garbage collection"""
-
-
- logger = logging.getLogger(__name__)
-
- # The minimum time in seconds between GCs for each generation, regardless of the current GC
- # thresholds and counts.
- MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
-
- running_on_pypy = platform.python_implementation() == "PyPy"
-
- #
- # Python GC metrics
- #
-
- gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
- gc_time = Histogram(
- "python_gc_time",
- "Time taken to GC (sec)",
- ["gen"],
- buckets=[
- 0.0025,
- 0.005,
- 0.01,
- 0.025,
- 0.05,
- 0.10,
- 0.25,
- 0.50,
- 1.00,
- 2.50,
- 5.00,
- 7.50,
- 15.00,
- 30.00,
- 45.00,
- 60.00,
- ],
- )
-
-
- class GCCounts(Collector):
- def collect(self) -> Iterable[Metric]:
- cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
- for n, m in enumerate(gc.get_count()):
- cm.add_metric([str(n)], m)
-
- yield cm
-
-
- def install_gc_manager() -> None:
- """Disable automatic GC, and replace it with a task that runs every 100ms
-
- This means that (a) we can limit how often GC runs; (b) we can get some metrics
- about GC activity.
-
- It does nothing on PyPy.
- """
-
- if running_on_pypy:
- return
-
- REGISTRY.register(GCCounts())
-
- gc.disable()
-
- # The time (in seconds since the epoch) of the last time we did a GC for each generation.
- _last_gc = [0.0, 0.0, 0.0]
-
- def _maybe_gc() -> None:
- # Check if we need to do a manual GC (since its been disabled), and do
- # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
- # promote an object into gen 2, and we don't want to handle the same
- # object multiple times.
- threshold = gc.get_threshold()
- counts = gc.get_count()
- end = time.time()
- for i in (2, 1, 0):
- # We check if we need to do one based on a straightforward
- # comparison between the threshold and count. We also do an extra
- # check to make sure that we don't a GC too often.
- if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
- if i == 0:
- logger.debug("Collecting gc %d", i)
- else:
- logger.info("Collecting gc %d", i)
-
- start = time.time()
- unreachable = gc.collect(i)
- end = time.time()
-
- _last_gc[i] = end
-
- gc_time.labels(i).observe(end - start)
- gc_unreachable.labels(i).set(unreachable)
-
- gc_task = task.LoopingCall(_maybe_gc)
- gc_task.start(0.1)
-
-
- #
- # PyPy GC / memory metrics
- #
-
-
- class PyPyGCStats(Collector):
- def collect(self) -> Iterable[Metric]:
- # @stats is a pretty-printer object with __str__() returning a nice table,
- # plus some fields that contain data from that table.
- # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB').
- stats = gc.get_stats(memory_pressure=False) # type: ignore
- # @s contains same fields as @stats, but as actual integers.
- s = stats._s # type: ignore
-
- # also note that field naming is completely braindead
- # and only vaguely correlates with the pretty-printed table.
- # >>>> gc.get_stats(False)
- # Total memory consumed:
- # GC used: 8.7MB (peak: 39.0MB) # s.total_gc_memory, s.peak_memory
- # in arenas: 3.0MB # s.total_arena_memory
- # rawmalloced: 1.7MB # s.total_rawmalloced_memory
- # nursery: 4.0MB # s.nursery_size
- # raw assembler used: 31.0kB # s.jit_backend_used
- # -----------------------------
- # Total: 8.8MB # stats.memory_used_sum
- #
- # Total memory allocated:
- # GC allocated: 38.7MB (peak: 41.1MB) # s.total_allocated_memory, s.peak_allocated_memory
- # in arenas: 30.9MB # s.peak_arena_memory
- # rawmalloced: 4.1MB # s.peak_rawmalloced_memory
- # nursery: 4.0MB # s.nursery_size
- # raw assembler allocated: 1.0MB # s.jit_backend_allocated
- # -----------------------------
- # Total: 39.7MB # stats.memory_allocated_sum
- #
- # Total time spent in GC: 0.073 # s.total_gc_time
-
- pypy_gc_time = CounterMetricFamily(
- "pypy_gc_time_seconds_total",
- "Total time spent in PyPy GC",
- labels=[],
- )
- pypy_gc_time.add_metric([], s.total_gc_time / 1000)
- yield pypy_gc_time
-
- pypy_mem = GaugeMetricFamily(
- "pypy_memory_bytes",
- "Memory tracked by PyPy allocator",
- labels=["state", "class", "kind"],
- )
- # memory used by JIT assembler
- pypy_mem.add_metric(["used", "", "jit"], s.jit_backend_used)
- pypy_mem.add_metric(["allocated", "", "jit"], s.jit_backend_allocated)
- # memory used by GCed objects
- pypy_mem.add_metric(["used", "", "arenas"], s.total_arena_memory)
- pypy_mem.add_metric(["allocated", "", "arenas"], s.peak_arena_memory)
- pypy_mem.add_metric(["used", "", "rawmalloced"], s.total_rawmalloced_memory)
- pypy_mem.add_metric(["allocated", "", "rawmalloced"], s.peak_rawmalloced_memory)
- pypy_mem.add_metric(["used", "", "nursery"], s.nursery_size)
- pypy_mem.add_metric(["allocated", "", "nursery"], s.nursery_size)
- # totals
- pypy_mem.add_metric(["used", "totals", "gc"], s.total_gc_memory)
- pypy_mem.add_metric(["allocated", "totals", "gc"], s.total_allocated_memory)
- pypy_mem.add_metric(["used", "totals", "gc_peak"], s.peak_memory)
- pypy_mem.add_metric(["allocated", "totals", "gc_peak"], s.peak_allocated_memory)
- yield pypy_mem
-
-
- if running_on_pypy:
- REGISTRY.register(PyPyGCStats())
|