You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

263 lines
8.1 KiB

  1. # Copyright 2015, 2016 OpenMarket Ltd
  2. # Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import collections
  16. import logging
  17. import typing
  18. from enum import Enum, auto
  19. from sys import intern
  20. from typing import Any, Callable, Dict, List, Optional, Sized, TypeVar
  21. import attr
  22. from prometheus_client import REGISTRY
  23. from prometheus_client.core import Gauge
  24. from synapse.config.cache import add_resizable_cache
  25. from synapse.util.metrics import DynamicCollectorRegistry
logger = logging.getLogger(__name__)

# Whether to track estimated memory usage of the LruCaches.
TRACK_MEMORY_USAGE = False

# We track cache metrics in a special registry that lets us update the metrics
# just before they are returned from the scrape endpoint.
CACHE_METRIC_REGISTRY = DynamicCollectorRegistry()

# Every cache handed to `register_cache`, keyed by its cache name.
caches_by_name: Dict[str, Sized] = {}

# --- Gauges for ordinary caches ------------------------------------------
# All of these are written by `CacheMetric.collect` for any cache whose type
# is not "response_cache".

# Current number of entries (`len()` of the cache).
cache_size = Gauge(
    "synapse_util_caches_cache_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
# Number of hits recorded so far.
cache_hits = Gauge(
    "synapse_util_caches_cache_hits", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
# Evicted size so far, one series per `EvictionReason` member name.
cache_evicted = Gauge(
    "synapse_util_caches_cache_evicted_size",
    "",
    ["name", "reason"],
    registry=CACHE_METRIC_REGISTRY,
)
# Total lookups, i.e. hits + misses.
cache_total = Gauge(
    "synapse_util_caches_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
# The cache's `max_size` attribute; only set when the cache has a truthy one.
cache_max_size = Gauge(
    "synapse_util_caches_cache_max_size", "", ["name"], registry=CACHE_METRIC_REGISTRY
)
# Only updated when TRACK_MEMORY_USAGE is enabled.
cache_memory_usage = Gauge(
    "synapse_util_caches_cache_size_bytes",
    "Estimated memory usage of the caches",
    ["name"],
    registry=CACHE_METRIC_REGISTRY,
)

# --- Gauges for caches of type "response_cache" ---------------------------
# Same meaning as the size/hits/evicted/total gauges above, but reported under
# separate metric names.
response_cache_size = Gauge(
    "synapse_util_caches_response_cache_size",
    "",
    ["name"],
    registry=CACHE_METRIC_REGISTRY,
)
response_cache_hits = Gauge(
    "synapse_util_caches_response_cache_hits",
    "",
    ["name"],
    registry=CACHE_METRIC_REGISTRY,
)
response_cache_evicted = Gauge(
    "synapse_util_caches_response_cache_evicted_size",
    "",
    ["name", "reason"],
    registry=CACHE_METRIC_REGISTRY,
)
response_cache_total = Gauge(
    "synapse_util_caches_response_cache", "", ["name"], registry=CACHE_METRIC_REGISTRY
)

# Register our custom cache metrics registry with the global registry
REGISTRY.register(CACHE_METRIC_REGISTRY)
  80. class EvictionReason(Enum):
  81. size = auto()
  82. time = auto()
  83. invalidation = auto()
  84. @attr.s(slots=True, auto_attribs=True)
  85. class CacheMetric:
  86. _cache: Sized
  87. _cache_type: str
  88. _cache_name: str
  89. _collect_callback: Optional[Callable]
  90. hits: int = 0
  91. misses: int = 0
  92. eviction_size_by_reason: typing.Counter[EvictionReason] = attr.ib(
  93. factory=collections.Counter
  94. )
  95. memory_usage: Optional[int] = None
  96. def inc_hits(self) -> None:
  97. self.hits += 1
  98. def inc_misses(self) -> None:
  99. self.misses += 1
  100. def inc_evictions(self, reason: EvictionReason, size: int = 1) -> None:
  101. self.eviction_size_by_reason[reason] += size
  102. def inc_memory_usage(self, memory: int) -> None:
  103. if self.memory_usage is None:
  104. self.memory_usage = 0
  105. self.memory_usage += memory
  106. def dec_memory_usage(self, memory: int) -> None:
  107. assert self.memory_usage is not None
  108. self.memory_usage -= memory
  109. def clear_memory_usage(self) -> None:
  110. if self.memory_usage is not None:
  111. self.memory_usage = 0
  112. def describe(self) -> List[str]:
  113. return []
  114. def collect(self) -> None:
  115. try:
  116. if self._cache_type == "response_cache":
  117. response_cache_size.labels(self._cache_name).set(len(self._cache))
  118. response_cache_hits.labels(self._cache_name).set(self.hits)
  119. for reason in EvictionReason:
  120. response_cache_evicted.labels(self._cache_name, reason.name).set(
  121. self.eviction_size_by_reason[reason]
  122. )
  123. response_cache_total.labels(self._cache_name).set(
  124. self.hits + self.misses
  125. )
  126. else:
  127. cache_size.labels(self._cache_name).set(len(self._cache))
  128. cache_hits.labels(self._cache_name).set(self.hits)
  129. for reason in EvictionReason:
  130. cache_evicted.labels(self._cache_name, reason.name).set(
  131. self.eviction_size_by_reason[reason]
  132. )
  133. cache_total.labels(self._cache_name).set(self.hits + self.misses)
  134. max_size = getattr(self._cache, "max_size", None)
  135. if max_size:
  136. cache_max_size.labels(self._cache_name).set(max_size)
  137. if TRACK_MEMORY_USAGE:
  138. # self.memory_usage can be None if nothing has been inserted
  139. # into the cache yet.
  140. cache_memory_usage.labels(self._cache_name).set(
  141. self.memory_usage or 0
  142. )
  143. if self._collect_callback:
  144. self._collect_callback()
  145. except Exception as e:
  146. logger.warning("Error calculating metrics for %s: %s", self._cache_name, e)
  147. raise
  148. def register_cache(
  149. cache_type: str,
  150. cache_name: str,
  151. cache: Sized,
  152. collect_callback: Optional[Callable] = None,
  153. resizable: bool = True,
  154. resize_callback: Optional[Callable] = None,
  155. ) -> CacheMetric:
  156. """Register a cache object for metric collection and resizing.
  157. Args:
  158. cache_type: a string indicating the "type" of the cache. This is used
  159. only for deduplication so isn't too important provided it's constant.
  160. cache_name: name of the cache
  161. cache: cache itself, which must implement __len__(), and may optionally implement
  162. a max_size property
  163. collect_callback: If given, a function which is called during metric
  164. collection to update additional metrics.
  165. resizable: Whether this cache supports being resized, in which case either
  166. resize_callback must be provided, or the cache must support set_max_size().
  167. resize_callback: A function which can be called to resize the cache.
  168. Returns:
  169. an object which provides inc_{hits,misses,evictions} methods
  170. """
  171. if resizable:
  172. if not resize_callback:
  173. resize_callback = cache.set_cache_factor # type: ignore
  174. add_resizable_cache(cache_name, resize_callback)
  175. metric = CacheMetric(cache, cache_type, cache_name, collect_callback)
  176. metric_name = "cache_%s_%s" % (cache_type, cache_name)
  177. caches_by_name[cache_name] = cache
  178. CACHE_METRIC_REGISTRY.register_hook(metric_name, metric.collect)
  179. return metric
  180. KNOWN_KEYS = {
  181. key: key
  182. for key in (
  183. "auth_events",
  184. "content",
  185. "depth",
  186. "event_id",
  187. "hashes",
  188. "origin",
  189. "origin_server_ts",
  190. "prev_events",
  191. "room_id",
  192. "sender",
  193. "signatures",
  194. "state_key",
  195. "type",
  196. "unsigned",
  197. "user_id",
  198. )
  199. }
  200. T = TypeVar("T", Optional[str], str)
  201. def intern_string(string: T) -> T:
  202. """Takes a (potentially) unicode string and interns it if it's ascii"""
  203. if string is None:
  204. return None
  205. try:
  206. return intern(string)
  207. except UnicodeEncodeError:
  208. return string
  209. def intern_dict(dictionary: Dict[str, Any]) -> Dict[str, Any]:
  210. """Takes a dictionary and interns well known keys and their values"""
  211. return {
  212. KNOWN_KEYS.get(key, key): _intern_known_values(key, value)
  213. for key, value in dictionary.items()
  214. }
  215. def _intern_known_values(key: str, value: Any) -> Any:
  216. intern_keys = ("event_id", "room_id", "sender", "user_id", "type", "state_key")
  217. if key in intern_keys:
  218. return intern_string(value)
  219. return value