Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 
 
 
 

226 рядків
8.3 KiB

  1. # Copyright 2020 The Matrix.org Foundation C.I.C.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import logging
  15. import math
  16. import resource
  17. import sys
  18. from typing import TYPE_CHECKING, List, Mapping, Sized, Tuple
  19. from prometheus_client import Gauge
  20. from synapse.metrics.background_process_metrics import wrap_as_background_process
  21. from synapse.types import JsonDict
  22. if TYPE_CHECKING:
  23. from synapse.server import HomeServer
  24. logger = logging.getLogger("synapse.app.homeserver")
  25. # Contains the list of processes we will be monitoring
  26. # currently either 0 or 1
  27. _stats_process: List[Tuple[int, "resource.struct_rusage"]] = []
  28. # Gauges to expose monthly active user control metrics
  29. current_mau_gauge = Gauge("synapse_admin_mau_current", "Current MAU")
  30. current_mau_by_service_gauge = Gauge(
  31. "synapse_admin_mau_current_mau_by_service",
  32. "Current MAU by service",
  33. ["app_service"],
  34. )
  35. max_mau_gauge = Gauge("synapse_admin_mau_max", "MAU Limit")
  36. registered_reserved_users_mau_gauge = Gauge(
  37. "synapse_admin_mau_registered_reserved_users",
  38. "Registered users with reserved threepids",
  39. )
  40. @wrap_as_background_process("phone_stats_home")
  41. async def phone_stats_home(
  42. hs: "HomeServer",
  43. stats: JsonDict,
  44. stats_process: List[Tuple[int, "resource.struct_rusage"]] = _stats_process,
  45. ) -> None:
  46. """Collect usage statistics and send them to the configured endpoint.
  47. Args:
  48. hs: the HomeServer object to use for gathering usage data.
  49. stats: the dict in which to store the statistics sent to the configured
  50. endpoint. Mostly used in tests to figure out the data that is supposed to
  51. be sent.
  52. stats_process: statistics about resource usage of the process.
  53. """
  54. logger.info("Gathering stats for reporting")
  55. now = int(hs.get_clock().time())
  56. # Ensure the homeserver has started.
  57. assert hs.start_time is not None
  58. uptime = int(now - hs.start_time)
  59. if uptime < 0:
  60. uptime = 0
  61. #
  62. # Performance statistics. Keep this early in the function to maintain reliability of `test_performance_100` test.
  63. #
  64. old = stats_process[0]
  65. new = (now, resource.getrusage(resource.RUSAGE_SELF))
  66. stats_process[0] = new
  67. # Get RSS in bytes
  68. stats["memory_rss"] = new[1].ru_maxrss
  69. # Get CPU time in % of a single core, not % of all cores
  70. used_cpu_time = (new[1].ru_utime + new[1].ru_stime) - (
  71. old[1].ru_utime + old[1].ru_stime
  72. )
  73. if used_cpu_time == 0 or new[0] == old[0]:
  74. stats["cpu_average"] = 0
  75. else:
  76. stats["cpu_average"] = math.floor(used_cpu_time / (new[0] - old[0]) * 100)
  77. #
  78. # General statistics
  79. #
  80. store = hs.get_datastores().main
  81. common_metrics = await hs.get_common_usage_metrics_manager().get_metrics()
  82. stats["homeserver"] = hs.config.server.server_name
  83. stats["server_context"] = hs.config.server.server_context
  84. stats["timestamp"] = now
  85. stats["uptime_seconds"] = uptime
  86. version = sys.version_info
  87. stats["python_version"] = "{}.{}.{}".format(
  88. version.major, version.minor, version.micro
  89. )
  90. stats["total_users"] = await store.count_all_users()
  91. total_nonbridged_users = await store.count_nonbridged_users()
  92. stats["total_nonbridged_users"] = total_nonbridged_users
  93. daily_user_type_results = await store.count_daily_user_type()
  94. for name, count in daily_user_type_results.items():
  95. stats["daily_user_type_" + name] = count
  96. room_count = await store.get_room_count()
  97. stats["total_room_count"] = room_count
  98. stats["daily_active_users"] = common_metrics.daily_active_users
  99. stats["monthly_active_users"] = await store.count_monthly_users()
  100. daily_active_e2ee_rooms = await store.count_daily_active_e2ee_rooms()
  101. stats["daily_active_e2ee_rooms"] = daily_active_e2ee_rooms
  102. stats["daily_e2ee_messages"] = await store.count_daily_e2ee_messages()
  103. daily_sent_e2ee_messages = await store.count_daily_sent_e2ee_messages()
  104. stats["daily_sent_e2ee_messages"] = daily_sent_e2ee_messages
  105. stats["daily_active_rooms"] = await store.count_daily_active_rooms()
  106. stats["daily_messages"] = await store.count_daily_messages()
  107. daily_sent_messages = await store.count_daily_sent_messages()
  108. stats["daily_sent_messages"] = daily_sent_messages
  109. r30v2_results = await store.count_r30v2_users()
  110. for name, count in r30v2_results.items():
  111. stats["r30v2_users_" + name] = count
  112. stats["cache_factor"] = hs.config.caches.global_factor
  113. stats["event_cache_size"] = hs.config.caches.event_cache_size
  114. #
  115. # Database version
  116. #
  117. # This only reports info about the *main* database.
  118. stats["database_engine"] = store.db_pool.engine.module.__name__
  119. stats["database_server_version"] = store.db_pool.engine.server_version
  120. #
  121. # Logging configuration
  122. #
  123. synapse_logger = logging.getLogger("synapse")
  124. log_level = synapse_logger.getEffectiveLevel()
  125. stats["log_level"] = logging.getLevelName(log_level)
  126. logger.info(
  127. "Reporting stats to %s: %s" % (hs.config.metrics.report_stats_endpoint, stats)
  128. )
  129. try:
  130. await hs.get_proxied_http_client().put_json(
  131. hs.config.metrics.report_stats_endpoint, stats
  132. )
  133. except Exception as e:
  134. logger.warning("Error reporting stats: %s", e)
  135. def start_phone_stats_home(hs: "HomeServer") -> None:
  136. """
  137. Start the background tasks which report phone home stats.
  138. """
  139. clock = hs.get_clock()
  140. stats: JsonDict = {}
  141. def performance_stats_init() -> None:
  142. _stats_process.clear()
  143. _stats_process.append(
  144. (int(hs.get_clock().time()), resource.getrusage(resource.RUSAGE_SELF))
  145. )
  146. # Rather than update on per session basis, batch up the requests.
  147. # If you increase the loop period, the accuracy of user_daily_visits
  148. # table will decrease
  149. clock.looping_call(
  150. hs.get_datastores().main.generate_user_daily_visits, 5 * 60 * 1000
  151. )
  152. # monthly active user limiting functionality
  153. clock.looping_call(
  154. hs.get_datastores().main.reap_monthly_active_users, 1000 * 60 * 60
  155. )
  156. hs.get_datastores().main.reap_monthly_active_users()
  157. @wrap_as_background_process("generate_monthly_active_users")
  158. async def generate_monthly_active_users() -> None:
  159. current_mau_count = 0
  160. current_mau_count_by_service: Mapping[str, int] = {}
  161. reserved_users: Sized = ()
  162. store = hs.get_datastores().main
  163. if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only:
  164. current_mau_count = await store.get_monthly_active_count()
  165. current_mau_count_by_service = (
  166. await store.get_monthly_active_count_by_service()
  167. )
  168. reserved_users = await store.get_registered_reserved_users()
  169. current_mau_gauge.set(float(current_mau_count))
  170. for app_service, count in current_mau_count_by_service.items():
  171. current_mau_by_service_gauge.labels(app_service).set(float(count))
  172. registered_reserved_users_mau_gauge.set(float(len(reserved_users)))
  173. max_mau_gauge.set(float(hs.config.server.max_mau_value))
  174. if hs.config.server.limit_usage_by_mau or hs.config.server.mau_stats_only:
  175. generate_monthly_active_users()
  176. clock.looping_call(generate_monthly_active_users, 5 * 60 * 1000)
  177. # End of monthly active user settings
  178. if hs.config.metrics.report_stats:
  179. logger.info("Scheduling stats reporting for 3 hour intervals")
  180. clock.looping_call(phone_stats_home, 3 * 60 * 60 * 1000, hs, stats)
  181. # We need to defer this init for the cases that we daemonize
  182. # otherwise the process ID we get is that of the non-daemon process
  183. clock.call_later(0, performance_stats_init)
  184. # We wait 5 minutes to send the first set of stats as the server can
  185. # be quite busy the first few minutes
  186. clock.call_later(5 * 60, phone_stats_home, hs, stats)