
Make the media `/upload` tracing less ambiguous (#15888)

A lot of the functions in this space share the same name, like `store_file`,
and we also call them multiple times for different reasons (main media repo,
other storage providers, thumbnails, etc.), so it's good to differentiate
them so your head doesn't explode.

Follow-up to https://github.com/matrix-org/synapse/pull/15850

Adds tracing instrumentation to the media `/upload` code paths to investigate https://github.com/matrix-org/synapse/issues/15841
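
For context, the change leans on the difference between the two decorators: `@trace` names the span after the bare function (so every `store_file` looks the same in a trace), while `@trace_with_opname(...)` takes an explicit operation name. A minimal sketch of that difference, assuming a Synapse dev environment with tracing configured (the classes below are illustrative, not code from this PR):

```python
# Minimal sketch of the naming difference; illustrative only, not part of this PR.
from synapse.logging.opentracing import trace, trace_with_opname


class MainRepoSketch:
    @trace  # span name is just "store_file" - indistinguishable from other store_files
    async def store_file(self, source, file_info):
        ...


class ProviderSketch:
    @trace_with_opname("ProviderSketch.store_file")  # span name carries the class
    async def store_file(self, path, file_info):
        ...
```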
tags/v1.88.0rc1
Eric Eastwood committed 10 months ago (via GitHub) · 2328e90fbb
3 changed files with 62 additions and 42 deletions
1. changelog.d/15888.misc (+1 / -0)
2. synapse/media/media_storage.py (+48 / -30)
3. synapse/media/storage_provider.py (+13 / -12)

changelog.d/15888.misc (+1 / -0)

@@ -0,0 +1 @@
+Add tracing to media `/upload` code paths.

synapse/media/media_storage.py (+48 / -30)

@@ -38,7 +38,7 @@ from twisted.protocols.basic import FileSender
 
 from synapse.api.errors import NotFoundError
 from synapse.logging.context import defer_to_thread, make_deferred_yieldable
-from synapse.logging.opentracing import trace
+from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
 from synapse.util import Clock
 from synapse.util.file_consumer import BackgroundFileConsumer

@@ -77,7 +77,7 @@ class MediaStorage:
         self._spam_checker_module_callbacks = hs.get_module_api_callbacks().spam_checker
         self.clock = hs.get_clock()
 
-    @trace
+    @trace_with_opname("MediaStorage.store_file")
     async def store_file(self, source: IO, file_info: FileInfo) -> str:
         """Write `source` to the on disk media store, and also any other
         configured storage providers
@@ -91,18 +91,19 @@
         """
 
         with self.store_into_file(file_info) as (f, fname, finish_cb):
-            # Write to the main repository
+            # Write to the main media repository
             await self.write_to_file(source, f)
+            # Write to the other storage providers
             await finish_cb()
 
         return fname
 
-    @trace
+    @trace_with_opname("MediaStorage.write_to_file")
     async def write_to_file(self, source: IO, output: IO) -> None:
         """Asynchronously write the `source` to `output`."""
         await defer_to_thread(self.reactor, _write_file_synchronously, source, output)
 
-    @trace
+    @trace_with_opname("MediaStorage.store_into_file")
     @contextlib.contextmanager
     def store_into_file(
         self, file_info: FileInfo
@@ -117,9 +118,9 @@
         fname can be used to read the contents from after upload, e.g. to
         generate thumbnails.
 
-        finish_cb must be called and waited on after the file has been
-        successfully been written to. Should not be called if there was an
-        error.
+        finish_cb must be called and waited on after the file has been successfully been
+        written to. Should not be called if there was an error. Checks for spam and
+        stores the file into the configured storage providers.
 
         Args:
             file_info: Info about the file to store
@@ -139,35 +140,48 @@
 
         finished_called = [False]
 
+        main_media_repo_write_trace_scope = start_active_span(
+            "writing to main media repo"
+        )
+        main_media_repo_write_trace_scope.__enter__()
+
         try:
             with open(fname, "wb") as f:
 
                 async def finish() -> None:
-                    # Ensure that all writes have been flushed and close the
-                    # file.
-                    f.flush()
-                    f.close()
-
-                    spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
-                        ReadableFileWrapper(self.clock, fname), file_info
-                    )
-                    if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
-                        logger.info("Blocking media due to spam checker")
-                        # Note that we'll delete the stored media, due to the
-                        # try/except below. The media also won't be stored in
-                        # the DB.
-                        # We currently ignore any additional field returned by
-                        # the spam-check API.
-                        raise SpamMediaException(errcode=spam_check[0])
-
-                    for provider in self.storage_providers:
-                        await provider.store_file(path, file_info)
-
-                    finished_called[0] = True
+                    # When someone calls finish, we assume they are done writing to the main media repo
+                    main_media_repo_write_trace_scope.__exit__(None, None, None)
+
+                    with start_active_span("writing to other storage providers"):
+                        # Ensure that all writes have been flushed and close the
+                        # file.
+                        f.flush()
+                        f.close()
+
+                        spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
+                            ReadableFileWrapper(self.clock, fname), file_info
+                        )
+                        if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
+                            logger.info("Blocking media due to spam checker")
+                            # Note that we'll delete the stored media, due to the
+                            # try/except below. The media also won't be stored in
+                            # the DB.
+                            # We currently ignore any additional field returned by
+                            # the spam-check API.
+                            raise SpamMediaException(errcode=spam_check[0])
+
+                        for provider in self.storage_providers:
+                            with start_active_span(str(provider)):
+                                await provider.store_file(path, file_info)
+
+                        finished_called[0] = True
 
                 yield f, fname, finish
         except Exception as e:
             try:
+                main_media_repo_write_trace_scope.__exit__(
+                    type(e), None, e.__traceback__
+                )
                 os.remove(fname)
             except Exception:
                 pass
@@ -175,7 +189,11 @@
             raise e from None
 
         if not finished_called:
-            raise Exception("Finished callback not called")
+            exc = Exception("Finished callback not called")
+            main_media_repo_write_trace_scope.__exit__(
+                type(exc), None, exc.__traceback__
+            )
+            raise exc
 
     async def fetch_media(self, file_info: FileInfo) -> Optional[Responder]:
         """Attempts to fetch media described by file_info from the local cache


synapse/media/storage_provider.py (+13 / -12)

@@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Callable, Optional
 
 from synapse.config._base import Config
 from synapse.logging.context import defer_to_thread, run_in_background
-from synapse.logging.opentracing import trace
+from synapse.logging.opentracing import start_active_span, trace_with_opname
 from synapse.util.async_helpers import maybe_awaitable
 
 from ._base import FileInfo, Responder
@@ -87,7 +87,7 @@ class StorageProviderWrapper(StorageProvider):
     def __str__(self) -> str:
         return "StorageProviderWrapper[%s]" % (self.backend,)
 
-    @trace
+    @trace_with_opname("StorageProviderWrapper.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         if not file_info.server_name and not self.store_local:
             return None
@@ -116,7 +116,7 @@
 
         run_in_background(store)
 
-    @trace
+    @trace_with_opname("StorageProviderWrapper.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         if file_info.url_cache:
             # Files in the URL preview cache definitely aren't stored here,
@@ -144,7 +144,7 @@ class FileStorageProviderBackend(StorageProvider):
     def __str__(self) -> str:
         return "FileStorageProviderBackend[%s]" % (self.base_directory,)
 
-    @trace
+    @trace_with_opname("FileStorageProviderBackend.store_file")
     async def store_file(self, path: str, file_info: FileInfo) -> None:
         """See StorageProvider.store_file"""

@@ -156,14 +156,15 @@
 
         # mypy needs help inferring the type of the second parameter, which is generic
         shutil_copyfile: Callable[[str, str], str] = shutil.copyfile
-        await defer_to_thread(
-            self.hs.get_reactor(),
-            shutil_copyfile,
-            primary_fname,
-            backup_fname,
-        )
-
-    @trace
+        with start_active_span("shutil_copyfile"):
+            await defer_to_thread(
+                self.hs.get_reactor(),
+                shutil_copyfile,
+                primary_fname,
+                backup_fname,
+            )
+
+    @trace_with_opname("FileStorageProviderBackend.fetch")
     async def fetch(self, path: str, file_info: FileInfo) -> Optional[Responder]:
         """See StorageProvider.fetch"""


