You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

189 lines
5.3 KiB

  1. # Copyright 2014-2016 OpenMarket Ltd
  2. # Copyright 2020 The Matrix.org Foundation C.I.C.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import heapq
  16. from itertools import islice
  17. from typing import (
  18. Callable,
  19. Collection,
  20. Dict,
  21. Generator,
  22. Iterable,
  23. Iterator,
  24. List,
  25. Mapping,
  26. Set,
  27. Sized,
  28. Tuple,
  29. TypeVar,
  30. )
  31. from typing_extensions import Protocol
  32. T = TypeVar("T")
  33. S = TypeVar("S", bound="_SelfSlice")
  34. class _SelfSlice(Sized, Protocol):
  35. """A helper protocol that matches types where taking a slice results in the
  36. same type being returned.
  37. This is more specific than `Sequence`, which allows another `Sequence` to be
  38. returned.
  39. """
  40. def __getitem__(self: S, i: slice) -> S:
  41. ...
  42. def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
  43. """batch an iterable up into tuples with a maximum size
  44. Args:
  45. iterable: the iterable to slice
  46. size: the maximum batch size
  47. Returns:
  48. an iterator over the chunks
  49. """
  50. # make sure we can deal with iterables like lists too
  51. sourceiter = iter(iterable)
  52. # call islice until it returns an empty tuple
  53. return iter(lambda: tuple(islice(sourceiter, size)), ())
  54. def chunk_seq(iseq: S, maxlen: int) -> Iterator[S]:
  55. """Split the given sequence into chunks of the given size
  56. The last chunk may be shorter than the given size.
  57. If the input is empty, no chunks are returned.
  58. """
  59. return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen))
  60. def partition(
  61. iterable: Iterable[T], predicate: Callable[[T], bool]
  62. ) -> Tuple[List[T], List[T]]:
  63. """
  64. Separate a given iterable into two lists based on the result of a predicate function.
  65. Args:
  66. iterable: the iterable to partition (separate)
  67. predicate: a function that takes an item from the iterable and returns a boolean
  68. Returns:
  69. A tuple of two lists, the first containing all items for which the predicate
  70. returned True, the second containing all items for which the predicate returned
  71. False
  72. """
  73. true_results = []
  74. false_results = []
  75. for item in iterable:
  76. if predicate(item):
  77. true_results.append(item)
  78. else:
  79. false_results.append(item)
  80. return true_results, false_results
  81. def sorted_topologically(
  82. nodes: Iterable[T],
  83. graph: Mapping[T, Collection[T]],
  84. ) -> Generator[T, None, None]:
  85. """Given a set of nodes and a graph, yield the nodes in toplogical order.
  86. For example `sorted_topologically([1, 2], {1: [2]})` will yield `2, 1`.
  87. """
  88. # This is implemented by Kahn's algorithm.
  89. degree_map = {node: 0 for node in nodes}
  90. reverse_graph: Dict[T, Set[T]] = {}
  91. for node, edges in graph.items():
  92. if node not in degree_map:
  93. continue
  94. for edge in set(edges):
  95. if edge in degree_map:
  96. degree_map[node] += 1
  97. reverse_graph.setdefault(edge, set()).add(node)
  98. reverse_graph.setdefault(node, set())
  99. zero_degree = [node for node, degree in degree_map.items() if degree == 0]
  100. heapq.heapify(zero_degree)
  101. while zero_degree:
  102. node = heapq.heappop(zero_degree)
  103. yield node
  104. for edge in reverse_graph.get(node, []):
  105. if edge in degree_map:
  106. degree_map[edge] -= 1
  107. if degree_map[edge] == 0:
  108. heapq.heappush(zero_degree, edge)
  109. def sorted_topologically_batched(
  110. nodes: Iterable[T],
  111. graph: Mapping[T, Collection[T]],
  112. ) -> Generator[Collection[T], None, None]:
  113. r"""Walk the graph topologically, returning batches of nodes where all nodes
  114. that references it have been previously returned.
  115. For example, given the following graph:
  116. A
  117. / \
  118. B C
  119. \ /
  120. D
  121. This function will return: `[[A], [B, C], [D]]`.
  122. This function is useful for e.g. batch persisting events in an auth chain,
  123. where we can only persist an event if all its auth events have already been
  124. persisted.
  125. """
  126. degree_map = {node: 0 for node in nodes}
  127. reverse_graph: Dict[T, Set[T]] = {}
  128. for node, edges in graph.items():
  129. if node not in degree_map:
  130. continue
  131. for edge in set(edges):
  132. if edge in degree_map:
  133. degree_map[node] += 1
  134. reverse_graph.setdefault(edge, set()).add(node)
  135. reverse_graph.setdefault(node, set())
  136. zero_degree = [node for node, degree in degree_map.items() if degree == 0]
  137. while zero_degree:
  138. new_zero_degree = []
  139. for node in zero_degree:
  140. for edge in reverse_graph.get(node, []):
  141. if edge in degree_map:
  142. degree_map[edge] -= 1
  143. if degree_map[edge] == 0:
  144. new_zero_degree.append(edge)
  145. yield zero_degree
  146. zero_degree = new_zero_degree