From 02cdace707a129823c37d650b13b67fb3a7d7e24 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 10 May 2022 07:43:34 -0400 Subject: [PATCH 001/181] Add class-diagrams and notes for push. (#12676) --- changelog.d/12676.misc | 1 + synapse/push/__init__.py | 79 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 changelog.d/12676.misc diff --git a/changelog.d/12676.misc b/changelog.d/12676.misc new file mode 100644 index 000000000000..26490af00dee --- /dev/null +++ b/changelog.d/12676.misc @@ -0,0 +1 @@ +Improve documentation of the `synapse.push` module. diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index a1b771109848..d1dfb406d43a 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -12,6 +12,85 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +This module implements the push rules & notifications portion of the Matrix +specification. + +There's a few related features: + +* Push notifications (i.e. email or outgoing requests to a Push Gateway). +* Calculation of unread notifications (for /sync and /notifications). + +When Synapse receives a new event (locally, via the Client-Server API, or via +federation), the following occurs: + +1. The push rules get evaluated to generate a set of per-user actions. +2. The event is persisted into the database. +3. (In the background) The notifier is notified about the new event. + +The per-user actions are initially stored in the event_push_actions_staging table, +before getting moved into the event_push_actions table when the event is persisted. +The event_push_actions table is periodically summarised into the event_push_summary +and event_push_summary_stream_ordering tables. + +Since push actions block an event from being persisted the generation of push +actions is performance sensitive. 
+ +The general interaction of the classes are: + + +---------------------------------------------+ + | FederationEventHandler/EventCreationHandler | + +---------------------------------------------+ + | + v + +-----------------+ + | ActionGenerator | + +-----------------+ + | + v + +-----------------------+ +---------------------------+ + | BulkPushRuleEvaluator |---->| PushRuleEvaluatorForEvent | + +-----------------------+ +---------------------------+ + | + v + +-----------------------------+ + | EventPushActionsWorkerStore | + +-----------------------------+ + +The notifier notifies the pusher pool of the new event, which checks for affected +users. Each user-configured pusher of the affected users then performs the +previously calculated action. + +The general interaction of the classes are: + + +----------+ + | Notifier | + +----------+ + | + v + +------------+ +--------------+ + | PusherPool |---->| PusherConfig | + +------------+ +--------------+ + | + | +---------------+ + +<--->| PusherFactory | + | +---------------+ + v + +------------------------+ +-----------------------------------------------+ + | EmailPusher/HttpPusher |---->| EventPushActionsWorkerStore/PusherWorkerStore | + +------------------------+ +-----------------------------------------------+ + | + v + +-------------------------+ + | Mailer/SimpleHttpClient | + +-------------------------+ + +The Pusher instance also calls out to various utilities for generating payloads +(or email templates), but those interactions are not detailed in this diagram +(and are specific to the type of pusher). + +""" + import abc from typing import TYPE_CHECKING, Any, Dict, Optional From b44fbdffa44cee752853ee16ad5604ec67667f92 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 10 May 2022 07:54:30 -0400 Subject: [PATCH 002/181] Move free functions into PushRuleEvaluatorForEvent. (#12677) * Move `_condition_checker` into `PushRuleEvaluatorForEvent`. 
* Move the condition cache into `PushRuleEvaluatorForEvent`. * Improve docstrings. * Inline a method which is only called once. --- changelog.d/12677.misc | 1 + synapse/push/bulk_push_rule_evaluator.py | 32 +---------- synapse/push/push_rule_evaluator.py | 70 ++++++++++++++++++++++-- 3 files changed, 69 insertions(+), 34 deletions(-) create mode 100644 changelog.d/12677.misc diff --git a/changelog.d/12677.misc b/changelog.d/12677.misc new file mode 100644 index 000000000000..eed12e69e9ba --- /dev/null +++ b/changelog.d/12677.misc @@ -0,0 +1 @@ +Refactor free functions into methods on `PushRuleEvaluatorForEvent`. diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index b07cf2eee705..85ddb56c6eb4 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -208,8 +208,6 @@ async def action_for_event_by_user( event, len(room_members), sender_power_level, power_levels ) - condition_cache: Dict[str, bool] = {} - # If the event is not a state event check if any users ignore the sender.
if not event.is_state(): ignorers = await self.store.ignored_by(event.sender) @@ -247,8 +245,8 @@ async def action_for_event_by_user( if "enabled" in rule and not rule["enabled"]: continue - matches = _condition_checker( - evaluator, rule["conditions"], uid, display_name, condition_cache + matches = evaluator.check_conditions( + rule["conditions"], uid, display_name ) if matches: actions = [x for x in rule["actions"] if x != "dont_notify"] @@ -267,32 +265,6 @@ async def action_for_event_by_user( ) -def _condition_checker( - evaluator: PushRuleEvaluatorForEvent, - conditions: List[dict], - uid: str, - display_name: Optional[str], - cache: Dict[str, bool], -) -> bool: - for cond in conditions: - _cache_key = cond.get("_cache_key", None) - if _cache_key: - res = cache.get(_cache_key, None) - if res is False: - return False - elif res is True: - continue - - res = evaluator.matches(cond, uid, display_name) - if _cache_key: - cache[_cache_key] = bool(res) - - if not res: - return False - - return True - - MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index f617c759e6cf..54db6b5612a3 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -129,9 +129,55 @@ def __init__( # Maps strings of e.g. 'content.body' -> event["content"]["body"] self._value_cache = _flatten_dict(event) + # Maps cache keys to final values. + self._condition_cache: Dict[str, bool] = {} + + def check_conditions( + self, conditions: List[dict], uid: str, display_name: Optional[str] + ) -> bool: + """ + Returns true if a user's conditions/user ID/display name match the event. + + Args: + conditions: The user's conditions to match. + uid: The user's MXID. + display_name: The display name. + + Returns: + True if all conditions match the event, False otherwise. 
+ """ + for cond in conditions: + _cache_key = cond.get("_cache_key", None) + if _cache_key: + res = self._condition_cache.get(_cache_key, None) + if res is False: + return False + elif res is True: + continue + + res = self.matches(cond, uid, display_name) + if _cache_key: + self._condition_cache[_cache_key] = bool(res) + + if not res: + return False + + return True + def matches( self, condition: Dict[str, Any], user_id: str, display_name: Optional[str] ) -> bool: + """ + Returns true if a user's condition/user ID/display name match the event. + + Args: + condition: The user's condition to match. + user_id: The user's MXID. + display_name: The display name, or None if there is not one. + + Returns: + True if the condition matches the event, False otherwise. + """ if condition["kind"] == "event_match": return self._event_match(condition, user_id) elif condition["kind"] == "contains_display_name": @@ -146,6 +192,16 @@ def matches( return True def _event_match(self, condition: dict, user_id: str) -> bool: + """ + Check an "event_match" push rule condition. + + Args: + condition: The "event_match" push rule condition to match. + user_id: The user's MXID. + + Returns: + True if the condition matches the event, False otherwise. + """ pattern = condition.get("pattern", None) if not pattern: @@ -167,13 +223,22 @@ def _event_match(self, condition: dict, user_id: str) -> bool: return _glob_matches(pattern, body, word_boundary=True) else: - haystack = self._get_value(condition["key"]) + haystack = self._value_cache.get(condition["key"], None) if haystack is None: return False return _glob_matches(pattern, haystack) def _contains_display_name(self, display_name: Optional[str]) -> bool: + """ + Check a "contains_display_name" push rule condition. + + Args: + display_name: The display name, or None if there is not one. + + Returns: + True if the display name is found in the event body, False otherwise.
+ """ if not display_name: return False @@ -191,9 +256,6 @@ def _contains_display_name(self, display_name: Optional[str]) -> bool: return bool(r.search(body)) - def _get_value(self, dotted_key: str) -> Optional[str]: - return self._value_cache.get(dotted_key, None) - # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache( From 5c00151c28367cb091c408d02b275e7859bd4ace Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 10 May 2022 14:05:22 +0100 Subject: [PATCH 003/181] Add `@cancellable` decorator, for use on request handlers (#12586) Signed-off-by: Sean Quah --- changelog.d/12586.misc | 1 + synapse/http/server.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 changelog.d/12586.misc diff --git a/changelog.d/12586.misc b/changelog.d/12586.misc new file mode 100644 index 000000000000..d26e332305ce --- /dev/null +++ b/changelog.d/12586.misc @@ -0,0 +1 @@ +Add `@cancellable` decorator, for use on endpoint methods that can be cancelled when clients disconnect. 
diff --git a/synapse/http/server.py b/synapse/http/server.py index 657bffcddd88..8c96f2196eda 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -33,6 +33,7 @@ Optional, Pattern, Tuple, + TypeVar, Union, ) @@ -92,6 +93,66 @@ HTTP_STATUS_REQUEST_CANCELLED = 499 +F = TypeVar("F", bound=Callable[..., Any]) + + +_cancellable_method_names = frozenset( + { + # `RestServlet`, `BaseFederationServlet` and `BaseFederationServerServlet` + # methods + "on_GET", + "on_PUT", + "on_POST", + "on_DELETE", + # `_AsyncResource`, `DirectServeHtmlResource` and `DirectServeJsonResource` + # methods + "_async_render_GET", + "_async_render_PUT", + "_async_render_POST", + "_async_render_DELETE", + "_async_render_OPTIONS", + # `ReplicationEndpoint` methods + "_handle_request", + } +) + + +def cancellable(method: F) -> F: + """Marks a servlet method as cancellable. + + Methods with this decorator will be cancelled if the client disconnects before we + finish processing the request. + + During cancellation, `Deferred.cancel()` will be invoked on the `Deferred` wrapping + the method. The `cancel()` call will propagate down to the `Deferred` that is + currently being waited on. That `Deferred` will raise a `CancelledError`, which will + propagate up, as per normal exception handling. + + Before applying this decorator to a new endpoint, you MUST recursively check + that all `await`s in the function are on `async` functions or `Deferred`s that + handle cancellation cleanly, otherwise a variety of bugs may occur, ranging from + premature logging context closure, to stuck requests, to database corruption. + + Usage: + class SomeServlet(RestServlet): + @cancellable + async def on_GET(self, request: SynapseRequest) -> ...: + ... + """ + if method.__name__ not in _cancellable_method_names: + raise ValueError( + "@cancellable decorator can only be applied to servlet methods." 
+ ) + + method.cancellable = True # type: ignore[attr-defined] + return method + + +def is_method_cancellable(method: Callable[..., Any]) -> bool: + """Checks whether a servlet method has the `@cancellable` flag.""" + return getattr(method, "cancellable", False) + + def return_json_error(f: failure.Failure, request: SynapseRequest) -> None: """Sends a JSON error response to clients.""" From 5cfb0045955f8b5e9e8a1e0505fa2b5ed4f7bde2 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 10 May 2022 14:06:08 +0100 Subject: [PATCH 004/181] Add ability to cancel disconnected requests to `SynapseRequest` (#12588) Signed-off-by: Sean Quah --- changelog.d/12588.misc | 1 + synapse/http/site.py | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12588.misc diff --git a/changelog.d/12588.misc b/changelog.d/12588.misc new file mode 100644 index 000000000000..f62d5c8e210c --- /dev/null +++ b/changelog.d/12588.misc @@ -0,0 +1 @@ +Add ability to cancel disconnected requests to `SynapseRequest`. diff --git a/synapse/http/site.py b/synapse/http/site.py index 0b85a57d7787..f7f1c5704285 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -19,6 +19,7 @@ import attr from zope.interface import implementer +from twisted.internet.defer import Deferred from twisted.internet.interfaces import IAddress, IReactorTime from twisted.python.failure import Failure from twisted.web.http import HTTPChannel @@ -91,6 +92,13 @@ def __init__( # we can't yet create the logcontext, as we don't know the method. self.logcontext: Optional[LoggingContext] = None + # The `Deferred` to cancel if the client disconnects early. Expected to be set + # by `Resource.render`. + self.render_deferred: Optional["Deferred[None]"] = None + # A boolean indicating whether `render_deferred` should be cancelled if the + # client disconnects early. Expected to be set during `Resource.render`.
+ self.is_render_cancellable = False + global _next_request_seq self.request_seq = _next_request_seq _next_request_seq += 1 @@ -357,7 +365,21 @@ def connectionLost(self, reason: Union[Failure, Exception]) -> None: {"event": "client connection lost", "reason": str(reason.value)} ) - if not self._is_processing: + if self._is_processing: + if self.is_render_cancellable: + if self.render_deferred is not None: + # Throw a cancellation into the request processing, in the hope + # that it will finish up sooner than it normally would. + # The `self.processing()` context manager will call + # `_finished_processing()` when done. + with PreserveLoggingContext(): + self.render_deferred.cancel() + else: + logger.error( + "Connection from client lost, but have no Deferred to " + "cancel even though the request is marked as cancellable." + ) + else: self._finished_processing() def _started_processing(self, servlet_name: str) -> None: From dbb12a0b547914024316b6eb510069e900680e42 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 10 May 2022 14:06:56 +0100 Subject: [PATCH 005/181] Add helper class for testing request cancellation (#12630) Also expose the `SynapseRequest` from `FakeChannel` in tests, so that we can call `Request.connectionLost` to simulate a client disconnecting. Signed-off-by: Sean Quah --- changelog.d/12630.misc | 1 + tests/http/server/__init__.py | 13 +++++ tests/http/server/_base.py | 100 ++++++++++++++++++++++++++++++++++ tests/server.py | 13 +++++ 4 files changed, 127 insertions(+) create mode 100644 changelog.d/12630.misc create mode 100644 tests/http/server/__init__.py create mode 100644 tests/http/server/_base.py diff --git a/changelog.d/12630.misc b/changelog.d/12630.misc new file mode 100644 index 000000000000..43e12603e2d8 --- /dev/null +++ b/changelog.d/12630.misc @@ -0,0 +1 @@ +Add a helper class for testing request cancellation. 
diff --git a/tests/http/server/__init__.py b/tests/http/server/__init__.py new file mode 100644 index 000000000000..3a5f22c02235 --- /dev/null +++ b/tests/http/server/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/http/server/_base.py b/tests/http/server/_base.py new file mode 100644 index 000000000000..b9f1a381aa2b --- /dev/null +++ b/tests/http/server/_base.py @@ -0,0 +1,100 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +from http import HTTPStatus +from typing import Any, Callable, Optional, Union +from unittest import mock + +from twisted.internet.error import ConnectionDone + +from synapse.http.server import ( + HTTP_STATUS_REQUEST_CANCELLED, + respond_with_html_bytes, + respond_with_json, +) +from synapse.types import JsonDict + +from tests import unittest +from tests.server import FakeChannel, ThreadedMemoryReactorClock + + +class EndpointCancellationTestHelperMixin(unittest.TestCase): + """Provides helper methods for testing cancellation of endpoints.""" + + def _test_disconnect( + self, + reactor: ThreadedMemoryReactorClock, + channel: FakeChannel, + expect_cancellation: bool, + expected_body: Union[bytes, JsonDict], + expected_code: Optional[int] = None, + ) -> None: + """Disconnects an in-flight request and checks the response. + + Args: + reactor: The twisted reactor running the request handler. + channel: The `FakeChannel` for the request. + expect_cancellation: `True` if request processing is expected to be + cancelled, `False` if the request should run to completion. + expected_body: The expected response for the request. + expected_code: The expected status code for the request. Defaults to `200` + or `499` depending on `expect_cancellation`. + """ + # Determine the expected status code. + if expected_code is None: + if expect_cancellation: + expected_code = HTTP_STATUS_REQUEST_CANCELLED + else: + expected_code = HTTPStatus.OK + + request = channel.request + self.assertFalse( + channel.is_finished(), + "Request finished before we could disconnect - " + "was `await_result=False` passed to `make_request`?", + ) + + # We're about to disconnect the request. This also disconnects the channel, so + # we have to rely on mocks to extract the response. 
+ respond_method: Callable[..., Any] + if isinstance(expected_body, bytes): + respond_method = respond_with_html_bytes + else: + respond_method = respond_with_json + + with mock.patch( + f"synapse.http.server.{respond_method.__name__}", wraps=respond_method + ) as respond_mock: + # Disconnect the request. + request.connectionLost(reason=ConnectionDone()) + + if expect_cancellation: + # An immediate cancellation is expected. + respond_mock.assert_called_once() + args, _kwargs = respond_mock.call_args + code, body = args[1], args[2] + self.assertEqual(code, expected_code) + self.assertEqual(request.code, expected_code) + self.assertEqual(body, expected_body) + else: + respond_mock.assert_not_called() + + # The handler is expected to run to completion. + reactor.pump([1.0]) + respond_mock.assert_called_once() + args, _kwargs = respond_mock.call_args + code, body = args[1], args[2] + self.assertEqual(code, expected_code) + self.assertEqual(request.code, expected_code) + self.assertEqual(body, expected_body) diff --git a/tests/server.py b/tests/server.py index 8f30e250c83c..aaefcfc46cd9 100644 --- a/tests/server.py +++ b/tests/server.py @@ -109,6 +109,17 @@ class FakeChannel: _ip: str = "127.0.0.1" _producer: Optional[Union[IPullProducer, IPushProducer]] = None resource_usage: Optional[ContextResourceUsage] = None + _request: Optional[Request] = None + + @property + def request(self) -> Request: + assert self._request is not None + return self._request + + @request.setter + def request(self, request: Request) -> None: + assert self._request is None + self._request = request @property def json_body(self): @@ -322,6 +333,8 @@ def make_request( channel = FakeChannel(site, reactor, ip=client_ip) req = request(channel, site) + channel.request = req + req.content = BytesIO(content) # Twisted expects to be at the end of the content when parsing the request. 
req.content.seek(0, SEEK_END) From 147f098fb4ac7ae435bae7d29c05f93b43472854 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 10 May 2022 15:35:08 +0100 Subject: [PATCH 006/181] Stop writing to `event_reference_hashes` (#12679) This table is never read, since #11794. We stop writing to it; in future we can drop it altogether. --- changelog.d/12679.misc | 1 + synapse/storage/databases/main/events.py | 25 ------------------- .../storage/databases/main/purge_events.py | 3 --- synapse/storage/schema/__init__.py | 5 +++- tests/storage/test_event_federation.py | 9 ------- 5 files changed, 5 insertions(+), 38 deletions(-) create mode 100644 changelog.d/12679.misc diff --git a/changelog.d/12679.misc b/changelog.d/12679.misc new file mode 100644 index 000000000000..6df1116b49ee --- /dev/null +++ b/changelog.d/12679.misc @@ -0,0 +1 @@ +Preparation for database schema simplifications: stop writing to `event_reference_hashes`. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ed29a0a5e2db..ad611b2c0bb2 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,7 +36,6 @@ import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.room_versions import RoomVersions -from synapse.crypto.event_signing import compute_event_reference_hash from synapse.events import EventBase # noqa: F401 from synapse.events.snapshot import EventContext # noqa: F401 from synapse.storage._base import db_to_json, make_in_list_sql_clause @@ -1600,11 +1599,6 @@ def _update_metadata_tables_txn( inhibit_local_membership_updates=inhibit_local_membership_updates, ) - # Insert event_reference_hashes table. 
- self._store_event_reference_hashes_txn( - txn, [event for event, _ in events_and_contexts] - ) - # Prefill the event cache self._add_to_cache(txn, events_and_contexts) @@ -1704,25 +1698,6 @@ def _insert_event_expiry_txn(self, txn, event_id, expiry_ts): values={"event_id": event_id, "expiry_ts": expiry_ts}, ) - def _store_event_reference_hashes_txn(self, txn, events): - """Store a hash for a PDU - Args: - txn (cursor): - events (list): list of Events. - """ - - vals = [] - for event in events: - ref_alg, ref_hash_bytes = compute_event_reference_hash(event) - vals.append((event.event_id, ref_alg, memoryview(ref_hash_bytes))) - - self.db_pool.simple_insert_many_txn( - txn, - table="event_reference_hashes", - keys=("event_id", "algorithm", "hash"), - values=vals, - ) - def _store_room_members_txn( self, txn, events, *, inhibit_local_membership_updates: bool = False ): diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index bfc85b3add98..38ba91af4c47 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -69,7 +69,6 @@ def _purge_history_txn( # event_forward_extremities # event_json # event_push_actions - # event_reference_hashes # event_relations # event_search # event_to_state_groups @@ -220,7 +219,6 @@ def _purge_history_txn( "event_auth", "event_edges", "event_forward_extremities", - "event_reference_hashes", "event_relations", "event_search", "rejections", @@ -369,7 +367,6 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: "event_edges", "event_json", "event_push_actions_staging", - "event_reference_hashes", "event_relations", "event_to_state_groups", "event_auth_chains", diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 871d4ace123c..20c344faeab3 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for 
the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 69 # remember to update the list below when updating +SCHEMA_VERSION = 70 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the @@ -62,6 +62,9 @@ Changes in SCHEMA_VERSION = 69: - We now write to `device_lists_changes_in_room` table. - Use sequence to generate future `application_services_txns.txn_id`s + +Changes in SCHEMA_VERSION = 70: + - event_reference_hashes is no longer written to. """ diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 645d564d1c40..d92a9ac5b798 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -58,15 +58,6 @@ def insert_event(txn, i): (room_id, event_id), ) - txn.execute( - ( - "INSERT INTO event_reference_hashes " - "(event_id, algorithm, hash) " - "VALUES (?, 'sha256', ?)" - ), - (event_id, bytearray(b"ffff")), - ) - for i in range(0, 20): self.get_success( self.store.db_pool.runInteraction("insert", insert_event, i) From 989fa3309655e2ebd5416f4b09a98edfb1b2caa8 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 10 May 2022 20:07:48 +0200 Subject: [PATCH 007/181] Add some type hints to datastore. (#12477) --- changelog.d/12477.misc | 1 + synapse/events/snapshot.py | 3 +- synapse/storage/databases/main/events.py | 156 ++++++++++++++--------- synapse/storage/databases/main/search.py | 33 +++-- 4 files changed, 122 insertions(+), 71 deletions(-) create mode 100644 changelog.d/12477.misc diff --git a/changelog.d/12477.misc b/changelog.d/12477.misc new file mode 100644 index 000000000000..e793d08e5e3f --- /dev/null +++ b/changelog.d/12477.misc @@ -0,0 +1 @@ +Add some type hints to datastore. 
\ No newline at end of file diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 46042b2bf7af..8120c305df14 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -15,6 +15,7 @@ import attr from frozendict import frozendict +from typing_extensions import Literal from twisted.internet.defer import Deferred @@ -106,7 +107,7 @@ class EventContext: incomplete state. """ - rejected: Union[bool, str] = False + rejected: Union[Literal[False], str] = False _state_group: Optional[int] = None state_group_before_event: Optional[int] = None prev_group: Optional[int] = None diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ad611b2c0bb2..6c12653bb3c6 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -49,7 +49,7 @@ from synapse.storage.engines.postgres import PostgresEngine from synapse.storage.util.id_generators import AbstractStreamIdGenerator from synapse.storage.util.sequence import SequenceGenerator -from synapse.types import StateMap, get_domain_from_id +from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically @@ -235,7 +235,9 @@ async def _get_events_which_are_prevs(self, event_ids: Iterable[str]) -> List[st """ results: List[str] = [] - def _get_events_which_are_prevs_txn(txn, batch): + def _get_events_which_are_prevs_txn( + txn: LoggingTransaction, batch: Collection[str] + ) -> None: sql = """ SELECT prev_event_id, internal_metadata FROM event_edges @@ -285,7 +287,9 @@ async def _get_prevs_before_rejected(self, event_ids: Iterable[str]) -> Set[str] # and their prev events. 
existing_prevs = set() - def _get_prevs_before_rejected_txn(txn, batch): + def _get_prevs_before_rejected_txn( + txn: LoggingTransaction, batch: Collection[str] + ) -> None: to_recursively_check = batch while to_recursively_check: @@ -515,7 +519,7 @@ def _persist_event_auth_chain_txn( @classmethod def _add_chain_cover_index( cls, - txn, + txn: LoggingTransaction, db_pool: DatabasePool, event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], @@ -809,7 +813,7 @@ def _add_chain_cover_index( @staticmethod def _allocate_chain_ids( - txn, + txn: LoggingTransaction, db_pool: DatabasePool, event_chain_id_gen: SequenceGenerator, event_to_room_id: Dict[str, str], @@ -943,7 +947,7 @@ def _persist_transaction_ids_txn( self, txn: LoggingTransaction, events_and_contexts: List[Tuple[EventBase, EventContext]], - ): + ) -> None: """Persist the mapping from transaction IDs to event IDs (if defined).""" to_insert = [] @@ -997,7 +1001,7 @@ def _update_current_state_txn( txn: LoggingTransaction, state_delta_by_room: Dict[str, DeltaState], stream_id: int, - ): + ) -> None: for room_id, delta_state in state_delta_by_room.items(): to_delete = delta_state.to_delete to_insert = delta_state.to_insert @@ -1155,7 +1159,7 @@ def _update_current_state_txn( txn, room_id, members_changed ) - def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str): + def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state events. 
@@ -1189,7 +1193,7 @@ def _update_forward_extremities_txn( txn: LoggingTransaction, new_forward_extremities: Dict[str, Set[str]], max_stream_order: int, - ): + ) -> None: for room_id in new_forward_extremities.keys(): self.db_pool.simple_delete_txn( txn, table="event_forward_extremities", keyvalues={"room_id": room_id} @@ -1254,9 +1258,9 @@ def _filter_events_and_contexts_for_duplicates( def _update_room_depths_txn( self, - txn, + txn: LoggingTransaction, events_and_contexts: List[Tuple[EventBase, EventContext]], - ): + ) -> None: """Update min_depth for each room Args: @@ -1385,7 +1389,7 @@ def _store_event_txn( # nothing to do here return - def event_dict(event): + def event_dict(event: EventBase) -> JsonDict: d = event.get_dict() d.pop("redacted", None) d.pop("redacted_because", None) @@ -1476,18 +1480,20 @@ def event_dict(event): ), ) - def _store_rejected_events_txn(self, txn, events_and_contexts): + def _store_rejected_events_txn( + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> List[Tuple[EventBase, EventContext]]: """Add rows to the 'rejections' table for received events which were rejected Args: - txn (twisted.enterprise.adbapi.Connection): db connection - events_and_contexts (list[(EventBase, EventContext)]): events - we are persisting + txn: db connection + events_and_contexts: events we are persisting Returns: - list[(EventBase, EventContext)] new list, without the rejected - events. + new list, without the rejected events. """ # Remove the rejected events from the list now that we've added them # to the events table and the events_json table. 
@@ -1508,7 +1514,7 @@ def _update_metadata_tables_txn( events_and_contexts: List[Tuple[EventBase, EventContext]], all_events_and_contexts: List[Tuple[EventBase, EventContext]], inhibit_local_membership_updates: bool = False, - ): + ) -> None: """Update all the miscellaneous tables for new events Args: @@ -1602,7 +1608,11 @@ def _update_metadata_tables_txn( # Prefill the event cache self._add_to_cache(txn, events_and_contexts) - def _add_to_cache(self, txn, events_and_contexts): + def _add_to_cache( + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> None: to_prefill = [] rows = [] @@ -1633,7 +1643,7 @@ def _add_to_cache(self, txn, events_and_contexts): if not row["rejects"] and not row["redacts"]: to_prefill.append(EventCacheEntry(event=event, redacted_event=None)) - def prefill(): + def prefill() -> None: for cache_entry in to_prefill: self.store._get_event_cache.set( (cache_entry.event.event_id,), cache_entry @@ -1663,19 +1673,24 @@ def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None: ) def insert_labels_for_event_txn( - self, txn, event_id, labels, room_id, topological_ordering - ): + self, + txn: LoggingTransaction, + event_id: str, + labels: List[str], + room_id: str, + topological_ordering: int, + ) -> None: """Store the mapping between an event's ID and its labels, with one row per (event_id, label) tuple. Args: - txn (LoggingTransaction): The transaction to execute. - event_id (str): The event's ID. - labels (list[str]): A list of text labels. - room_id (str): The ID of the room the event was sent to. - topological_ordering (int): The position of the event in the room's topology. + txn: The transaction to execute. + event_id: The event's ID. + labels: A list of text labels. + room_id: The ID of the room the event was sent to. + topological_ordering: The position of the event in the room's topology. 
""" - return self.db_pool.simple_insert_many_txn( + self.db_pool.simple_insert_many_txn( txn=txn, table="event_labels", keys=("event_id", "label", "room_id", "topological_ordering"), @@ -1684,25 +1699,32 @@ def insert_labels_for_event_txn( ], ) - def _insert_event_expiry_txn(self, txn, event_id, expiry_ts): + def _insert_event_expiry_txn( + self, txn: LoggingTransaction, event_id: str, expiry_ts: int + ) -> None: """Save the expiry timestamp associated with a given event ID. Args: - txn (LoggingTransaction): The database transaction to use. - event_id (str): The event ID the expiry timestamp is associated with. - expiry_ts (int): The timestamp at which to expire (delete) the event. + txn: The database transaction to use. + event_id: The event ID the expiry timestamp is associated with. + expiry_ts: The timestamp at which to expire (delete) the event. """ - return self.db_pool.simple_insert_txn( + self.db_pool.simple_insert_txn( txn=txn, table="event_expiry", values={"event_id": event_id, "expiry_ts": expiry_ts}, ) def _store_room_members_txn( - self, txn, events, *, inhibit_local_membership_updates: bool = False - ): + self, + txn: LoggingTransaction, + events: List[EventBase], + *, + inhibit_local_membership_updates: bool = False, + ) -> None: """ Store a room member in the database. + Args: txn: The transaction to use. events: List of events to store. @@ -1742,6 +1764,7 @@ def non_null_str_or_none(val: Any) -> Optional[str]: ) for event in events: + assert event.internal_metadata.stream_ordering is not None txn.call_after( self.store._membership_stream_cache.entity_has_changed, event.state_key, @@ -1838,7 +1861,9 @@ def _handle_event_relations( (parent_id, event.sender), ) - def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): + def _handle_insertion_event( + self, txn: LoggingTransaction, event: EventBase + ) -> None: """Handles keeping track of insertion events and edges/connections. Part of MSC2716. 
@@ -1899,7 +1924,7 @@ def _handle_insertion_event(self, txn: LoggingTransaction, event: EventBase): }, ) - def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase): + def _handle_batch_event(self, txn: LoggingTransaction, event: EventBase) -> None: """Handles inserting the batch edges/connections between the batch event and an insertion event. Part of MSC2716. @@ -1999,25 +2024,29 @@ def _handle_redact_relations( txn, table="event_relations", keyvalues={"event_id": redacted_event_id} ) - def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None: if isinstance(event.content.get("topic"), str): self.store_event_search_txn( txn, event, "content.topic", event.content["topic"] ) - def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase) -> None: if isinstance(event.content.get("name"), str): self.store_event_search_txn( txn, event, "content.name", event.content["name"] ) - def _store_room_message_txn(self, txn: LoggingTransaction, event: EventBase): + def _store_room_message_txn( + self, txn: LoggingTransaction, event: EventBase + ) -> None: if isinstance(event.content.get("body"), str): self.store_event_search_txn( txn, event, "content.body", event.content["body"] ) - def _store_retention_policy_for_room_txn(self, txn, event): + def _store_retention_policy_for_room_txn( + self, txn: LoggingTransaction, event: EventBase + ) -> None: if not event.is_state(): logger.debug("Ignoring non-state m.room.retention event") return @@ -2077,8 +2106,11 @@ def store_event_search_txn( ) def _set_push_actions_for_event_and_users_txn( - self, txn, events_and_contexts, all_events_and_contexts - ): + self, + txn: LoggingTransaction, + events_and_contexts: List[Tuple[EventBase, EventContext]], + all_events_and_contexts: List[Tuple[EventBase, EventContext]], + ) -> None: 
"""Handles moving push actions from staging table to main event_push_actions table for all events in `events_and_contexts`. @@ -2086,12 +2118,10 @@ def _set_push_actions_for_event_and_users_txn( from the push action staging area. Args: - events_and_contexts (list[(EventBase, EventContext)]): events - we are persisting - all_events_and_contexts (list[(EventBase, EventContext)]): all - events that we were going to persist. This includes events - we've already persisted, etc, that wouldn't appear in - events_and_context. + events_and_contexts: events we are persisting + all_events_and_contexts: all events that we were going to persist. + This includes events we've already persisted, etc, that wouldn't + appear in events_and_context. """ # Only non outlier events will have push actions associated with them, @@ -2160,7 +2190,9 @@ def _set_push_actions_for_event_and_users_txn( ), ) - def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): + def _remove_push_actions_for_event_id_txn( + self, txn: LoggingTransaction, room_id: str, event_id: str + ) -> None: # Sad that we have to blow away the cache for the whole room here txn.call_after( self.store.get_unread_event_push_actions_by_room_for_user.invalidate, @@ -2171,7 +2203,9 @@ def _remove_push_actions_for_event_id_txn(self, txn, room_id, event_id): (room_id, event_id), ) - def _store_rejections_txn(self, txn, event_id, reason): + def _store_rejections_txn( + self, txn: LoggingTransaction, event_id: str, reason: str + ) -> None: self.db_pool.simple_insert_txn( txn, table="rejections", @@ -2183,8 +2217,10 @@ def _store_rejections_txn(self, txn, event_id, reason): ) def _store_event_state_mappings_txn( - self, txn, events_and_contexts: Iterable[Tuple[EventBase, EventContext]] - ): + self, + txn: LoggingTransaction, + events_and_contexts: Collection[Tuple[EventBase, EventContext]], + ) -> None: state_groups = {} for event, context in events_and_contexts: if event.internal_metadata.is_outlier(): @@ -2241,7 
+2277,9 @@ def _store_event_state_mappings_txn( state_group_id, ) - def _update_min_depth_for_room_txn(self, txn, room_id, depth): + def _update_min_depth_for_room_txn( + self, txn: LoggingTransaction, room_id: str, depth: int + ) -> None: min_depth = self.store._get_min_depth_interaction(txn, room_id) if min_depth is not None and depth >= min_depth: @@ -2254,7 +2292,9 @@ def _update_min_depth_for_room_txn(self, txn, room_id, depth): values={"min_depth": depth}, ) - def _handle_mult_prev_events(self, txn, events): + def _handle_mult_prev_events( + self, txn: LoggingTransaction, events: List[EventBase] + ) -> None: """ For the given event, update the event edges table and forward and backward extremities tables. @@ -2272,7 +2312,9 @@ def _handle_mult_prev_events(self, txn, events): self._update_backward_extremeties(txn, events) - def _update_backward_extremeties(self, txn, events): + def _update_backward_extremeties( + self, txn: LoggingTransaction, events: List[EventBase] + ) -> None: """Updates the event_backward_extremities tables based on the new/updated events being persisted. 
diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 3c49e7ec98e2..78e0773b2a88 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -14,7 +14,7 @@ import logging import re -from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set +from typing import TYPE_CHECKING, Any, Collection, Iterable, List, Optional, Set, Tuple import attr @@ -27,7 +27,7 @@ LoggingTransaction, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour -from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.types import JsonDict if TYPE_CHECKING: @@ -149,7 +149,9 @@ def __init__( self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings ) - async def _background_reindex_search(self, progress, batch_size): + async def _background_reindex_search( + self, progress: JsonDict, batch_size: int + ) -> int: # we work through the events table from highest stream id to lowest target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] @@ -157,7 +159,7 @@ async def _background_reindex_search(self, progress, batch_size): TYPES = ["m.room.name", "m.room.message", "m.room.topic"] - def reindex_search_txn(txn): + def reindex_search_txn(txn: LoggingTransaction) -> int: sql = ( "SELECT stream_ordering, event_id, room_id, type, json, " " origin_server_ts FROM events" @@ -255,12 +257,14 @@ def reindex_search_txn(txn): return result - async def _background_reindex_gin_search(self, progress, batch_size): + async def _background_reindex_gin_search( + self, progress: JsonDict, batch_size: int + ) -> int: """This handles old synapses which used GIST indexes, if any; converting them back to be GIN as per the actual schema. 
""" - def create_index(conn): + def create_index(conn: LoggingDatabaseConnection) -> None: conn.rollback() # we have to set autocommit, because postgres refuses to @@ -299,7 +303,9 @@ def create_index(conn): ) return 1 - async def _background_reindex_search_order(self, progress, batch_size): + async def _background_reindex_search_order( + self, progress: JsonDict, batch_size: int + ) -> int: target_min_stream_id = progress["target_min_stream_id_inclusive"] max_stream_id = progress["max_stream_id_exclusive"] rows_inserted = progress.get("rows_inserted", 0) @@ -307,7 +313,7 @@ async def _background_reindex_search_order(self, progress, batch_size): if not have_added_index: - def create_index(conn): + def create_index(conn: LoggingDatabaseConnection) -> None: conn.rollback() conn.set_session(autocommit=True) c = conn.cursor() @@ -336,7 +342,7 @@ def create_index(conn): pg, ) - def reindex_search_txn(txn): + def reindex_search_txn(txn: LoggingTransaction) -> Tuple[int, bool]: sql = ( "UPDATE event_search AS es SET stream_ordering = e.stream_ordering," " origin_server_ts = e.origin_server_ts" @@ -644,7 +650,8 @@ async def search_rooms( else: raise Exception("Unrecognized database engine") - args.append(limit) + # mypy expects to append only a `str`, not an `int` + args.append(limit) # type: ignore[arg-type] results = await self.db_pool.execute( "search_rooms", self.db_pool.cursor_to_dict, sql, *args @@ -705,7 +712,7 @@ async def _find_highlights_in_postgres( A set of strings. """ - def f(txn): + def f(txn: LoggingTransaction) -> Set[str]: highlight_words = set() for event in events: # As a hack we simply join values of all possible keys. 
This is @@ -759,11 +766,11 @@ def f(txn): return await self.db_pool.runInteraction("_find_highlights", f) -def _to_postgres_options(options_dict): +def _to_postgres_options(options_dict: JsonDict) -> str: return "'%s'" % (",".join("%s=%s" % (k, v) for k, v in options_dict.items()),) -def _parse_query(database_engine, search_term): +def _parse_query(database_engine: BaseDatabaseEngine, search_term: str) -> str: """Takes a plain unicode string from the user and converts it into a form that can be passed to database. We use this so that we can add prefix matching, which isn't something From 29f06704b8871a44926f7c99e73cf4a978fb8e81 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 10 May 2022 14:10:22 -0400 Subject: [PATCH 008/181] Fix incorrect type hint in filtering code. (#12695) --- changelog.d/12695.misc | 1 + synapse/api/filtering.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12695.misc diff --git a/changelog.d/12695.misc b/changelog.d/12695.misc new file mode 100644 index 000000000000..1b39d969a4c5 --- /dev/null +++ b/changelog.d/12695.misc @@ -0,0 +1 @@ +Fixes an incorrect type hint for `Filter._check_event_relations`. diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 4a808e33fee1..b91ce06de7c3 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -19,6 +19,7 @@ TYPE_CHECKING, Awaitable, Callable, + Collection, Dict, Iterable, List, @@ -444,9 +445,9 @@ def filter_rooms(self, room_ids: Iterable[str]) -> Set[str]: return room_ids async def _check_event_relations( - self, events: Iterable[FilterEvent] + self, events: Collection[FilterEvent] ) -> List[FilterEvent]: - # The event IDs to check, mypy doesn't understand the ifinstance check. + # The event IDs to check, mypy doesn't understand the isinstance check. 
event_ids = [event.event_id for event in events if isinstance(event, EventBase)] # type: ignore[attr-defined] event_ids_to_keep = set( await self._store.events_have_relations( From c997bfb926a29f0ec894fca889cc5eae603f4027 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Tue, 10 May 2022 20:39:05 +0100 Subject: [PATCH 009/181] Capture the `Deferred` for request cancellation in `_AsyncResource` (#12694) All async request processing goes through `_AsyncResource`, so this is the only place where a `Deferred` needs to be captured for cancellation. Unfortunately, the same isn't true for determining whether a request can be cancelled. Each of `RestServlet`, `BaseFederationServlet`, `DirectServe{Html,Json}Resource` and `ReplicationEndpoint` have different wrappers around the method doing the request handling and they all need to be handled separately. Signed-off-by: Sean Quah --- changelog.d/12694.misc | 1 + synapse/http/server.py | 4 +++- synapse/http/site.py | 9 +++++---- 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 changelog.d/12694.misc diff --git a/changelog.d/12694.misc b/changelog.d/12694.misc new file mode 100644 index 000000000000..e1e956a51301 --- /dev/null +++ b/changelog.d/12694.misc @@ -0,0 +1 @@ +Capture the `Deferred` for request cancellation in `_AsyncResource`. 
diff --git a/synapse/http/server.py b/synapse/http/server.py index 8c96f2196eda..4b4debc5cd2b 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -344,7 +344,9 @@ def __init__(self, extract_context: bool = False): def render(self, request: SynapseRequest) -> int: """This gets called by twisted every time someone sends us a request.""" - defer.ensureDeferred(self._async_render_wrapper(request)) + request.render_deferred = defer.ensureDeferred( + self._async_render_wrapper(request) + ) return NOT_DONE_YET @wrap_async_request_handler diff --git a/synapse/http/site.py b/synapse/http/site.py index f7f1c5704285..eeec74b78ae5 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -92,11 +92,12 @@ def __init__( # we can't yet create the logcontext, as we don't know the method. self.logcontext: Optional[LoggingContext] = None - # The `Deferred` to cancel if the client disconnects early. Expected to be set - # by `Resource.render`. + # The `Deferred` to cancel if the client disconnects early and + # `is_render_cancellable` is set. Expected to be set by `Resource.render`. self.render_deferred: Optional["Deferred[None]"] = None - # A boolean indicating whether `_render_deferred` should be cancelled if the - # client disconnects early. Expected to be set during `Resource.render`. + # A boolean indicating whether `render_deferred` should be cancelled if the + # client disconnects early. Expected to be set by the coroutine started by + # `Resource.render`, if rendering is asynchronous. self.is_render_cancellable = False global _next_request_seq From c72d26c1e1e997e63cef1c474010a7db783f8022 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 10 May 2022 20:43:13 +0100 Subject: [PATCH 010/181] Refactor `EventContext` (#12689) Refactor how the `EventContext` class works, with the intention of reducing the amount of state we fetch from the DB during event processing. 
The idea here is to get rid of the cached `current_state_ids` and `prev_state_ids` that live in the `EventContext`, and instead defer straight to the database (and its caching). One change that may have a noticeable effect is that we now no longer prefill the `get_current_state_ids` cache on a state change. However, that query is relatively light, since it's just a case of reading a table from the DB (unlike fetching state at an event which is more heavyweight). For deployments with workers this cache isn't even used. Part of #12684 --- changelog.d/12689.misc | 1 + synapse/events/snapshot.py | 177 ++++------------------- synapse/handlers/federation.py | 6 +- synapse/handlers/federation_event.py | 6 +- synapse/handlers/message.py | 6 +- synapse/push/action_generator.py | 4 + synapse/state/__init__.py | 9 +- synapse/storage/databases/main/events.py | 6 - synapse/storage/persist_events.py | 42 +----- tests/handlers/test_federation_event.py | 4 +- tests/storage/test_event_chain.py | 2 +- tests/test_state.py | 3 + tests/test_visibility.py | 4 +- 13 files changed, 70 insertions(+), 200 deletions(-) create mode 100644 changelog.d/12689.misc diff --git a/changelog.d/12689.misc b/changelog.d/12689.misc new file mode 100644 index 000000000000..daa484ea3019 --- /dev/null +++ b/changelog.d/12689.misc @@ -0,0 +1 @@ +Refactor `EventContext` class. 
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 8120c305df14..9ccd24b298bb 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -17,11 +17,8 @@ from frozendict import frozendict from typing_extensions import Literal -from twisted.internet.defer import Deferred - from synapse.appservice import ApplicationService from synapse.events import EventBase -from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.types import JsonDict, StateMap if TYPE_CHECKING: @@ -61,6 +58,9 @@ class EventContext: If ``state_group`` is None (ie, the event is an outlier), ``state_group_before_event`` will always also be ``None``. + state_delta_due_to_event: If `state_group` and `state_group_before_event` are not None + then this is the delta of the state between the two groups. + prev_group: If it is known, ``state_group``'s prev_group. Note that this being None does not necessarily mean that ``state_group`` does not have a prev_group! @@ -79,73 +79,47 @@ class EventContext: app_service: If this event is being sent by a (local) application service, that app service. - _current_state_ids: The room state map, including this event - ie, the state - in ``state_group``. - - (type, state_key) -> event_id - - For an outlier, this is {} - - Note that this is a private attribute: it should be accessed via - ``get_current_state_ids``. _AsyncEventContext impl calculates this - on-demand: it will be None until that happens. - - _prev_state_ids: The room state map, excluding this event - ie, the state - in ``state_group_before_event``. For a non-state - event, this will be the same as _current_state_events. - - Note that it is a completely different thing to prev_group! - - (type, state_key) -> event_id - - For an outlier, this is {} - - As with _current_state_ids, this is a private attribute. It should be - accessed via get_prev_state_ids. 
- partial_state: if True, we may be storing this event with a temporary, incomplete state. """ + _storage: "Storage" rejected: Union[Literal[False], str] = False _state_group: Optional[int] = None state_group_before_event: Optional[int] = None + _state_delta_due_to_event: Optional[StateMap[str]] = None prev_group: Optional[int] = None delta_ids: Optional[StateMap[str]] = None app_service: Optional[ApplicationService] = None - _current_state_ids: Optional[StateMap[str]] = None - _prev_state_ids: Optional[StateMap[str]] = None - partial_state: bool = False @staticmethod def with_state( + storage: "Storage", state_group: Optional[int], state_group_before_event: Optional[int], - current_state_ids: Optional[StateMap[str]], - prev_state_ids: Optional[StateMap[str]], + state_delta_due_to_event: Optional[StateMap[str]], partial_state: bool, prev_group: Optional[int] = None, delta_ids: Optional[StateMap[str]] = None, ) -> "EventContext": return EventContext( - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + storage=storage, state_group=state_group, state_group_before_event=state_group_before_event, + state_delta_due_to_event=state_delta_due_to_event, prev_group=prev_group, delta_ids=delta_ids, partial_state=partial_state, ) @staticmethod - def for_outlier() -> "EventContext": + def for_outlier( + storage: "Storage", + ) -> "EventContext": """Return an EventContext instance suitable for persisting an outlier event""" - return EventContext( - current_state_ids={}, - prev_state_ids={}, - ) + return EventContext(storage=storage) async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: """Converts self to a type that can be serialized as JSON, and then @@ -158,24 +132,14 @@ async def serialize(self, event: EventBase, store: "DataStore") -> JsonDict: The serialized event. """ - # We don't serialize the full state dicts, instead they get pulled out - # of the DB on the other side. 
However, the other side can't figure out - # the prev_state_ids, so if we're a state event we include the event - # id that we replaced in the state. - if event.is_state(): - prev_state_ids = await self.get_prev_state_ids() - prev_state_id = prev_state_ids.get((event.type, event.state_key)) - else: - prev_state_id = None - return { - "prev_state_id": prev_state_id, - "event_type": event.type, - "event_state_key": event.get_state_key(), "state_group": self._state_group, "state_group_before_event": self.state_group_before_event, "rejected": self.rejected, "prev_group": self.prev_group, + "state_delta_due_to_event": _encode_state_dict( + self._state_delta_due_to_event + ), "delta_ids": _encode_state_dict(self.delta_ids), "app_service_id": self.app_service.id if self.app_service else None, "partial_state": self.partial_state, @@ -193,16 +157,16 @@ def deserialize(storage: "Storage", input: JsonDict) -> "EventContext": Returns: The event context. """ - context = _AsyncEventContextImpl( + context = EventContext( # We use the state_group and prev_state_id stuff to pull the # current_state_ids out of the DB and construct prev_state_ids. 
storage=storage, - prev_state_id=input["prev_state_id"], - event_type=input["event_type"], - event_state_key=input["event_state_key"], state_group=input["state_group"], state_group_before_event=input["state_group_before_event"], prev_group=input["prev_group"], + state_delta_due_to_event=_decode_state_dict( + input["state_delta_due_to_event"] + ), delta_ids=_decode_state_dict(input["delta_ids"]), rejected=input["rejected"], partial_state=input.get("partial_state", False), @@ -250,8 +214,15 @@ async def get_current_state_ids(self) -> Optional[StateMap[str]]: if self.rejected: raise RuntimeError("Attempt to access state_ids of rejected event") - await self._ensure_fetched() - return self._current_state_ids + assert self._state_delta_due_to_event is not None + + prev_state_ids = await self.get_prev_state_ids() + + if self._state_delta_due_to_event: + prev_state_ids = dict(prev_state_ids) + prev_state_ids.update(self._state_delta_due_to_event) + + return prev_state_ids async def get_prev_state_ids(self) -> StateMap[str]: """ @@ -266,94 +237,10 @@ async def get_prev_state_ids(self) -> StateMap[str]: Maps a (type, state_key) to the event ID of the state event matching this tuple. """ - await self._ensure_fetched() - # There *should* be previous state IDs now. - assert self._prev_state_ids is not None - return self._prev_state_ids - - def get_cached_current_state_ids(self) -> Optional[StateMap[str]]: - """Gets the current state IDs if we have them already cached. - - It is an error to access this for a rejected event, since rejected state should - not make it into the room state. This method will raise an exception if - ``rejected`` is set. - - Returns: - Returns None if we haven't cached the state or if state_group is None - (which happens when the associated event is an outlier). - - Otherwise, returns the the current state IDs. 
- """ - if self.rejected: - raise RuntimeError("Attempt to access state_ids of rejected event") - - return self._current_state_ids - - async def _ensure_fetched(self) -> None: - return None - - -@attr.s(slots=True) -class _AsyncEventContextImpl(EventContext): - """ - An implementation of EventContext which fetches _current_state_ids and - _prev_state_ids from the database on demand. - - Attributes: - - _storage - - _fetching_state_deferred: Resolves when *_state_ids have been calculated. - None if we haven't started calculating yet - - _event_type: The type of the event the context is associated with. - - _event_state_key: The state_key of the event the context is associated with. - - _prev_state_id: If the event associated with the context is a state event, - then `_prev_state_id` is the event_id of the state that was replaced. - """ - - # This needs to have a default as we're inheriting - _storage: "Storage" = attr.ib(default=None) - _prev_state_id: Optional[str] = attr.ib(default=None) - _event_type: str = attr.ib(default=None) - _event_state_key: Optional[str] = attr.ib(default=None) - _fetching_state_deferred: Optional["Deferred[None]"] = attr.ib(default=None) - - async def _ensure_fetched(self) -> None: - if not self._fetching_state_deferred: - self._fetching_state_deferred = run_in_background(self._fill_out_state) - - await make_deferred_yieldable(self._fetching_state_deferred) - - async def _fill_out_state(self) -> None: - """Called to populate the _current_state_ids and _prev_state_ids - attributes by loading from the database. - """ - if self.state_group is None: - # No state group means the event is an outlier. Usually the state_ids dicts are also - # pre-set to empty dicts, but they get reset when the context is serialized, so set - # them to empty dicts again here. 
- self._current_state_ids = {} - self._prev_state_ids = {} - return - - current_state_ids = await self._storage.state.get_state_ids_for_group( - self.state_group + assert self.state_group_before_event is not None + return await self._storage.state.get_state_ids_for_group( + self.state_group_before_event ) - # Set this separately so mypy knows current_state_ids is not None. - self._current_state_ids = current_state_ids - if self._event_state_key is not None: - self._prev_state_ids = dict(current_state_ids) - - key = (self._event_type, self._event_state_key) - if self._prev_state_id: - self._prev_state_ids[key] = self._prev_state_id - else: - self._prev_state_ids.pop(key, None) - else: - self._prev_state_ids = current_state_ids def _encode_state_dict( diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 38dc5b1f6edf..be5099b507f6 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -659,7 +659,7 @@ async def do_knock( # in the invitee's sync stream. It is stripped out for all other local users. 
event.unsigned["knock_room_state"] = stripped_room_state["knock_state_events"] - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -848,7 +848,7 @@ async def on_invite_request( ) ) - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) @@ -877,7 +877,7 @@ async def do_remotely_reject_invite( await self.federation_client.send_leave(host_list, event) - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) stream_id = await self._federation_event_handler.persist_events_and_notify( event.room_id, [(event, context)] ) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6cf927e4ff7b..6d11b32b61db 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -1423,7 +1423,7 @@ def prep(event: EventBase) -> Optional[Tuple[EventBase, EventContext]]: # we're not bothering about room state, so flag the event as an outlier. 
event.internal_metadata.outlier = True - context = EventContext.for_outlier() + context = EventContext.for_outlier(self._storage) try: validate_event_for_room_version(room_version_obj, event) check_auth_rules_for_event(room_version_obj, event, auth) @@ -1874,10 +1874,10 @@ async def _update_context_for_auth_events( ) return EventContext.with_state( + storage=self._storage, state_group=state_group, state_group_before_event=context.state_group_before_event, - current_state_ids=current_state_ids, - prev_state_ids=prev_state_ids, + state_delta_due_to_event=state_updates, prev_group=prev_group, delta_ids=state_updates, partial_state=context.partial_state, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index c28b792e6fe2..e47799e7f962 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -757,6 +757,10 @@ async def deduplicate_state_event( The previous version of the event is returned, if it is found in the event context. Otherwise, None is returned. 
""" + if event.internal_metadata.is_outlier(): + # This can happen due to out of band memberships + return None + prev_state_ids = await context.get_prev_state_ids() prev_event_id = prev_state_ids.get((event.type, event.state_key)) if not prev_event_id: @@ -1001,7 +1005,7 @@ async def create_new_client_event( # after it is created if builder.internal_metadata.outlier: event.internal_metadata.outlier = True - context = EventContext.for_outlier() + context = EventContext.for_outlier(self.storage) elif ( event.type == EventTypes.MSC2716_INSERTION and state_event_ids diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py index 60758df01664..730d9cd35463 100644 --- a/synapse/push/action_generator.py +++ b/synapse/push/action_generator.py @@ -40,5 +40,9 @@ def __init__(self, hs: "HomeServer"): async def handle_push_actions_for_event( self, event: EventBase, context: EventContext ) -> None: + if event.internal_metadata.is_outlier(): + # This can happen due to out of band memberships + return + with Measure(self.clock, "action_for_event_by_user"): await self.bulk_evaluator.action_for_event_by_user(event, context) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index cad3b4264007..54e41d537584 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -130,6 +130,7 @@ def __init__(self, hs: "HomeServer"): self.state_store = hs.get_storage().state self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() + self._storage = hs.get_storage() @overload async def get_current_state( @@ -361,10 +362,10 @@ async def compute_event_context( if not event.is_state(): return EventContext.with_state( + storage=self._storage, state_group_before_event=state_group_before_event, state_group=state_group_before_event, - current_state_ids=state_ids_before_event, - prev_state_ids=state_ids_before_event, + state_delta_due_to_event={}, prev_group=state_group_before_event_prev_group, 
delta_ids=deltas_to_state_group_before_event, partial_state=partial_state, @@ -393,10 +394,10 @@ async def compute_event_context( ) return EventContext.with_state( + storage=self._storage, state_group=state_group_after_event, state_group_before_event=state_group_before_event, - current_state_ids=state_ids_after_event, - prev_state_ids=state_ids_before_event, + state_delta_due_to_event=delta_ids, prev_group=state_group_before_event, delta_ids=delta_ids, partial_state=partial_state, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 6c12653bb3c6..f544bcfff07f 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -128,7 +128,6 @@ async def _persist_events_and_state_updates( self, events_and_contexts: List[Tuple[EventBase, EventContext]], *, - current_state_for_room: Dict[str, StateMap[str]], state_delta_for_room: Dict[str, DeltaState], new_forward_extremities: Dict[str, Set[str]], use_negative_stream_ordering: bool = False, @@ -139,8 +138,6 @@ async def _persist_events_and_state_updates( Args: events_and_contexts: - current_state_for_room: Map from room_id to the current state of - the room based on forward extremities state_delta_for_room: Map from room_id to the delta to apply to room state new_forward_extremities: Map from room_id to set of event IDs @@ -215,9 +212,6 @@ async def _persist_events_and_state_updates( event_counter.labels(event.type, origin_type, origin_entity).inc() - for room_id, new_state in current_state_for_room.items(): - self.store.get_current_state_ids.prefill((room_id,), new_state) - for room_id, latest_event_ids in new_forward_extremities.items(): self.store.get_latest_event_ids_in_room.prefill( (room_id,), list(latest_event_ids) diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 97118045a1ad..a7f6338e058d 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -487,12 
+487,6 @@ async def _persist_event_batch( # extremities in each room new_forward_extremities: Dict[str, Set[str]] = {} - # map room_id->(type,state_key)->event_id tracking the full - # state in each room after adding these events. - # This is simply used to prefill the get_current_state_ids - # cache - current_state_for_room: Dict[str, StateMap[str]] = {} - # map room_id->(to_delete, to_insert) where to_delete is a list # of type/state keys to remove from current state, and to_insert # is a map (type,key)->event_id giving the state delta in each @@ -628,14 +622,8 @@ async def _persist_event_batch( state_delta_for_room[room_id] = delta - # If we have the current_state then lets prefill - # the cache with it. - if current_state is not None: - current_state_for_room[room_id] = current_state - await self.persist_events_store._persist_events_and_state_updates( chunk, - current_state_for_room=current_state_for_room, state_delta_for_room=state_delta_for_room, new_forward_extremities=new_forward_extremities, use_negative_stream_ordering=backfilled, @@ -733,7 +721,8 @@ async def _get_new_state_after_events( The first state map is the full new current state and the second is the delta to the existing current state. If both are None then - there has been no change. + there has been no change. Either or neither can be None if there + has been a change. The function may prune some old entries from the set of new forward extremities if it's safe to do so. @@ -743,9 +732,6 @@ async def _get_new_state_after_events( the new current state is only returned if we've already calculated it. 
""" - # map from state_group to ((type, key) -> event_id) state map - state_groups_map = {} - # Map from (prev state group, new state group) -> delta state dict state_group_deltas = {} @@ -759,16 +745,6 @@ async def _get_new_state_after_events( ) continue - if ctx.state_group in state_groups_map: - continue - - # We're only interested in pulling out state that has already - # been cached in the context. We'll pull stuff out of the DB later - # if necessary. - current_state_ids = ctx.get_cached_current_state_ids() - if current_state_ids is not None: - state_groups_map[ctx.state_group] = current_state_ids - if ctx.prev_group: state_group_deltas[(ctx.prev_group, ctx.state_group)] = ctx.delta_ids @@ -826,18 +802,14 @@ async def _get_new_state_after_events( delta_ids = state_group_deltas.get((old_state_group, new_state_group), None) if delta_ids is not None: # We have a delta from the existing to new current state, - # so lets just return that. If we happen to already have - # the current state in memory then lets also return that, - # but it doesn't matter if we don't. - new_state = state_groups_map.get(new_state_group) - return new_state, delta_ids, new_latest_event_ids + # so lets just return that. + return None, delta_ids, new_latest_event_ids # Now that we have calculated new_state_groups we need to get # their state IDs so we can resolve to a single state set. 
- missing_state = new_state_groups - set(state_groups_map) - if missing_state: - group_to_state = await self.state_store._get_state_for_groups(missing_state) - state_groups_map.update(group_to_state) + state_groups_map = await self.state_store._get_state_for_groups( + new_state_groups + ) if len(new_state_groups) == 1: # If there is only one state group, then we know what the current diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 489ba5773672..e64b28f28b86 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -148,7 +148,9 @@ def _test_process_pulled_event_with_missing_state( prev_event.internal_metadata.outlier = True persistence = self.hs.get_storage().persistence self.get_success( - persistence.persist_event(prev_event, EventContext.for_outlier()) + persistence.persist_event( + prev_event, EventContext.for_outlier(self.hs.get_storage()) + ) ) else: diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index 401020fd6361..c7661e71868f 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -393,7 +393,7 @@ def _persist(txn): # We need to persist the events to the events and state_events # tables. persist_events_store._store_event_txn( - txn, [(e, EventContext()) for e in events] + txn, [(e, EventContext(self.hs.get_storage())) for e in events] ) # Actually call the function that calculates the auth chain stuff. 
diff --git a/tests/test_state.py b/tests/test_state.py index e4baa6913746..651ec1c7d4bd 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -88,6 +88,9 @@ async def get_state_groups_ids(self, room_id, event_ids): return groups + async def get_state_ids_for_group(self, state_group): + return self._group_to_state[state_group] + async def store_state_group( self, event_id, room_id, prev_group, delta_ids, current_state_ids ): diff --git a/tests/test_visibility.py b/tests/test_visibility.py index d0230f9ebbc5..7a9b01ef9d44 100644 --- a/tests/test_visibility.py +++ b/tests/test_visibility.py @@ -234,7 +234,9 @@ def _inject_outlier(self) -> EventBase: event = self.get_success(builder.build(prev_event_ids=[], auth_event_ids=[])) event.internal_metadata.outlier = True self.get_success( - self.storage.persistence.persist_event(event, EventContext.for_outlier()) + self.storage.persistence.persist_event( + event, EventContext.for_outlier(self.storage) + ) ) return event From 84facf769eb79112be5f21942c18047b2b85f0bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 10 May 2022 23:39:14 -0500 Subject: [PATCH 011/181] Fix `/messages` throwing a 500 when querying for non-existent room (#12683) Fix https://github.com/matrix-org/synapse/issues/12678 Complement test added: https://github.com/matrix-org/complement/pull/369 **Before:** 500 internal server error **After:** According to the [spec](https://spec.matrix.org/latest/client-server-api/#get_matrixclientv3roomsroomidmessages), calling `/messages` against a non-existent `room_id` should throw a 403 forbidden (since you're not part of the room). This also matches the behavior before https://github.com/matrix-org/synapse/pull/12370 which regressed Synapse to the 500 behavior. 
```json { "errcode": "M_FORBIDDEN", "error": "User @test:my.synapse.server not in room !dne:my.synapse.server, and room previews are disabled" } ``` --- changelog.d/12683.bugfix | 1 + synapse/handlers/pagination.py | 2 +- synapse/storage/databases/main/stream.py | 26 ++++++++++-------------- 3 files changed, 13 insertions(+), 16 deletions(-) create mode 100644 changelog.d/12683.bugfix diff --git a/changelog.d/12683.bugfix b/changelog.d/12683.bugfix new file mode 100644 index 000000000000..2ce84a223a37 --- /dev/null +++ b/changelog.d/12683.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.57.0 where `/messages` would throw a 500 error when querying for a non-existent room. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 7ee334037376..2e30180094d2 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -448,7 +448,7 @@ async def get_messages( ) # We expect `/messages` to use historic pagination tokens by default but # `/messages` should still works with live tokens when manually provided. - assert from_token.room_key.topological + assert from_token.room_key.topological is not None if pagin_config.limit is None: # This shouldn't happen as we've set a default limit before this diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 793e906630e8..4e1d9647b7b8 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -785,22 +785,14 @@ async def get_last_event_in_room_before_stream_ordering( return None async def get_current_room_stream_token_for_room_id( - self, room_id: Optional[str] = None + self, room_id: str ) -> RoomStreamToken: - """Returns the current position of the rooms stream. - - By default, it returns a live token with the current global stream - token. Specifying a `room_id` causes it to return a historic token with - the room specific topological token. 
- """ + """Returns the current position of the rooms stream (historic token).""" stream_ordering = self.get_room_max_stream_ordering() - if room_id is None: - return RoomStreamToken(None, stream_ordering) - else: - topo = await self.db_pool.runInteraction( - "_get_max_topological_txn", self._get_max_topological_txn, room_id - ) - return RoomStreamToken(topo, stream_ordering) + topo = await self.db_pool.runInteraction( + "_get_max_topological_txn", self._get_max_topological_txn, room_id + ) + return RoomStreamToken(topo, stream_ordering) def get_stream_id_for_event_txn( self, @@ -870,7 +862,11 @@ def _get_max_topological_txn(self, txn: LoggingTransaction, room_id: str) -> int ) rows = txn.fetchall() - return rows[0][0] if rows else 0 + # An aggregate function like MAX() will always return one row per group + # so we can safely rely on the lookup here. For example, when a we + # lookup a `room_id` which does not exist, `rows` will look like + # `[(None,)]` + return rows[0][0] if rows[0][0] is not None else 0 @staticmethod def _set_before_and_after( From a4c75918b3e9cf48fa2bb91e9861f5f6fd74bd2e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 11 May 2022 07:15:21 -0400 Subject: [PATCH 012/181] Remove unneeded `ActionGenerator` class. (#12691) It simply passes through to `BulkPushRuleEvaluator`, which can be called directly instead. 
--- changelog.d/12691.misc | 1 + synapse/handlers/federation_event.py | 4 +- synapse/handlers/message.py | 6 ++- synapse/push/__init__.py | 5 --- synapse/push/action_generator.py | 48 ------------------------ synapse/push/bulk_push_rule_evaluator.py | 7 ++++ synapse/server.py | 6 +-- 7 files changed, 17 insertions(+), 60 deletions(-) create mode 100644 changelog.d/12691.misc delete mode 100644 synapse/push/action_generator.py diff --git a/changelog.d/12691.misc b/changelog.d/12691.misc new file mode 100644 index 000000000000..c63543421111 --- /dev/null +++ b/changelog.d/12691.misc @@ -0,0 +1 @@ +Remove an unneeded class in the push code. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 6d11b32b61db..761caa04b726 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -103,7 +103,7 @@ def __init__(self, hs: "HomeServer"): self._event_creation_handler = hs.get_event_creation_handler() self._event_auth_handler = hs.get_event_auth_handler() self._message_handler = hs.get_message_handler() - self._action_generator = hs.get_action_generator() + self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() self._state_resolution_handler = hs.get_state_resolution_handler() # avoid a circular dependency by deferring execution here self._get_room_member_handler = hs.get_room_member_handler @@ -1913,7 +1913,7 @@ async def _run_push_actions_and_persist_event( min_depth, ) else: - await self._action_generator.handle_push_actions_for_event( + await self._bulk_push_rule_evaluator.action_for_event_by_user( event, context ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e47799e7f962..4a4b535bae6a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -426,7 +426,7 @@ def __init__(self, hs: "HomeServer"): # This is to stop us from diverging history *too* much. 
self.limiter = Linearizer(max_count=5, name="room_event_creation_limit") - self.action_generator = hs.get_action_generator() + self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() self.spam_checker = hs.get_spam_checker() self.third_party_event_rules: "ThirdPartyEventRules" = ( @@ -1249,7 +1249,9 @@ async def _persist_event( # and `state_groups` because they have `prev_events` that aren't persisted yet # (historical messages persisted in reverse-chronological order). if not event.internal_metadata.is_historical(): - await self.action_generator.handle_push_actions_for_event(event, context) + await self._bulk_push_rule_evaluator.action_for_event_by_user( + event, context + ) try: # If we're a worker we need to hit out to the master. diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index d1dfb406d43a..57c4d70466b6 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -43,11 +43,6 @@ +---------------------------------------------+ | v - +-----------------+ - | ActionGenerator | - +-----------------+ - | - v +-----------------------+ +---------------------------+ | BulkPushRuleEvaluator |---->| PushRuleEvaluatorForEvent | +-----------------------+ +---------------------------+ diff --git a/synapse/push/action_generator.py b/synapse/push/action_generator.py deleted file mode 100644 index 730d9cd35463..000000000000 --- a/synapse/push/action_generator.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2015 OpenMarket Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import TYPE_CHECKING - -from synapse.events import EventBase -from synapse.events.snapshot import EventContext -from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator -from synapse.util.metrics import Measure - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class ActionGenerator: - def __init__(self, hs: "HomeServer"): - self.clock = hs.get_clock() - self.bulk_evaluator = BulkPushRuleEvaluator(hs) - # really we want to get all user ids and all profile tags too, - # since we want the actions for each profile tag for every user and - # also actions for a client with no profile tag for each user. - # Currently the event stream doesn't support profile tags on an - # event stream, so we just run the rules for a client with no profile - # tag (ie. we just need all the users). - - async def handle_push_actions_for_event( - self, event: EventBase, context: EventContext - ) -> None: - if event.internal_metadata.is_outlier(): - # This can happen due to out of band memberships - return - - with Measure(self.clock, "action_for_event_by_user"): - await self.bulk_evaluator.action_for_event_by_user(event, context) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 85ddb56c6eb4..0ffafc882b65 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -29,6 +29,7 @@ from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.descriptors import lru_cache from synapse.util.caches.lrucache import LruCache +from synapse.util.metrics import measure_func from .push_rule_evaluator import PushRuleEvaluatorForEvent @@ -105,6 +106,7 @@ class BulkPushRuleEvaluator: def __init__(self, hs: "HomeServer"): self.hs = hs self.store = hs.get_datastores().main + self.clock = 
hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() # Used by `RulesForRoom` to ensure only one thing mutates the cache at a @@ -185,6 +187,7 @@ async def _get_power_levels_and_sender_level( return pl_event.content if pl_event else {}, sender_level + @measure_func("action_for_event_by_user") async def action_for_event_by_user( self, event: EventBase, context: EventContext ) -> None: @@ -192,6 +195,10 @@ async def action_for_event_by_user( should increment the unread count, and insert the results into the event_push_actions_staging table. """ + if event.internal_metadata.is_outlier(): + # This can happen due to out of band memberships + return + count_as_unread = _should_count_as_unread(event, context) rules_by_user = await self._get_rules_for_event(event, context) diff --git a/synapse/server.py b/synapse/server.py index d49c76518a8d..7daa7b9334c8 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -119,7 +119,7 @@ from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.module_api import ModuleApi from synapse.notifier import Notifier -from synapse.push.action_generator import ActionGenerator +from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator from synapse.push.pusherpool import PusherPool from synapse.replication.tcp.client import ReplicationDataHandler from synapse.replication.tcp.external_cache import ExternalCache @@ -644,8 +644,8 @@ def get_replication_command_handler(self) -> ReplicationCommandHandler: return ReplicationCommandHandler(self) @cache_in_self - def get_action_generator(self) -> ActionGenerator: - return ActionGenerator(self) + def get_bulk_push_rule_evaluator(self) -> BulkPushRuleEvaluator: + return BulkPushRuleEvaluator(self) @cache_in_self def get_user_directory_handler(self) -> UserDirectoryHandler: From dffecade7df8a88caced2a7707c51e2de3407c0d Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 11 May 2022 12:24:48 +0100 
Subject: [PATCH 013/181] Respect the `@cancellable` flag for `DirectServe{Html,Json}Resource`s (#12698) `DirectServeHtmlResource` and `DirectServeJsonResource` both inherit from `_AsyncResource`. These classes expect to be subclassed with `_async_render_*` methods. This commit has no effect on `JsonResource`, despite inheriting from `_AsyncResource`. `JsonResource` has its own `_async_render` override which will need to be updated separately. Signed-off-by: Sean Quah --- changelog.d/12698.misc | 1 + synapse/http/server.py | 2 + tests/test_server.py | 111 ++++++++++++++++++++++++++++++++++++++++- 3 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12698.misc diff --git a/changelog.d/12698.misc b/changelog.d/12698.misc new file mode 100644 index 000000000000..5d626352f9c2 --- /dev/null +++ b/changelog.d/12698.misc @@ -0,0 +1 @@ +Respect the `@cancellable` flag for `DirectServe{Html,Json}Resource`s. diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b4debc5cd2b..f6d4d8db86fa 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -382,6 +382,8 @@ async def _async_render(self, request: SynapseRequest) -> Optional[Tuple[int, An method_handler = getattr(self, "_async_render_%s" % (request_method,), None) if method_handler: + request.is_render_cancellable = is_method_cancellable(method_handler) + raw_callback_return = method_handler(request) # Is it synchronous? We'll allow this for now. diff --git a/tests/test_server.py b/tests/test_server.py index f2ffbc895b88..0f1eb43cbced 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -13,18 +13,28 @@ # limitations under the License. 
import re +from http import HTTPStatus +from typing import Tuple from twisted.internet.defer import Deferred from twisted.web.resource import Resource from synapse.api.errors import Codes, RedirectException, SynapseError from synapse.config.server import parse_listener_def -from synapse.http.server import DirectServeHtmlResource, JsonResource, OptionsResource -from synapse.http.site import SynapseSite +from synapse.http.server import ( + DirectServeHtmlResource, + DirectServeJsonResource, + JsonResource, + OptionsResource, + cancellable, +) +from synapse.http.site import SynapseRequest, SynapseSite from synapse.logging.context import make_deferred_yieldable +from synapse.types import JsonDict from synapse.util import Clock from tests import unittest +from tests.http.server._base import EndpointCancellationTestHelperMixin from tests.server import ( FakeSite, ThreadedMemoryReactorClock, @@ -363,3 +373,100 @@ async def callback(request): self.assertEqual(channel.result["code"], b"200") self.assertNotIn("body", channel.result) + + +class CancellableDirectServeJsonResource(DirectServeJsonResource): + def __init__(self, clock: Clock): + super().__init__() + self.clock = clock + + @cancellable + async def _async_render_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + async def _async_render_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + +class CancellableDirectServeHtmlResource(DirectServeHtmlResource): + ERROR_TEMPLATE = "{code} {msg}" + + def __init__(self, clock: Clock): + super().__init__() + self.clock = clock + + @cancellable + async def _async_render_GET(self, request: SynapseRequest) -> Tuple[int, bytes]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, b"ok" + + async def _async_render_POST(self, request: SynapseRequest) -> Tuple[int, bytes]: + await self.clock.sleep(1.0) + return 
HTTPStatus.OK, b"ok" + + +class DirectServeJsonResourceCancellationTests(EndpointCancellationTestHelperMixin): + """Tests for `DirectServeJsonResource` cancellation.""" + + def setUp(self): + self.reactor = ThreadedMemoryReactorClock() + self.clock = Clock(self.reactor) + self.resource = CancellableDirectServeJsonResource(self.clock) + self.site = FakeSite(self.resource, self.reactor) + + def test_cancellable_disconnect(self) -> None: + """Test that handlers with the `@cancellable` flag can be cancelled.""" + channel = make_request( + self.reactor, self.site, "GET", "/sleep", await_result=False + ) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=True, + expected_body={"error": "Request cancelled", "errcode": Codes.UNKNOWN}, + ) + + def test_uncancellable_disconnect(self) -> None: + """Test that handlers without the `@cancellable` flag cannot be cancelled.""" + channel = make_request( + self.reactor, self.site, "POST", "/sleep", await_result=False + ) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=False, + expected_body={"result": True}, + ) + + +class DirectServeHtmlResourceCancellationTests(EndpointCancellationTestHelperMixin): + """Tests for `DirectServeHtmlResource` cancellation.""" + + def setUp(self): + self.reactor = ThreadedMemoryReactorClock() + self.clock = Clock(self.reactor) + self.resource = CancellableDirectServeHtmlResource(self.clock) + self.site = FakeSite(self.resource, self.reactor) + + def test_cancellable_disconnect(self) -> None: + """Test that handlers with the `@cancellable` flag can be cancelled.""" + channel = make_request( + self.reactor, self.site, "GET", "/sleep", await_result=False + ) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=True, + expected_body=b"499 Request cancelled", + ) + + def test_uncancellable_disconnect(self) -> None: + """Test that handlers without the `@cancellable` flag cannot be cancelled.""" + channel = make_request( + self.reactor, 
self.site, "POST", "/sleep", await_result=False + ) + self._test_disconnect( + self.reactor, channel, expect_cancellation=False, expected_body=b"ok" + ) From 9d8e380d2e8267129de921b9b926257c36417cd2 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 11 May 2022 12:25:13 +0100 Subject: [PATCH 014/181] Respect the `@cancellable` flag for `RestServlet`s and `BaseFederationServlet`s (#12699) Both `RestServlet`s and `BaseFederationServlet`s register their handlers with `HttpServer.register_paths` / `JsonResource.register_paths`. Update `JsonResource` to respect the `@cancellable` flag on handlers registered in this way. Although `ReplicationEndpoint` also registers itself using `register_paths`, it does not pass the handler method that would have the `@cancellable` flag directly, and so needs separate handling. Signed-off-by: Sean Quah --- changelog.d/12699.misc | 1 + synapse/http/server.py | 5 + tests/federation/transport/server/__init__.py | 13 ++ .../federation/transport/server/test__base.py | 112 ++++++++++++++++++ tests/http/test_servlet.py | 60 +++++++++- tests/unittest.py | 2 +- 6 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12699.misc create mode 100644 tests/federation/transport/server/__init__.py create mode 100644 tests/federation/transport/server/test__base.py diff --git a/changelog.d/12699.misc b/changelog.d/12699.misc new file mode 100644 index 000000000000..d278a956c7a9 --- /dev/null +++ b/changelog.d/12699.misc @@ -0,0 +1 @@ +Respect the `@cancellable` flag for `RestServlet`s and `BaseFederationServlet`s. diff --git a/synapse/http/server.py b/synapse/http/server.py index f6d4d8db86fa..756c6e1aeeda 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -314,6 +314,9 @@ def register_paths( If the regex contains groups these gets passed to the callback via an unpacked tuple. 
+ The callback may be marked with the `@cancellable` decorator, which will + cause request processing to be cancelled when clients disconnect early. + Args: method: The HTTP method to listen to. path_patterns: The regex used to match requests. @@ -544,6 +547,8 @@ def _get_handler_for_request( async def _async_render(self, request: SynapseRequest) -> Tuple[int, Any]: callback, servlet_classname, group_dict = self._get_handler_for_request(request) + request.is_render_cancellable = is_method_cancellable(callback) + # Make sure we have an appropriate name for this handler in prometheus # (rather than the default of JsonResource). request.request_metrics.name = servlet_classname diff --git a/tests/federation/transport/server/__init__.py b/tests/federation/transport/server/__init__.py new file mode 100644 index 000000000000..3a5f22c02235 --- /dev/null +++ b/tests/federation/transport/server/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/federation/transport/server/test__base.py b/tests/federation/transport/server/test__base.py new file mode 100644 index 000000000000..98a951f03e07 --- /dev/null +++ b/tests/federation/transport/server/test__base.py @@ -0,0 +1,112 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from http import HTTPStatus +from typing import Dict, List, Tuple + +from synapse.api.errors import Codes +from synapse.federation.transport.server import BaseFederationServlet +from synapse.federation.transport.server._base import Authenticator +from synapse.http.server import JsonResource, cancellable +from synapse.server import HomeServer +from synapse.types import JsonDict +from synapse.util.ratelimitutils import FederationRateLimiter + +from tests import unittest +from tests.http.server._base import EndpointCancellationTestHelperMixin + + +class CancellableFederationServlet(BaseFederationServlet): + PATH = "/sleep" + + def __init__( + self, + hs: HomeServer, + authenticator: Authenticator, + ratelimiter: FederationRateLimiter, + server_name: str, + ): + super().__init__(hs, authenticator, ratelimiter, server_name) + self.clock = hs.get_clock() + + @cancellable + async def on_GET( + self, origin: str, content: None, query: Dict[bytes, List[bytes]] + ) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + async def on_POST( + self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]] + ) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + +class BaseFederationServletCancellationTests( + unittest.FederatingHomeserverTestCase, EndpointCancellationTestHelperMixin +): + """Tests for `BaseFederationServlet` cancellation.""" + + path = f"{CancellableFederationServlet.PREFIX}{CancellableFederationServlet.PATH}" + + def create_test_resource(self): + 
"""Overrides `HomeserverTestCase.create_test_resource`.""" + resource = JsonResource(self.hs) + + CancellableFederationServlet( + hs=self.hs, + authenticator=Authenticator(self.hs), + ratelimiter=self.hs.get_federation_ratelimiter(), + server_name=self.hs.hostname, + ).register(resource) + + return resource + + def test_cancellable_disconnect(self) -> None: + """Test that handlers with the `@cancellable` flag can be cancelled.""" + channel = self.make_signed_federation_request( + "GET", self.path, await_result=False + ) + + # Advance past all the rate limiting logic. If we disconnect too early, the + # request won't be processed. + self.pump() + + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=True, + expected_body={"error": "Request cancelled", "errcode": Codes.UNKNOWN}, + ) + + def test_uncancellable_disconnect(self) -> None: + """Test that handlers without the `@cancellable` flag cannot be cancelled.""" + channel = self.make_signed_federation_request( + "POST", + self.path, + content={}, + await_result=False, + ) + + # Advance past all the rate limiting logic. If we disconnect too early, the + # request won't be processed. + self.pump() + + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=False, + expected_body={"result": True}, + ) diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index a80bfb9f4eb5..ad521525cfaa 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -12,16 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import json +from http import HTTPStatus from io import BytesIO +from typing import Tuple from unittest.mock import Mock -from synapse.api.errors import SynapseError +from synapse.api.errors import Codes, SynapseError +from synapse.http.server import cancellable from synapse.http.servlet import ( + RestServlet, parse_json_object_from_request, parse_json_value_from_request, ) +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns +from synapse.server import HomeServer +from synapse.types import JsonDict from tests import unittest +from tests.http.server._base import EndpointCancellationTestHelperMixin def make_request(content): @@ -76,3 +85,52 @@ def test_parse_json_object(self): # Test not an object with self.assertRaises(SynapseError): parse_json_object_from_request(make_request(b'["foo"]')) + + +class CancellableRestServlet(RestServlet): + """A `RestServlet` with a mix of cancellable and uncancellable handlers.""" + + PATTERNS = client_patterns("/sleep$") + + def __init__(self, hs: HomeServer): + super().__init__() + self.clock = hs.get_clock() + + @cancellable + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + +class TestRestServletCancellation( + unittest.HomeserverTestCase, EndpointCancellationTestHelperMixin +): + """Tests for `RestServlet` cancellation.""" + + servlets = [ + lambda hs, http_server: CancellableRestServlet(hs).register(http_server) + ] + + def test_cancellable_disconnect(self) -> None: + """Test that handlers with the `@cancellable` flag can be cancelled.""" + channel = self.make_request("GET", "/sleep", await_result=False) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=True, + expected_body={"error": "Request cancelled", 
"errcode": Codes.UNKNOWN}, + ) + + def test_uncancellable_disconnect(self) -> None: + """Test that handlers without the `@cancellable` flag cannot be cancelled.""" + channel = self.make_request("POST", "/sleep", await_result=False) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=False, + expected_body={"result": True}, + ) diff --git a/tests/unittest.py b/tests/unittest.py index 9afa68c164ad..e7f255b4fa09 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -831,7 +831,7 @@ def make_signed_federation_request( self.site, method=method, path=path, - content=content or "", + content=content if content is not None else "", shorthand=False, await_result=await_result, custom_headers=custom_headers, From a559c8b0d939670b9d58dbeda6f3b1dd2f21937b Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 11 May 2022 12:25:39 +0100 Subject: [PATCH 015/181] Respect the `@cancellable` flag for `ReplicationEndpoint`s (#12700) While `ReplicationEndpoint`s register themselves via `JsonResource`, they pass a method that calls the handler, instead of the handler itself, to `register_paths`. As a result, `JsonResource` will not correctly pick up the `@cancellable` flag and we have to apply it ourselves. Signed-off-by: Sean Quah --- changelog.d/12700.misc | 1 + synapse/replication/http/_base.py | 21 +++++- tests/replication/http/__init__.py | 13 ++++ tests/replication/http/test__base.py | 106 +++++++++++++++++++++++++++ 4 files changed, 139 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12700.misc create mode 100644 tests/replication/http/__init__.py create mode 100644 tests/replication/http/test__base.py diff --git a/changelog.d/12700.misc b/changelog.d/12700.misc new file mode 100644 index 000000000000..d93eb5dada74 --- /dev/null +++ b/changelog.d/12700.misc @@ -0,0 +1 @@ +Respect the `@cancellable` flag for `ReplicationEndpoint`s. 
diff --git a/synapse/replication/http/_base.py b/synapse/replication/http/_base.py index 2bd244ed79df..a4ae4040c353 100644 --- a/synapse/replication/http/_base.py +++ b/synapse/replication/http/_base.py @@ -26,7 +26,8 @@ from synapse.api.errors import HttpResponseException, SynapseError from synapse.http import RequestTimedOutError -from synapse.http.server import HttpServer +from synapse.http.server import HttpServer, is_method_cancellable +from synapse.http.site import SynapseRequest from synapse.logging import opentracing from synapse.logging.opentracing import trace from synapse.types import JsonDict @@ -310,6 +311,12 @@ def register(self, http_server: HttpServer) -> None: url_args = list(self.PATH_ARGS) method = self.METHOD + if self.CACHE and is_method_cancellable(self._handle_request): + raise Exception( + f"{self.__class__.__name__} has been marked as cancellable, but CACHE " + "is set. The cancellable flag would have no effect." + ) + if self.CACHE: url_args.append("txn_id") @@ -324,7 +331,7 @@ def register(self, http_server: HttpServer) -> None: ) async def _check_auth_and_handle( - self, request: Request, **kwargs: Any + self, request: SynapseRequest, **kwargs: Any ) -> Tuple[int, JsonDict]: """Called on new incoming requests when caching is enabled. Checks if there is a cached response for the request and returns that, @@ -340,8 +347,18 @@ async def _check_auth_and_handle( if self.CACHE: txn_id = kwargs.pop("txn_id") + # We ignore the `@cancellable` flag, since cancellation wouldn't interrupt + # `_handle_request` and `ResponseCache` does not handle cancellation + # correctly yet. In particular, there may be issues to do with logging + # context lifetimes. + return await self.response_cache.wrap( txn_id, self._handle_request, request, **kwargs ) + # The `@cancellable` decorator may be applied to `_handle_request`.
But we + # told `HttpServer.register_paths` that our handler is `_check_auth_and_handle`, + # so we have to set up the cancellable flag ourselves. + request.is_render_cancellable = is_method_cancellable(self._handle_request) + return await self._handle_request(request, **kwargs) diff --git a/tests/replication/http/__init__.py b/tests/replication/http/__init__.py new file mode 100644 index 000000000000..3a5f22c02235 --- /dev/null +++ b/tests/replication/http/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/replication/http/test__base.py b/tests/replication/http/test__base.py new file mode 100644 index 000000000000..a5ab093a2722 --- /dev/null +++ b/tests/replication/http/test__base.py @@ -0,0 +1,106 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from http import HTTPStatus +from typing import Tuple + +from twisted.web.server import Request + +from synapse.api.errors import Codes +from synapse.http.server import JsonResource, cancellable +from synapse.replication.http import REPLICATION_PREFIX +from synapse.replication.http._base import ReplicationEndpoint +from synapse.server import HomeServer +from synapse.types import JsonDict + +from tests import unittest +from tests.http.server._base import EndpointCancellationTestHelperMixin + + +class CancellableReplicationEndpoint(ReplicationEndpoint): + NAME = "cancellable_sleep" + PATH_ARGS = () + CACHE = False + + def __init__(self, hs: HomeServer): + super().__init__(hs) + self.clock = hs.get_clock() + + @staticmethod + async def _serialize_payload() -> JsonDict: + return {} + + @cancellable + async def _handle_request( # type: ignore[override] + self, request: Request + ) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + +class UncancellableReplicationEndpoint(ReplicationEndpoint): + NAME = "uncancellable_sleep" + PATH_ARGS = () + CACHE = False + + def __init__(self, hs: HomeServer): + super().__init__(hs) + self.clock = hs.get_clock() + + @staticmethod + async def _serialize_payload() -> JsonDict: + return {} + + async def _handle_request( # type: ignore[override] + self, request: Request + ) -> Tuple[int, JsonDict]: + await self.clock.sleep(1.0) + return HTTPStatus.OK, {"result": True} + + +class ReplicationEndpointCancellationTestCase( + unittest.HomeserverTestCase, EndpointCancellationTestHelperMixin +): + """Tests for `ReplicationEndpoint` cancellation.""" + + def create_test_resource(self): + """Overrides `HomeserverTestCase.create_test_resource`.""" + resource = JsonResource(self.hs) + + CancellableReplicationEndpoint(self.hs).register(resource) + UncancellableReplicationEndpoint(self.hs).register(resource) + + return resource + + def test_cancellable_disconnect(self) -> None: + """Test that handlers 
with the `@cancellable` flag can be cancelled.""" + path = f"{REPLICATION_PREFIX}/{CancellableReplicationEndpoint.NAME}/" + channel = self.make_request("POST", path, await_result=False) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=True, + expected_body={"error": "Request cancelled", "errcode": Codes.UNKNOWN}, + ) + + def test_uncancellable_disconnect(self) -> None: + """Test that handlers without the `@cancellable` flag cannot be cancelled.""" + path = f"{REPLICATION_PREFIX}/{UncancellableReplicationEndpoint.NAME}/" + channel = self.make_request("POST", path, await_result=False) + self._test_disconnect( + self.reactor, + channel, + expect_cancellation=False, + expected_body={"result": True}, + ) From d38d242411b8910dfacde1e61fd3a0ec5cbcaa66 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 11 May 2022 14:43:22 +0100 Subject: [PATCH 016/181] Reload cache factors from disk on SIGHUP (#12673) --- changelog.d/12673.feature | 1 + docs/sample_config.yaml | 6 ++ .../configuration/config_documentation.md | 17 ++++ synapse/app/_base.py | 44 ++++++++++ synapse/app/homeserver.py | 36 +-------- synapse/config/_base.py | 81 +++++++++++++++++-- synapse/config/_base.pyi | 15 +++- synapse/config/cache.py | 49 ++++++----- synapse/http/client.py | 2 +- tests/config/test_cache.py | 8 ++ tests/server.py | 1 + 11 files changed, 199 insertions(+), 61 deletions(-) create mode 100644 changelog.d/12673.feature diff --git a/changelog.d/12673.feature b/changelog.d/12673.feature new file mode 100644 index 000000000000..f2bddd6e1c27 --- /dev/null +++ b/changelog.d/12673.feature @@ -0,0 +1 @@ +Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal. 
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index a803b8261dcd..e7b57f5a0bdf 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -730,6 +730,12 @@ retention: # A cache 'factor' is a multiplier that can be applied to each of # Synapse's caches in order to increase or decrease the maximum # number of entries that can be stored. +# +# The configuration for cache factors (caches.global_factor and +# caches.per_cache_factors) can be reloaded while the application is running, +# by sending a SIGHUP signal to the Synapse process. Changes to other parts of +# the caching config will NOT be applied after a SIGHUP is received; a restart +# is necessary. # The number of events to cache in memory. Not affected by # caches.global_factor. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 21dad0ac41e2..f292b94fb0cd 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1130,6 +1130,23 @@ caches: expire_caches: false sync_response_cache_duration: 2m ``` + +### Reloading cache factors + +The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a +[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g. + +```commandline +kill -HUP [PID_OF_SYNAPSE_PROCESS] +``` + +If you are running multiple workers, you must individually update the worker +config file and send this signal to each worker process. + +If you're using the [example systemd service](/~https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service) +file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using +`systemctl reload matrix-synapse`. + --- ## Database ## Config options related to database settings. 
diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 3623c1724ded..a3446ac6e874 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -49,9 +49,12 @@ from twisted.protocols.tls import TLSMemoryBIOFactory from twisted.python.threadpool import ThreadPool +import synapse.util.caches from synapse.api.constants import MAX_PDU_SIZE from synapse.app import check_bind_error from synapse.app.phone_stats_home import start_phone_stats_home +from synapse.config import ConfigError +from synapse.config._base import format_config_error from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ManholeConfig from synapse.crypto import context_factory @@ -432,6 +435,10 @@ def run_sighup(*args: Any, **kwargs: Any) -> None: signal.signal(signal.SIGHUP, run_sighup) register_sighup(refresh_certificate, hs) + register_sighup(reload_cache_config, hs.config) + + # Apply the cache config. + hs.config.caches.resize_all_caches() # Load the certificate from disk. refresh_certificate(hs) @@ -486,6 +493,43 @@ def run_sighup(*args: Any, **kwargs: Any) -> None: atexit.register(gc.freeze) +def reload_cache_config(config: HomeServerConfig) -> None: + """Reload cache config from disk and immediately apply it, resizing caches accordingly. + + If the config is invalid, a `ConfigError` is logged and no changes are made. + + Otherwise, this: + - replaces the `caches` section on the given `config` object, and + - resizes all caches according to the new cache factors. + + Note that the following cache config keys are read, but not applied: + - event_cache_size: used to set a max_size and _original_max_size on + EventsWorkerStore._get_event_cache when it is created. We'd have to update + the _original_max_size (and maybe + - sync_response_cache_duration: would have to update the timeout_sec attribute on + HomeServer -> SyncHandler -> ResponseCache. + - track_memory_usage.
This affects synapse.util.caches.TRACK_MEMORY_USAGE which + influences Synapse's self-reported metrics. + + Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost + parameter based on the global_factor. This won't be applied on a config reload. + """ + try: + previous_cache_config = config.reload_config_section("caches") + except ConfigError as e: + logger.warning("Failed to reload cache config") + for f in format_config_error(e): + logger.warning(f) + else: + logger.debug( + "New cache config. Was:\n %s\nNow:\n", + previous_cache_config.__dict__, + config.caches.__dict__, + ) + synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage + config.caches.resize_all_caches() + + def setup_sentry(hs: "HomeServer") -> None: """Enable sentry integration, if enabled in configuration""" diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 0f75e7b9d491..4c6c0658ab14 100644 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -16,7 +16,7 @@ import logging import os import sys -from typing import Dict, Iterable, Iterator, List +from typing import Dict, Iterable, List from matrix_common.versionstring import get_distribution_version_string @@ -45,7 +45,7 @@ redirect_stdio_to_logs, register_start, ) -from synapse.config._base import ConfigError +from synapse.config._base import ConfigError, format_config_error from synapse.config.emailconfig import ThreepidBehaviour from synapse.config.homeserver import HomeServerConfig from synapse.config.server import ListenerConfig @@ -399,38 +399,6 @@ async def start() -> None: return hs -def format_config_error(e: ConfigError) -> Iterator[str]: - """ - Formats a config error neatly - - The idea is to format the immediate error, plus the "causes" of those errors, - hopefully in a way that makes sense to the user. 
For example: - - Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template': - Failed to parse config for module 'JinjaOidcMappingProvider': - invalid jinja template: - unexpected end of template, expected 'end of print statement'. - - Args: - e: the error to be formatted - - Returns: An iterator which yields string fragments to be formatted - """ - yield "Error in configuration" - - if e.path: - yield " at '%s'" % (".".join(e.path),) - - yield ":\n %s" % (e.msg,) - - parent_e = e.__cause__ - indent = 1 - while parent_e: - indent += 1 - yield ":\n%s%s" % (" " * indent, str(parent_e)) - parent_e = parent_e.__cause__ - - def run(hs: HomeServer) -> None: _base.start_reactor( "synapse-homeserver", diff --git a/synapse/config/_base.py b/synapse/config/_base.py index 179aa7ff887e..42364fc133f1 100644 --- a/synapse/config/_base.py +++ b/synapse/config/_base.py @@ -16,14 +16,18 @@ import argparse import errno +import logging import os from collections import OrderedDict from hashlib import sha256 from textwrap import dedent from typing import ( Any, + ClassVar, + Collection, Dict, Iterable, + Iterator, List, MutableMapping, Optional, @@ -40,6 +44,8 @@ from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter +logger = logging.getLogger(__name__) + class ConfigError(Exception): """Represents a problem parsing the configuration @@ -55,6 +61,38 @@ def __init__(self, msg: str, path: Optional[Iterable[str]] = None): self.path = path +def format_config_error(e: ConfigError) -> Iterator[str]: + """ + Formats a config error neatly + + The idea is to format the immediate error, plus the "causes" of those errors, + hopefully in a way that makes sense to the user. For example: + + Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template': + Failed to parse config for module 'JinjaOidcMappingProvider': + invalid jinja template: + unexpected end of template, expected 'end of print statement'. 
+ + Args: + e: the error to be formatted + + Returns: An iterator which yields string fragments to be formatted + """ + yield "Error in configuration" + + if e.path: + yield " at '%s'" % (".".join(e.path),) + + yield ":\n %s" % (e.msg,) + + parent_e = e.__cause__ + indent = 1 + while parent_e: + indent += 1 + yield ":\n%s%s" % (" " * indent, str(parent_e)) + parent_e = parent_e.__cause__ + + # We split these messages out to allow packages to override with package # specific instructions. MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\ @@ -119,7 +157,7 @@ class Config: defined in subclasses. """ - section: str + section: ClassVar[str] def __init__(self, root_config: "RootConfig" = None): self.root = root_config @@ -309,9 +347,12 @@ class RootConfig: class, lower-cased and with "Config" removed. """ - config_classes = [] + config_classes: List[Type[Config]] = [] + + def __init__(self, config_files: Collection[str] = ()): + # Capture absolute paths here, so we can reload config after we daemonize. 
+ self.config_files = [os.path.abspath(path) for path in config_files] - def __init__(self): for config_class in self.config_classes: if config_class.section is None: raise ValueError("%r requires a section name" % (config_class,)) @@ -512,12 +553,10 @@ def load_config_with_parser( object from parser.parse_args(..)` """ - obj = cls() - config_args = parser.parse_args(argv) config_files = find_config_files(search_paths=config_args.config_path) - + obj = cls(config_files) if not config_files: parser.error("Must supply a config file.") @@ -627,7 +666,7 @@ def load_or_generate_config( generate_missing_configs = config_args.generate_missing_configs - obj = cls() + obj = cls(config_files) if config_args.generate_config: if config_args.report_stats is None: @@ -727,6 +766,34 @@ def generate_missing_files( ) -> None: self.invoke_all("generate_files", config_dict, config_dir_path) + def reload_config_section(self, section_name: str) -> Config: + """Reconstruct the given config section, leaving all others unchanged. + + This works in three steps: + + 1. Create a new instance of the relevant `Config` subclass. + 2. Call `read_config` on that instance to parse the new config. + 3. Replace the existing config instance with the new one. + + :raises ValueError: if the given `section` does not exist. + :raises ConfigError: for any other problems reloading config. + + :returns: the previous config object, which no longer has a reference to this + RootConfig. 
+ """ + existing_config: Optional[Config] = getattr(self, section_name, None) + if existing_config is None: + raise ValueError(f"Unknown config section '{section_name}'") + logger.info("Reloading config section '%s'", section_name) + + new_config_data = read_config_files(self.config_files) + new_config = type(existing_config)(self) + new_config.read_config(new_config_data) + setattr(self, section_name, new_config) + + existing_config.root = None + return existing_config + def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]: """Read the config files into a dict diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index bd092f956dde..71d6655fda4e 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -1,15 +1,19 @@ import argparse from typing import ( Any, + Collection, Dict, Iterable, + Iterator, List, + Literal, MutableMapping, Optional, Tuple, Type, TypeVar, Union, + overload, ) import jinja2 @@ -64,6 +68,8 @@ class ConfigError(Exception): self.msg = msg self.path = path +def format_config_error(e: ConfigError) -> Iterator[str]: ... + MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str MISSING_REPORT_STATS_SPIEL: str MISSING_SERVER_NAME: str @@ -117,7 +123,8 @@ class RootConfig: background_updates: background_updates.BackgroundUpdateConfig config_classes: List[Type["Config"]] = ... - def __init__(self) -> None: ... + config_files: List[str] + def __init__(self, config_files: Collection[str] = ...) -> None: ... def invoke_all( self, func_name: str, *args: Any, **kwargs: Any ) -> MutableMapping[str, Any]: ... @@ -157,6 +164,12 @@ class RootConfig: def generate_missing_files( self, config_dict: dict, config_dir_path: str ) -> None: ... + @overload + def reload_config_section( + self, section_name: Literal["caches"] + ) -> cache.CacheConfig: ... + @overload + def reload_config_section(self, section_name: str) -> Config: ... 
class Config: root: RootConfig diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 94d852f413d9..58b2fe55193c 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -69,11 +69,11 @@ def _canonicalise_cache_name(cache_name: str) -> str: def add_resizable_cache( cache_name: str, cache_resize_callback: Callable[[float], None] ) -> None: - """Register a cache that's size can dynamically change + """Register a cache whose size can dynamically change Args: cache_name: A reference to the cache - cache_resize_callback: A callback function that will be ran whenever + cache_resize_callback: A callback function that will run whenever the cache needs to be resized """ # Some caches have '*' in them which we strip out. @@ -96,6 +96,13 @@ class CacheConfig(Config): section = "caches" _environ = os.environ + event_cache_size: int + cache_factors: Dict[str, float] + global_factor: float + track_memory_usage: bool + expiry_time_msec: Optional[int] + sync_response_cache_duration: int + @staticmethod def reset() -> None: """Resets the caches to their defaults. Used for tests.""" @@ -115,6 +122,12 @@ def generate_config_section(self, **kwargs: Any) -> str: # A cache 'factor' is a multiplier that can be applied to each of # Synapse's caches in order to increase or decrease the maximum # number of entries that can be stored. + # + # The configuration for cache factors (caches.global_factor and + # caches.per_cache_factors) can be reloaded while the application is running, + # by sending a SIGHUP signal to the Synapse process. Changes to other parts of + # the caching config will NOT be applied after a SIGHUP is received; a restart + # is necessary. # The number of events to cache in memory. Not affected by # caches.global_factor. @@ -174,21 +187,21 @@ def generate_config_section(self, **kwargs: Any) -> str: """ def read_config(self, config: JsonDict, **kwargs: Any) -> None: + """Populate this config object with values from `config`. 
+ + This method does NOT resize existing or future caches: use `resize_all_caches`. + We use two separate methods so that we can reject bad config before applying it. + """ self.event_cache_size = self.parse_size( config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE) ) - self.cache_factors: Dict[str, float] = {} + self.cache_factors = {} cache_config = config.get("caches") or {} - self.global_factor = cache_config.get( - "global_factor", properties.default_factor_size - ) + self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE) if not isinstance(self.global_factor, (int, float)): raise ConfigError("caches.global_factor must be a number.") - # Set the global one so that it's reflected in new caches - properties.default_factor_size = self.global_factor - # Load cache factors from the config individual_factors = cache_config.get("per_cache_factors") or {} if not isinstance(individual_factors, dict): @@ -230,7 +243,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m") if expire_caches: - self.expiry_time_msec: Optional[int] = self.parse_duration(cache_entry_ttl) + self.expiry_time_msec = self.parse_duration(cache_entry_ttl) else: self.expiry_time_msec = None @@ -254,19 +267,19 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: cache_config.get("sync_response_cache_duration", 0) ) - # Resize all caches (if necessary) with the new factors we've loaded - self.resize_all_caches() - - # Store this function so that it can be called from other classes without - # needing an instance of Config - properties.resize_all_caches_func = self.resize_all_caches - def resize_all_caches(self) -> None: - """Ensure all cache sizes are up to date + """Ensure all cache sizes are up-to-date. For each cache, run the mapped callback function with either a specific cache factor or the default, global one. 
""" + # Set the global factor size, so that new caches are appropriately sized. + properties.default_factor_size = self.global_factor + + # Store this function so that it can be called from other classes without + # needing an instance of CacheConfig + properties.resize_all_caches_func = self.resize_all_caches + # block other threads from modifying _CACHES while we iterate it. with _CACHES_LOCK: for cache_name, callback in _CACHES.items(): diff --git a/synapse/http/client.py b/synapse/http/client.py index 8310fb466ac5..b2c9a7c67090 100644 --- a/synapse/http/client.py +++ b/synapse/http/client.py @@ -348,7 +348,7 @@ def __init__( # XXX: The justification for using the cache factor here is that larger instances # will need both more cache and more connections. # Still, this should probably be a separate dial - pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5)) + pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5) pool.cachedConnectionTimeout = 2 * 60 self.agent: IAgent = ProxyAgent( diff --git a/tests/config/test_cache.py b/tests/config/test_cache.py index 4bb82e810e0c..d2b3c299e354 100644 --- a/tests/config/test_cache.py +++ b/tests/config/test_cache.py @@ -38,6 +38,7 @@ def test_individual_caches_from_environ(self): "SYNAPSE_NOT_CACHE": "BLAH", } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0}) @@ -52,6 +53,7 @@ def test_config_overrides_environ(self): "SYNAPSE_CACHE_FACTOR_FOO": 1, } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual( dict(self.config.cache_factors), @@ -71,6 +73,7 @@ def test_individual_instantiated_before_config_load(self): config = {"caches": {"per_cache_factors": {"foo": 3}}} self.config.read_config(config) + self.config.resize_all_caches() self.assertEqual(cache.max_size, 300) @@ -82,6 +85,7 
@@ def test_individual_instantiated_after_config_load(self): """ config = {"caches": {"per_cache_factors": {"foo": 2}}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache(100) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) @@ -99,6 +103,7 @@ def test_global_instantiated_before_config_load(self): config = {"caches": {"global_factor": 4}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() self.assertEqual(cache.max_size, 400) @@ -110,6 +115,7 @@ def test_global_instantiated_after_config_load(self): """ config = {"caches": {"global_factor": 1.5}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache(100) add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor) @@ -128,6 +134,7 @@ def test_cache_with_asterisk_in_name(self): "SYNAPSE_CACHE_FACTOR_CACHE_B": 3, } self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache_a = LruCache(100) add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor) @@ -148,6 +155,7 @@ def test_apply_cache_factor_from_config(self): config = {"caches": {"event_cache_size": "10k"}} self.config.read_config(config, config_dir_path="", data_dir_path="") + self.config.resize_all_caches() cache = LruCache( max_size=self.config.event_cache_size, diff --git a/tests/server.py b/tests/server.py index aaefcfc46cd9..b9f465971fdd 100644 --- a/tests/server.py +++ b/tests/server.py @@ -749,6 +749,7 @@ def setup_test_homeserver( if config is None: config = default_config(name, parse=True) + config.caches.resize_all_caches() config.ldap_enabled = False if "clock" not in kwargs: From 6ee61b905256f87dc2b75007ed711cd59065db9a Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 11 May 2022 14:52:26 +0100 Subject: [PATCH 
017/181] Complain if a federation endpoint has the `@cancellable` flag (#12705) `BaseFederationServlet` wraps its endpoints in a bunch of async code that has not been vetted for compatibility with cancellation. Fail CI if a `@cancellable` flag is applied to a federation endpoint. Signed-off-by: Sean Quah --- changelog.d/12705.misc | 1 + synapse/federation/transport/server/_base.py | 13 ++++++++++++- tests/federation/transport/server/test__base.py | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12705.misc diff --git a/changelog.d/12705.misc b/changelog.d/12705.misc new file mode 100644 index 000000000000..a913d8bb85eb --- /dev/null +++ b/changelog.d/12705.misc @@ -0,0 +1 @@ +Complain if a federation endpoint has the `@cancellable` flag, since some of the wrapper code may not handle cancellation correctly yet. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index d629a3ecb5dd..103861644a70 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -21,7 +21,7 @@ from synapse.api.errors import Codes, FederationDeniedError, SynapseError from synapse.api.urls import FEDERATION_V1_PREFIX -from synapse.http.server import HttpServer, ServletCallback +from synapse.http.server import HttpServer, ServletCallback, is_method_cancellable from synapse.http.servlet import parse_json_object_from_request from synapse.http.site import SynapseRequest from synapse.logging.context import run_in_background @@ -373,6 +373,17 @@ def register(self, server: HttpServer) -> None: if code is None: continue + if is_method_cancellable(code): + # The wrapper added by `self._wrap` will inherit the cancellable flag, + # but the wrapper itself does not support cancellation yet. + # Once resolved, the cancellation tests in + # `tests/federation/transport/server/test__base.py` can be re-enabled. 
+ raise Exception( + f"{self.__class__.__name__}.on_{method} has been marked as " + "cancellable, but federation servlets do not support cancellation " + "yet." + ) + server.register_paths( method, (pattern,), diff --git a/tests/federation/transport/server/test__base.py b/tests/federation/transport/server/test__base.py index 98a951f03e07..ac3695a8ccab 100644 --- a/tests/federation/transport/server/test__base.py +++ b/tests/federation/transport/server/test__base.py @@ -59,6 +59,8 @@ class BaseFederationServletCancellationTests( ): """Tests for `BaseFederationServlet` cancellation.""" + skip = "`BaseFederationServlet` does not support cancellation yet." + path = f"{CancellableFederationServlet.PREFIX}{CancellableFederationServlet.PATH}" def create_test_resource(self): From db10f2c037ff59124776a10e198ab432aec2bdc6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 11 May 2022 16:34:17 +0100 Subject: [PATCH 018/181] No longer permit empty body when sending receipts (#12709) --- changelog.d/12709.removal | 1 + synapse/rest/client/receipts.py | 13 +------------ tests/rest/client/test_sync.py | 30 ++++-------------------------- 3 files changed, 6 insertions(+), 38 deletions(-) create mode 100644 changelog.d/12709.removal diff --git a/changelog.d/12709.removal b/changelog.d/12709.removal new file mode 100644 index 000000000000..6bb03e28941f --- /dev/null +++ b/changelog.d/12709.removal @@ -0,0 +1 @@ +Require a body in POST requests to `/rooms/{roomId}/receipt/{receiptType}/{eventId}`, as required by the [Matrix specification](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3roomsroomidreceiptreceipttypeeventid). This breaks compatibility with Element Android 1.2.0 and earlier: users of those clients will be unable to send read receipts. 
diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index f9caab663523..4b03eb876b75 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -13,12 +13,10 @@ # limitations under the License. import logging -import re from typing import TYPE_CHECKING, Tuple from synapse.api.constants import ReceiptTypes from synapse.api.errors import SynapseError -from synapse.http import get_request_user_agent from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request from synapse.http.site import SynapseRequest @@ -26,8 +24,6 @@ from ._base import client_patterns -pattern = re.compile(r"(?:Element|SchildiChat)/1\.[012]\.") - if TYPE_CHECKING: from synapse.server import HomeServer @@ -69,14 +65,7 @@ async def on_POST( ): raise SynapseError(400, "Receipt type must be 'm.read'") - # Do not allow older SchildiChat and Element Android clients (prior to Element/1.[012].x) to send an empty body. - user_agent = get_request_user_agent(request) - allow_empty_body = False - if "Android" in user_agent: - if pattern.match(user_agent) or "Riot" in user_agent: - allow_empty_body = True - # This call makes sure possible empty body is handled correctly - parse_json_object_from_request(request, allow_empty_body) + parse_json_object_from_request(request, allow_empty_body=False) await self.presence_handler.bump_presence_active_time(requester.user) diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 010833764957..2722bf26e76c 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import json +from http import HTTPStatus from typing import List, Optional from parameterized import parameterized @@ -485,30 +486,7 @@ def test_private_receipt_cannot_override_public(self) -> None: # Test that we didn't override the public read receipt self.assertIsNone(self._get_read_receipt()) - @parameterized.expand( - [ - # Old Element version, expected to send an empty body - ( - "agent1", - "Element/1.2.2 (Linux; U; Android 9; MatrixAndroidSDK_X 0.0.1)", - 200, - ), - # Old SchildiChat version, expected to send an empty body - ("agent2", "SchildiChat/1.2.1 (Android 10)", 200), - # Expected 400: Denies empty body starting at version 1.3+ - ("agent3", "Element/1.3.6 (Android 10)", 400), - ("agent4", "SchildiChat/1.3.6 (Android 11)", 400), - # Contains "Riot": Receipts with empty bodies expected - ("agent5", "Element (Riot.im) (Android 9)", 200), - # Expected 400: Does not contain "Android" - ("agent6", "Element/1.2.1", 400), - # Expected 400: Different format, missing "/" after Element; existing build that should allow empty bodies, but minimal ongoing usage - ("agent7", "Element dbg/1.1.8-dev (Android)", 400), - ] - ) - def test_read_receipt_with_empty_body( - self, name: str, user_agent: str, expected_status_code: int - ) -> None: + def test_read_receipt_with_empty_body_is_rejected(self) -> None: # Send a message as the first user res = self.helper.send(self.room_id, body="hello", tok=self.tok) @@ -517,9 +495,9 @@ def test_read_receipt_with_empty_body( "POST", f"/rooms/{self.room_id}/receipt/m.read/{res['event_id']}", access_token=self.tok2, - custom_headers=[("User-Agent", user_agent)], ) - self.assertEqual(channel.code, expected_status_code) + self.assertEqual(channel.code, HTTPStatus.BAD_REQUEST) + self.assertEqual(channel.json_body["errcode"], "M_NOT_JSON", channel.json_body) def _get_read_receipt(self) -> Optional[JsonDict]: """Syncs and returns the read receipt.""" From bf7ce92bf7307ded3643d8cc5ee01aee21f23f58 Mon Sep 17 00:00:00 2001 From: Sean Quah 
<8349537+squahtx@users.noreply.github.com> Date: Wed, 11 May 2022 17:22:34 +0100 Subject: [PATCH 019/181] Enable cancellation of `GET /members` and `GET /state` requests (#12708) Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$state_key/*` requests. Signed-off-by: Sean Quah --- changelog.d/12708.misc | 1 + synapse/http/server.py | 4 +++- synapse/rest/client/room.py | 6 +++++- 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12708.misc diff --git a/changelog.d/12708.misc b/changelog.d/12708.misc new file mode 100644 index 000000000000..aa99e7311b97 --- /dev/null +++ b/changelog.d/12708.misc @@ -0,0 +1 @@ +Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$event_type/*` requests. diff --git a/synapse/http/server.py b/synapse/http/server.py index 756c6e1aeeda..e3dcc3f3dd06 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -139,7 +139,9 @@ class SomeServlet(RestServlet): async def on_GET(self, request: SynapseRequest) -> ...: ... """ - if method.__name__ not in _cancellable_method_names: + if method.__name__ not in _cancellable_method_names and not any( + method.__name__.startswith(prefix) for prefix in _cancellable_method_names + ): raise ValueError( "@cancellable decorator can only be applied to servlet methods." 
) diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 906fe09e9713..4b8bfbffcb36 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -34,7 +34,7 @@ ) from synapse.api.filtering import Filter from synapse.events.utils import format_event_for_client_v2 -from synapse.http.server import HttpServer +from synapse.http.server import HttpServer, cancellable from synapse.http.servlet import ( ResolveRoomIdMixin, RestServlet, @@ -143,6 +143,7 @@ def register(self, http_server: HttpServer) -> None: self.__class__.__name__, ) + @cancellable def on_GET_no_state_key( self, request: SynapseRequest, room_id: str, event_type: str ) -> Awaitable[Tuple[int, JsonDict]]: @@ -153,6 +154,7 @@ def on_PUT_no_state_key( ) -> Awaitable[Tuple[int, JsonDict]]: return self.on_PUT(request, room_id, event_type, "") + @cancellable async def on_GET( self, request: SynapseRequest, room_id: str, event_type: str, state_key: str ) -> Tuple[int, JsonDict]: @@ -481,6 +483,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.store = hs.get_datastores().main + @cancellable async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, JsonDict]: @@ -602,6 +605,7 @@ def __init__(self, hs: "HomeServer"): self.message_handler = hs.get_message_handler() self.auth = hs.get_auth() + @cancellable async def on_GET( self, request: SynapseRequest, room_id: str ) -> Tuple[int, List[JsonDict]]: From 409573f6d0f146db2a55914cb7b65a0a95f6fde5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 12 May 2022 09:29:37 +0100 Subject: [PATCH 020/181] Fix reference to the wrong symbol in the media admin api docs (#12715) --- changelog.d/12715.doc | 1 + docs/admin_api/media_admin_api.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12715.doc diff --git a/changelog.d/12715.doc b/changelog.d/12715.doc new file mode 100644 index 000000000000..150d78c3f634 
--- /dev/null +++ b/changelog.d/12715.doc @@ -0,0 +1 @@ +Fix a typo in the Media Admin API documentation. diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 96b3668f2a08..d57c5aedae4c 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -289,7 +289,7 @@ POST /_synapse/admin/v1/purge_media_cache?before_ts= URL Parameters -* `unix_timestamp_in_ms`: string representing a positive integer - Unix timestamp in milliseconds. +* `before_ts`: string representing a positive integer - Unix timestamp in milliseconds. All cached media that was last accessed before this timestamp will be removed. Response: From de1e599b9defdc9b541f14a03157f614cb688729 Mon Sep 17 00:00:00 2001 From: Andy Balaam Date: Thu, 12 May 2022 11:41:35 +0100 Subject: [PATCH 021/181] add default_power_level_content_override config option. (#12618) Co-authored-by: Matthew Hodgson --- changelog.d/12618.feature | 1 + docs/sample_config.yaml | 34 +++ .../configuration/config_documentation.md | 26 ++ synapse/config/room.py | 47 ++++ synapse/handlers/room.py | 16 +- tests/rest/client/test_rooms.py | 258 ++++++++++++++++++ 6 files changed, 381 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12618.feature diff --git a/changelog.d/12618.feature b/changelog.d/12618.feature new file mode 100644 index 000000000000..37fa03b3cb41 --- /dev/null +++ b/changelog.d/12618.feature @@ -0,0 +1 @@ +Add a `default_power_level_content_override` config option to set default room power levels per room preset. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index e7b57f5a0bdf..03a0f6314cdd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2468,6 +2468,40 @@ push: # #encryption_enabled_by_default_for_room_type: invite +# Override the default power levels for rooms created on this server, per +# room creation preset. 
+# +# The appropriate dictionary for the room preset will be applied on top +# of the existing power levels content. +# +# Useful if you know that your users need special permissions in rooms +# that they create (e.g. to send particular types of state events without +# needing an elevated power level). This takes the same shape as the +# `power_level_content_override` parameter in the /createRoom API, but +# is applied before that parameter. +# +# Valid keys are some or all of `private_chat`, `trusted_private_chat` +# and `public_chat`. Inside each of those should be any of the +# properties allowed in `power_level_content_override` in the +# /createRoom API. If any property is missing, its default value will +# continue to be used. If any property is present, it will overwrite +# the existing default completely (so if the `events` property exists, +# the default event power levels will be ignored). +# +#default_power_level_content_override: +# private_chat: +# "events": +# "com.example.myeventtype" : 0 +# "m.room.avatar": 50 +# "m.room.canonical_alias": 50 +# "m.room.encryption": 100 +# "m.room.history_visibility": 100 +# "m.room.name": 50 +# "m.room.power_levels": 100 +# "m.room.server_acl": 100 +# "m.room.tombstone": 100 +# "events_default": 1 + # Uncomment to allow non-server-admin users to create groups on this server # diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f292b94fb0cd..2af1f284b14e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3315,6 +3315,32 @@ room_list_publication_rules: room_id: "*" action: allow ``` + +--- +Config option: `default_power_level_content_override` + +The `default_power_level_content_override` option controls the default power +levels for rooms. + +Useful if you know that your users need special permissions in rooms +that they create (e.g. 
to send particular types of state events without +needing an elevated power level). This takes the same shape as the +`power_level_content_override` parameter in the /createRoom API, but +is applied before that parameter. + +Note that each key provided inside a preset (for example `events` in the example +below) will overwrite all existing defaults inside that key. So in the example +below, newly-created private_chat rooms will have no rules for any event types +except `com.example.foo`. + +Example configuration: +```yaml +default_power_level_content_override: + private_chat: { "events": { "com.example.foo" : 0 } } + trusted_private_chat: null + public_chat: null +``` + --- ## Opentracing ## Configuration options related to Opentracing support. diff --git a/synapse/config/room.py b/synapse/config/room.py index e18a87ea37f6..462d85ac1d1e 100644 --- a/synapse/config/room.py +++ b/synapse/config/room.py @@ -63,6 +63,19 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: "Invalid value for encryption_enabled_by_default_for_room_type" ) + self.default_power_level_content_override = config.get( + "default_power_level_content_override", + None, + ) + if self.default_power_level_content_override is not None: + for preset in self.default_power_level_content_override: + if preset not in vars(RoomCreationPreset).values(): + raise ConfigError( + "Unrecognised room preset %s in default_power_level_content_override" + % preset + ) + # We validate the actual overrides when we try to apply them. + def generate_config_section(self, **kwargs: Any) -> str: return """\ ## Rooms ## @@ -83,4 +96,38 @@ def generate_config_section(self, **kwargs: Any) -> str: # will also not affect rooms created by other servers. # #encryption_enabled_by_default_for_room_type: invite + + # Override the default power levels for rooms created on this server, per + # room creation preset. 
+ # + # The appropriate dictionary for the room preset will be applied on top + # of the existing power levels content. + # + # Useful if you know that your users need special permissions in rooms + # that they create (e.g. to send particular types of state events without + # needing an elevated power level). This takes the same shape as the + # `power_level_content_override` parameter in the /createRoom API, but + # is applied before that parameter. + # + # Valid keys are some or all of `private_chat`, `trusted_private_chat` + # and `public_chat`. Inside each of those should be any of the + # properties allowed in `power_level_content_override` in the + # /createRoom API. If any property is missing, its default value will + # continue to be used. If any property is present, it will overwrite + # the existing default completely (so if the `events` property exists, + # the default event power levels will be ignored). + # + #default_power_level_content_override: + # private_chat: + # "events": + # "com.example.myeventtype" : 0 + # "m.room.avatar": 50 + # "m.room.canonical_alias": 50 + # "m.room.encryption": 100 + # "m.room.history_visibility": 100 + # "m.room.name": 50 + # "m.room.power_levels": 100 + # "m.room.server_acl": 100 + # "m.room.tombstone": 100 + # "events_default": 1 """ diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 604eb6ec154a..e71c78adad67 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -149,6 +149,10 @@ def __init__(self, hs: "HomeServer"): ) preset_config["encrypted"] = encrypted + self._default_power_level_content_override = ( + self.config.room.default_power_level_content_override + ) + self._replication = hs.get_replication_data_handler() # linearizer to stop two upgrades happening at once @@ -1042,9 +1046,19 @@ async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: for invitee in invite_list: power_level_content["users"][invitee] = 100 - # Power levels overrides are defined per chat 
preset + # If the user supplied a preset name e.g. "private_chat", + # we apply that preset power_level_content.update(config["power_level_content_override"]) + # If the server config contains default_power_level_content_override, + # and that contains information for this room preset, apply it. + if self._default_power_level_content_override: + override = self._default_power_level_content_override.get(preset_config) + if override is not None: + power_level_content.update(override) + + # Finally, if the user supplied specific permissions for this room, + # apply those. if power_level_content_override: power_level_content.update(power_level_content_override) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index 9443daa0560a..ad416e2fd8c0 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -1116,6 +1116,264 @@ def test_rooms_messages_sent(self) -> None: self.assertEqual(200, channel.code, msg=channel.result["body"]) +class RoomPowerLevelOverridesTestCase(RoomBase): + """Tests that the power levels can be overridden with server config.""" + + user_id = "@sid1:red" + + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.admin_user_id = self.register_user("admin", "pass") + self.admin_access_token = self.login("admin", "pass") + + def power_levels(self, room_id: str) -> Dict[str, Any]: + return self.helper.get_state( + room_id, "m.room.power_levels", self.admin_access_token + ) + + def test_default_power_levels_with_room_override(self) -> None: + """ + Create a room, providing power level overrides. + Confirm that the room's power levels reflect the overrides. + + See https://github.com/matrix-org/matrix-spec/issues/492 + - currently we overwrite each key of power_level_content_override + completely.
+ """ + + room_id = self.helper.create_room_as( + self.user_id, + extra_content={ + "power_level_content_override": {"events": {"custom.event": 0}} + }, + ) + self.assertEqual( + { + "custom.event": 0, + }, + self.power_levels(room_id)["events"], + ) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": {"events": {"custom.event": 0}}, + } + }, + ) + def test_power_levels_with_server_override(self) -> None: + """ + With a server configured to modify the room-level defaults, + Create a room, without providing any extra power level overrides. + Confirm that the room's power levels reflect the server-level overrides. + + Similar to https://github.com/matrix-org/matrix-spec/issues/492, + we overwrite each key of power_level_content_override completely. + """ + + room_id = self.helper.create_room_as(self.user_id) + self.assertEqual( + { + "custom.event": 0, + }, + self.power_levels(room_id)["events"], + ) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": { + "events": {"server.event": 0}, + "ban": 13, + }, + } + }, + ) + def test_power_levels_with_server_and_room_overrides(self) -> None: + """ + With a server configured to modify the room-level defaults, + create a room, providing different overrides. + Confirm that the room's power levels reflect both overrides, and + choose the room overrides where they clash.
+ """ + + room_id = self.helper.create_room_as( + self.user_id, + extra_content={ + "power_level_content_override": {"events": {"room.event": 0}} + }, + ) + + # Room override wins over server config + self.assertEqual( + {"room.event": 0}, + self.power_levels(room_id)["events"], + ) + + # But where there is no room override, server config wins + self.assertEqual(13, self.power_levels(room_id)["ban"]) + + +class RoomPowerLevelOverridesInPracticeTestCase(RoomBase): + """ + Tests that we can really do various otherwise-prohibited actions + based on overriding the power levels in config. + """ + + user_id = "@sid1:red" + + def test_creator_can_post_state_event(self) -> None: + # Given I am the creator of a room + room_id = self.helper.create_room_as(self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am allowed + self.assertEqual(200, channel.code, msg=channel.result["body"]) + + def test_normal_user_can_not_post_state_event(self) -> None: + # Given I am a normal member of a room + room_id = self.helper.create_room_as("@some_other_guy:red") + self.helper.join(room=room_id, user=self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am not allowed because state events require PL>=50 + self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual( + "You don't have permission to post that to the room. 
" + "user_level (0) < send_level (50)", + channel.json_body["error"], + ) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": {"events": {"custom.event": 0}}, + } + }, + ) + def test_with_config_override_normal_user_can_post_state_event(self) -> None: + # Given the server has config allowing normal users to post my event type, + # and I am a normal member of a room + room_id = self.helper.create_room_as("@some_other_guy:red") + self.helper.join(room=room_id, user=self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am allowed + self.assertEqual(200, channel.code, msg=channel.result["body"]) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": {"events": {"custom.event": 0}}, + } + }, + ) + def test_any_room_override_defeats_config_override(self) -> None: + # Given the server has config allowing normal users to post my event type + # And I am a normal member of a room + # But the room was created with special permissions + extra_content: Dict[str, Any] = { + "power_level_content_override": {"events": {}}, + } + room_id = self.helper.create_room_as( + "@some_other_guy:red", extra_content=extra_content + ) + self.helper.join(room=room_id, user=self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am not allowed + self.assertEqual(403, channel.code, msg=channel.result["body"]) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": {"events": {"custom.event": 0}}, + } + }, + ) + def test_specific_room_override_defeats_config_override(self) -> None: + # Given the server has config allowing normal users to post my event type, + # 
and I am a normal member of a room, + # but the room was created with special permissions for this event type + extra_content = { + "power_level_content_override": {"events": {"custom.event": 1}}, + } + room_id = self.helper.create_room_as( + "@some_other_guy:red", extra_content=extra_content + ) + self.helper.join(room=room_id, user=self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am not allowed + self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual( + "You don't have permission to post that to the room. " + + "user_level (0) < send_level (1)", + channel.json_body["error"], + ) + + @unittest.override_config( + { + "default_power_level_content_override": { + "public_chat": {"events": {"custom.event": 0}}, + "private_chat": None, + "trusted_private_chat": None, + } + }, + ) + def test_config_override_applies_only_to_specific_preset(self) -> None: + # Given the server has config for public_chats, + # and I am a normal member of a private_chat room + room_id = self.helper.create_room_as("@some_other_guy:red", is_public=False) + self.helper.invite(room=room_id, src="@some_other_guy:red", targ=self.user_id) + self.helper.join(room=room_id, user=self.user_id) + + # When I send a state event + path = "/rooms/{room_id}/state/custom.event/my_state_key".format( + room_id=urlparse.quote(room_id), + ) + channel = self.make_request("PUT", path, "{}") + + # Then I am not allowed because the public_chat config does not + # affect this room, because this room is a private_chat + self.assertEqual(403, channel.code, msg=channel.result["body"]) + self.assertEqual( + "You don't have permission to post that to the room. 
" + + "user_level (0) < send_level (50)", + channel.json_body["error"], + ) + + class RoomInitialSyncTestCase(RoomBase): """Tests /rooms/$room_id/initialSync.""" From 17e1eb7749adf12d43f534c50115bbe19c809ea6 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 12 May 2022 15:33:50 +0100 Subject: [PATCH 022/181] Reduce the number of "untyped defs" (#12716) --- changelog.d/12716.misc | 1 + mypy.ini | 24 ++++++++++ synapse/groups/groups_server.py | 2 +- synapse/http/client.py | 16 ++++--- .../federation/matrix_federation_agent.py | 2 +- synapse/http/federation/srv_resolver.py | 4 +- .../http/federation/well_known_resolver.py | 6 +-- synapse/http/matrixfederationclient.py | 31 ++++++++----- synapse/http/request_metrics.py | 10 ++--- synapse/storage/database.py | 44 +++++++++++++------ synapse/storage/databases/main/metrics.py | 24 +++++----- synapse/storage/databases/main/stream.py | 8 ++-- synapse/storage/persist_events.py | 21 +++++---- synapse/storage/prepare_database.py | 2 +- synapse/storage/state.py | 6 ++- synapse/storage/types.py | 10 ++++- 16 files changed, 142 insertions(+), 69 deletions(-) create mode 100644 changelog.d/12716.misc diff --git a/changelog.d/12716.misc b/changelog.d/12716.misc new file mode 100644 index 000000000000..b07e1b52ee7c --- /dev/null +++ b/changelog.d/12716.misc @@ -0,0 +1 @@ +Add type annotations to increase the number of modules passing `disallow-untyped-defs`. 
\ No newline at end of file diff --git a/mypy.ini b/mypy.ini index ba0de419f5ea..8478dd9e510b 100644 --- a/mypy.ini +++ b/mypy.ini @@ -119,9 +119,18 @@ disallow_untyped_defs = True [mypy-synapse.federation.transport.client] disallow_untyped_defs = False +[mypy-synapse.groups.*] +disallow_untyped_defs = True + [mypy-synapse.handlers.*] disallow_untyped_defs = True +[mypy-synapse.http.federation.*] +disallow_untyped_defs = True + +[mypy-synapse.http.request_metrics] +disallow_untyped_defs = True + [mypy-synapse.http.server] disallow_untyped_defs = True @@ -196,12 +205,27 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.state_deltas] disallow_untyped_defs = True +[mypy-synapse.storage.databases.main.stream] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.transactions] disallow_untyped_defs = True [mypy-synapse.storage.databases.main.user_erasure_store] disallow_untyped_defs = True +[mypy-synapse.storage.prepare_database] +disallow_untyped_defs = True + +[mypy-synapse.storage.persist_events] +disallow_untyped_defs = True + +[mypy-synapse.storage.state] +disallow_untyped_defs = True + +[mypy-synapse.storage.types] +disallow_untyped_defs = True + [mypy-synapse.storage.util.*] disallow_untyped_defs = True diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py index 4c3a5a6e24d1..dfd24af695ab 100644 --- a/synapse/groups/groups_server.py +++ b/synapse/groups/groups_server.py @@ -934,7 +934,7 @@ async def delete_group(self, group_id: str, requester_user_id: str) -> None: # Before deleting the group lets kick everyone out of it users = await self.store.get_users_in_group(group_id, include_private=True) - async def _kick_user_from_group(user_id): + async def _kick_user_from_group(user_id: str) -> None: if self.hs.is_mine_id(user_id): groups_local = self.hs.get_groups_local_handler() assert isinstance( diff --git a/synapse/http/client.py b/synapse/http/client.py index b2c9a7c67090..084d0a5b84e9 100644 --- 
a/synapse/http/client.py +++ b/synapse/http/client.py @@ -43,8 +43,10 @@ from twisted.internet.address import IPv4Address, IPv6Address from twisted.internet.interfaces import ( IAddress, + IDelayedCall, IHostResolution, IReactorPluggableNameResolver, + IReactorTime, IResolutionReceiver, ITCPTransport, ) @@ -121,13 +123,15 @@ def check_against_blacklist( _EPSILON = 0.00000001 -def _make_scheduler(reactor): +def _make_scheduler( + reactor: IReactorTime, +) -> Callable[[Callable[[], object]], IDelayedCall]: """Makes a schedular suitable for a Cooperator using the given reactor. (This is effectively just a copy from `twisted.internet.task`) """ - def _scheduler(x): + def _scheduler(x: Callable[[], object]) -> IDelayedCall: return reactor.callLater(_EPSILON, x) return _scheduler @@ -775,7 +779,7 @@ async def get_file( ) -def _timeout_to_request_timed_out_error(f: Failure): +def _timeout_to_request_timed_out_error(f: Failure) -> Failure: if f.check(twisted_error.TimeoutError, twisted_error.ConnectingCancelledError): # The TCP connection has its own timeout (set by the 'connectTimeout' param # on the Agent), which raises twisted_error.TimeoutError exception. @@ -809,7 +813,7 @@ class _DiscardBodyWithMaxSizeProtocol(protocol.Protocol): def __init__(self, deferred: defer.Deferred): self.deferred = deferred - def _maybe_fail(self): + def _maybe_fail(self) -> None: """ Report a max size exceed error and disconnect the first time this is called. """ @@ -933,12 +937,12 @@ class InsecureInterceptableContextFactory(ssl.ContextFactory): Do not use this since it allows an attacker to intercept your communications. 
""" - def __init__(self): + def __init__(self) -> None: self._context = SSL.Context(SSL.SSLv23_METHOD) self._context.set_verify(VERIFY_NONE, lambda *_: False) def getContext(self, hostname=None, port=None): return self._context - def creatorForNetloc(self, hostname, port): + def creatorForNetloc(self, hostname: bytes, port: int): return self diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index a8a520f80944..2f0177f1e203 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -239,7 +239,7 @@ def __init__( self._srv_resolver = srv_resolver - def endpointForURI(self, parsed_uri: URI): + def endpointForURI(self, parsed_uri: URI) -> "MatrixHostnameEndpoint": return MatrixHostnameEndpoint( self._reactor, self._proxy_reactor, diff --git a/synapse/http/federation/srv_resolver.py b/synapse/http/federation/srv_resolver.py index f68646fd0dd4..de0e882b3312 100644 --- a/synapse/http/federation/srv_resolver.py +++ b/synapse/http/federation/srv_resolver.py @@ -16,7 +16,7 @@ import logging import random import time -from typing import Callable, Dict, List +from typing import Any, Callable, Dict, List import attr @@ -109,7 +109,7 @@ class SrvResolver: def __init__( self, - dns_client=client, + dns_client: Any = client, cache: Dict[bytes, List[Server]] = SERVER_CACHE, get_time: Callable[[], float] = time.time, ): diff --git a/synapse/http/federation/well_known_resolver.py b/synapse/http/federation/well_known_resolver.py index 43f2140429b5..71b685fadec9 100644 --- a/synapse/http/federation/well_known_resolver.py +++ b/synapse/http/federation/well_known_resolver.py @@ -74,9 +74,9 @@ _had_valid_well_known_cache: TTLCache[bytes, bool] = TTLCache("had-valid-well-known") -@attr.s(slots=True, frozen=True) +@attr.s(slots=True, frozen=True, auto_attribs=True) class WellKnownLookupResult: - delegated_server = attr.ib() + delegated_server: Optional[bytes] 
class WellKnownResolver: @@ -336,4 +336,4 @@ def _parse_cache_control(headers: Headers) -> Dict[bytes, Optional[bytes]]: class _FetchWellKnownFailure(Exception): # True if we didn't get a non-5xx HTTP response, i.e. this may or may not be # a temporary failure. - temporary = attr.ib() + temporary: bool = attr.ib() diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index c2ec3caa0ea8..725b5c33b8c5 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -23,6 +23,8 @@ from io import BytesIO, StringIO from typing import ( TYPE_CHECKING, + Any, + BinaryIO, Callable, Dict, Generic, @@ -44,7 +46,7 @@ from twisted.internet import defer from twisted.internet.error import DNSLookupError from twisted.internet.interfaces import IReactorTime -from twisted.internet.task import _EPSILON, Cooperator +from twisted.internet.task import Cooperator from twisted.web.client import ResponseFailed from twisted.web.http_headers import Headers from twisted.web.iweb import IBodyProducer, IResponse @@ -58,11 +60,13 @@ RequestSendFailed, SynapseError, ) +from synapse.crypto.context_factory import FederationPolicyForHTTPS from synapse.http import QuieterFileBodyProducer from synapse.http.client import ( BlacklistingAgentWrapper, BodyExceededMaxSize, ByteWriteable, + _make_scheduler, encode_query_args, read_body_with_max_size, ) @@ -181,7 +185,7 @@ class JsonParser(ByteParser[Union[JsonDict, list]]): CONTENT_TYPE = "application/json" - def __init__(self): + def __init__(self) -> None: self._buffer = StringIO() self._binary_wrapper = BinaryIOWrapper(self._buffer) @@ -299,7 +303,9 @@ async def _handle_response( class BinaryIOWrapper: """A wrapper for a TextIO which converts from bytes on the fly.""" - def __init__(self, file: typing.TextIO, encoding="utf-8", errors="strict"): + def __init__( + self, file: typing.TextIO, encoding: str = "utf-8", errors: str = "strict" + ): self.decoder = 
codecs.getincrementaldecoder(encoding)(errors) self.file = file @@ -317,7 +323,11 @@ class MatrixFederationHttpClient: requests. """ - def __init__(self, hs: "HomeServer", tls_client_options_factory): + def __init__( + self, + hs: "HomeServer", + tls_client_options_factory: Optional[FederationPolicyForHTTPS], + ): self.hs = hs self.signing_key = hs.signing_key self.server_name = hs.hostname @@ -348,10 +358,7 @@ def __init__(self, hs: "HomeServer", tls_client_options_factory): self.version_string_bytes = hs.version_string.encode("ascii") self.default_timeout = 60 - def schedule(x): - self.reactor.callLater(_EPSILON, x) - - self._cooperator = Cooperator(scheduler=schedule) + self._cooperator = Cooperator(scheduler=_make_scheduler(self.reactor)) self._sleeper = AwakenableSleeper(self.reactor) @@ -364,7 +371,7 @@ async def _send_request_with_optional_trailing_slash( self, request: MatrixFederationRequest, try_trailing_slash_on_400: bool = False, - **send_request_args, + **send_request_args: Any, ) -> IResponse: """Wrapper for _send_request which can optionally retry the request upon receiving a combination of a 400 HTTP response code and a @@ -1159,7 +1166,7 @@ async def get_file( self, destination: str, path: str, - output_stream, + output_stream: BinaryIO, args: Optional[QueryParams] = None, retry_on_dns_fail: bool = True, max_size: Optional[int] = None, @@ -1250,10 +1257,10 @@ async def get_file( return length, headers -def _flatten_response_never_received(e): +def _flatten_response_never_received(e: BaseException) -> str: if hasattr(e, "reasons"): reasons = ", ".join( - _flatten_response_never_received(f.value) for f in e.reasons + _flatten_response_never_received(f.value) for f in e.reasons # type: ignore[attr-defined] ) return "%s:[%s]" % (type(e).__name__, reasons) diff --git a/synapse/http/request_metrics.py b/synapse/http/request_metrics.py index 4886626d5074..2b6d113544ca 100644 --- a/synapse/http/request_metrics.py +++ b/synapse/http/request_metrics.py @@ 
-162,7 +162,7 @@ def start(self, time_sec: float, name: str, method: str) -> None: with _in_flight_requests_lock: _in_flight_requests.add(self) - def stop(self, time_sec, response_code, sent_bytes): + def stop(self, time_sec: float, response_code: int, sent_bytes: int) -> None: with _in_flight_requests_lock: _in_flight_requests.discard(self) @@ -186,13 +186,13 @@ def stop(self, time_sec, response_code, sent_bytes): ) return - response_code = str(response_code) + response_code_str = str(response_code) - outgoing_responses_counter.labels(self.method, response_code).inc() + outgoing_responses_counter.labels(self.method, response_code_str).inc() response_count.labels(self.method, self.name, tag).inc() - response_timer.labels(self.method, self.name, tag, response_code).observe( + response_timer.labels(self.method, self.name, tag, response_code_str).observe( time_sec - self.start_ts ) @@ -221,7 +221,7 @@ def stop(self, time_sec, response_code, sent_bytes): # flight. self.update_metrics() - def update_metrics(self): + def update_metrics(self) -> None: """Updates the in flight metrics with values from this request.""" if not self.start_context: logger.error( diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 41f566b6487a..5ddb58a8a2ca 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -31,6 +31,7 @@ List, Optional, Tuple, + Type, TypeVar, cast, overload, @@ -41,6 +42,7 @@ from typing_extensions import Concatenate, Literal, ParamSpec from twisted.enterprise import adbapi +from twisted.internet.interfaces import IReactorCore from synapse.api.errors import StoreError from synapse.config.database import DatabaseConnectionConfig @@ -92,7 +94,9 @@ def make_pool( - reactor, db_config: DatabaseConnectionConfig, engine: BaseDatabaseEngine + reactor: IReactorCore, + db_config: DatabaseConnectionConfig, + engine: BaseDatabaseEngine, ) -> adbapi.ConnectionPool: """Get the connection pool for the database.""" @@ -101,7 +105,7 @@ def 
make_pool( db_args = dict(db_config.config.get("args", {})) db_args.setdefault("cp_reconnect", True) - def _on_new_connection(conn): + def _on_new_connection(conn: Connection) -> None: # Ensure we have a logging context so we can correctly track queries, # etc. with LoggingContext("db.on_new_connection"): @@ -157,7 +161,11 @@ class LoggingDatabaseConnection: default_txn_name: str def cursor( - self, *, txn_name=None, after_callbacks=None, exception_callbacks=None + self, + *, + txn_name: Optional[str] = None, + after_callbacks: Optional[List["_CallbackListEntry"]] = None, + exception_callbacks: Optional[List["_CallbackListEntry"]] = None, ) -> "LoggingTransaction": if not txn_name: txn_name = self.default_txn_name @@ -183,11 +191,16 @@ def __enter__(self) -> "LoggingDatabaseConnection": self.conn.__enter__() return self - def __exit__(self, exc_type, exc_value, traceback) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[types.TracebackType], + ) -> Optional[bool]: return self.conn.__exit__(exc_type, exc_value, traceback) # Proxy through any unknown lookups to the DB conn class. 
- def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: return getattr(self.conn, name) @@ -391,17 +404,22 @@ def close(self) -> None: def __enter__(self) -> "LoggingTransaction": return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[types.TracebackType], + ) -> None: self.close() class PerformanceCounters: - def __init__(self): - self.current_counters = {} - self.previous_counters = {} + def __init__(self) -> None: + self.current_counters: Dict[str, Tuple[int, float]] = {} + self.previous_counters: Dict[str, Tuple[int, float]] = {} def update(self, key: str, duration_secs: float) -> None: - count, cum_time = self.current_counters.get(key, (0, 0)) + count, cum_time = self.current_counters.get(key, (0, 0.0)) count += 1 cum_time += duration_secs self.current_counters[key] = (count, cum_time) @@ -527,7 +545,7 @@ async def _check_safe_to_upsert(self) -> None: def start_profiling(self) -> None: self._previous_loop_ts = monotonic_time() - def loop(): + def loop() -> None: curr = self._current_txn_total_time prev = self._previous_txn_total_time self._previous_txn_total_time = curr @@ -1186,7 +1204,7 @@ def simple_upsert_txn_emulated( if lock: self.engine.lock_table(txn, table) - def _getwhere(key): + def _getwhere(key: str) -> str: # If the value we're passing in is None (aka NULL), we need to use # IS, not =, as NULL = NULL equals NULL (False). if keyvalues[key] is None: @@ -2258,7 +2276,7 @@ async def simple_search_list( term: Optional[str], col: str, retcols: Collection[str], - desc="simple_search_list", + desc: str = "simple_search_list", ) -> Optional[List[Dict[str, Any]]]: """Executes a SELECT query on the named table, which may return zero or more rows, returning the result as a list of dicts. 
diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index 1480a0f04829..d03555a5857b 100644 --- a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -23,6 +23,7 @@ from synapse.storage.databases.main.event_push_actions import ( EventPushActionsWorkerStore, ) +from synapse.storage.types import Cursor if TYPE_CHECKING: from synapse.server import HomeServer @@ -71,7 +72,7 @@ def __init__( self._last_user_visit_update = self._get_start_of_day() @wrap_as_background_process("read_forward_extremities") - async def _read_forward_extremities(self): + async def _read_forward_extremities(self) -> None: def fetch(txn): txn.execute( """ @@ -95,7 +96,7 @@ def fetch(txn): (x[0] - 1) * x[1] for x in res if x[1] ) - async def count_daily_e2ee_messages(self): + async def count_daily_e2ee_messages(self) -> int: """ Returns an estimate of the number of messages sent in the last day. @@ -115,7 +116,7 @@ def _count_messages(txn): return await self.db_pool.runInteraction("count_e2ee_messages", _count_messages) - async def count_daily_sent_e2ee_messages(self): + async def count_daily_sent_e2ee_messages(self) -> int: def _count_messages(txn): # This is good enough as if you have silly characters in your own # hostname then that's your own fault. @@ -136,7 +137,7 @@ def _count_messages(txn): "count_daily_sent_e2ee_messages", _count_messages ) - async def count_daily_active_e2ee_rooms(self): + async def count_daily_active_e2ee_rooms(self) -> int: def _count(txn): sql = """ SELECT COUNT(DISTINCT room_id) FROM events @@ -151,7 +152,7 @@ def _count(txn): "count_daily_active_e2ee_rooms", _count ) - async def count_daily_messages(self): + async def count_daily_messages(self) -> int: """ Returns an estimate of the number of messages sent in the last day. 
@@ -171,7 +172,7 @@ def _count_messages(txn): return await self.db_pool.runInteraction("count_messages", _count_messages) - async def count_daily_sent_messages(self): + async def count_daily_sent_messages(self) -> int: def _count_messages(txn): # This is good enough as if you have silly characters in your own # hostname then that's your own fault. @@ -192,7 +193,7 @@ def _count_messages(txn): "count_daily_sent_messages", _count_messages ) - async def count_daily_active_rooms(self): + async def count_daily_active_rooms(self) -> int: def _count(txn): sql = """ SELECT COUNT(DISTINCT room_id) FROM events @@ -226,7 +227,7 @@ async def count_monthly_users(self) -> int: "count_monthly_users", self._count_users, thirty_days_ago ) - def _count_users(self, txn, time_from): + def _count_users(self, txn: Cursor, time_from: int) -> int: """ Returns number of users seen in the past time_from period """ @@ -238,7 +239,10 @@ def _count_users(self, txn, time_from): ) u """ txn.execute(sql, (time_from,)) - (count,) = txn.fetchone() + # Mypy knows that fetchone() might return None if there are no rows. + # We know better: "SELECT COUNT(...) FROM ..." without any GROUP BY always + # returns exactly one row. + (count,) = txn.fetchone() # type: ignore[misc] return count async def count_r30_users(self) -> Dict[str, int]: @@ -453,7 +457,7 @@ def _count_r30v2_users(txn): "count_r30v2_users", _count_r30v2_users ) - def _get_start_of_day(self): + def _get_start_of_day(self) -> int: """ Returns millisecond unixtime for start of UTC day. 
""" diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 4e1d9647b7b8..59bbca2e3207 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -798,9 +798,11 @@ def get_stream_id_for_event_txn( self, txn: LoggingTransaction, event_id: str, - allow_none=False, - ) -> int: - return self.db_pool.simple_select_one_onecol_txn( + allow_none: bool = False, + ) -> Optional[int]: + # Type ignore: we pass keyvalues a Dict[str, str]; the function wants + # Dict[str, Any]. I think mypy is unhappy because Dict is invariant? + return self.db_pool.simple_select_one_onecol_txn( # type: ignore[call-overload] txn=txn, table="events", keyvalues={"event_id": event_id}, diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index a7f6338e058d..0fc282866bc5 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -25,6 +25,7 @@ Collection, Deque, Dict, + Generator, Generic, Iterable, List, @@ -207,7 +208,7 @@ async def add_to_queue( return res - def _handle_queue(self, room_id): + def _handle_queue(self, room_id: str) -> None: """Attempts to handle the queue for a room if not already being handled. 
The queue's callback will be invoked with for each item in the queue, @@ -227,7 +228,7 @@ def _handle_queue(self, room_id): self._currently_persisting_rooms.add(room_id) - async def handle_queue_loop(): + async def handle_queue_loop() -> None: try: queue = self._get_drainining_queue(room_id) for item in queue: @@ -250,15 +251,17 @@ async def handle_queue_loop(): with PreserveLoggingContext(): item.deferred.callback(ret) finally: - queue = self._event_persist_queues.pop(room_id, None) - if queue: - self._event_persist_queues[room_id] = queue + remaining_queue = self._event_persist_queues.pop(room_id, None) + if remaining_queue: + self._event_persist_queues[room_id] = remaining_queue self._currently_persisting_rooms.discard(room_id) # set handle_queue_loop off in the background run_as_background_process("persist_events", handle_queue_loop) - def _get_drainining_queue(self, room_id): + def _get_drainining_queue( + self, room_id: str + ) -> Generator[_EventPersistQueueItem, None, None]: queue = self._event_persist_queues.setdefault(room_id, deque()) try: @@ -317,7 +320,9 @@ async def persist_events( for event, ctx in events_and_contexts: partitioned.setdefault(event.room_id, []).append((event, ctx)) - async def enqueue(item): + async def enqueue( + item: Tuple[str, List[Tuple[EventBase, EventContext]]] + ) -> Dict[str, str]: room_id, evs_ctxs = item return await self._event_persist_queue.add_to_queue( room_id, evs_ctxs, backfilled=backfilled @@ -1102,7 +1107,7 @@ async def _is_server_still_joined( return False - async def _handle_potentially_left_users(self, user_ids: Set[str]): + async def _handle_potentially_left_users(self, user_ids: Set[str]) -> None: """Given a set of remote users check if the server still shares a room with them. If not then mark those users' device cache as stale. 
""" diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 546d6bae6e56..c33df420841d 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -85,7 +85,7 @@ def prepare_database( database_engine: BaseDatabaseEngine, config: Optional[HomeServerConfig], databases: Collection[str] = ("main", "state"), -): +) -> None: """Prepares a physical database for usage. Will either create all necessary tables or upgrade from an older schema version. diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d1d58592145e..d4a1bd4f9d7d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -62,7 +62,7 @@ class StateFilter: types: "frozendict[str, Optional[FrozenSet[str]]]" include_others: bool = False - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: # If `include_others` is set we canonicalise the filter by removing # wildcards from the types dictionary if self.include_others: @@ -138,7 +138,9 @@ def from_lazy_load_member_list(members: Iterable[str]) -> "StateFilter": ) @staticmethod - def freeze(types: Mapping[str, Optional[Collection[str]]], include_others: bool): + def freeze( + types: Mapping[str, Optional[Collection[str]]], include_others: bool + ) -> "StateFilter": """ Returns a (frozen) StateFilter with the same contents as the parameters specified here, which can be made of mutable types. diff --git a/synapse/storage/types.py b/synapse/storage/types.py index d7d6f1d90ecb..40536c183005 100644 --- a/synapse/storage/types.py +++ b/synapse/storage/types.py @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Union +from types import TracebackType +from typing import Any, Iterator, List, Mapping, Optional, Sequence, Tuple, Type, Union from typing_extensions import Protocol @@ -86,5 +87,10 @@ def rollback(self) -> None: def __enter__(self) -> "Connection": ... - def __exit__(self, exc_type, exc_value, traceback) -> Optional[bool]: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> Optional[bool]: ... From 57f6c496d0e26b1b455de936bd950e1899a5ae25 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 12 May 2022 18:16:32 +0100 Subject: [PATCH 023/181] URL preview cache expiry logs: INFO -> DEBUG, text clarifications (#12720) --- changelog.d/12720.misc | 1 + synapse/rest/media/v1/preview_url_resource.py | 30 +++++++++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 changelog.d/12720.misc diff --git a/changelog.d/12720.misc b/changelog.d/12720.misc new file mode 100644 index 000000000000..01b427f200ae --- /dev/null +++ b/changelog.d/12720.misc @@ -0,0 +1 @@ +Drop the logging level of status messages for the URL preview cache expiry job from INFO to DEBUG. 
\ No newline at end of file diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 50383bdbd1c5..2b2db63bf7cc 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -668,7 +668,7 @@ async def _expire_url_cache_data(self) -> None: logger.debug("Running url preview cache expiry") if not (await self.store.db_pool.updates.has_completed_background_updates()): - logger.info("Still running DB updates; skipping expiry") + logger.debug("Still running DB updates; skipping url preview cache expiry") return def try_remove_parent_dirs(dirs: Iterable[str]) -> None: @@ -688,7 +688,9 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: # Failed, skip deleting the rest of the parent dirs if e.errno != errno.ENOTEMPTY: logger.warning( - "Failed to remove media directory: %r: %s", dir, e + "Failed to remove media directory while clearing url preview cache: %r: %s", + dir, + e, ) break @@ -703,7 +705,11 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media while clearing url preview cache: %r: %s", + media_id, + e, + ) continue removed_media.append(media_id) @@ -714,9 +720,11 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: await self.store.delete_url_cache(removed_media) if removed_media: - logger.info("Deleted %d entries from url cache", len(removed_media)) + logger.debug( + "Deleted %d entries from url preview cache", len(removed_media) + ) else: - logger.debug("No entries removed from url cache") + logger.debug("No entries removed from url preview cache") # Now we delete old images associated with the url cache. 
# These may be cached for a bit on the client (i.e., they @@ -733,7 +741,9 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media from url preview cache: %r: %s", media_id, e + ) continue dirs = self.filepaths.url_cache_filepath_dirs_to_delete(media_id) @@ -745,7 +755,9 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: except FileNotFoundError: pass # If the path doesn't exist, meh except OSError as e: - logger.warning("Failed to remove media: %r: %s", media_id, e) + logger.warning( + "Failed to remove media from url preview cache: %r: %s", media_id, e + ) continue removed_media.append(media_id) @@ -758,9 +770,9 @@ def try_remove_parent_dirs(dirs: Iterable[str]) -> None: await self.store.delete_url_cache_media(removed_media) if removed_media: - logger.info("Deleted %d media from url cache", len(removed_media)) + logger.debug("Deleted %d media from url preview cache", len(removed_media)) else: - logger.debug("No media removed from url cache") + logger.debug("No media removed from url preview cache") def _is_media(content_type: str) -> bool: From c9fc2c0d2260b4e77eda31a7a4a15b073d539db2 Mon Sep 17 00:00:00 2001 From: Niklas Date: Fri, 13 May 2022 12:15:51 +0200 Subject: [PATCH 024/181] Update issuer URL in example OIDC Keycloak config (#12727) * Update openid.md Newer versions of keycloak returning a 404 when using the `/auth` prefix. 
Related: /~https://github.com/matrix-org/synapse/issues/12714 --- changelog.d/12727.doc | 1 + docs/openid.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12727.doc diff --git a/changelog.d/12727.doc b/changelog.d/12727.doc new file mode 100644 index 000000000000..c41e50c85ba0 --- /dev/null +++ b/changelog.d/12727.doc @@ -0,0 +1 @@ +Update the OpenID Connect example for Keycloak to be compatible with newer versions of Keycloak. Contributed by @nhh. diff --git a/docs/openid.md b/docs/openid.md index 19cacaafefe0..e899db63d63b 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -159,7 +159,7 @@ Follow the [Getting Started Guide](https://www.keycloak.org/getting-started) to oidc_providers: - idp_id: keycloak idp_name: "My KeyCloak server" - issuer: "https://127.0.0.1:8443/auth/realms/{realm_name}" + issuer: "https://127.0.0.1:8443/realms/{realm_name}" client_id: "synapse" client_secret: "copy secret generated from above" scopes: ["openid", "profile"] From 39bed28b2843c79438d5cb51a6bb40e31c4420e7 Mon Sep 17 00:00:00 2001 From: Jess Porter Date: Fri, 13 May 2022 12:17:38 +0100 Subject: [PATCH 025/181] SpamChecker metrics (#12513) * add Measure blocks all over SpamChecker Signed-off-by: jesopo * fix test_spam_checker_may_join_room and test_threepid_invite_spamcheck * better changelog entry --- changelog.d/12513.feature | 1 + synapse/events/spamcheck.py | 81 +++++++++++++++++++++++---------- synapse/server.py | 2 +- tests/rest/client/test_rooms.py | 6 ++- 4 files changed, 64 insertions(+), 26 deletions(-) create mode 100644 changelog.d/12513.feature diff --git a/changelog.d/12513.feature b/changelog.d/12513.feature new file mode 100644 index 000000000000..01bf1d9d2cf6 --- /dev/null +++ b/changelog.d/12513.feature @@ -0,0 +1 @@ +Measure the time taken in spam-checking callbacks and expose those measurements as metrics. 
diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 3b6795d40f6b..f30207376ae2 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -32,6 +32,7 @@ from synapse.spam_checker_api import RegistrationBehaviour from synapse.types import RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable +from synapse.util.metrics import Measure if TYPE_CHECKING: import synapse.events @@ -162,7 +163,10 @@ def run(*args: Any, **kwargs: Any) -> Awaitable: class SpamChecker: - def __init__(self) -> None: + def __init__(self, hs: "synapse.server.HomeServer") -> None: + self.hs = hs + self.clock = hs.get_clock() + self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] @@ -255,7 +259,10 @@ async def check_event_for_spam( will be used as the error message returned to the user. 
""" for callback in self._check_event_for_spam_callbacks: - res: Union[bool, str] = await delay_cancellation(callback(event)) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + res: Union[bool, str] = await delay_cancellation(callback(event)) if res: return res @@ -276,9 +283,12 @@ async def user_may_join_room( Whether the user may join the room """ for callback in self._user_may_join_room_callbacks: - may_join_room = await delay_cancellation( - callback(user_id, room_id, is_invited) - ) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_join_room = await delay_cancellation( + callback(user_id, room_id, is_invited) + ) if may_join_room is False: return False @@ -300,9 +310,12 @@ async def user_may_invite( True if the user may send an invite, otherwise False """ for callback in self._user_may_invite_callbacks: - may_invite = await delay_cancellation( - callback(inviter_userid, invitee_userid, room_id) - ) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_invite = await delay_cancellation( + callback(inviter_userid, invitee_userid, room_id) + ) if may_invite is False: return False @@ -328,9 +341,12 @@ async def user_may_send_3pid_invite( True if the user may send the invite, otherwise False """ for callback in self._user_may_send_3pid_invite_callbacks: - may_send_3pid_invite = await delay_cancellation( - callback(inviter_userid, medium, address, room_id) - ) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_send_3pid_invite = await delay_cancellation( + callback(inviter_userid, medium, address, room_id) + ) if may_send_3pid_invite is False: return False @@ -348,7 +364,10 @@ async def user_may_create_room(self, userid: str) -> bool: True if the user may create a room, otherwise False """ for callback in self._user_may_create_room_callbacks: - may_create_room = await 
delay_cancellation(callback(userid)) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_create_room = await delay_cancellation(callback(userid)) if may_create_room is False: return False @@ -369,9 +388,12 @@ async def user_may_create_room_alias( True if the user may create a room alias, otherwise False """ for callback in self._user_may_create_room_alias_callbacks: - may_create_room_alias = await delay_cancellation( - callback(userid, room_alias) - ) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_create_room_alias = await delay_cancellation( + callback(userid, room_alias) + ) if may_create_room_alias is False: return False @@ -390,7 +412,10 @@ async def user_may_publish_room(self, userid: str, room_id: str) -> bool: True if the user may publish the room, otherwise False """ for callback in self._user_may_publish_room_callbacks: - may_publish_room = await delay_cancellation(callback(userid, room_id)) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + may_publish_room = await delay_cancellation(callback(userid, room_id)) if may_publish_room is False: return False @@ -412,9 +437,13 @@ async def check_username_for_spam(self, user_profile: UserProfile) -> bool: True if the user is spammy. """ for callback in self._check_username_for_spam_callbacks: - # Make a copy of the user profile object to ensure the spam checker cannot - # modify it. - if await delay_cancellation(callback(user_profile.copy())): + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + # Make a copy of the user profile object to ensure the spam checker cannot + # modify it. 
+ res = await delay_cancellation(callback(user_profile.copy())) + if res: return True return False @@ -442,9 +471,12 @@ async def check_registration_for_spam( """ for callback in self._check_registration_for_spam_callbacks: - behaviour = await delay_cancellation( - callback(email_threepid, username, request_info, auth_provider_id) - ) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + behaviour = await delay_cancellation( + callback(email_threepid, username, request_info, auth_provider_id) + ) assert isinstance(behaviour, RegistrationBehaviour) if behaviour != RegistrationBehaviour.ALLOW: return behaviour @@ -486,7 +518,10 @@ async def check_media_file_for_spam( """ for callback in self._check_media_file_for_spam_callbacks: - spam = await delay_cancellation(callback(file_wrapper, file_info)) + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + spam = await delay_cancellation(callback(file_wrapper, file_info)) if spam: return True diff --git a/synapse/server.py b/synapse/server.py index 7daa7b9334c8..ee60cce8ebce 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -681,7 +681,7 @@ def get_stats_handler(self) -> StatsHandler: @cache_in_self def get_spam_checker(self) -> SpamChecker: - return SpamChecker() + return SpamChecker(self) @cache_in_self def get_third_party_event_rules(self) -> ThirdPartyEventRules: diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index ad416e2fd8c0..d0197aca94a0 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -925,7 +925,7 @@ async def user_may_join_room( ) -> bool: return return_value - callback_mock = Mock(side_effect=user_may_join_room) + callback_mock = Mock(side_effect=user_may_join_room, spec=lambda *x: None) self.hs.get_spam_checker()._user_may_join_room_callbacks.append(callback_mock) # Join a first room, without being invited to it. 
@@ -2856,7 +2856,9 @@ def test_threepid_invite_spamcheck(self) -> None: # Add a mock to the spamchecker callbacks for user_may_send_3pid_invite. Make it # allow everything for now. - mock = Mock(return_value=make_awaitable(True)) + # `spec` argument is needed for this function mock to have `__qualname__`, which + # is needed for `Measure` metrics buried in SpamChecker. + mock = Mock(return_value=make_awaitable(True), spec=lambda *x: None) self.hs.get_spam_checker()._user_may_send_3pid_invite_callbacks.append(mock) # Send a 3PID invite into the room and check that it succeeded. From aec69d2481e9ea1d8ea1c0ffce1706a65a7896a8 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Fri, 13 May 2022 12:35:31 +0100 Subject: [PATCH 026/181] Another batch of type annotations (#12726) --- changelog.d/12726.misc | 1 + mypy.ini | 21 ++++++++++++ synapse/handlers/e2e_keys.py | 29 ++++++---------- synapse/http/connectproxyclient.py | 39 +++++++++++++--------- synapse/http/proxyagent.py | 2 +- synapse/logging/_remote.py | 20 ++++++----- synapse/logging/formatter.py | 14 +++++--- synapse/logging/handlers.py | 4 +-- synapse/logging/scopecontextmanager.py | 28 ++++++++++++---- synapse/storage/background_updates.py | 19 ++++++++--- synapse/types.py | 46 ++++++++++++++++---------- 11 files changed, 144 insertions(+), 79 deletions(-) create mode 100644 changelog.d/12726.misc diff --git a/changelog.d/12726.misc b/changelog.d/12726.misc new file mode 100644 index 000000000000..b07e1b52ee7c --- /dev/null +++ b/changelog.d/12726.misc @@ -0,0 +1 @@ +Add type annotations to increase the number of modules passing `disallow-untyped-defs`. 
\ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 8478dd9e510b..9ae7ad211c54 100644 --- a/mypy.ini +++ b/mypy.ini @@ -128,15 +128,30 @@ disallow_untyped_defs = True [mypy-synapse.http.federation.*] disallow_untyped_defs = True +[mypy-synapse.http.connectproxyclient] +disallow_untyped_defs = True + +[mypy-synapse.http.proxyagent] +disallow_untyped_defs = True + [mypy-synapse.http.request_metrics] disallow_untyped_defs = True [mypy-synapse.http.server] disallow_untyped_defs = True +[mypy-synapse.logging._remote] +disallow_untyped_defs = True + [mypy-synapse.logging.context] disallow_untyped_defs = True +[mypy-synapse.logging.formatter] +disallow_untyped_defs = True + +[mypy-synapse.logging.handlers] +disallow_untyped_defs = True + [mypy-synapse.metrics.*] disallow_untyped_defs = True @@ -166,6 +181,9 @@ disallow_untyped_defs = True [mypy-synapse.state.*] disallow_untyped_defs = True +[mypy-synapse.storage.databases.background_updates] +disallow_untyped_defs = True + [mypy-synapse.storage.databases.main.account_data] disallow_untyped_defs = True @@ -232,6 +250,9 @@ disallow_untyped_defs = True [mypy-synapse.streams.*] disallow_untyped_defs = True +[mypy-synapse.types] +disallow_untyped_defs = True + [mypy-synapse.util.*] disallow_untyped_defs = True diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index d6714228ef41..e6c2cfb8c8e7 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -15,7 +15,7 @@ # limitations under the License. 
import logging -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple import attr from canonicaljson import encode_canonical_json @@ -1105,22 +1105,19 @@ async def _get_e2e_cross_signing_verify_key( # can request over federation raise NotFoundError("No %s key found for %s" % (key_type, user_id)) - ( - key, - key_id, - verify_key, - ) = await self._retrieve_cross_signing_keys_for_remote_user(user, key_type) - - if key is None: + cross_signing_keys = await self._retrieve_cross_signing_keys_for_remote_user( + user, key_type + ) + if cross_signing_keys is None: raise NotFoundError("No %s key found for %s" % (key_type, user_id)) - return key, key_id, verify_key + return cross_signing_keys async def _retrieve_cross_signing_keys_for_remote_user( self, user: UserID, desired_key_type: str, - ) -> Tuple[Optional[dict], Optional[str], Optional[VerifyKey]]: + ) -> Optional[Tuple[Dict[str, Any], str, VerifyKey]]: """Queries cross-signing keys for a remote user and saves them to the database Only the key specified by `key_type` will be returned, while all retrieved keys @@ -1146,12 +1143,10 @@ async def _retrieve_cross_signing_keys_for_remote_user( type(e), e, ) - return None, None, None + return None # Process each of the retrieved cross-signing keys - desired_key = None - desired_key_id = None - desired_verify_key = None + desired_key_data = None retrieved_device_ids = [] for key_type in ["master", "self_signing"]: key_content = remote_result.get(key_type + "_key") @@ -1196,9 +1191,7 @@ async def _retrieve_cross_signing_keys_for_remote_user( # If this is the desired key type, save it and its ID/VerifyKey if key_type == desired_key_type: - desired_key = key_content - desired_verify_key = verify_key - desired_key_id = key_id + desired_key_data = key_content, key_id, verify_key # At the same time, store this key in the db for subsequent queries await 
self.store.set_e2e_cross_signing_key( @@ -1212,7 +1205,7 @@ async def _retrieve_cross_signing_keys_for_remote_user( user.to_string(), retrieved_device_ids ) - return desired_key, desired_key_id, desired_verify_key + return desired_key_data def _check_cross_signing_key( diff --git a/synapse/http/connectproxyclient.py b/synapse/http/connectproxyclient.py index 203e995bb77d..23a60af17184 100644 --- a/synapse/http/connectproxyclient.py +++ b/synapse/http/connectproxyclient.py @@ -14,15 +14,22 @@ import base64 import logging -from typing import Optional +from typing import Optional, Union import attr from zope.interface import implementer from twisted.internet import defer, protocol from twisted.internet.error import ConnectError -from twisted.internet.interfaces import IReactorCore, IStreamClientEndpoint +from twisted.internet.interfaces import ( + IAddress, + IConnector, + IProtocol, + IReactorCore, + IStreamClientEndpoint, +) from twisted.internet.protocol import ClientFactory, Protocol, connectionDone +from twisted.python.failure import Failure from twisted.web import http logger = logging.getLogger(__name__) @@ -81,14 +88,14 @@ def __init__( self._port = port self._proxy_creds = proxy_creds - def __repr__(self): + def __repr__(self) -> str: return "" % (self._proxy_endpoint,) # Mypy encounters a false positive here: it complains that ClientFactory # is incompatible with IProtocolFactory. But ClientFactory inherits from # Factory, which implements IProtocolFactory. So I think this is a bug # in mypy-zope. 
- def connect(self, protocolFactory: ClientFactory): # type: ignore[override] + def connect(self, protocolFactory: ClientFactory) -> "defer.Deferred[IProtocol]": # type: ignore[override] f = HTTPProxiedClientFactory( self._host, self._port, protocolFactory, self._proxy_creds ) @@ -125,10 +132,10 @@ def __init__( self.proxy_creds = proxy_creds self.on_connection: "defer.Deferred[None]" = defer.Deferred() - def startedConnecting(self, connector): + def startedConnecting(self, connector: IConnector) -> None: return self.wrapped_factory.startedConnecting(connector) - def buildProtocol(self, addr): + def buildProtocol(self, addr: IAddress) -> "HTTPConnectProtocol": wrapped_protocol = self.wrapped_factory.buildProtocol(addr) if wrapped_protocol is None: raise TypeError("buildProtocol produced None instead of a Protocol") @@ -141,13 +148,13 @@ def buildProtocol(self, addr): self.proxy_creds, ) - def clientConnectionFailed(self, connector, reason): + def clientConnectionFailed(self, connector: IConnector, reason: Failure) -> None: logger.debug("Connection to proxy failed: %s", reason) if not self.on_connection.called: self.on_connection.errback(reason) return self.wrapped_factory.clientConnectionFailed(connector, reason) - def clientConnectionLost(self, connector, reason): + def clientConnectionLost(self, connector: IConnector, reason: Failure) -> None: logger.debug("Connection to proxy lost: %s", reason) if not self.on_connection.called: self.on_connection.errback(reason) @@ -191,10 +198,10 @@ def __init__( ) self.http_setup_client.on_connected.addCallback(self.proxyConnected) - def connectionMade(self): + def connectionMade(self) -> None: self.http_setup_client.makeConnection(self.transport) - def connectionLost(self, reason=connectionDone): + def connectionLost(self, reason: Failure = connectionDone) -> None: if self.wrapped_protocol.connected: self.wrapped_protocol.connectionLost(reason) @@ -203,7 +210,7 @@ def connectionLost(self, reason=connectionDone): if not 
self.connected_deferred.called: self.connected_deferred.errback(reason) - def proxyConnected(self, _): + def proxyConnected(self, _: Union[None, "defer.Deferred[None]"]) -> None: self.wrapped_protocol.makeConnection(self.transport) self.connected_deferred.callback(self.wrapped_protocol) @@ -213,7 +220,7 @@ def proxyConnected(self, _): if buf: self.wrapped_protocol.dataReceived(buf) - def dataReceived(self, data: bytes): + def dataReceived(self, data: bytes) -> None: # if we've set up the HTTP protocol, we can send the data there if self.wrapped_protocol.connected: return self.wrapped_protocol.dataReceived(data) @@ -243,7 +250,7 @@ def __init__( self.proxy_creds = proxy_creds self.on_connected: "defer.Deferred[None]" = defer.Deferred() - def connectionMade(self): + def connectionMade(self) -> None: logger.debug("Connected to proxy, sending CONNECT") self.sendCommand(b"CONNECT", b"%s:%d" % (self.host, self.port)) @@ -257,14 +264,14 @@ def connectionMade(self): self.endHeaders() - def handleStatus(self, version: bytes, status: bytes, message: bytes): + def handleStatus(self, version: bytes, status: bytes, message: bytes) -> None: logger.debug("Got Status: %s %s %s", status, message, version) if status != b"200": raise ProxyConnectError(f"Unexpected status on CONNECT: {status!s}") - def handleEndHeaders(self): + def handleEndHeaders(self) -> None: logger.debug("End Headers") self.on_connected.callback(None) - def handleResponse(self, body): + def handleResponse(self, body: bytes) -> None: pass diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index a16dde23807f..b2a50c910507 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -245,7 +245,7 @@ def http_proxy_endpoint( proxy: Optional[bytes], reactor: IReactorCore, tls_options_factory: Optional[IPolicyForHTTPS], - **kwargs, + **kwargs: object, ) -> Tuple[Optional[IStreamClientEndpoint], Optional[ProxyCredentials]]: """Parses an http proxy setting and returns an endpoint for the 
proxy diff --git a/synapse/logging/_remote.py b/synapse/logging/_remote.py index 475756f1db64..5a61b21eaf7e 100644 --- a/synapse/logging/_remote.py +++ b/synapse/logging/_remote.py @@ -31,7 +31,11 @@ TCP4ClientEndpoint, TCP6ClientEndpoint, ) -from twisted.internet.interfaces import IPushProducer, IStreamClientEndpoint +from twisted.internet.interfaces import ( + IPushProducer, + IReactorTCP, + IStreamClientEndpoint, +) from twisted.internet.protocol import Factory, Protocol from twisted.internet.tcp import Connection from twisted.python.failure import Failure @@ -59,14 +63,14 @@ class LogProducer: _buffer: Deque[logging.LogRecord] _paused: bool = attr.ib(default=False, init=False) - def pauseProducing(self): + def pauseProducing(self) -> None: self._paused = True - def stopProducing(self): + def stopProducing(self) -> None: self._paused = True self._buffer = deque() - def resumeProducing(self): + def resumeProducing(self) -> None: # If we're already producing, nothing to do. self._paused = False @@ -102,8 +106,8 @@ def __init__( host: str, port: int, maximum_buffer: int = 1000, - level=logging.NOTSET, - _reactor=None, + level: int = logging.NOTSET, + _reactor: Optional[IReactorTCP] = None, ): super().__init__(level=level) self.host = host @@ -118,7 +122,7 @@ def __init__( if _reactor is None: from twisted.internet import reactor - _reactor = reactor + _reactor = reactor # type: ignore[assignment] try: ip = ip_address(self.host) @@ -139,7 +143,7 @@ def __init__( self._stopping = False self._connect() - def close(self): + def close(self) -> None: self._stopping = True self._service.stopService() diff --git a/synapse/logging/formatter.py b/synapse/logging/formatter.py index c0f12ecd15b8..c88b8ae5450f 100644 --- a/synapse/logging/formatter.py +++ b/synapse/logging/formatter.py @@ -16,6 +16,8 @@ import logging import traceback from io import StringIO +from types import TracebackType +from typing import Optional, Tuple, Type class LogFormatter(logging.Formatter): @@ 
-28,10 +30,14 @@ class LogFormatter(logging.Formatter): where it was caught are logged). """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def formatException(self, ei): + def formatException( + self, + ei: Tuple[ + Optional[Type[BaseException]], + Optional[BaseException], + Optional[TracebackType], + ], + ) -> str: sio = StringIO() (typ, val, tb) = ei diff --git a/synapse/logging/handlers.py b/synapse/logging/handlers.py index 478b5274942b..dec2a2c3dd1a 100644 --- a/synapse/logging/handlers.py +++ b/synapse/logging/handlers.py @@ -49,7 +49,7 @@ def __init__( ) self._flushing_thread.start() - def on_reactor_running(): + def on_reactor_running() -> None: self._reactor_started = True reactor_to_use: IReactorCore @@ -74,7 +74,7 @@ def shouldFlush(self, record: LogRecord) -> bool: else: return True - def _flush_periodically(self): + def _flush_periodically(self) -> None: """ Whilst this handler is active, flush the handler periodically. """ diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py index d57e7c5324f8..a26a1a58e7d6 100644 --- a/synapse/logging/scopecontextmanager.py +++ b/synapse/logging/scopecontextmanager.py @@ -13,6 +13,8 @@ # limitations under the License.import logging import logging +from types import TracebackType +from typing import Optional, Type from opentracing import Scope, ScopeManager @@ -107,19 +109,26 @@ class _LogContextScope(Scope): and - if enter_logcontext was set - the logcontext is finished too. """ - def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close): + def __init__( + self, + manager: LogContextScopeManager, + span, + logcontext, + enter_logcontext: bool, + finish_on_close: bool, + ): """ Args: - manager (LogContextScopeManager): + manager: the manager that is responsible for this scope. span (Span): the opentracing span which this scope represents the local lifetime for. 
logcontext (LogContext): the logcontext to which this scope is attached. - enter_logcontext (Boolean): + enter_logcontext: if True the logcontext will be exited when the scope is finished - finish_on_close (Boolean): + finish_on_close: if True finish the span when the scope is closed """ super().__init__(manager, span) @@ -127,16 +136,21 @@ def __init__(self, manager, span, logcontext, enter_logcontext, finish_on_close) self._finish_on_close = finish_on_close self._enter_logcontext = enter_logcontext - def __exit__(self, exc_type, value, traceback): + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: if exc_type == twisted.internet.defer._DefGen_Return: # filter out defer.returnValue() calls exc_type = value = traceback = None super().__exit__(exc_type, value, traceback) - def __str__(self): + def __str__(self) -> str: return f"Scope<{self.span}>" - def close(self): + def close(self) -> None: active_scope = self.manager.active if active_scope is not self: logger.error( diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 08c6eabc6d1a..c2bbbb574e75 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -12,20 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging +from types import TracebackType from typing import ( TYPE_CHECKING, + Any, AsyncContextManager, Awaitable, Callable, Dict, Iterable, + List, Optional, + Type, ) import attr from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage.types import Connection +from synapse.storage.types import Connection, Cursor from synapse.types import JsonDict from synapse.util import Clock, json_encoder @@ -74,7 +78,12 @@ async def __aenter__(self) -> int: return self._update_duration_ms - async def __aexit__(self, *exc) -> None: + async def __aexit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + tb: Optional[TracebackType], + ) -> None: pass @@ -352,7 +361,7 @@ async def do_next_background_update(self, sleep: bool = True) -> bool: True if we have finished running all the background updates, otherwise False """ - def get_background_updates_txn(txn): + def get_background_updates_txn(txn: Cursor) -> List[Dict[str, Any]]: txn.execute( """ SELECT update_name, depends_on FROM background_updates @@ -469,7 +478,7 @@ def register_background_update_handler( self, update_name: str, update_handler: Callable[[JsonDict, int], Awaitable[int]], - ): + ) -> None: """Register a handler for doing a background update. 
The handler should take two arguments: @@ -603,7 +612,7 @@ def create_index_sqlite(conn: Connection) -> None: else: runner = create_index_sqlite - async def updater(progress, batch_size): + async def updater(progress: JsonDict, batch_size: int) -> int: if runner is not None: logger.info("Adding index %s to %s", index_name, table) await self.db_pool.runWithConnection(runner) diff --git a/synapse/types.py b/synapse/types.py index 9ac688b23b28..325332a6e00f 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -24,6 +24,7 @@ Mapping, Match, MutableMapping, + NoReturn, Optional, Set, Tuple, @@ -35,6 +36,7 @@ import attr from frozendict import frozendict from signedjson.key import decode_verify_key_bytes +from signedjson.types import VerifyKey from typing_extensions import TypedDict from unpaddedbase64 import decode_base64 from zope.interface import Interface @@ -55,6 +57,7 @@ if TYPE_CHECKING: from synapse.appservice.api import ApplicationService from synapse.storage.databases.main import DataStore, PurgeEventsStore + from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore # Define a state map type from type/state_key to T (usually an event ID or # event) @@ -114,7 +117,7 @@ class Requester: app_service: Optional["ApplicationService"] authenticated_entity: str - def serialize(self): + def serialize(self) -> Dict[str, Any]: """Converts self to a type that can be serialized as JSON, and then deserialized by `deserialize` @@ -132,7 +135,9 @@ def serialize(self): } @staticmethod - def deserialize(store, input): + def deserialize( + store: "ApplicationServiceWorkerStore", input: Dict[str, Any] + ) -> "Requester": """Converts a dict that was produced by `serialize` back into a Requester. @@ -236,10 +241,10 @@ class DomainSpecificString(metaclass=abc.ABCMeta): domain: str # Because this is a frozen class, it is deeply immutable. 
- def __copy__(self): + def __copy__(self: DS) -> DS: return self - def __deepcopy__(self, memo): + def __deepcopy__(self: DS, memo: Dict[str, object]) -> DS: return self @classmethod @@ -729,12 +734,14 @@ async def to_string(self, store: "DataStore") -> str: ) @property - def room_stream_id(self): + def room_stream_id(self) -> int: return self.room_key.stream - def copy_and_advance(self, key, new_value) -> "StreamToken": + def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": """Advance the given key in the token to a new value if and only if the new value is after the old value. + + :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. """ if key == "room_key": new_token = self.copy_and_replace( @@ -751,7 +758,7 @@ def copy_and_advance(self, key, new_value) -> "StreamToken": else: return self - def copy_and_replace(self, key, new_value) -> "StreamToken": + def copy_and_replace(self, key: str, new_value: Any) -> "StreamToken": return attr.evolve(self, **{key: new_value}) @@ -793,14 +800,14 @@ class ThirdPartyInstanceID: # Deny iteration because it will bite you if you try to create a singleton # set by: # users = set(user) - def __iter__(self): + def __iter__(self) -> NoReturn: raise ValueError("Attempted to iterate a %s" % (type(self).__name__,)) # Because this class is a frozen class, it is deeply immutable. 
- def __copy__(self): + def __copy__(self) -> "ThirdPartyInstanceID": return self - def __deepcopy__(self, memo): + def __deepcopy__(self, memo: Dict[str, object]) -> "ThirdPartyInstanceID": return self @classmethod @@ -852,25 +859,28 @@ def __bool__(self) -> bool: return bool(self.changed or self.left) -def get_verify_key_from_cross_signing_key(key_info): +def get_verify_key_from_cross_signing_key( + key_info: Mapping[str, Any] +) -> Tuple[str, VerifyKey]: """Get the key ID and signedjson verify key from a cross-signing key dict Args: - key_info (dict): a cross-signing key dict, which must have a "keys" + key_info: a cross-signing key dict, which must have a "keys" property that has exactly one item in it Returns: - (str, VerifyKey): the key ID and verify key for the cross-signing key + the key ID and verify key for the cross-signing key """ - # make sure that exactly one key is provided + # make sure that a `keys` field is provided if "keys" not in key_info: raise ValueError("Invalid key") keys = key_info["keys"] - if len(keys) != 1: - raise ValueError("Invalid key") - # and return that one key - for key_id, key_data in keys.items(): + # and that it contains exactly one key + if len(keys) == 1: + key_id, key_data = next(iter(keys.items())) return key_id, decode_verify_key_bytes(key_id, decode_base64(key_data)) + else: + raise ValueError("Invalid key") @attr.s(auto_attribs=True, frozen=True, slots=True) From 90131044297ac5378fb381050f4068784dc206a8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Fri, 13 May 2022 15:30:15 +0200 Subject: [PATCH 027/181] Don't create an empty room when checking for MAU limits (#12713) --- changelog.d/12713.bugfix | 1 + .../resource_limits_server_notices.py | 40 +++-------- .../server_notices/server_notices_manager.py | 70 ++++++++++++------- .../test_resource_limits_server_notices.py | 11 ++- 4 files changed, 66 insertions(+), 56 deletions(-) create mode 100644 changelog.d/12713.bugfix diff --git a/changelog.d/12713.bugfix 
b/changelog.d/12713.bugfix new file mode 100644 index 000000000000..91e70f102c5d --- /dev/null +++ b/changelog.d/12713.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.30.0 where empty rooms could be automatically created if a monthly active users limit is set. diff --git a/synapse/server_notices/resource_limits_server_notices.py b/synapse/server_notices/resource_limits_server_notices.py index 015dd08f05e4..b5f3a0c74e9e 100644 --- a/synapse/server_notices/resource_limits_server_notices.py +++ b/synapse/server_notices/resource_limits_server_notices.py @@ -21,7 +21,6 @@ ServerNoticeMsgType, ) from synapse.api.errors import AuthError, ResourceLimitError, SynapseError -from synapse.server_notices.server_notices_manager import SERVER_NOTICE_ROOM_TAG if TYPE_CHECKING: from synapse.server import HomeServer @@ -71,18 +70,19 @@ async def maybe_send_server_notice_to_user(self, user_id: str) -> None: # In practice, not sure we can ever get here return - room_id = await self._server_notices_manager.get_or_create_notice_room_for_user( + # Check if there's a server notice room for this user. 
+ room_id = await self._server_notices_manager.maybe_get_notice_room_for_user( user_id ) - if not room_id: - logger.warning("Failed to get server notices room") - return - - await self._check_and_set_tags(user_id, room_id) - - # Determine current state of room - currently_blocked, ref_events = await self._is_room_currently_blocked(room_id) + if room_id is not None: + # Determine current state of room + currently_blocked, ref_events = await self._is_room_currently_blocked( + room_id + ) + else: + currently_blocked = False + ref_events = [] limit_msg = None limit_type = None @@ -161,26 +161,6 @@ async def _apply_limit_block_notification( user_id, content, EventTypes.Pinned, "" ) - async def _check_and_set_tags(self, user_id: str, room_id: str) -> None: - """ - Since server notices rooms were originally not with tags, - important to check that tags have been set correctly - Args: - user_id(str): the user in question - room_id(str): the server notices room for that user - """ - tags = await self._store.get_tags_for_room(user_id, room_id) - need_to_set_tag = True - if tags: - if SERVER_NOTICE_ROOM_TAG in tags: - # tag already present, nothing to do here - need_to_set_tag = False - if need_to_set_tag: - max_id = await self._account_data_handler.add_tag_to_room( - user_id, room_id, SERVER_NOTICE_ROOM_TAG, {} - ) - self._notifier.on_new_event("account_data_key", max_id, users=[user_id]) - async def _is_room_currently_blocked(self, room_id: str) -> Tuple[bool, List[str]]: """ Determines if the room is currently blocked diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 48eae5fa062a..c2c37e1015ce 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -90,6 +90,35 @@ async def send_notice( ) return event + @cached() + async def maybe_get_notice_room_for_user(self, user_id: str) -> Optional[str]: + """Try to look up the server notice room for 
this user if it exists. + + Does not create one if none can be found. + + Args: + user_id: the user we want a server notice room for. + + Returns: + The room's ID, or None if no room could be found. + """ + rooms = await self._store.get_rooms_for_local_user_where_membership_is( + user_id, [Membership.INVITE, Membership.JOIN] + ) + for room in rooms: + # it's worth noting that there is an asymmetry here in that we + # expect the user to be invited or joined, but the system user must + # be joined. This is kinda deliberate, in that if somebody somehow + # manages to invite the system user to a room, that doesn't make it + # the server notices room. + user_ids = await self._store.get_users_in_room(room.room_id) + if len(user_ids) <= 2 and self.server_notices_mxid in user_ids: + # we found a room which our user shares with the system notice + # user + return room.room_id + + return None + @cached() async def get_or_create_notice_room_for_user(self, user_id: str) -> str: """Get the room for notices for a given user @@ -112,31 +141,20 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: self.server_notices_mxid, authenticated_entity=self._server_name ) - rooms = await self._store.get_rooms_for_local_user_where_membership_is( - user_id, [Membership.INVITE, Membership.JOIN] - ) - for room in rooms: - # it's worth noting that there is an asymmetry here in that we - # expect the user to be invited or joined, but the system user must - # be joined. This is kinda deliberate, in that if somebody somehow - # manages to invite the system user to a room, that doesn't make it - # the server notices room. 
- user_ids = await self._store.get_users_in_room(room.room_id) - if len(user_ids) <= 2 and self.server_notices_mxid in user_ids: - # we found a room which our user shares with the system notice - # user - logger.info( - "Using existing server notices room %s for user %s", - room.room_id, - user_id, - ) - await self._update_notice_user_profile_if_changed( - requester, - room.room_id, - self._config.servernotices.server_notices_mxid_display_name, - self._config.servernotices.server_notices_mxid_avatar_url, - ) - return room.room_id + room_id = await self.maybe_get_notice_room_for_user(user_id) + if room_id is not None: + logger.info( + "Using existing server notices room %s for user %s", + room_id, + user_id, + ) + await self._update_notice_user_profile_if_changed( + requester, + room_id, + self._config.servernotices.server_notices_mxid_display_name, + self._config.servernotices.server_notices_mxid_avatar_url, + ) + return room_id # apparently no existing notice room: create a new one logger.info("Creating server notices room for %s", user_id) @@ -166,6 +184,8 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: ) room_id = info["room_id"] + self.maybe_get_notice_room_for_user.invalidate((user_id,)) + max_id = await self._account_data_handler.add_tag_to_room( user_id, room_id, SERVER_NOTICE_ROOM_TAG, {} ) diff --git a/tests/server_notices/test_resource_limits_server_notices.py b/tests/server_notices/test_resource_limits_server_notices.py index 9ee9509d3a96..07e29788e5be 100644 --- a/tests/server_notices/test_resource_limits_server_notices.py +++ b/tests/server_notices/test_resource_limits_server_notices.py @@ -75,6 +75,9 @@ def prepare(self, reactor, clock, hs): self._rlsn._server_notices_manager.get_or_create_notice_room_for_user = Mock( return_value=make_awaitable("!something:localhost") ) + self._rlsn._server_notices_manager.maybe_get_notice_room_for_user = Mock( + return_value=make_awaitable("!something:localhost") + ) 
self._rlsn._store.add_tag_to_room = Mock(return_value=make_awaitable(None)) self._rlsn._store.get_tags_for_room = Mock(return_value=make_awaitable({})) @@ -102,6 +105,7 @@ def test_maybe_send_server_notice_to_user_remove_blocked_notice(self): ) self.get_success(self._rlsn.maybe_send_server_notice_to_user(self.user_id)) # Would be better to check the content, but once == remove blocking event + self._rlsn._server_notices_manager.maybe_get_notice_room_for_user.assert_called_once() self._send_notice.assert_called_once() def test_maybe_send_server_notice_to_user_remove_blocked_notice_noop(self): @@ -300,7 +304,10 @@ def test_no_invite_without_notice(self): hasn't been reached (since it's the only user and the limit is 5), so users shouldn't receive a server notice. """ - self.register_user("user", "password") + m = Mock(return_value=make_awaitable(None)) + self._rlsn._server_notices_manager.maybe_get_notice_room_for_user = m + + user_id = self.register_user("user", "password") tok = self.login("user", "password") channel = self.make_request("GET", "/sync?timeout=0", access_token=tok) @@ -309,6 +316,8 @@ def test_no_invite_without_notice(self): "rooms", channel.json_body, "Got invites without server notice" ) + m.assert_called_once_with(user_id) + def test_invite_with_notice(self): """Tests that, if the MAU limit is hit, the server notices user invites each user to a room in which it has sent a notice. 
From e8ae472d3b991362bfb48fb319f386163b2d5e76 Mon Sep 17 00:00:00 2001 From: Till <2353100+S7evinK@users.noreply.github.com> Date: Fri, 13 May 2022 17:45:47 +0200 Subject: [PATCH 028/181] Update configs used by Complement to allow more invites (#12731) --- changelog.d/12731.misc | 1 + docker/complement/conf-workers/workers-shared.yaml | 12 ++++++++++++ docker/complement/conf/homeserver.yaml | 12 ++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 changelog.d/12731.misc diff --git a/changelog.d/12731.misc b/changelog.d/12731.misc new file mode 100644 index 000000000000..962100d516c1 --- /dev/null +++ b/changelog.d/12731.misc @@ -0,0 +1 @@ +Update configs used by Complement to allow more invites/3PID validations during tests. \ No newline at end of file diff --git a/docker/complement/conf-workers/workers-shared.yaml b/docker/complement/conf-workers/workers-shared.yaml index 8b6987037715..86ee11ecd0e5 100644 --- a/docker/complement/conf-workers/workers-shared.yaml +++ b/docker/complement/conf-workers/workers-shared.yaml @@ -53,6 +53,18 @@ rc_joins: per_second: 9999 burst_count: 9999 +rc_3pid_validation: + per_second: 1000 + burst_count: 1000 + +rc_invites: + per_room: + per_second: 1000 + burst_count: 1000 + per_user: + per_second: 1000 + burst_count: 1000 + federation_rr_transactions_per_room_per_second: 9999 ## Experimental Features ## diff --git a/docker/complement/conf/homeserver.yaml b/docker/complement/conf/homeserver.yaml index 174f87f52ee0..e2be540bbb9e 100644 --- a/docker/complement/conf/homeserver.yaml +++ b/docker/complement/conf/homeserver.yaml @@ -87,6 +87,18 @@ rc_joins: per_second: 9999 burst_count: 9999 +rc_3pid_validation: + per_second: 1000 + burst_count: 1000 + +rc_invites: + per_room: + per_second: 1000 + burst_count: 1000 + per_user: + per_second: 1000 + burst_count: 1000 + federation_rr_transactions_per_room_per_second: 9999 ## API Configuration ## From cde8af9a495cbc7f3d0207e3f17c37eddaee34e1 Mon Sep 17 00:00:00 2001 From: Shay 
Date: Fri, 13 May 2022 12:32:39 -0700 Subject: [PATCH 029/181] Add config flags to allow for cache auto-tuning (#12701) --- changelog.d/12701.feature | 1 + docs/sample_config.yaml | 18 +++ .../configuration/config_documentation.md | 17 ++- synapse/config/cache.py | 33 +++++ synapse/metrics/jemalloc.py | 114 +++++++++++------- synapse/util/caches/lrucache.py | 79 ++++++++++-- tests/util/test_lrucache.py | 58 ++++++++- 7 files changed, 266 insertions(+), 54 deletions(-) create mode 100644 changelog.d/12701.feature diff --git a/changelog.d/12701.feature b/changelog.d/12701.feature new file mode 100644 index 000000000000..bb2264602c84 --- /dev/null +++ b/changelog.d/12701.feature @@ -0,0 +1 @@ +Add config options to allow for auto-tuning of caches. diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 03a0f6314cdd..05a3606da043 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -784,6 +784,24 @@ caches: # #cache_entry_ttl: 30m + # This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`, + # `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain + # a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize + # this option, and all three of the options must be specified for this feature to work. + #cache_autotuning: + # This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted. + # They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in + # the flag below, or until the `min_cache_ttl` is hit. + #max_cache_memory_usage: 1024M + + # This flag sets a rough target for the desired memory usage of the caches. 
+ #target_cache_memory_usage: 758M + + # `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when + # caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches + # from being emptied while Synapse is evicting due to memory. + #min_cache_ttl: 5m + # Controls how long the results of a /sync request are cached for after # a successful response is returned. A higher duration can help clients with # intermittent connections, at the cost of higher memory usage. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 2af1f284b14e..ca443631055f 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1119,7 +1119,17 @@ Caching can be configured through the following sub-options: with intermittent connections, at the cost of higher memory usage. By default, this is zero, which means that sync responses are not cached at all. - +* `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and + `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory + usage and cache entry availability. You must be using [jemalloc](https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu) + to utilize this option, and all three of the options must be specified for this feature to work. + * `max_cache_memory_usage` sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted. + They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in + the flag below, or until the `min_cache_ttl` is hit. + * `target_cache_memory_usage` sets a rough target for the desired memory usage of the caches. 
+ * `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when + caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches + from being emptied while Synapse is evicting due to memory. Example configuration: ```yaml @@ -1127,8 +1137,11 @@ caches: global_factor: 1.0 per_cache_factors: get_users_who_share_room_with_user: 2.0 - expire_caches: false sync_response_cache_duration: 2m + cache_autotuning: + max_cache_memory_usage: 1024M + target_cache_memory_usage: 758M + min_cache_ttl: 5m ``` ### Reloading cache factors diff --git a/synapse/config/cache.py b/synapse/config/cache.py index 58b2fe55193c..d2f55534d7d1 100644 --- a/synapse/config/cache.py +++ b/synapse/config/cache.py @@ -176,6 +176,24 @@ def generate_config_section(self, **kwargs: Any) -> str: # #cache_entry_ttl: 30m + # This flag enables cache autotuning, and is further specified by the sub-options `max_cache_memory_usage`, + # `target_cache_memory_usage`, `min_cache_ttl`. These flags work in conjunction with each other to maintain + # a balance between cache memory usage and cache entry availability. You must be using jemalloc to utilize + # this option, and all three of the options must be specified for this feature to work. + #cache_autotuning: + # This flag sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted. + # They will continue to be evicted until the memory usage drops below the `target_cache_memory_usage`, set in + # the flag below, or until the `min_cache_ttl` is hit. + #max_cache_memory_usage: 1024M + + # This flag sets a rough target for the desired memory usage of the caches. + #target_cache_memory_usage: 758M + + # `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when + # caches are actively being evicted/`max_cache_memory_usage` has been exceeded. 
This is to protect hot caches + # from being emptied while Synapse is evicting due to memory. + #min_cache_ttl: 5m + # Controls how long the results of a /sync request are cached for after # a successful response is returned. A higher duration can help clients with # intermittent connections, at the cost of higher memory usage. @@ -263,6 +281,21 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) self.expiry_time_msec = self.parse_duration(expiry_time) + self.cache_autotuning = cache_config.get("cache_autotuning") + if self.cache_autotuning: + max_memory_usage = self.cache_autotuning.get("max_cache_memory_usage") + self.cache_autotuning["max_cache_memory_usage"] = self.parse_size( + max_memory_usage + ) + + target_mem_size = self.cache_autotuning.get("target_cache_memory_usage") + self.cache_autotuning["target_cache_memory_usage"] = self.parse_size( + target_mem_size + ) + + min_cache_ttl = self.cache_autotuning.get("min_cache_ttl") + self.cache_autotuning["min_cache_ttl"] = self.parse_duration(min_cache_ttl) + self.sync_response_cache_duration = self.parse_duration( cache_config.get("sync_response_cache_duration", 0) ) diff --git a/synapse/metrics/jemalloc.py b/synapse/metrics/jemalloc.py index 6bc329f04a9c..1fc8a0e888a1 100644 --- a/synapse/metrics/jemalloc.py +++ b/synapse/metrics/jemalloc.py @@ -18,6 +18,7 @@ import re from typing import Iterable, Optional, overload +import attr from prometheus_client import REGISTRY, Metric from typing_extensions import Literal @@ -27,52 +28,24 @@ logger = logging.getLogger(__name__) -def _setup_jemalloc_stats() -> None: - """Checks to see if jemalloc is loaded, and hooks up a collector to record - statistics exposed by jemalloc. - """ - - # Try to find the loaded jemalloc shared library, if any. We need to - # introspect into what is loaded, rather than loading whatever is on the - # path, as if we load a *different* jemalloc version things will seg fault. 
- - # We look in `/proc/self/maps`, which only exists on linux. - if not os.path.exists("/proc/self/maps"): - logger.debug("Not looking for jemalloc as no /proc/self/maps exist") - return - - # We're looking for a path at the end of the line that includes - # "libjemalloc". - regex = re.compile(r"/\S+/libjemalloc.*$") - - jemalloc_path = None - with open("/proc/self/maps") as f: - for line in f: - match = regex.search(line.strip()) - if match: - jemalloc_path = match.group() - - if not jemalloc_path: - # No loaded jemalloc was found. - logger.debug("jemalloc not found") - return - - logger.debug("Found jemalloc at %s", jemalloc_path) - - jemalloc = ctypes.CDLL(jemalloc_path) +@attr.s(slots=True, frozen=True, auto_attribs=True) +class JemallocStats: + jemalloc: ctypes.CDLL @overload def _mallctl( - name: str, read: Literal[True] = True, write: Optional[int] = None + self, name: str, read: Literal[True] = True, write: Optional[int] = None ) -> int: ... @overload - def _mallctl(name: str, read: Literal[False], write: Optional[int] = None) -> None: + def _mallctl( + self, name: str, read: Literal[False], write: Optional[int] = None + ) -> None: ... def _mallctl( - name: str, read: bool = True, write: Optional[int] = None + self, name: str, read: bool = True, write: Optional[int] = None ) -> Optional[int]: """Wrapper around `mallctl` for reading and writing integers to jemalloc. @@ -120,7 +93,7 @@ def _mallctl( # Where oldp/oldlenp is a buffer where the old value will be written to # (if not null), and newp/newlen is the buffer with the new value to set # (if not null). Note that they're all references *except* newlen. - result = jemalloc.mallctl( + result = self.jemalloc.mallctl( name.encode("ascii"), input_var_ref, input_len_ref, @@ -136,21 +109,80 @@ def _mallctl( return input_var.value - def _jemalloc_refresh_stats() -> None: + def refresh_stats(self) -> None: """Request that jemalloc updates its internal statistics. 
This needs to be called before querying for stats, otherwise it will return stale values. """ try: - _mallctl("epoch", read=False, write=1) + self._mallctl("epoch", read=False, write=1) except Exception as e: logger.warning("Failed to reload jemalloc stats: %s", e) + def get_stat(self, name: str) -> int: + """Request the stat of the given name at the time of the last + `refresh_stats` call. This may throw if we fail to read + the stat. + """ + return self._mallctl(f"stats.{name}") + + +_JEMALLOC_STATS: Optional[JemallocStats] = None + + +def get_jemalloc_stats() -> Optional[JemallocStats]: + """Returns an interface to jemalloc, if it is being used. + + Note that this will always return None until `setup_jemalloc_stats` has been + called. + """ + return _JEMALLOC_STATS + + +def _setup_jemalloc_stats() -> None: + """Checks to see if jemalloc is loaded, and hooks up a collector to record + statistics exposed by jemalloc. + """ + + global _JEMALLOC_STATS + + # Try to find the loaded jemalloc shared library, if any. We need to + # introspect into what is loaded, rather than loading whatever is on the + # path, as if we load a *different* jemalloc version things will seg fault. + + # We look in `/proc/self/maps`, which only exists on linux. + if not os.path.exists("/proc/self/maps"): + logger.debug("Not looking for jemalloc as no /proc/self/maps exist") + return + + # We're looking for a path at the end of the line that includes + # "libjemalloc". + regex = re.compile(r"/\S+/libjemalloc.*$") + + jemalloc_path = None + with open("/proc/self/maps") as f: + for line in f: + match = regex.search(line.strip()) + if match: + jemalloc_path = match.group() + + if not jemalloc_path: + # No loaded jemalloc was found. 
+ logger.debug("jemalloc not found") + return + + logger.debug("Found jemalloc at %s", jemalloc_path) + + jemalloc_dll = ctypes.CDLL(jemalloc_path) + + stats = JemallocStats(jemalloc_dll) + _JEMALLOC_STATS = stats + class JemallocCollector(Collector): """Metrics for internal jemalloc stats.""" def collect(self) -> Iterable[Metric]: - _jemalloc_refresh_stats() + stats.refresh_stats() g = GaugeMetricFamily( "jemalloc_stats_app_memory_bytes", @@ -184,7 +216,7 @@ def collect(self) -> Iterable[Metric]: "metadata", ): try: - value = _mallctl(f"stats.{t}") + value = stats.get_stat(t) except Exception as e: # There was an error fetching the value, skip. logger.warning("Failed to read jemalloc stats.%s: %s", t, e) diff --git a/synapse/util/caches/lrucache.py b/synapse/util/caches/lrucache.py index 45ff0de638a4..a3b60578e3c3 100644 --- a/synapse/util/caches/lrucache.py +++ b/synapse/util/caches/lrucache.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +import math import threading import weakref from enum import Enum @@ -40,6 +41,7 @@ from synapse.config import cache as cache_config from synapse.metrics.background_process_metrics import wrap_as_background_process +from synapse.metrics.jemalloc import get_jemalloc_stats from synapse.util import Clock, caches from synapse.util.caches import CacheMetric, EvictionReason, register_cache from synapse.util.caches.treecache import TreeCache, iterate_tree_cache_entry @@ -106,10 +108,16 @@ def update_last_access(self, clock: Clock) -> None: @wrap_as_background_process("LruCache._expire_old_entries") -async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None: +async def _expire_old_entries( + clock: Clock, expiry_seconds: int, autotune_config: Optional[dict] +) -> None: """Walks the global cache list to find cache entries that haven't been - accessed in the given number of seconds. + accessed in the given number of seconds, or if a given memory threshold has been breached. 
""" + if autotune_config: + max_cache_memory_usage = autotune_config["max_cache_memory_usage"] + target_cache_memory_usage = autotune_config["target_cache_memory_usage"] + min_cache_ttl = autotune_config["min_cache_ttl"] / 1000 now = int(clock.time()) node = GLOBAL_ROOT.prev_node @@ -119,11 +127,36 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None: logger.debug("Searching for stale caches") + evicting_due_to_memory = False + + # determine if we're evicting due to memory + jemalloc_interface = get_jemalloc_stats() + if jemalloc_interface and autotune_config: + try: + jemalloc_interface.refresh_stats() + mem_usage = jemalloc_interface.get_stat("allocated") + if mem_usage > max_cache_memory_usage: + logger.info("Begin memory-based cache eviction.") + evicting_due_to_memory = True + except Exception: + logger.warning( + "Unable to read allocated memory, skipping memory-based cache eviction." + ) + while node is not GLOBAL_ROOT: # Only the root node isn't a `_TimedListNode`. 
assert isinstance(node, _TimedListNode) - if node.last_access_ts_secs > now - expiry_seconds: + # if node has not aged past expiry_seconds and we are not evicting due to memory usage, there's + # nothing to do here + if ( + node.last_access_ts_secs > now - expiry_seconds + and not evicting_due_to_memory + ): + break + + # if entry is newer than min_cache_entry_ttl then do not evict and don't evict anything newer + if evicting_due_to_memory and now - node.last_access_ts_secs < min_cache_ttl: break cache_entry = node.get_cache_entry() @@ -136,10 +169,29 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None: assert cache_entry is not None cache_entry.drop_from_cache() + # Check mem allocation periodically if we are evicting a bunch of caches + if jemalloc_interface and evicting_due_to_memory and (i + 1) % 100 == 0: + try: + jemalloc_interface.refresh_stats() + mem_usage = jemalloc_interface.get_stat("allocated") + if mem_usage < target_cache_memory_usage: + evicting_due_to_memory = False + logger.info("Stop memory-based cache eviction.") + except Exception: + logger.warning( + "Unable to read allocated memory, this may affect memory-based cache eviction." + ) + # If we've failed to read the current memory usage then we + # should stop trying to evict based on memory usage + evicting_due_to_memory = False + # If we do lots of work at once we yield to allow other stuff to happen. if (i + 1) % 10000 == 0: logger.debug("Waiting during drop") - await clock.sleep(0) + if node.last_access_ts_secs > now - expiry_seconds: + await clock.sleep(0.5) + else: + await clock.sleep(0) logger.debug("Waking during drop") node = next_node @@ -156,21 +208,28 @@ async def _expire_old_entries(clock: Clock, expiry_seconds: int) -> None: def setup_expire_lru_cache_entries(hs: "HomeServer") -> None: """Start a background job that expires all cache entries if they have not - been accessed for the given number of seconds. 
+ been accessed for the given number of seconds, or if a given memory usage threshold has been + breached. """ - if not hs.config.caches.expiry_time_msec: + if not hs.config.caches.expiry_time_msec and not hs.config.caches.cache_autotuning: return - logger.info( - "Expiring LRU caches after %d seconds", hs.config.caches.expiry_time_msec / 1000 - ) + if hs.config.caches.expiry_time_msec: + expiry_time = hs.config.caches.expiry_time_msec / 1000 + logger.info("Expiring LRU caches after %d seconds", expiry_time) + else: + expiry_time = math.inf global USE_GLOBAL_LIST USE_GLOBAL_LIST = True clock = hs.get_clock() clock.looping_call( - _expire_old_entries, 30 * 1000, clock, hs.config.caches.expiry_time_msec / 1000 + _expire_old_entries, + 30 * 1000, + clock, + expiry_time, + hs.config.caches.cache_autotuning, ) diff --git a/tests/util/test_lrucache.py b/tests/util/test_lrucache.py index 321fc1776f8c..67173a4f5b3a 100644 --- a/tests/util/test_lrucache.py +++ b/tests/util/test_lrucache.py @@ -14,8 +14,9 @@ from typing import List -from unittest.mock import Mock +from unittest.mock import Mock, patch +from synapse.metrics.jemalloc import JemallocStats from synapse.util.caches.lrucache import LruCache, setup_expire_lru_cache_entries from synapse.util.caches.treecache import TreeCache @@ -316,3 +317,58 @@ def test_evict(self): self.assertEqual(cache.get("key1"), None) self.assertEqual(cache.get("key2"), 3) + + +class MemoryEvictionTestCase(unittest.HomeserverTestCase): + @override_config( + { + "caches": { + "cache_autotuning": { + "max_cache_memory_usage": "700M", + "target_cache_memory_usage": "500M", + "min_cache_ttl": "5m", + } + } + } + ) + @patch("synapse.util.caches.lrucache.get_jemalloc_stats") + def test_evict_memory(self, jemalloc_interface) -> None: + mock_jemalloc_class = Mock(spec=JemallocStats) + jemalloc_interface.return_value = mock_jemalloc_class + + # set the return value of get_stat() to be greater than max_cache_memory_usage + 
mock_jemalloc_class.get_stat.return_value = 924288000 + + setup_expire_lru_cache_entries(self.hs) + cache = LruCache(4, clock=self.hs.get_clock()) + + cache["key1"] = 1 + cache["key2"] = 2 + + # advance the reactor less than the min_cache_ttl + self.reactor.advance(60 * 2) + + # our items should still be in the cache + self.assertEqual(cache.get("key1"), 1) + self.assertEqual(cache.get("key2"), 2) + + # advance the reactor past the min_cache_ttl + self.reactor.advance(60 * 6) + + # the items should be cleared from cache + self.assertEqual(cache.get("key1"), None) + self.assertEqual(cache.get("key2"), None) + + # add more stuff to caches + cache["key1"] = 1 + cache["key2"] = 2 + + # set the return value of get_stat() to be lower than target_cache_memory_usage + mock_jemalloc_class.get_stat.return_value = 10000 + + # advance the reactor past the min_cache_ttl + self.reactor.advance(60 * 6) + + # the items should still be in the cache + self.assertEqual(cache.get("key1"), 1) + self.assertEqual(cache.get("key2"), 2) From 86a515ccbf359ecd65a42a3f409b8f97c8f22284 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 16 May 2022 08:42:45 -0400 Subject: [PATCH 030/181] Consolidate logic for parsing relations. (#12693) Parse the `m.relates_to` event content field (which describes relations) in a single place, this is used during: * Event persistence. * Validation of the Client-Server API. * Fetching bundled aggregations. * Processing of push rules. Each of these separately implement the logic and each made slightly different assumptions about what was valid. Some had minor / potential bugs. 
--- changelog.d/12693.misc | 1 + synapse/events/__init__.py | 45 ++++++++++++++++++++++ synapse/handlers/message.py | 30 ++++++--------- synapse/handlers/relations.py | 20 +++++----- synapse/push/bulk_push_rule_evaluator.py | 6 +-- synapse/storage/databases/main/events.py | 49 ++++++++++-------------- tests/rest/client/test_sync.py | 8 +++- 7 files changed, 98 insertions(+), 61 deletions(-) create mode 100644 changelog.d/12693.misc diff --git a/changelog.d/12693.misc b/changelog.d/12693.misc new file mode 100644 index 000000000000..8bd1e1cb0cd5 --- /dev/null +++ b/changelog.d/12693.misc @@ -0,0 +1 @@ +Consolidate parsing of relation information from events. diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index c238376caf62..39ad2793d98d 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -15,6 +15,7 @@ # limitations under the License. import abc +import collections.abc import os from typing import ( TYPE_CHECKING, @@ -32,9 +33,11 @@ overload, ) +import attr from typing_extensions import Literal from unpaddedbase64 import encode_base64 +from synapse.api.constants import RelationTypes from synapse.api.room_versions import EventFormatVersions, RoomVersion, RoomVersions from synapse.types import JsonDict, RoomStreamToken from synapse.util.caches import intern_dict @@ -615,3 +618,45 @@ def make_event_from_dict( return event_type( event_dict, room_version, internal_metadata_dict or {}, rejected_reason ) + + +@attr.s(slots=True, frozen=True, auto_attribs=True) +class _EventRelation: + # The target event of the relation. + parent_id: str + # The relation type. + rel_type: str + # The aggregation key. Will be None if the rel_type is not m.annotation or is + # not a string. + aggregation_key: Optional[str] + + +def relation_from_event(event: EventBase) -> Optional[_EventRelation]: + """ + Attempt to parse relation information from an event. 
+ + Returns: + The event relation information, if it is valid. None, otherwise. 
+ """ + relation = event.content.get("m.relates_to") + if not relation or not isinstance(relation, collections.abc.Mapping): + # No relation information. + return None + + # Relations must have a type and parent event ID. + rel_type = relation.get("rel_type") + if not isinstance(rel_type, str): + return None + + parent_id = relation.get("event_id") + if not isinstance(parent_id, str): + return None + + # Annotations have a key field. + aggregation_key = None + if rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.get("key") + if not isinstance(aggregation_key, str): + aggregation_key = None + + return _EventRelation(parent_id, rel_type, aggregation_key) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4a4b535bae6a..0951b9c71f75 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -44,7 +44,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersions from synapse.api.urls import ConsentURIBuilder from synapse.event_auth import validate_event_for_room_version -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext from synapse.events.validator import EventValidator @@ -1060,20 +1060,11 @@ async def _validate_event_relation(self, event: EventBase) -> None: SynapseError if the event is invalid. """ - relation = event.content.get("m.relates_to") + relation = relation_from_event(event) if not relation: return - relation_type = relation.get("rel_type") - if not relation_type: - return - - # Ensure the parent is real. - relates_to = relation.get("event_id") - if not relates_to: - return - - parent_event = await self.store.get_event(relates_to, allow_none=True) + parent_event = await self.store.get_event(relation.parent_id, allow_none=True) if parent_event: # And in the same room. 
if parent_event.room_id != event.room_id: @@ -1082,28 +1073,31 @@ async def _validate_event_relation(self, event: EventBase) -> None: else: # There must be some reason that the client knows the event exists, # see if there are existing relations. If so, assume everything is fine. - if not await self.store.event_is_target_of_relation(relates_to): + if not await self.store.event_is_target_of_relation(relation.parent_id): # Otherwise, the client can't know about the parent event! raise SynapseError(400, "Can't send relation to unknown event") # If this event is an annotation then we check that that the sender # can't annotate the same way twice (e.g. stops users from liking an # event multiple times). - if relation_type == RelationTypes.ANNOTATION: - aggregation_key = relation["key"] + if relation.rel_type == RelationTypes.ANNOTATION: + aggregation_key = relation.aggregation_key + + if aggregation_key is None: + raise SynapseError(400, "Missing aggregation key") if len(aggregation_key) > 500: raise SynapseError(400, "Aggregation key is too long") already_exists = await self.store.has_user_annotated_event( - relates_to, event.type, aggregation_key, event.sender + relation.parent_id, event.type, aggregation_key, event.sender ) if already_exists: raise SynapseError(400, "Can't send same reaction twice") # Don't attempt to start a thread if the parent event is a relation. - elif relation_type == RelationTypes.THREAD: - if await self.store.event_includes_relation(relates_to): + elif relation.rel_type == RelationTypes.THREAD: + if await self.store.event_includes_relation(relation.parent_id): raise SynapseError( 400, "Cannot start threads from an event with a relation" ) diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index c2754ec918de..ab7e54857d56 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -import collections.abc import logging from typing import ( TYPE_CHECKING, @@ -28,7 +27,7 @@ from synapse.api.constants import RelationTypes from synapse.api.errors import SynapseError -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.storage.databases.main.relations import _RelatedEvent from synapse.types import JsonDict, Requester, StreamToken, UserID from synapse.visibility import filter_events_for_client @@ -373,20 +372,21 @@ async def get_bundled_aggregations( if event.is_state(): continue - relates_to = event.content.get("m.relates_to") - relation_type = None - if isinstance(relates_to, collections.abc.Mapping): - relation_type = relates_to.get("rel_type") + relates_to = relation_from_event(event) + if relates_to: # An event which is a replacement (ie edit) or annotation (ie, # reaction) may not have any other event related to it. - if relation_type in (RelationTypes.ANNOTATION, RelationTypes.REPLACE): + if relates_to.rel_type in ( + RelationTypes.ANNOTATION, + RelationTypes.REPLACE, + ): continue + # Track the event's relation information for later. + relations_by_id[event.event_id] = relates_to.rel_type + # The event should get bundled aggregations. events_by_id[event.event_id] = event - # Track the event's relation information for later. - if isinstance(relation_type, str): - relations_by_id[event.event_id] = relation_type # event ID -> bundled aggregation in non-serialized form. 
results: Dict[str, BundledAggregations] = {} diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 0ffafc882b65..4ac2c546bf2a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -21,7 +21,7 @@ from synapse.api.constants import EventTypes, Membership, RelationTypes from synapse.event_auth import get_user_power_level -from synapse.events import EventBase +from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership @@ -78,8 +78,8 @@ def _should_count_as_unread(event: EventBase, context: EventContext) -> bool: return False # Exclude edits. - relates_to = event.content.get("m.relates_to", {}) - if relates_to.get("rel_type") == RelationTypes.REPLACE: + relates_to = relation_from_event(event) + if relates_to and relates_to.rel_type == RelationTypes.REPLACE: return False # Mark events that have a non-empty string body as unread. diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index f544bcfff07f..42d484dc98d9 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -36,8 +36,8 @@ import synapse.metrics from synapse.api.constants import EventContentFields, EventTypes, RelationTypes from synapse.api.room_versions import RoomVersions -from synapse.events import EventBase # noqa: F401 -from synapse.events.snapshot import EventContext # noqa: F401 +from synapse.events import EventBase, relation_from_event +from synapse.events.snapshot import EventContext from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import ( DatabasePool, @@ -1807,52 +1807,45 @@ def _handle_event_relations( txn: The current database transaction. event: The event which might have relations. 
""" - relation = event.content.get("m.relates_to") + relation = relation_from_event(event) if not relation: - # No relations + # No relation, nothing to do. return - # Relations must have a type and parent event ID. - rel_type = relation.get("rel_type") - if not isinstance(rel_type, str): - return - - parent_id = relation.get("event_id") - if not isinstance(parent_id, str): - return - - # Annotations have a key field. - aggregation_key = None - if rel_type == RelationTypes.ANNOTATION: - aggregation_key = relation.get("key") - self.db_pool.simple_insert_txn( txn, table="event_relations", values={ "event_id": event.event_id, - "relates_to_id": parent_id, - "relation_type": rel_type, - "aggregation_key": aggregation_key, + "relates_to_id": relation.parent_id, + "relation_type": relation.rel_type, + "aggregation_key": relation.aggregation_key, }, ) - txn.call_after(self.store.get_relations_for_event.invalidate, (parent_id,)) txn.call_after( - self.store.get_aggregation_groups_for_event.invalidate, (parent_id,) + self.store.get_relations_for_event.invalidate, (relation.parent_id,) + ) + txn.call_after( + self.store.get_aggregation_groups_for_event.invalidate, + (relation.parent_id,), ) - if rel_type == RelationTypes.REPLACE: - txn.call_after(self.store.get_applicable_edit.invalidate, (parent_id,)) + if relation.rel_type == RelationTypes.REPLACE: + txn.call_after( + self.store.get_applicable_edit.invalidate, (relation.parent_id,) + ) - if rel_type == RelationTypes.THREAD: - txn.call_after(self.store.get_thread_summary.invalidate, (parent_id,)) + if relation.rel_type == RelationTypes.THREAD: + txn.call_after( + self.store.get_thread_summary.invalidate, (relation.parent_id,) + ) # It should be safe to only invalidate the cache if the user has not # previously participated in the thread, but that's difficult (and # potentially error-prone) so it is always invalidated. 
txn.call_after( self.store.get_thread_participated.invalidate, - (parent_id, event.sender), + (relation.parent_id, event.sender), ) def _handle_insertion_event( diff --git a/tests/rest/client/test_sync.py b/tests/rest/client/test_sync.py index 2722bf26e76c..74b6560cbcf5 100644 --- a/tests/rest/client/test_sync.py +++ b/tests/rest/client/test_sync.py @@ -656,12 +656,13 @@ def test_unread_counts(self) -> None: self._check_unread_count(3) # Check that custom events with a body increase the unread counter. - self.helper.send_event( + result = self.helper.send_event( self.room_id, "org.matrix.custom_type", {"body": "hello"}, tok=self.tok2, ) + event_id = result["event_id"] self._check_unread_count(4) # Check that edits don't increase the unread counter. @@ -671,7 +672,10 @@ def test_unread_counts(self) -> None: content={ "body": "hello", "msgtype": "m.text", - "m.relates_to": {"rel_type": RelationTypes.REPLACE}, + "m.relates_to": { + "rel_type": RelationTypes.REPLACE, + "event_id": event_id, + }, }, tok=self.tok2, ) From a5c26750b50563f2edda8b5d37c70b1d49e5f34c Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Mon, 16 May 2022 14:06:04 +0100 Subject: [PATCH 031/181] Fix room upgrades creating an empty room when auth fails (#12696) Signed-off-by: Sean Quah --- changelog.d/12696.bugfix | 1 + synapse/handlers/room.py | 125 ++++++++++++------ .../test_sharded_event_persister.py | 14 +- 3 files changed, 84 insertions(+), 56 deletions(-) create mode 100644 changelog.d/12696.bugfix diff --git a/changelog.d/12696.bugfix b/changelog.d/12696.bugfix new file mode 100644 index 000000000000..e410184a22af --- /dev/null +++ b/changelog.d/12696.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where an empty room would be created when a user with an insufficient power level tried to upgrade a room. 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index e71c78adad67..23baa50d0375 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -33,6 +33,7 @@ import attr from typing_extensions import TypedDict +import synapse.events.snapshot from synapse.api.constants import ( EventContentFields, EventTypes, @@ -77,7 +78,6 @@ create_requester, ) from synapse.util import stringutils -from synapse.util.async_helpers import Linearizer from synapse.util.caches.response_cache import ResponseCache from synapse.util.stringutils import parse_and_validate_server_name from synapse.visibility import filter_events_for_client @@ -155,9 +155,6 @@ def __init__(self, hs: "HomeServer"): self._replication = hs.get_replication_data_handler() - # linearizer to stop two upgrades happening at once - self._upgrade_linearizer = Linearizer("room_upgrade_linearizer") - # If a user tries to update the same room multiple times in quick # succession, only process the first attempt and return its result to # subsequent requests @@ -200,6 +197,39 @@ async def upgrade_room( 400, "An upgrade for this room is currently in progress" ) + # Check whether the room exists and 404 if it doesn't. + # We could go straight for the auth check, but that will raise a 403 instead. + old_room = await self.store.get_room(old_room_id) + if old_room is None: + raise NotFoundError("Unknown room id %s" % (old_room_id,)) + + new_room_id = self._generate_room_id() + + # Check whether the user has the power level to carry out the upgrade. + # `check_auth_rules_from_context` will check that they are in the room and have + # the required power level to send the tombstone event. 
+ ( + tombstone_event, + tombstone_context, + ) = await self.event_creation_handler.create_event( + requester, + { + "type": EventTypes.Tombstone, + "state_key": "", + "room_id": old_room_id, + "sender": user_id, + "content": { + "body": "This room has been replaced", + "replacement_room": new_room_id, + }, + }, + ) + old_room_version = await self.store.get_room_version(old_room_id) + validate_event_for_room_version(old_room_version, tombstone_event) + await self._event_auth_handler.check_auth_rules_from_context( + old_room_version, tombstone_event, tombstone_context + ) + # Upgrade the room # # If this user has sent multiple upgrade requests for the same room @@ -210,19 +240,35 @@ async def upgrade_room( self._upgrade_room, requester, old_room_id, - new_version, # args for _upgrade_room + old_room, # args for _upgrade_room + new_room_id, + new_version, + tombstone_event, + tombstone_context, ) return ret async def _upgrade_room( - self, requester: Requester, old_room_id: str, new_version: RoomVersion + self, + requester: Requester, + old_room_id: str, + old_room: Dict[str, Any], + new_room_id: str, + new_version: RoomVersion, + tombstone_event: EventBase, + tombstone_context: synapse.events.snapshot.EventContext, ) -> str: """ Args: requester: the user requesting the upgrade old_room_id: the id of the room to be replaced - new_versions: the version to upgrade the room to + old_room: a dict containing room information for the room to be replaced, + as returned by `RoomWorkerStore.get_room`. + new_room_id: the id of the replacement room + new_version: the version to upgrade the room to + tombstone_event: the tombstone event to send to the old room + tombstone_context: the context for the tombstone event Raises: ShadowBanError if the requester is shadow-banned. 
@@ -230,40 +276,15 @@ async def _upgrade_room( user_id = requester.user.to_string() assert self.hs.is_mine_id(user_id), "User must be our own: %s" % (user_id,) - # start by allocating a new room id - r = await self.store.get_room(old_room_id) - if r is None: - raise NotFoundError("Unknown room id %s" % (old_room_id,)) - new_room_id = await self._generate_room_id( - creator_id=user_id, - is_public=r["is_public"], - room_version=new_version, - ) - logger.info("Creating new room %s to replace %s", new_room_id, old_room_id) - # we create and auth the tombstone event before properly creating the new - # room, to check our user has perms in the old room. - ( - tombstone_event, - tombstone_context, - ) = await self.event_creation_handler.create_event( - requester, - { - "type": EventTypes.Tombstone, - "state_key": "", - "room_id": old_room_id, - "sender": user_id, - "content": { - "body": "This room has been replaced", - "replacement_room": new_room_id, - }, - }, - ) - old_room_version = await self.store.get_room_version(old_room_id) - validate_event_for_room_version(old_room_version, tombstone_event) - await self._event_auth_handler.check_auth_rules_from_context( - old_room_version, tombstone_event, tombstone_context + # create the new room. may raise a `StoreError` in the exceedingly unlikely + # event of a room ID collision. 
+ await self.store.store_room( + room_id=new_room_id, + room_creator_user_id=user_id, + is_public=old_room["is_public"], + room_version=new_version, ) await self.clone_existing_room( @@ -782,7 +803,7 @@ async def create_room( visibility = config.get("visibility", "private") is_public = visibility == "public" - room_id = await self._generate_room_id( + room_id = await self._generate_and_create_room_id( creator_id=user_id, is_public=is_public, room_version=room_version, @@ -1104,7 +1125,26 @@ async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: return last_sent_stream_id - async def _generate_room_id( + def _generate_room_id(self) -> str: + """Generates a random room ID. + + Room IDs look like "!opaque_id:domain" and are case-sensitive as per the spec + at https://spec.matrix.org/v1.2/appendices/#room-ids-and-event-ids. + + Does not check for collisions with existing rooms or prevent future calls from + returning the same room ID. To ensure the uniqueness of a new room ID, use + `_generate_and_create_room_id` instead. + + Synapse's room IDs are 18 [a-zA-Z] characters long, which comes out to around + 102 bits. + + Returns: + A random room ID of the form "!opaque_id:domain". 
+ """ + random_string = stringutils.random_string(18) + return RoomID(random_string, self.hs.hostname).to_string() + + async def _generate_and_create_room_id( self, creator_id: str, is_public: bool, @@ -1115,8 +1155,7 @@ async def _generate_room_id( attempts = 0 while attempts < 5: try: - random_string = stringutils.random_string(18) - gen_room_id = RoomID(random_string, self.hs.hostname).to_string() + gen_room_id = self._generate_room_id() await self.store.store_room( room_id=gen_room_id, room_creator_user_id=creator_id, diff --git a/tests/replication/test_sharded_event_persister.py b/tests/replication/test_sharded_event_persister.py index 5f142e84c359..a7ca68069e86 100644 --- a/tests/replication/test_sharded_event_persister.py +++ b/tests/replication/test_sharded_event_persister.py @@ -14,7 +14,6 @@ import logging from unittest.mock import patch -from synapse.api.room_versions import RoomVersion from synapse.rest import admin from synapse.rest.client import login, room, sync from synapse.storage.util.id_generators import MultiWriterIdGenerator @@ -64,21 +63,10 @@ def _create_room(self, room_id: str, user_id: str, tok: str): # We control the room ID generation by patching out the # `_generate_room_id` method - async def generate_room( - creator_id: str, is_public: bool, room_version: RoomVersion - ): - await self.store.store_room( - room_id=room_id, - room_creator_user_id=creator_id, - is_public=is_public, - room_version=room_version, - ) - return room_id - with patch( "synapse.handlers.room.RoomCreationHandler._generate_room_id" ) as mock: - mock.side_effect = generate_room + mock.side_effect = lambda: room_id self.helper.create_room_as(user_id, tok=tok) def test_basic(self): From 806003461226dc8db19bf9d631260b78c48d544e Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 16 May 2022 15:50:07 +0200 Subject: [PATCH 032/181] Fix typo in listener config (#12742) --- changelog.d/12742.doc | 1 + docs/sample_config.yaml | 
2 +- docs/usage/configuration/config_documentation.md | 4 ++-- synapse/config/server.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12742.doc diff --git a/changelog.d/12742.doc b/changelog.d/12742.doc new file mode 100644 index 000000000000..0084e27a7d03 --- /dev/null +++ b/changelog.d/12742.doc @@ -0,0 +1 @@ +Fix typo in server listener documentation. \ No newline at end of file diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index 05a3606da043..ee98d193cbb9 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -289,7 +289,7 @@ presence: # federation: the server-server API (/_matrix/federation). Also implies # 'media', 'keys', 'openid' # -# keys: the key discovery API (/_matrix/keys). +# keys: the key discovery API (/_matrix/key). # # media: the media API (/_matrix/media). # diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index ca443631055f..f0bf13976d2c 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -467,13 +467,13 @@ Sub-options for each listener include: Valid resource names are: -* `client`: the client-server API (/_matrix/client), and the synapse admin API (/_synapse/admin). Also implies 'media' and 'static'. +* `client`: the client-server API (/_matrix/client), and the synapse admin API (/_synapse/admin). Also implies `media` and `static`. * `consent`: user consent forms (/_matrix/consent). See [here](../../consent_tracking.md) for more. * `federation`: the server-server API (/_matrix/federation). Also implies `media`, `keys`, `openid` -* `keys`: the key discovery API (/_matrix/keys). +* `keys`: the key discovery API (/_matrix/key). * `media`: the media API (/_matrix/media). 
diff --git a/synapse/config/server.py b/synapse/config/server.py index 005a3ee48ce4..f73d5e1f6666 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -996,7 +996,7 @@ def generate_config_section( # federation: the server-server API (/_matrix/federation). Also implies # 'media', 'keys', 'openid' # - # keys: the key discovery API (/_matrix/keys). + # keys: the key discovery API (/_matrix/key). # # media: the media API (/_matrix/media). # From 3ce15cc7be02da139e0b274418b2c137d737035a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Mon, 16 May 2022 17:06:23 +0200 Subject: [PATCH 033/181] Avoid unnecessary copies when filtering private read receipts. (#12711) A minor optimization to avoid unnecessary copying/building identical dictionaries when filtering private read receipts. Also clarifies comments and cleans-up some tests. --- changelog.d/12711.misc | 1 + synapse/handlers/initial_sync.py | 6 +- synapse/handlers/receipts.py | 94 +++++++++++++++++++++----------- tests/handlers/test_receipts.py | 64 +++++++++------------- 4 files changed, 92 insertions(+), 73 deletions(-) create mode 100644 changelog.d/12711.misc diff --git a/changelog.d/12711.misc b/changelog.d/12711.misc new file mode 100644 index 000000000000..0831ce045268 --- /dev/null +++ b/changelog.d/12711.misc @@ -0,0 +1 @@ +Optimize private read receipt filtering. 
diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 7b94770f9722..de09aed3a356 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -143,7 +143,7 @@ async def _snapshot_all_rooms( to_key=int(now_token.receipt_key), ) if self.hs.config.experimental.msc2285_enabled: - receipt = ReceiptEventSource.filter_out_private(receipt, user_id) + receipt = ReceiptEventSource.filter_out_private_receipts(receipt, user_id) tags_by_room = await self.store.get_tags_for_user(user_id) @@ -449,7 +449,9 @@ async def get_receipts() -> List[JsonDict]: if not receipts: return [] if self.hs.config.experimental.msc2285_enabled: - receipts = ReceiptEventSource.filter_out_private(receipts, user_id) + receipts = ReceiptEventSource.filter_out_private_receipts( + receipts, user_id + ) return receipts presence, receipts, (messages, token) = await make_deferred_yieldable( diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 43d615357b3c..550d58b0e1c9 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -165,43 +165,69 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config @staticmethod - def filter_out_private(events: List[JsonDict], user_id: str) -> List[JsonDict]: + def filter_out_private_receipts( + rooms: List[JsonDict], user_id: str + ) -> List[JsonDict]: """ - This method takes in what is returned by - get_linearized_receipts_for_rooms() and goes through read receipts - filtering out m.read.private receipts if they were not sent by the - current user. - """ - - visible_events = [] + Filters a list of serialized receipts (as returned by /sync and /initialSync) + and removes private read receipts of other users. 
- # filter out private receipts the user shouldn't see - for event in events: - content = event.get("content", {}) - new_event = event.copy() - new_event["content"] = {} + This operates on the return value of get_linearized_receipts_for_rooms(), + which is wrapped in a cache. Care must be taken to ensure that the input + values are not modified. - for event_id, event_content in content.items(): - receipt_event = {} - for receipt_type, receipt_content in event_content.items(): - if receipt_type == ReceiptTypes.READ_PRIVATE: - user_rr = receipt_content.get(user_id, None) - if user_rr: - receipt_event[ReceiptTypes.READ_PRIVATE] = { - user_id: user_rr.copy() - } - else: - receipt_event[receipt_type] = receipt_content.copy() - - # Only include the receipt event if it is non-empty. - if receipt_event: - new_event["content"][event_id] = receipt_event + Args: + rooms: A list of mappings, each mapping has a `content` field, which + is a map of event ID -> receipt type -> user ID -> receipt information. - # Append new_event to visible_events unless empty - if len(new_event["content"].keys()) > 0: - visible_events.append(new_event) + Returns: + The same as rooms, but filtered. + """ - return visible_events + result = [] + + # Iterate through each room's receipt content. + for room in rooms: + # The receipt content with other users' private read receipts removed. + content = {} + + # Iterate over each event ID / receipts for that event. + for event_id, orig_event_content in room.get("content", {}).items(): + event_content = orig_event_content + # If there are private read receipts, additional logic is necessary. + if ReceiptTypes.READ_PRIVATE in event_content: + # Make a copy without private read receipts to avoid leaking + # other users' private read receipts.
+ event_content = { + receipt_type: receipt_value + for receipt_type, receipt_value in event_content.items() + if receipt_type != ReceiptTypes.READ_PRIVATE + } + + # Copy the current user's private read receipt from the + # original content, if it exists. + user_private_read_receipt = orig_event_content[ + ReceiptTypes.READ_PRIVATE + ].get(user_id, None) + if user_private_read_receipt: + event_content[ReceiptTypes.READ_PRIVATE] = { + user_id: user_private_read_receipt + } + + # Include the event if there is at least one non-private read + # receipt or the current user has a private read receipt. + if event_content: + content[event_id] = event_content + + # Include the event if there is at least one non-private read receipt + # or the current user has a private read receipt. + if content: + # Build a new event to avoid mutating the cache. + new_room = {k: v for k, v in room.items() if k != "content"} + new_room["content"] = content + result.append(new_room) + + return result async def get_new_events( self, @@ -223,7 +249,9 @@ async def get_new_events( ) if self.config.experimental.msc2285_enabled: - events = ReceiptEventSource.filter_out_private(events, user.to_string()) + events = ReceiptEventSource.filter_out_private_receipts( + events, user.to_string() + ) return events, to_key diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 0482a1ea34fb..78807cdcfcdc 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +from copy import deepcopy from typing import List from synapse.api.constants import ReceiptTypes @@ -125,42 +125,6 @@ def test_filters_out_event_with_only_private_receipts_and_ignores_the_rest(self) ], ) - def test_handles_missing_content_of_m_read(self): - self._test_filters_private( - [ - { - "content": { - "$14356419ggffg114394fHBLK:matrix.org": {ReceiptTypes.READ: {}}, - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ: { - "@user:jki.re": { - "ts": 1436451550453, - } - } - }, - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ], - [ - { - "content": { - "$14356419ggffg114394fHBLK:matrix.org": {ReceiptTypes.READ: {}}, - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ: { - "@user:jki.re": { - "ts": 1436451550453, - } - } - }, - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ], - ) - def test_handles_empty_event(self): self._test_filters_private( [ @@ -332,9 +296,33 @@ def test_leaves_our_private_and_their_public(self): ], ) + def test_we_do_not_mutate(self): + """Ensure the input values are not modified.""" + events = [ + { + "content": { + "$1435641916114394fHBLK:matrix.org": { + ReceiptTypes.READ_PRIVATE: { + "@rikj:jki.re": { + "ts": 1436451550453, + } + } + } + }, + "room_id": "!jEsUZKDJdhlrceRyVU:example.org", + "type": "m.receipt", + } + ] + original_events = deepcopy(events) + self._test_filters_private(events, []) + # Since the events are fed in from a cache they should not be modified. 
+ self.assertEqual(events, original_events) + def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] ): """Tests that the _filter_out_private returns the expected output""" - filtered_events = self.event_source.filter_out_private(events, "@me:server.org") + filtered_events = self.event_source.filter_out_private_receipts( + events, "@me:server.org" + ) self.assertEqual(filtered_events, expected_output) From 83be72d76ca171ceb0fc381aa4548c1d9fea0dc7 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 16 May 2022 16:35:31 +0100 Subject: [PATCH 034/181] Add `StreamKeyType` class and replace string literals with constants (#12567) --- changelog.d/12567.misc | 1 + synapse/handlers/account_data.py | 10 ++--- synapse/handlers/appservice.py | 39 ++++++++++--------- synapse/handlers/device.py | 7 +++- synapse/handlers/devicemessage.py | 6 +-- synapse/handlers/initial_sync.py | 13 ++++--- synapse/handlers/pagination.py | 6 +-- synapse/handlers/presence.py | 6 +-- synapse/handlers/receipts.py | 12 +++++- synapse/handlers/room.py | 9 +++-- synapse/handlers/search.py | 10 ++--- synapse/handlers/sync.py | 23 ++++++----- synapse/handlers/typing.py | 4 +- synapse/notifier.py | 5 ++- synapse/replication/tcp/client.py | 18 +++++---- .../server_notices/server_notices_manager.py | 4 +- .../storage/databases/main/e2e_room_keys.py | 4 +- synapse/storage/databases/main/relations.py | 6 ++- synapse/types.py | 22 +++++++++-- 19 files changed, 125 insertions(+), 80 deletions(-) create mode 100644 changelog.d/12567.misc diff --git a/changelog.d/12567.misc b/changelog.d/12567.misc new file mode 100644 index 000000000000..35f08569bada --- /dev/null +++ b/changelog.d/12567.misc @@ -0,0 +1 @@ +Replace string literal instances of stream key types with typed constants. 
\ No newline at end of file diff --git a/synapse/handlers/account_data.py b/synapse/handlers/account_data.py index 4af9fbc5d10a..0478448b47ea 100644 --- a/synapse/handlers/account_data.py +++ b/synapse/handlers/account_data.py @@ -23,7 +23,7 @@ ReplicationUserAccountDataRestServlet, ) from synapse.streams import EventSource -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, StreamKeyType, UserID if TYPE_CHECKING: from synapse.server import HomeServer @@ -105,7 +105,7 @@ async def add_account_data_to_room( ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) await self._notify_modules(user_id, room_id, account_data_type, content) @@ -141,7 +141,7 @@ async def add_account_data_for_user( ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) await self._notify_modules(user_id, None, account_data_type, content) @@ -176,7 +176,7 @@ async def add_tag_to_room( ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) return max_stream_id else: @@ -201,7 +201,7 @@ async def remove_tag_from_room(self, user_id: str, room_id: str, tag: str) -> in ) self._notifier.on_new_event( - "account_data_key", max_stream_id, users=[user_id] + StreamKeyType.ACCOUNT_DATA, max_stream_id, users=[user_id] ) return max_stream_id else: diff --git a/synapse/handlers/appservice.py b/synapse/handlers/appservice.py index 85bd5e47682b..1da7bcc85b5c 100644 --- a/synapse/handlers/appservice.py +++ b/synapse/handlers/appservice.py @@ -38,6 +38,7 @@ JsonDict, RoomAlias, RoomStreamToken, + StreamKeyType, UserID, ) from synapse.util.async_helpers import Linearizer @@ -213,8 +214,8 @@ def notify_interested_services_ephemeral( Args: stream_key: The stream the event came from. 
- `stream_key` can be "typing_key", "receipt_key", "presence_key", - "to_device_key" or "device_list_key". Any other value for `stream_key` + `stream_key` can be StreamKeyType.TYPING, StreamKeyType.RECEIPT, StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE or StreamKeyType.DEVICE_LIST. Any other value for `stream_key` will cause this function to return early. Ephemeral events will only be pushed to appservices that have opted into @@ -235,11 +236,11 @@ def notify_interested_services_ephemeral( # Only the following streams are currently supported. # FIXME: We should use constants for these values. if stream_key not in ( - "typing_key", - "receipt_key", - "presence_key", - "to_device_key", - "device_list_key", + StreamKeyType.TYPING, + StreamKeyType.RECEIPT, + StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE, + StreamKeyType.DEVICE_LIST, ): return @@ -258,14 +259,14 @@ def notify_interested_services_ephemeral( # Ignore to-device messages if the feature flag is not enabled if ( - stream_key == "to_device_key" + stream_key == StreamKeyType.TO_DEVICE and not self._msc2409_to_device_messages_enabled ): return # Ignore device lists if the feature flag is not enabled if ( - stream_key == "device_list_key" + stream_key == StreamKeyType.DEVICE_LIST and not self._msc3202_transaction_extensions_enabled ): return @@ -283,15 +284,15 @@ def notify_interested_services_ephemeral( if ( stream_key in ( - "typing_key", - "receipt_key", - "presence_key", - "to_device_key", + StreamKeyType.TYPING, + StreamKeyType.RECEIPT, + StreamKeyType.PRESENCE, + StreamKeyType.TO_DEVICE, ) and service.supports_ephemeral ) or ( - stream_key == "device_list_key" + stream_key == StreamKeyType.DEVICE_LIST and service.msc3202_transaction_extensions ) ] @@ -317,7 +318,7 @@ async def _notify_interested_services_ephemeral( logger.debug("Checking interested services for %s", stream_key) with Measure(self.clock, "notify_interested_services_ephemeral"): for service in services: - if stream_key == "typing_key": 
+ if stream_key == StreamKeyType.TYPING: # Note that we don't persist the token (via set_appservice_stream_type_pos) # for typing_key due to performance reasons and due to their highly # ephemeral nature. @@ -333,7 +334,7 @@ async def _notify_interested_services_ephemeral( async with self._ephemeral_events_linearizer.queue( (service.id, stream_key) ): - if stream_key == "receipt_key": + if stream_key == StreamKeyType.RECEIPT: events = await self._handle_receipts(service, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -342,7 +343,7 @@ async def _notify_interested_services_ephemeral( service, "read_receipt", new_token ) - elif stream_key == "presence_key": + elif stream_key == StreamKeyType.PRESENCE: events = await self._handle_presence(service, users, new_token) self.scheduler.enqueue_for_appservice(service, ephemeral=events) @@ -351,7 +352,7 @@ async def _notify_interested_services_ephemeral( service, "presence", new_token ) - elif stream_key == "to_device_key": + elif stream_key == StreamKeyType.TO_DEVICE: # Retrieve a list of to-device message events, as well as the # maximum stream token of the messages we were able to retrieve. to_device_messages = await self._get_to_device_messages( @@ -366,7 +367,7 @@ async def _notify_interested_services_ephemeral( service, "to_device", new_token ) - elif stream_key == "device_list_key": + elif stream_key == StreamKeyType.DEVICE_LIST: device_list_summary = await self._get_device_list_summary( service, new_token ) diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index a91b1ee4d5f4..1d6d1f8a9248 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -43,6 +43,7 @@ ) from synapse.types import ( JsonDict, + StreamKeyType, StreamToken, UserID, get_domain_from_id, @@ -502,7 +503,7 @@ async def notify_device_update( # specify the user ID too since the user should always get their own device list # updates, even if they aren't in any rooms. 
self.notifier.on_new_event( - "device_list_key", position, users={user_id}, rooms=room_ids + StreamKeyType.DEVICE_LIST, position, users={user_id}, rooms=room_ids ) # We may need to do some processing asynchronously for local user IDs. @@ -523,7 +524,9 @@ async def notify_user_signature_update( from_user_id, user_ids ) - self.notifier.on_new_event("device_list_key", position, users=[from_user_id]) + self.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, position, users=[from_user_id] + ) async def user_left_room(self, user: UserID, room_id: str) -> None: user_id = user.to_string() diff --git a/synapse/handlers/devicemessage.py b/synapse/handlers/devicemessage.py index 4cb725d027c7..53668cce3bb4 100644 --- a/synapse/handlers/devicemessage.py +++ b/synapse/handlers/devicemessage.py @@ -26,7 +26,7 @@ set_tag, ) from synapse.replication.http.devices import ReplicationUserDevicesResyncRestServlet -from synapse.types import JsonDict, Requester, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id from synapse.util import json_encoder from synapse.util.stringutils import random_string @@ -151,7 +151,7 @@ async def on_direct_to_device_edu(self, origin: str, content: JsonDict) -> None: # Notify listeners that there are new to-device messages to process, # handing them the latest stream id. self.notifier.on_new_event( - "to_device_key", last_stream_id, users=local_messages.keys() + StreamKeyType.TO_DEVICE, last_stream_id, users=local_messages.keys() ) async def _check_for_unknown_devices( @@ -285,7 +285,7 @@ async def send_device_message( # Notify listeners that there are new to-device messages to process, # handing them the latest stream id. 
self.notifier.on_new_event( - "to_device_key", last_stream_id, users=local_messages.keys() + StreamKeyType.TO_DEVICE, last_stream_id, users=local_messages.keys() ) if self.federation_sender: diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index de09aed3a356..d79248ad905b 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -30,6 +30,7 @@ Requester, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, ) @@ -220,8 +221,10 @@ async def handle_room(event: RoomsForUser) -> None: self.storage, user_id, messages ) - start_token = now_token.copy_and_replace("room_key", token) - end_token = now_token.copy_and_replace("room_key", room_end_token) + start_token = now_token.copy_and_replace(StreamKeyType.ROOM, token) + end_token = now_token.copy_and_replace( + StreamKeyType.ROOM, room_end_token + ) time_now = self.clock.time_msec() d["messages"] = { @@ -369,8 +372,8 @@ async def _room_initial_sync_parted( self.storage, user_id, messages, is_peeking=is_peeking ) - start_token = StreamToken.START.copy_and_replace("room_key", token) - end_token = StreamToken.START.copy_and_replace("room_key", stream_token) + start_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, token) + end_token = StreamToken.START.copy_and_replace(StreamKeyType.ROOM, stream_token) time_now = self.clock.time_msec() @@ -474,7 +477,7 @@ async def get_receipts() -> List[JsonDict]: self.storage, user_id, messages, is_peeking=is_peeking ) - start_token = now_token.copy_and_replace("room_key", token) + start_token = now_token.copy_and_replace(StreamKeyType.ROOM, token) end_token = now_token time_now = self.clock.time_msec() diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 2e30180094d2..6ae88add9526 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -27,7 +27,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from 
synapse.storage.state import StateFilter from synapse.streams.config import PaginationConfig -from synapse.types import JsonDict, Requester +from synapse.types import JsonDict, Requester, StreamKeyType from synapse.util.async_helpers import ReadWriteLock from synapse.util.stringutils import random_string from synapse.visibility import filter_events_for_client @@ -491,7 +491,7 @@ async def get_messages( if leave_token.topological < curr_topo: from_token = from_token.copy_and_replace( - "room_key", leave_token + StreamKeyType.ROOM, leave_token ) await self.hs.get_federation_handler().maybe_backfill( @@ -513,7 +513,7 @@ async def get_messages( event_filter=event_filter, ) - next_token = from_token.copy_and_replace("room_key", next_key) + next_token = from_token.copy_and_replace(StreamKeyType.ROOM, next_key) if events: if event_filter: diff --git a/synapse/handlers/presence.py b/synapse/handlers/presence.py index 268481ec1963..dd84e6c88b6c 100644 --- a/synapse/handlers/presence.py +++ b/synapse/handlers/presence.py @@ -66,7 +66,7 @@ from synapse.replication.tcp.streams import PresenceFederationStream, PresenceStream from synapse.storage.databases.main import DataStore from synapse.streams import EventSource -from synapse.types import JsonDict, UserID, get_domain_from_id +from synapse.types import JsonDict, StreamKeyType, UserID, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import Measure @@ -522,7 +522,7 @@ async def notify_from_replication( room_ids_to_states, users_to_states = parties self.notifier.on_new_event( - "presence_key", + StreamKeyType.PRESENCE, stream_id, rooms=room_ids_to_states.keys(), users=users_to_states.keys(), @@ -1145,7 +1145,7 @@ async def _persist_and_notify(self, states: List[UserPresenceState]) -> None: room_ids_to_states, users_to_states = parties self.notifier.on_new_event( - "presence_key", + StreamKeyType.PRESENCE, stream_id, 
rooms=room_ids_to_states.keys(), users=[UserID.from_string(u) for u in users_to_states], diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 550d58b0e1c9..e6a35f1d093c 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -17,7 +17,13 @@ from synapse.api.constants import ReceiptTypes from synapse.appservice import ApplicationService from synapse.streams import EventSource -from synapse.types import JsonDict, ReadReceipt, UserID, get_domain_from_id +from synapse.types import ( + JsonDict, + ReadReceipt, + StreamKeyType, + UserID, + get_domain_from_id, +) if TYPE_CHECKING: from synapse.server import HomeServer @@ -129,7 +135,9 @@ async def _handle_new_receipts(self, receipts: List[ReadReceipt]) -> bool: affected_room_ids = list({r.room_id for r in receipts}) - self.notifier.on_new_event("receipt_key", max_batch_id, rooms=affected_room_ids) + self.notifier.on_new_event( + StreamKeyType.RECEIPT, max_batch_id, rooms=affected_room_ids + ) # Note that the min here shouldn't be relied upon to be accurate. 
await self.hs.get_pusherpool().on_new_receipts( min_batch_id, max_batch_id, affected_room_ids diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 23baa50d0375..a2973109adc4 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -73,6 +73,7 @@ RoomID, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, create_requester, @@ -1292,10 +1293,10 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: events_after=events_after, state=await filter_evts(state_events), aggregations=aggregations, - start=await token.copy_and_replace("room_key", results.start).to_string( - self.store - ), - end=await token.copy_and_replace("room_key", results.end).to_string( + start=await token.copy_and_replace( + StreamKeyType.ROOM, results.start + ).to_string(self.store), + end=await token.copy_and_replace(StreamKeyType.ROOM, results.end).to_string( self.store ), ) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 5619f8f50e03..cd1c47dae8b1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -24,7 +24,7 @@ from synapse.api.filtering import Filter from synapse.events import EventBase from synapse.storage.state import StateFilter -from synapse.types import JsonDict, UserID +from synapse.types import JsonDict, StreamKeyType, UserID from synapse.visibility import filter_events_for_client if TYPE_CHECKING: @@ -655,11 +655,11 @@ async def _calculate_event_contexts( "events_before": events_before, "events_after": events_after, "start": await now_token.copy_and_replace( - "room_key", res.start + StreamKeyType.ROOM, res.start + ).to_string(self.store), + "end": await now_token.copy_and_replace( + StreamKeyType.ROOM, res.end ).to_string(self.store), - "end": await now_token.copy_and_replace("room_key", res.end).to_string( - self.store - ), } if include_profile: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 2c555a66d066..4be08fe7cbc6 100644 --- 
a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -37,6 +37,7 @@ Requester, RoomStreamToken, StateMap, + StreamKeyType, StreamToken, UserID, ) @@ -449,7 +450,7 @@ async def ephemeral_by_room( room_ids=room_ids, is_guest=sync_config.is_guest, ) - now_token = now_token.copy_and_replace("typing_key", typing_key) + now_token = now_token.copy_and_replace(StreamKeyType.TYPING, typing_key) ephemeral_by_room: JsonDict = {} @@ -471,7 +472,7 @@ async def ephemeral_by_room( room_ids=room_ids, is_guest=sync_config.is_guest, ) - now_token = now_token.copy_and_replace("receipt_key", receipt_key) + now_token = now_token.copy_and_replace(StreamKeyType.RECEIPT, receipt_key) for event in receipts: room_id = event["room_id"] @@ -537,7 +538,9 @@ async def _load_filtered_recents( prev_batch_token = now_token if recents: room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace("room_key", room_key) + prev_batch_token = now_token.copy_and_replace( + StreamKeyType.ROOM, room_key + ) return TimelineBatch( events=recents, prev_batch=prev_batch_token, limited=False @@ -611,7 +614,7 @@ async def _load_filtered_recents( recents = recents[-timeline_limit:] room_key = recents[0].internal_metadata.before - prev_batch_token = now_token.copy_and_replace("room_key", room_key) + prev_batch_token = now_token.copy_and_replace(StreamKeyType.ROOM, room_key) # Don't bother to bundle aggregations if the timeline is unlimited, # as clients will have all the necessary information. 
@@ -1398,7 +1401,7 @@ async def _generate_sync_entry_for_to_device( now_token.to_device_key, ) sync_result_builder.now_token = now_token.copy_and_replace( - "to_device_key", stream_id + StreamKeyType.TO_DEVICE, stream_id ) sync_result_builder.to_device = messages else: @@ -1503,7 +1506,7 @@ async def _generate_sync_entry_for_presence( ) assert presence_key sync_result_builder.now_token = now_token.copy_and_replace( - "presence_key", presence_key + StreamKeyType.PRESENCE, presence_key ) extra_users_ids = set(newly_joined_or_invited_users) @@ -1826,7 +1829,7 @@ async def _get_rooms_changed( # stream token as it'll only be used in the context of this # room. (c.f. the docstring of `to_room_stream_token`). leave_token = since_token.copy_and_replace( - "room_key", leave_position.to_room_stream_token() + StreamKeyType.ROOM, leave_position.to_room_stream_token() ) # If this is an out of band message, like a remote invite @@ -1875,7 +1878,9 @@ async def _get_rooms_changed( if room_entry: events, start_key = room_entry - prev_batch_token = now_token.copy_and_replace("room_key", start_key) + prev_batch_token = now_token.copy_and_replace( + StreamKeyType.ROOM, start_key + ) entry = RoomSyncResultBuilder( room_id=room_id, @@ -1972,7 +1977,7 @@ async def _get_all_rooms( continue leave_token = now_token.copy_and_replace( - "room_key", RoomStreamToken(None, event.stream_ordering) + StreamKeyType.ROOM, RoomStreamToken(None, event.stream_ordering) ) room_entries.append( RoomSyncResultBuilder( diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index 6854428b7ca5..bb00750bfd47 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -25,7 +25,7 @@ ) from synapse.replication.tcp.streams import TypingStream from synapse.streams import EventSource -from synapse.types import JsonDict, Requester, UserID, get_domain_from_id +from synapse.types import JsonDict, Requester, StreamKeyType, UserID, get_domain_from_id from 
synapse.util.caches.stream_change_cache import StreamChangeCache from synapse.util.metrics import Measure from synapse.util.wheel_timer import WheelTimer @@ -382,7 +382,7 @@ def _push_update_local(self, member: RoomMember, typing: bool) -> None: ) self.notifier.on_new_event( - "typing_key", self._latest_room_serial, rooms=[member.room_id] + StreamKeyType.TYPING, self._latest_room_serial, rooms=[member.room_id] ) async def get_all_typing_updates( diff --git a/synapse/notifier.py b/synapse/notifier.py index 01a50b9d6226..ba23257f5498 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -46,6 +46,7 @@ JsonDict, PersistedEventPosition, RoomStreamToken, + StreamKeyType, StreamToken, UserID, ) @@ -370,7 +371,7 @@ def _notify_pending_new_room_events( if users or rooms: self.on_new_event( - "room_key", + StreamKeyType.ROOM, max_room_stream_token, users=users, rooms=rooms, @@ -440,7 +441,7 @@ def on_new_event( for room in rooms: user_streams |= self.room_to_user_streams.get(room, set()) - if stream_key == "to_device_key": + if stream_key == StreamKeyType.TO_DEVICE: issue9533_logger.debug( "to-device messages stream id %s, awaking streams for %s", new_token, diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index 350762f49447..a52e25c1af3f 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -43,7 +43,7 @@ EventsStreamEventRow, EventsStreamRow, ) -from synapse.types import PersistedEventPosition, ReadReceipt, UserID +from synapse.types import PersistedEventPosition, ReadReceipt, StreamKeyType, UserID from synapse.util.async_helpers import Linearizer, timeout_deferred from synapse.util.metrics import Measure @@ -153,19 +153,19 @@ async def on_rdata( if stream_name == TypingStream.NAME: self._typing_handler.process_replication_rows(token, rows) self.notifier.on_new_event( - "typing_key", token, rooms=[row.room_id for row in rows] + StreamKeyType.TYPING, token, rooms=[row.room_id for row in rows] ) 
elif stream_name == PushRulesStream.NAME: self.notifier.on_new_event( - "push_rules_key", token, users=[row.user_id for row in rows] + StreamKeyType.PUSH_RULES, token, users=[row.user_id for row in rows] ) elif stream_name in (AccountDataStream.NAME, TagAccountDataStream.NAME): self.notifier.on_new_event( - "account_data_key", token, users=[row.user_id for row in rows] + StreamKeyType.ACCOUNT_DATA, token, users=[row.user_id for row in rows] ) elif stream_name == ReceiptsStream.NAME: self.notifier.on_new_event( - "receipt_key", token, rooms=[row.room_id for row in rows] + StreamKeyType.RECEIPT, token, rooms=[row.room_id for row in rows] ) await self._pusher_pool.on_new_receipts( token, token, {row.room_id for row in rows} @@ -173,14 +173,18 @@ async def on_rdata( elif stream_name == ToDeviceStream.NAME: entities = [row.entity for row in rows if row.entity.startswith("@")] if entities: - self.notifier.on_new_event("to_device_key", token, users=entities) + self.notifier.on_new_event( + StreamKeyType.TO_DEVICE, token, users=entities + ) elif stream_name == DeviceListsStream.NAME: all_room_ids: Set[str] = set() for row in rows: if row.entity.startswith("@"): room_ids = await self.store.get_rooms_for_user(row.entity) all_room_ids.update(room_ids) - self.notifier.on_new_event("device_list_key", token, rooms=all_room_ids) + self.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, token, rooms=all_room_ids + ) elif stream_name == GroupServerStream.NAME: self.notifier.on_new_event( "groups_key", token, users=[row.user_id for row in rows] diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index c2c37e1015ce..8ecab86ec7d3 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -16,7 +16,7 @@ from synapse.api.constants import EventTypes, Membership, RoomCreationPreset from synapse.events import EventBase -from synapse.types import Requester, UserID, 
create_requester +from synapse.types import Requester, StreamKeyType, UserID, create_requester from synapse.util.caches.descriptors import cached if TYPE_CHECKING: @@ -189,7 +189,7 @@ async def get_or_create_notice_room_for_user(self, user_id: str) -> str: max_id = await self._account_data_handler.add_tag_to_room( user_id, room_id, SERVER_NOTICE_ROOM_TAG, {} ) - self._notifier.on_new_event("account_data_key", max_id, users=[user_id]) + self._notifier.on_new_event(StreamKeyType.ACCOUNT_DATA, max_id, users=[user_id]) logger.info("Created server notices room %s for %s", room_id, user_id) return room_id diff --git a/synapse/storage/databases/main/e2e_room_keys.py b/synapse/storage/databases/main/e2e_room_keys.py index b789a588a54b..af59be6b4854 100644 --- a/synapse/storage/databases/main/e2e_room_keys.py +++ b/synapse/storage/databases/main/e2e_room_keys.py @@ -21,7 +21,7 @@ from synapse.logging.opentracing import log_kv, trace from synapse.storage._base import SQLBaseStore, db_to_json from synapse.storage.database import LoggingTransaction -from synapse.types import JsonDict, JsonSerializable +from synapse.types import JsonDict, JsonSerializable, StreamKeyType from synapse.util import json_encoder @@ -126,7 +126,7 @@ async def add_e2e_room_keys( "message": "Set room key", "room_id": room_id, "session_id": session_id, - "room_key": room_key, + StreamKeyType.ROOM: room_key, } ) diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 484976ca6b0b..fe8fded88b87 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -34,7 +34,7 @@ from synapse.storage.database import LoggingTransaction, make_in_list_sql_clause from synapse.storage.databases.main.stream import generate_pagination_where_clause from synapse.storage.engines import PostgresEngine -from synapse.types import JsonDict, RoomStreamToken, StreamToken +from synapse.types import JsonDict, RoomStreamToken, 
StreamKeyType, StreamToken from synapse.util.caches.descriptors import cached, cachedList logger = logging.getLogger(__name__) @@ -161,7 +161,9 @@ def _get_recent_references_for_event_txn( if len(events) > limit and last_topo_id and last_stream_id: next_key = RoomStreamToken(last_topo_id, last_stream_id) if from_token: - next_token = from_token.copy_and_replace("room_key", next_key) + next_token = from_token.copy_and_replace( + StreamKeyType.ROOM, next_key + ) else: next_token = StreamToken( room_key=next_key, diff --git a/synapse/types.py b/synapse/types.py index 325332a6e00f..bd8071d51d78 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -37,7 +37,7 @@ from frozendict import frozendict from signedjson.key import decode_verify_key_bytes from signedjson.types import VerifyKey -from typing_extensions import TypedDict +from typing_extensions import Final, TypedDict from unpaddedbase64 import decode_base64 from zope.interface import Interface @@ -630,6 +630,22 @@ async def to_string(self, store: "DataStore") -> str: return "s%d" % (self.stream,) +class StreamKeyType: + """Known stream types. + + A stream is a list of entities ordered by an incrementing "stream token". + """ + + ROOM: Final = "room_key" + PRESENCE: Final = "presence_key" + TYPING: Final = "typing_key" + RECEIPT: Final = "receipt_key" + ACCOUNT_DATA: Final = "account_data_key" + PUSH_RULES: Final = "push_rules_key" + TO_DEVICE: Final = "to_device_key" + DEVICE_LIST: Final = "device_list_key" + + @attr.s(slots=True, frozen=True, auto_attribs=True) class StreamToken: """A collection of keys joined together by underscores in the following @@ -743,9 +759,9 @@ def copy_and_advance(self, key: str, new_value: Any) -> "StreamToken": :raises TypeError: if `key` is not the one of the keys tracked by a StreamToken. 
""" - if key == "room_key": + if key == StreamKeyType.ROOM: new_token = self.copy_and_replace( - "room_key", self.room_key.copy_and_advance(new_value) + StreamKeyType.ROOM, self.room_key.copy_and_advance(new_value) ) return new_token From e24c11afd6bb63de2dd7e029a5839d84f592df3c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 16 May 2022 17:51:43 +0100 Subject: [PATCH 035/181] changelog --- changelog.d/12748.doc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12748.doc diff --git a/changelog.d/12748.doc b/changelog.d/12748.doc new file mode 100644 index 000000000000..996ad3a1b926 --- /dev/null +++ b/changelog.d/12748.doc @@ -0,0 +1 @@ +Link to the configuration manual from the welcome page of the documentation. From 3eafee629d39eadbf4a3df3cb97801405d232e08 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Mon, 16 May 2022 17:52:17 +0100 Subject: [PATCH 036/181] Revert "changelog" This reverts commit e24c11afd6bb63de2dd7e029a5839d84f592df3c. whoops... --- changelog.d/12748.doc | 1 - 1 file changed, 1 deletion(-) delete mode 100644 changelog.d/12748.doc diff --git a/changelog.d/12748.doc b/changelog.d/12748.doc deleted file mode 100644 index 996ad3a1b926..000000000000 --- a/changelog.d/12748.doc +++ /dev/null @@ -1 +0,0 @@ -Link to the configuration manual from the welcome page of the documentation. 
From 6d8d1218dde7c265dadf723cf9955d2c2fc77df9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 16 May 2022 18:31:12 +0100 Subject: [PATCH 037/181] Fix typo in name of 'run_background_tasks_on' option in config manual (#12749) --- changelog.d/12749.doc | 1 + docs/usage/configuration/config_documentation.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12749.doc diff --git a/changelog.d/12749.doc b/changelog.d/12749.doc new file mode 100644 index 000000000000..4560319ee43b --- /dev/null +++ b/changelog.d/12749.doc @@ -0,0 +1 @@ +Fix typo in 'run_background_tasks_on' option name in configuration manual documentation. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index f0bf13976d2c..3e2031f08aa6 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3454,7 +3454,7 @@ stream_writers: typing: worker1 ``` --- -Config option: `run_background_task_on` +Config option: `run_background_tasks_on` The worker that is used to run background tasks (e.g. cleaning up expired data). If not provided this defaults to the main process. 
From 1fe202a1a3343fad77da270ffe0923a46f1944dd Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 17 May 2022 00:34:38 +0100 Subject: [PATCH 038/181] Tidy up and type-hint the database engine modules (#12734) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/12734.misc | 1 + mypy.ini | 3 + synapse/storage/engines/__init__.py | 12 ++-- synapse/storage/engines/_base.py | 26 ++++---- synapse/storage/engines/postgres.py | 92 ++++++++++++++++------------- synapse/storage/engines/sqlite.py | 72 +++++++++++----------- synapse/storage/types.py | 70 ++++++++++++++++++++++ 7 files changed, 182 insertions(+), 94 deletions(-) create mode 100644 changelog.d/12734.misc diff --git a/changelog.d/12734.misc b/changelog.d/12734.misc new file mode 100644 index 000000000000..ffbfb0d63233 --- /dev/null +++ b/changelog.d/12734.misc @@ -0,0 +1 @@ +Tidy up and type-hint the database engine modules. diff --git a/mypy.ini b/mypy.ini index 9ae7ad211c54..b5b907973ffc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -232,6 +232,9 @@ disallow_untyped_defs = True [mypy-synapse.storage.databases.main.user_erasure_store] disallow_untyped_defs = True +[mypy-synapse.storage.engines.*] +disallow_untyped_defs = True + [mypy-synapse.storage.prepare_database] disallow_untyped_defs = True diff --git a/synapse/storage/engines/__init__.py b/synapse/storage/engines/__init__.py index afb7d5054db8..f51b3d228ee7 100644 --- a/synapse/storage/engines/__init__.py +++ b/synapse/storage/engines/__init__.py @@ -11,25 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from typing import Any, Mapping from ._base import BaseDatabaseEngine, IncorrectDatabaseSetup from .postgres import PostgresEngine from .sqlite import Sqlite3Engine -def create_engine(database_config) -> BaseDatabaseEngine: +def create_engine(database_config: Mapping[str, Any]) -> BaseDatabaseEngine: name = database_config["name"] if name == "sqlite3": - import sqlite3 - - return Sqlite3Engine(sqlite3, database_config) + return Sqlite3Engine(database_config) if name == "psycopg2": - # Note that psycopg2cffi-compat provides the psycopg2 module on pypy. - import psycopg2 - - return PostgresEngine(psycopg2, database_config) + return PostgresEngine(database_config) raise RuntimeError("Unsupported database engine '%s'" % (name,)) diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index 143cd98ca292..971ff8269323 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -13,9 +13,12 @@ # limitations under the License. import abc from enum import IntEnum -from typing import Generic, Optional, TypeVar +from typing import TYPE_CHECKING, Any, Generic, Mapping, Optional, TypeVar -from synapse.storage.types import Connection +from synapse.storage.types import Connection, Cursor, DBAPI2Module + +if TYPE_CHECKING: + from synapse.storage.database import LoggingDatabaseConnection class IsolationLevel(IntEnum): @@ -32,7 +35,7 @@ class IncorrectDatabaseSetup(RuntimeError): class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): - def __init__(self, module, database_config: dict): + def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]): self.module = module @property @@ -69,7 +72,7 @@ def check_database( ... @abc.abstractmethod - def check_new_database(self, txn) -> None: + def check_new_database(self, txn: Cursor) -> None: """Gets called when setting up a brand new database. This allows us to apply stricter checks on new databases versus existing database. 
""" @@ -79,8 +82,11 @@ def check_new_database(self, txn) -> None: def convert_param_style(self, sql: str) -> str: ... + # This method would ideally take a plain ConnectionType, but it seems that + # the Sqlite engine expects to use LoggingDatabaseConnection.cursor + # instead of sqlite3.Connection.cursor: only the former takes a txn_name. @abc.abstractmethod - def on_new_connection(self, db_conn: ConnectionType) -> None: + def on_new_connection(self, db_conn: "LoggingDatabaseConnection") -> None: ... @abc.abstractmethod @@ -92,7 +98,7 @@ def is_connection_closed(self, conn: ConnectionType) -> bool: ... @abc.abstractmethod - def lock_table(self, txn, table: str) -> None: + def lock_table(self, txn: Cursor, table: str) -> None: ... @property @@ -102,12 +108,12 @@ def server_version(self) -> str: ... @abc.abstractmethod - def in_transaction(self, conn: Connection) -> bool: + def in_transaction(self, conn: ConnectionType) -> bool: """Whether the connection is currently in a transaction.""" ... @abc.abstractmethod - def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): + def attempt_to_set_autocommit(self, conn: ConnectionType, autocommit: bool) -> None: """Attempt to set the connections autocommit mode. When True queries are run outside of transactions. @@ -119,8 +125,8 @@ def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): @abc.abstractmethod def attempt_to_set_isolation_level( - self, conn: Connection, isolation_level: Optional[int] - ): + self, conn: ConnectionType, isolation_level: Optional[int] + ) -> None: """Attempt to set the connections isolation level. Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default. diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index e8d29e287004..391f8ed24a3d 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -13,39 +13,47 @@ # limitations under the License. 
import logging -from typing import Mapping, Optional +from typing import TYPE_CHECKING, Any, Mapping, NoReturn, Optional, Tuple, cast from synapse.storage.engines._base import ( BaseDatabaseEngine, IncorrectDatabaseSetup, IsolationLevel, ) -from synapse.storage.types import Connection +from synapse.storage.types import Cursor + +if TYPE_CHECKING: + import psycopg2 # noqa: F401 + + from synapse.storage.database import LoggingDatabaseConnection + logger = logging.getLogger(__name__) -class PostgresEngine(BaseDatabaseEngine): - def __init__(self, database_module, database_config): - super().__init__(database_module, database_config) - self.module.extensions.register_type(self.module.extensions.UNICODE) +class PostgresEngine(BaseDatabaseEngine["psycopg2.connection"]): + def __init__(self, database_config: Mapping[str, Any]): + import psycopg2.extensions + + super().__init__(psycopg2, database_config) + psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) # Disables passing `bytes` to txn.execute, c.f. #6186. If you do # actually want to use bytes than wrap it in `bytearray`. 
- def _disable_bytes_adapter(_): + def _disable_bytes_adapter(_: bytes) -> NoReturn: raise Exception("Passing bytes to DB is disabled.") - self.module.extensions.register_adapter(bytes, _disable_bytes_adapter) - self.synchronous_commit = database_config.get("synchronous_commit", True) - self._version = None # unknown as yet + psycopg2.extensions.register_adapter(bytes, _disable_bytes_adapter) + self.synchronous_commit: bool = database_config.get("synchronous_commit", True) + self._version: Optional[int] = None # unknown as yet self.isolation_level_map: Mapping[int, int] = { - IsolationLevel.READ_COMMITTED: self.module.extensions.ISOLATION_LEVEL_READ_COMMITTED, - IsolationLevel.REPEATABLE_READ: self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ, - IsolationLevel.SERIALIZABLE: self.module.extensions.ISOLATION_LEVEL_SERIALIZABLE, + IsolationLevel.READ_COMMITTED: psycopg2.extensions.ISOLATION_LEVEL_READ_COMMITTED, + IsolationLevel.REPEATABLE_READ: psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ, + IsolationLevel.SERIALIZABLE: psycopg2.extensions.ISOLATION_LEVEL_SERIALIZABLE, } self.default_isolation_level = ( - self.module.extensions.ISOLATION_LEVEL_REPEATABLE_READ + psycopg2.extensions.ISOLATION_LEVEL_REPEATABLE_READ ) self.config = database_config @@ -53,19 +61,21 @@ def _disable_bytes_adapter(_): def single_threaded(self) -> bool: return False - def get_db_locale(self, txn): + def get_db_locale(self, txn: Cursor) -> Tuple[str, str]: txn.execute( "SELECT datcollate, datctype FROM pg_database WHERE datname = current_database()" ) - collation, ctype = txn.fetchone() + collation, ctype = cast(Tuple[str, str], txn.fetchone()) return collation, ctype - def check_database(self, db_conn, allow_outdated_version: bool = False): + def check_database( + self, db_conn: "psycopg2.connection", allow_outdated_version: bool = False + ) -> None: # Get the version of PostgreSQL that we're using. 
As per the psycopg2 # docs: The number is formed by converting the major, minor, and # revision numbers into two-decimal-digit numbers and appending them # together. For example, version 8.1.5 will be returned as 80105 - self._version = db_conn.server_version + self._version = cast(int, db_conn.server_version) allow_unsafe_locale = self.config.get("allow_unsafe_locale", False) # Are we on a supported PostgreSQL version? @@ -108,7 +118,7 @@ def check_database(self, db_conn, allow_outdated_version: bool = False): ctype, ) - def check_new_database(self, txn): + def check_new_database(self, txn: Cursor) -> None: """Gets called when setting up a brand new database. This allows us to apply stricter checks on new databases versus existing database. """ @@ -129,10 +139,10 @@ def check_new_database(self, txn): "See docs/postgres.md for more information." % ("\n".join(errors)) ) - def convert_param_style(self, sql): + def convert_param_style(self, sql: str) -> str: return sql.replace("?", "%s") - def on_new_connection(self, db_conn): + def on_new_connection(self, db_conn: "LoggingDatabaseConnection") -> None: db_conn.set_isolation_level(self.default_isolation_level) # Set the bytea output to escape, vs the default of hex @@ -149,14 +159,14 @@ def on_new_connection(self, db_conn): db_conn.commit() @property - def can_native_upsert(self): + def can_native_upsert(self) -> bool: """ Can we use native UPSERTs? 
""" return True @property - def supports_using_any_list(self): + def supports_using_any_list(self) -> bool: """Do we support using `a = ANY(?)` and passing a list""" return True @@ -165,27 +175,25 @@ def supports_returning(self) -> bool: """Do we support the `RETURNING` clause in insert/update/delete?""" return True - def is_deadlock(self, error): - if isinstance(error, self.module.DatabaseError): + def is_deadlock(self, error: Exception) -> bool: + import psycopg2.extensions + + if isinstance(error, psycopg2.DatabaseError): # https://www.postgresql.org/docs/current/static/errcodes-appendix.html # "40001" serialization_failure # "40P01" deadlock_detected return error.pgcode in ["40001", "40P01"] return False - def is_connection_closed(self, conn): + def is_connection_closed(self, conn: "psycopg2.connection") -> bool: return bool(conn.closed) - def lock_table(self, txn, table): + def lock_table(self, txn: Cursor, table: str) -> None: txn.execute("LOCK TABLE %s in EXCLUSIVE MODE" % (table,)) @property - def server_version(self): - """Returns a string giving the server version. For example: '8.1.5' - - Returns: - string - """ + def server_version(self) -> str: + """Returns a string giving the server version. For example: '8.1.5'.""" # note that this is a bit of a hack because it relies on check_database # having been called. Still, that should be a safe bet here. 
numver = self._version @@ -197,17 +205,21 @@ def server_version(self): else: return "%i.%i.%i" % (numver / 10000, (numver % 10000) / 100, numver % 100) - def in_transaction(self, conn: Connection) -> bool: - return conn.status != self.module.extensions.STATUS_READY # type: ignore + def in_transaction(self, conn: "psycopg2.connection") -> bool: + import psycopg2.extensions + + return conn.status != psycopg2.extensions.STATUS_READY - def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): - return conn.set_session(autocommit=autocommit) # type: ignore + def attempt_to_set_autocommit( + self, conn: "psycopg2.connection", autocommit: bool + ) -> None: + return conn.set_session(autocommit=autocommit) def attempt_to_set_isolation_level( - self, conn: Connection, isolation_level: Optional[int] - ): + self, conn: "psycopg2.connection", isolation_level: Optional[int] + ) -> None: if isolation_level is None: isolation_level = self.default_isolation_level else: isolation_level = self.isolation_level_map[isolation_level] - return conn.set_isolation_level(isolation_level) # type: ignore + return conn.set_isolation_level(isolation_level) diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 6c19e55999bd..621f2c5efe28 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -12,21 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import platform +import sqlite3 import struct import threading -import typing -from typing import Optional +from typing import TYPE_CHECKING, Any, List, Mapping, Optional from synapse.storage.engines import BaseDatabaseEngine -from synapse.storage.types import Connection +from synapse.storage.types import Cursor -if typing.TYPE_CHECKING: - import sqlite3 # noqa: F401 +if TYPE_CHECKING: + from synapse.storage.database import LoggingDatabaseConnection -class Sqlite3Engine(BaseDatabaseEngine["sqlite3.Connection"]): - def __init__(self, database_module, database_config): - super().__init__(database_module, database_config) +class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): + def __init__(self, database_config: Mapping[str, Any]): + super().__init__(sqlite3, database_config) database = database_config.get("args", {}).get("database") self._is_in_memory = database in ( @@ -37,7 +37,7 @@ def __init__(self, database_module, database_config): if platform.python_implementation() == "PyPy": # pypy's sqlite3 module doesn't handle bytearrays, convert them # back to bytes. - database_module.register_adapter(bytearray, lambda array: bytes(array)) + sqlite3.register_adapter(bytearray, lambda array: bytes(array)) # The current max state_group, or None if we haven't looked # in the DB yet. @@ -49,41 +49,43 @@ def single_threaded(self) -> bool: return True @property - def can_native_upsert(self): + def can_native_upsert(self) -> bool: """ Do we support native UPSERTs? This requires SQLite3 3.24+, plus some more work we haven't done yet to tell what was inserted vs updated. 
""" - return self.module.sqlite_version_info >= (3, 24, 0) + return sqlite3.sqlite_version_info >= (3, 24, 0) @property - def supports_using_any_list(self): + def supports_using_any_list(self) -> bool: """Do we support using `a = ANY(?)` and passing a list""" return False @property def supports_returning(self) -> bool: """Do we support the `RETURNING` clause in insert/update/delete?""" - return self.module.sqlite_version_info >= (3, 35, 0) + return sqlite3.sqlite_version_info >= (3, 35, 0) - def check_database(self, db_conn, allow_outdated_version: bool = False): + def check_database( + self, db_conn: sqlite3.Connection, allow_outdated_version: bool = False + ) -> None: if not allow_outdated_version: - version = self.module.sqlite_version_info + version = sqlite3.sqlite_version_info # Synapse is untested against older SQLite versions, and we don't want # to let users upgrade to a version of Synapse with broken support for their # sqlite version, because it risks leaving them with a half-upgraded db. if version < (3, 22, 0): raise RuntimeError("Synapse requires sqlite 3.22 or above.") - def check_new_database(self, txn): + def check_new_database(self, txn: Cursor) -> None: """Gets called when setting up a brand new database. This allows us to apply stricter checks on new databases versus existing database. """ - def convert_param_style(self, sql): + def convert_param_style(self, sql: str) -> str: return sql - def on_new_connection(self, db_conn): + def on_new_connection(self, db_conn: "LoggingDatabaseConnection") -> None: # We need to import here to avoid an import loop. 
from synapse.storage.prepare_database import prepare_database @@ -97,48 +99,46 @@ def on_new_connection(self, db_conn): db_conn.execute("PRAGMA foreign_keys = ON;") db_conn.commit() - def is_deadlock(self, error): + def is_deadlock(self, error: Exception) -> bool: return False - def is_connection_closed(self, conn): + def is_connection_closed(self, conn: sqlite3.Connection) -> bool: return False - def lock_table(self, txn, table): + def lock_table(self, txn: Cursor, table: str) -> None: return @property - def server_version(self): - """Gets a string giving the server version. For example: '3.22.0' + def server_version(self) -> str: + """Gets a string giving the server version. For example: '3.22.0'.""" + return "%i.%i.%i" % sqlite3.sqlite_version_info - Returns: - string - """ - return "%i.%i.%i" % self.module.sqlite_version_info - - def in_transaction(self, conn: Connection) -> bool: - return conn.in_transaction # type: ignore + def in_transaction(self, conn: sqlite3.Connection) -> bool: + return conn.in_transaction - def attempt_to_set_autocommit(self, conn: Connection, autocommit: bool): + def attempt_to_set_autocommit( + self, conn: sqlite3.Connection, autocommit: bool + ) -> None: # Twisted doesn't let us set attributes on the connections, so we can't # set the connection to autocommit mode. 
pass def attempt_to_set_isolation_level( - self, conn: Connection, isolation_level: Optional[int] - ): - # All transactions are SERIALIZABLE by default in sqllite + self, conn: sqlite3.Connection, isolation_level: Optional[int] + ) -> None: + # All transactions are SERIALIZABLE by default in sqlite pass # Following functions taken from: /~https://github.com/coleifer/peewee -def _parse_match_info(buf): +def _parse_match_info(buf: bytes) -> List[int]: bufsize = len(buf) return [struct.unpack("@I", buf[i : i + 4])[0] for i in range(0, bufsize, 4)] -def _rank(raw_match_info): +def _rank(raw_match_info: bytes) -> float: """Handle match_info called w/default args 'pcx' - based on the example rank function http://sqlite.org/fts3.html#appendix_a """ diff --git a/synapse/storage/types.py b/synapse/storage/types.py index 40536c183005..0031df1e0649 100644 --- a/synapse/storage/types.py +++ b/synapse/storage/types.py @@ -94,3 +94,73 @@ def __exit__( traceback: Optional[TracebackType], ) -> Optional[bool]: ... + + +class DBAPI2Module(Protocol): + """The module-level attributes that we use from PEP 249. + + This is NOT a comprehensive stub for the entire DBAPI2.""" + + __name__: str + + # Exceptions. See https://peps.python.org/pep-0249/#exceptions + + # For our specific drivers: + # - Python's sqlite3 module doesn't contain the same descriptions as the + # DBAPI2 spec, see https://docs.python.org/3/library/sqlite3.html#exceptions + # - Psycopg2 maps every Postgres error code onto a unique exception class which + # extends from this hierarchy. See + # https://docs.python.org/3/library/sqlite3.html?highlight=sqlite3#exceptions + # https://www.postgresql.org/docs/current/errcodes-appendix.html#ERRCODES-TABLE + Warning: Type[Exception] + Error: Type[Exception] + + # Errors are divided into `InterfaceError`s (something went wrong in the database + # driver) and `DatabaseError`s (something went wrong in the database).
These are + # both subclasses of `Error`, but we can't currently express this in type + # annotations due to /~https://github.com/python/mypy/issues/8397 + InterfaceError: Type[Exception] + DatabaseError: Type[Exception] + + # Everything below is a subclass of `DatabaseError`. + + # Roughly: the database rejected a nonsensical value. Examples: + # - An integer was too big for its data type. + # - An invalid date time was provided. + # - A string contained a null code point. + DataError: Type[Exception] + + # Roughly: something went wrong in the database, but it's not within the application + # programmer's control. Examples: + # - We failed to establish a connection to the database. + # - The connection to the database was lost. + # - A deadlock was detected. + # - A serialisation failure occurred. + # - The database ran out of resources, such as storage, memory, connections, etc. + # - The database encountered an error from the operating system. + OperationalError: Type[Exception] + + # Roughly: we've given the database data which breaks a rule we asked it to enforce. + # Examples: + # - Stop, criminal scum! You violated the foreign key constraint + # - Also check constraints, non-null constraints, etc. + IntegrityError: Type[Exception] + + # Roughly: something went wrong within the database server itself. + InternalError: Type[Exception] + + # Roughly: the application did something silly that needs to be fixed. Examples: + # - We don't have permissions to do something. + # - We tried to create a table with duplicate column names. + # - We tried to use a reserved name. + # - We referred to a column that doesn't exist. + ProgrammingError: Type[Exception] + + # Roughly: we've tried to do something that this database doesn't support. + NotSupportedError: Type[Exception] + + def connect(self, **parameters: object) -> Connection: + ... 
+ + +__all__ = ["Cursor", "Connection", "DBAPI2Module"] From fcf951d5dc7ca8c4cb18aa9c1f5ccb005df3610a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 May 2022 10:34:27 +0100 Subject: [PATCH 039/181] Track in memory events using weakrefs (#10533) --- changelog.d/10533.misc | 1 + .../storage/databases/main/events_worker.py | 35 +++++++++++++++++-- tests/handlers/test_sync.py | 1 + .../databases/main/test_events_worker.py | 25 +++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 changelog.d/10533.misc diff --git a/changelog.d/10533.misc b/changelog.d/10533.misc new file mode 100644 index 000000000000..f70dc6496fcf --- /dev/null +++ b/changelog.d/10533.misc @@ -0,0 +1 @@ +Improve event caching mechanism to avoid having multiple copies of an event in memory at a time. diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index a4a604a49915..5b22d6b45211 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -14,6 +14,7 @@ import logging import threading +import weakref from enum import Enum, auto from typing import ( TYPE_CHECKING, @@ -23,6 +24,7 @@ Dict, Iterable, List, + MutableMapping, Optional, Set, Tuple, @@ -248,6 +250,12 @@ def __init__( str, ObservableDeferred[Dict[str, EventCacheEntry]] ] = {} + # We keep track of the events we have currently loaded in memory so that + # we can reuse them even if they've been evicted from the cache. We only + # track events that don't need redacting in here (as then we don't need + # to track redaction status). 
+ self._event_ref: MutableMapping[str, EventBase] = weakref.WeakValueDictionary() + self._event_fetch_lock = threading.Condition() self._event_fetch_list: List[ Tuple[Iterable[str], "defer.Deferred[Dict[str, _EventRow]]"] @@ -723,6 +731,8 @@ async def get_missing_events_from_db() -> Dict[str, EventCacheEntry]: def _invalidate_get_event_cache(self, event_id: str) -> None: self._get_event_cache.invalidate((event_id,)) + self._event_ref.pop(event_id, None) + self._current_event_fetches.pop(event_id, None) def _get_events_from_cache( self, events: Iterable[str], update_metrics: bool = True @@ -738,13 +748,30 @@ def _get_events_from_cache( event_map = {} for event_id in events: + # First check if it's in the event cache ret = self._get_event_cache.get( (event_id,), None, update_metrics=update_metrics ) - if not ret: + if ret: + event_map[event_id] = ret continue - event_map[event_id] = ret + # Otherwise check if we still have the event in memory. + event = self._event_ref.get(event_id) + if event: + # Reconstruct an event cache entry + + cache_entry = EventCacheEntry( + event=event, + # We don't cache weakrefs to redacted events, so we know + # this is None. + redacted_event=None, + ) + event_map[event_id] = cache_entry + + # We add the entry back into the cache as we want to keep + # recently queried events in the cache. + self._get_event_cache.set((event_id,), cache_entry) return event_map @@ -1124,6 +1151,10 @@ async def _get_events_from_db( self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry + if not redacted_event: + # We only cache references to unredacted events. 
+ self._event_ref[event_id] = original_ev + return result_map async def _enqueue_events(self, events: Collection[str]) -> Dict[str, _EventRow]: diff --git a/tests/handlers/test_sync.py b/tests/handlers/test_sync.py index 865b8b7e47ef..db3302a4c78d 100644 --- a/tests/handlers/test_sync.py +++ b/tests/handlers/test_sync.py @@ -160,6 +160,7 @@ def test_unknown_room_version(self): # Blow away caches (supported room versions can only change due to a restart). self.store.get_rooms_for_user_with_stream_ordering.invalidate_all() self.store._get_event_cache.clear() + self.store._event_ref.clear() # The rooms should be excluded from the sync response. # Get a new request key. diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index c237a8c7e228..38963ce4a74c 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -154,6 +154,31 @@ def test_simple(self): # We should have fetched the event from the DB self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + def test_event_ref(self): + """Test that we reuse events that are still in memory but have fallen + out of the cache, rather than requesting them from the DB. + """ + + # Reset the event cache + self.store._get_event_cache.clear() + + with LoggingContext("test") as ctx: + # We keep hold of the event even though we never use it.
+ event = self.get_success(self.store.get_event(self.event_id)) # noqa: F841 + + # We should have fetched the event from the DB + self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 1) + + # Reset the event cache + self.store._get_event_cache.clear() + + with LoggingContext("test") as ctx: + self.get_success(self.store.get_event(self.event_id)) + + # Since the event is still in memory we shouldn't have fetched it + # from the DB + self.assertEqual(ctx.get_resource_usage().evt_db_fetch_count, 0) + def test_dedupe(self): """Test that if we request the same event multiple times we only pull it out once. From 32ef24fbd74b8822c3e57c8ce74b979506aea7be Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 May 2022 10:34:59 +0100 Subject: [PATCH 040/181] Add index to cache invalidations (#12747) For workers that rarely write to the cache the `get_all_updated_caches` query can become expensive if the worker falls behind when reading the cache. --- changelog.d/12747.bugfix | 1 + synapse/storage/databases/main/cache.py | 8 ++++++++ .../delta/69/02cache_invalidation_index.sql | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+) create mode 100644 changelog.d/12747.bugfix create mode 100644 synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql diff --git a/changelog.d/12747.bugfix b/changelog.d/12747.bugfix new file mode 100644 index 000000000000..0fb0059237cc --- /dev/null +++ b/changelog.d/12747.bugfix @@ -0,0 +1 @@ +Fix poor database performance when reading the cache invalidation stream for large servers with lots of workers. 
diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index dd4e83a2ad19..1653a6a9b694 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -57,6 +57,14 @@ def __init__( self._instance_name = hs.get_instance_name() + self.db_pool.updates.register_background_index_update( + update_name="cache_invalidation_index_by_instance", + index_name="cache_invalidation_stream_by_instance_instance_index", + table="cache_invalidation_stream_by_instance", + columns=("instance_name", "stream_id"), + psql_only=True, # The table is only on postgres DBs. + ) + async def get_all_updated_caches( self, instance_name: str, last_id: int, current_id: int, limit: int ) -> Tuple[List[Tuple[int, tuple]], int, bool]: diff --git a/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql b/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql new file mode 100644 index 000000000000..22ae3b8c0005 --- /dev/null +++ b/synapse/storage/schema/main/delta/69/02cache_invalidation_index.sql @@ -0,0 +1,18 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Background update to add an index on (instance_name, stream_id) to `cache_invalidation_stream_by_instance`.
+INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (6902, 'cache_invalidation_index_by_instance', '{}'); From a34a41f1354147565da248bf3222449c6e976035 Mon Sep 17 00:00:00 2001 From: SpiritCroc Date: Tue, 17 May 2022 12:03:07 +0200 Subject: [PATCH 041/181] Fix push for m.read events (#12721) badge_count_last_call was always zero when the response for push notifications included a "rejected" key which mapped to an empty list. --- changelog.d/12721.bugfix | 1 + synapse/push/httppusher.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12721.bugfix diff --git a/changelog.d/12721.bugfix b/changelog.d/12721.bugfix new file mode 100644 index 000000000000..6987f7ab15e1 --- /dev/null +++ b/changelog.d/12721.bugfix @@ -0,0 +1 @@ +Fix push to dismiss notifications when read on another client. Contributed by @SpiritCroc @ Beeper. diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 5818344520f5..d5603596c004 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -405,7 +405,7 @@ async def dispatch_push( rejected = [] if "rejected" in resp: rejected = resp["rejected"] - else: + if not rejected: self.badge_count_last_call = badge return rejected From 24b590de32154eb3965220bd62715e52b37b4074 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 17 May 2022 12:07:18 +0200 Subject: [PATCH 042/181] Remove code which updates `application_services_state.last_txn` (#12680) This column is unused as of #12209, so let's stop writing to it. 
--- changelog.d/12680.misc | 1 + synapse/storage/databases/main/appservice.py | 47 ++++++++++---------- synapse/storage/schema/__init__.py | 5 ++- tests/handlers/test_appservice.py | 10 ----- tests/storage/test_appservice.py | 27 +++-------- 5 files changed, 35 insertions(+), 55 deletions(-) create mode 100644 changelog.d/12680.misc diff --git a/changelog.d/12680.misc b/changelog.d/12680.misc new file mode 100644 index 000000000000..dfd1f0a6c658 --- /dev/null +++ b/changelog.d/12680.misc @@ -0,0 +1 @@ +Remove code which updates unused database column `application_services_state.last_txn`. diff --git a/synapse/storage/databases/main/appservice.py b/synapse/storage/databases/main/appservice.py index 945707b0ecd5..e284454b660f 100644 --- a/synapse/storage/databases/main/appservice.py +++ b/synapse/storage/databases/main/appservice.py @@ -203,19 +203,29 @@ async def get_appservice_state( """Get the application service state. Args: - service: The service whose state to set. + service: The service whose state to get. Returns: - An ApplicationServiceState or none. + An ApplicationServiceState, or None if we have yet to attempt any + transactions to the AS. """ - result = await self.db_pool.simple_select_one( + # if we have created transactions for this AS but not yet attempted to send + # them, we will have a row in the table with state=NULL (recording the stream + # positions we have processed up to). + # + # On the other hand, if we have yet to create any transactions for this AS at + # all, then there will be no row for the AS. + # + # In either case, we return None to indicate "we don't yet know the state of + # this AS". 
+ result = await self.db_pool.simple_select_one_onecol( "application_services_state", {"as_id": service.id}, - ["state"], + retcol="state", allow_none=True, desc="get_appservice_state", ) if result: - return ApplicationServiceState(result.get("state")) + return ApplicationServiceState(result) return None async def set_appservice_state( @@ -296,14 +306,6 @@ async def complete_appservice_txn( """ def _complete_appservice_txn(txn: LoggingTransaction) -> None: - # Set current txn_id for AS to 'txn_id' - self.db_pool.simple_upsert_txn( - txn, - "application_services_state", - {"as_id": service.id}, - {"last_txn": txn_id}, - ) - # Delete txn self.db_pool.simple_delete_txn( txn, @@ -452,16 +454,15 @@ async def set_appservice_stream_type_pos( % (stream_type,) ) - def set_appservice_stream_type_pos_txn(txn: LoggingTransaction) -> None: - stream_id_type = "%s_stream_id" % stream_type - txn.execute( - "UPDATE application_services_state SET %s = ? WHERE as_id=?" - % stream_id_type, - (pos, service.id), - ) - - await self.db_pool.runInteraction( - "set_appservice_stream_type_pos", set_appservice_stream_type_pos_txn + # this may be the first time that we're recording any state for this AS, so + # we don't yet know if a row for it exists; hence we have to upsert here. + await self.db_pool.simple_upsert( + table="application_services_state", + keyvalues={"as_id": service.id}, + values={f"{stream_type}_stream_id": pos}, + # no need to lock when emulating upsert: as_id is a unique key + lock=False, + desc="set_appservice_stream_type_pos", ) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 20c344faeab3..da98f05e0348 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -61,7 +61,9 @@ Changes in SCHEMA_VERSION = 69: - We now write to `device_lists_changes_in_room` table. 
- - Use sequence to generate future `application_services_txns.txn_id`s + - We now use a PostgreSQL sequence to generate future txn_ids for + `application_services_txns`. `application_services_state.last_txn` is no longer + updated. Changes in SCHEMA_VERSION = 70: - event_reference_hashes is no longer written to. @@ -71,6 +73,7 @@ SCHEMA_COMPAT_VERSION = ( # We now assume that `device_lists_changes_in_room` has been filled out for # recent device_list_updates. + # ... and that `application_services_state.last_txn` is not used. 69 ) """Limit on how far the synapse codebase can be rolled back without breaking db compat diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index 5b0cd1ab8608..53e7a5d81b7d 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -434,16 +434,6 @@ def test_sending_read_receipt_batches_to_application_services(self): }, ) - # "Complete" a transaction. - # All this really does for us is make an entry in the application_services_state - # database table, which tracks the current stream_token per stream ID per AS. - self.get_success( - self.hs.get_datastores().main.complete_appservice_txn( - 0, - interested_appservice, - ) - ) - # Now, pretend that we receive a large burst of read receipts (300 total) that # all come in at once. 
for i in range(300): diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 1bf93e79a7fa..1047ed09c85d 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -14,7 +14,7 @@ import json import os import tempfile -from typing import List, Optional, cast +from typing import List, cast from unittest.mock import Mock import yaml @@ -149,15 +149,12 @@ def _add_service(self, url, as_token, id) -> None: outfile.write(yaml.dump(as_yaml)) self.as_yaml_files.append(as_token) - def _set_state( - self, id: str, state: ApplicationServiceState, txn: Optional[int] = None - ): + def _set_state(self, id: str, state: ApplicationServiceState): return self.db_pool.runOperation( self.engine.convert_param_style( - "INSERT INTO application_services_state(as_id, state, last_txn) " - "VALUES(?,?,?)" + "INSERT INTO application_services_state(as_id, state) VALUES(?,?)" ), - (id, state.value, txn), + (id, state.value), ) def _insert_txn(self, as_id, txn_id, events): @@ -280,17 +277,6 @@ def test_complete_appservice_txn_first_txn( self.store.complete_appservice_txn(txn_id=txn_id, service=service) ) - res = self.get_success( - self.db_pool.runQuery( - self.engine.convert_param_style( - "SELECT last_txn FROM application_services_state WHERE as_id=?" - ), - (service.id,), - ) - ) - self.assertEqual(1, len(res)) - self.assertEqual(txn_id, res[0][0]) - res = self.get_success( self.db_pool.runQuery( self.engine.convert_param_style( @@ -316,14 +302,13 @@ def test_complete_appservice_txn_updates_last_txn_state( res = self.get_success( self.db_pool.runQuery( self.engine.convert_param_style( - "SELECT last_txn, state FROM application_services_state WHERE as_id=?" + "SELECT state FROM application_services_state WHERE as_id=?" 
), (service.id,), ) ) self.assertEqual(1, len(res)) - self.assertEqual(txn_id, res[0][0]) - self.assertEqual(ApplicationServiceState.UP.value, res[0][1]) + self.assertEqual(ApplicationServiceState.UP.value, res[0][0]) res = self.get_success( self.db_pool.runQuery( From 942c30b16b86cb05d2109b13bc2c1dc9ac2fea70 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 17 May 2022 04:41:39 -0600 Subject: [PATCH 043/181] Add a new room version for MSC3787's knock+restricted join rule (#12623) --- changelog.d/12623.feature | 1 + synapse/api/constants.py | 2 ++ synapse/api/room_versions.py | 32 ++++++++++++++++++++++++++++++++ synapse/event_auth.py | 21 +++++++++++++++++---- synapse/handlers/event_auth.py | 10 +++++++++- synapse/handlers/room_summary.py | 9 +++++++-- 6 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 changelog.d/12623.feature diff --git a/changelog.d/12623.feature b/changelog.d/12623.feature new file mode 100644 index 000000000000..cdee19fafa36 --- /dev/null +++ b/changelog.d/12623.feature @@ -0,0 +1 @@ +Add support for [MSC3787: Allowing knocks to restricted rooms](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787). \ No newline at end of file diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 0ccd4c95581e..330de21f6b80 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -65,6 +65,8 @@ class JoinRules: PRIVATE: Final = "private" # As defined for MSC3083. RESTRICTED: Final = "restricted" + # As defined for MSC3787. 
+ KNOCK_RESTRICTED: Final = "knock_restricted" class RestrictedJoinRuleTypes: diff --git a/synapse/api/room_versions.py b/synapse/api/room_versions.py index a747a4081497..3f85d61b4633 100644 --- a/synapse/api/room_versions.py +++ b/synapse/api/room_versions.py @@ -81,6 +81,9 @@ class RoomVersion: msc2716_historical: bool # MSC2716: Adds support for redacting "insertion", "chunk", and "marker" events msc2716_redactions: bool + # MSC3787: Adds support for a `knock_restricted` join rule, mixing concepts of + # knocks and restricted join rules into the same join condition. + msc3787_knock_restricted_join_rule: bool class RoomVersions: @@ -99,6 +102,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V2 = RoomVersion( "2", @@ -115,6 +119,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V3 = RoomVersion( "3", @@ -131,6 +136,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V4 = RoomVersion( "4", @@ -147,6 +153,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V5 = RoomVersion( "5", @@ -163,6 +170,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V6 = RoomVersion( "6", @@ -179,6 +187,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) MSC2176 = RoomVersion( "org.matrix.msc2176", @@ -195,6 +204,7 @@ class RoomVersions: msc2403_knocking=False, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V7 = RoomVersion( "7", @@ -211,6 +221,7 @@ class RoomVersions: 
msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V8 = RoomVersion( "8", @@ -227,6 +238,7 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) V9 = RoomVersion( "9", @@ -243,6 +255,7 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=False, msc2716_redactions=False, + msc3787_knock_restricted_join_rule=False, ) MSC2716v3 = RoomVersion( "org.matrix.msc2716v3", @@ -259,6 +272,24 @@ class RoomVersions: msc2403_knocking=True, msc2716_historical=True, msc2716_redactions=True, + msc3787_knock_restricted_join_rule=False, + ) + MSC3787 = RoomVersion( + "org.matrix.msc3787", + RoomDisposition.UNSTABLE, + EventFormatVersions.V3, + StateResolutionVersions.V2, + enforce_key_validity=True, + special_case_aliases_auth=False, + strict_canonicaljson=True, + limit_notifications_power_levels=True, + msc2176_redaction_rules=False, + msc3083_join_rules=True, + msc3375_redaction_rules=True, + msc2403_knocking=True, + msc2716_historical=False, + msc2716_redactions=False, + msc3787_knock_restricted_join_rule=True, ) @@ -276,6 +307,7 @@ class RoomVersions: RoomVersions.V8, RoomVersions.V9, RoomVersions.MSC2716v3, + RoomVersions.MSC3787, ) } diff --git a/synapse/event_auth.py b/synapse/event_auth.py index 621a3efcccec..4c0b587a7643 100644 --- a/synapse/event_auth.py +++ b/synapse/event_auth.py @@ -414,7 +414,12 @@ def _is_membership_change_allowed( raise AuthError(403, "You are banned from this room") elif join_rule == JoinRules.PUBLIC: pass - elif room_version.msc3083_join_rules and join_rule == JoinRules.RESTRICTED: + elif ( + room_version.msc3083_join_rules and join_rule == JoinRules.RESTRICTED + ) or ( + room_version.msc3787_knock_restricted_join_rule + and join_rule == JoinRules.KNOCK_RESTRICTED + ): # This is the same as public, but the event must contain a reference # to the server who authorised the join. 
If the event does not contain # the proper content it is rejected. @@ -440,8 +445,13 @@ def _is_membership_change_allowed( if authorising_user_level < invite_level: raise AuthError(403, "Join event authorised by invalid server.") - elif join_rule == JoinRules.INVITE or ( - room_version.msc2403_knocking and join_rule == JoinRules.KNOCK + elif ( + join_rule == JoinRules.INVITE + or (room_version.msc2403_knocking and join_rule == JoinRules.KNOCK) + or ( + room_version.msc3787_knock_restricted_join_rule + and join_rule == JoinRules.KNOCK_RESTRICTED + ) ): if not caller_in_room and not caller_invited: raise AuthError(403, "You are not invited to this room.") @@ -462,7 +472,10 @@ def _is_membership_change_allowed( if user_level < ban_level or user_level <= target_level: raise AuthError(403, "You don't have permission to ban") elif room_version.msc2403_knocking and Membership.KNOCK == membership: - if join_rule != JoinRules.KNOCK: + if join_rule != JoinRules.KNOCK and ( + not room_version.msc3787_knock_restricted_join_rule + or join_rule != JoinRules.KNOCK_RESTRICTED + ): raise AuthError(403, "You don't have permission to knock") elif target_user_id != event.user_id: raise AuthError(403, "You cannot knock for other users") diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index d441ebb0ab3d..6bed46435135 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -241,7 +241,15 @@ async def has_restricted_join_rules( # If the join rule is not restricted, this doesn't apply. 
join_rules_event = await self._store.get_event(join_rules_event_id) - return join_rules_event.content.get("join_rule") == JoinRules.RESTRICTED + content_join_rule = join_rules_event.content.get("join_rule") + if content_join_rule == JoinRules.RESTRICTED: + return True + + # also check for MSC3787 behaviour + if room_version.msc3787_knock_restricted_join_rule: + return content_join_rule == JoinRules.KNOCK_RESTRICTED + + return False async def get_rooms_that_allow_join( self, state_ids: StateMap[str] diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index ff24ec806357..af83de319348 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -562,8 +562,13 @@ async def _is_local_room_accessible( if join_rules_event_id: join_rules_event = await self._store.get_event(join_rules_event_id) join_rule = join_rules_event.content.get("join_rule") - if join_rule == JoinRules.PUBLIC or ( - room_version.msc2403_knocking and join_rule == JoinRules.KNOCK + if ( + join_rule == JoinRules.PUBLIC + or (room_version.msc2403_knocking and join_rule == JoinRules.KNOCK) + or ( + room_version.msc3787_knock_restricted_join_rule + and join_rule == JoinRules.KNOCK_RESTRICTED + ) ): return True From 6edefef60289cc54e17fd6af838eb66c4973f5f5 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Tue, 17 May 2022 16:29:06 +0200 Subject: [PATCH 044/181] Add some type hints to datastore (#12717) --- changelog.d/12717.misc | 1 + mypy.ini | 2 - synapse/federation/sender/__init__.py | 24 ++- synapse/handlers/sync.py | 6 +- synapse/rest/client/push_rule.py | 4 +- synapse/state/__init__.py | 4 +- synapse/storage/databases/main/__init__.py | 8 +- synapse/storage/databases/main/metrics.py | 56 +++--- synapse/storage/databases/main/push_rule.py | 184 ++++++++++++------- synapse/storage/databases/main/roommember.py | 126 ++++++++----- 10 files changed, 254 insertions(+), 161 deletions(-) create mode 100644 
changelog.d/12717.misc diff --git a/changelog.d/12717.misc b/changelog.d/12717.misc new file mode 100644 index 000000000000..e793d08e5e3f --- /dev/null +++ b/changelog.d/12717.misc @@ -0,0 +1 @@ +Add some type hints to datastore. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index b5b907973ffc..45668974b363 100644 --- a/mypy.ini +++ b/mypy.ini @@ -28,8 +28,6 @@ exclude = (?x) |synapse/storage/databases/main/cache.py |synapse/storage/databases/main/devices.py |synapse/storage/databases/main/event_federation.py - |synapse/storage/databases/main/push_rule.py - |synapse/storage/databases/main/roommember.py |synapse/storage/schema/ |tests/api/test_auth.py diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index 6d2f46318bea..dbe303ed9be8 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -15,7 +15,17 @@ import abc import logging from collections import OrderedDict -from typing import TYPE_CHECKING, Dict, Hashable, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Hashable, + Iterable, + List, + Optional, + Set, + Tuple, +) import attr from prometheus_client import Counter @@ -409,7 +419,7 @@ async def handle_event(event: EventBase) -> None: ) return - destinations: Optional[Set[str]] = None + destinations: Optional[Collection[str]] = None if not event.prev_event_ids(): # If there are no prev event IDs then the state is empty # and so no remote servers in the room @@ -444,7 +454,7 @@ async def handle_event(event: EventBase) -> None: ) return - destinations = { + sharded_destinations = { d for d in destinations if self._federation_shard_config.should_handle( @@ -456,12 +466,12 @@ async def handle_event(event: EventBase) -> None: # If we are sending the event on behalf of another server # then it already has the event and there is no reason to # send the event to it. 
- destinations.discard(send_on_behalf_of) + sharded_destinations.discard(send_on_behalf_of) - logger.debug("Sending %s to %r", event, destinations) + logger.debug("Sending %s to %r", event, sharded_destinations) - if destinations: - await self._send_pdu(event, destinations) + if sharded_destinations: + await self._send_pdu(event, sharded_destinations) now = self.clock.time_msec() ts = await self.store.get_received_ts(event.event_id) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4be08fe7cbc6..59b5d497be68 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -411,10 +411,10 @@ async def current_sync_for_user( set_tag(SynapseTags.SYNC_RESULT, bool(sync_result)) return sync_result - async def push_rules_for_user(self, user: UserID) -> JsonDict: + async def push_rules_for_user(self, user: UserID) -> Dict[str, Dict[str, list]]: user_id = user.to_string() - rules = await self.store.get_push_rules_for_user(user_id) - rules = format_push_rules_for_user(user, rules) + rules_raw = await self.store.get_push_rules_for_user(user_id) + rules = format_push_rules_for_user(user, rules_raw) return rules async def ephemeral_by_room( diff --git a/synapse/rest/client/push_rule.py b/synapse/rest/client/push_rule.py index b98640b14ac5..8191b4e32c34 100644 --- a/synapse/rest/client/push_rule.py +++ b/synapse/rest/client/push_rule.py @@ -148,9 +148,9 @@ async def on_GET(self, request: SynapseRequest, path: str) -> Tuple[int, JsonDic # we build up the full structure and then decide which bits of it # to send which means doing unnecessary work sometimes but is # is probably not going to make a whole lot of difference - rules = await self.store.get_push_rules_for_user(user_id) + rules_raw = await self.store.get_push_rules_for_user(user_id) - rules = format_push_rules_for_user(requester.user, rules) + rules = format_push_rules_for_user(requester.user, rules_raw) path_parts = path.split("/")[1:] diff --git a/synapse/state/__init__.py 
b/synapse/state/__init__.py index 54e41d537584..0219091c4e8b 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -239,13 +239,13 @@ async def get_current_users_in_room( entry = await self.resolve_state_groups_for_events(room_id, latest_event_ids) return await self.store.get_joined_users_from_state(room_id, entry) - async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + async def get_current_hosts_in_room(self, room_id: str) -> FrozenSet[str]: event_ids = await self.store.get_latest_event_ids_in_room(room_id) return await self.get_hosts_in_room_at_events(room_id, event_ids) async def get_hosts_in_room_at_events( self, room_id: str, event_ids: Collection[str] - ) -> Set[str]: + ) -> FrozenSet[str]: """Get the hosts that were in a room at the given event ids Args: diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 5895b892024c..d545a1c002c9 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -26,11 +26,7 @@ from synapse.storage.databases.main.stats import UserSortOrder from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine from synapse.storage.types import Cursor -from synapse.storage.util.id_generators import ( - IdGenerator, - MultiWriterIdGenerator, - StreamIdGenerator, -) +from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator from synapse.types import JsonDict, get_domain_from_id from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -155,8 +151,6 @@ def __init__( ], ) - self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") - self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id") self._group_updates_id_gen = StreamIdGenerator( db_conn, "local_group_updates", "stream_id" ) diff --git a/synapse/storage/databases/main/metrics.py b/synapse/storage/databases/main/metrics.py index d03555a5857b..14294a0bb85a 100644 --- 
a/synapse/storage/databases/main/metrics.py +++ b/synapse/storage/databases/main/metrics.py @@ -14,16 +14,19 @@ import calendar import logging import time -from typing import TYPE_CHECKING, Dict +from typing import TYPE_CHECKING, Dict, List, Tuple, cast from synapse.metrics import GaugeBucketCollector from synapse.metrics.background_process_metrics import wrap_as_background_process from synapse.storage._base import SQLBaseStore -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.main.event_push_actions import ( EventPushActionsWorkerStore, ) -from synapse.storage.types import Cursor if TYPE_CHECKING: from synapse.server import HomeServer @@ -73,7 +76,7 @@ def __init__( @wrap_as_background_process("read_forward_extremities") async def _read_forward_extremities(self) -> None: - def fetch(txn): + def fetch(txn: LoggingTransaction) -> List[Tuple[int, int]]: txn.execute( """ SELECT t1.c, t2.c @@ -86,7 +89,7 @@ def fetch(txn): ) t2 ON t1.room_id = t2.room_id """ ) - return txn.fetchall() + return cast(List[Tuple[int, int]], txn.fetchall()) res = await self.db_pool.runInteraction("read_forward_extremities", fetch) @@ -104,20 +107,20 @@ async def count_daily_e2ee_messages(self) -> int: call to this function, it will return None. """ - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(*) FROM events WHERE type = 'm.room.encrypted' AND stream_ordering > ? 
""" txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_e2ee_messages", _count_messages) async def count_daily_sent_e2ee_messages(self) -> int: - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: # This is good enough as if you have silly characters in your own # hostname then that's your own fault. like_clause = "%:" + self.hs.hostname @@ -130,7 +133,7 @@ def _count_messages(txn): """ txn.execute(sql, (like_clause, self.stream_ordering_day_ago)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -138,14 +141,14 @@ def _count_messages(txn): ) async def count_daily_active_e2ee_rooms(self) -> int: - def _count(txn): + def _count(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(DISTINCT room_id) FROM events WHERE type = 'm.room.encrypted' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -160,20 +163,20 @@ async def count_daily_messages(self) -> int: call to this function, it will return None. """ - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(*) FROM events WHERE type = 'm.room.message' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_messages", _count_messages) async def count_daily_sent_messages(self) -> int: - def _count_messages(txn): + def _count_messages(txn: LoggingTransaction) -> int: # This is good enough as if you have silly characters in your own # hostname then that's your own fault. 
like_clause = "%:" + self.hs.hostname @@ -186,7 +189,7 @@ def _count_messages(txn): """ txn.execute(sql, (like_clause, self.stream_ordering_day_ago)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction( @@ -194,14 +197,14 @@ def _count_messages(txn): ) async def count_daily_active_rooms(self) -> int: - def _count(txn): + def _count(txn: LoggingTransaction) -> int: sql = """ SELECT COUNT(DISTINCT room_id) FROM events WHERE type = 'm.room.message' AND stream_ordering > ? """ txn.execute(sql, (self.stream_ordering_day_ago,)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return count return await self.db_pool.runInteraction("count_daily_active_rooms", _count) @@ -227,7 +230,7 @@ async def count_monthly_users(self) -> int: "count_monthly_users", self._count_users, thirty_days_ago ) - def _count_users(self, txn: Cursor, time_from: int) -> int: + def _count_users(self, txn: LoggingTransaction, time_from: int) -> int: """ Returns number of users seen in the past time_from period """ @@ -242,7 +245,7 @@ def _count_users(self, txn: Cursor, time_from: int) -> int: # Mypy knows that fetchone() might return None if there are no rows. # We know better: "SELECT COUNT(...) FROM ..." without any GROUP BY always # returns exactly one row. - (count,) = txn.fetchone() # type: ignore[misc] + (count,) = cast(Tuple[int], txn.fetchone()) return count async def count_r30_users(self) -> Dict[str, int]: @@ -256,7 +259,7 @@ async def count_r30_users(self) -> Dict[str, int]: A mapping of counts globally as well as broken out by platform. 
""" - def _count_r30_users(txn): + def _count_r30_users(txn: LoggingTransaction) -> Dict[str, int]: thirty_days_in_secs = 86400 * 30 now = int(self._clock.time()) thirty_days_ago_in_secs = now - thirty_days_in_secs @@ -321,7 +324,7 @@ def _count_r30_users(txn): txn.execute(sql, (thirty_days_ago_in_secs, thirty_days_ago_in_secs)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) results["all"] = count return results @@ -348,7 +351,7 @@ async def count_r30v2_users(self) -> Dict[str, int]: - "web" (any web application -- it's not possible to distinguish Element Web here) """ - def _count_r30v2_users(txn): + def _count_r30v2_users(txn: LoggingTransaction) -> Dict[str, int]: thirty_days_in_secs = 86400 * 30 now = int(self._clock.time()) sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs @@ -445,11 +448,8 @@ def _count_r30v2_users(txn): thirty_days_in_secs * 1000, ), ) - row = txn.fetchone() - if row is None: - results["all"] = 0 - else: - results["all"] = row[0] + (count,) = cast(Tuple[int], txn.fetchone()) + results["all"] = count return results @@ -471,7 +471,7 @@ async def generate_user_daily_visits(self) -> None: Generates daily visit data for use in cohort/ retention analysis """ - def _generate_user_daily_visits(txn): + def _generate_user_daily_visits(txn: LoggingTransaction) -> None: logger.info("Calling _generate_user_daily_visits") today_start = self._get_start_of_day() a_day_in_milliseconds = 24 * 60 * 60 * 1000 diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 4ed913e24879..0e2855fb446c 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -14,14 +14,18 @@ # limitations under the License. 
import abc import logging -from typing import TYPE_CHECKING, Dict, List, Tuple, Union +from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Tuple, Union, cast from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig from synapse.push.baserules import list_with_base_rules from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker from synapse.storage._base import SQLBaseStore, db_to_json -from synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) from synapse.storage.databases.main.appservice import ApplicationServiceWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.pusher import PusherWorkerStore @@ -30,9 +34,12 @@ from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.push_rule import InconsistentRuleException, RuleNotFoundException from synapse.storage.util.id_generators import ( + AbstractStreamIdGenerator, AbstractStreamIdTracker, + IdGenerator, StreamIdGenerator, ) +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList from synapse.util.caches.stream_change_cache import StreamChangeCache @@ -57,7 +64,11 @@ def _is_experimental_rule_enabled( return True -def _load_rules(rawrules, enabled_map, experimental_config: ExperimentalConfig): +def _load_rules( + rawrules: List[JsonDict], + enabled_map: Dict[str, bool], + experimental_config: ExperimentalConfig, +) -> List[JsonDict]: ruleslist = [] for rawrule in rawrules: rule = dict(rawrule) @@ -137,7 +148,7 @@ def __init__( ) @abc.abstractmethod - def get_max_push_rules_stream_id(self): + def get_max_push_rules_stream_id(self) -> int: """Get the position of the push rules stream. 
Returns: @@ -146,7 +157,7 @@ def get_max_push_rules_stream_id(self): raise NotImplementedError() @cached(max_entries=5000) - async def get_push_rules_for_user(self, user_id): + async def get_push_rules_for_user(self, user_id: str) -> List[JsonDict]: rows = await self.db_pool.simple_select_list( table="push_rules", keyvalues={"user_name": user_id}, @@ -168,7 +179,7 @@ async def get_push_rules_for_user(self, user_id): return _load_rules(rows, enabled_map, self.hs.config.experimental) @cached(max_entries=5000) - async def get_push_rules_enabled_for_user(self, user_id) -> Dict[str, bool]: + async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool]: results = await self.db_pool.simple_select_list( table="push_rules_enable", keyvalues={"user_name": user_id}, @@ -184,13 +195,13 @@ async def have_push_rules_changed_for_user( return False else: - def have_push_rules_changed_txn(txn): + def have_push_rules_changed_txn(txn: LoggingTransaction) -> bool: sql = ( "SELECT COUNT(stream_id) FROM push_rules_stream" " WHERE user_id = ? AND ? 
< stream_id" ) txn.execute(sql, (user_id, last_id)) - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) return bool(count) return await self.db_pool.runInteraction( @@ -202,11 +213,13 @@ def have_push_rules_changed_txn(txn): list_name="user_ids", num_args=1, ) - async def bulk_get_push_rules(self, user_ids): + async def bulk_get_push_rules( + self, user_ids: Collection[str] + ) -> Dict[str, List[JsonDict]]: if not user_ids: return {} - results = {user_id: [] for user_id in user_ids} + results: Dict[str, List[JsonDict]] = {user_id: [] for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules", @@ -250,7 +263,7 @@ async def copy_push_rule_from_room_to_room( condition["pattern"] = new_room_id # Add the rule for the new room - await self.add_push_rule( + await self.add_push_rule( # type: ignore[attr-defined] user_id=user_id, rule_id=new_rule_id, priority_class=rule["priority_class"], @@ -286,11 +299,13 @@ async def copy_push_rules_from_room_to_room_for_user( list_name="user_ids", num_args=1, ) - async def bulk_get_push_rules_enabled(self, user_ids): + async def bulk_get_push_rules_enabled( + self, user_ids: Collection[str] + ) -> Dict[str, Dict[str, bool]]: if not user_ids: return {} - results = {user_id: {} for user_id in user_ids} + results: Dict[str, Dict[str, bool]] = {user_id: {} for user_id in user_ids} rows = await self.db_pool.simple_select_many_batch( table="push_rules_enable", @@ -306,7 +321,7 @@ async def bulk_get_push_rules_enabled(self, user_ids): async def get_all_push_rule_updates( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, tuple]], int, bool]: + ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]: """Get updates for push_rules replication stream. 
Args: @@ -331,7 +346,9 @@ async def get_all_push_rule_updates( if last_id == current_id: return [], current_id, False - def get_all_push_rule_updates_txn(txn): + def get_all_push_rule_updates_txn( + txn: LoggingTransaction, + ) -> Tuple[List[Tuple[int, Tuple[str]]], int, bool]: sql = """ SELECT stream_id, user_id FROM push_rules_stream @@ -340,7 +357,10 @@ def get_all_push_rule_updates_txn(txn): LIMIT ? """ txn.execute(sql, (last_id, current_id, limit)) - updates = [(stream_id, (user_id,)) for stream_id, user_id in txn] + updates = cast( + List[Tuple[int, Tuple[str]]], + [(stream_id, (user_id,)) for stream_id, user_id in txn], + ) limited = False upper_bound = current_id @@ -356,15 +376,30 @@ def get_all_push_rule_updates_txn(txn): class PushRuleStore(PushRulesWorkerStore): + # Because we have write access, this will be a StreamIdGenerator + # (see PushRulesWorkerStore.__init__) + _push_rules_stream_id_gen: AbstractStreamIdGenerator + + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self._push_rule_id_gen = IdGenerator(db_conn, "push_rules", "id") + self._push_rules_enable_id_gen = IdGenerator(db_conn, "push_rules_enable", "id") + async def add_push_rule( self, - user_id, - rule_id, - priority_class, - conditions, - actions, - before=None, - after=None, + user_id: str, + rule_id: str, + priority_class: int, + conditions: List[Dict[str, str]], + actions: List[Union[JsonDict, str]], + before: Optional[str] = None, + after: Optional[str] = None, ) -> None: conditions_json = json_encoder.encode(conditions) actions_json = json_encoder.encode(actions) @@ -400,17 +435,17 @@ async def add_push_rule( def _add_push_rule_relative_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - conditions_json, - actions_json, - before, - after, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: 
str, + rule_id: str, + priority_class: int, + conditions_json: str, + actions_json: str, + before: str, + after: str, + ) -> None: # Lock the table since otherwise we'll have annoying races between the # SELECT here and the UPSERT below. self.database_engine.lock_table(txn, "push_rules") @@ -470,15 +505,15 @@ def _add_push_rule_relative_txn( def _add_push_rule_highest_priority_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - conditions_json, - actions_json, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + priority_class: int, + conditions_json: str, + actions_json: str, + ) -> None: # Lock the table since otherwise we'll have annoying races between the # SELECT here and the UPSERT below. self.database_engine.lock_table(txn, "push_rules") @@ -510,17 +545,17 @@ def _add_push_rule_highest_priority_txn( def _upsert_push_rule_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - priority_class, - priority, - conditions_json, - actions_json, - update_stream=True, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + priority_class: int, + priority: int, + conditions_json: str, + actions_json: str, + update_stream: bool = True, + ) -> None: """Specialised version of simple_upsert_txn that picks a push_rule_id using the _push_rule_id_gen if it needs to insert the rule. It assumes that the "push_rules" table is locked""" @@ -600,7 +635,11 @@ async def delete_push_rule(self, user_id: str, rule_id: str) -> None: rule_id: The rule_id of the rule to be deleted """ - def delete_push_rule_txn(txn, stream_id, event_stream_ordering): + def delete_push_rule_txn( + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + ) -> None: # we don't use simple_delete_one_txn because that would fail if the # user did not have a push_rule_enable row. 
self.db_pool.simple_delete_txn( @@ -661,14 +700,14 @@ async def set_push_rule_enabled( def _set_push_rule_enabled_txn( self, - txn, - stream_id, - event_stream_ordering, - user_id, - rule_id, - enabled, - is_default_rule, - ): + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + enabled: bool, + is_default_rule: bool, + ) -> None: new_id = self._push_rules_enable_id_gen.get_next() if not is_default_rule: @@ -740,7 +779,11 @@ async def set_push_rule_actions( """ actions_json = json_encoder.encode(actions) - def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering): + def set_push_rule_actions_txn( + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + ) -> None: if is_default_rule: # Add a dummy rule to the rules table with the user specified # actions. @@ -794,8 +837,15 @@ def set_push_rule_actions_txn(txn, stream_id, event_stream_ordering): ) def _insert_push_rules_update_txn( - self, txn, stream_id, event_stream_ordering, user_id, rule_id, op, data=None - ): + self, + txn: LoggingTransaction, + stream_id: int, + event_stream_ordering: int, + user_id: str, + rule_id: str, + op: str, + data: Optional[JsonDict] = None, + ) -> None: values = { "stream_id": stream_id, "event_stream_ordering": event_stream_ordering, @@ -814,5 +864,5 @@ def _insert_push_rules_update_txn( self.push_rules_stream_cache.entity_has_changed, user_id, stream_id ) - def get_max_push_rules_stream_id(self): + def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 48e83592e728..608d40dfa164 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -37,7 +37,12 @@ wrap_as_background_process, ) from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause -from 
synapse.storage.database import DatabasePool, LoggingDatabaseConnection +from synapse.storage.database import ( + DatabasePool, + LoggingDatabaseConnection, + LoggingTransaction, +) +from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.engines import Sqlite3Engine from synapse.storage.roommember import ( @@ -46,7 +51,7 @@ ProfileInfo, RoomsForUser, ) -from synapse.types import PersistedEventPosition, get_domain_from_id +from synapse.types import JsonDict, PersistedEventPosition, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import intern_string from synapse.util.caches.descriptors import _CacheContext, cached, cachedList @@ -115,7 +120,7 @@ def __init__( ) @wrap_as_background_process("_count_known_servers") - async def _count_known_servers(self): + async def _count_known_servers(self) -> int: """ Count the servers that this server knows about. @@ -123,7 +128,7 @@ async def _count_known_servers(self): `synapse_federation_known_servers` LaterGauge to collect. 
""" - def _transact(txn): + def _transact(txn: LoggingTransaction) -> int: if isinstance(self.database_engine, Sqlite3Engine): query = """ SELECT COUNT(DISTINCT substr(out.user_id, pos+1)) @@ -150,7 +155,9 @@ def _transact(txn): self._known_servers_count = max([count, 1]) return self._known_servers_count - def _check_safe_current_state_events_membership_updated_txn(self, txn): + def _check_safe_current_state_events_membership_updated_txn( + self, txn: LoggingTransaction + ) -> None: """Checks if it is safe to assume the new current_state_events membership column is up to date """ @@ -182,7 +189,7 @@ async def get_users_in_room(self, room_id: str) -> List[str]: "get_users_in_room", self.get_users_in_room_txn, room_id ) - def get_users_in_room_txn(self, txn, room_id: str) -> List[str]: + def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: # If we can assume current_state_events.membership is up to date # then we can avoid a join, which is a Very Good Thing given how # frequently this function gets called. @@ -222,7 +229,9 @@ async def get_users_in_room_with_profiles( A mapping from user ID to ProfileInfo. """ - def _get_users_in_room_with_profiles(txn) -> Dict[str, ProfileInfo]: + def _get_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: sql = """ SELECT state_key, display_name, avatar_url FROM room_memberships as m INNER JOIN current_state_events as c @@ -250,7 +259,9 @@ async def get_room_summary(self, room_id: str) -> Dict[str, MemberSummary]: dict of membership states, pointing to a MemberSummary named tuple. """ - def _get_room_summary_txn(txn): + def _get_room_summary_txn( + txn: LoggingTransaction, + ) -> Dict[str, MemberSummary]: # first get counts. # We do this all in one transaction to keep the cache small. 
# FIXME: get rid of this when we have room_stats @@ -279,7 +290,7 @@ def _get_room_summary_txn(txn): """ txn.execute(sql, (room_id,)) - res = {} + res: Dict[str, MemberSummary] = {} for count, membership in txn: res.setdefault(membership, MemberSummary([], count)) @@ -400,7 +411,7 @@ async def get_rooms_for_local_user_where_membership_is( def _get_rooms_for_local_user_where_membership_is_txn( self, - txn, + txn: LoggingTransaction, user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: @@ -488,7 +499,7 @@ async def get_rooms_for_user_with_stream_ordering( ) def _get_rooms_for_user_with_stream_ordering_txn( - self, txn, user_id: str + self, txn: LoggingTransaction, user_id: str ) -> FrozenSet[GetRoomsForUserWithStreamOrdering]: # We use `current_state_events` here and not `local_current_membership` # as a) this gets called with remote users and b) this only gets called @@ -542,7 +553,7 @@ async def get_rooms_for_users_with_stream_ordering( ) def _get_rooms_for_users_with_stream_ordering_txn( - self, txn, user_ids: Collection[str] + self, txn: LoggingTransaction, user_ids: Collection[str] ) -> Dict[str, FrozenSet[GetRoomsForUserWithStreamOrdering]]: clause, args = make_in_list_sql_clause( @@ -575,7 +586,9 @@ def _get_rooms_for_users_with_stream_ordering_txn( txn.execute(sql, [Membership.JOIN] + args) - result = {user_id: set() for user_id in user_ids} + result: Dict[str, Set[GetRoomsForUserWithStreamOrdering]] = { + user_id: set() for user_id in user_ids + } for user_id, room_id, instance, stream_id in txn: result[user_id].add( GetRoomsForUserWithStreamOrdering( @@ -595,7 +608,9 @@ async def get_users_server_still_shares_room_with( if not user_ids: return set() - def _get_users_server_still_shares_room_with_txn(txn): + def _get_users_server_still_shares_room_with_txn( + txn: LoggingTransaction, + ) -> Set[str]: sql = """ SELECT state_key FROM current_state_events WHERE @@ -657,7 +672,7 @@ async def get_users_who_share_room_with_user( async def 
get_joined_users_from_context( self, event: EventBase, context: EventContext ) -> Dict[str, ProfileInfo]: - state_group = context.state_group + state_group: Union[object, int] = context.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -666,14 +681,16 @@ async def get_joined_users_from_context( state_group = object() current_state_ids = await context.get_current_state_ids() + assert current_state_ids is not None + assert state_group is not None return await self._get_joined_users_from_context( event.room_id, state_group, current_state_ids, event=event, context=context ) async def get_joined_users_from_state( - self, room_id, state_entry + self, room_id: str, state_entry: "_StateCacheEntry" ) -> Dict[str, ProfileInfo]: - state_group = state_entry.state_group + state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. 
we need to make sure that calls with a state_group @@ -681,6 +698,7 @@ async def get_joined_users_from_state( # To do this we set the state_group to a new object as object() != object() state_group = object() + assert state_group is not None with Measure(self._clock, "get_joined_users_from_state"): return await self._get_joined_users_from_context( room_id, state_group, state_entry.state, context=state_entry @@ -689,12 +707,12 @@ async def get_joined_users_from_state( @cached(num_args=2, cache_context=True, iterable=True, max_entries=100000) async def _get_joined_users_from_context( self, - room_id, - state_group, - current_state_ids, - cache_context, - event=None, - context=None, + room_id: str, + state_group: Union[object, int], + current_state_ids: StateMap[str], + cache_context: _CacheContext, + event: Optional[EventBase] = None, + context: Optional[Union[EventContext, "_StateCacheEntry"]] = None, ) -> Dict[str, ProfileInfo]: # We don't use `state_group`, it's there so that we can cache based # on it. However, it's important that it's never None, since two current_states @@ -765,14 +783,18 @@ async def _get_joined_users_from_context( return users_in_room @cached(max_entries=10000) - def _get_joined_profile_from_event_id(self, event_id): + def _get_joined_profile_from_event_id( + self, event_id: str + ) -> Optional[Tuple[str, ProfileInfo]]: raise NotImplementedError() @cachedList( cached_method_name="_get_joined_profile_from_event_id", list_name="event_ids", ) - async def _get_joined_profiles_from_event_ids(self, event_ids: Iterable[str]): + async def _get_joined_profiles_from_event_ids( + self, event_ids: Iterable[str] + ) -> Dict[str, Optional[Tuple[str, ProfileInfo]]]: """For given set of member event_ids check if they point to a join event and if so return the associated user and profile info. 
@@ -780,8 +802,7 @@ async def _get_joined_profiles_from_event_ids(self, event_ids: Iterable[str]): event_ids: The member event IDs to lookup Returns: - dict[str, Tuple[str, ProfileInfo]|None]: Map from event ID - to `user_id` and ProfileInfo (or None if not join event). + Map from event ID to `user_id` and ProfileInfo (or None if not join event). """ rows = await self.db_pool.simple_select_many_batch( @@ -847,8 +868,10 @@ async def _check_host_room_membership( return True - async def get_joined_hosts(self, room_id: str, state_entry): - state_group = state_entry.state_group + async def get_joined_hosts( + self, room_id: str, state_entry: "_StateCacheEntry" + ) -> FrozenSet[str]: + state_group: Union[object, int] = state_entry.state_group if not state_group: # If state_group is None it means it has yet to be assigned a # state group, i.e. we need to make sure that calls with a state_group @@ -856,6 +879,7 @@ async def get_joined_hosts(self, room_id: str, state_entry): # To do this we set the state_group to a new object as object() != object() state_group = object() + assert state_group is not None with Measure(self._clock, "get_joined_hosts"): return await self._get_joined_hosts( room_id, state_group, state_entry=state_entry @@ -863,7 +887,10 @@ async def get_joined_hosts(self, room_id: str, state_entry): @cached(num_args=2, max_entries=10000, iterable=True) async def _get_joined_hosts( - self, room_id: str, state_group: int, state_entry: "_StateCacheEntry" + self, + room_id: str, + state_group: Union[object, int], + state_entry: "_StateCacheEntry", ) -> FrozenSet[str]: # We don't use `state_group`, it's there so that we can cache based on # it. However, its important that its never None, since two @@ -881,7 +908,7 @@ async def _get_joined_hosts( # `get_joined_hosts` is called with the "current" state group for the # room, and so consecutive calls will be for consecutive state groups # which point to the previous state group. 
- cache = await self._get_joined_hosts_cache(room_id) + cache = await self._get_joined_hosts_cache(room_id) # type: ignore[misc] # If the state group in the cache matches, we already have the data we need. if state_entry.state_group == cache.state_group: @@ -897,6 +924,7 @@ async def _get_joined_hosts( elif state_entry.prev_group == cache.state_group: # The cached work is for the previous state group, so we work out # the delta. + assert state_entry.delta_ids is not None for (typ, state_key), event_id in state_entry.delta_ids.items(): if typ != EventTypes.Member: continue @@ -942,7 +970,7 @@ async def did_forget(self, user_id: str, room_id: str) -> bool: Returns False if they have since re-joined.""" - def f(txn): + def f(txn: LoggingTransaction) -> int: sql = ( "SELECT" " COUNT(*)" @@ -973,7 +1001,7 @@ async def get_forgotten_rooms_for_user(self, user_id: str) -> Set[str]: The forgotten rooms. """ - def _get_forgotten_rooms_for_user_txn(txn): + def _get_forgotten_rooms_for_user_txn(txn: LoggingTransaction) -> Set[str]: # This is a slightly convoluted query that first looks up all rooms # that the user has forgotten in the past, then rechecks that list # to see if any have subsequently been updated. 
This is done so that @@ -1076,7 +1104,9 @@ async def is_local_host_in_room_ignoring_users( clause, ) - def _is_local_host_in_room_ignoring_users_txn(txn): + def _is_local_host_in_room_ignoring_users_txn( + txn: LoggingTransaction, + ) -> bool: txn.execute(sql, (room_id, Membership.JOIN, *args)) return bool(txn.fetchone()) @@ -1110,15 +1140,17 @@ def __init__( where_clause="forgotten = 1", ) - async def _background_add_membership_profile(self, progress, batch_size): + async def _background_add_membership_profile( + self, progress: JsonDict, batch_size: int + ) -> int: target_min_stream_id = progress.get( - "target_min_stream_id_inclusive", self._min_stream_order_on_start + "target_min_stream_id_inclusive", self._min_stream_order_on_start # type: ignore[attr-defined] ) max_stream_id = progress.get( - "max_stream_id_exclusive", self._stream_order_on_start + 1 + "max_stream_id_exclusive", self._stream_order_on_start + 1 # type: ignore[attr-defined] ) - def add_membership_profile_txn(txn): + def add_membership_profile_txn(txn: LoggingTransaction) -> int: sql = """ SELECT stream_ordering, event_id, events.room_id, event_json.json FROM events @@ -1182,13 +1214,17 @@ def add_membership_profile_txn(txn): return result - async def _background_current_state_membership(self, progress, batch_size): + async def _background_current_state_membership( + self, progress: JsonDict, batch_size: int + ) -> int: """Update the new membership column on current_state_events. This works by iterating over all rooms in alphebetical order. 
""" - def _background_current_state_membership_txn(txn, last_processed_room): + def _background_current_state_membership_txn( + txn: LoggingTransaction, last_processed_room: str + ) -> Tuple[int, bool]: processed = 0 while processed < batch_size: txn.execute( @@ -1242,7 +1278,11 @@ def _background_current_state_membership_txn(txn, last_processed_room): return row_count -class RoomMemberStore(RoomMemberWorkerStore, RoomMemberBackgroundUpdateStore): +class RoomMemberStore( + RoomMemberWorkerStore, + RoomMemberBackgroundUpdateStore, + CacheInvalidationWorkerStore, +): def __init__( self, database: DatabasePool, @@ -1254,7 +1294,7 @@ def __init__( async def forget(self, user_id: str, room_id: str) -> None: """Indicate that user_id wishes to discard history for room_id.""" - def f(txn): + def f(txn: LoggingTransaction) -> None: sql = ( "UPDATE" " room_memberships" @@ -1288,5 +1328,5 @@ class _JoinedHostsCache: # equal to anything else). state_group: Union[object, int] = attr.Factory(object) - def __len__(self): + def __len__(self) -> int: return sum(len(v) for v in self.hosts_to_joined_users.values()) From 5331fb5b478789a3ffaaeddb58f8d1cefd42a9eb Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 17 May 2022 17:06:45 +0100 Subject: [PATCH 045/181] allow `on_invalidate=None` in `@cached` methods (#12769) --- changelog.d/12769.misc | 1 + scripts-dev/mypy_synapse_plugin.py | 25 +++++++++++++------- synapse/storage/databases/main/roommember.py | 3 ++- 3 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 changelog.d/12769.misc diff --git a/changelog.d/12769.misc b/changelog.d/12769.misc new file mode 100644 index 000000000000..27bd53abe376 --- /dev/null +++ b/changelog.d/12769.misc @@ -0,0 +1 @@ +Tweak the mypy plugin so that `@cached` can accept `on_invalidate=None`. 
diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py index c775865212ee..d08517a95382 100644 --- a/scripts-dev/mypy_synapse_plugin.py +++ b/scripts-dev/mypy_synapse_plugin.py @@ -21,7 +21,7 @@ from mypy.nodes import ARG_NAMED_OPT from mypy.plugin import MethodSigContext, Plugin from mypy.typeops import bind_self -from mypy.types import CallableType, NoneType +from mypy.types import CallableType, NoneType, UnionType class SynapsePlugin(Plugin): @@ -72,13 +72,20 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: # Third, we add an optional "on_invalidate" argument. # - # This is a callable which accepts no input and returns nothing. - calltyp = CallableType( - arg_types=[], - arg_kinds=[], - arg_names=[], - ret_type=NoneType(), - fallback=ctx.api.named_generic_type("builtins.function", []), + # This is either + # - a callable which accepts no input and returns nothing, or + # - None. + calltyp = UnionType( + [ + NoneType(), + CallableType( + arg_types=[], + arg_kinds=[], + arg_names=[], + ret_type=NoneType(), + fallback=ctx.api.named_generic_type("builtins.function", []), + ), + ] ) arg_types.append(calltyp) @@ -95,7 +102,7 @@ def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: def plugin(version: str) -> Type[SynapsePlugin]: - # This is the entry point of the plugin, and let's us deal with the fact + # This is the entry point of the plugin, and lets us deal with the fact # that the mypy plugin interface is *not* stable by looking at the version # string. 
# diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 608d40dfa164..cc528fcf2dae 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -15,6 +15,7 @@ import logging from typing import ( TYPE_CHECKING, + Callable, Collection, Dict, FrozenSet, @@ -634,7 +635,7 @@ def _get_users_server_still_shares_room_with_txn( ) async def get_rooms_for_user( - self, user_id: str, on_invalidate=None + self, user_id: str, on_invalidate: Optional[Callable[[], None]] = None ) -> FrozenSet[str]: """Returns a set of room_ids the user is currently joined to. From 182ca78a12c4ae0f37726d43d5e592d669d99ee1 Mon Sep 17 00:00:00 2001 From: Mathieu Velten Date: Tue, 17 May 2022 19:01:06 +0200 Subject: [PATCH 046/181] Delete events from federation_inbound_events_staging table on purge (#12770) --- changelog.d/12770.bugfix | 1 + synapse/storage/databases/main/purge_events.py | 1 + tests/rest/admin/test_room.py | 1 + 3 files changed, 3 insertions(+) create mode 100644 changelog.d/12770.bugfix diff --git a/changelog.d/12770.bugfix b/changelog.d/12770.bugfix new file mode 100644 index 000000000000..a958f9a16ba3 --- /dev/null +++ b/changelog.d/12770.bugfix @@ -0,0 +1 @@ +Delete events from the `federation_inbound_events_staging` table when a room is purged through the admin API. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 38ba91af4c47..c94d5f9f812b 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -417,6 +417,7 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]: "room_account_data", "room_tags", "local_current_membership", + "federation_inbound_events_staging", ): logger.info("[purge] removing %s from %s", room_id, table) txn.execute("DELETE FROM %s WHERE room_id=?" 
% (table,), (room_id,)) diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py index 95282f078e77..608d3f2dc36a 100644 --- a/tests/rest/admin/test_room.py +++ b/tests/rest/admin/test_room.py @@ -2489,4 +2489,5 @@ def _block_room(self, room_id: str) -> None: "room_tags", # "state_groups", # Current impl leaves orphaned state groups around. "state_groups_state", + "federation_inbound_events_staging", ] From 0d17357fcdded3fa3f8a37db7b6b9aa0402a10ed Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 17 May 2022 19:05:53 +0100 Subject: [PATCH 047/181] Suggest using docker when testing against postgres (#12765) Co-authored-by: Sean Quah <8349537+squahtx@users.noreply.github.com> --- changelog.d/12765.doc | 1 + docs/development/contributing_guide.md | 31 +++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12765.doc diff --git a/changelog.d/12765.doc b/changelog.d/12765.doc new file mode 100644 index 000000000000..277b037d6b03 --- /dev/null +++ b/changelog.d/12765.doc @@ -0,0 +1 @@ +Recommend using docker to run tests against postgres. diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index d356c72bf780..f55a1fbb9002 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -206,7 +206,32 @@ This means that we need to run our unit tests against PostgreSQL too. Our CI doe this automatically for pull requests and release candidates, but it's sometimes useful to reproduce this locally. -To do so, [configure Postgres](../postgres.md) and run `trial` with the +#### Using Docker + +The easiest way to do so is to run Postgres via a docker container. 
In one +terminal: + +```shell +docker run --rm -e POSTGRES_PASSWORD=mysecretpassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres -p 5432:5432 postgres:14 +``` + +If you see an error like + +``` +docker: Error response from daemon: driver failed programming external connectivity on endpoint nice_ride (b57bbe2e251b70015518d00c9981e8cb8346b5c785250341a6c53e3c899875f1): Error starting userland proxy: listen tcp4 0.0.0.0:5432: bind: address already in use. +``` + +then something is already bound to port 5432. You're probably already running postgres locally. + +Once you have a postgres server running, invoke `trial` in a second terminal: + +```shell +SYNAPSE_POSTGRES=1 SYNAPSE_POSTGRES_HOST=127.0.0.1 SYNAPSE_POSTGRES_USER=postgres SYNAPSE_POSTGRES_PASSWORD=mysecretpassword poetry run trial tests +``` + +#### Using an existing Postgres installation + +If you have postgres already installed on your system, you can run `trial` with the following environment variables matching your configuration: - `SYNAPSE_POSTGRES` to anything nonempty @@ -229,8 +254,8 @@ You don't need to specify the host, user, port or password if your Postgres server is set to authenticate you over the UNIX socket (i.e. if the `psql` command works without further arguments). -Your Postgres account needs to be able to create databases. - +Your Postgres account needs to be able to create databases; see the postgres +docs for [`ALTER ROLE`](https://www.postgresql.org/docs/current/sql-alterrole.html). ## Run the integration tests ([Sytest](https://github.com/matrix-org/sytest)). 
From 37935b5183ab3cbee2f80359d80b1ff2176428f0 Mon Sep 17 00:00:00 2001 From: Adam <65660516+ajr0d@users.noreply.github.com> Date: Wed, 18 May 2022 10:37:48 +0100 Subject: [PATCH 048/181] Move methods that call add_push_rule to PushRuleStore (#12772) Signed-off-by: Adam Roddick --- changelog.d/12772.misc | 1 + synapse/storage/databases/main/push_rule.py | 102 ++++++++++---------- 2 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 changelog.d/12772.misc diff --git a/changelog.d/12772.misc b/changelog.d/12772.misc new file mode 100644 index 000000000000..da66f376fe8e --- /dev/null +++ b/changelog.d/12772.misc @@ -0,0 +1 @@ +Move methods that call `add_push_rule` to the `PushRuleStore` class. diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 0e2855fb446c..ad67901cc1ac 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -243,57 +243,6 @@ async def bulk_get_push_rules( return results - async def copy_push_rule_from_room_to_room( - self, new_room_id: str, user_id: str, rule: dict - ) -> None: - """Copy a single push rule from one room to another for a specific user. - - Args: - new_room_id: ID of the new room. - user_id : ID of user the push rule belongs to. - rule: A push rule. 
- """ - # Create new rule id - rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) - new_rule_id = rule_id_scope + "/" + new_room_id - - # Change room id in each condition - for condition in rule.get("conditions", []): - if condition.get("key") == "room_id": - condition["pattern"] = new_room_id - - # Add the rule for the new room - await self.add_push_rule( # type: ignore[attr-defined] - user_id=user_id, - rule_id=new_rule_id, - priority_class=rule["priority_class"], - conditions=rule["conditions"], - actions=rule["actions"], - ) - - async def copy_push_rules_from_room_to_room_for_user( - self, old_room_id: str, new_room_id: str, user_id: str - ) -> None: - """Copy all of the push rules from one room to another for a specific - user. - - Args: - old_room_id: ID of the old room. - new_room_id: ID of the new room. - user_id: ID of user to copy push rules for. - """ - # Retrieve push rules for this user - user_push_rules = await self.get_push_rules_for_user(user_id) - - # Get rules relating to the old room and copy them to the new room - for rule in user_push_rules: - conditions = rule.get("conditions", []) - if any( - (c.get("key") == "room_id" and c.get("pattern") == old_room_id) - for c in conditions - ): - await self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) - @cachedList( cached_method_name="get_push_rules_enabled_for_user", list_name="user_ids", @@ -866,3 +815,54 @@ def _insert_push_rules_update_txn( def get_max_push_rules_stream_id(self) -> int: return self._push_rules_stream_id_gen.get_current_token() + + async def copy_push_rule_from_room_to_room( + self, new_room_id: str, user_id: str, rule: dict + ) -> None: + """Copy a single push rule from one room to another for a specific user. + + Args: + new_room_id: ID of the new room. + user_id : ID of user the push rule belongs to. + rule: A push rule. 
+ """ + # Create new rule id + rule_id_scope = "/".join(rule["rule_id"].split("/")[:-1]) + new_rule_id = rule_id_scope + "/" + new_room_id + + # Change room id in each condition + for condition in rule.get("conditions", []): + if condition.get("key") == "room_id": + condition["pattern"] = new_room_id + + # Add the rule for the new room + await self.add_push_rule( + user_id=user_id, + rule_id=new_rule_id, + priority_class=rule["priority_class"], + conditions=rule["conditions"], + actions=rule["actions"], + ) + + async def copy_push_rules_from_room_to_room_for_user( + self, old_room_id: str, new_room_id: str, user_id: str + ) -> None: + """Copy all of the push rules from one room to another for a specific + user. + + Args: + old_room_id: ID of the old room. + new_room_id: ID of the new room. + user_id: ID of user to copy push rules for. + """ + # Retrieve push rules for this user + user_push_rules = await self.get_push_rules_for_user(user_id) + + # Get rules relating to the old room and copy them to the new room + for rule in user_push_rules: + conditions = rule.get("conditions", []) + if any( + (c.get("key") == "room_id" and c.get("pattern") == old_room_id) + for c in conditions + ): + await self.copy_push_rule_from_room_to_room(new_room_id, user_id, rule) From 8afb7b55d0527f8c6af7690b162ebaabe9b5d9f5 Mon Sep 17 00:00:00 2001 From: Hubert Chathi Date: Wed, 18 May 2022 06:19:30 -0400 Subject: [PATCH 049/181] Make handling of federation Authorization header (more) compliant with RFC7230 (#12774) The main differences are: - values with delimiters (such as colons) should be quoted, so always quote the origin, since it could contain a colon followed by a port number - should allow more than one space after "X-Matrix" - quoted values with backslash-escaped characters should be unescaped - names should be case insensitive --- changelog.d/12774.misc | 1 + synapse/federation/transport/server/_base.py | 8 +++-- synapse/http/matrixfederationclient.py | 2 +- 
.../federation/transport/server/test__base.py | 29 ++++++++++++++++++- 4 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 changelog.d/12774.misc diff --git a/changelog.d/12774.misc b/changelog.d/12774.misc new file mode 100644 index 000000000000..8651f2e0e062 --- /dev/null +++ b/changelog.d/12774.misc @@ -0,0 +1 @@ +Make handling of federation Authorization header (more) compliant with RFC7230. diff --git a/synapse/federation/transport/server/_base.py b/synapse/federation/transport/server/_base.py index 103861644a70..84100a5a5257 100644 --- a/synapse/federation/transport/server/_base.py +++ b/synapse/federation/transport/server/_base.py @@ -169,14 +169,16 @@ def _parse_auth_header(header_bytes: bytes) -> Tuple[str, str, str, Optional[str """ try: header_str = header_bytes.decode("utf-8") - params = header_str.split(" ")[1].split(",") + params = re.split(" +", header_str)[1].split(",") param_dict: Dict[str, str] = { - k: v for k, v in [param.split("=", maxsplit=1) for param in params] + k.lower(): v for k, v in [param.split("=", maxsplit=1) for param in params] } def strip_quotes(value: str) -> str: if value.startswith('"'): - return value[1:-1] + return re.sub( + "\\\\(.)", lambda matchobj: matchobj.group(1), value[1:-1] + ) else: return value diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 725b5c33b8c5..0b9475debdb1 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -747,7 +747,7 @@ def build_auth_headers( for key, sig in request["signatures"][self.server_name].items(): auth_headers.append( ( - 'X-Matrix origin=%s,key="%s",sig="%s",destination="%s"' + 'X-Matrix origin="%s",key="%s",sig="%s",destination="%s"' % ( self.server_name, key, diff --git a/tests/federation/transport/server/test__base.py b/tests/federation/transport/server/test__base.py index ac3695a8ccab..e63885c1c9d8 100644 --- a/tests/federation/transport/server/test__base.py +++ 
b/tests/federation/transport/server/test__base.py @@ -17,7 +17,7 @@ from synapse.api.errors import Codes from synapse.federation.transport.server import BaseFederationServlet -from synapse.federation.transport.server._base import Authenticator +from synapse.federation.transport.server._base import Authenticator, _parse_auth_header from synapse.http.server import JsonResource, cancellable from synapse.server import HomeServer from synapse.types import JsonDict @@ -112,3 +112,30 @@ def test_uncancellable_disconnect(self) -> None: expect_cancellation=False, expected_body={"result": True}, ) + + +class BaseFederationAuthorizationTests(unittest.TestCase): + def test_authorization_header(self) -> None: + """Tests that the Authorization header is parsed correctly.""" + + # test a "normal" Authorization header + self.assertEqual( + _parse_auth_header( + b'X-Matrix origin=foo,key="ed25519:1",sig="sig",destination="bar"' + ), + ("foo", "ed25519:1", "sig", "bar"), + ) + # test an Authorization with extra spaces, upper-case names, and escaped + # characters + self.assertEqual( + _parse_auth_header( + b'X-Matrix ORIGIN=foo,KEY="ed25\\519:1",SIG="sig",destination="bar"' + ), + ("foo", "ed25519:1", "sig", "bar"), + ) + self.assertEqual( + _parse_auth_header( + b'X-Matrix origin=foo,key="ed25519:1",sig="sig",destination="bar",extra_field=ignored' + ), + ("foo", "ed25519:1", "sig", "bar"), + ) From d4713d3e335b21d12284ddd8ebd00e38abcfd521 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 18 May 2022 11:28:14 +0100 Subject: [PATCH 050/181] Discard null-containing strings before updating the user directory (#12762) --- changelog.d/12762.misc | 1 + synapse/rest/client/room.py | 4 +-- synapse/storage/databases/main/events.py | 4 +-- .../storage/databases/main/user_directory.py | 9 +++--- synapse/util/stringutils.py | 10 ++++++- tests/handlers/test_user_directory.py | 28 +++++++++++++++++++ 6 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 
changelog.d/12762.misc diff --git a/changelog.d/12762.misc b/changelog.d/12762.misc new file mode 100644 index 000000000000..990fb6fe74eb --- /dev/null +++ b/changelog.d/12762.misc @@ -0,0 +1 @@ +Fix a long-standing bug where the user directory background process would fail to make forward progress if a user included a null codepoint in their display name or avatar. diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 4b8bfbffcb36..5a2361a2e691 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -109,10 +109,10 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() def register(self, http_server: HttpServer) -> None: - # /room/$roomid/state/$eventtype + # /rooms/$roomid/state/$eventtype no_state_key = "/rooms/(?P<room_id>[^/]*)/state/(?P<event_type>[^/]*)$" - # /room/$roomid/state/$eventtype/$statekey + # /rooms/$roomid/state/$eventtype/$statekey state_key = ( "/rooms/(?P<room_id>[^/]*)/state/" "(?P<event_type>[^/]*)/(?P<state_key>[^/]*)$" diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 42d484dc98d9..0df8ff53957a 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -52,6 +52,7 @@ from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically +from synapse.util.stringutils import non_null_str_or_none if TYPE_CHECKING: from synapse.server import HomeServer @@ -1728,9 +1729,6 @@ def _store_room_members_txn( not affect the current local state. 
""" - def non_null_str_or_none(val: Any) -> Optional[str]: - return val if isinstance(val, str) and "\u0000" not in val else None - self.db_pool.simple_insert_many_txn( txn, table="room_memberships", diff --git a/synapse/storage/databases/main/user_directory.py b/synapse/storage/databases/main/user_directory.py index df772d472102..028db69af301 100644 --- a/synapse/storage/databases/main/user_directory.py +++ b/synapse/storage/databases/main/user_directory.py @@ -29,6 +29,7 @@ from typing_extensions import TypedDict from synapse.api.errors import StoreError +from synapse.util.stringutils import non_null_str_or_none if TYPE_CHECKING: from synapse.server import HomeServer @@ -469,11 +470,9 @@ async def update_profile_in_user_dir( """ Update or add a user's profile in the user directory. """ - # If the display name or avatar URL are unexpected types, overwrite them. - if not isinstance(display_name, str): - display_name = None - if not isinstance(avatar_url, str): - avatar_url = None + # If the display name or avatar URL are unexpected types, replace with None. + display_name = non_null_str_or_none(display_name) + avatar_url = non_null_str_or_none(avatar_url) def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None: self.db_pool.simple_upsert_txn( diff --git a/synapse/util/stringutils.py b/synapse/util/stringutils.py index b26546aecdb7..27a363d7e516 100644 --- a/synapse/util/stringutils.py +++ b/synapse/util/stringutils.py @@ -16,7 +16,7 @@ import re import secrets import string -from typing import Iterable, Optional, Tuple +from typing import Any, Iterable, Optional, Tuple from netaddr import valid_ipv6 @@ -247,3 +247,11 @@ def base62_encode(num: int, minwidth: int = 1) -> str: # pad to minimum width pad = "0" * (minwidth - len(res)) return pad + res + + +def non_null_str_or_none(val: Any) -> Optional[str]: + """Check that the arg is a string containing no null (U+0000) codepoints. + + If so, returns the given string unmodified; otherwise, returns None. 
+ """ + return val if isinstance(val, str) and "\u0000" not in val else None diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 96e2e3039ba8..4d658d29cab5 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -1007,6 +1007,34 @@ def test_local_user_leaving_room_remains_in_user_directory(self) -> None: self.assertEqual(in_public, {(bob, room1), (bob, room2)}) self.assertEqual(in_private, set()) + def test_ignore_display_names_with_null_codepoints(self) -> None: + MXC_DUMMY = "mxc://dummy" + + # Alice creates a public room. + alice = self.register_user("alice", "pass") + + # Alice has a user directory entry to start with. + self.assertIn( + alice, + self.get_success(self.user_dir_helper.get_profiles_in_user_directory()), + ) + + # Alice changes her name to include a null codepoint. + self.get_success( + self.hs.get_user_directory_handler().handle_local_profile_change( + alice, + ProfileInfo( + display_name="abcd\u0000efgh", + avatar_url=MXC_DUMMY, + ), + ) + ) + # Alice's profile should be updated with the new avatar, but no display name. 
+ self.assertEqual( + self.get_success(self.user_dir_helper.get_profiles_in_user_directory()), + {alice: ProfileInfo(display_name=None, avatar_url=MXC_DUMMY)}, + ) + class TestUserDirSearchDisabled(unittest.HomeserverTestCase): servlets = [ From deca250e3f0f18ffc978f17d1f9e440fc8a4af98 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 18 May 2022 12:21:32 +0100 Subject: [PATCH 051/181] Add some documentation around the `rc_invites` option to the config docs (#12759) --- changelog.d/12759.doc | 1 + docs/usage/configuration/config_documentation.md | 14 ++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 changelog.d/12759.doc diff --git a/changelog.d/12759.doc b/changelog.d/12759.doc new file mode 100644 index 000000000000..45d1c9c0ca1a --- /dev/null +++ b/changelog.d/12759.doc @@ -0,0 +1 @@ +Add information regarding the `rc_invites` ratelimiting option to the configuration docs. diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 3e2031f08aa6..3ad3085bfac1 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1357,6 +1357,20 @@ This option sets ratelimiting how often invites can be sent in a room or to a specific user. `per_room` defaults to `per_second: 0.3`, `burst_count: 10` and `per_user` defaults to `per_second: 0.003`, `burst_count: 5`. +Client requests that invite user(s) when [creating a +room](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3createroom) +will count against the `rc_invites.per_room` limit, whereas +client requests to [invite a single user to a +room](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3roomsroomidinvite) +will count against both the `rc_invites.per_user` and `rc_invites.per_room` limits. 
+ +Federation requests to invite a user will count against the `rc_invites.per_user` +limit only, as Synapse presumes ratelimiting by room will be done by the sending server. + +The `rc_invites.per_user` limit applies to the *receiver* of the invite, rather than the +sender, meaning that a `rc_invites.per_user.burst_count` of 5 mandates that a single user +cannot *receive* more than a burst of 5 invites at a time. + Example configuration: ```yaml rc_invites: From a167304c8bba0dffb4c64dc9034272ff5bcaa4ff Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 18 May 2022 12:29:32 +0100 Subject: [PATCH 052/181] Switch the 'Configuration' link in the docs homepage to the config manual (#12748) --- changelog.d/12748.doc | 1 + docs/welcome_and_overview.md | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12748.doc diff --git a/changelog.d/12748.doc b/changelog.d/12748.doc new file mode 100644 index 000000000000..996ad3a1b926 --- /dev/null +++ b/changelog.d/12748.doc @@ -0,0 +1 @@ +Link to the configuration manual from the welcome page of the documentation. diff --git a/docs/welcome_and_overview.md b/docs/welcome_and_overview.md index aab2d6b4f0f6..451759f06ec6 100644 --- a/docs/welcome_and_overview.md +++ b/docs/welcome_and_overview.md @@ -7,10 +7,10 @@ team. ## Installing and using Synapse This documentation covers topics for **installation**, **configuration** and -**maintainence** of your Synapse process: +**maintenance** of your Synapse process: * Learn how to [install](setup/installation.md) and - [configure](usage/configuration/index.html) your own instance, perhaps with [Single + [configure](usage/configuration/config_documentation.md) your own instance, perhaps with [Single Sign-On](usage/configuration/user_authentication/index.html). * See how to [upgrade](upgrade.md) between Synapse versions. 
@@ -65,7 +65,7 @@ following documentation: Want to help keep Synapse going but don't know how to code? Synapse is a [Matrix.org Foundation](https://matrix.org) project. Consider becoming a -supportor on [Liberapay](https://liberapay.com/matrixdotorg), +supporter on [Liberapay](https://liberapay.com/matrixdotorg), [Patreon](https://patreon.com/matrixdotorg) or through [PayPal](https://paypal.me/matrixdotorg) via a one-time donation. From df4963548b8f9bf9e36e76558864f7045d7b5215 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 18 May 2022 11:46:06 +0000 Subject: [PATCH 053/181] Give a meaningful error message when a client tries to create a room with an invalid alias localpart. (#12779) --- changelog.d/12779.bugfix | 1 + synapse/handlers/directory.py | 3 +++ synapse/handlers/room.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 changelog.d/12779.bugfix diff --git a/changelog.d/12779.bugfix b/changelog.d/12779.bugfix new file mode 100644 index 000000000000..7cf7a1f65f24 --- /dev/null +++ b/changelog.d/12779.bugfix @@ -0,0 +1 @@ +Give a meaningful error message when a client tries to create a room with an invalid alias localpart. \ No newline at end of file diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 33d827a45b33..4aa33df884ac 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -71,6 +71,9 @@ async def _create_association( if wchar in room_alias.localpart: raise SynapseError(400, "Invalid characters in room alias") + if ":" in room_alias.localpart: + raise SynapseError(400, "Invalid character in room alias localpart: ':'.") + if not self.hs.is_mine(room_alias): raise SynapseError(400, "Room alias must be local") # TODO(erikj): Change this. 
diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a2973109adc4..53569e521219 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -751,6 +751,21 @@ async def create_room( if wchar in config["room_alias_name"]: raise SynapseError(400, "Invalid characters in room alias") + if ":" in config["room_alias_name"]: + # Prevent someone from trying to pass in a full alias here. + # Note that it's permissible for a room alias to have multiple + # hash symbols at the start (notably bridged over from IRC, too), + # but the first colon in the alias is defined to separate the local + # part from the server name. + # (remember server names can contain port numbers, also separated + # by a colon. But under no circumstances should the local part be + # allowed to contain a colon!) + raise SynapseError( + 400, + "':' is not permitted in the room alias name. " + "Please note this expects a local part — 'wombat', not '#wombat:example.com'.", + ) + room_alias = RoomAlias(config["room_alias_name"], self.hs.hostname) mapping = await self.store.get_association_from_room_alias(room_alias) From 635f0d916bc5155d3f3cba0389a1ebe08a6b5910 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Wed, 18 May 2022 13:57:59 +0000 Subject: [PATCH 054/181] Do not keep going if there are 5 back-to-back background update failures. (#12781) --- changelog.d/12781.misc | 1 + synapse/storage/background_updates.py | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 changelog.d/12781.misc diff --git a/changelog.d/12781.misc b/changelog.d/12781.misc new file mode 100644 index 000000000000..8a045716172a --- /dev/null +++ b/changelog.d/12781.misc @@ -0,0 +1 @@ +Do not keep going if there are 5 back-to-back background update failures. 
\ No newline at end of file diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index c2bbbb574e75..37f2d6c644f4 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -282,12 +282,20 @@ async def run_background_updates(self, sleep: bool) -> None: self._running = True + back_to_back_failures = 0 + try: logger.info("Starting background schema updates") while self.enabled: try: result = await self.do_next_background_update(sleep) + back_to_back_failures = 0 except Exception: + back_to_back_failures += 1 + if back_to_back_failures >= 5: + raise RuntimeError( + "5 back-to-back background update failures; aborting." + ) logger.exception("Error doing update") else: if result: From 50ae4eafe1f8ba31f1977e5dc11c85f15722f1ee Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 18 May 2022 17:02:10 +0200 Subject: [PATCH 055/181] Add some type hints to `event_federation` datastore (#12753) Co-authored-by: David Robertson --- changelog.d/12753.misc | 1 + mypy.ini | 1 - synapse/handlers/room_batch.py | 2 + .../databases/main/event_federation.py | 187 ++++++++++++------ tests/handlers/test_federation.py | 1 + 5 files changed, 127 insertions(+), 65 deletions(-) create mode 100644 changelog.d/12753.misc diff --git a/changelog.d/12753.misc b/changelog.d/12753.misc new file mode 100644 index 000000000000..e793d08e5e3f --- /dev/null +++ b/changelog.d/12753.misc @@ -0,0 +1 @@ +Add some type hints to datastore. 
\ No newline at end of file diff --git a/mypy.ini b/mypy.ini index 45668974b363..4fa020b8764d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -27,7 +27,6 @@ exclude = (?x) |synapse/storage/databases/__init__.py |synapse/storage/databases/main/cache.py |synapse/storage/databases/main/devices.py - |synapse/storage/databases/main/event_federation.py |synapse/storage/schema/ |tests/api/test_auth.py diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 29de7e5bed10..fbfd7484065c 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -53,6 +53,7 @@ async def inherit_depth_from_prev_ids(self, prev_event_ids: List[str]) -> int: # We want to use the successor event depth so they appear after `prev_event` because # it has a larger `depth` but before the successor event because the `stream_ordering` # is negative before the successor event. + assert most_recent_prev_event_id is not None successor_event_ids = await self.store.get_successor_events( most_recent_prev_event_id ) @@ -139,6 +140,7 @@ async def get_most_recent_full_state_ids_from_event_id_list( _, ) = await self.store.get_max_depth_of(event_ids) # mapping from (type, state_key) -> state_event_id + assert most_recent_event_id is not None prev_state_map = await self.state_store.get_state_ids_for_event( most_recent_event_id ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 471022470843..dcfe8caf473a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -14,7 +14,17 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import TYPE_CHECKING, Collection, Dict, Iterable, List, Optional, Set, Tuple +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, + cast, +) import attr from prometheus_client import Counter, Gauge @@ -33,7 +43,7 @@ from 
synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore from synapse.storage.engines import PostgresEngine -from synapse.storage.types import Cursor +from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.caches.lrucache import LruCache @@ -135,7 +145,7 @@ async def get_auth_chain_ids( # Check if we have indexed the room so we can use the chain cover # algorithm. - room = await self.get_room(room_id) + room = await self.get_room(room_id) # type: ignore[attr-defined] if room["has_auth_chain_index"]: try: return await self.db_pool.runInteraction( @@ -158,7 +168,11 @@ async def get_auth_chain_ids( ) def _get_auth_chain_ids_using_cover_index_txn( - self, txn: Cursor, room_id: str, event_ids: Collection[str], include_given: bool + self, + txn: LoggingTransaction, + room_id: str, + event_ids: Collection[str], + include_given: bool, ) -> Set[str]: """Calculates the auth chain IDs using the chain index.""" @@ -215,9 +229,9 @@ def _get_auth_chain_ids_using_cover_index_txn( chains: Dict[int, int] = {} # Add all linked chains reachable from initial set of chains. - for batch in batch_iter(event_chains, 1000): + for batch2 in batch_iter(event_chains, 1000): clause, args = make_in_list_sql_clause( - txn.database_engine, "origin_chain_id", batch + txn.database_engine, "origin_chain_id", batch2 ) txn.execute(sql % (clause,), args) @@ -297,7 +311,7 @@ def _get_auth_chain_ids_txn( front = set(event_ids) while front: - new_front = set() + new_front: Set[str] = set() for chunk in batch_iter(front, 100): # Pull the auth events either from the cache or DB. to_fetch: List[str] = [] # Event IDs to fetch from DB @@ -316,7 +330,7 @@ def _get_auth_chain_ids_txn( # Note we need to batch up the results by event ID before # adding to the cache. 
- to_cache = {} + to_cache: Dict[str, List[Tuple[str, int]]] = {} for event_id, auth_event_id, auth_event_depth in txn: to_cache.setdefault(event_id, []).append( (auth_event_id, auth_event_depth) @@ -349,7 +363,7 @@ async def get_auth_chain_difference( # Check if we have indexed the room so we can use the chain cover # algorithm. - room = await self.get_room(room_id) + room = await self.get_room(room_id) # type: ignore[attr-defined] if room["has_auth_chain_index"]: try: return await self.db_pool.runInteraction( @@ -370,7 +384,7 @@ async def get_auth_chain_difference( ) def _get_auth_chain_difference_using_cover_index_txn( - self, txn: Cursor, room_id: str, state_sets: List[Set[str]] + self, txn: LoggingTransaction, room_id: str, state_sets: List[Set[str]] ) -> Set[str]: """Calculates the auth chain difference using the chain index. @@ -444,9 +458,9 @@ def _get_auth_chain_difference_using_cover_index_txn( # (We need to take a copy of `seen_chains` as we want to mutate it in # the loop) - for batch in batch_iter(set(seen_chains), 1000): + for batch2 in batch_iter(set(seen_chains), 1000): clause, args = make_in_list_sql_clause( - txn.database_engine, "origin_chain_id", batch + txn.database_engine, "origin_chain_id", batch2 ) txn.execute(sql % (clause,), args) @@ -529,7 +543,7 @@ def _get_auth_chain_difference_using_cover_index_txn( return result def _get_auth_chain_difference_txn( - self, txn, state_sets: List[Set[str]] + self, txn: LoggingTransaction, state_sets: List[Set[str]] ) -> Set[str]: """Calculates the auth chain difference using a breadth first search. 
@@ -602,7 +616,7 @@ def _get_auth_chain_difference_txn( # I think building a temporary list with fetchall is more efficient than # just `search.extend(txn)`, but this is unconfirmed - search.extend(txn.fetchall()) + search.extend(cast(List[Tuple[int, str]], txn.fetchall())) # sort by depth search.sort() @@ -645,7 +659,7 @@ def _get_auth_chain_difference_txn( # We parse the results and add the to the `found` set and the # cache (note we need to batch up the results by event ID before # adding to the cache). - to_cache = {} + to_cache: Dict[str, List[Tuple[str, int]]] = {} for event_id, auth_event_id, auth_event_depth in txn: to_cache.setdefault(event_id, []).append( (auth_event_id, auth_event_depth) @@ -696,7 +710,7 @@ def _get_auth_chain_difference_txn( return {eid for eid, n in event_to_missing_sets.items() if n} async def get_oldest_event_ids_with_depth_in_room( - self, room_id + self, room_id: str ) -> List[Tuple[str, int]]: """Gets the oldest events(backwards extremities) in the room along with the aproximate depth. @@ -713,7 +727,9 @@ async def get_oldest_event_ids_with_depth_in_room( List of (event_id, depth) tuples """ - def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): + def get_oldest_event_ids_with_depth_in_room_txn( + txn: LoggingTransaction, room_id: str + ) -> List[Tuple[str, int]]: # Assemble a dictionary with event_id -> depth for the oldest events # we know of in the room. 
Backwards extremeties are the oldest # events we know of in the room but we only know of them because @@ -743,7 +759,7 @@ def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): txn.execute(sql, (room_id, False)) - return txn.fetchall() + return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( "get_oldest_event_ids_with_depth_in_room", @@ -752,7 +768,7 @@ def get_oldest_event_ids_with_depth_in_room_txn(txn, room_id): ) async def get_insertion_event_backward_extremities_in_room( - self, room_id + self, room_id: str ) -> List[Tuple[str, int]]: """Get the insertion events we know about that we haven't backfilled yet. @@ -768,7 +784,9 @@ async def get_insertion_event_backward_extremities_in_room( List of (event_id, depth) tuples """ - def get_insertion_event_backward_extremities_in_room_txn(txn, room_id): + def get_insertion_event_backward_extremities_in_room_txn( + txn: LoggingTransaction, room_id: str + ) -> List[Tuple[str, int]]: sql = """ SELECT b.event_id, MAX(e.depth) FROM insertion_events as i /* We only want insertion events that are also marked as backwards extremities */ @@ -780,7 +798,7 @@ def get_insertion_event_backward_extremities_in_room_txn(txn, room_id): """ txn.execute(sql, (room_id,)) - return txn.fetchall() + return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( "get_insertion_event_backward_extremities_in_room", @@ -788,7 +806,7 @@ def get_insertion_event_backward_extremities_in_room_txn(txn, room_id): room_id, ) - async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: + async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the max depth from a set of event IDs Args: @@ -817,7 +835,7 @@ async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: return max_depth_event_id, current_max_depth - async def get_min_depth_of(self, event_ids: List[str]) 
-> Tuple[str, int]: + async def get_min_depth_of(self, event_ids: List[str]) -> Tuple[Optional[str], int]: """Returns the event ID and depth for the event that has the min depth from a set of event IDs Args: @@ -865,7 +883,9 @@ async def get_prev_events_for_room(self, room_id: str) -> List[str]: "get_prev_events_for_room", self._get_prev_events_for_room_txn, room_id ) - def _get_prev_events_for_room_txn(self, txn, room_id: str): + def _get_prev_events_for_room_txn( + self, txn: LoggingTransaction, room_id: str + ) -> List[str]: # we just use the 10 newest events. Older events will become # prev_events of future events. @@ -896,7 +916,7 @@ async def get_rooms_with_many_extremities( sorted by extremity count. """ - def _get_rooms_with_many_extremities_txn(txn): + def _get_rooms_with_many_extremities_txn(txn: LoggingTransaction) -> List[str]: where_clause = "1=1" if room_id_filter: where_clause = "room_id NOT IN (%s)" % ( @@ -937,7 +957,9 @@ async def get_min_depth(self, room_id: str) -> Optional[int]: "get_min_depth", self._get_min_depth_interaction, room_id ) - def _get_min_depth_interaction(self, txn, room_id): + def _get_min_depth_interaction( + self, txn: LoggingTransaction, room_id: str + ) -> Optional[int]: min_depth = self.db_pool.simple_select_one_onecol_txn( txn, table="room_depth", @@ -966,22 +988,24 @@ async def get_forward_extremities_for_room_at_stream_ordering( """ # We want to make the cache more effective, so we clamp to the last # change before the given ordering. - last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id) + last_change = self._events_stream_cache.get_max_pos_of_last_change(room_id) # type: ignore[attr-defined] # We don't always have a full stream_to_exterm_id table, e.g. 
after # the upgrade that introduced it, so we make sure we never ask for a # stream_ordering from before a restart - last_change = max(self._stream_order_on_start, last_change) + last_change = max(self._stream_order_on_start, last_change) # type: ignore[attr-defined] # provided the last_change is recent enough, we now clamp the requested # stream_ordering to it. - if last_change > self.stream_ordering_month_ago: + if last_change > self.stream_ordering_month_ago: # type: ignore[attr-defined] stream_ordering = min(last_change, stream_ordering) return await self._get_forward_extremeties_for_room(room_id, stream_ordering) @cached(max_entries=5000, num_args=2) - async def _get_forward_extremeties_for_room(self, room_id, stream_ordering): + async def _get_forward_extremeties_for_room( + self, room_id: str, stream_ordering: int + ) -> List[str]: """For a given room_id and stream_ordering, return the forward extremeties of the room at that point in "time". @@ -989,7 +1013,7 @@ async def _get_forward_extremeties_for_room(self, room_id, stream_ordering): stream_orderings from that point. """ - if stream_ordering <= self.stream_ordering_month_ago: + if stream_ordering <= self.stream_ordering_month_ago: # type: ignore[attr-defined] raise StoreError(400, "stream_ordering too old %s" % (stream_ordering,)) sql = """ @@ -1002,7 +1026,7 @@ async def _get_forward_extremeties_for_room(self, room_id, stream_ordering): WHERE room_id = ? 
""" - def get_forward_extremeties_for_room_txn(txn): + def get_forward_extremeties_for_room_txn(txn: LoggingTransaction) -> List[str]: txn.execute(sql, (stream_ordering, room_id)) return [event_id for event_id, in txn] @@ -1104,8 +1128,8 @@ def _get_connected_prev_event_backfill_results_txn( ] async def get_backfill_events( - self, room_id: str, seed_event_id_list: list, limit: int - ): + self, room_id: str, seed_event_id_list: List[str], limit: int + ) -> List[EventBase]: """Get a list of Events for a given topic that occurred before (and including) the events in seed_event_id_list. Return a list of max size `limit` @@ -1123,10 +1147,19 @@ async def get_backfill_events( ) events = await self.get_events_as_list(event_ids) return sorted( - events, key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering) + # type-ignore: mypy doesn't like negating the Optional[int] stream_ordering. + # But it's never None, because these events were previously persisted to the DB. + events, + key=lambda e: (-e.depth, -e.internal_metadata.stream_ordering), # type: ignore[operator] ) - def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): + def _get_backfill_events( + self, + txn: LoggingTransaction, + room_id: str, + seed_event_id_list: List[str], + limit: int, + ) -> Set[str]: """ We want to make sure that we do a breadth-first, "depth" ordered search. We also handle navigating historical branches of history connected by @@ -1139,7 +1172,7 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): limit, ) - event_id_results = set() + event_id_results: Set[str] = set() # In a PriorityQueue, the lowest valued entries are retrieved first. # We're using depth as the priority in the queue and tie-break based on @@ -1147,7 +1180,7 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): # highest and newest-in-time message. 
We add events to the queue with a # negative depth so that we process the newest-in-time messages first # going backwards in time. stream_ordering follows the same pattern. - queue = PriorityQueue() + queue: "PriorityQueue[Tuple[int, int, str, str]]" = PriorityQueue() for seed_event_id in seed_event_id_list: event_lookup_result = self.db_pool.simple_select_one_txn( @@ -1253,7 +1286,13 @@ def _get_backfill_events(self, txn, room_id, seed_event_id_list, limit): return event_id_results - async def get_missing_events(self, room_id, earliest_events, latest_events, limit): + async def get_missing_events( + self, + room_id: str, + earliest_events: List[str], + latest_events: List[str], + limit: int, + ) -> List[EventBase]: ids = await self.db_pool.runInteraction( "get_missing_events", self._get_missing_events, @@ -1264,11 +1303,18 @@ async def get_missing_events(self, room_id, earliest_events, latest_events, limi ) return await self.get_events_as_list(ids) - def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limit): + def _get_missing_events( + self, + txn: LoggingTransaction, + room_id: str, + earliest_events: List[str], + latest_events: List[str], + limit: int, + ) -> List[str]: seen_events = set(earliest_events) front = set(latest_events) - seen_events - event_results = [] + event_results: List[str] = [] query = ( "SELECT prev_event_id FROM event_edges " @@ -1311,7 +1357,7 @@ async def get_successor_events(self, event_id: str) -> List[str]: @wrap_as_background_process("delete_old_forward_extrem_cache") async def _delete_old_forward_extrem_cache(self) -> None: - def _delete_old_forward_extrem_cache_txn(txn): + def _delete_old_forward_extrem_cache_txn(txn: LoggingTransaction) -> None: # Delete entries older than a month, while making sure we don't delete # the only entries for a room. sql = """ @@ -1324,7 +1370,7 @@ def _delete_old_forward_extrem_cache_txn(txn): ) AND stream_ordering < ? 
""" txn.execute( - sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago) + sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago) # type: ignore[attr-defined] ) await self.db_pool.runInteraction( @@ -1382,7 +1428,9 @@ async def remove_received_event_from_staging( """ if self.db_pool.engine.supports_returning: - def _remove_received_event_from_staging_txn(txn): + def _remove_received_event_from_staging_txn( + txn: LoggingTransaction, + ) -> Optional[int]: sql = """ DELETE FROM federation_inbound_events_staging WHERE origin = ? AND event_id = ? @@ -1390,21 +1438,24 @@ def _remove_received_event_from_staging_txn(txn): """ txn.execute(sql, (origin, event_id)) - return txn.fetchone() + row = cast(Optional[Tuple[int]], txn.fetchone()) - row = await self.db_pool.runInteraction( + if row is None: + return None + + return row[0] + + return await self.db_pool.runInteraction( "remove_received_event_from_staging", _remove_received_event_from_staging_txn, db_autocommit=True, ) - if row is None: - return None - - return row[0] else: - def _remove_received_event_from_staging_txn(txn): + def _remove_received_event_from_staging_txn( + txn: LoggingTransaction, + ) -> Optional[int]: received_ts = self.db_pool.simple_select_one_onecol_txn( txn, table="federation_inbound_events_staging", @@ -1437,7 +1488,9 @@ async def get_next_staged_event_id_for_room( ) -> Optional[Tuple[str, str]]: """Get the next event ID in the staging area for the given room.""" - def _get_next_staged_event_id_for_room_txn(txn): + def _get_next_staged_event_id_for_room_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, str]]: sql = """ SELECT origin, event_id FROM federation_inbound_events_staging @@ -1448,7 +1501,7 @@ def _get_next_staged_event_id_for_room_txn(txn): txn.execute(sql, (room_id,)) - return txn.fetchone() + return cast(Optional[Tuple[str, str]], txn.fetchone()) return await self.db_pool.runInteraction( "get_next_staged_event_id_for_room", 
_get_next_staged_event_id_for_room_txn @@ -1461,7 +1514,9 @@ async def get_next_staged_event_for_room( ) -> Optional[Tuple[str, EventBase]]: """Get the next event in the staging area for the given room.""" - def _get_next_staged_event_for_room_txn(txn): + def _get_next_staged_event_for_room_txn( + txn: LoggingTransaction, + ) -> Optional[Tuple[str, str, str]]: sql = """ SELECT event_json, internal_metadata, origin FROM federation_inbound_events_staging @@ -1471,7 +1526,7 @@ def _get_next_staged_event_for_room_txn(txn): """ txn.execute(sql, (room_id,)) - return txn.fetchone() + return cast(Optional[Tuple[str, str, str]], txn.fetchone()) row = await self.db_pool.runInteraction( "get_next_staged_event_for_room", _get_next_staged_event_for_room_txn @@ -1599,18 +1654,20 @@ async def get_all_rooms_with_staged_incoming_events(self) -> List[str]: ) @wrap_as_background_process("_get_stats_for_federation_staging") - async def _get_stats_for_federation_staging(self): + async def _get_stats_for_federation_staging(self) -> None: """Update the prometheus metrics for the inbound federation staging area.""" - def _get_stats_for_federation_staging_txn(txn): + def _get_stats_for_federation_staging_txn( + txn: LoggingTransaction, + ) -> Tuple[int, int]: txn.execute("SELECT count(*) FROM federation_inbound_events_staging") - (count,) = txn.fetchone() + (count,) = cast(Tuple[int], txn.fetchone()) txn.execute( "SELECT min(received_ts) FROM federation_inbound_events_staging" ) - (received_ts,) = txn.fetchone() + (received_ts,) = cast(Tuple[Optional[int]], txn.fetchone()) # If there is nothing in the staging area default it to 0. 
age = 0 @@ -1651,19 +1708,21 @@ def __init__( self.EVENT_AUTH_STATE_ONLY, self._background_delete_non_state_event_auth ) - async def clean_room_for_join(self, room_id): - return await self.db_pool.runInteraction( + async def clean_room_for_join(self, room_id: str) -> None: + await self.db_pool.runInteraction( "clean_room_for_join", self._clean_room_for_join_txn, room_id ) - def _clean_room_for_join_txn(self, txn, room_id): + def _clean_room_for_join_txn(self, txn: LoggingTransaction, room_id: str) -> None: query = "DELETE FROM event_forward_extremities WHERE room_id = ?" txn.execute(query, (room_id,)) txn.call_after(self.get_latest_event_ids_in_room.invalidate, (room_id,)) - async def _background_delete_non_state_event_auth(self, progress, batch_size): - def delete_event_auth(txn): + async def _background_delete_non_state_event_auth( + self, progress: JsonDict, batch_size: int + ) -> int: + def delete_event_auth(txn: LoggingTransaction) -> bool: target_min_stream_id = progress.get("target_min_stream_id_inclusive") max_stream_id = progress.get("max_stream_id_exclusive") diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index 060ba5f5174f..e95dfdce2086 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -332,6 +332,7 @@ def test_backfill_floating_outlier_membership_auth(self) -> None: most_recent_prev_event_depth, ) = self.get_success(self.store.get_max_depth_of(prev_event_ids)) # mapping from (type, state_key) -> state_event_id + assert most_recent_prev_event_id is not None prev_state_map = self.get_success( self.state_store.get_state_ids_for_event(most_recent_prev_event_id) ) From 3d8839c30c96b49588196c60e2bbf056ed6465eb Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Wed, 18 May 2022 17:56:23 +0100 Subject: [PATCH 056/181] Add documentation for cancellation of request processing (#12761) Signed-off-by: Sean Quah --- changelog.d/12761.doc | 1 + 
docs/SUMMARY.md | 1 + .../synapse_architecture/cancellation.md | 392 ++++++++++++++++++ 3 files changed, 394 insertions(+) create mode 100644 changelog.d/12761.doc create mode 100644 docs/development/synapse_architecture/cancellation.md diff --git a/changelog.d/12761.doc b/changelog.d/12761.doc new file mode 100644 index 000000000000..2eb2c0976f1b --- /dev/null +++ b/changelog.d/12761.doc @@ -0,0 +1 @@ +Add documentation for cancellation of request processing. diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 65570cefbe1b..8400a6539a4e 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -89,6 +89,7 @@ - [Database Schemas](development/database_schema.md) - [Experimental features](development/experimental_features.md) - [Synapse Architecture]() + - [Cancellation](development/synapse_architecture/cancellation.md) - [Log Contexts](log_contexts.md) - [Replication](replication.md) - [TCP Replication](tcp_replication.md) diff --git a/docs/development/synapse_architecture/cancellation.md b/docs/development/synapse_architecture/cancellation.md new file mode 100644 index 000000000000..ef9e0226353b --- /dev/null +++ b/docs/development/synapse_architecture/cancellation.md @@ -0,0 +1,392 @@ +# Cancellation +Sometimes, requests take a long time to service and clients disconnect +before Synapse produces a response. To avoid wasting resources, Synapse +can cancel request processing for select endpoints marked with the +`@cancellable` decorator. + +Synapse makes use of Twisted's `Deferred.cancel()` feature to make +cancellation work. The `@cancellable` decorator does nothing by itself +and merely acts as a flag, signalling to developers and other code alike +that a method can be cancelled. + +## Enabling cancellation for an endpoint +1. Check that the endpoint method, and any `async` functions in its call + tree handle cancellation correctly. See + [Handling cancellation correctly](#handling-cancellation-correctly) + for a list of things to look out for. +2. 
Add the `@cancellable` decorator to the `on_GET/POST/PUT/DELETE` + method. It's not recommended to make non-`GET` methods cancellable, + since cancellation midway through some database updates is less + likely to be handled correctly. + +## Mechanics +There are two stages to cancellation: downward propagation of a +`cancel()` call, followed by upward propagation of a `CancelledError` +out of a blocked `await`. +Both Twisted and asyncio have a cancellation mechanism. + +| | Method | Exception | Exception inherits from | +|---------------|---------------------|-----------------------------------------|-------------------------| +| Twisted | `Deferred.cancel()` | `twisted.internet.defer.CancelledError` | `Exception` (!) | +| asyncio | `Task.cancel()` | `asyncio.CancelledError` | `BaseException` | + +### Deferred.cancel() +When Synapse starts handling a request, it runs the async method +responsible for handling it using `defer.ensureDeferred`, which returns +a `Deferred`. For example: + +```python +def do_something() -> Deferred[None]: + ... + +@cancellable +async def on_GET() -> Tuple[int, JsonDict]: + d = make_deferred_yieldable(do_something()) + await d + return 200, {} + +request = defer.ensureDeferred(on_GET()) +``` + +When a client disconnects early, Synapse checks for the presence of the +`@cancellable` decorator on `on_GET`. Since `on_GET` is cancellable, +`Deferred.cancel()` is called on the `Deferred` from +`defer.ensureDeferred`, i.e., `request`. Twisted knows which `Deferred` +`request` is waiting on and passes the `cancel()` call on to `d`. + +The `Deferred` being waited on, `d`, may have its own handling for +`cancel()` and pass the call on to other `Deferred`s. + +Eventually, a `Deferred` handles the `cancel()` call by resolving itself +with a `CancelledError`. + +### CancelledError +The `CancelledError` gets raised out of the `await` and bubbles up, as +per normal Python exception handling. 
+ +## Handling cancellation correctly +In general, when writing code that might be subject to cancellation, two +things must be considered: + * The effect of `CancelledError`s raised out of `await`s. + * The effect of `Deferred`s being `cancel()`ed. + +Examples of code that handles cancellation incorrectly include: + * `try-except` blocks which swallow `CancelledError`s. + * Code that shares the same `Deferred`, which may be cancelled, between + multiple requests. + * Code that starts some processing that's exempt from cancellation, but + uses a logging context from cancellable code. The logging context + will be finished upon cancellation, while the uncancelled processing + is still using it. + +Some common patterns are listed below in more detail. + +### `async` function calls +Most functions in Synapse are relatively straightforward from a +cancellation standpoint: they don't do anything with `Deferred`s and +purely call and `await` other `async` functions. + +An `async` function handles cancellation correctly if its own code +handles cancellation correctly and all the `async` functions it calls +handle cancellation correctly. For example: +```python +async def do_two_things() -> None: + check_something() + await do_something() + await do_something_else() +``` +`do_two_things` handles cancellation correctly if `do_something` and +`do_something_else` handle cancellation correctly. + +That is, when checking whether a function handles cancellation +correctly, its implementation and all its `async` function calls need to +be checked, recursively. + +As `check_something` is not `async`, it does not need to be checked. + +### CancelledErrors +Because Twisted's `CancelledError`s are `Exception`s, it's easy to +accidentally catch and suppress them. Care must be taken to ensure that +`CancelledError`s are allowed to propagate upwards. + + + + + + + + + + +
+ +**Bad**: +```python +try: + await do_something() +except Exception: + # `CancelledError` gets swallowed here. + logger.info(...) +``` + + +**Good**: +```python +try: + await do_something() +except CancelledError: + raise +except Exception: + logger.info(...) +``` +
+ +**OK**: +```python +try: + check_something() + # A `CancelledError` won't ever be raised here. +except Exception: + logger.info(...) +``` + + +**Good**: +```python +try: + await do_something() +except ValueError: + logger.info(...) +``` +
+ +#### defer.gatherResults +`defer.gatherResults` produces a `Deferred` which: + * broadcasts `cancel()` calls to every `Deferred` being waited on. + * wraps the first exception it sees in a `FirstError`. + +Together, this means that `CancelledError`s will be wrapped in +a `FirstError` unless unwrapped. Such `FirstError`s are liable to be +swallowed, so they must be unwrapped. + + + + + + +
+ +**Bad**: +```python +async def do_something() -> None: + await make_deferred_yieldable( + defer.gatherResults([...], consumeErrors=True) + ) + +try: + await do_something() +except CancelledError: + raise +except Exception: + # `FirstError(CancelledError)` gets swallowed here. + logger.info(...) +``` + + + +**Good**: +```python +async def do_something() -> None: + await make_deferred_yieldable( + defer.gatherResults([...], consumeErrors=True) + ).addErrback(unwrapFirstError) + +try: + await do_something() +except CancelledError: + raise +except Exception: + logger.info(...) +``` +
+ +### Creation of `Deferred`s +If a function creates a `Deferred`, the effect of cancelling it must be considered. `Deferred`s that get shared are likely to have unintended behaviour when cancelled. + + + + + + + + + +
+ +**Bad**: +```python +cache: Dict[str, Deferred[None]] = {} + +def wait_for_room(room_id: str) -> Deferred[None]: + deferred = cache.get(room_id) + if deferred is None: + deferred = Deferred() + cache[room_id] = deferred + # `deferred` can have multiple waiters. + # All of them will observe a `CancelledError` + # if any one of them is cancelled. + return make_deferred_yieldable(deferred) + +# Request 1 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +# Request 2 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +``` + + +**Good**: +```python +cache: Dict[str, Deferred[None]] = {} + +def wait_for_room(room_id: str) -> Deferred[None]: + deferred = cache.get(room_id) + if deferred is None: + deferred = Deferred() + cache[room_id] = deferred + # `deferred` will never be cancelled now. + # A `CancelledError` will still come out of + # the `await`. + # `delay_cancellation` may also be used. + return make_deferred_yieldable(stop_cancellation(deferred)) + +# Request 1 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +# Request 2 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +``` +
+ + +**Good**: +```python +cache: Dict[str, List[Deferred[None]]] = {} + +def wait_for_room(room_id: str) -> Deferred[None]: + if room_id not in cache: + cache[room_id] = [] + # Each request gets its own `Deferred` to wait on. + deferred = Deferred() + cache[room_id].append(deferred) + return make_deferred_yieldable(deferred) + +# Request 1 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +# Request 2 +await wait_for_room("!aAAaaAaaaAAAaAaAA:matrix.org") +``` +
+ +### Uncancelled processing +Some `async` functions may kick off some `async` processing which is +intentionally protected from cancellation, by `stop_cancellation` or +other means. If the `async` processing inherits the logcontext of the +request which initiated it, care must be taken to ensure that the +logcontext is not finished before the `async` processing completes. + + + + + + + + + + +
+ +**Bad**: +```python +cache: Optional[ObservableDeferred[None]] = None + +async def do_something_else( + to_resolve: Deferred[None] +) -> None: + await ... + logger.info("done!") + to_resolve.callback(None) + +async def do_something() -> None: + if not cache: + to_resolve = Deferred() + cache = ObservableDeferred(to_resolve) + # `do_something_else` will never be cancelled and + # can outlive the `request-1` logging context. + run_in_background(do_something_else, to_resolve) + + await make_deferred_yieldable(cache.observe()) + +with LoggingContext("request-1"): + await do_something() +``` + + +**Good**: +```python +cache: Optional[ObservableDeferred[None]] = None + +async def do_something_else( + to_resolve: Deferred[None] +) -> None: + await ... + logger.info("done!") + to_resolve.callback(None) + +async def do_something() -> None: + if not cache: + to_resolve = Deferred() + cache = ObservableDeferred(to_resolve) + run_in_background(do_something_else, to_resolve) + # We'll wait until `do_something_else` is + # done before raising a `CancelledError`. + await make_deferred_yieldable( + delay_cancellation(cache.observe()) + ) + else: + await make_deferred_yieldable(cache.observe()) + +with LoggingContext("request-1"): + await do_something() +``` +
+ +**OK**: +```python +cache: Optional[ObservableDeferred[None]] = None + +async def do_something_else( + to_resolve: Deferred[None] +) -> None: + await ... + logger.info("done!") + to_resolve.callback(None) + +async def do_something() -> None: + if not cache: + to_resolve = Deferred() + cache = ObservableDeferred(to_resolve) + # `do_something_else` will get its own independent + # logging context. `request-1` will not count any + # metrics from `do_something_else`. + run_as_background_process( + "do_something_else", + do_something_else, + to_resolve, + ) + + await make_deferred_yieldable(cache.observe()) + +with LoggingContext("request-1"): + await do_something() +``` + +
From 19d79b6ebe3070ad7352f24549fbafb9dee44b75 Mon Sep 17 00:00:00 2001 From: Shay Date: Wed, 18 May 2022 10:15:52 -0700 Subject: [PATCH 057/181] Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. (#12775) --- changelog.d/12775.misc | 1 + synapse/state/__init__.py | 35 +++++++++++++----------- synapse/storage/databases/state/store.py | 2 +- synapse/storage/state.py | 12 ++++---- tests/test_state.py | 13 +++++++++ 5 files changed, 40 insertions(+), 23 deletions(-) create mode 100644 changelog.d/12775.misc diff --git a/changelog.d/12775.misc b/changelog.d/12775.misc new file mode 100644 index 000000000000..eac326cde3a7 --- /dev/null +++ b/changelog.d/12775.misc @@ -0,0 +1 @@ +Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. \ No newline at end of file diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 0219091c4e8b..4b4ed42cff33 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -288,7 +288,6 @@ async def compute_event_context( # # first of all, figure out the state before the event # - if old_state: # if we're given the state before the event, then we use that state_ids_before_event: StateMap[str] = { @@ -419,33 +418,37 @@ async def resolve_state_groups_for_events( """ logger.debug("resolve_state_groups event_ids %s", event_ids) - # map from state group id to the state in that state group (where - # 'state' is a map from state key to event id) - # dict[int, dict[(str, str), str]] - state_groups_ids = await self.state_store.get_state_groups_ids( - room_id, event_ids - ) - - if len(state_groups_ids) == 0: - return _StateCacheEntry(state={}, state_group=None) - elif len(state_groups_ids) == 1: - name, state_list = list(state_groups_ids.items()).pop() + state_groups = await self.state_store.get_state_group_for_events(event_ids) - prev_group, delta_ids = await self.state_store.get_state_group_delta(name) + state_group_ids 
= state_groups.values() + # check if each event has same state group id, if so there's no state to resolve + state_group_ids_set = set(state_group_ids) + if len(state_group_ids_set) == 1: + (state_group_id,) = state_group_ids_set + state = await self.state_store.get_state_for_groups(state_group_ids_set) + prev_group, delta_ids = await self.state_store.get_state_group_delta( + state_group_id + ) return _StateCacheEntry( - state=state_list, - state_group=name, + state=state[state_group_id], + state_group=state_group_id, prev_group=prev_group, delta_ids=delta_ids, ) + elif len(state_group_ids_set) == 0: + return _StateCacheEntry(state={}, state_group=None) room_version = await self.store.get_room_version_id(room_id) + state_to_resolve = await self.state_store.get_state_for_groups( + state_group_ids_set + ) + result = await self._state_resolution_handler.resolve_state_groups( room_id, room_version, - state_groups_ids, + state_to_resolve, None, state_res_store=StateResolutionStore(self.store), ) diff --git a/synapse/storage/databases/state/store.py b/synapse/storage/databases/state/store.py index 7614d76ac646..609a2b88bfbf 100644 --- a/synapse/storage/databases/state/store.py +++ b/synapse/storage/databases/state/store.py @@ -189,7 +189,7 @@ def _get_state_for_group_using_cache( group: int, state_filter: StateFilter, ) -> Tuple[MutableStateMap[str], bool]: - """Checks if group is in cache. See `_get_state_for_groups` + """Checks if group is in cache. 
See `get_state_for_groups` Args: cache: the state group cache to use diff --git a/synapse/storage/state.py b/synapse/storage/state.py index d4a1bd4f9d7d..a6c60de50434 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -586,7 +586,7 @@ async def get_state_groups_ids( if not event_ids: return {} - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups(groups) @@ -602,7 +602,7 @@ async def get_state_ids_for_group(self, state_group: int) -> StateMap[str]: Returns: Resolves to a map of (type, state_key) -> event_id """ - group_to_state = await self._get_state_for_groups((state_group,)) + group_to_state = await self.get_state_for_groups((state_group,)) return group_to_state[state_group] @@ -675,7 +675,7 @@ async def get_state_for_events( RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -716,7 +716,7 @@ async def get_state_ids_for_events( RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self._get_state_group_for_events(event_ids) + event_to_groups = await self.get_state_group_for_events(event_ids) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -774,7 +774,7 @@ async def get_state_ids_for_event( ) return state_map[event_id] - def _get_state_for_groups( + def get_state_for_groups( self, groups: Iterable[int], state_filter: Optional[StateFilter] = None ) -> Awaitable[Dict[int, MutableStateMap[str]]]: """Gets the state at each 
of a list of state groups, optionally @@ -792,7 +792,7 @@ def _get_state_for_groups( groups, state_filter or StateFilter.all() ) - async def _get_state_group_for_events( + async def get_state_group_for_events( self, event_ids: Collection[str], await_full_state: bool = True, diff --git a/tests/test_state.py b/tests/test_state.py index 651ec1c7d4bd..74a8ce6096b9 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -129,6 +129,19 @@ def register_event_id_state_group(self, event_id, state_group): async def get_room_version_id(self, room_id): return RoomVersions.V1.identifier + async def get_state_group_for_events(self, event_ids): + res = {} + for event in event_ids: + res[event] = self._event_to_state_group[event] + return res + + async def get_state_for_groups(self, groups): + res = {} + for group in groups: + state = self._group_to_state[group] + res[group] = state + return res + class DictObj(dict): def __init__(self, **kwargs): From 0fce474a4019b441940b6e12ac8a50ffac09727e Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 18 May 2022 18:24:44 +0100 Subject: [PATCH 058/181] Fix YAML parsing error in `url_preview_accept_language` (#12785) --- changelog.d/12785.doc | 1 + docs/usage/configuration/config_documentation.md | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/12785.doc diff --git a/changelog.d/12785.doc b/changelog.d/12785.doc new file mode 100644 index 000000000000..5209dfeb053e --- /dev/null +++ b/changelog.d/12785.doc @@ -0,0 +1 @@ +Fix invalid YAML syntax in the example documentation for the `url_preview_accept_language` config option. 
diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 3ad3085bfac1..6b9ffc09d2b3 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1194,7 +1194,7 @@ For more information on using Synapse with Postgres, see [here](../../postgres.md). Example SQLite configuration: -``` +```yaml database: name: sqlite3 args: @@ -1202,7 +1202,7 @@ database: ``` Example Postgres configuration: -``` +```yaml database: name: psycopg2 txn_limit: 10000 @@ -1679,10 +1679,10 @@ Defaults to "en". Example configuration: ```yaml url_preview_accept_language: - - en-UK - - en-US;q=0.9 - - fr;q=0.8 - - *;q=0.7 + - 'en-UK' + - 'en-US;q=0.9' + - 'fr;q=0.8' + - '*;q=0.7' ``` ---- Config option: `oembed` From d38c73e9abbd7fe40f4f24d4c96107415e6f15a1 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 18 May 2022 20:33:57 +0200 Subject: [PATCH 059/181] Skip waiting for full state if a StateFilter does not require it (#12498) If `StateFilter` specifies a state set which we will have regardless of state-syncing, then we may as well return it immediately. --- changelog.d/12498.misc | 1 + synapse/storage/state.py | 63 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12498.misc diff --git a/changelog.d/12498.misc b/changelog.d/12498.misc new file mode 100644 index 000000000000..8a00b94fbeef --- /dev/null +++ b/changelog.d/12498.misc @@ -0,0 +1 @@ +Preparation for faster-room-join work: return subsets of room state which we already have, immediately. diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a6c60de50434..e58301a8f087 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -1,4 +1,5 @@ # Copyright 2014-2016 OpenMarket Ltd +# Copyright 2022 The Matrix.org Foundation C.I.C. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +16,7 @@ from typing import ( TYPE_CHECKING, Awaitable, + Callable, Collection, Dict, Iterable, @@ -532,6 +534,44 @@ def approx_difference(self, other: "StateFilter") -> "StateFilter": new_all, new_excludes, new_wildcards, new_concrete_keys ) + def must_await_full_state(self, is_mine_id: Callable[[str], bool]) -> bool: + """Check if we need to wait for full state to complete to calculate this state + + If we have a state filter which is completely satisfied even with partial + state, then we don't need to await_full_state before we can return it. + + Args: + is_mine_id: a callable which confirms if a given state_key matches a mxid + of a local user + """ + + # TODO(faster_joins): it's not entirely clear that this is safe. In particular, + # there may be circumstances in which we return a piece of state that, once we + # resync the state, we discover is invalid. For example: if it turns out that + # the sender of a piece of state wasn't actually in the room, then clearly that + # state shouldn't have been returned. + # We should at least add some tests around this to see what happens. + + # if we haven't requested membership events, then it depends on the value of + # 'include_others' + if EventTypes.Member not in self.types: + return self.include_others + + # if we're looking for *all* membership events, then we have to wait + member_state_keys = self.types[EventTypes.Member] + if member_state_keys is None: + return True + + # otherwise, consider whose membership we are looking for. If it's entirely + # local users, then we don't need to wait. 
+ for state_key in member_state_keys: + if not is_mine_id(state_key): + # remote user + return True + + # local users only + return False + _ALL_STATE_FILTER = StateFilter(types=frozendict(), include_others=True) _ALL_NON_MEMBER_STATE_FILTER = StateFilter( @@ -544,6 +584,7 @@ class StateGroupStorage: """High level interface to fetching state for event.""" def __init__(self, hs: "HomeServer", stores: "Databases"): + self._is_mine_id = hs.is_mine_id self.stores = stores self._partial_state_events_tracker = PartialStateEventsTracker(stores.main) @@ -675,7 +716,13 @@ async def get_state_for_events( RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self.get_state_group_for_events(event_ids) + await_full_state = True + if state_filter and not state_filter.must_await_full_state(self._is_mine_id): + await_full_state = False + + event_to_groups = await self.get_state_group_for_events( + event_ids, await_full_state=await_full_state + ) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -699,7 +746,9 @@ async def get_state_for_events( return {event: event_to_state[event] for event in event_ids} async def get_state_ids_for_events( - self, event_ids: Collection[str], state_filter: Optional[StateFilter] = None + self, + event_ids: Collection[str], + state_filter: Optional[StateFilter] = None, ) -> Dict[str, StateMap[str]]: """ Get the state dicts corresponding to a list of events, containing the event_ids @@ -716,7 +765,13 @@ async def get_state_ids_for_events( RuntimeError if we don't have a state group for one or more of the events (ie they are outliers or unknown) """ - event_to_groups = await self.get_state_group_for_events(event_ids) + await_full_state = True + if state_filter and not state_filter.must_await_full_state(self._is_mine_id): + await_full_state = False + + event_to_groups = await self.get_state_group_for_events( + 
event_ids, await_full_state=await_full_state + ) groups = set(event_to_groups.values()) group_to_state = await self.stores.state._get_state_for_groups( @@ -802,7 +857,7 @@ async def get_state_group_for_events( Args: event_ids: events to get state groups for await_full_state: if true, will block if we do not yet have complete - state at this event. + state at these events. """ if await_full_state: await self._partial_state_events_tracker.await_full_state(event_ids) From a1cb05b3e8b439a2e68d3762ea7373785b8be4e1 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 18 May 2022 14:49:33 -0400 Subject: [PATCH 060/181] Fix federation in demo scripts. (#12783) --- changelog.d/12783.misc | 1 + demo/start.sh | 7 +++++-- docs/development/demo.md | 9 +++++---- 3 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 changelog.d/12783.misc diff --git a/changelog.d/12783.misc b/changelog.d/12783.misc new file mode 100644 index 000000000000..97575608bb8b --- /dev/null +++ b/changelog.d/12783.misc @@ -0,0 +1 @@ +Fix federation when using the demo scripts. diff --git a/demo/start.sh b/demo/start.sh index 5a9972d24c2c..96b3a2ceab2f 100755 --- a/demo/start.sh +++ b/demo/start.sh @@ -12,6 +12,7 @@ export PYTHONPATH echo "$PYTHONPATH" +# Create servers which listen on HTTP at 808x and HTTPS at 848x. for port in 8080 8081 8082; do echo "Starting server on port $port... " @@ -19,10 +20,12 @@ for port in 8080 8081 8082; do mkdir -p demo/$port pushd demo/$port || exit - # Generate the configuration for the homeserver at localhost:848x. + # Generate the configuration for the homeserver at localhost:848x, note that + # the homeserver name needs to match the HTTPS listening port for federation + # to properly work.. 
python3 -m synapse.app.homeserver \ --generate-config \ - --server-name "localhost:$port" \ + --server-name "localhost:$https_port" \ --config-path "$port.config" \ --report-stats no diff --git a/docs/development/demo.md b/docs/development/demo.md index 4277252ceb60..893ed6998ebb 100644 --- a/docs/development/demo.md +++ b/docs/development/demo.md @@ -5,7 +5,7 @@ Requires you to have a [Synapse development environment setup](https://matrix-org.github.io/synapse/develop/development/contributing_guide.html#4-install-the-dependencies). The demo setup allows running three federation Synapse servers, with server -names `localhost:8080`, `localhost:8081`, and `localhost:8082`. +names `localhost:8480`, `localhost:8481`, and `localhost:8482`. You can access them via any Matrix client over HTTP at `localhost:8080`, `localhost:8081`, and `localhost:8082` or over HTTPS at `localhost:8480`, @@ -20,9 +20,10 @@ and the servers are configured in a highly insecure way, including: The servers are configured to store their data under `demo/8080`, `demo/8081`, and `demo/8082`. This includes configuration, logs, SQLite databases, and media. -Note that when joining a public room on a different HS via "#foo:bar.net", then -you are (in the current impl) joining a room with room_id "foo". This means that -it won't work if your HS already has a room with that name. +Note that when joining a public room on a different homeserver via "#foo:bar.net", +then you are (in the current implementation) joining a room with room_id "foo". +This means that it won't work if your homeserver already has a room with that +name. 
## Using the demo scripts From 6ff99e3bea481790782c252c5433e9a88f65c4b0 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 18 May 2022 20:10:21 +0100 Subject: [PATCH 061/181] Downgrade some OIDC exceptions to warnings (#12723) --- changelog.d/12723.misc | 1 + synapse/handlers/oidc.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12723.misc diff --git a/changelog.d/12723.misc b/changelog.d/12723.misc new file mode 100644 index 000000000000..4f5bffeda639 --- /dev/null +++ b/changelog.d/12723.misc @@ -0,0 +1 @@ +Downgrade some OIDC errors to warnings in the logs, to reduce the noise of Sentry reports. diff --git a/synapse/handlers/oidc.py b/synapse/handlers/oidc.py index f6ffb7d18d91..9de61d554f41 100644 --- a/synapse/handlers/oidc.py +++ b/synapse/handlers/oidc.py @@ -224,7 +224,7 @@ async def handle_oidc_callback(self, request: SynapseRequest) -> None: self._sso_handler.render_error(request, "invalid_session", str(e)) return except MacaroonInvalidSignatureException as e: - logger.exception("Could not verify session for OIDC callback") + logger.warning("Could not verify session for OIDC callback: %s", e) self._sso_handler.render_error(request, "mismatching_session", str(e)) return @@ -827,7 +827,7 @@ async def handle_oidc_callback( logger.debug("Exchanging OAuth2 code for a token") token = await self._exchange_code(code) except OidcError as e: - logger.exception("Could not exchange OAuth2 code") + logger.warning("Could not exchange OAuth2 code: %s", e) self._sso_handler.render_error(request, e.error, e.error_description) return From 5675cebfaa511b6a93db15dd3db3b7f27ce0677a Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 19 May 2022 10:28:18 +0100 Subject: [PATCH 062/181] openid.md: fix some links docbook doesn't auto-linkify links --- docs/openid.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/openid.md 
b/docs/openid.md index e899db63d63b..9d615a573759 100644 --- a/docs/openid.md +++ b/docs/openid.md @@ -293,7 +293,7 @@ can be used to retrieve information on the authenticated user. As the Synapse login mechanism needs an attribute to uniquely identify users, and that endpoint does not return a `sub` property, an alternative `subject_claim` has to be set. -1. Create a new OAuth application: https://github.com/settings/applications/new. +1. Create a new OAuth application: [https://github.com/settings/applications/new](https://github.com/settings/applications/new). 2. Set the callback URL to `[synapse public baseurl]/_synapse/client/oidc/callback`. Synapse config: @@ -322,10 +322,10 @@ oidc_providers: [Google][google-idp] is an OpenID certified authentication and authorisation provider. -1. Set up a project in the Google API Console (see - https://developers.google.com/identity/protocols/oauth2/openid-connect#appsetup). -2. Add an "OAuth Client ID" for a Web Application under "Credentials". -3. Copy the Client ID and Client Secret, and add the following to your synapse config: +1. Set up a project in the Google API Console (see + [documentation](https://developers.google.com/identity/protocols/oauth2/openid-connect#appsetup)). +3. Add an "OAuth Client ID" for a Web Application under "Credentials". +4. Copy the Client ID and Client Secret, and add the following to your synapse config: ```yaml oidc_providers: - idp_id: google @@ -501,8 +501,8 @@ As well as the private key file, you will need: * Team ID: a 10-character ID associated with your developer account. * Key ID: the 10-character identifier for the key. -https://help.apple.com/developer-account/?lang=en#/dev77c875b7e has more -documentation on setting up SiWA. +[Apple's developer documentation](https://help.apple.com/developer-account/?lang=en#/dev77c875b7e) +has more information on setting up SiWA. 
The synapse config will look like this: @@ -535,8 +535,8 @@ needed to add OAuth2 capabilities to your Django projects. It supports Configuration on Django's side: -1. Add an application: https://example.com/admin/oauth2_provider/application/add/ and choose parameters like this: -* `Redirect uris`: https://synapse.example.com/_synapse/client/oidc/callback +1. Add an application: `https://example.com/admin/oauth2_provider/application/add/` and choose parameters like this: +* `Redirect uris`: `https://synapse.example.com/_synapse/client/oidc/callback` * `Client type`: `Confidential` * `Authorization grant type`: `Authorization code` * `Algorithm`: `HMAC with SHA-2 256` From 47619017f97e04733e2a2fe511d6865d0294f063 Mon Sep 17 00:00:00 2001 From: Sami Olmari Date: Thu, 19 May 2022 14:03:12 +0300 Subject: [PATCH 063/181] Add missing user directory search endpoint to the generic worker documentation (#12773) Signed-off-by: Sami Olmari --- changelog.d/12773.doc | 1 + docs/workers.md | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 changelog.d/12773.doc diff --git a/changelog.d/12773.doc b/changelog.d/12773.doc new file mode 100644 index 000000000000..6de371653427 --- /dev/null +++ b/changelog.d/12773.doc @@ -0,0 +1 @@ +Add missing user directory endpoint from the generic worker documentation. Contributed by @olmari. \ No newline at end of file diff --git a/docs/workers.md b/docs/workers.md index 553792d2384c..779069b8177f 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -251,6 +251,8 @@ information. # Presence requests ^/_matrix/client/(api/v1|r0|v3|unstable)/presence/ + # User directory search requests + ^/_matrix/client/(r0|v3|unstable)/user_directory/search$ Additionally, the following REST endpoints can be handled for GET requests: @@ -448,6 +450,14 @@ update_user_directory_from_worker: worker_name This work cannot be load-balanced; please ensure the main process is restarted after setting this option in the shared configuration! 
+User directory updates allow REST endpoints matching the following regular +expressions to work: + + ^/_matrix/client/(r0|v3|unstable)/user_directory/search$ + +The above endpoints can be routed to any worker, though you may choose to route +it to the chosen user directory worker. + This style of configuration supersedes the legacy `synapse.app.user_dir` worker application type. From d25935cd3d92a77a43719df8f6763be0cbcf665b Mon Sep 17 00:00:00 2001 From: Aminda Suomalainen Date: Thu, 19 May 2022 14:28:10 +0300 Subject: [PATCH 064/181] Implement MSC3818: copy room type on upgrade (#12786) Resolves: #11896 Signed-off-by: Aminda Suomalainen --- changelog.d/12786.feature | 1 + synapse/handlers/room.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 8 deletions(-) create mode 100644 changelog.d/12786.feature diff --git a/changelog.d/12786.feature b/changelog.d/12786.feature new file mode 100644 index 000000000000..c90ddd411ee2 --- /dev/null +++ b/changelog.d/12786.feature @@ -0,0 +1 @@ +Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 53569e521219..b7d64a2f5a29 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -469,14 +469,14 @@ async def clone_existing_room( (EventTypes.PowerLevels, ""), ] - # If the old room was a space, copy over the room type and the rooms in - # the space. - if ( - old_room_create_event.content.get(EventContentFields.ROOM_TYPE) - == RoomTypes.SPACE - ): - creation_content[EventContentFields.ROOM_TYPE] = RoomTypes.SPACE - types_to_copy.append((EventTypes.SpaceChild, None)) + # Copy the room type as per MSC3818. + room_type = old_room_create_event.content.get(EventContentFields.ROOM_TYPE) + if room_type is not None: + creation_content[EventContentFields.ROOM_TYPE] = room_type + + # If the old room was a space, copy over the rooms in the space. 
+ if room_type == RoomTypes.SPACE: + types_to_copy.append((EventTypes.SpaceChild, None)) old_room_state_ids = await self.store.get_filtered_current_state_ids( old_room_id, StateFilter.from_types(types_to_copy) From b935c9529c1193416621a18567c11ff4d1d5edca Mon Sep 17 00:00:00 2001 From: David Robertson Date: Thu, 19 May 2022 13:49:58 +0100 Subject: [PATCH 065/181] Simplify untyped-defs config in mypy.ini (#12790) --- changelog.d/12790.misc | 1 + mypy.ini | 173 ++++------------------------------------- 2 files changed, 18 insertions(+), 156 deletions(-) create mode 100644 changelog.d/12790.misc diff --git a/changelog.d/12790.misc b/changelog.d/12790.misc new file mode 100644 index 000000000000..b78156cf4e1d --- /dev/null +++ b/changelog.d/12790.misc @@ -0,0 +1 @@ +Simplify `disallow_untyped_defs` config in `mypy.ini`. diff --git a/mypy.ini b/mypy.ini index 4fa020b8764d..df2622df983a 100644 --- a/mypy.ini +++ b/mypy.ini @@ -10,6 +10,7 @@ warn_unreachable = True warn_unused_ignores = True local_partial_types = True no_implicit_optional = True +disallow_untyped_defs = True files = docker/, @@ -86,177 +87,37 @@ exclude = (?x) |tests/utils.py )$ -[mypy-synapse._scripts.*] -disallow_untyped_defs = True - -[mypy-synapse.api.*] -disallow_untyped_defs = True - -[mypy-synapse.app.*] -disallow_untyped_defs = True - -[mypy-synapse.appservice.*] -disallow_untyped_defs = True - -[mypy-synapse.config.*] -disallow_untyped_defs = True - -[mypy-synapse.crypto.*] -disallow_untyped_defs = True - -[mypy-synapse.event_auth] -disallow_untyped_defs = True - -[mypy-synapse.events.*] -disallow_untyped_defs = True - -[mypy-synapse.federation.*] -disallow_untyped_defs = True - [mypy-synapse.federation.transport.client] disallow_untyped_defs = False -[mypy-synapse.groups.*] -disallow_untyped_defs = True - -[mypy-synapse.handlers.*] -disallow_untyped_defs = True - -[mypy-synapse.http.federation.*] -disallow_untyped_defs = True - -[mypy-synapse.http.connectproxyclient] -disallow_untyped_defs = 
True - -[mypy-synapse.http.proxyagent] -disallow_untyped_defs = True - -[mypy-synapse.http.request_metrics] -disallow_untyped_defs = True - -[mypy-synapse.http.server] -disallow_untyped_defs = True - -[mypy-synapse.logging._remote] -disallow_untyped_defs = True - -[mypy-synapse.logging.context] -disallow_untyped_defs = True +[mypy-synapse.http.client] +disallow_untyped_defs = False -[mypy-synapse.logging.formatter] -disallow_untyped_defs = True +[mypy-synapse.http.matrixfederationclient] +disallow_untyped_defs = False -[mypy-synapse.logging.handlers] -disallow_untyped_defs = True +[mypy-synapse.logging.opentracing] +disallow_untyped_defs = False -[mypy-synapse.metrics.*] -disallow_untyped_defs = True +[mypy-synapse.logging.scopecontextmanager] +disallow_untyped_defs = False [mypy-synapse.metrics._reactor_metrics] +disallow_untyped_defs = False # This module imports select.epoll. That exists on Linux, but doesn't on macOS. # See /~https://github.com/matrix-org/synapse/pull/11771. warn_unused_ignores = False -[mypy-synapse.module_api.*] -disallow_untyped_defs = True - -[mypy-synapse.notifier] -disallow_untyped_defs = True - -[mypy-synapse.push.*] -disallow_untyped_defs = True - -[mypy-synapse.replication.*] -disallow_untyped_defs = True - -[mypy-synapse.rest.*] -disallow_untyped_defs = True - -[mypy-synapse.server_notices.*] -disallow_untyped_defs = True - -[mypy-synapse.state.*] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.background_updates] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.account_data] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.client_ips] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.directory] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.e2e_room_keys] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.end_to_end_keys] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.event_push_actions] 
-disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.events_bg_updates] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.events_worker] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.room] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.room_batch] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.profile] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.stats] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.state_deltas] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.stream] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.transactions] -disallow_untyped_defs = True - -[mypy-synapse.storage.databases.main.user_erasure_store] -disallow_untyped_defs = True - -[mypy-synapse.storage.engines.*] -disallow_untyped_defs = True - -[mypy-synapse.storage.prepare_database] -disallow_untyped_defs = True - -[mypy-synapse.storage.persist_events] -disallow_untyped_defs = True - -[mypy-synapse.storage.state] -disallow_untyped_defs = True - -[mypy-synapse.storage.types] -disallow_untyped_defs = True - -[mypy-synapse.storage.util.*] -disallow_untyped_defs = True - -[mypy-synapse.streams.*] -disallow_untyped_defs = True +[mypy-synapse.util.caches.treecache] +disallow_untyped_defs = False -[mypy-synapse.types] -disallow_untyped_defs = True +[mypy-synapse.server] +disallow_untyped_defs = False -[mypy-synapse.util.*] -disallow_untyped_defs = True +[mypy-synapse.storage.database] +disallow_untyped_defs = False -[mypy-synapse.util.caches.treecache] +[mypy-tests.*] disallow_untyped_defs = False [mypy-tests.handlers.test_user_directory] From f16ec055cc235eed1ae02f7cede99c366fedca5e Mon Sep 17 00:00:00 2001 From: Jae Lo Presti Date: Thu, 19 May 2022 14:03:13 +0100 Subject: [PATCH 066/181] hash_password: raise an error if no config file is specified (#12789) --- changelog.d/12789.misc | 1 + 
synapse/_scripts/hash_password.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 changelog.d/12789.misc diff --git a/changelog.d/12789.misc b/changelog.d/12789.misc new file mode 100644 index 000000000000..3398d00110c8 --- /dev/null +++ b/changelog.d/12789.misc @@ -0,0 +1 @@ +The `hash_password` script now fails when it is called without specifying a config file. diff --git a/synapse/_scripts/hash_password.py b/synapse/_scripts/hash_password.py index 3aa29de5bd8a..3bed367be29d 100755 --- a/synapse/_scripts/hash_password.py +++ b/synapse/_scripts/hash_password.py @@ -46,14 +46,14 @@ def main() -> None: "Path to server config file. " "Used to read in bcrypt_rounds and password_pepper." ), + required=True, ) args = parser.parse_args() - if "config" in args and args.config: - config = yaml.safe_load(args.config) - bcrypt_rounds = config.get("bcrypt_rounds", bcrypt_rounds) - password_config = config.get("password_config", None) or {} - password_pepper = password_config.get("pepper", password_pepper) + config = yaml.safe_load(args.config) + bcrypt_rounds = config.get("bcrypt_rounds", bcrypt_rounds) + password_config = config.get("password_config", None) or {} + password_pepper = password_config.get("pepper", password_pepper) password = args.password if not password: From 66a5f6c40018018cccffd79aded0850d13efe513 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 19 May 2022 14:16:49 +0100 Subject: [PATCH 067/181] Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. 
(#12687) --- changelog.d/12687.bugfix | 1 + docs/upgrade.md | 90 +++++++++++++++++++ synapse/storage/background_updates.py | 15 ++++ synapse/storage/databases/state/bg_updates.py | 16 ++++ .../delta/70/08_state_group_edges_unique.sql | 17 ++++ 5 files changed, 139 insertions(+) create mode 100644 changelog.d/12687.bugfix create mode 100644 synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql diff --git a/changelog.d/12687.bugfix b/changelog.d/12687.bugfix new file mode 100644 index 000000000000..196d9766707a --- /dev/null +++ b/changelog.d/12687.bugfix @@ -0,0 +1 @@ +Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. \ No newline at end of file diff --git a/docs/upgrade.md b/docs/upgrade.md index fa4b3ef5902d..92ca31b2f8de 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -89,6 +89,96 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.60.0 + +## Adding a new unique index to `state_group_edges` could fail if your database is corrupted + +This release of Synapse will add a unique index to the `state_group_edges` table, in order +to prevent accidentally introducing duplicate information (for example, because a database +backup was restored multiple times). + +Duplicate rows being present in this table could cause drastic performance problems; see +[issue 11779](/~https://github.com/matrix-org/synapse/issues/11779) for more details. + +If your Synapse database already has had duplicate rows introduced into this table, +this could fail, with either of these errors: + + +**On Postgres:** +``` +synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges +synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update +... 
+psycopg2.errors.UniqueViolation: could not create unique index "state_group_edges_unique_idx" +DETAIL: Key (state_group, prev_state_group)=(2, 1) is duplicated. +``` +(The numbers may be different.) + +**On SQLite:** +``` +synapse.storage.background_updates - 623 - INFO - background_updates-0 - Adding index state_group_edges_unique_idx to state_group_edges +synapse.storage.background_updates - 282 - ERROR - background_updates-0 - Error doing update +... +sqlite3.IntegrityError: UNIQUE constraint failed: state_group_edges.state_group, state_group_edges.prev_state_group +``` + + +
+Expand this section for steps to resolve this problem + +### On Postgres + +Connect to your database with `psql`. + +```sql +BEGIN; +DELETE FROM state_group_edges WHERE (ctid, state_group, prev_state_group) IN ( + SELECT row_id, state_group, prev_state_group + FROM ( + SELECT + ctid AS row_id, + MIN(ctid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id, + state_group, + prev_state_group + FROM state_group_edges + ) AS t1 + WHERE row_id <> min_row_id +); +COMMIT; +``` + + +### On SQLite + +At the command-line, use `sqlite3 path/to/your-homeserver-database.db`: + +```sql +BEGIN; +DELETE FROM state_group_edges WHERE (rowid, state_group, prev_state_group) IN ( + SELECT row_id, state_group, prev_state_group + FROM ( + SELECT + rowid AS row_id, + MIN(rowid) OVER (PARTITION BY state_group, prev_state_group) AS min_row_id, + state_group, + prev_state_group + FROM state_group_edges + ) + WHERE row_id <> min_row_id +); +COMMIT; +``` + + +### For more details + +[This comment on issue 11779](/~https://github.com/matrix-org/synapse/issues/11779#issuecomment-1131545970) +has queries that can be used to check a database for this problem in advance. + +
+ + + # Upgrading to v1.59.0 ## Device name lookup over federation has been disabled by default diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index 37f2d6c644f4..b1e5208c7603 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -535,6 +535,7 @@ def register_background_index_update( where_clause: Optional[str] = None, unique: bool = False, psql_only: bool = False, + replaces_index: Optional[str] = None, ) -> None: """Helper for store classes to do a background index addition @@ -554,6 +555,8 @@ def register_background_index_update( unique: true to make a UNIQUE index psql_only: true to only create this index on psql databases (useful for virtual sqlite tables) + replaces_index: The name of an index that this index replaces. + The named index will be dropped upon completion of the new index. """ def create_index_psql(conn: Connection) -> None: @@ -585,6 +588,12 @@ def create_index_psql(conn: Connection) -> None: } logger.debug("[SQL] %s", sql) c.execute(sql) + + if replaces_index is not None: + # We drop the old index as the new index has now been created. + sql = f"DROP INDEX IF EXISTS {replaces_index}" + logger.debug("[SQL] %s", sql) + c.execute(sql) finally: conn.set_session(autocommit=False) # type: ignore @@ -613,6 +622,12 @@ def create_index_sqlite(conn: Connection) -> None: logger.debug("[SQL] %s", sql) c.execute(sql) + if replaces_index is not None: + # We drop the old index as the new index has now been created. 
+ sql = f"DROP INDEX IF EXISTS {replaces_index}" + logger.debug("[SQL] %s", sql) + c.execute(sql) + if isinstance(self.db_pool.engine, engines.PostgresEngine): runner: Optional[Callable[[Connection], None]] = create_index_psql elif psql_only: diff --git a/synapse/storage/databases/state/bg_updates.py b/synapse/storage/databases/state/bg_updates.py index 5de70f31d294..fa9eadaca7ea 100644 --- a/synapse/storage/databases/state/bg_updates.py +++ b/synapse/storage/databases/state/bg_updates.py @@ -195,6 +195,7 @@ class StateBackgroundUpdateStore(StateGroupBackgroundUpdateStore): STATE_GROUP_DEDUPLICATION_UPDATE_NAME = "state_group_state_deduplication" STATE_GROUP_INDEX_UPDATE_NAME = "state_group_state_type_index" STATE_GROUPS_ROOM_INDEX_UPDATE_NAME = "state_groups_room_id_idx" + STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME = "state_group_edges_unique_idx" def __init__( self, @@ -217,6 +218,21 @@ def __init__( columns=["room_id"], ) + # `state_group_edges` can cause severe performance issues if duplicate + # rows are introduced, which can accidentally be done by well-meaning + # server admins when trying to restore a database dump, etc. + # See /~https://github.com/matrix-org/synapse/issues/11779. + # Introduce a unique index to guard against that. + self.db_pool.updates.register_background_index_update( + self.STATE_GROUP_EDGES_UNIQUE_INDEX_UPDATE_NAME, + index_name="state_group_edges_unique_idx", + table="state_group_edges", + columns=["state_group", "prev_state_group"], + unique=True, + # The old index was on (state_group) and was not unique. 
+ replaces_index="state_group_edges_idx", + ) + async def _background_deduplicate_state( self, progress: dict, batch_size: int ) -> int: diff --git a/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql b/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql new file mode 100644 index 000000000000..b8c0ee0fa03e --- /dev/null +++ b/synapse/storage/schema/state/delta/70/08_state_group_edges_unique.sql @@ -0,0 +1,17 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (ordering, update_name, progress_json) VALUES + (7008, 'state_group_edges_unique_idx', '{}'); From 684feeaf2f32d853b02951794789830e48e75a64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Thu, 19 May 2022 16:23:59 +0200 Subject: [PATCH 068/181] Properly close providers.json file stream. (#12794) --- changelog.d/12794.bugfix | 1 + synapse/config/oembed.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12794.bugfix diff --git a/changelog.d/12794.bugfix b/changelog.d/12794.bugfix new file mode 100644 index 000000000000..2d1a2838e128 --- /dev/null +++ b/changelog.d/12794.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in 1.43.0 where a file (`providers.json`) was never closed. Contributed by @arkamar. 
diff --git a/synapse/config/oembed.py b/synapse/config/oembed.py index 690ffb52963e..e9edea073123 100644 --- a/synapse/config/oembed.py +++ b/synapse/config/oembed.py @@ -57,9 +57,9 @@ def _parse_and_validate_providers( """ # Whether to use the packaged providers.json file. if not oembed_config.get("disable_default_providers") or False: - providers = json.load( - pkg_resources.resource_stream("synapse", "res/providers.json") - ) + with pkg_resources.resource_stream("synapse", "res/providers.json") as s: + providers = json.load(s) + yield from self._parse_and_validate_provider( providers, config_path=("oembed",) ) From ab2a615cfb13f8ff91919c4332fcb182640d5484 Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 19 May 2022 07:46:33 -0700 Subject: [PATCH 069/181] Update configuration manual to document size-related suffixes (#12777) --- changelog.d/12777.doc | 2 ++ docs/usage/configuration/config_documentation.md | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 changelog.d/12777.doc diff --git a/changelog.d/12777.doc b/changelog.d/12777.doc new file mode 100644 index 000000000000..cc9c07704d02 --- /dev/null +++ b/changelog.d/12777.doc @@ -0,0 +1,2 @@ +Update configuration manual documentation to document size-related suffixes. + diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 6b9ffc09d2b3..525e1c7a9145 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -23,6 +23,14 @@ followed by a letter. Letters have the following meanings: For example, setting `redaction_retention_period: 5m` would remove redacted messages from the database after 5 minutes, rather than 5 months. 
+In addition, configuration options referring to size use the following suffixes: + +* `M` = MiB, or 1,048,576 bytes +* `K` = KiB, or 1024 bytes + +For example, setting `max_avatar_size: 10M` means that Synapse will not accept files larger than 10,485,760 bytes +for a user avatar. + ### YAML The configuration file is a [YAML](https://yaml.org/) file, which means that certain syntax rules apply if you want your config file to be read properly. A few helpful things to know: From eb4aaa1b4b828c7d2ab501f03ebe79b13c75b7e0 Mon Sep 17 00:00:00 2001 From: Shay Date: Thu, 19 May 2022 07:47:07 -0700 Subject: [PATCH 070/181] Add detail to `cache_autotuning` config option documentation (#12776) --- changelog.d/12776.doc | 2 ++ docs/usage/configuration/config_documentation.md | 13 +++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12776.doc diff --git a/changelog.d/12776.doc b/changelog.d/12776.doc new file mode 100644 index 000000000000..c00489a8ce14 --- /dev/null +++ b/changelog.d/12776.doc @@ -0,0 +1,2 @@ +Add additional info to documentation of config option `cache_autotuning`. + diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 525e1c7a9145..0f5bda32b941 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1130,14 +1130,19 @@ Caching can be configured through the following sub-options: * `cache_autotuning` and its sub-options `max_cache_memory_usage`, `target_cache_memory_usage`, and `min_cache_ttl` work in conjunction with each other to maintain a balance between cache memory usage and cache entry availability. You must be using [jemalloc](/~https://github.com/matrix-org/synapse#help-synapse-is-slow-and-eats-all-my-ramcpu) - to utilize this option, and all three of the options must be specified for this feature to work. 
+ to utilize this option, and all three of the options must be specified for this feature to work. This option + defaults to off, enable it by providing values for the sub-options listed below. Please note that the feature will not work + and may cause unstable behavior (such as excessive emptying of caches or exceptions) if all of the values are not provided. + Please see the [Config Conventions](#config-conventions) for information on how to specify memory size and cache expiry + durations. * `max_cache_memory_usage` sets a ceiling on how much memory the cache can use before caches begin to be continuously evicted. They will continue to be evicted until the memory usage drops below the `target_memory_usage`, set in - the flag below, or until the `min_cache_ttl` is hit. - * `target_memory_usage` sets a rough target for the desired memory usage of the caches. + the setting below, or until the `min_cache_ttl` is hit. There is no default value for this option. + * `target_cache_memory_usage` sets a rough target for the desired memory usage of the caches. There is no default value + for this option. * `min_cache_ttl` sets a limit under which newer cache entries are not evicted and is only applied when caches are actively being evicted/`max_cache_memory_usage` has been exceeded. This is to protect hot caches - from being emptied while Synapse is evicting due to memory. + from being emptied while Synapse is evicting due to memory. There is no default value for this option. Example configuration: ```yaml From 177b884ad7cc1ecdd92ff74188732734df203150 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 19 May 2022 16:29:08 +0100 Subject: [PATCH 071/181] Lay some foundation work to allow workers to only subscribe to some kinds of messages, reducing replication traffic.
(#12672) --- changelog.d/12672.misc | 1 + synapse/replication/tcp/handler.py | 34 ++++++++++++- synapse/replication/tcp/redis.py | 35 +++++++++---- tests/replication/_base.py | 54 +++++++++++++++----- tests/replication/tcp/test_handler.py | 73 +++++++++++++++++++++++++++ 5 files changed, 173 insertions(+), 24 deletions(-) create mode 100644 changelog.d/12672.misc create mode 100644 tests/replication/tcp/test_handler.py diff --git a/changelog.d/12672.misc b/changelog.d/12672.misc new file mode 100644 index 000000000000..265e0a801f78 --- /dev/null +++ b/changelog.d/12672.misc @@ -0,0 +1 @@ +Lay some foundation work to allow workers to only subscribe to some kinds of messages, reducing replication traffic. \ No newline at end of file diff --git a/synapse/replication/tcp/handler.py b/synapse/replication/tcp/handler.py index 9aba1cd45111..e1cbfa50ebd2 100644 --- a/synapse/replication/tcp/handler.py +++ b/synapse/replication/tcp/handler.py @@ -1,5 +1,5 @@ # Copyright 2017 Vector Creations Ltd -# Copyright 2020 The Matrix.org Foundation C.I.C. +# Copyright 2020, 2022 The Matrix.org Foundation C.I.C. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -101,6 +101,9 @@ def __init__(self, hs: "HomeServer"): self._instance_id = hs.get_instance_id() self._instance_name = hs.get_instance_name() + # Additional Redis channel suffixes to subscribe to. 
+ self._channels_to_subscribe_to: List[str] = [] + self._is_presence_writer = ( hs.get_instance_name() in hs.config.worker.writers.presence ) @@ -243,6 +246,31 @@ def __init__(self, hs: "HomeServer"): # If we're NOT using Redis, this must be handled by the master self._should_insert_client_ips = hs.get_instance_name() == "master" + if self._is_master or self._should_insert_client_ips: + self.subscribe_to_channel("USER_IP") + + def subscribe_to_channel(self, channel_name: str) -> None: + """ + Indicates that we wish to subscribe to a Redis channel by name. + + (The name will later be prefixed with the server name; i.e. subscribing + to the 'ABC' channel actually subscribes to 'example.com/ABC' Redis-side.) + + Raises: + - If replication has already started, then it's too late to subscribe + to new channels. + """ + + if self._factory is not None: + # We don't allow subscribing after the fact to avoid the chance + # of missing an important message because we didn't subscribe in time. + raise RuntimeError( + "Cannot subscribe to more channels after replication started." + ) + + if channel_name not in self._channels_to_subscribe_to: + self._channels_to_subscribe_to.append(channel_name) + def _add_command_to_stream_queue( self, conn: IReplicationConnection, cmd: Union[RdataCommand, PositionCommand] ) -> None: @@ -321,7 +349,9 @@ def start_replication(self, hs: "HomeServer") -> None: # Now create the factory/connection for the subscription stream. 
self._factory = RedisDirectTcpReplicationClientFactory( - hs, outbound_redis_connection + hs, + outbound_redis_connection, + channel_names=self._channels_to_subscribe_to, ) hs.get_reactor().connectTCP( hs.config.redis.redis_host, diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index 989c5be0327e..73294654eff1 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -14,7 +14,7 @@ import logging from inspect import isawaitable -from typing import TYPE_CHECKING, Any, Generic, Optional, Type, TypeVar, cast +from typing import TYPE_CHECKING, Any, Generic, List, Optional, Type, TypeVar, cast import attr import txredisapi @@ -85,14 +85,15 @@ class RedisSubscriber(txredisapi.SubscriberProtocol): Attributes: synapse_handler: The command handler to handle incoming commands. - synapse_stream_name: The *redis* stream name to subscribe to and publish + synapse_stream_prefix: The *redis* stream name to subscribe to and publish from (not anything to do with Synapse replication streams). synapse_outbound_redis_connection: The connection to redis to use to send commands. """ synapse_handler: "ReplicationCommandHandler" - synapse_stream_name: str + synapse_stream_prefix: str + synapse_channel_names: List[str] synapse_outbound_redis_connection: txredisapi.ConnectionHandler def __init__(self, *args: Any, **kwargs: Any): @@ -117,8 +118,13 @@ async def _send_subscribe(self) -> None: # it's important to make sure that we only send the REPLICATE command once we # have successfully subscribed to the stream - otherwise we might miss the # POSITION response sent back by the other end. 
- logger.info("Sending redis SUBSCRIBE for %s", self.synapse_stream_name) - await make_deferred_yieldable(self.subscribe(self.synapse_stream_name)) + fully_qualified_stream_names = [ + f"{self.synapse_stream_prefix}/{stream_suffix}" + for stream_suffix in self.synapse_channel_names + ] + [self.synapse_stream_prefix] + logger.info("Sending redis SUBSCRIBE for %r", fully_qualified_stream_names) + await make_deferred_yieldable(self.subscribe(fully_qualified_stream_names)) + logger.info( "Successfully subscribed to redis stream, sending REPLICATE command" ) @@ -217,7 +223,7 @@ async def _async_send_command(self, cmd: Command) -> None: await make_deferred_yieldable( self.synapse_outbound_redis_connection.publish( - self.synapse_stream_name, encoded_string + self.synapse_stream_prefix, encoded_string ) ) @@ -300,20 +306,27 @@ def format_address(address: IAddress) -> str: class RedisDirectTcpReplicationClientFactory(SynapseRedisFactory): """This is a reconnecting factory that connects to redis and immediately - subscribes to a stream. + subscribes to some streams. Args: hs outbound_redis_connection: A connection to redis that will be used to send outbound commands (this is separate to the redis connection used to subscribe). + channel_names: A list of channel names to append to the base channel name + to additionally subscribe to. + e.g. if ['ABC', 'DEF'] is specified then we'll listen to: + example.com; example.com/ABC; and example.com/DEF. 
""" maxDelay = 5 protocol = RedisSubscriber def __init__( - self, hs: "HomeServer", outbound_redis_connection: txredisapi.ConnectionHandler + self, + hs: "HomeServer", + outbound_redis_connection: txredisapi.ConnectionHandler, + channel_names: List[str], ): super().__init__( @@ -326,7 +339,8 @@ def __init__( ) self.synapse_handler = hs.get_replication_command_handler() - self.synapse_stream_name = hs.hostname + self.synapse_stream_prefix = hs.hostname + self.synapse_channel_names = channel_names self.synapse_outbound_redis_connection = outbound_redis_connection @@ -340,7 +354,8 @@ def buildProtocol(self, addr: IAddress) -> RedisSubscriber: # protocol. p.synapse_handler = self.synapse_handler p.synapse_outbound_redis_connection = self.synapse_outbound_redis_connection - p.synapse_stream_name = self.synapse_stream_name + p.synapse_stream_prefix = self.synapse_stream_prefix + p.synapse_channel_names = self.synapse_channel_names return p diff --git a/tests/replication/_base.py b/tests/replication/_base.py index a7602b4c96ae..970d5e533b35 100644 --- a/tests/replication/_base.py +++ b/tests/replication/_base.py @@ -12,7 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import logging -from typing import Any, Dict, List, Optional, Tuple +from collections import defaultdict +from typing import Any, Dict, List, Optional, Set, Tuple from twisted.internet.address import IPv4Address from twisted.internet.protocol import Protocol @@ -32,6 +33,7 @@ from tests import unittest from tests.server import FakeTransport +from tests.utils import USE_POSTGRES_FOR_TESTS try: import hiredis @@ -475,22 +477,25 @@ class FakeRedisPubSubServer: """A fake Redis server for pub/sub.""" def __init__(self): - self._subscribers = set() + self._subscribers_by_channel: Dict[ + bytes, Set["FakeRedisPubSubProtocol"] + ] = defaultdict(set) - def add_subscriber(self, conn): + def add_subscriber(self, conn, channel: bytes): """A connection has called SUBSCRIBE""" - self._subscribers.add(conn) + self._subscribers_by_channel[channel].add(conn) def remove_subscriber(self, conn): - """A connection has called UNSUBSCRIBE""" - self._subscribers.discard(conn) + """A connection has lost connection""" + for subscribers in self._subscribers_by_channel.values(): + subscribers.discard(conn) - def publish(self, conn, channel, msg) -> int: + def publish(self, conn, channel: bytes, msg) -> int: """A connection want to publish a message to subscribers.""" - for sub in self._subscribers: + for sub in self._subscribers_by_channel[channel]: sub.send(["message", channel, msg]) - return len(self._subscribers) + return len(self._subscribers_by_channel) def buildProtocol(self, addr): return FakeRedisPubSubProtocol(self) @@ -531,9 +536,10 @@ def handle_command(self, command, *args): num_subscribers = self._server.publish(self, channel, message) self.send(num_subscribers) elif command == b"SUBSCRIBE": - (channel,) = args - self._server.add_subscriber(self) - self.send(["subscribe", channel, 1]) + for idx, channel in enumerate(args): + num_channels = idx + 1 + self._server.add_subscriber(self, channel) + self.send(["subscribe", channel, num_channels]) # Since we use SET/GET to cache things 
we can safely no-op them. elif command == b"SET": @@ -576,3 +582,27 @@ def encode(self, obj): def connectionLost(self, reason): self._server.remove_subscriber(self) + + +class RedisMultiWorkerStreamTestCase(BaseMultiWorkerStreamTestCase): + """ + A test case that enables Redis, providing a fake Redis server. + """ + + if not hiredis: + skip = "Requires hiredis" + + if not USE_POSTGRES_FOR_TESTS: + # Redis replication only takes place on Postgres + skip = "Requires Postgres" + + def default_config(self) -> Dict[str, Any]: + """ + Overrides the default config to enable Redis. + Even if the test only uses make_worker_hs, the main process needs Redis + enabled otherwise it won't create a Fake Redis server to listen on the + Redis port and accept fake TCP connections. + """ + base = super().default_config() + base["redis"] = {"enabled": True} + return base diff --git a/tests/replication/tcp/test_handler.py b/tests/replication/tcp/test_handler.py new file mode 100644 index 000000000000..e6a19eafd578 --- /dev/null +++ b/tests/replication/tcp/test_handler.py @@ -0,0 +1,73 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from tests.replication._base import RedisMultiWorkerStreamTestCase + + +class ChannelsTestCase(RedisMultiWorkerStreamTestCase): + def test_subscribed_to_enough_redis_channels(self) -> None: + # The default main process is subscribed to the USER_IP channel. 
+ self.assertCountEqual( + self.hs.get_replication_command_handler()._channels_to_subscribe_to, + ["USER_IP"], + ) + + def test_background_worker_subscribed_to_user_ip(self) -> None: + # The default main process is subscribed to the USER_IP channel. + worker1 = self.make_worker_hs( + "synapse.app.generic_worker", + extra_config={ + "worker_name": "worker1", + "run_background_tasks_on": "worker1", + "redis": {"enabled": True}, + }, + ) + self.assertIn( + "USER_IP", + worker1.get_replication_command_handler()._channels_to_subscribe_to, + ) + + # Advance so the Redis subscription gets processed + self.pump(0.1) + + # The counts are 2 because both the main process and the worker are subscribed. + self.assertEqual(len(self._redis_server._subscribers_by_channel[b"test"]), 2) + self.assertEqual( + len(self._redis_server._subscribers_by_channel[b"test/USER_IP"]), 2 + ) + + def test_non_background_worker_not_subscribed_to_user_ip(self) -> None: + # The default main process is subscribed to the USER_IP channel. + worker2 = self.make_worker_hs( + "synapse.app.generic_worker", + extra_config={ + "worker_name": "worker2", + "run_background_tasks_on": "worker1", + "redis": {"enabled": True}, + }, + ) + self.assertNotIn( + "USER_IP", + worker2.get_replication_command_handler()._channels_to_subscribe_to, + ) + + # Advance so the Redis subscription gets processed + self.pump(0.1) + + # The count is 2 because both the main process and the worker are subscribed. + self.assertEqual(len(self._redis_server._subscribers_by_channel[b"test"]), 2) + # For USER_IP, the count is 1 because only the main process is subscribed. 
+ self.assertEqual( + len(self._redis_server._subscribers_by_channel[b"test/USER_IP"]), 1 + ) From 96df31239cdbcd4f50f503bf329fe3bb86c39a20 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Thu, 19 May 2022 18:32:48 +0100 Subject: [PATCH 072/181] Add a unit test for copying over arbitrary room types when upgrading a room (#12792) --- changelog.d/12792.feature | 1 + synapse/handlers/room.py | 2 +- tests/rest/client/test_upgrade_room.py | 32 +++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12792.feature diff --git a/changelog.d/12792.feature b/changelog.d/12792.feature new file mode 100644 index 000000000000..4778b8a394d4 --- /dev/null +++ b/changelog.d/12792.feature @@ -0,0 +1 @@ +Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). \ No newline at end of file diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index b7d64a2f5a29..794f94f6b353 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -427,7 +427,7 @@ async def clone_existing_room( requester: the user requesting the upgrade old_room_id : the id of the room to be replaced new_room_id: the id to give the new room (should already have been - created with _gemerate_room_id()) + created with _generate_room_id()) new_room_version: the new room version to use tombstone_event_id: the ID of the tombstone event in the old room. """ diff --git a/tests/rest/client/test_upgrade_room.py b/tests/rest/client/test_upgrade_room.py index c86fc5df0d08..a21cbe9fa874 100644 --- a/tests/rest/client/test_upgrade_room.py +++ b/tests/rest/client/test_upgrade_room.py @@ -76,7 +76,7 @@ def test_not_in_room(self) -> None: """ Upgrading a room should work fine. """ - # THe user isn't in the room. + # The user isn't in the room. 
roomless = self.register_user("roomless", "pass") roomless_token = self.login(roomless, "pass") @@ -263,3 +263,33 @@ def test_space(self) -> None: self.assertIn((EventTypes.SpaceChild, self.room_id), state_ids) # The child that was removed should not be copied over. self.assertNotIn((EventTypes.SpaceChild, old_room_id), state_ids) + + def test_custom_room_type(self) -> None: + """Test upgrading a room that has a custom room type set.""" + test_room_type = "com.example.my_custom_room_type" + + # Create a room with a custom room type. + room_id = self.helper.create_room_as( + self.creator, + tok=self.creator_token, + extra_content={ + "creation_content": {EventContentFields.ROOM_TYPE: test_room_type} + }, + ) + + # Upgrade the room! + channel = self._upgrade_room(room_id=room_id) + self.assertEqual(200, channel.code, channel.result) + self.assertIn("replacement_room", channel.json_body) + + new_room_id = channel.json_body["replacement_room"] + + state_ids = self.get_success(self.store.get_current_state_ids(new_room_id)) + + # Ensure the new room is the same type as the old room. 
+ create_event = self.get_success( + self.store.get_event(state_ids[(EventTypes.Create, "")]) + ) + self.assertEqual( + create_event.content.get(EventContentFields.ROOM_TYPE), test_room_type + ) From 2be5a2b07becdbf0957008ece74be3b3f957b233 Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Thu, 19 May 2022 20:17:10 +0100 Subject: [PATCH 073/181] Fix `RetryDestinationLimiter` re-starting finished log contexts (#12803) Signed-off-by: Sean Quah --- changelog.d/12803.bugfix | 1 + synapse/util/retryutils.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12803.bugfix diff --git a/changelog.d/12803.bugfix b/changelog.d/12803.bugfix new file mode 100644 index 000000000000..6ddd3d24e05f --- /dev/null +++ b/changelog.d/12803.bugfix @@ -0,0 +1 @@ +Fix a long-standing bug where finished log contexts would be re-started when failing to contact remote homeservers. diff --git a/synapse/util/retryutils.py b/synapse/util/retryutils.py index 81bfed268ee7..d0a69ff843e5 100644 --- a/synapse/util/retryutils.py +++ b/synapse/util/retryutils.py @@ -16,8 +16,8 @@ from types import TracebackType from typing import TYPE_CHECKING, Any, Optional, Type -import synapse.logging.context from synapse.api.errors import CodeMessageException +from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage import DataStore from synapse.util import Clock @@ -265,4 +265,4 @@ async def store_retry_timings() -> None: logger.exception("Failed to store destination_retry_timings") # we deliberately do this in the background. 
- synapse.logging.context.run_in_background(store_retry_timings) + run_as_background_process("store_retry_timings", store_retry_timings) From 71e8afe34d2103c5ccc9f2d1c99587d14b2acc56 Mon Sep 17 00:00:00 2001 From: Shay Date: Fri, 20 May 2022 01:54:12 -0700 Subject: [PATCH 074/181] Update EventContext `get_current_event_ids` and `get_prev_event_ids` to accept state filters and update calls where possible (#12791) --- changelog.d/12791.misc | 1 + synapse/events/snapshot.py | 19 +++++++++++++++---- synapse/handlers/federation.py | 9 +++++++-- synapse/handlers/federation_event.py | 8 +++++++- synapse/handlers/message.py | 14 +++++++++++--- synapse/handlers/room.py | 5 ++++- synapse/handlers/room_member.py | 9 +++++++-- synapse/push/bulk_push_rule_evaluator.py | 9 +++++++-- synapse/storage/state.py | 7 +++++-- tests/test_state.py | 2 +- 10 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 changelog.d/12791.misc diff --git a/changelog.d/12791.misc b/changelog.d/12791.misc new file mode 100644 index 000000000000..b6e92b7eafad --- /dev/null +++ b/changelog.d/12791.misc @@ -0,0 +1 @@ +Update EventContext `get_current_event_ids` and `get_prev_event_ids` to accept state filters and update calls where possible. 
diff --git a/synapse/events/snapshot.py b/synapse/events/snapshot.py index 9ccd24b298bb..7a91544119f7 100644 --- a/synapse/events/snapshot.py +++ b/synapse/events/snapshot.py @@ -24,6 +24,7 @@ if TYPE_CHECKING: from synapse.storage import Storage from synapse.storage.databases.main import DataStore + from synapse.storage.state import StateFilter @attr.s(slots=True, auto_attribs=True) @@ -196,7 +197,9 @@ def state_group(self) -> Optional[int]: return self._state_group - async def get_current_state_ids(self) -> Optional[StateMap[str]]: + async def get_current_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> Optional[StateMap[str]]: """ Gets the room state map, including this event - ie, the state in ``state_group`` @@ -204,6 +207,9 @@ async def get_current_state_ids(self) -> Optional[StateMap[str]]: not make it into the room state. This method will raise an exception if ``rejected`` is set. + Arg: + state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules + Returns: Returns None if state_group is None, which happens when the associated event is an outlier. @@ -216,7 +222,7 @@ async def get_current_state_ids(self) -> Optional[StateMap[str]]: assert self._state_delta_due_to_event is not None - prev_state_ids = await self.get_prev_state_ids() + prev_state_ids = await self.get_prev_state_ids(state_filter) if self._state_delta_due_to_event: prev_state_ids = dict(prev_state_ids) @@ -224,12 +230,17 @@ async def get_current_state_ids(self) -> Optional[StateMap[str]]: return prev_state_ids - async def get_prev_state_ids(self) -> StateMap[str]: + async def get_prev_state_ids( + self, state_filter: Optional["StateFilter"] = None + ) -> StateMap[str]: """ Gets the room state map, excluding this event. For a non-state event, this will be the same as get_current_state_ids(). 
+ Args: + state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules + Returns: Returns {} if state_group is None, which happens when the associated event is an outlier. @@ -239,7 +250,7 @@ async def get_prev_state_ids(self) -> StateMap[str]: """ assert self.state_group_before_event is not None return await self._storage.state.get_state_ids_for_group( - self.state_group_before_event + self.state_group_before_event, state_filter ) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index be5099b507f6..0386d0a07bba 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -54,6 +54,7 @@ ReplicationStoreRoomOnOutlierMembershipRestServlet, ) from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.storage.state import StateFilter from synapse.types import JsonDict, StateMap, get_domain_from_id from synapse.util.async_helpers import Linearizer from synapse.util.retryutils import NotRetryingDestination @@ -1259,7 +1260,9 @@ async def add_display_name_to_third_party_invite( event.content["third_party_invite"]["signed"]["token"], ) original_invite = None - prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)]) + ) original_invite_id = prev_state_ids.get(key) if original_invite_id: original_invite = await self.store.get_event( @@ -1308,7 +1311,9 @@ async def _check_signature(self, event: EventBase, context: EventContext) -> Non signed = event.content["third_party_invite"]["signed"] token = signed["token"] - prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.ThirdPartyInvite, None)]) + ) invite_event_id = prev_state_ids.get((EventTypes.ThirdPartyInvite, token)) invite_event = None diff --git a/synapse/handlers/federation_event.py 
b/synapse/handlers/federation_event.py index 761caa04b726..05c122f22491 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -30,6 +30,7 @@ from prometheus_client import Counter +from synapse import event_auth from synapse.api.constants import ( EventContentFields, EventTypes, @@ -63,6 +64,7 @@ ) from synapse.state import StateResolutionStore from synapse.storage.databases.main.events_worker import EventRedactBehaviour +from synapse.storage.state import StateFilter from synapse.types import ( PersistedEventPosition, RoomStreamToken, @@ -1500,7 +1502,11 @@ async def _check_event_auth( return context # now check auth against what we think the auth events *should* be. - prev_state_ids = await context.get_prev_state_ids() + event_types = event_auth.auth_types_for_event(event.room_version, event) + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) + auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True ) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 0951b9c71f75..e566ff1f8ed8 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -634,7 +634,9 @@ async def create_event( # federation as well as those created locally. As of room v3, aliases events # can be created by users that are not in the room, therefore we have to # tolerate them in event_auth.check(). 
- prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.Member, None)]) + ) prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender)) prev_event = ( await self.store.get_event(prev_event_id, allow_none=True) @@ -761,7 +763,9 @@ async def deduplicate_state_event( # This can happen due to out of band memberships return None - prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(event.type, None)]) + ) prev_event_id = prev_state_ids.get((event.type, event.state_key)) if not prev_event_id: return None @@ -1547,7 +1551,11 @@ async def persist_and_notify_client_event( "Redacting MSC2716 events is not supported in this room version", ) - prev_state_ids = await context.get_prev_state_ids() + event_types = event_auth.auth_types_for_event(event.room_version, event) + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) + auth_events_ids = self._event_auth_handler.compute_auth_events( event, prev_state_ids, for_verification=True ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 794f94f6b353..92e1de050071 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -303,7 +303,10 @@ async def _upgrade_room( context=tombstone_context, ) - old_room_state = await tombstone_context.get_current_state_ids() + state_filter = StateFilter.from_types( + [(EventTypes.CanonicalAlias, ""), (EventTypes.PowerLevels, "")] + ) + old_room_state = await tombstone_context.get_current_state_ids(state_filter) # We know the tombstone event isn't an outlier so it has current state. 
assert old_room_state is not None diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 802e57c4d0cc..ea876c168de7 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -38,6 +38,7 @@ from synapse.events import EventBase from synapse.events.snapshot import EventContext from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN +from synapse.storage.state import StateFilter from synapse.types import ( JsonDict, Requester, @@ -362,7 +363,9 @@ async def _local_membership_update( historical=historical, ) - prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.Member, None)]) + ) prev_member_event_id = prev_state_ids.get((EventTypes.Member, user_id), None) @@ -1160,7 +1163,9 @@ async def send_membership_event( else: requester = types.create_requester(target_user) - prev_state_ids = await context.get_prev_state_ids() + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.GuestAccess, None)]) + ) if event.membership == Membership.JOIN: if requester.is_guest: guest_can_join = await self._can_guest_join(prev_state_ids) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 4ac2c546bf2a..4cc8a2ecca7a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -20,7 +20,7 @@ from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes -from synapse.event_auth import get_user_power_level +from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY @@ -31,6 +31,7 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import measure_func +from ..storage.state 
import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent if TYPE_CHECKING: @@ -168,8 +169,12 @@ def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext ) -> Tuple[dict, int]: - prev_state_ids = await context.get_prev_state_ids() + event_types = auth_types_for_event(event.room_version, event) + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) pl_event_id = prev_state_ids.get(POWER_KEY) + if pl_event_id: # fastpath: if there's a power level event, that's all we need, and # not having a power level event is an extreme edge case diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e58301a8f087..ab630953ac93 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -634,16 +634,19 @@ async def get_state_groups_ids( return group_to_state - async def get_state_ids_for_group(self, state_group: int) -> StateMap[str]: + async def get_state_ids_for_group( + self, state_group: int, state_filter: Optional[StateFilter] = None + ) -> StateMap[str]: """Get the event IDs of all the state in the given state group Args: state_group: A state group for which we want to get the state IDs. 
+ state_filter: specifies the type of state event to fetch from DB, example: EventTypes.JoinRules Returns: Resolves to a map of (type, state_key) -> event_id """ - group_to_state = await self.get_state_for_groups((state_group,)) + group_to_state = await self.get_state_for_groups((state_group,), state_filter) return group_to_state[state_group] diff --git a/tests/test_state.py b/tests/test_state.py index 74a8ce6096b9..c6baea3d7604 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -88,7 +88,7 @@ async def get_state_groups_ids(self, room_id, event_ids): return groups - async def get_state_ids_for_group(self, state_group): + async def get_state_ids_for_group(self, state_group, state_filter=None): return self._group_to_state[state_group] async def store_state_group( From 10280fc9437038f7ef715873e491d54b0a6d2208 Mon Sep 17 00:00:00 2001 From: David Teller Date: Fri, 20 May 2022 14:53:25 +0200 Subject: [PATCH 075/181] Uniformize spam-checker API, part 1: the `Code` enum. (#12703) --- changelog.d/12703.misc | 1 + synapse/api/errors.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 changelog.d/12703.misc diff --git a/changelog.d/12703.misc b/changelog.d/12703.misc new file mode 100644 index 000000000000..9aaa1bbaa3d0 --- /dev/null +++ b/changelog.d/12703.misc @@ -0,0 +1 @@ +Convert namespace class `Codes` into a string enum. \ No newline at end of file diff --git a/synapse/api/errors.py b/synapse/api/errors.py index cb3b7323d568..9614be6b4e46 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -17,6 +17,7 @@ import logging import typing +from enum import Enum from http import HTTPStatus from typing import Any, Dict, List, Optional, Union @@ -30,7 +31,11 @@ logger = logging.getLogger(__name__) -class Codes: +class Codes(str, Enum): + """ + All known error codes, as an enum of strings. 
+ """ + UNRECOGNIZED = "M_UNRECOGNIZED" UNAUTHORIZED = "M_UNAUTHORIZED" FORBIDDEN = "M_FORBIDDEN" @@ -265,7 +270,9 @@ class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__( - self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED + self, + msg: str = "Unrecognized request", + errcode: str = Codes.UNRECOGNIZED, ): super().__init__(400, msg, errcode) From 39dee30f0120290d6ef3504815655df1a6cf47a5 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Fri, 20 May 2022 15:28:23 +0100 Subject: [PATCH 076/181] Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. (#12809) --- changelog.d/12672.feature | 1 + changelog.d/12672.misc | 1 - changelog.d/12809.feature | 1 + synapse/replication/tcp/commands.py | 12 ++++++++++++ synapse/replication/tcp/redis.py | 6 +++--- 5 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12672.feature delete mode 100644 changelog.d/12672.misc create mode 100644 changelog.d/12809.feature diff --git a/changelog.d/12672.feature b/changelog.d/12672.feature new file mode 100644 index 000000000000..b989e0d208c4 --- /dev/null +++ b/changelog.d/12672.feature @@ -0,0 +1 @@ +Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/changelog.d/12672.misc b/changelog.d/12672.misc deleted file mode 100644 index 265e0a801f78..000000000000 --- a/changelog.d/12672.misc +++ /dev/null @@ -1 +0,0 @@ -Lay some foundation work to allow workers to only subscribe to some kinds of messages, reducing replication traffic. 
\ No newline at end of file diff --git a/changelog.d/12809.feature b/changelog.d/12809.feature new file mode 100644 index 000000000000..b989e0d208c4 --- /dev/null +++ b/changelog.d/12809.feature @@ -0,0 +1 @@ +Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/synapse/replication/tcp/commands.py b/synapse/replication/tcp/commands.py index fe34948168ab..32f52e54d8c7 100644 --- a/synapse/replication/tcp/commands.py +++ b/synapse/replication/tcp/commands.py @@ -58,6 +58,15 @@ def get_logcontext_id(self) -> str: # by default, we just use the command name. return self.NAME + def redis_channel_name(self, prefix: str) -> str: + """ + Returns the Redis channel name upon which to publish this command. + + Args: + prefix: The prefix for the channel. + """ + return prefix + SC = TypeVar("SC", bound="_SimpleCommand") @@ -395,6 +404,9 @@ def __repr__(self) -> str: f"{self.user_agent!r}, {self.device_id!r}, {self.last_seen})" ) + def redis_channel_name(self, prefix: str) -> str: + return f"{prefix}/USER_IP" + class RemoteServerUpCommand(_SimpleCommand): """Sent when a worker has detected that a remote server is no longer diff --git a/synapse/replication/tcp/redis.py b/synapse/replication/tcp/redis.py index 73294654eff1..fd1c0ec6afa2 100644 --- a/synapse/replication/tcp/redis.py +++ b/synapse/replication/tcp/redis.py @@ -221,10 +221,10 @@ async def _async_send_command(self, cmd: Command) -> None: # remote instances. 
tcp_outbound_commands_counter.labels(cmd.NAME, "redis").inc() + channel_name = cmd.redis_channel_name(self.synapse_stream_prefix) + await make_deferred_yieldable( - self.synapse_outbound_redis_connection.publish( - self.synapse_stream_prefix, encoded_string - ) + self.synapse_outbound_redis_connection.publish(channel_name, encoded_string) ) From fbf904bd54071ca22c8918e0e106dd2fb008d0fb Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 10:28:56 +0100 Subject: [PATCH 077/181] Fix media thumbnails being unusable before the index had been added in the background. (#12823) --- changelog.d/12823.bugfix | 1 + synapse/storage/database.py | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 changelog.d/12823.bugfix diff --git a/changelog.d/12823.bugfix b/changelog.d/12823.bugfix new file mode 100644 index 000000000000..1a1f5957e712 --- /dev/null +++ b/changelog.d/12823.bugfix @@ -0,0 +1 @@ +Fix a bug, introduced in Synapse 1.21.0, that led to media thumbnails being unusable before the index has been added in the background. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 5ddb58a8a2ca..a78d68a9d7fe 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -90,6 +90,8 @@ "device_lists_remote_extremeties": "device_lists_remote_extremeties_unique_idx", "device_lists_remote_cache": "device_lists_remote_cache_unique_idx", "event_search": "event_search_event_id_idx", + "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx", + "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx", } From 4fef76ca348209b7c9dd3c17d5f3d8ef12623c1b Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 10:29:24 +0100 Subject: [PATCH 078/181] Remove Caddy from the Synapse workers image used in Complement. 
(#12818) --- changelog.d/12818.misc | 1 + docker/complement/SynapseWorkers.Dockerfile | 12 +--- .../conf-workers/caddy.complement.json | 72 ------------------- .../conf-workers/caddy.supervisord.conf | 7 -- .../start-complement-synapse-workers.sh | 18 ++++- docker/conf-workers/nginx.conf.j2 | 16 +++++ docker/configure_workers_and_start.py | 5 ++ 7 files changed, 38 insertions(+), 93 deletions(-) create mode 100644 changelog.d/12818.misc delete mode 100644 docker/complement/conf-workers/caddy.complement.json delete mode 100644 docker/complement/conf-workers/caddy.supervisord.conf diff --git a/changelog.d/12818.misc b/changelog.d/12818.misc new file mode 100644 index 000000000000..2f9dacc21dd9 --- /dev/null +++ b/changelog.d/12818.misc @@ -0,0 +1 @@ +Remove Caddy from the Synapse workers image used in Complement. \ No newline at end of file diff --git a/docker/complement/SynapseWorkers.Dockerfile b/docker/complement/SynapseWorkers.Dockerfile index 9a4438e7303b..99a09cbc2bab 100644 --- a/docker/complement/SynapseWorkers.Dockerfile +++ b/docker/complement/SynapseWorkers.Dockerfile @@ -6,12 +6,6 @@ # /~https://github.com/matrix-org/synapse/blob/develop/docker/README-testing.md#testing-with-postgresql-and-single-or-multi-process-synapse FROM matrixdotorg/synapse-workers -# Download a caddy server to stand in front of nginx and terminate TLS using Complement's -# custom CA. -# We include this near the top of the file in order to cache the result. 
-RUN curl -OL "/~https://github.com/caddyserver/caddy/releases/download/v2.3.0/caddy_2.3.0_linux_amd64.tar.gz" && \ - tar xzf caddy_2.3.0_linux_amd64.tar.gz && rm caddy_2.3.0_linux_amd64.tar.gz && mv caddy /root - # Install postgresql RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y postgresql-13 @@ -31,16 +25,12 @@ COPY conf-workers/workers-shared.yaml /conf/workers/shared.yaml WORKDIR /data -# Copy the caddy config -COPY conf-workers/caddy.complement.json /root/caddy.json - COPY conf-workers/postgres.supervisord.conf /etc/supervisor/conf.d/postgres.conf -COPY conf-workers/caddy.supervisord.conf /etc/supervisor/conf.d/caddy.conf # Copy the entrypoint COPY conf-workers/start-complement-synapse-workers.sh / -# Expose caddy's listener ports +# Expose nginx's listener ports EXPOSE 8008 8448 ENTRYPOINT ["/start-complement-synapse-workers.sh"] diff --git a/docker/complement/conf-workers/caddy.complement.json b/docker/complement/conf-workers/caddy.complement.json deleted file mode 100644 index 09e2136af2e2..000000000000 --- a/docker/complement/conf-workers/caddy.complement.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "apps": { - "http": { - "servers": { - "srv0": { - "listen": [ - ":8448" - ], - "routes": [ - { - "match": [ - { - "host": [ - "{{ server_name }}" - ] - } - ], - "handle": [ - { - "handler": "subroute", - "routes": [ - { - "handle": [ - { - "handler": "reverse_proxy", - "upstreams": [ - { - "dial": "localhost:8008" - } - ] - } - ] - } - ] - } - ], - "terminal": true - } - ] - } - } - }, - "tls": { - "automation": { - "policies": [ - { - "subjects": [ - "{{ server_name }}" - ], - "issuers": [ - { - "module": "internal" - } - ], - "on_demand": true - } - ] - } - }, - "pki": { - "certificate_authorities": { - "local": { - "name": "Complement CA", - "root": { - "certificate": "/complement/ca/ca.crt", - "private_key": "/complement/ca/ca.key" - } - } - } - } - } - } diff --git 
a/docker/complement/conf-workers/caddy.supervisord.conf b/docker/complement/conf-workers/caddy.supervisord.conf deleted file mode 100644 index d9ddb51dac46..000000000000 --- a/docker/complement/conf-workers/caddy.supervisord.conf +++ /dev/null @@ -1,7 +0,0 @@ -[program:caddy] -command=/usr/local/bin/prefix-log /root/caddy run --config /root/caddy.json -autorestart=unexpected -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 diff --git a/docker/complement/conf-workers/start-complement-synapse-workers.sh b/docker/complement/conf-workers/start-complement-synapse-workers.sh index b9a6b55bbe8e..a10b57a53f5e 100755 --- a/docker/complement/conf-workers/start-complement-synapse-workers.sh +++ b/docker/complement/conf-workers/start-complement-synapse-workers.sh @@ -9,9 +9,6 @@ function log { echo "$d $@" } -# Replace the server name in the caddy config -sed -i "s/{{ server_name }}/${SERVER_NAME}/g" /root/caddy.json - # Set the server name of the homeserver export SYNAPSE_SERVER_NAME=${SERVER_NAME} @@ -39,6 +36,21 @@ export SYNAPSE_WORKER_TYPES="\ appservice, \ pusher" + +# Generate a TLS key, then generate a certificate by having Complement's CA sign it +# Note that both the key and certificate are in PEM format (not DER). 
+openssl genrsa -out /conf/server.tls.key 2048 + +openssl req -new -key /conf/server.tls.key -out /conf/server.tls.csr \ + -subj "/CN=${SERVER_NAME}" + +openssl x509 -req -in /conf/server.tls.csr \ + -CA /complement/ca/ca.crt -CAkey /complement/ca/ca.key -set_serial 1 \ + -out /conf/server.tls.crt + +export SYNAPSE_TLS_CERT=/conf/server.tls.crt +export SYNAPSE_TLS_KEY=/conf/server.tls.key + # Run the script that writes the necessary config files and starts supervisord, which in turn # starts everything else exec /configure_workers_and_start.py diff --git a/docker/conf-workers/nginx.conf.j2 b/docker/conf-workers/nginx.conf.j2 index 1081979e06a0..967fc65e798c 100644 --- a/docker/conf-workers/nginx.conf.j2 +++ b/docker/conf-workers/nginx.conf.j2 @@ -9,6 +9,22 @@ server { listen 8008; listen [::]:8008; + {% if tls_cert_path is not none and tls_key_path is not none %} + listen 8448 ssl; + listen [::]:8448 ssl; + + ssl_certificate {{ tls_cert_path }}; + ssl_certificate_key {{ tls_key_path }}; + + # Some directives from cipherlist.eu (fka cipherli.st): + ssl_protocols TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; + ssl_prefer_server_ciphers on; + ssl_ciphers "EECDH+AESGCM:EDH+AESGCM:AES256+EECDH:AES256+EDH"; + ssl_ecdh_curve secp384r1; # Requires nginx >= 1.1.0 + ssl_session_cache shared:SSL:10m; + ssl_session_tickets off; # Requires nginx >= 1.5.9 + {% endif %} + server_name localhost; # Nginx by default only allows file uploads up to 1M in size diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index b2b7938ae801..f46b9b675e90 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -21,6 +21,9 @@ # * SYNAPSE_REPORT_STATS: Whether to report stats. # * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG # below. Leave empty for no workers, or set to '*' for all possible workers. +# * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. 
+# * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, +# Nginx will be configured to serve TLS on port 8448. # # NOTE: According to Complement's ENTRYPOINT expectations for a homeserver image (as defined # in the project's README), this script may be run multiple times, and functionality should @@ -501,6 +504,8 @@ def generate_worker_files( "/etc/nginx/conf.d/matrix-synapse.conf", worker_locations=nginx_location_config, upstream_directives=nginx_upstream_config, + tls_cert_path=os.environ.get("SYNAPSE_TLS_CERT"), + tls_key_path=os.environ.get("SYNAPSE_TLS_KEY"), ) # Supervisord config From a6ab3f56196d0067a5be25917c24988a734f0d51 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 23 May 2022 11:28:14 +0100 Subject: [PATCH 079/181] Add a windows->unix file endings commit to git blame ignore file (#12824) --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 83ddd568c207..50d28c68eeb8 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -6,3 +6,6 @@ aff1eb7c671b0a3813407321d2702ec46c71fa56 # Update black to 20.8b1 (#9381). 0a00b7ff14890987f09112a2ae696c61001e6cf1 + +# Convert tests/rest/admin/test_room.py to unix file endings (#7953). +c4268e3da64f1abb5b31deaeb5769adb6510c0a7 \ No newline at end of file From 438925c422fec9bffe6e90633abe8875c0c5fb5c Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 12:20:30 +0100 Subject: [PATCH 080/181] Fix Complement `TestCanRegisterAdmin` with workers, by adding Complement's shared registration secret. 
(#12819) --- changelog.d/12819.misc | 1 + docker/complement/conf-workers/workers-shared.yaml | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 changelog.d/12819.misc diff --git a/changelog.d/12819.misc b/changelog.d/12819.misc new file mode 100644 index 000000000000..7a03102a632d --- /dev/null +++ b/changelog.d/12819.misc @@ -0,0 +1 @@ +Add Complement's shared registration secret to the Complement worker image. This fixes tests that depend on it. \ No newline at end of file diff --git a/docker/complement/conf-workers/workers-shared.yaml b/docker/complement/conf-workers/workers-shared.yaml index 86ee11ecd0e5..cd7b50c65cc3 100644 --- a/docker/complement/conf-workers/workers-shared.yaml +++ b/docker/complement/conf-workers/workers-shared.yaml @@ -5,6 +5,12 @@ enable_registration: true enable_registration_without_verification: true bcrypt_rounds: 4 +## Registration ## + +# Needed by Complement to register admin users +# DO NOT USE in a production configuration! This should be a random secret. 
+registration_shared_secret: complement + ## Federation ## # trust certs signed by Complement's CA From 444588c5fc5e4fd0f3796d389fe5f062acc55286 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 23 May 2022 13:23:26 +0200 Subject: [PATCH 081/181] Add some type hints to tests files (#12833) Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- changelog.d/12833.misc | 1 + mypy.ini | 8 -------- tests/http/test_servlet.py | 14 ++++++++------ tests/http/test_site.py | 2 +- tests/scripts/test_new_matrix_user.py | 13 +++++++------ tests/storage/test_base.py | 2 +- tests/storage/test_roommember.py | 2 +- 7 files changed, 19 insertions(+), 23 deletions(-) create mode 100644 changelog.d/12833.misc diff --git a/changelog.d/12833.misc b/changelog.d/12833.misc new file mode 100644 index 000000000000..fad5df1afa34 --- /dev/null +++ b/changelog.d/12833.misc @@ -0,0 +1 @@ +Add some type hints to test files. \ No newline at end of file diff --git a/mypy.ini b/mypy.ini index df2622df983a..fe3e3f9b8efd 100644 --- a/mypy.ini +++ b/mypy.ini @@ -41,16 +41,11 @@ exclude = (?x) |tests/events/test_utils.py |tests/federation/test_federation_catch_up.py |tests/federation/test_federation_sender.py - |tests/federation/test_federation_server.py |tests/federation/transport/test_knocking.py - |tests/federation/transport/test_server.py |tests/handlers/test_typing.py |tests/http/federation/test_matrix_federation_agent.py |tests/http/federation/test_srv_resolver.py - |tests/http/test_fedclient.py |tests/http/test_proxyagent.py - |tests/http/test_servlet.py - |tests/http/test_site.py |tests/logging/__init__.py |tests/logging/test_terse_json.py |tests/module_api/test_api.py @@ -59,12 +54,9 @@ exclude = (?x) |tests/push/test_push_rule_evaluator.py |tests/rest/client/test_transactions.py |tests/rest/media/v1/test_media_storage.py - |tests/scripts/test_new_matrix_user.py |tests/server.py 
|tests/server_notices/test_resource_limits_server_notices.py |tests/state/test_v2.py - |tests/storage/test_base.py - |tests/storage/test_roommember.py |tests/test_metrics.py |tests/test_server.py |tests/test_state.py diff --git a/tests/http/test_servlet.py b/tests/http/test_servlet.py index ad521525cfaa..b3655d7b44c2 100644 --- a/tests/http/test_servlet.py +++ b/tests/http/test_servlet.py @@ -49,19 +49,21 @@ def test_parse_json_value(self): """Basic tests for parse_json_value_from_request.""" # Test round-tripping. obj = {"foo": 1} - result = parse_json_value_from_request(make_request(obj)) - self.assertEqual(result, obj) + result1 = parse_json_value_from_request(make_request(obj)) + self.assertEqual(result1, obj) # Results don't have to be objects. - result = parse_json_value_from_request(make_request(b'["foo"]')) - self.assertEqual(result, ["foo"]) + result2 = parse_json_value_from_request(make_request(b'["foo"]')) + self.assertEqual(result2, ["foo"]) # Test empty. with self.assertRaises(SynapseError): parse_json_value_from_request(make_request(b"")) - result = parse_json_value_from_request(make_request(b""), allow_empty_body=True) - self.assertIsNone(result) + result3 = parse_json_value_from_request( + make_request(b""), allow_empty_body=True + ) + self.assertIsNone(result3) # Invalid UTF-8. with self.assertRaises(SynapseError): diff --git a/tests/http/test_site.py b/tests/http/test_site.py index 8c13b4f6931e..b2dbf76d33b1 100644 --- a/tests/http/test_site.py +++ b/tests/http/test_site.py @@ -36,7 +36,7 @@ def test_large_request(self): # as a control case, first send a regular request. 
# complete the connection and wire it up to a fake transport - client_address = IPv6Address("TCP", "::1", "2345") + client_address = IPv6Address("TCP", "::1", 2345) protocol = factory.buildProtocol(client_address) transport = StringTransport() protocol.makeConnection(transport) diff --git a/tests/scripts/test_new_matrix_user.py b/tests/scripts/test_new_matrix_user.py index 19a145eeb65e..22f99c6ab1ce 100644 --- a/tests/scripts/test_new_matrix_user.py +++ b/tests/scripts/test_new_matrix_user.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import List from unittest.mock import Mock, patch from synapse._scripts.register_new_matrix_user import request_registration @@ -49,8 +50,8 @@ def post(url, json=None, verify=None): requests.post = post # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( @@ -85,8 +86,8 @@ def get(url, verify=None): requests.get = get # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( @@ -137,8 +138,8 @@ def post(url, json=None, verify=None): requests.post = post # The fake stdout will be written here - out = [] - err_code = [] + out: List[str] = [] + err_code: List[int] = [] with patch("synapse._scripts.register_new_matrix_user.requests", requests): request_registration( diff --git a/tests/storage/test_base.py b/tests/storage/test_base.py index a8ffb52c0503..cce8e75c7475 100644 --- a/tests/storage/test_base.py +++ b/tests/storage/test_base.py @@ -60,7 +60,7 @@ def runWithConnection(func, *args, **kwargs): db = DatabasePool(Mock(), Mock(config=sqlite_config), fake_engine) db._db_pool = self.db_pool - self.datastore = SQLBaseStore(db, None, hs) + 
self.datastore = SQLBaseStore(db, None, hs) # type: ignore[arg-type] @defer.inlineCallbacks def test_insert_1col(self): diff --git a/tests/storage/test_roommember.py b/tests/storage/test_roommember.py index a2a9c05f24c8..1218786d79d8 100644 --- a/tests/storage/test_roommember.py +++ b/tests/storage/test_roommember.py @@ -34,7 +34,7 @@ class RoomMemberStoreTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None: + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: TestHomeServer) -> None: # type: ignore[override] # We can't test the RoomMemberStore on its own without the other event # storage logic From 67aae05ece9b6e07fedc73f737c0d6db6351d6c7 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 14:11:06 +0100 Subject: [PATCH 082/181] Support registering Application Services when running with workers under Complement. (#12826) Co-authored-by: Patrick Cloke --- changelog.d/12826.misc | 1 + .../start-complement-synapse-workers.sh | 5 +++++ docker/conf-workers/shared.yaml.j2 | 11 ++++++++++- docker/configure_workers_and_start.py | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12826.misc diff --git a/changelog.d/12826.misc b/changelog.d/12826.misc new file mode 100644 index 000000000000..f5e91f1ed592 --- /dev/null +++ b/changelog.d/12826.misc @@ -0,0 +1 @@ +Support registering Application Services when running with workers under Complement. 
\ No newline at end of file diff --git a/docker/complement/conf-workers/start-complement-synapse-workers.sh b/docker/complement/conf-workers/start-complement-synapse-workers.sh index a10b57a53f5e..b7e24440006f 100755 --- a/docker/complement/conf-workers/start-complement-synapse-workers.sh +++ b/docker/complement/conf-workers/start-complement-synapse-workers.sh @@ -36,6 +36,11 @@ export SYNAPSE_WORKER_TYPES="\ appservice, \ pusher" +# Add Complement's appservice registration directory, if there is one +# (It can be absent when there are no application services in this test!) +if [ -d /complement/appservice ]; then + export SYNAPSE_AS_REGISTRATION_DIR=/complement/appservice +fi # Generate a TLS key, then generate a certificate by having Complement's CA sign it # Note that both the key and certificate are in PEM format (not DER). diff --git a/docker/conf-workers/shared.yaml.j2 b/docker/conf-workers/shared.yaml.j2 index f94b8c6aca0f..644ed788f3d5 100644 --- a/docker/conf-workers/shared.yaml.j2 +++ b/docker/conf-workers/shared.yaml.j2 @@ -6,4 +6,13 @@ redis: enabled: true -{{ shared_worker_config }} \ No newline at end of file +{% if appservice_registrations is not none %} +## Application Services ## +# A list of application service config files to use. +app_service_config_files: +{%- for path in appservice_registrations %} + - "{{ path }}" +{%- endfor %} +{%- endif %} + +{{ shared_worker_config }} diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index f46b9b675e90..b6ad14117325 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -21,6 +21,8 @@ # * SYNAPSE_REPORT_STATS: Whether to report stats. # * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG # below. Leave empty for no workers, or set to '*' for all possible workers. 
+# * SYNAPSE_AS_REGISTRATION_DIR: If specified, a directory in which .yaml and .yml files +# will be treated as Application Service registration files. # * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format. # * SYNAPSE_TLS_KEY: Path to a TLS key. If this and SYNAPSE_TLS_CERT are specified, # Nginx will be configured to serve TLS on port 8448. @@ -32,6 +34,7 @@ import os import subprocess import sys +from pathlib import Path from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Set import jinja2 @@ -491,11 +494,23 @@ def generate_worker_files( master_log_config = generate_worker_log_config(environ, "master", data_dir) shared_config["log_config"] = master_log_config + # Find application service registrations + appservice_registrations = None + appservice_registration_dir = os.environ.get("SYNAPSE_AS_REGISTRATION_DIR") + if appservice_registration_dir: + # Scan for all YAML files that should be application service registrations. + appservice_registrations = [ + str(reg_path.resolve()) + for reg_path in Path(appservice_registration_dir).iterdir() + if reg_path.suffix.lower() in (".yaml", ".yml") + ] + # Shared homeserver config convert( "/conf/shared.yaml.j2", "/conf/workers/shared.yaml", shared_worker_config=yaml.dump(shared_config), + appservice_registrations=appservice_registrations, ) # Nginx config From 7a68203cde312c57137735a19c274a6d8470a2bf Mon Sep 17 00:00:00 2001 From: reivilibre Date: Mon, 23 May 2022 17:27:05 +0100 Subject: [PATCH 083/181] Disable 'faster room join' Complement tests when testing against Synapse with workers. 
(#12842) --- changelog.d/12842.misc | 1 + scripts-dev/complement.sh | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12842.misc diff --git a/changelog.d/12842.misc b/changelog.d/12842.misc new file mode 100644 index 000000000000..cec3f97d86fd --- /dev/null +++ b/changelog.d/12842.misc @@ -0,0 +1 @@ +Disable 'faster room join' Complement tests when testing against Synapse with workers. \ No newline at end of file diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 190df6909a6a..ca476d9a5e61 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -45,6 +45,8 @@ docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . extra_test_args=() +test_tags="synapse_blacklist,msc2716,msc3030" + # If we're using workers, modify the docker files slightly. if [[ -n "$WORKERS" ]]; then # Build the workers docker image (from the base Synapse image). @@ -65,6 +67,10 @@ if [[ -n "$WORKERS" ]]; then else export COMPLEMENT_BASE_IMAGE=complement-synapse COMPLEMENT_DOCKERFILE=Dockerfile + + # We only test faster room joins on monoliths, because they are purposefully + # being developed without worker support to start with. + test_tags="$test_tags,faster_joins" fi # Build the Complement image from the Synapse image we just built. @@ -73,4 +79,5 @@ docker build -t $COMPLEMENT_BASE_IMAGE -f "docker/complement/$COMPLEMENT_DOCKERF # Run the tests! echo "Images built; running complement" cd "$COMPLEMENT_DIR" -go test -v -tags synapse_blacklist,msc2716,msc3030,faster_joins -count=1 "${extra_test_args[@]}" "$@" ./tests/... + +go test -v -tags $test_tags -count=1 "${extra_test_args[@]}" "$@" ./tests/... 
From a608ac847b36dd72634f21502be42e785add8b65 Mon Sep 17 00:00:00 2001 From: Jess Porter Date: Mon, 23 May 2022 17:36:21 +0100 Subject: [PATCH 084/181] add SpamChecker callback for silently dropping inbound federated events (#12744) Signed-off-by: jesopo --- changelog.d/12744.feature | 1 + docs/modules/spam_checker_callbacks.md | 18 ++++++++++ synapse/events/spamcheck.py | 40 +++++++++++++++++++++ synapse/federation/federation_server.py | 48 ++++++++++++++++++++++--- synapse/module_api/__init__.py | 5 +++ 5 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12744.feature diff --git a/changelog.d/12744.feature b/changelog.d/12744.feature new file mode 100644 index 000000000000..9836d94f8ca6 --- /dev/null +++ b/changelog.d/12744.feature @@ -0,0 +1 @@ +Add a `should_drop_federated_event` callback to `SpamChecker` to disregard inbound federated events before they take up much processing power, in an emergency. diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 472d95718087..27c5a0ed5cfe 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -249,6 +249,24 @@ callback returns `False`, Synapse falls through to the next one. The value of th callback that does not return `False` will be used. If this happens, Synapse will not call any of the subsequent implementations of this callback. +### `should_drop_federated_event` + +_First introduced in Synapse v1.60.0_ + +```python +async def should_drop_federated_event(event: "synapse.events.EventBase") -> bool +``` + +Called when checking whether a remote server can federate an event with us. **Returning +`True` from this function will silently drop a federated event and split-brain our view +of a room's DAG, and thus you shouldn't use this callback unless you know what you are +doing.** + +If multiple modules implement this callback, they will be considered in order. 
If a +callback returns `False`, Synapse falls through to the next one. The value of the first +callback that does not return `False` will be used. If this happens, Synapse will not call +any of the subsequent implementations of this callback. + ## Example The example below is a module that implements the spam checker callback diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index f30207376ae2..61bcbe2abe60 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -44,6 +44,10 @@ ["synapse.events.EventBase"], Awaitable[Union[bool, str]], ] +SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ + ["synapse.events.EventBase"], + Awaitable[Union[bool, str]], +] USER_MAY_JOIN_ROOM_CALLBACK = Callable[[str, str, bool], Awaitable[bool]] USER_MAY_INVITE_CALLBACK = Callable[[str, str, str], Awaitable[bool]] USER_MAY_SEND_3PID_INVITE_CALLBACK = Callable[[str, str, str, str], Awaitable[bool]] @@ -168,6 +172,9 @@ def __init__(self, hs: "synapse.server.HomeServer") -> None: self.clock = hs.get_clock() self._check_event_for_spam_callbacks: List[CHECK_EVENT_FOR_SPAM_CALLBACK] = [] + self._should_drop_federated_event_callbacks: List[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = [] self._user_may_join_room_callbacks: List[USER_MAY_JOIN_ROOM_CALLBACK] = [] self._user_may_invite_callbacks: List[USER_MAY_INVITE_CALLBACK] = [] self._user_may_send_3pid_invite_callbacks: List[ @@ -191,6 +198,9 @@ def __init__(self, hs: "synapse.server.HomeServer") -> None: def register_callbacks( self, check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + should_drop_federated_event: Optional[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = None, user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, @@ -209,6 +219,11 @@ def register_callbacks( if check_event_for_spam is not None: 
self._check_event_for_spam_callbacks.append(check_event_for_spam) + if should_drop_federated_event is not None: + self._should_drop_federated_event_callbacks.append( + should_drop_federated_event + ) + if user_may_join_room is not None: self._user_may_join_room_callbacks.append(user_may_join_room) @@ -268,6 +283,31 @@ async def check_event_for_spam( return False + async def should_drop_federated_event( + self, event: "synapse.events.EventBase" + ) -> Union[bool, str]: + """Checks if a given federated event is considered "spammy" by this + server. + + If the server considers an event spammy, it will be silently dropped, + and in doing so will split-brain our view of the room's DAG. + + Args: + event: the event to be checked + + Returns: + True if the event should be silently dropped + """ + for callback in self._should_drop_federated_event_callbacks: + with Measure( + self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) + ): + res: Union[bool, str] = await delay_cancellation(callback(event)) + if res: + return res + + return False + async def user_may_join_room( self, user_id: str, room_id: str, is_invited: bool ) -> bool: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 884b5d60b4f9..b8232e5257d2 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -110,6 +110,7 @@ def __init__(self, hs: "HomeServer"): self.handler = hs.get_federation_handler() self.storage = hs.get_storage() + self._spam_checker = hs.get_spam_checker() self._federation_event_handler = hs.get_federation_event_handler() self.state = hs.get_state_handler() self._event_auth_handler = hs.get_event_auth_handler() @@ -1019,6 +1020,12 @@ async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None: except SynapseError as e: raise FederationError("ERROR", e.code, e.msg, affected=pdu.event_id) + if await self._spam_checker.should_drop_federated_event(pdu): + logger.warning( + 
"Unstaged federated event contains spam, dropping %s", pdu.event_id + ) + return + # Add the event to our staging area await self.store.insert_received_event_to_staging(origin, pdu) @@ -1032,6 +1039,41 @@ async def _handle_received_pdu(self, origin: str, pdu: EventBase) -> None: pdu.room_id, room_version, lock, origin, pdu ) + async def _get_next_nonspam_staged_event_for_room( + self, room_id: str, room_version: RoomVersion + ) -> Optional[Tuple[str, EventBase]]: + """Fetch the first non-spam event from staging queue. + + Args: + room_id: the room to fetch the first non-spam event in. + room_version: the version of the room. + + Returns: + The first non-spam event in that room. + """ + + while True: + # We need to do this check outside the lock to avoid a race between + # a new event being inserted by another instance and it attempting + # to acquire the lock. + next = await self.store.get_next_staged_event_for_room( + room_id, room_version + ) + + if next is None: + return None + + origin, event = next + + if await self._spam_checker.should_drop_federated_event(event): + logger.warning( + "Staged federated event contains spam, dropping %s", + event.event_id, + ) + continue + + return next + @wrap_as_background_process("_process_incoming_pdus_in_room_inner") async def _process_incoming_pdus_in_room_inner( self, @@ -1109,12 +1151,10 @@ async def _process_incoming_pdus_in_room_inner( (self._clock.time_msec() - received_ts) / 1000 ) - # We need to do this check outside the lock to avoid a race between - # a new event being inserted by another instance and it attempting - # to acquire the lock. 
- next = await self.store.get_next_staged_event_for_room( + next = await self._get_next_nonspam_staged_event_for_room( room_id, room_version ) + if not next: break diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 73f92d2df8d6..c4f661bb9382 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -47,6 +47,7 @@ CHECK_MEDIA_FILE_FOR_SPAM_CALLBACK, CHECK_REGISTRATION_FOR_SPAM_CALLBACK, CHECK_USERNAME_FOR_SPAM_CALLBACK, + SHOULD_DROP_FEDERATED_EVENT_CALLBACK, USER_MAY_CREATE_ROOM_ALIAS_CALLBACK, USER_MAY_CREATE_ROOM_CALLBACK, USER_MAY_INVITE_CALLBACK, @@ -234,6 +235,9 @@ def register_spam_checker_callbacks( self, *, check_event_for_spam: Optional[CHECK_EVENT_FOR_SPAM_CALLBACK] = None, + should_drop_federated_event: Optional[ + SHOULD_DROP_FEDERATED_EVENT_CALLBACK + ] = None, user_may_join_room: Optional[USER_MAY_JOIN_ROOM_CALLBACK] = None, user_may_invite: Optional[USER_MAY_INVITE_CALLBACK] = None, user_may_send_3pid_invite: Optional[USER_MAY_SEND_3PID_INVITE_CALLBACK] = None, @@ -254,6 +258,7 @@ def register_spam_checker_callbacks( """ return self._spam_checker.register_callbacks( check_event_for_spam=check_event_for_spam, + should_drop_federated_event=should_drop_federated_event, user_may_join_room=user_may_join_room, user_may_invite=user_may_invite, user_may_send_3pid_invite=user_may_send_3pid_invite, From 4cc4229cd7a55d2556c798fecbb1c9660dc821c8 Mon Sep 17 00:00:00 2001 From: Brendan Abolivier Date: Mon, 23 May 2022 19:18:23 +0200 Subject: [PATCH 085/181] Prevent expired events from being filtered out when retention is disabled (#12611) Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Co-authored-by: Patrick Cloke --- changelog.d/12611.bugfix | 1 + synapse/handlers/pagination.py | 2 +- synapse/storage/databases/main/room.py | 45 ++++++++++++++------------ synapse/types.py | 6 ++++ synapse/visibility.py | 6 ++-- tests/rest/client/test_relations.py | 8 ++--- 
tests/rest/client/test_retention.py | 35 +++++++++++++++++--- 7 files changed, 71 insertions(+), 32 deletions(-) create mode 100644 changelog.d/12611.bugfix diff --git a/changelog.d/12611.bugfix b/changelog.d/12611.bugfix new file mode 100644 index 000000000000..093c45a20b7f --- /dev/null +++ b/changelog.d/12611.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.7.0 that would prevent events from being sent to clients if there's a retention policy in the room when the support for retention policies is disabled. diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 6ae88add9526..19a440705027 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -239,7 +239,7 @@ async def purge_history_for_rooms_in_range( # defined in the server's configuration, we can safely assume that's the # case and use it for this room. max_lifetime = ( - retention_policy["max_lifetime"] or self._retention_default_max_lifetime + retention_policy.max_lifetime or self._retention_default_max_lifetime ) # Cap the effective max_lifetime to be within the range allowed in the diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 87e9482c6054..ded15b92ef84 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -45,7 +45,7 @@ from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.types import Cursor from synapse.storage.util.id_generators import IdGenerator -from synapse.types import JsonDict, ThirdPartyInstanceID +from synapse.types import JsonDict, RetentionPolicy, ThirdPartyInstanceID from synapse.util import json_encoder from synapse.util.caches.descriptors import cached from synapse.util.stringutils import MXC_REGEX @@ -699,7 +699,7 @@ def delete_ratelimit_txn(txn: LoggingTransaction) -> None: await self.db_pool.runInteraction("delete_ratelimit", delete_ratelimit_txn) @cached() - async def 
get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]: + async def get_retention_policy_for_room(self, room_id: str) -> RetentionPolicy: """Get the retention policy for a given room. If no retention policy has been found for this room, returns a policy defined @@ -707,12 +707,20 @@ async def get_retention_policy_for_room(self, room_id: str) -> Dict[str, int]: the 'max_lifetime' if no default policy has been defined in the server's configuration). + If support for retention policies is disabled, a policy with a 'min_lifetime' and + 'max_lifetime' of None is returned. + Args: room_id: The ID of the room to get the retention policy of. Returns: A dict containing "min_lifetime" and "max_lifetime" for this room. """ + # If the room retention feature is disabled, return a policy with no minimum nor + # maximum. This prevents incorrectly filtering out events when sending to + # the client. + if not self.config.retention.retention_enabled: + return RetentionPolicy() def get_retention_policy_for_room_txn( txn: LoggingTransaction, @@ -736,10 +744,10 @@ def get_retention_policy_for_room_txn( # If we don't know this room ID, ret will be None, in this case return the default # policy. 
if not ret: - return { - "min_lifetime": self.config.retention.retention_default_min_lifetime, - "max_lifetime": self.config.retention.retention_default_max_lifetime, - } + return RetentionPolicy( + min_lifetime=self.config.retention.retention_default_min_lifetime, + max_lifetime=self.config.retention.retention_default_max_lifetime, + ) min_lifetime = ret[0]["min_lifetime"] max_lifetime = ret[0]["max_lifetime"] @@ -754,10 +762,10 @@ def get_retention_policy_for_room_txn( if max_lifetime is None: max_lifetime = self.config.retention.retention_default_max_lifetime - return { - "min_lifetime": min_lifetime, - "max_lifetime": max_lifetime, - } + return RetentionPolicy( + min_lifetime=min_lifetime, + max_lifetime=max_lifetime, + ) async def get_media_mxcs_in_room(self, room_id: str) -> Tuple[List[str], List[str]]: """Retrieves all the local and remote media MXC URIs in a given room @@ -994,7 +1002,7 @@ def _quarantine_media_txn( async def get_rooms_for_retention_period_in_range( self, min_ms: Optional[int], max_ms: Optional[int], include_null: bool = False - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: """Retrieves all of the rooms within the given retention range. Optionally includes the rooms which don't have a retention policy. 
@@ -1016,7 +1024,7 @@ async def get_rooms_for_retention_period_in_range( def get_rooms_for_retention_period_in_range_txn( txn: LoggingTransaction, - ) -> Dict[str, Dict[str, Optional[int]]]: + ) -> Dict[str, RetentionPolicy]: range_conditions = [] args = [] @@ -1047,10 +1055,10 @@ def get_rooms_for_retention_period_in_range_txn( rooms_dict = {} for row in rows: - rooms_dict[row["room_id"]] = { - "min_lifetime": row["min_lifetime"], - "max_lifetime": row["max_lifetime"], - } + rooms_dict[row["room_id"]] = RetentionPolicy( + min_lifetime=row["min_lifetime"], + max_lifetime=row["max_lifetime"], + ) if include_null: # If required, do a second query that retrieves all of the rooms we know @@ -1065,10 +1073,7 @@ def get_rooms_for_retention_period_in_range_txn( # policy in its state), add it with a null policy. for row in rows: if row["room_id"] not in rooms_dict: - rooms_dict[row["room_id"]] = { - "min_lifetime": None, - "max_lifetime": None, - } + rooms_dict[row["room_id"]] = RetentionPolicy() return rooms_dict diff --git a/synapse/types.py b/synapse/types.py index bd8071d51d78..6f7128ddd604 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -932,3 +932,9 @@ class UserProfile(TypedDict): user_id: str display_name: Optional[str] avatar_url: Optional[str] + + +@attr.s(auto_attribs=True, frozen=True, slots=True) +class RetentionPolicy: + min_lifetime: Optional[int] = None + max_lifetime: Optional[int] = None diff --git a/synapse/visibility.py b/synapse/visibility.py index de6d2ffc526a..da4af02796c3 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -22,7 +22,7 @@ from synapse.events.utils import prune_event from synapse.storage import Storage from synapse.storage.state import StateFilter -from synapse.types import StateMap, get_domain_from_id +from synapse.types import RetentionPolicy, StateMap, get_domain_from_id logger = logging.getLogger(__name__) @@ -94,7 +94,7 @@ async def filter_events_for_client( if filter_send_to_client: room_ids = {e.room_id 
for e in events} - retention_policies = {} + retention_policies: Dict[str, RetentionPolicy] = {} for room_id in room_ids: retention_policies[ @@ -137,7 +137,7 @@ def allowed(event: EventBase) -> Optional[EventBase]: # events. if not event.is_state(): retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.get("max_lifetime") + max_lifetime = retention_policy.max_lifetime if max_lifetime is not None: oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 27dee8f6975d..bc9cc51b92d5 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -995,7 +995,7 @@ def assert_annotations(bundled_aggregations: JsonDict) -> None: bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.ANNOTATION, assert_annotations, 6) def test_annotation_to_annotation(self) -> None: """Any relation to an annotation should be ignored.""" @@ -1031,7 +1031,7 @@ def assert_annotations(bundled_aggregations: JsonDict) -> None: bundled_aggregations, ) - self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 7) + self._test_bundled_aggregations(RelationTypes.REFERENCE, assert_annotations, 6) def test_thread(self) -> None: """ @@ -1060,7 +1060,7 @@ def assert_thread(bundled_aggregations: JsonDict) -> None: bundled_aggregations.get("latest_event"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_thread_with_bundled_aggregations_for_latest(self) -> None: """ @@ -1106,7 +1106,7 @@ def assert_thread(bundled_aggregations: JsonDict) -> None: bundled_aggregations["latest_event"].get("unsigned"), ) - self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 10) + 
self._test_bundled_aggregations(RelationTypes.THREAD, assert_thread, 9) def test_nested_thread(self) -> None: """ diff --git a/tests/rest/client/test_retention.py b/tests/rest/client/test_retention.py index 7b8fe6d02522..2cd7a9e6c5f8 100644 --- a/tests/rest/client/test_retention.py +++ b/tests/rest/client/test_retention.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from typing import Any, Dict from unittest.mock import Mock from twisted.test.proto_helpers import MemoryReactor @@ -252,16 +253,24 @@ class RetentionNoDefaultPolicyTestCase(unittest.HomeserverTestCase): room.register_servlets, ] - def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: - config = self.default_config() - config["retention"] = { + def default_config(self) -> Dict[str, Any]: + config = super().default_config() + + retention_config = { "enabled": True, } + # Update this config with what's in the default config so that + # override_config works as expected. 
+ retention_config.update(config.get("retention", {})) + config["retention"] = retention_config + + return config + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: mock_federation_client = Mock(spec=["backfill"]) self.hs = self.setup_test_homeserver( - config=config, federation_client=mock_federation_client, ) return self.hs @@ -295,6 +304,24 @@ def test_state_policy(self) -> None: self._test_retention(room_id, expected_code_for_first_event=404) + @unittest.override_config({"retention": {"enabled": False}}) + def test_visibility_when_disabled(self) -> None: + """Retention policies should be ignored when the retention feature is disabled.""" + room_id = self.helper.create_room_as(self.user_id, tok=self.token) + + self.helper.send_state( + room_id=room_id, + event_type=EventTypes.Retention, + body={"max_lifetime": one_day_ms}, + tok=self.token, + ) + + resp = self.helper.send(room_id=room_id, body="test", tok=self.token) + + self.reactor.advance(one_day_ms * 2 / 1000) + + self.get_event(room_id, resp["event_id"]) + def _test_retention( self, room_id: str, expected_code_for_first_event: int = 200 ) -> None: From 28199e93579b5a73841a95ed4d355322227432b5 Mon Sep 17 00:00:00 2001 From: David Teller Date: Mon, 23 May 2022 19:27:39 +0200 Subject: [PATCH 086/181] Uniformize spam-checker API, part 2: check_event_for_spam (#12808) Signed-off-by: David Teller --- changelog.d/12808.feature | 1 + docs/modules/spam_checker_callbacks.md | 27 ++++++++------ docs/upgrade.md | 29 +++++++++++++++ synapse/api/errors.py | 4 +-- synapse/events/spamcheck.py | 49 ++++++++++++++++++++------ synapse/federation/federation_base.py | 5 +-- synapse/handlers/message.py | 11 +++--- synapse/module_api/__init__.py | 5 +++ synapse/module_api/errors.py | 2 ++ synapse/spam_checker_api/__init__.py | 27 +++++++++++++- 10 files changed, 129 insertions(+), 31 deletions(-) create mode 100644 changelog.d/12808.feature diff --git a/changelog.d/12808.feature 
b/changelog.d/12808.feature new file mode 100644 index 000000000000..561c8b9d34a4 --- /dev/null +++ b/changelog.d/12808.feature @@ -0,0 +1 @@ +Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). \ No newline at end of file diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index 27c5a0ed5cfe..71f6f9f0ab45 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -11,22 +11,29 @@ The available spam checker callbacks are: ### `check_event_for_spam` _First introduced in Synapse v1.37.0_ +_Signature extended to support Allow and Code in Synapse v1.60.0_ +_Boolean and string return value types deprecated in Synapse v1.60.0_ ```python -async def check_event_for_spam(event: "synapse.events.EventBase") -> Union[bool, str] +async def check_event_for_spam(event: "synapse.module_api.EventBase") -> Union["synapse.module_api.ALLOW", "synapse.module_api.error.Codes", str, bool] ``` -Called when receiving an event from a client or via federation. The callback must return -either: -- an error message string, to indicate the event must be rejected because of spam and - give a rejection reason to forward to clients; -- the boolean `True`, to indicate that the event is spammy, but not provide further details; or -- the booelan `False`, to indicate that the event is not considered spammy. +Called when receiving an event from a client or via federation. The callback must return either: + - `synapse.module_api.ALLOW`, to allow the operation. Other callbacks + may still decide to reject it. + - `synapse.api.Codes` to reject the operation with an error code. In case + of doubt, `synapse.api.error.Codes.FORBIDDEN` is a good error code. + - (deprecated) a `str` to reject the operation and specify an error message. 
Note that clients + typically will not localize the error message to the user's preferred locale. + - (deprecated) on `False`, behave as `ALLOW`. Deprecated as confusing, as some + callbacks expect `True` to allow and others `True` to reject. + - (deprecated) on `True`, behave as `synapse.module_api.errors.Codes.FORBIDDEN`. Deprecated as confusing, as + some callbacks expect `True` to allow and others `True` to reject. If multiple modules implement this callback, they will be considered in order. If a -callback returns `False`, Synapse falls through to the next one. The value of the first -callback that does not return `False` will be used. If this happens, Synapse will not call -any of the subsequent implementations of this callback. +callback returns `synapse.module_api.ALLOW`, Synapse falls through to the next one. The value of the +first callback that does not return `synapse.module_api.ALLOW` will be used. If this happens, Synapse +will not call any of the subsequent implementations of this callback. ### `user_may_join_room` diff --git a/docs/upgrade.md b/docs/upgrade.md index 92ca31b2f8de..e7eadadb64bf 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -177,7 +177,36 @@ has queries that can be used to check a database for this problem in advance. +## SpamChecker API's `check_event_for_spam` has a new signature. +The previous signature has been deprecated. + +Whereas `check_event_for_spam` callbacks used to return `Union[str, bool]`, they should now return `Union["synapse.module_api.ALLOW", "synapse.module_api.errors.Codes"]`. + +This is part of an ongoing refactoring of the SpamChecker API to make it less ambiguous and more powerful.
+ +If your module implements `check_event_for_spam` as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam + return True + # Event is not spam + return False +``` + +you should rewrite it as follows: + +```python +async def check_event_for_spam(event): + if ...: + # Event is spam, mark it as forbidden (you may use some more precise error + # code if it is useful). + return synapse.module_api.errors.Codes.FORBIDDEN + # Event is not spam, mark it as `ALLOW`. + return synapse.module_api.ALLOW +``` # Upgrading to v1.59.0 diff --git a/synapse/api/errors.py b/synapse/api/errors.py index 9614be6b4e46..6650e826d5af 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -270,9 +270,7 @@ class UnrecognizedRequestError(SynapseError): """An error indicating we don't understand the request you're trying to make""" def __init__( - self, - msg: str = "Unrecognized request", - errcode: str = Codes.UNRECOGNIZED, + self, msg: str = "Unrecognized request", errcode: str = Codes.UNRECOGNIZED ): super().__init__(400, msg, errcode) diff --git a/synapse/events/spamcheck.py b/synapse/events/spamcheck.py index 61bcbe2abe60..7984874e21df 100644 --- a/synapse/events/spamcheck.py +++ b/synapse/events/spamcheck.py @@ -27,9 +27,10 @@ Union, ) +from synapse.api.errors import Codes from synapse.rest.media.v1._base import FileInfo from synapse.rest.media.v1.media_storage import ReadableFileWrapper -from synapse.spam_checker_api import RegistrationBehaviour +from synapse.spam_checker_api import Allow, Decision, RegistrationBehaviour from synapse.types import RoomAlias, UserProfile from synapse.util.async_helpers import delay_cancellation, maybe_awaitable from synapse.util.metrics import Measure @@ -40,9 +41,19 @@ logger = logging.getLogger(__name__) + CHECK_EVENT_FOR_SPAM_CALLBACK = Callable[ ["synapse.events.EventBase"], - Awaitable[Union[bool, str]], + Awaitable[ + Union[ + Allow, + Codes, + # Deprecated + bool, + # Deprecated + str, + ] + ], ] 
SHOULD_DROP_FEDERATED_EVENT_CALLBACK = Callable[ ["synapse.events.EventBase"], @@ -259,7 +270,7 @@ def register_callbacks( async def check_event_for_spam( self, event: "synapse.events.EventBase" - ) -> Union[bool, str]: + ) -> Union[Decision, str]: """Checks if a given event is considered "spammy" by this server. If the server considers an event spammy, then it will be rejected if @@ -270,18 +281,36 @@ async def check_event_for_spam( event: the event to be checked Returns: - True or a string if the event is spammy. If a string is returned it - will be used as the error message returned to the user. + - on `ALLOW`, the event is considered good (non-spammy) and should + be let through. Other spamcheck filters may still reject it. + - on `Code`, the event is considered spammy and is rejected with a specific + error message/code. + - on `str`, the event is considered spammy and the string is used as error + message. This usage is generally discouraged as it doesn't support + internationalization. """ for callback in self._check_event_for_spam_callbacks: with Measure( self.clock, "{}.{}".format(callback.__module__, callback.__qualname__) ): - res: Union[bool, str] = await delay_cancellation(callback(event)) - if res: - return res - - return False + res: Union[Decision, str, bool] = await delay_cancellation( + callback(event) + ) + if res is False or res is Allow.ALLOW: + # This spam-checker accepts the event. + # Other spam-checkers may reject it, though. + continue + elif res is True: + # This spam-checker rejects the event with deprecated + # return value `True` + return Codes.FORBIDDEN + else: + # This spam-checker rejects the event either with a `str` + # or with a `Codes`. In either case, we stop here. + return res + + # No spam-checker has rejected the event, let it pass. 
+ return Allow.ALLOW async def should_drop_federated_event( self, event: "synapse.events.EventBase" diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 41ac49fdc8bf..1e866b19d87b 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -15,6 +15,7 @@ import logging from typing import TYPE_CHECKING +import synapse from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions, RoomVersion @@ -98,9 +99,9 @@ async def _check_sigs_and_hash( ) return redacted_event - result = await self.spam_checker.check_event_for_spam(pdu) + spam_check = await self.spam_checker.check_event_for_spam(pdu) - if result: + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: logger.warning("Event contains spam, soft-failing %s", pdu.event_id) # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e566ff1f8ed8..cb1bc4c06f1c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -23,6 +23,7 @@ from twisted.internet.interfaces import IDelayedCall +import synapse from synapse import event_auth from synapse.api.constants import ( EventContentFields, @@ -885,11 +886,11 @@ async def create_and_send_nonmember_event( event.sender, ) - spam_error = await self.spam_checker.check_event_for_spam(event) - if spam_error: - if not isinstance(spam_error, str): - spam_error = "Spam is not permitted here" - raise SynapseError(403, spam_error, Codes.FORBIDDEN) + spam_check = await self.spam_checker.check_event_for_spam(event) + if spam_check is not synapse.spam_checker_api.Allow.ALLOW: + raise SynapseError( + 403, "This message had been rejected as probable spam", spam_check + ) ev = await self.handle_new_client_event( requester=requester, diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index c4f661bb9382..95f3b2792793 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -35,6 +35,7 @@ from twisted.internet import defer from twisted.web.resource import Resource +from synapse import spam_checker_api from synapse.api.errors import SynapseError from synapse.events import EventBase from synapse.events.presence_router import ( @@ -140,6 +141,9 @@ PRESENCE_ALL_USERS = PresenceRouter.ALL_USERS +ALLOW = spam_checker_api.Allow.ALLOW +# Singleton value used to mark a message as permitted. 
+ __all__ = [ "errors", "make_deferred_yieldable", @@ -147,6 +151,7 @@ "respond_with_html", "run_in_background", "cached", + "Allow", "UserID", "DatabasePool", "LoggingTransaction", diff --git a/synapse/module_api/errors.py b/synapse/module_api/errors.py index e58e0e60feab..bedd045d6fe1 100644 --- a/synapse/module_api/errors.py +++ b/synapse/module_api/errors.py @@ -15,6 +15,7 @@ """Exception types which are exposed as part of the stable module API""" from synapse.api.errors import ( + Codes, InvalidClientCredentialsError, RedirectException, SynapseError, @@ -24,6 +25,7 @@ from synapse.storage.push_rule import RuleNotFoundException __all__ = [ + "Codes", "InvalidClientCredentialsError", "RedirectException", "SynapseError", diff --git a/synapse/spam_checker_api/__init__.py b/synapse/spam_checker_api/__init__.py index 73018f2d002e..95132c80b70e 100644 --- a/synapse/spam_checker_api/__init__.py +++ b/synapse/spam_checker_api/__init__.py @@ -12,13 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. from enum import Enum +from typing import Union + +from synapse.api.errors import Codes class RegistrationBehaviour(Enum): """ - Enum to define whether a registration request should allowed, denied, or shadow-banned. + Enum to define whether a registration request should be allowed, denied, or shadow-banned. """ ALLOW = "allow" SHADOW_BAN = "shadow_ban" DENY = "deny" + + +# We define the following singleton enum rather than a string to be able to +# write `Union[Allow, ..., str]` in some of the callbacks for the spam-checker +# API, where the `str` is required to maintain backwards compatibility with +# previous versions of the API. +class Allow(Enum): + """ + Singleton to allow events to pass through in SpamChecker APIs. + """ + + ALLOW = "allow" + + +Decision = Union[Allow, Codes] +""" +Union to define whether a request should be allowed or rejected. + +To accept a request, return `ALLOW`. 
+ +To reject a request without any specific information, use `Codes.FORBIDDEN`. +""" From 7c2a78bb3bd2091439722e9f1e4601837bcb8fc4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 23 May 2022 20:43:37 -0500 Subject: [PATCH 087/181] Marker events as state - MSC2716 (#12718) Sending marker events as state now so they are always able to be seen by homeservers (not lost in some timeline gap). Part of [MSC2716](/~https://github.com/matrix-org/matrix-spec-proposals/pull/2716) Complement tests: /~https://github.com/matrix-org/complement/pull/371 As initially discussed at /~https://github.com/matrix-org/matrix-spec-proposals/pull/2716#discussion_r782629097 and /~https://github.com/matrix-org/matrix-spec-proposals/pull/2716#discussion_r876684431 When someone joins a room, process all of the marker events we see in the current state. Marker events should be sent with a unique `state_key` so that they can all resolve in the current state to easily be discovered. Marker events as state - If we re-use the same `state_key` (like `""`), then we would have to fetch previous snapshots of state up through time to find all of the marker events. This way we can avoid all of that. This PR was originally doing this but then thought of the smarter way to tackle in an [out of band discussion with @erikjohnston](https://docs.google.com/document/d/1JJDuPfcPNX75fprdTWlxlaKjWOdbdJylbpZ03hzo638/edit#bookmark=id.sm92fqyq7vpp). - Also avoids state resolution conflicts where only one of the marker events win As a homeserver, when we see new marker state, we know there is new history imported somewhere back in time and should process it to fetch the insertion event where the historical messages are and set it as an insertion extremity. This way we know where to backfill more messages when someone asks for scrollback. 
--- changelog.d/12718.feature | 1 + synapse/handlers/federation_event.py | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12718.feature diff --git a/changelog.d/12718.feature b/changelog.d/12718.feature new file mode 100644 index 000000000000..1056f519a4c1 --- /dev/null +++ b/changelog.d/12718.feature @@ -0,0 +1 @@ +Update [MSC2716](/~https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to process marker events from the current state to avoid markers being lost in timeline gaps for federated servers which would cause the imported history to be undiscovered. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 05c122f22491..ca82df8a6d9e 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -477,7 +477,23 @@ async def process_remote_join( # and discover that we do not have it. event.internal_metadata.proactively_send = False - return await self.persist_events_and_notify(room_id, [(event, context)]) + stream_id_after_persist = await self.persist_events_and_notify( + room_id, [(event, context)] + ) + + # If we're joining the room again, check if there is new marker + # state indicating that there is new history imported somewhere in + # the DAG. Multiple markers can exist in the current state with + # unique state_keys. + # + # Do this after the state from the remote join was persisted (via + # `persist_events_and_notify`). 
Otherwise we can run into a + # situation where the create event doesn't exist yet in the + # `current_state_events` + for e in state: + await self._handle_marker_event(origin, e) + + return stream_id_after_persist async def update_state_for_partial_state_event( self, destination: str, event: EventBase @@ -1230,6 +1246,14 @@ async def _handle_marker_event(self, origin: str, marker_event: EventBase) -> No # Nothing to retrieve then (invalid marker) return + already_seen_insertion_event = await self._store.have_seen_event( + marker_event.room_id, insertion_event_id + ) + if already_seen_insertion_event: + # No need to process a marker again if we have already seen the + # insertion event that it was pointing to + return + logger.debug( "_handle_marker_event: backfilling insertion event %s", insertion_event_id ) From f5b1c09909e81182bacc167e70188a7c43aad813 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 24 May 2022 11:35:08 +0100 Subject: [PATCH 088/181] Pin poetry.core in Docker images (#12853) --- changelog.d/12853.docker | 1 + docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12853.docker diff --git a/changelog.d/12853.docker b/changelog.d/12853.docker new file mode 100644 index 000000000000..cad10a79cc82 --- /dev/null +++ b/changelog.d/12853.docker @@ -0,0 +1 @@ +Fix the docker file after a dependency update. diff --git a/docker/Dockerfile b/docker/Dockerfile index ccc6a9f77849..7af0e51f97d2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -55,7 +55,7 @@ RUN \ # NB: In poetry 1.2 `poetry export` will be moved into a plugin; we'll need to also # pip install poetry-plugin-export (/~https://github.com/python-poetry/poetry-plugin-export). 
RUN --mount=type=cache,target=/root/.cache/pip \ - pip install --user git+/~https://github.com/python-poetry/poetry.git@fb13b3a676f476177f7937ffa480ee5cff9a90a5 + pip install --user "poetry-core==1.1.0a7" "git+/~https://github.com/python-poetry/poetry.git@fb13b3a676f476177f7937ffa480ee5cff9a90a5" WORKDIR /synapse From 0b3423fd51608a8ff9d61d61f4975a1cd877d679 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 24 May 2022 11:48:11 +0100 Subject: [PATCH 089/181] contributing_guide.md: fix link to DCO --- docs/development/contributing_guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index f55a1fbb9002..2b3714df66f9 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -422,8 +422,8 @@ same lightweight approach that the Linux Kernel [submitting patches process]( https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin>), [Docker](/~https://github.com/docker/docker/blob/master/CONTRIBUTING.md), and many other -projects use: the DCO (Developer Certificate of Origin: -http://developercertificate.org/). This is a simple declaration that you wrote +projects use: the DCO ([Developer Certificate of Origin](http://developercertificate.org/)). 
+This is a simple declaration that you wrote the contribution or otherwise have the right to contribute it to Matrix: ``` From a670b5cda24cdabfd4cb5732bda9ed1ccd86a1d1 Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 24 May 2022 12:05:33 +0100 Subject: [PATCH 090/181] 1.60.0rc1 --- CHANGES.md | 112 ++++++++++++++++++++++++++++++++++++++ changelog.d/10533.misc | 1 - changelog.d/12477.misc | 1 - changelog.d/12498.misc | 1 - changelog.d/12513.feature | 1 - changelog.d/12567.misc | 1 - changelog.d/12586.misc | 1 - changelog.d/12588.misc | 1 - changelog.d/12611.bugfix | 1 - changelog.d/12618.feature | 1 - changelog.d/12623.feature | 1 - changelog.d/12630.misc | 1 - changelog.d/12672.feature | 1 - changelog.d/12673.feature | 1 - changelog.d/12676.misc | 1 - changelog.d/12677.misc | 1 - changelog.d/12679.misc | 1 - changelog.d/12680.misc | 1 - changelog.d/12683.bugfix | 1 - changelog.d/12687.bugfix | 1 - changelog.d/12689.misc | 1 - changelog.d/12691.misc | 1 - changelog.d/12693.misc | 1 - changelog.d/12694.misc | 1 - changelog.d/12695.misc | 1 - changelog.d/12696.bugfix | 1 - changelog.d/12698.misc | 1 - changelog.d/12699.misc | 1 - changelog.d/12700.misc | 1 - changelog.d/12701.feature | 1 - changelog.d/12703.misc | 1 - changelog.d/12705.misc | 1 - changelog.d/12708.misc | 1 - changelog.d/12709.removal | 1 - changelog.d/12711.misc | 1 - changelog.d/12713.bugfix | 1 - changelog.d/12715.doc | 1 - changelog.d/12716.misc | 1 - changelog.d/12717.misc | 1 - changelog.d/12718.feature | 1 - changelog.d/12720.misc | 1 - changelog.d/12721.bugfix | 1 - changelog.d/12723.misc | 1 - changelog.d/12726.misc | 1 - changelog.d/12727.doc | 1 - changelog.d/12731.misc | 1 - changelog.d/12734.misc | 1 - changelog.d/12742.doc | 1 - changelog.d/12744.feature | 1 - changelog.d/12747.bugfix | 1 - changelog.d/12748.doc | 1 - changelog.d/12749.doc | 1 - changelog.d/12753.misc | 1 - changelog.d/12759.doc | 1 - changelog.d/12761.doc | 1 - changelog.d/12762.misc | 1 - changelog.d/12765.doc | 1 - 
changelog.d/12769.misc | 1 - changelog.d/12770.bugfix | 1 - changelog.d/12772.misc | 1 - changelog.d/12773.doc | 1 - changelog.d/12774.misc | 1 - changelog.d/12775.misc | 1 - changelog.d/12776.doc | 2 - changelog.d/12777.doc | 2 - changelog.d/12779.bugfix | 1 - changelog.d/12781.misc | 1 - changelog.d/12783.misc | 1 - changelog.d/12785.doc | 1 - changelog.d/12786.feature | 1 - changelog.d/12789.misc | 1 - changelog.d/12790.misc | 1 - changelog.d/12791.misc | 1 - changelog.d/12792.feature | 1 - changelog.d/12794.bugfix | 1 - changelog.d/12803.bugfix | 1 - changelog.d/12808.feature | 1 - changelog.d/12809.feature | 1 - changelog.d/12818.misc | 1 - changelog.d/12819.misc | 1 - changelog.d/12823.bugfix | 1 - changelog.d/12826.misc | 1 - changelog.d/12833.misc | 1 - changelog.d/12842.misc | 1 - changelog.d/12853.docker | 1 - debian/changelog | 6 ++ pyproject.toml | 2 +- 87 files changed, 119 insertions(+), 87 deletions(-) delete mode 100644 changelog.d/10533.misc delete mode 100644 changelog.d/12477.misc delete mode 100644 changelog.d/12498.misc delete mode 100644 changelog.d/12513.feature delete mode 100644 changelog.d/12567.misc delete mode 100644 changelog.d/12586.misc delete mode 100644 changelog.d/12588.misc delete mode 100644 changelog.d/12611.bugfix delete mode 100644 changelog.d/12618.feature delete mode 100644 changelog.d/12623.feature delete mode 100644 changelog.d/12630.misc delete mode 100644 changelog.d/12672.feature delete mode 100644 changelog.d/12673.feature delete mode 100644 changelog.d/12676.misc delete mode 100644 changelog.d/12677.misc delete mode 100644 changelog.d/12679.misc delete mode 100644 changelog.d/12680.misc delete mode 100644 changelog.d/12683.bugfix delete mode 100644 changelog.d/12687.bugfix delete mode 100644 changelog.d/12689.misc delete mode 100644 changelog.d/12691.misc delete mode 100644 changelog.d/12693.misc delete mode 100644 changelog.d/12694.misc delete mode 100644 changelog.d/12695.misc delete mode 100644 
changelog.d/12696.bugfix delete mode 100644 changelog.d/12698.misc delete mode 100644 changelog.d/12699.misc delete mode 100644 changelog.d/12700.misc delete mode 100644 changelog.d/12701.feature delete mode 100644 changelog.d/12703.misc delete mode 100644 changelog.d/12705.misc delete mode 100644 changelog.d/12708.misc delete mode 100644 changelog.d/12709.removal delete mode 100644 changelog.d/12711.misc delete mode 100644 changelog.d/12713.bugfix delete mode 100644 changelog.d/12715.doc delete mode 100644 changelog.d/12716.misc delete mode 100644 changelog.d/12717.misc delete mode 100644 changelog.d/12718.feature delete mode 100644 changelog.d/12720.misc delete mode 100644 changelog.d/12721.bugfix delete mode 100644 changelog.d/12723.misc delete mode 100644 changelog.d/12726.misc delete mode 100644 changelog.d/12727.doc delete mode 100644 changelog.d/12731.misc delete mode 100644 changelog.d/12734.misc delete mode 100644 changelog.d/12742.doc delete mode 100644 changelog.d/12744.feature delete mode 100644 changelog.d/12747.bugfix delete mode 100644 changelog.d/12748.doc delete mode 100644 changelog.d/12749.doc delete mode 100644 changelog.d/12753.misc delete mode 100644 changelog.d/12759.doc delete mode 100644 changelog.d/12761.doc delete mode 100644 changelog.d/12762.misc delete mode 100644 changelog.d/12765.doc delete mode 100644 changelog.d/12769.misc delete mode 100644 changelog.d/12770.bugfix delete mode 100644 changelog.d/12772.misc delete mode 100644 changelog.d/12773.doc delete mode 100644 changelog.d/12774.misc delete mode 100644 changelog.d/12775.misc delete mode 100644 changelog.d/12776.doc delete mode 100644 changelog.d/12777.doc delete mode 100644 changelog.d/12779.bugfix delete mode 100644 changelog.d/12781.misc delete mode 100644 changelog.d/12783.misc delete mode 100644 changelog.d/12785.doc delete mode 100644 changelog.d/12786.feature delete mode 100644 changelog.d/12789.misc delete mode 100644 changelog.d/12790.misc delete mode 100644 
changelog.d/12791.misc delete mode 100644 changelog.d/12792.feature delete mode 100644 changelog.d/12794.bugfix delete mode 100644 changelog.d/12803.bugfix delete mode 100644 changelog.d/12808.feature delete mode 100644 changelog.d/12809.feature delete mode 100644 changelog.d/12818.misc delete mode 100644 changelog.d/12819.misc delete mode 100644 changelog.d/12823.bugfix delete mode 100644 changelog.d/12826.misc delete mode 100644 changelog.d/12833.misc delete mode 100644 changelog.d/12842.misc delete mode 100644 changelog.d/12853.docker diff --git a/CHANGES.md b/CHANGES.md index e10ac0314abf..f6ca5c472144 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,115 @@ +Synapse 1.60.0rc1 (2022-05-24) +============================== + +Features +-------- + +- Measure the time taken in spam-checking callbacks and expose those measurements as metrics. ([\#12513](/~https://github.com/matrix-org/synapse/issues/12513)) +- Add a `default_power_level_content_override` config option to set default room power levels per room preset. ([\#12618](/~https://github.com/matrix-org/synapse/issues/12618)) +- Add support for [MSC3787: Allowing knocks to restricted rooms](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787). ([\#12623](/~https://github.com/matrix-org/synapse/issues/12623)) +- Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. ([\#12672](/~https://github.com/matrix-org/synapse/issues/12672), [\#12809](/~https://github.com/matrix-org/synapse/issues/12809)) +- Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal. ([\#12673](/~https://github.com/matrix-org/synapse/issues/12673)) +- Add a config option to allow for auto-tuning of caches.
([\#12701](/~https://github.com/matrix-org/synapse/issues/12701)) +- Update [MSC2716](/~https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to process marker events from the current state to avoid markers being lost in timeline gaps for federated servers which would cause the imported history to be undiscovered. ([\#12718](/~https://github.com/matrix-org/synapse/issues/12718)) +- Add a `drop_federated_event` callback to `SpamChecker` to disregard inbound federated events before they take up much processing power, in an emergency. ([\#12744](/~https://github.com/matrix-org/synapse/issues/12744)) +- Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). ([\#12786](/~https://github.com/matrix-org/synapse/issues/12786), [\#12792](/~https://github.com/matrix-org/synapse/issues/12792)) +- Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). ([\#12808](/~https://github.com/matrix-org/synapse/issues/12808)) + + +Bugfixes +-------- + +- Fix a bug introduced in Synapse 1.7.0 that would prevent events from being sent to clients if there's a retention policy in the room when the support for retention policies is disabled. ([\#12611](/~https://github.com/matrix-org/synapse/issues/12611)) +- Fix a bug introduced in Synapse 1.57.0 where `/messages` would throw a 500 error when querying for a non-existent room. ([\#12683](/~https://github.com/matrix-org/synapse/issues/12683)) +- Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. ([\#12687](/~https://github.com/matrix-org/synapse/issues/12687)) +- Fix a long-standing bug where an empty room would be created when a user with an insufficient power level tried to upgrade a room. 
([\#12696](/~https://github.com/matrix-org/synapse/issues/12696)) +- Fix a bug introduced in Synapse 1.30.0 where empty rooms could be automatically created if a monthly active users limit is set. ([\#12713](/~https://github.com/matrix-org/synapse/issues/12713)) +- Fix push to dismiss notifications when read on another client. Contributed by @SpiritCroc @ Beeper. ([\#12721](/~https://github.com/matrix-org/synapse/issues/12721)) +- Fix poor database performance when reading the cache invalidation stream for large servers with lots of workers. ([\#12747](/~https://github.com/matrix-org/synapse/issues/12747)) +- Delete events from the `federation_inbound_events_staging` table when a room is purged through the admin API. ([\#12770](/~https://github.com/matrix-org/synapse/issues/12770)) +- Give a meaningful error message when a client tries to create a room with an invalid alias localpart. ([\#12779](/~https://github.com/matrix-org/synapse/issues/12779)) +- Fix a bug introduced in 1.43.0 where a file (`providers.json`) was never closed. Contributed by @arkamar. ([\#12794](/~https://github.com/matrix-org/synapse/issues/12794)) +- Fix a long-standing bug where finished log contexts would be re-started when failing to contact remote homeservers. ([\#12803](/~https://github.com/matrix-org/synapse/issues/12803)) +- Fix a bug, introduced in Synapse 1.21.0, that led to media thumbnails being unusable before the index has been added in the background. ([\#12823](/~https://github.com/matrix-org/synapse/issues/12823)) + + +Updates to the Docker image +--------------------------- + +- Fix the docker file after a dependency update. ([\#12853](/~https://github.com/matrix-org/synapse/issues/12853)) + + +Improved Documentation +---------------------- + +- Fix a typo in the Media Admin API documentation. ([\#12715](/~https://github.com/matrix-org/synapse/issues/12715)) +- Update the OpenID Connect example for Keycloak to be compatible with newer versions of Keycloak. 
Contributed by @nhh. ([\#12727](/~https://github.com/matrix-org/synapse/issues/12727)) +- Fix typo in server listener documentation. ([\#12742](/~https://github.com/matrix-org/synapse/issues/12742)) +- Link to the configuration manual from the welcome page of the documentation. ([\#12748](/~https://github.com/matrix-org/synapse/issues/12748)) +- Fix typo in 'run_background_tasks_on' option name in configuration manual documentation. ([\#12749](/~https://github.com/matrix-org/synapse/issues/12749)) +- Add information regarding the `rc_invites` ratelimiting option to the configuration docs. ([\#12759](/~https://github.com/matrix-org/synapse/issues/12759)) +- Add documentation for cancellation of request processing. ([\#12761](/~https://github.com/matrix-org/synapse/issues/12761)) +- Recommend using docker to run tests against postgres. ([\#12765](/~https://github.com/matrix-org/synapse/issues/12765)) +- Add missing user directory endpoint from the generic worker documentation. Contributed by @olmari. ([\#12773](/~https://github.com/matrix-org/synapse/issues/12773)) +- Add additional info to documentation of config option `cache_autotuning`. ([\#12776](/~https://github.com/matrix-org/synapse/issues/12776)) +- Update configuration manual documentation to document size-related suffixes. ([\#12777](/~https://github.com/matrix-org/synapse/issues/12777)) +- Fix invalid YAML syntax in the example documentation for the `url_preview_accept_language` config option. ([\#12785](/~https://github.com/matrix-org/synapse/issues/12785)) + + +Deprecations and Removals +------------------------- + +- Require a body in POST requests to `/rooms/{roomId}/receipt/{receiptType}/{eventId}`, as required by the [Matrix specification](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3roomsroomidreceiptreceipttypeeventid). This breaks compatibility with Element Android 1.2.0 and earlier: users of those clients will be unable to send read receipts. 
([\#12709](/~https://github.com/matrix-org/synapse/issues/12709)) + + +Internal Changes +---------------- + +- Improve event caching mechanism to avoid having multiple copies of an event in memory at a time. ([\#10533](/~https://github.com/matrix-org/synapse/issues/10533)) +- Add some type hints to datastore. ([\#12477](/~https://github.com/matrix-org/synapse/issues/12477), [\#12717](/~https://github.com/matrix-org/synapse/issues/12717), [\#12753](/~https://github.com/matrix-org/synapse/issues/12753)) +- Preparation for faster-room-join work: return subsets of room state which we already have, immediately. ([\#12498](/~https://github.com/matrix-org/synapse/issues/12498)) +- Replace string literal instances of stream key types with typed constants. ([\#12567](/~https://github.com/matrix-org/synapse/issues/12567)) +- Add `@cancellable` decorator, for use on endpoint methods that can be cancelled when clients disconnect. ([\#12586](/~https://github.com/matrix-org/synapse/issues/12586)) +- Add ability to cancel disconnected requests to `SynapseRequest`. ([\#12588](/~https://github.com/matrix-org/synapse/issues/12588)) +- Add a helper class for testing request cancellation. ([\#12630](/~https://github.com/matrix-org/synapse/issues/12630)) +- Improve documentation of the `synapse.push` module. ([\#12676](/~https://github.com/matrix-org/synapse/issues/12676)) +- Move free functions into `PushRuleEvaluatorForEvent`. ([\#12677](/~https://github.com/matrix-org/synapse/issues/12677)) +- Preparation for database schema simplifications: stop writing to `event_reference_hashes`. ([\#12679](/~https://github.com/matrix-org/synapse/issues/12679)) +- Remove code which updates unused database column `application_services_state.last_txn`. ([\#12680](/~https://github.com/matrix-org/synapse/issues/12680)) +- Refactor `EventContext` class. ([\#12689](/~https://github.com/matrix-org/synapse/issues/12689)) +- Remove an unneeded class in the push code.
([\#12691](/~https://github.com/matrix-org/synapse/issues/12691)) +- Consolidate parsing of relation information from events. ([\#12693](/~https://github.com/matrix-org/synapse/issues/12693)) +- Capture the `Deferred` for request cancellation in `_AsyncResource`. ([\#12694](/~https://github.com/matrix-org/synapse/issues/12694)) +- Fixes an incorrect type hint for `Filter._check_event_relations`. ([\#12695](/~https://github.com/matrix-org/synapse/issues/12695)) +- Respect the `@cancellable` flag for `DirectServe{Html,Json}Resource`s. ([\#12698](/~https://github.com/matrix-org/synapse/issues/12698)) +- Respect the `@cancellable` flag for `RestServlet`s and `BaseFederationServlet`s. ([\#12699](/~https://github.com/matrix-org/synapse/issues/12699)) +- Respect the `@cancellable` flag for `ReplicationEndpoint`s. ([\#12700](/~https://github.com/matrix-org/synapse/issues/12700)) +- Convert namespace class `Codes` into a string enum. ([\#12703](/~https://github.com/matrix-org/synapse/issues/12703)) +- Complain if a federation endpoint has the `@cancellable` flag, since some of the wrapper code may not handle cancellation correctly yet. ([\#12705](/~https://github.com/matrix-org/synapse/issues/12705)) +- Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$event_type/*` requests. ([\#12708](/~https://github.com/matrix-org/synapse/issues/12708)) +- Optimize private read receipt filtering. ([\#12711](/~https://github.com/matrix-org/synapse/issues/12711)) +- Add type annotations to increase the number of modules passing `disallow-untyped-defs`. ([\#12716](/~https://github.com/matrix-org/synapse/issues/12716), [\#12726](/~https://github.com/matrix-org/synapse/issues/12726)) +- Drop the logging level of status messages for the URL preview cache expiry job from INFO to DEBUG. 
([\#12720](/~https://github.com/matrix-org/synapse/issues/12720)) +- Downgrade some OIDC errors to warnings in the logs, to reduce the noise of Sentry reports. ([\#12723](/~https://github.com/matrix-org/synapse/issues/12723)) +- Update configs used by Complement to allow more invites/3PID validations during tests. ([\#12731](/~https://github.com/matrix-org/synapse/issues/12731)) +- Tidy up and type-hint the database engine modules. ([\#12734](/~https://github.com/matrix-org/synapse/issues/12734)) +- Fix a long-standing bug where the user directory background process would fail to make forward progress if a user included a null codepoint in their display name or avatar. ([\#12762](/~https://github.com/matrix-org/synapse/issues/12762)) +- Tweak the mypy plugin so that `@cached` can accept `on_invalidate=None`. ([\#12769](/~https://github.com/matrix-org/synapse/issues/12769)) +- Move methods that call `add_push_rule` to the `PushRuleStore` class. ([\#12772](/~https://github.com/matrix-org/synapse/issues/12772)) +- Make handling of federation Authorization header (more) compliant with RFC7230. ([\#12774](/~https://github.com/matrix-org/synapse/issues/12774)) +- Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. ([\#12775](/~https://github.com/matrix-org/synapse/issues/12775)) +- Do not keep going if there are 5 back-to-back background update failures. ([\#12781](/~https://github.com/matrix-org/synapse/issues/12781)) +- Fix federation when using the demo scripts. ([\#12783](/~https://github.com/matrix-org/synapse/issues/12783)) +- The `hash_password` script now fails when it is called without specifying a config file. ([\#12789](/~https://github.com/matrix-org/synapse/issues/12789)) +- Simplify `disallow_untyped_defs` config in `mypy.ini`. 
([\#12790](/~https://github.com/matrix-org/synapse/issues/12790)) +- Update EventContext `get_current_event_ids` and `get_prev_event_ids` to accept state filters and update calls where possible. ([\#12791](/~https://github.com/matrix-org/synapse/issues/12791)) +- Remove Caddy from the Synapse workers image used in Complement. ([\#12818](/~https://github.com/matrix-org/synapse/issues/12818)) +- Add Complement's shared registration secret to the Complement worker image. This fixes tests that depend on it. ([\#12819](/~https://github.com/matrix-org/synapse/issues/12819)) +- Support registering Application Services when running with workers under Complement. ([\#12826](/~https://github.com/matrix-org/synapse/issues/12826)) +- Add some type hints to test files. ([\#12833](/~https://github.com/matrix-org/synapse/issues/12833)) +- Disable 'faster room join' Complement tests when testing against Synapse with workers. ([\#12842](/~https://github.com/matrix-org/synapse/issues/12842)) + + Synapse 1.59.1 (2022-05-18) =========================== diff --git a/changelog.d/10533.misc b/changelog.d/10533.misc deleted file mode 100644 index f70dc6496fcf..000000000000 --- a/changelog.d/10533.misc +++ /dev/null @@ -1 +0,0 @@ -Improve event caching mechanism to avoid having multiple copies of an event in memory at a time. diff --git a/changelog.d/12477.misc b/changelog.d/12477.misc deleted file mode 100644 index e793d08e5e3f..000000000000 --- a/changelog.d/12477.misc +++ /dev/null @@ -1 +0,0 @@ -Add some type hints to datastore. \ No newline at end of file diff --git a/changelog.d/12498.misc b/changelog.d/12498.misc deleted file mode 100644 index 8a00b94fbeef..000000000000 --- a/changelog.d/12498.misc +++ /dev/null @@ -1 +0,0 @@ -Preparation for faster-room-join work: return subsets of room state which we already have, immediately. 
diff --git a/changelog.d/12513.feature b/changelog.d/12513.feature deleted file mode 100644 index 01bf1d9d2cf6..000000000000 --- a/changelog.d/12513.feature +++ /dev/null @@ -1 +0,0 @@ -Measure the time taken in spam-checking callbacks and expose those measurements as metrics. diff --git a/changelog.d/12567.misc b/changelog.d/12567.misc deleted file mode 100644 index 35f08569bada..000000000000 --- a/changelog.d/12567.misc +++ /dev/null @@ -1 +0,0 @@ -Replace string literal instances of stream key types with typed constants. \ No newline at end of file diff --git a/changelog.d/12586.misc b/changelog.d/12586.misc deleted file mode 100644 index d26e332305ce..000000000000 --- a/changelog.d/12586.misc +++ /dev/null @@ -1 +0,0 @@ -Add `@cancellable` decorator, for use on endpoint methods that can be cancelled when clients disconnect. diff --git a/changelog.d/12588.misc b/changelog.d/12588.misc deleted file mode 100644 index f62d5c8e210c..000000000000 --- a/changelog.d/12588.misc +++ /dev/null @@ -1 +0,0 @@ -Add ability to cancel disconnected requests to `SynapseRequest`. diff --git a/changelog.d/12611.bugfix b/changelog.d/12611.bugfix deleted file mode 100644 index 093c45a20b7f..000000000000 --- a/changelog.d/12611.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.7.0 that would prevent events from being sent to clients if there's a retention policy in the room when the support for retention policies is disabled. diff --git a/changelog.d/12618.feature b/changelog.d/12618.feature deleted file mode 100644 index 37fa03b3cb41..000000000000 --- a/changelog.d/12618.feature +++ /dev/null @@ -1 +0,0 @@ -Add a `default_power_level_content_override` config option to set default room power levels per room preset. 
diff --git a/changelog.d/12623.feature b/changelog.d/12623.feature deleted file mode 100644 index cdee19fafa36..000000000000 --- a/changelog.d/12623.feature +++ /dev/null @@ -1 +0,0 @@ -Add support for [MSC3787: Allowing knocks to restricted rooms](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787). \ No newline at end of file diff --git a/changelog.d/12630.misc b/changelog.d/12630.misc deleted file mode 100644 index 43e12603e2d8..000000000000 --- a/changelog.d/12630.misc +++ /dev/null @@ -1 +0,0 @@ -Add a helper class for testing request cancellation. diff --git a/changelog.d/12672.feature b/changelog.d/12672.feature deleted file mode 100644 index b989e0d208c4..000000000000 --- a/changelog.d/12672.feature +++ /dev/null @@ -1 +0,0 @@ -Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/changelog.d/12673.feature b/changelog.d/12673.feature deleted file mode 100644 index f2bddd6e1c27..000000000000 --- a/changelog.d/12673.feature +++ /dev/null @@ -1 +0,0 @@ -Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal. diff --git a/changelog.d/12676.misc b/changelog.d/12676.misc deleted file mode 100644 index 26490af00dee..000000000000 --- a/changelog.d/12676.misc +++ /dev/null @@ -1 +0,0 @@ -Improve documentation of the `synapse.push` module. diff --git a/changelog.d/12677.misc b/changelog.d/12677.misc deleted file mode 100644 index eed12e69e9ba..000000000000 --- a/changelog.d/12677.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor functions to on `PushRuleEvaluatorForEvent`. 
diff --git a/changelog.d/12679.misc b/changelog.d/12679.misc deleted file mode 100644 index 6df1116b49ee..000000000000 --- a/changelog.d/12679.misc +++ /dev/null @@ -1 +0,0 @@ -Preparation for database schema simplifications: stop writing to `event_reference_hashes`. diff --git a/changelog.d/12680.misc b/changelog.d/12680.misc deleted file mode 100644 index dfd1f0a6c658..000000000000 --- a/changelog.d/12680.misc +++ /dev/null @@ -1 +0,0 @@ -Remove code which updates unused database column `application_services_state.last_txn`. diff --git a/changelog.d/12683.bugfix b/changelog.d/12683.bugfix deleted file mode 100644 index 2ce84a223a37..000000000000 --- a/changelog.d/12683.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.57.0 where `/messages` would throw a 500 error when querying for a non-existent room. diff --git a/changelog.d/12687.bugfix b/changelog.d/12687.bugfix deleted file mode 100644 index 196d9766707a..000000000000 --- a/changelog.d/12687.bugfix +++ /dev/null @@ -1 +0,0 @@ -Add a unique index to `state_group_edges` to prevent duplicates being accidentally introduced and the consequential impact to performance. \ No newline at end of file diff --git a/changelog.d/12689.misc b/changelog.d/12689.misc deleted file mode 100644 index daa484ea3019..000000000000 --- a/changelog.d/12689.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor `EventContext` class. diff --git a/changelog.d/12691.misc b/changelog.d/12691.misc deleted file mode 100644 index c63543421111..000000000000 --- a/changelog.d/12691.misc +++ /dev/null @@ -1 +0,0 @@ -Remove an unneeded class in the push code. diff --git a/changelog.d/12693.misc b/changelog.d/12693.misc deleted file mode 100644 index 8bd1e1cb0cd5..000000000000 --- a/changelog.d/12693.misc +++ /dev/null @@ -1 +0,0 @@ -Consolidate parsing of relation information from events. 
diff --git a/changelog.d/12694.misc b/changelog.d/12694.misc deleted file mode 100644 index e1e956a51301..000000000000 --- a/changelog.d/12694.misc +++ /dev/null @@ -1 +0,0 @@ -Capture the `Deferred` for request cancellation in `_AsyncResource`. diff --git a/changelog.d/12695.misc b/changelog.d/12695.misc deleted file mode 100644 index 1b39d969a4c5..000000000000 --- a/changelog.d/12695.misc +++ /dev/null @@ -1 +0,0 @@ -Fixes an incorrect type hint for `Filter._check_event_relations`. diff --git a/changelog.d/12696.bugfix b/changelog.d/12696.bugfix deleted file mode 100644 index e410184a22af..000000000000 --- a/changelog.d/12696.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where an empty room would be created when a user with an insufficient power level tried to upgrade a room. diff --git a/changelog.d/12698.misc b/changelog.d/12698.misc deleted file mode 100644 index 5d626352f9c2..000000000000 --- a/changelog.d/12698.misc +++ /dev/null @@ -1 +0,0 @@ -Respect the `@cancellable` flag for `DirectServe{Html,Json}Resource`s. diff --git a/changelog.d/12699.misc b/changelog.d/12699.misc deleted file mode 100644 index d278a956c7a9..000000000000 --- a/changelog.d/12699.misc +++ /dev/null @@ -1 +0,0 @@ -Respect the `@cancellable` flag for `RestServlet`s and `BaseFederationServlet`s. diff --git a/changelog.d/12700.misc b/changelog.d/12700.misc deleted file mode 100644 index d93eb5dada74..000000000000 --- a/changelog.d/12700.misc +++ /dev/null @@ -1 +0,0 @@ -Respect the `@cancellable` flag for `ReplicationEndpoint`s. diff --git a/changelog.d/12701.feature b/changelog.d/12701.feature deleted file mode 100644 index bb2264602c84..000000000000 --- a/changelog.d/12701.feature +++ /dev/null @@ -1 +0,0 @@ -Add a config options to allow for auto-tuning of caches. 
diff --git a/changelog.d/12703.misc b/changelog.d/12703.misc deleted file mode 100644 index 9aaa1bbaa3d0..000000000000 --- a/changelog.d/12703.misc +++ /dev/null @@ -1 +0,0 @@ -Convert namespace class `Codes` into a string enum. \ No newline at end of file diff --git a/changelog.d/12705.misc b/changelog.d/12705.misc deleted file mode 100644 index a913d8bb85eb..000000000000 --- a/changelog.d/12705.misc +++ /dev/null @@ -1 +0,0 @@ -Complain if a federation endpoint has the `@cancellable` flag, since some of the wrapper code may not handle cancellation correctly yet. diff --git a/changelog.d/12708.misc b/changelog.d/12708.misc deleted file mode 100644 index aa99e7311b97..000000000000 --- a/changelog.d/12708.misc +++ /dev/null @@ -1 +0,0 @@ -Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$event_type/*` requests. diff --git a/changelog.d/12709.removal b/changelog.d/12709.removal deleted file mode 100644 index 6bb03e28941f..000000000000 --- a/changelog.d/12709.removal +++ /dev/null @@ -1 +0,0 @@ -Require a body in POST requests to `/rooms/{roomId}/receipt/{receiptType}/{eventId}`, as required by the [Matrix specification](https://spec.matrix.org/v1.2/client-server-api/#post_matrixclientv3roomsroomidreceiptreceipttypeeventid). This breaks compatibility with Element Android 1.2.0 and earlier: users of those clients will be unable to send read receipts. diff --git a/changelog.d/12711.misc b/changelog.d/12711.misc deleted file mode 100644 index 0831ce045268..000000000000 --- a/changelog.d/12711.misc +++ /dev/null @@ -1 +0,0 @@ -Optimize private read receipt filtering. diff --git a/changelog.d/12713.bugfix b/changelog.d/12713.bugfix deleted file mode 100644 index 91e70f102c5d..000000000000 --- a/changelog.d/12713.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in Synapse 1.30.0 where empty rooms could be automatically created if a monthly active users limit is set. 
diff --git a/changelog.d/12715.doc b/changelog.d/12715.doc deleted file mode 100644 index 150d78c3f634..000000000000 --- a/changelog.d/12715.doc +++ /dev/null @@ -1 +0,0 @@ -Fix a typo in the Media Admin API documentation. diff --git a/changelog.d/12716.misc b/changelog.d/12716.misc deleted file mode 100644 index b07e1b52ee7c..000000000000 --- a/changelog.d/12716.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to increase the number of modules passing `disallow-untyped-defs`. \ No newline at end of file diff --git a/changelog.d/12717.misc b/changelog.d/12717.misc deleted file mode 100644 index e793d08e5e3f..000000000000 --- a/changelog.d/12717.misc +++ /dev/null @@ -1 +0,0 @@ -Add some type hints to datastore. \ No newline at end of file diff --git a/changelog.d/12718.feature b/changelog.d/12718.feature deleted file mode 100644 index 1056f519a4c1..000000000000 --- a/changelog.d/12718.feature +++ /dev/null @@ -1 +0,0 @@ -Update [MSC2716](/~https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to process marker events from the current state to avoid markers being lost in timeline gaps for federated servers which would cause the imported history to be undiscovered. diff --git a/changelog.d/12720.misc b/changelog.d/12720.misc deleted file mode 100644 index 01b427f200ae..000000000000 --- a/changelog.d/12720.misc +++ /dev/null @@ -1 +0,0 @@ -Drop the logging level of status messages for the URL preview cache expiry job from INFO to DEBUG. \ No newline at end of file diff --git a/changelog.d/12721.bugfix b/changelog.d/12721.bugfix deleted file mode 100644 index 6987f7ab15e1..000000000000 --- a/changelog.d/12721.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix push to dismiss notifications when read on another client. Contributed by @SpiritCroc @ Beeper. 
diff --git a/changelog.d/12723.misc b/changelog.d/12723.misc deleted file mode 100644 index 4f5bffeda639..000000000000 --- a/changelog.d/12723.misc +++ /dev/null @@ -1 +0,0 @@ -Downgrade some OIDC errors to warnings in the logs, to reduce the noise of Sentry reports. diff --git a/changelog.d/12726.misc b/changelog.d/12726.misc deleted file mode 100644 index b07e1b52ee7c..000000000000 --- a/changelog.d/12726.misc +++ /dev/null @@ -1 +0,0 @@ -Add type annotations to increase the number of modules passing `disallow-untyped-defs`. \ No newline at end of file diff --git a/changelog.d/12727.doc b/changelog.d/12727.doc deleted file mode 100644 index c41e50c85ba0..000000000000 --- a/changelog.d/12727.doc +++ /dev/null @@ -1 +0,0 @@ -Update the OpenID Connect example for Keycloak to be compatible with newer versions of Keycloak. Contributed by @nhh. diff --git a/changelog.d/12731.misc b/changelog.d/12731.misc deleted file mode 100644 index 962100d516c1..000000000000 --- a/changelog.d/12731.misc +++ /dev/null @@ -1 +0,0 @@ -Update configs used by Complement to allow more invites/3PID validations during tests. \ No newline at end of file diff --git a/changelog.d/12734.misc b/changelog.d/12734.misc deleted file mode 100644 index ffbfb0d63233..000000000000 --- a/changelog.d/12734.misc +++ /dev/null @@ -1 +0,0 @@ -Tidy up and type-hint the database engine modules. diff --git a/changelog.d/12742.doc b/changelog.d/12742.doc deleted file mode 100644 index 0084e27a7d03..000000000000 --- a/changelog.d/12742.doc +++ /dev/null @@ -1 +0,0 @@ -Fix typo in server listener documentation. \ No newline at end of file diff --git a/changelog.d/12744.feature b/changelog.d/12744.feature deleted file mode 100644 index 9836d94f8ca6..000000000000 --- a/changelog.d/12744.feature +++ /dev/null @@ -1 +0,0 @@ -Add a `drop_federated_event` callback to `SpamChecker` to disregard inbound federated events before they take up much processing power, in an emergency. 
diff --git a/changelog.d/12747.bugfix b/changelog.d/12747.bugfix deleted file mode 100644 index 0fb0059237cc..000000000000 --- a/changelog.d/12747.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix poor database performance when reading the cache invalidation stream for large servers with lots of workers. diff --git a/changelog.d/12748.doc b/changelog.d/12748.doc deleted file mode 100644 index 996ad3a1b926..000000000000 --- a/changelog.d/12748.doc +++ /dev/null @@ -1 +0,0 @@ -Link to the configuration manual from the welcome page of the documentation. diff --git a/changelog.d/12749.doc b/changelog.d/12749.doc deleted file mode 100644 index 4560319ee43b..000000000000 --- a/changelog.d/12749.doc +++ /dev/null @@ -1 +0,0 @@ -Fix typo in 'run_background_tasks_on' option name in configuration manual documentation. diff --git a/changelog.d/12753.misc b/changelog.d/12753.misc deleted file mode 100644 index e793d08e5e3f..000000000000 --- a/changelog.d/12753.misc +++ /dev/null @@ -1 +0,0 @@ -Add some type hints to datastore. \ No newline at end of file diff --git a/changelog.d/12759.doc b/changelog.d/12759.doc deleted file mode 100644 index 45d1c9c0ca1a..000000000000 --- a/changelog.d/12759.doc +++ /dev/null @@ -1 +0,0 @@ -Add information regarding the `rc_invites` ratelimiting option to the configuration docs. diff --git a/changelog.d/12761.doc b/changelog.d/12761.doc deleted file mode 100644 index 2eb2c0976f1b..000000000000 --- a/changelog.d/12761.doc +++ /dev/null @@ -1 +0,0 @@ -Add documentation for cancellation of request processing. diff --git a/changelog.d/12762.misc b/changelog.d/12762.misc deleted file mode 100644 index 990fb6fe74eb..000000000000 --- a/changelog.d/12762.misc +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where the user directory background process would fail to make forward progress if a user included a null codepoint in their display name or avatar. 
diff --git a/changelog.d/12765.doc b/changelog.d/12765.doc deleted file mode 100644 index 277b037d6b03..000000000000 --- a/changelog.d/12765.doc +++ /dev/null @@ -1 +0,0 @@ -Recommend using docker to run tests against postgres. diff --git a/changelog.d/12769.misc b/changelog.d/12769.misc deleted file mode 100644 index 27bd53abe376..000000000000 --- a/changelog.d/12769.misc +++ /dev/null @@ -1 +0,0 @@ -Tweak the mypy plugin so that `@cached` can accept `on_invalidate=None`. diff --git a/changelog.d/12770.bugfix b/changelog.d/12770.bugfix deleted file mode 100644 index a958f9a16ba3..000000000000 --- a/changelog.d/12770.bugfix +++ /dev/null @@ -1 +0,0 @@ -Delete events from the `federation_inbound_events_staging` table when a room is purged through the admin API. diff --git a/changelog.d/12772.misc b/changelog.d/12772.misc deleted file mode 100644 index da66f376fe8e..000000000000 --- a/changelog.d/12772.misc +++ /dev/null @@ -1 +0,0 @@ -Move methods that call `add_push_rule` to the `PushRuleStore` class. diff --git a/changelog.d/12773.doc b/changelog.d/12773.doc deleted file mode 100644 index 6de371653427..000000000000 --- a/changelog.d/12773.doc +++ /dev/null @@ -1 +0,0 @@ -Add missing user directory endpoint from the generic worker documentation. Contributed by @olmari. \ No newline at end of file diff --git a/changelog.d/12774.misc b/changelog.d/12774.misc deleted file mode 100644 index 8651f2e0e062..000000000000 --- a/changelog.d/12774.misc +++ /dev/null @@ -1 +0,0 @@ -Make handling of federation Authorization header (more) compliant with RFC7230. diff --git a/changelog.d/12775.misc b/changelog.d/12775.misc deleted file mode 100644 index eac326cde3a7..000000000000 --- a/changelog.d/12775.misc +++ /dev/null @@ -1 +0,0 @@ -Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. 
\ No newline at end of file diff --git a/changelog.d/12776.doc b/changelog.d/12776.doc deleted file mode 100644 index c00489a8ce14..000000000000 --- a/changelog.d/12776.doc +++ /dev/null @@ -1,2 +0,0 @@ -Add additional info to documentation of config option `cache_autotuning`. - diff --git a/changelog.d/12777.doc b/changelog.d/12777.doc deleted file mode 100644 index cc9c07704d02..000000000000 --- a/changelog.d/12777.doc +++ /dev/null @@ -1,2 +0,0 @@ -Update configuration manual documentation to document size-related suffixes. - diff --git a/changelog.d/12779.bugfix b/changelog.d/12779.bugfix deleted file mode 100644 index 7cf7a1f65f24..000000000000 --- a/changelog.d/12779.bugfix +++ /dev/null @@ -1 +0,0 @@ -Give a meaningful error message when a client tries to create a room with an invalid alias localpart. \ No newline at end of file diff --git a/changelog.d/12781.misc b/changelog.d/12781.misc deleted file mode 100644 index 8a045716172a..000000000000 --- a/changelog.d/12781.misc +++ /dev/null @@ -1 +0,0 @@ -Do not keep going if there are 5 back-to-back background update failures. \ No newline at end of file diff --git a/changelog.d/12783.misc b/changelog.d/12783.misc deleted file mode 100644 index 97575608bb8b..000000000000 --- a/changelog.d/12783.misc +++ /dev/null @@ -1 +0,0 @@ -Fix federation when using the demo scripts. diff --git a/changelog.d/12785.doc b/changelog.d/12785.doc deleted file mode 100644 index 5209dfeb053e..000000000000 --- a/changelog.d/12785.doc +++ /dev/null @@ -1 +0,0 @@ -Fix invalid YAML syntax in the example documentation for the `url_preview_accept_language` config option. diff --git a/changelog.d/12786.feature b/changelog.d/12786.feature deleted file mode 100644 index c90ddd411ee2..000000000000 --- a/changelog.d/12786.feature +++ /dev/null @@ -1 +0,0 @@ -Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). 
diff --git a/changelog.d/12789.misc b/changelog.d/12789.misc deleted file mode 100644 index 3398d00110c8..000000000000 --- a/changelog.d/12789.misc +++ /dev/null @@ -1 +0,0 @@ -The `hash_password` script now fails when it is called without specifying a config file. diff --git a/changelog.d/12790.misc b/changelog.d/12790.misc deleted file mode 100644 index b78156cf4e1d..000000000000 --- a/changelog.d/12790.misc +++ /dev/null @@ -1 +0,0 @@ -Simplify `disallow_untyped_defs` config in `mypy.ini`. diff --git a/changelog.d/12791.misc b/changelog.d/12791.misc deleted file mode 100644 index b6e92b7eafad..000000000000 --- a/changelog.d/12791.misc +++ /dev/null @@ -1 +0,0 @@ -Update EventContext `get_current_event_ids` and `get_prev_event_ids` to accept state filters and update calls where possible. diff --git a/changelog.d/12792.feature b/changelog.d/12792.feature deleted file mode 100644 index 4778b8a394d4..000000000000 --- a/changelog.d/12792.feature +++ /dev/null @@ -1 +0,0 @@ -Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). \ No newline at end of file diff --git a/changelog.d/12794.bugfix b/changelog.d/12794.bugfix deleted file mode 100644 index 2d1a2838e128..000000000000 --- a/changelog.d/12794.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug introduced in 1.43.0 where a file (`providers.json`) was never closed. Contributed by @arkamar. diff --git a/changelog.d/12803.bugfix b/changelog.d/12803.bugfix deleted file mode 100644 index 6ddd3d24e05f..000000000000 --- a/changelog.d/12803.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a long-standing bug where finished log contexts would be re-started when failing to contact remote homeservers. diff --git a/changelog.d/12808.feature b/changelog.d/12808.feature deleted file mode 100644 index 561c8b9d34a4..000000000000 --- a/changelog.d/12808.feature +++ /dev/null @@ -1 +0,0 @@ -Update to `check_event_for_spam`. 
Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). \ No newline at end of file diff --git a/changelog.d/12809.feature b/changelog.d/12809.feature deleted file mode 100644 index b989e0d208c4..000000000000 --- a/changelog.d/12809.feature +++ /dev/null @@ -1 +0,0 @@ -Send `USER_IP` commands on a different Redis channel, in order to reduce traffic to workers that do not process these commands. \ No newline at end of file diff --git a/changelog.d/12818.misc b/changelog.d/12818.misc deleted file mode 100644 index 2f9dacc21dd9..000000000000 --- a/changelog.d/12818.misc +++ /dev/null @@ -1 +0,0 @@ -Remove Caddy from the Synapse workers image used in Complement. \ No newline at end of file diff --git a/changelog.d/12819.misc b/changelog.d/12819.misc deleted file mode 100644 index 7a03102a632d..000000000000 --- a/changelog.d/12819.misc +++ /dev/null @@ -1 +0,0 @@ -Add Complement's shared registration secret to the Complement worker image. This fixes tests that depend on it. \ No newline at end of file diff --git a/changelog.d/12823.bugfix b/changelog.d/12823.bugfix deleted file mode 100644 index 1a1f5957e712..000000000000 --- a/changelog.d/12823.bugfix +++ /dev/null @@ -1 +0,0 @@ -Fix a bug, introduced in Synapse 1.21.0, that led to media thumbnails being unusable before the index has been added in the background. diff --git a/changelog.d/12826.misc b/changelog.d/12826.misc deleted file mode 100644 index f5e91f1ed592..000000000000 --- a/changelog.d/12826.misc +++ /dev/null @@ -1 +0,0 @@ -Support registering Application Services when running with workers under Complement. \ No newline at end of file diff --git a/changelog.d/12833.misc b/changelog.d/12833.misc deleted file mode 100644 index fad5df1afa34..000000000000 --- a/changelog.d/12833.misc +++ /dev/null @@ -1 +0,0 @@ -Add some type hints to test files. 
\ No newline at end of file diff --git a/changelog.d/12842.misc b/changelog.d/12842.misc deleted file mode 100644 index cec3f97d86fd..000000000000 --- a/changelog.d/12842.misc +++ /dev/null @@ -1 +0,0 @@ -Disable 'faster room join' Complement tests when testing against Synapse with workers. \ No newline at end of file diff --git a/changelog.d/12853.docker b/changelog.d/12853.docker deleted file mode 100644 index cad10a79cc82..000000000000 --- a/changelog.d/12853.docker +++ /dev/null @@ -1 +0,0 @@ -Fix the docker file after a dependency update. diff --git a/debian/changelog b/debian/changelog index dda342a630db..6eba9b3a1bbd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.60.0~rc1) stable; urgency=medium + + * New Synapse release 1.60.0rc1. + + -- Synapse Packaging team Tue, 24 May 2022 12:05:01 +0100 + matrix-synapse-py3 (1.59.1) stable; urgency=medium * New Synapse release 1.59.1. diff --git a/pyproject.toml b/pyproject.toml index 5a5a2eaba73d..9359d211f79e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ skip_gitignore = true [tool.poetry] name = "matrix-synapse" -version = "1.59.1" +version = "1.60.0rc1" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" From 9385cd063375d351c5cb01ffce00d47cdf482bcd Mon Sep 17 00:00:00 2001 From: Sean Quah Date: Tue, 24 May 2022 13:21:15 +0100 Subject: [PATCH 091/181] Update changelog --- CHANGES.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f6ca5c472144..46ac3fce7a71 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,19 @@ Synapse 1.60.0rc1 (2022-05-24) ============================== +This release of Synapse adds a unique index to the `state_group_edges` table, in +order to prevent accidentally introducing duplicate information (for example, +because a database backup was restored 
multiple times). If your Synapse database +already has duplicate rows in this table, this could fail with an error and +require manual remediation. + +Additionally, the signature of the `check_event_for_spam` module callback has changed. +The previous signature has been deprecated and remains working for now. Module authors +should update their modules to use the new signature where possible. + +See [the upgrade notes](/~https://github.com/matrix-org/synapse/blob/develop/docs/upgrade.md#upgrading-to-v1600) +for more details. + Features -------- @@ -13,7 +26,7 @@ Features - Update [MSC2716](/~https://github.com/matrix-org/matrix-spec-proposals/pull/2716) implementation to process marker events from the current state to avoid markers being lost in timeline gaps for federated servers which would cause the imported history to be undiscovered. ([\#12718](/~https://github.com/matrix-org/synapse/issues/12718)) - Add a `drop_federated_event` callback to `SpamChecker` to disregard inbound federated events before they take up much processing power, in an emergency. ([\#12744](/~https://github.com/matrix-org/synapse/issues/12744)) - Implement [MSC3818: Copy room type on upgrade](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3818). ([\#12786](/~https://github.com/matrix-org/synapse/issues/12786), [\#12792](/~https://github.com/matrix-org/synapse/issues/12792)) -- Update to `check_event_for_spam`. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). ([\#12808](/~https://github.com/matrix-org/synapse/issues/12808)) +- Update to the `check_event_for_spam` module callback. Deprecate the current callback signature, replace it with a new signature that is both less ambiguous (replacing booleans with explicit allow/block) and more powerful (ability to return explicit error codes). 
([\#12808](/~https://github.com/matrix-org/synapse/issues/12808)) Bugfixes @@ -46,7 +59,7 @@ Improved Documentation - Update the OpenID Connect example for Keycloak to be compatible with newer versions of Keycloak. Contributed by @nhh. ([\#12727](/~https://github.com/matrix-org/synapse/issues/12727)) - Fix typo in server listener documentation. ([\#12742](/~https://github.com/matrix-org/synapse/issues/12742)) - Link to the configuration manual from the welcome page of the documentation. ([\#12748](/~https://github.com/matrix-org/synapse/issues/12748)) -- Fix typo in 'run_background_tasks_on' option name in configuration manual documentation. ([\#12749](/~https://github.com/matrix-org/synapse/issues/12749)) +- Fix typo in `run_background_tasks_on` option name in configuration manual documentation. ([\#12749](/~https://github.com/matrix-org/synapse/issues/12749)) - Add information regarding the `rc_invites` ratelimiting option to the configuration docs. ([\#12759](/~https://github.com/matrix-org/synapse/issues/12759)) - Add documentation for cancellation of request processing. ([\#12761](/~https://github.com/matrix-org/synapse/issues/12761)) - Recommend using docker to run tests against postgres. ([\#12765](/~https://github.com/matrix-org/synapse/issues/12765)) @@ -66,12 +79,9 @@ Internal Changes ---------------- - Improve event caching mechanism to avoid having multiple copies of an event in memory at a time. ([\#10533](/~https://github.com/matrix-org/synapse/issues/10533)) -- Add some type hints to datastore. ([\#12477](/~https://github.com/matrix-org/synapse/issues/12477), [\#12717](/~https://github.com/matrix-org/synapse/issues/12717), [\#12753](/~https://github.com/matrix-org/synapse/issues/12753)) - Preparation for faster-room-join work: return subsets of room state which we already have, immediately. ([\#12498](/~https://github.com/matrix-org/synapse/issues/12498)) -- Replace string literal instances of stream key types with typed constants. 
([\#12567](/~https://github.com/matrix-org/synapse/issues/12567)) -- Add `@cancellable` decorator, for use on endpoint methods that can be cancelled when clients disconnect. ([\#12586](/~https://github.com/matrix-org/synapse/issues/12586)) -- Add ability to cancel disconnected requests to `SynapseRequest`. ([\#12588](/~https://github.com/matrix-org/synapse/issues/12588)) -- Add a helper class for testing request cancellation. ([\#12630](/~https://github.com/matrix-org/synapse/issues/12630)) +- Add `@cancellable` decorator, for use on endpoint methods that can be cancelled when clients disconnect. ([\#12586](/~https://github.com/matrix-org/synapse/issues/12586), [\#12588](/~https://github.com/matrix-org/synapse/issues/12588), [\#12630](/~https://github.com/matrix-org/synapse/issues/12630), [\#12694](/~https://github.com/matrix-org/synapse/issues/12694), [\#12698](/~https://github.com/matrix-org/synapse/issues/12698), [\#12699](/~https://github.com/matrix-org/synapse/issues/12699), [\#12700](/~https://github.com/matrix-org/synapse/issues/12700), [\#12705](/~https://github.com/matrix-org/synapse/issues/12705)) +- Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$event_type/*` requests. ([\#12708](/~https://github.com/matrix-org/synapse/issues/12708)) - Improve documentation of the `synapse.push` module. ([\#12676](/~https://github.com/matrix-org/synapse/issues/12676)) - Refactor functions to on `PushRuleEvaluatorForEvent`. ([\#12677](/~https://github.com/matrix-org/synapse/issues/12677)) - Preparation for database schema simplifications: stop writing to `event_reference_hashes`. ([\#12679](/~https://github.com/matrix-org/synapse/issues/12679)) @@ -79,20 +89,11 @@ Internal Changes - Refactor `EventContext` class. ([\#12689](/~https://github.com/matrix-org/synapse/issues/12689)) - Remove an unneeded class in the push code. 
([\#12691](/~https://github.com/matrix-org/synapse/issues/12691)) - Consolidate parsing of relation information from events. ([\#12693](/~https://github.com/matrix-org/synapse/issues/12693)) -- Capture the `Deferred` for request cancellation in `_AsyncResource`. ([\#12694](/~https://github.com/matrix-org/synapse/issues/12694)) -- Fixes an incorrect type hint for `Filter._check_event_relations`. ([\#12695](/~https://github.com/matrix-org/synapse/issues/12695)) -- Respect the `@cancellable` flag for `DirectServe{Html,Json}Resource`s. ([\#12698](/~https://github.com/matrix-org/synapse/issues/12698)) -- Respect the `@cancellable` flag for `RestServlet`s and `BaseFederationServlet`s. ([\#12699](/~https://github.com/matrix-org/synapse/issues/12699)) -- Respect the `@cancellable` flag for `ReplicationEndpoint`s. ([\#12700](/~https://github.com/matrix-org/synapse/issues/12700)) - Convert namespace class `Codes` into a string enum. ([\#12703](/~https://github.com/matrix-org/synapse/issues/12703)) -- Complain if a federation endpoint has the `@cancellable` flag, since some of the wrapper code may not handle cancellation correctly yet. ([\#12705](/~https://github.com/matrix-org/synapse/issues/12705)) -- Enable cancellation of `GET /rooms/$room_id/members`, `GET /rooms/$room_id/state` and `GET /rooms/$room_id/state/$event_type/*` requests. ([\#12708](/~https://github.com/matrix-org/synapse/issues/12708)) - Optimize private read receipt filtering. ([\#12711](/~https://github.com/matrix-org/synapse/issues/12711)) -- Add type annotations to increase the number of modules passing `disallow-untyped-defs`. ([\#12716](/~https://github.com/matrix-org/synapse/issues/12716), [\#12726](/~https://github.com/matrix-org/synapse/issues/12726)) - Drop the logging level of status messages for the URL preview cache expiry job from INFO to DEBUG. 
([\#12720](/~https://github.com/matrix-org/synapse/issues/12720)) - Downgrade some OIDC errors to warnings in the logs, to reduce the noise of Sentry reports. ([\#12723](/~https://github.com/matrix-org/synapse/issues/12723)) - Update configs used by Complement to allow more invites/3PID validations during tests. ([\#12731](/~https://github.com/matrix-org/synapse/issues/12731)) -- Tidy up and type-hint the database engine modules. ([\#12734](/~https://github.com/matrix-org/synapse/issues/12734)) - Fix a long-standing bug where the user directory background process would fail to make forward progress if a user included a null codepoint in their display name or avatar. ([\#12762](/~https://github.com/matrix-org/synapse/issues/12762)) - Tweak the mypy plugin so that `@cached` can accept `on_invalidate=None`. ([\#12769](/~https://github.com/matrix-org/synapse/issues/12769)) - Move methods that call `add_push_rule` to the `PushRuleStore` class. ([\#12772](/~https://github.com/matrix-org/synapse/issues/12772)) @@ -100,13 +101,12 @@ Internal Changes - Refactor `resolve_state_groups_for_events` to not pull out full state when no state resolution happens. ([\#12775](/~https://github.com/matrix-org/synapse/issues/12775)) - Do not keep going if there are 5 back-to-back background update failures. ([\#12781](/~https://github.com/matrix-org/synapse/issues/12781)) - Fix federation when using the demo scripts. ([\#12783](/~https://github.com/matrix-org/synapse/issues/12783)) -- The `hash_password` script now fails when it is called without specifying a config file. ([\#12789](/~https://github.com/matrix-org/synapse/issues/12789)) -- Simplify `disallow_untyped_defs` config in `mypy.ini`. ([\#12790](/~https://github.com/matrix-org/synapse/issues/12790)) +- The `hash_password` script now fails when it is called without specifying a config file. Contributed by @jae1911. ([\#12789](/~https://github.com/matrix-org/synapse/issues/12789)) +- Improve and fix type hints. 
([\#12567](/~https://github.com/matrix-org/synapse/issues/12567), [\#12477](/~https://github.com/matrix-org/synapse/issues/12477), [\#12717](/~https://github.com/matrix-org/synapse/issues/12717), [\#12753](/~https://github.com/matrix-org/synapse/issues/12753), [\#12695](/~https://github.com/matrix-org/synapse/issues/12695), [\#12734](/~https://github.com/matrix-org/synapse/issues/12734), [\#12716](/~https://github.com/matrix-org/synapse/issues/12716), [\#12726](/~https://github.com/matrix-org/synapse/issues/12726), [\#12790](/~https://github.com/matrix-org/synapse/issues/12790), [\#12833](/~https://github.com/matrix-org/synapse/issues/12833)) - Update EventContext `get_current_event_ids` and `get_prev_event_ids` to accept state filters and update calls where possible. ([\#12791](/~https://github.com/matrix-org/synapse/issues/12791)) - Remove Caddy from the Synapse workers image used in Complement. ([\#12818](/~https://github.com/matrix-org/synapse/issues/12818)) - Add Complement's shared registration secret to the Complement worker image. This fixes tests that depend on it. ([\#12819](/~https://github.com/matrix-org/synapse/issues/12819)) - Support registering Application Services when running with workers under Complement. ([\#12826](/~https://github.com/matrix-org/synapse/issues/12826)) -- Add some type hints to test files. ([\#12833](/~https://github.com/matrix-org/synapse/issues/12833)) - Disable 'faster room join' Complement tests when testing against Synapse with workers. ([\#12842](/~https://github.com/matrix-org/synapse/issues/12842)) From 88ce3080d4d064b9872c9867208116dc9db73d7e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Tue, 24 May 2022 09:23:23 -0400 Subject: [PATCH 092/181] Experimental support for MSC3772 (#12740) Implements the following behind an experimental configuration flag: * A new push rule kind for mutually related events. * A new default push rule (`.m.rule.thread_reply`) under an unstable prefix. 
This is missing part of MSC3772: * The `.m.rule.thread_reply_to_me` push rule, this depends on MSC3664 / #11804. --- changelog.d/12740.feature | 1 + synapse/config/experimental.py | 3 + synapse/push/baserules.py | 14 ++++ synapse/push/bulk_push_rule_evaluator.py | 71 ++++++++++++++++- synapse/push/clientformat.py | 4 + synapse/push/push_rule_evaluator.py | 50 +++++++++++- synapse/storage/databases/main/events.py | 9 +++ synapse/storage/databases/main/push_rule.py | 5 ++ synapse/storage/databases/main/relations.py | 52 +++++++++++++ tests/push/test_push_rule_evaluator.py | 84 ++++++++++++++++++++- 10 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 changelog.d/12740.feature diff --git a/changelog.d/12740.feature b/changelog.d/12740.feature new file mode 100644 index 000000000000..e674c31ae8aa --- /dev/null +++ b/changelog.d/12740.feature @@ -0,0 +1 @@ +Experimental support for [MSC3772](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3772): Push rule for mutually related events. diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index b20d949689ec..cc417e2fbf9b 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -84,3 +84,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # MSC3786 (Add a default push rule to ignore m.room.server_acl events) self.msc3786_enabled: bool = experimental.get("msc3786_enabled", False) + + # MSC3772: A push rule for mutual relations. + self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False) diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index a17b35a605fb..4c7278b5a112 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -139,6 +139,7 @@ def make_base_prepend_rules( { "kind": "event_match", "key": "content.body", + # Match the localpart of the requester's MXID. 
"pattern_type": "user_localpart", } ], @@ -191,6 +192,7 @@ def make_base_prepend_rules( "pattern": "invite", "_cache_key": "_invite_member", }, + # Match the requester's MXID. {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"}, ], "actions": [ @@ -350,6 +352,18 @@ def make_base_prepend_rules( {"set_tweak": "highlight", "value": False}, ], }, + { + "rule_id": "global/underride/.org.matrix.msc3772.thread_reply", + "conditions": [ + { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.thread", + # Match the requester's MXID. + "sender_type": "user_id", + } + ], + "actions": ["notify", {"set_tweak": "highlight", "value": False}], + }, { "rule_id": "global/underride/.m.rule.message", "conditions": [ diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 4cc8a2ecca7a..1a8e7ef3dc46 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -13,8 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import itertools import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union import attr from prometheus_client import Counter @@ -121,6 +122,9 @@ def __init__(self, hs: "HomeServer"): resizable=False, ) + # Whether to support MSC3772 is supported. 
+ self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled + async def _get_rules_for_event( self, event: EventBase, context: EventContext ) -> Dict[str, List[Dict[str, Any]]]: @@ -192,6 +196,60 @@ async def _get_power_levels_and_sender_level( return pl_event.content if pl_event else {}, sender_level + async def _get_mutual_relations( + self, event: EventBase, rules: Iterable[Dict[str, Any]] + ) -> Dict[str, Set[Tuple[str, str]]]: + """ + Fetch event metadata for events which relate to the same event as the given event. + + If the given event has no relation information, returns an empty dictionary. + + Args: + event: The event to fetch mutual relations for. + rules: The push rules which will be processed for this event. + + Returns: + A dictionary of relation type to: + A set of tuples of: + The sender + The event type + """ + + # If the experimental feature is not enabled, skip fetching relations. + if not self._relations_match_enabled: + return {} + + # If the event does not have a relation, then it cannot have any mutual + # relations. + relation = relation_from_event(event) + if not relation: + return {} + + # Pre-filter to figure out which relation types are interesting. + rel_types = set() + for rule in rules: + # Skip disabled rules. + if "enabled" in rule and not rule["enabled"]: + continue + + for condition in rule["conditions"]: + if condition["kind"] != "org.matrix.msc3772.relation_match": + continue + + # rel_type is required. + rel_type = condition.get("rel_type") + if rel_type: + rel_types.add(rel_type) + + # If no valid rules were found, no mutual relations. + if not rel_types: + return {} + + # If any valid rules were found, fetch the mutual relations. 
+ return await self.store.get_mutual_event_relations( + relation.parent_id, rel_types + ) + @measure_func("action_for_event_by_user") async def action_for_event_by_user( self, event: EventBase, context: EventContext @@ -216,8 +274,17 @@ async def action_for_event_by_user( sender_power_level, ) = await self._get_power_levels_and_sender_level(event, context) + relations = await self._get_mutual_relations( + event, itertools.chain(*rules_by_user.values()) + ) + evaluator = PushRuleEvaluatorForEvent( - event, len(room_members), sender_power_level, power_levels + event, + len(room_members), + sender_power_level, + power_levels, + relations, + self._relations_match_enabled, ) # If the event is not a state event check if any users ignore the sender. diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index 63b22d50aea9..5117ef6854f1 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -48,6 +48,10 @@ def format_push_rules_for_user( elif pattern_type == "user_localpart": c["pattern"] = user.localpart + sender_type = c.pop("sender_type", None) + if sender_type == "user_id": + c["sender"] = user.to_string() + rulearray = rules["global"][template_name] template_rule = _rule_to_template(r) diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py index 54db6b5612a3..2e8a017add34 100644 --- a/synapse/push/push_rule_evaluator.py +++ b/synapse/push/push_rule_evaluator.py @@ -15,7 +15,7 @@ import logging import re -from typing import Any, Dict, List, Mapping, Optional, Pattern, Tuple, Union +from typing import Any, Dict, List, Mapping, Optional, Pattern, Set, Tuple, Union from matrix_common.regex import glob_to_regex, to_word_pattern @@ -120,11 +120,15 @@ def __init__( room_member_count: int, sender_power_level: int, power_levels: Dict[str, Union[int, Dict[str, int]]], + relations: Dict[str, Set[Tuple[str, str]]], + relations_match_enabled: bool, ): self._event = event self._room_member_count = 
room_member_count self._sender_power_level = sender_power_level self._power_levels = power_levels + self._relations = relations + self._relations_match_enabled = relations_match_enabled # Maps strings of e.g. 'content.body' -> event["content"]["body"] self._value_cache = _flatten_dict(event) @@ -188,7 +192,16 @@ def matches( return _sender_notification_permission( self._event, condition, self._sender_power_level, self._power_levels ) + elif ( + condition["kind"] == "org.matrix.msc3772.relation_match" + and self._relations_match_enabled + ): + return self._relation_match(condition, user_id) else: + # XXX This looks incorrect -- we have reached an unknown condition + # kind and are unconditionally returning that it matches. Note + # that it seems possible to provide a condition to the /pushrules + # endpoint with an unknown kind, see _rule_tuple_from_request_object. return True def _event_match(self, condition: dict, user_id: str) -> bool: @@ -256,6 +269,41 @@ def _contains_display_name(self, display_name: Optional[str]) -> bool: return bool(r.search(body)) + def _relation_match(self, condition: dict, user_id: str) -> bool: + """ + Check a "relation_match" push rule condition. + + Args: + condition: The "relation_match" push rule condition to match. + user_id: The user's MXID. + + Returns: + True if the condition matches the event, False otherwise. + """ + rel_type = condition.get("rel_type") + if not rel_type: + logger.warning("relation_match condition missing rel_type") + return False + + sender_pattern = condition.get("sender") + if sender_pattern is None: + sender_type = condition.get("sender_type") + if sender_type == "user_id": + sender_pattern = user_id + type_pattern = condition.get("type") + + # If any other relation matches, return True. 
+ for sender, event_type in self._relations.get(rel_type, ()): + if sender_pattern and not _glob_matches(sender_pattern, sender): + continue + if type_pattern and not _glob_matches(type_pattern, event_type): + continue + # All values must have matched. + return True + + # No relations matched. + return False + # Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache( diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 0df8ff53957a..17e35cf63e68 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1828,6 +1828,10 @@ def _handle_event_relations( self.store.get_aggregation_groups_for_event.invalidate, (relation.parent_id,), ) + txn.call_after( + self.store.get_mutual_event_relations_for_rel_type.invalidate, + (relation.parent_id,), + ) if relation.rel_type == RelationTypes.REPLACE: txn.call_after( @@ -2004,6 +2008,11 @@ def _handle_redact_relations( self.store._invalidate_cache_and_stream( txn, self.store.get_thread_participated, (redacted_relates_to,) ) + self.store._invalidate_cache_and_stream( + txn, + self.store.get_mutual_event_relations_for_rel_type, + (redacted_relates_to,), + ) self.db_pool.simple_delete_txn( txn, table="event_relations", keyvalues={"event_id": redacted_event_id} diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index ad67901cc1ac..4adabc88cce4 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -61,6 +61,11 @@ def _is_experimental_rule_enabled( and not experimental_config.msc3786_enabled ): return False + if ( + rule_id == "global/underride/.org.matrix.msc3772.thread_reply" + and not experimental_config.msc3772_enabled + ): + return False return True diff --git a/synapse/storage/databases/main/relations.py 
b/synapse/storage/databases/main/relations.py index fe8fded88b87..3b1b2ce6cb8d 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -13,6 +13,7 @@ # limitations under the License. import logging +from collections import defaultdict from typing import ( Collection, Dict, @@ -767,6 +768,57 @@ def _get_if_user_has_annotated_event(txn: LoggingTransaction) -> bool: "get_if_user_has_annotated_event", _get_if_user_has_annotated_event ) + @cached(iterable=True) + async def get_mutual_event_relations_for_rel_type( + self, event_id: str, relation_type: str + ) -> Set[Tuple[str, str]]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_mutual_event_relations_for_rel_type", + list_name="relation_types", + ) + async def get_mutual_event_relations( + self, event_id: str, relation_types: Collection[str] + ) -> Dict[str, Set[Tuple[str, str]]]: + """ + Fetch event metadata for events which relate to the same event as the given event. + + If the given event has no relation information, returns an empty dictionary. + + Args: + event_id: The event ID which is targeted by relations. + relation_types: The relation types to check for mutual relations. + + Returns: + A dictionary of relation type to: + A set of tuples of: + The sender + The event type + """ + rel_type_sql, rel_type_args = make_in_list_sql_clause( + self.database_engine, "relation_type", relation_types + ) + + sql = f""" + SELECT DISTINCT relation_type, sender, type FROM event_relations + INNER JOIN events USING (event_id) + WHERE relates_to_id = ? 
AND {rel_type_sql} + """ + + def _get_event_relations( + txn: LoggingTransaction, + ) -> Dict[str, Set[Tuple[str, str]]]: + txn.execute(sql, [event_id] + rel_type_args) + result = defaultdict(set) + for rel_type, sender, type in txn.fetchall(): + result[rel_type].add((sender, type)) + return result + + return await self.db_pool.runInteraction( + "get_event_relations", _get_event_relations + ) + class RelationsStore(RelationsWorkerStore): pass diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 5dba1870762e..9b623d0033cd 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, Optional, Union +from typing import Dict, Optional, Set, Tuple, Union import frozendict @@ -26,7 +26,12 @@ class PushRuleEvaluatorTestCase(unittest.TestCase): - def _get_evaluator(self, content: JsonDict) -> PushRuleEvaluatorForEvent: + def _get_evaluator( + self, + content: JsonDict, + relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None, + relations_match_enabled: bool = False, + ) -> PushRuleEvaluatorForEvent: event = FrozenEvent( { "event_id": "$event_id", @@ -42,7 +47,12 @@ def _get_evaluator(self, content: JsonDict) -> PushRuleEvaluatorForEvent: sender_power_level = 0 power_levels: Dict[str, Union[int, Dict[str, int]]] = {} return PushRuleEvaluatorForEvent( - event, room_member_count, sender_power_level, power_levels + event, + room_member_count, + sender_power_level, + power_levels, + relations or set(), + relations_match_enabled, ) def test_display_name(self) -> None: @@ -276,3 +286,71 @@ def test_tweaks_for_actions(self) -> None: push_rule_evaluator.tweaks_for_actions(actions), {"sound": "default", "highlight": True}, ) + + def test_relation_match(self) -> None: + """Test the relation_match push rule kind.""" + + # Check if the experimental feature 
is disabled. + evaluator = self._get_evaluator( + {}, {"m.annotation": {("@user:test", "m.reaction")}} + ) + condition = {"kind": "relation_match"} + # Oddly, an unknown condition always matches. + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + # A push rule evaluator with the experimental rule enabled. + evaluator = self._get_evaluator( + {}, {"m.annotation": {("@user:test", "m.reaction")}}, True + ) + + # Check just relation type. + condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + } + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + # Check relation type and sender. + condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + "sender": "@user:test", + } + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + "sender": "@other:test", + } + self.assertFalse(evaluator.matches(condition, "@user:test", "foo")) + + # Check relation type and event type. + condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + "type": "m.reaction", + } + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + # Check just sender, this fails since rel_type is required. + condition = { + "kind": "org.matrix.msc3772.relation_match", + "sender": "@user:test", + } + self.assertFalse(evaluator.matches(condition, "@user:test", "foo")) + + # Check sender glob. + condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + "sender": "@*:test", + } + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) + + # Check event type glob. 
+ condition = { + "kind": "org.matrix.msc3772.relation_match", + "rel_type": "m.annotation", + "event_type": "*.reaction", + } + self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) From 6855024e0a363ff09d50586dcf1b089b77ac3b0c Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Tue, 24 May 2022 15:39:54 +0100 Subject: [PATCH 093/181] Add authentication to thirdparty bridge APIs (#12746) Co-authored-by: Brendan Abolivier --- changelog.d/12746.bugfix | 1 + synapse/appservice/api.py | 15 ++++-- tests/appservice/test_api.py | 102 +++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12746.bugfix create mode 100644 tests/appservice/test_api.py diff --git a/changelog.d/12746.bugfix b/changelog.d/12746.bugfix new file mode 100644 index 000000000000..67e7fc854c4f --- /dev/null +++ b/changelog.d/12746.bugfix @@ -0,0 +1 @@ +Always send an `access_token` in `/thirdparty/` requests to appservices, as required by the [Matrix specification](https://spec.matrix.org/v1.1/application-service-api/#third-party-networks). \ No newline at end of file diff --git a/synapse/appservice/api.py b/synapse/appservice/api.py index d19f8dd996b2..df1c21446203 100644 --- a/synapse/appservice/api.py +++ b/synapse/appservice/api.py @@ -14,7 +14,7 @@ # limitations under the License. import logging import urllib.parse -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Tuple from prometheus_client import Counter from typing_extensions import TypeGuard @@ -155,6 +155,9 @@ async def query_3pe( if service.url is None: return [] + # This is required by the configuration. 
+ assert service.hs_token is not None + uri = "%s%s/thirdparty/%s/%s" % ( service.url, APP_SERVICE_PREFIX, @@ -162,7 +165,11 @@ async def query_3pe( urllib.parse.quote(protocol), ) try: - response = await self.get_json(uri, fields) + args: Mapping[Any, Any] = { + **fields, + b"access_token": service.hs_token, + } + response = await self.get_json(uri, args=args) if not isinstance(response, list): logger.warning( "query_3pe to %s returned an invalid response %r", uri, response @@ -190,13 +197,15 @@ async def get_3pe_protocol( return {} async def _get() -> Optional[JsonDict]: + # This is required by the configuration. + assert service.hs_token is not None uri = "%s%s/thirdparty/protocol/%s" % ( service.url, APP_SERVICE_PREFIX, urllib.parse.quote(protocol), ) try: - info = await self.get_json(uri) + info = await self.get_json(uri, {"access_token": service.hs_token}) if not _is_valid_3pe_metadata(info): logger.warning( diff --git a/tests/appservice/test_api.py b/tests/appservice/test_api.py new file mode 100644 index 000000000000..3e0db4dd9871 --- /dev/null +++ b/tests/appservice/test_api.py @@ -0,0 +1,102 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, List, Mapping +from unittest.mock import Mock + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.appservice import ApplicationService +from synapse.server import HomeServer +from synapse.types import JsonDict +from synapse.util import Clock + +from tests import unittest + +PROTOCOL = "myproto" +TOKEN = "myastoken" +URL = "http://mytestservice" + + +class ApplicationServiceApiTestCase(unittest.HomeserverTestCase): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer): + self.api = hs.get_application_service_api() + self.service = ApplicationService( + id="unique_identifier", + sender="@as:test", + url=URL, + token="unused", + hs_token=TOKEN, + hostname="myserver", + ) + + def test_query_3pe_authenticates_token(self): + """ + Tests that 3pe queries to the appservice are authenticated + with the appservice's token. + """ + + SUCCESS_RESULT_USER = [ + { + "protocol": PROTOCOL, + "userid": "@a:user", + "fields": { + "more": "fields", + }, + } + ] + SUCCESS_RESULT_LOCATION = [ + { + "protocol": PROTOCOL, + "alias": "#a:room", + "fields": { + "more": "fields", + }, + } + ] + + URL_USER = f"{URL}/_matrix/app/unstable/thirdparty/user/{PROTOCOL}" + URL_LOCATION = f"{URL}/_matrix/app/unstable/thirdparty/location/{PROTOCOL}" + + self.request_url = None + + async def get_json(url: str, args: Mapping[Any, Any]) -> List[JsonDict]: + if not args.get(b"access_token"): + raise RuntimeError("Access token not provided") + + self.assertEqual(args.get(b"access_token"), TOKEN) + self.request_url = url + if url == URL_USER: + return SUCCESS_RESULT_USER + elif url == URL_LOCATION: + return SUCCESS_RESULT_LOCATION + else: + raise RuntimeError( + "URL provided was invalid. This should never be seen." + ) + + # We assign to a method, which mypy doesn't like. 
+ self.api.get_json = Mock(side_effect=get_json) # type: ignore[assignment] + + result = self.get_success( + self.api.query_3pe(self.service, "user", PROTOCOL, {b"some": [b"field"]}) + ) + self.assertEqual(self.request_url, URL_USER) + self.assertEqual(result, SUCCESS_RESULT_USER) + result = self.get_success( + self.api.query_3pe( + self.service, "location", PROTOCOL, {b"some": [b"field"]} + ) + ) + self.assertEqual(self.request_url, URL_LOCATION) + self.assertEqual(result, SUCCESS_RESULT_LOCATION) From 042e47970b15260eeb7e3162e4406b4f2e94008c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Tue, 24 May 2022 18:42:32 +0200 Subject: [PATCH 094/181] Remove `dont_notify` from the `.m.rule.room.server_acl` rule (#12849) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/12849.misc | 1 + synapse/push/baserules.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12849.misc diff --git a/changelog.d/12849.misc b/changelog.d/12849.misc new file mode 100644 index 000000000000..4c2a15ce2b58 --- /dev/null +++ b/changelog.d/12849.misc @@ -0,0 +1 @@ +Remove `dont_notify` from the `.m.rule.room.server_acl` rule. 
\ No newline at end of file diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 4c7278b5a112..819bc9e9b680 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -292,7 +292,7 @@ def make_base_prepend_rules( "_cache_key": "_room_server_acl", } ], - "actions": ["dont_notify"], + "actions": [], }, ] From 81d9f2a8e9ee2d18f4ed9cc6d39fd9c2e793bc62 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 24 May 2022 17:50:50 +0100 Subject: [PATCH 095/181] Fixes to MSC3787 implementation (#12858) --- changelog.d/12858.bugfix | 1 + scripts-dev/complement.sh | 2 +- synapse/handlers/room_summary.py | 3 ++- synapse/storage/databases/main/room.py | 35 +++++++++++++------------- 4 files changed, 21 insertions(+), 20 deletions(-) create mode 100644 changelog.d/12858.bugfix diff --git a/changelog.d/12858.bugfix b/changelog.d/12858.bugfix new file mode 100644 index 000000000000..7a7ddc9a1343 --- /dev/null +++ b/changelog.d/12858.bugfix @@ -0,0 +1 @@ +Fix [MSC3787](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787) rooms being omitted from room directory, room summary and space hierarchy responses. diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index ca476d9a5e61..3c472c576e70 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -45,7 +45,7 @@ docker build -t matrixdotorg/synapse -f "docker/Dockerfile" . extra_test_args=() -test_tags="synapse_blacklist,msc2716,msc3030" +test_tags="synapse_blacklist,msc2716,msc3030,msc3787" # If we're using workers, modify the docker files slightly. if [[ -n "$WORKERS" ]]; then diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index af83de319348..1dd74912fa95 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -662,7 +662,8 @@ async def _is_remote_room_accessible( # The API doesn't return the room version so assume that a # join rule of knock is valid. 
if ( - room.get("join_rules") in (JoinRules.PUBLIC, JoinRules.KNOCK) + room.get("join_rules") + in (JoinRules.PUBLIC, JoinRules.KNOCK, JoinRules.KNOCK_RESTRICTED) or room.get("world_readable") is True ): return True diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index ded15b92ef84..10f2ceb50b95 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -233,24 +233,23 @@ def _count_public_rooms_txn(txn: LoggingTransaction) -> int: UNION SELECT room_id from appservice_room_list """ - sql = """ + sql = f""" SELECT COUNT(*) FROM ( - %(published_sql)s + {published_sql} ) published INNER JOIN room_stats_state USING (room_id) INNER JOIN room_stats_current USING (room_id) WHERE ( - join_rules = 'public' OR join_rules = '%(knock_join_rule)s' + join_rules = '{JoinRules.PUBLIC}' + OR join_rules = '{JoinRules.KNOCK}' + OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) AND joined_members > 0 - """ % { - "published_sql": published_sql, - "knock_join_rule": JoinRules.KNOCK, - } + """ txn.execute(sql, query_args) return cast(Tuple[int], txn.fetchone())[0] @@ -369,29 +368,29 @@ async def get_largest_public_rooms( if where_clauses: where_clause = " AND " + " AND ".join(where_clauses) - sql = """ + dir = "DESC" if forwards else "ASC" + sql = f""" SELECT room_id, name, topic, canonical_alias, joined_members, avatar, history_visibility, guest_access, join_rules FROM ( - %(published_sql)s + {published_sql} ) published INNER JOIN room_stats_state USING (room_id) INNER JOIN room_stats_current USING (room_id) WHERE ( - join_rules = 'public' OR join_rules = '%(knock_join_rule)s' + join_rules = '{JoinRules.PUBLIC}' + OR join_rules = '{JoinRules.KNOCK}' + OR join_rules = '{JoinRules.KNOCK_RESTRICTED}' OR history_visibility = 'world_readable' ) AND joined_members > 0 - %(where_clause)s - ORDER BY joined_members %(dir)s, room_id %(dir)s - """ % { - "published_sql": 
published_sql, - "where_clause": where_clause, - "dir": "DESC" if forwards else "ASC", - "knock_join_rule": JoinRules.KNOCK, - } + {where_clause} + ORDER BY + joined_members {dir}, + room_id {dir} + """ if limit is not None: query_args.append(limit) From e7c77a8750094616419720379afa02506e716c7d Mon Sep 17 00:00:00 2001 From: David Robertson Date: Tue, 24 May 2022 19:17:21 +0100 Subject: [PATCH 096/181] Correct annotation of `_iterate_over_text` (#12860) --- changelog.d/12860.misc | 1 + synapse/rest/media/v1/preview_html.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12860.misc diff --git a/changelog.d/12860.misc b/changelog.d/12860.misc new file mode 100644 index 000000000000..b7d29430234e --- /dev/null +++ b/changelog.d/12860.misc @@ -0,0 +1 @@ +Correct a type annotation in the URL preview source code. diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ca73965fc28f..e72c8987ccb4 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -281,7 +281,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: def _iterate_over_text( - tree: "etree.Element", *tags_to_ignore: Iterable[Union[str, "etree.Comment"]] + tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"] ) -> Generator[str, None, None]: """Iterate over the tree returning text nodes in a depth first fashion, skipping text nodes inside certain tags. 
From 298911555c2572da823398f2816846f7353e89e9 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 25 May 2022 11:14:03 +0200 Subject: [PATCH 097/181] Fix typos in documentation (#12863) --- changelog.d/12863.doc | 1 + docs/message_retention_policies.md | 2 +- docs/structured_logging.md | 2 +- docs/workers.md | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12863.doc diff --git a/changelog.d/12863.doc b/changelog.d/12863.doc new file mode 100644 index 000000000000..94f7b8371a5e --- /dev/null +++ b/changelog.d/12863.doc @@ -0,0 +1 @@ +Fix typos in documentation. \ No newline at end of file diff --git a/docs/message_retention_policies.md b/docs/message_retention_policies.md index 9214d6d7e98b..b52c4aaa2425 100644 --- a/docs/message_retention_policies.md +++ b/docs/message_retention_policies.md @@ -117,7 +117,7 @@ In this example, we define three jobs: Note that this example is tailored to show different configurations and features slightly more jobs than it's probably necessary (in practice, a server admin would probably consider it better to replace the two last -jobs with one that runs once a day and handles rooms which which +jobs with one that runs once a day and handles rooms which policy's `max_lifetime` is greater than 3 days). Keep in mind, when configuring these jobs, that a purge job can become diff --git a/docs/structured_logging.md b/docs/structured_logging.md index a6667e1a11e6..d43dc9eb6ee8 100644 --- a/docs/structured_logging.md +++ b/docs/structured_logging.md @@ -43,7 +43,7 @@ loggers: The above logging config will set Synapse as 'INFO' logging level by default, with the SQL layer at 'WARNING', and will log to a file, stored as JSON. -It is also possible to figure Synapse to log to a remote endpoint by using the +It is also possible to configure Synapse to log to a remote endpoint by using the `synapse.logging.RemoteHandler` class included with Synapse. 
It takes the following arguments: diff --git a/docs/workers.md b/docs/workers.md index 779069b8177f..5033722098bb 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -1,6 +1,6 @@ # Scaling synapse via workers -For small instances it recommended to run Synapse in the default monolith mode. +For small instances it is recommended to run Synapse in the default monolith mode. For larger instances where performance is a concern it can be helpful to split out functionality into multiple separate python processes. These processes are called 'workers', and are (eventually) intended to scale horizontally From 774ac4930dbb0e6f2f6dad4b9eb4630154e1e161 Mon Sep 17 00:00:00 2001 From: Carl Bordum Hansen Date: Wed, 25 May 2022 11:14:45 +0200 Subject: [PATCH 098/181] Make sure `prev_ids` defaults to empty list (#12829) Signed-off-by: Carl Bordum Hansen --- changelog.d/12829.bugfix | 1 + synapse/handlers/device.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 changelog.d/12829.bugfix diff --git a/changelog.d/12829.bugfix b/changelog.d/12829.bugfix new file mode 100644 index 000000000000..dfa1fed34e01 --- /dev/null +++ b/changelog.d/12829.bugfix @@ -0,0 +1 @@ +Fix a bug where we did not correctly handle invalid device list updates over federation. Contributed by Carl Bordum Hansen. 
diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 1d6d1f8a9248..e59937fd755c 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -763,6 +763,10 @@ async def incoming_device_list_update( device_id = edu_content.pop("device_id") stream_id = str(edu_content.pop("stream_id")) # They may come as ints prev_ids = edu_content.pop("prev_id", []) + if not isinstance(prev_ids, list): + raise SynapseError( + 400, "Device list update had an invalid 'prev_ids' field" + ) prev_ids = [str(p) for p in prev_ids] # They may come as ints if get_domain_from_id(user_id) != origin: From b4fab0b14f7167c907286ea065d65b5370ba8221 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 25 May 2022 10:20:34 +0100 Subject: [PATCH 099/181] Fix incorrect worker-allowed path in documentation (#12867) --- changelog.d/12867.doc | 1 + docs/workers.md | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) create mode 100644 changelog.d/12867.doc diff --git a/changelog.d/12867.doc b/changelog.d/12867.doc new file mode 100644 index 000000000000..1caeb7a290da --- /dev/null +++ b/changelog.d/12867.doc @@ -0,0 +1 @@ +Fix documentation incorrectly stating the `sendToDevice` endpoint can be directed at generic workers. Contributed by Nick @ Beeper. diff --git a/docs/workers.md b/docs/workers.md index 5033722098bb..25b9338e574b 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -237,9 +237,6 @@ information. 
^/_matrix/client/(api/v1|r0|v3|unstable)/join/ ^/_matrix/client/(api/v1|r0|v3|unstable)/profile/ - # Device requests - ^/_matrix/client/(r0|v3|unstable)/sendToDevice/ - # Account data requests ^/_matrix/client/(r0|v3|unstable)/.*/tags ^/_matrix/client/(r0|v3|unstable)/.*/account_data From 2e5f88b5e69fa4d7385b32d9c439e0073e8d6916 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 25 May 2022 10:41:41 +0100 Subject: [PATCH 100/181] Add the `/account/whoami` endpoint to generic workers (#12866) --- changelog.d/12866.misc | 1 + docs/workers.md | 1 + synapse/app/generic_worker.py | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12866.misc diff --git a/changelog.d/12866.misc b/changelog.d/12866.misc new file mode 100644 index 000000000000..3f7ef5925319 --- /dev/null +++ b/changelog.d/12866.misc @@ -0,0 +1 @@ +Enable the `/account/whoami` endpoint on synapse worker processes. Contributed by Nick @ Beeper. diff --git a/docs/workers.md b/docs/workers.md index 25b9338e574b..3c3360ccb455 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -208,6 +208,7 @@ information. 
^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(r0|v3|unstable)/account/3pid$ + ^/_matrix/client/(r0|v3|unstable)/account/whoami$ ^/_matrix/client/(r0|v3|unstable)/devices$ ^/_matrix/client/versions$ ^/_matrix/client/(api/v1|r0|v3|unstable)/voip/turnServer$ diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 2a9480a5c161..39d9db8d989a 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -87,7 +87,7 @@ voip, ) from synapse.rest.client._base import client_patterns -from synapse.rest.client.account import ThreepidRestServlet +from synapse.rest.client.account import ThreepidRestServlet, WhoamiRestServlet from synapse.rest.client.devices import DevicesRestServlet from synapse.rest.client.keys import ( KeyChangesServlet, @@ -289,6 +289,7 @@ def _listen_http(self, listener_config: ListenerConfig) -> None: RegistrationTokenValidityRestServlet(self).register(resource) login.register_servlets(self, resource) ThreepidRestServlet(self).register(resource) + WhoamiRestServlet(self).register(resource) DevicesRestServlet(self).register(resource) # Read-only From 33e2916858c0503a54be1c01e242123dcfb02e21 Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 25 May 2022 10:46:05 +0100 Subject: [PATCH 101/181] Don't create empty AS txns when the AS is down (#12869) --- changelog.d/12869.misc | 1 + synapse/appservice/scheduler.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12869.misc diff --git a/changelog.d/12869.misc b/changelog.d/12869.misc new file mode 100644 index 000000000000..1d9d1c8921de --- /dev/null +++ b/changelog.d/12869.misc @@ -0,0 +1 @@ +Don't generate empty AS transactions when the AS is flagged as down. Contributed by Nick @ Beeper. 
diff --git a/synapse/appservice/scheduler.py b/synapse/appservice/scheduler.py index 3b49e6071677..de5e5216c223 100644 --- a/synapse/appservice/scheduler.py +++ b/synapse/appservice/scheduler.py @@ -384,6 +384,11 @@ async def send( device_list_summary: The device list summary to include in the transaction. """ try: + service_is_up = await self._is_service_up(service) + # Don't create empty txns when in recovery mode (ephemeral events are dropped) + if not service_is_up and not events: + return + txn = await self.store.create_appservice_txn( service=service, events=events, @@ -393,7 +398,6 @@ async def send( unused_fallback_keys=unused_fallback_keys or {}, device_list_summary=device_list_summary or DeviceListUpdates(), ) - service_is_up = await self._is_service_up(service) if service_is_up: sent = await txn.send(self.as_api) if sent: From 1f9013ce60ac7c2b75ea1bfacb9314239e4e0cff Mon Sep 17 00:00:00 2001 From: Nick Mills-Barrett Date: Wed, 25 May 2022 10:51:07 +0100 Subject: [PATCH 102/181] Add the `batch_send` endpoint to generic workers (#12868) --- changelog.d/12868.misc | 1 + docker/configure_workers_and_start.py | 1 + docs/workers.md | 1 + synapse/app/generic_worker.py | 2 ++ 4 files changed, 5 insertions(+) create mode 100644 changelog.d/12868.misc diff --git a/changelog.d/12868.misc b/changelog.d/12868.misc new file mode 100644 index 000000000000..382a876dab19 --- /dev/null +++ b/changelog.d/12868.misc @@ -0,0 +1 @@ +Enable the `batch_send` endpoint on synapse worker processes. Contributed by Nick @ Beeper. 
diff --git a/docker/configure_workers_and_start.py b/docker/configure_workers_and_start.py index b6ad14117325..f7dac902223f 100755 --- a/docker/configure_workers_and_start.py +++ b/docker/configure_workers_and_start.py @@ -158,6 +158,7 @@ "^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/(join|invite|leave|ban|unban|kick)$", "^/_matrix/client/(api/v1|r0|v3|unstable)/join/", "^/_matrix/client/(api/v1|r0|v3|unstable)/profile/", + "^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send", ], "shared_extra_conf": {}, "worker_extra_conf": "", diff --git a/docs/workers.md b/docs/workers.md index 3c3360ccb455..6a76f43fa1d2 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -206,6 +206,7 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ ^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$ + ^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(r0|v3|unstable)/account/3pid$ ^/_matrix/client/(r0|v3|unstable)/account/whoami$ diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index 39d9db8d989a..c0d007bb7966 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -78,6 +78,7 @@ read_marker, receipts, room, + room_batch, room_keys, sendtodevice, sync, @@ -309,6 +310,7 @@ def _listen_http(self, listener_config: ListenerConfig) -> None: room.register_servlets(self, resource, is_worker=True) room.register_deprecated_servlets(self, resource) initial_sync.register_servlets(self, resource) + room_batch.register_servlets(self, resource) room_keys.register_servlets(self, resource) tags.register_servlets(self, resource) account_data.register_servlets(self, resource) From 6aeee9a19deb68ed071ddde7150609826bfa4988 Mon Sep 17 00:00:00 2001 From: David Robertson Date: Wed, 25 May 2022 11:19:22 +0100 Subject: [PATCH 103/181] Correct 
typo in changelog for #12858. --- changelog.d/12858.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/12858.bugfix b/changelog.d/12858.bugfix index 7a7ddc9a1343..8c95a3e3a323 100644 --- a/changelog.d/12858.bugfix +++ b/changelog.d/12858.bugfix @@ -1 +1 @@ -Fix [MSC3878](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787) rooms being omitted from room directory, room summary and space hierarchy responses. +Fix [MSC3787](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3787) rooms being omitted from room directory, room summary and space hierarchy responses. From 4cbcd4a99959a4aaa04c023812f02d9c27e4945f Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 May 2022 07:49:12 -0400 Subject: [PATCH 104/181] Misc clean-up of push rules datastore (#12856) --- changelog.d/12856.misc | 1 + synapse/storage/databases/main/push_rule.py | 16 +++++----------- 2 files changed, 6 insertions(+), 11 deletions(-) create mode 100644 changelog.d/12856.misc diff --git a/changelog.d/12856.misc b/changelog.d/12856.misc new file mode 100644 index 000000000000..19ecefd9af8d --- /dev/null +++ b/changelog.d/12856.misc @@ -0,0 +1 @@ +Clean-up the push rules datastore. 
diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 4adabc88cce4..d5aefe02b6dc 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -174,7 +174,7 @@ async def get_push_rules_for_user(self, user_id: str) -> List[JsonDict]: "conditions", "actions", ), - desc="get_push_rules_enabled_for_user", + desc="get_push_rules_for_user", ) rows.sort(key=lambda row: (-int(row["priority_class"]), -int(row["priority"]))) @@ -188,10 +188,10 @@ async def get_push_rules_enabled_for_user(self, user_id: str) -> Dict[str, bool] results = await self.db_pool.simple_select_list( table="push_rules_enable", keyvalues={"user_name": user_id}, - retcols=("user_name", "rule_id", "enabled"), + retcols=("rule_id", "enabled"), desc="get_push_rules_enabled_for_user", ) - return {r["rule_id"]: False if r["enabled"] == 0 else True for r in results} + return {r["rule_id"]: bool(r["enabled"]) for r in results} async def have_push_rules_changed_for_user( self, user_id: str, last_id: int @@ -213,11 +213,7 @@ def have_push_rules_changed_txn(txn: LoggingTransaction) -> bool: "have_push_rules_changed", have_push_rules_changed_txn ) - @cachedList( - cached_method_name="get_push_rules_for_user", - list_name="user_ids", - num_args=1, - ) + @cachedList(cached_method_name="get_push_rules_for_user", list_name="user_ids") async def bulk_get_push_rules( self, user_ids: Collection[str] ) -> Dict[str, List[JsonDict]]: @@ -249,9 +245,7 @@ async def bulk_get_push_rules( return results @cachedList( - cached_method_name="get_push_rules_enabled_for_user", - list_name="user_ids", - num_args=1, + cached_method_name="get_push_rules_enabled_for_user", list_name="user_ids" ) async def bulk_get_push_rules_enabled( self, user_ids: Collection[str] From 759f9c09e1b2019b772f6baf6a40e74f79df9017 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 May 2022 07:49:54 -0400 Subject: [PATCH 105/181] Fix caching behavior 
for relations push rules. (#12859) By always returning all requested values from the function wrapped by cachedList. Otherwise implicit None values get added into the cache, which are unexpected. --- changelog.d/12859.feature | 1 + synapse/storage/databases/main/relations.py | 5 +++-- synapse/util/caches/descriptors.py | 15 ++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) create mode 100644 changelog.d/12859.feature diff --git a/changelog.d/12859.feature b/changelog.d/12859.feature new file mode 100644 index 000000000000..e674c31ae8aa --- /dev/null +++ b/changelog.d/12859.feature @@ -0,0 +1 @@ +Experimental support for [MSC3772](/~https://github.com/matrix-org/matrix-spec-proposals/pull/3772): Push rule for mutually related events. diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 3b1b2ce6cb8d..b457bc189eb4 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -13,7 +13,6 @@ # limitations under the License. import logging -from collections import defaultdict from typing import ( Collection, Dict, @@ -810,7 +809,9 @@ def _get_event_relations( txn: LoggingTransaction, ) -> Dict[str, Set[Tuple[str, str]]]: txn.execute(sql, [event_id] + rel_type_args) - result = defaultdict(set) + result: Dict[str, Set[Tuple[str, str]]] = { + rel_type: set() for rel_type in relation_types + } for rel_type, sender, type in txn.fetchall(): result[rel_type].add((sender, type)) return result diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index eda92d864dea..867f315b2ace 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -595,13 +595,14 @@ def cached( def cachedList( *, cached_method_name: str, list_name: str, num_args: Optional[int] = None ) -> Callable[[F], _CachedFunction[F]]: - """Creates a descriptor that wraps a function in a `CacheListDescriptor`. 
+ """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. - Used to do batch lookups for an already created cache. A single argument + Used to do batch lookups for an already created cache. One of the arguments is specified as a list that is iterated through to lookup keys in the original cache. A new tuple consisting of the (deduplicated) keys that weren't in - the cache gets passed to the original function, the result of which is stored in the - cache. + the cache gets passed to the original function, which is expected to results + in a map of key to value for each passed value. THe new results are stored in the + original cache. Note that any missing values are cached as None. Args: cached_method_name: The name of the single-item lookup method. @@ -614,11 +615,11 @@ def cachedList( Example: class Example: - @cached(num_args=2) - def do_something(self, first_arg): + @cached() + def do_something(self, first_arg, second_arg): ... - @cachedList(do_something.cache, list_name="second_args", num_args=2) + @cachedList(cached_method_name="do_something", list_name="second_args") def batch_do_something(self, first_arg, second_args): ... """ From a8db8c6eba8625f8fc224b320be6074d849ceada Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Wed, 25 May 2022 07:53:40 -0400 Subject: [PATCH 106/181] Remove user-visible groups/communities code (#12553) Makes it so that groups/communities no longer exist from a user-POV. E.g. we remove: * All API endpoints (including Client-Server, Server-Server, and admin). * Documented configuration options (and the experimental flag, which is now unused). * Special handling during room upgrades. * The `groups` section of the `/sync` response. 
--- changelog.d/12553.removal | 1 + docs/sample_config.yaml | 10 - .../configuration/config_documentation.md | 19 - synapse/api/constants.py | 5 - synapse/app/generic_worker.py | 4 - synapse/config/experimental.py | 3 - synapse/config/groups.py | 12 - .../federation/transport/server/__init__.py | 48 +- .../transport/server/groups_local.py | 115 --- .../transport/server/groups_server.py | 755 -------------- synapse/handlers/room_member.py | 11 - synapse/handlers/sync.py | 65 -- synapse/rest/__init__.py | 3 - synapse/rest/admin/__init__.py | 3 - synapse/rest/admin/groups.py | 50 - synapse/rest/client/groups.py | 962 ------------------ synapse/rest/client/sync.py | 8 - tests/rest/admin/test_admin.py | 90 +- tests/rest/client/test_groups.py | 56 - 19 files changed, 3 insertions(+), 2217 deletions(-) create mode 100644 changelog.d/12553.removal delete mode 100644 synapse/federation/transport/server/groups_local.py delete mode 100644 synapse/federation/transport/server/groups_server.py delete mode 100644 synapse/rest/admin/groups.py delete mode 100644 synapse/rest/client/groups.py delete mode 100644 tests/rest/client/test_groups.py diff --git a/changelog.d/12553.removal b/changelog.d/12553.removal new file mode 100644 index 000000000000..41f6fae5da91 --- /dev/null +++ b/changelog.d/12553.removal @@ -0,0 +1 @@ +Remove support for the non-standard groups/communities feature from Synapse. 
diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml index ee98d193cbb9..4388a00df1fd 100644 --- a/docs/sample_config.yaml +++ b/docs/sample_config.yaml @@ -2521,16 +2521,6 @@ push: # "events_default": 1 -# Uncomment to allow non-server-admin users to create groups on this server -# -#enable_group_creation: true - -# If enabled, non server admins can only create groups with local parts -# starting with this prefix -# -#group_creation_prefix: "unofficial_" - - # User Directory configuration # diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 0f5bda32b941..8724bf27e8af 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -3145,25 +3145,6 @@ Example configuration: encryption_enabled_by_default_for_room_type: invite ``` --- -Config option: `enable_group_creation` - -Set to true to allow non-server-admin users to create groups on this server - -Example configuration: -```yaml -enable_group_creation: true -``` ---- -Config option: `group_creation_prefix` - -If enabled/present, non-server admins can only create groups with local parts -starting with this prefix. - -Example configuration: -```yaml -group_creation_prefix: "unofficial_" -``` ---- Config option: `user_directory` This setting defines options related to the user directory. 
diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 330de21f6b80..4a0552e7e516 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -31,11 +31,6 @@ # the maximum length for a user id is 255 characters MAX_USERID_LENGTH = 255 -# The maximum length for a group id is 255 characters -MAX_GROUPID_LENGTH = 255 -MAX_GROUP_CATEGORYID_LENGTH = 255 -MAX_GROUP_ROLEID_LENGTH = 255 - class Membership: diff --git a/synapse/app/generic_worker.py b/synapse/app/generic_worker.py index c0d007bb7966..0a6dd618f682 100644 --- a/synapse/app/generic_worker.py +++ b/synapse/app/generic_worker.py @@ -69,7 +69,6 @@ from synapse.rest.client import ( account_data, events, - groups, initial_sync, login, presence, @@ -323,9 +322,6 @@ def _listen_http(self, listener_config: ListenerConfig) -> None: presence.register_servlets(self, resource) - if self.config.experimental.groups_enabled: - groups.register_servlets(self, resource) - resources.update({CLIENT_API_PREFIX: resource}) resources.update(build_synapse_client_resource_tree(self)) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index cc417e2fbf9b..f2dfd49b0777 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -73,9 +73,6 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # MSC3720 (Account status endpoint) self.msc3720_enabled: bool = experimental.get("msc3720_enabled", False) - # The deprecated groups feature. 
- self.groups_enabled: bool = experimental.get("groups_enabled", False) - # MSC2654: Unread counts self.msc2654_enabled: bool = experimental.get("msc2654_enabled", False) diff --git a/synapse/config/groups.py b/synapse/config/groups.py index c9b9c6daadef..baa051fdd47f 100644 --- a/synapse/config/groups.py +++ b/synapse/config/groups.py @@ -25,15 +25,3 @@ class GroupsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_group_creation = config.get("enable_group_creation", False) self.group_creation_prefix = config.get("group_creation_prefix", "") - - def generate_config_section(self, **kwargs: Any) -> str: - return """\ - # Uncomment to allow non-server-admin users to create groups on this server - # - #enable_group_creation: true - - # If enabled, non server admins can only create groups with local parts - # starting with this prefix - # - #group_creation_prefix: "unofficial_" - """ diff --git a/synapse/federation/transport/server/__init__.py b/synapse/federation/transport/server/__init__.py index 71b2f90eb920..50623cd38513 100644 --- a/synapse/federation/transport/server/__init__.py +++ b/synapse/federation/transport/server/__init__.py @@ -27,10 +27,6 @@ FederationAccountStatusServlet, FederationTimestampLookupServlet, ) -from synapse.federation.transport.server.groups_local import GROUP_LOCAL_SERVLET_CLASSES -from synapse.federation.transport.server.groups_server import ( - GROUP_SERVER_SERVLET_CLASSES, -) from synapse.http.server import HttpServer, JsonResource from synapse.http.servlet import ( parse_boolean_from_args, @@ -199,38 +195,6 @@ async def on_POST( return 200, data -class FederationGroupsRenewAttestaionServlet(BaseFederationServlet): - """A group or user's server renews their attestation""" - - PATH = "/groups/(?P[^/]*)/renew_attestation/(?P[^/]*)" - - def __init__( - self, - hs: "HomeServer", - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, 
authenticator, ratelimiter, server_name) - self.handler = hs.get_groups_attestation_renewer() - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - # We don't need to check auth here as we check the attestation signatures - - new_content = await self.handler.on_renew_attestation( - group_id, user_id, content - ) - - return 200, new_content - - class OpenIdUserInfo(BaseFederationServlet): """ Exchange a bearer token for information about a user. @@ -292,16 +256,9 @@ async def on_GET( SERVLET_GROUPS: Dict[str, Iterable[Type[BaseFederationServlet]]] = { "federation": FEDERATION_SERVLET_CLASSES, "room_list": (PublicRoomList,), - "group_server": GROUP_SERVER_SERVLET_CLASSES, - "group_local": GROUP_LOCAL_SERVLET_CLASSES, - "group_attestation": (FederationGroupsRenewAttestaionServlet,), "openid": (OpenIdUserInfo,), } -DEFAULT_SERVLET_GROUPS = ("federation", "room_list", "openid") - -GROUP_SERVLET_GROUPS = ("group_server", "group_local", "group_attestation") - def register_servlets( hs: "HomeServer", @@ -324,10 +281,7 @@ def register_servlets( Defaults to ``DEFAULT_SERVLET_GROUPS``. """ if not servlet_groups: - servlet_groups = DEFAULT_SERVLET_GROUPS - # Only allow the groups servlets if the deprecated groups feature is enabled. - if hs.config.experimental.groups_enabled: - servlet_groups = servlet_groups + GROUP_SERVLET_GROUPS + servlet_groups = SERVLET_GROUPS.keys() for servlet_group in servlet_groups: # Skip unknown servlet groups. diff --git a/synapse/federation/transport/server/groups_local.py b/synapse/federation/transport/server/groups_local.py deleted file mode 100644 index 496472e1dcd8..000000000000 --- a/synapse/federation/transport/server/groups_local.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright 2021 The Matrix.org Foundation C.I.C. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from typing import TYPE_CHECKING, Dict, List, Tuple, Type - -from synapse.api.errors import SynapseError -from synapse.federation.transport.server._base import ( - Authenticator, - BaseFederationServlet, -) -from synapse.handlers.groups_local import GroupsLocalHandler -from synapse.types import JsonDict, get_domain_from_id -from synapse.util.ratelimitutils import FederationRateLimiter - -if TYPE_CHECKING: - from synapse.server import HomeServer - - -class BaseGroupsLocalServlet(BaseFederationServlet): - """Abstract base class for federation servlet classes which provides a groups local handler. - - See BaseFederationServlet for more information. 
- """ - - def __init__( - self, - hs: "HomeServer", - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_groups_local_handler() - - -class FederationGroupsLocalInviteServlet(BaseGroupsLocalServlet): - """A group server has invited a local user""" - - PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/invite" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - if get_domain_from_id(group_id) != origin: - raise SynapseError(403, "group_id doesn't match origin") - - assert isinstance( - self.handler, GroupsLocalHandler - ), "Workers cannot handle group invites." - - new_content = await self.handler.on_invite(group_id, user_id, content) - - return 200, new_content - - -class FederationGroupsRemoveLocalUserServlet(BaseGroupsLocalServlet): - """A group server has removed a local user""" - - PATH = "/groups/local/(?P[^/]*)/users/(?P[^/]*)/remove" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, None]: - if get_domain_from_id(group_id) != origin: - raise SynapseError(403, "user_id doesn't match origin") - - assert isinstance( - self.handler, GroupsLocalHandler - ), "Workers cannot handle group removals." 
- - await self.handler.user_removed_from_group(group_id, user_id, content) - - return 200, None - - -class FederationGroupsBulkPublicisedServlet(BaseGroupsLocalServlet): - """Get roles in a group""" - - PATH = "/get_groups_publicised" - - async def on_POST( - self, origin: str, content: JsonDict, query: Dict[bytes, List[bytes]] - ) -> Tuple[int, JsonDict]: - resp = await self.handler.bulk_get_publicised_groups( - content["user_ids"], proxy=False - ) - - return 200, resp - - -GROUP_LOCAL_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( - FederationGroupsLocalInviteServlet, - FederationGroupsRemoveLocalUserServlet, - FederationGroupsBulkPublicisedServlet, -) diff --git a/synapse/federation/transport/server/groups_server.py b/synapse/federation/transport/server/groups_server.py deleted file mode 100644 index 851b50152ec5..000000000000 --- a/synapse/federation/transport/server/groups_server.py +++ /dev/null @@ -1,755 +0,0 @@ -# Copyright 2021 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from typing import TYPE_CHECKING, Dict, List, Tuple, Type - -from typing_extensions import Literal - -from synapse.api.constants import MAX_GROUP_CATEGORYID_LENGTH, MAX_GROUP_ROLEID_LENGTH -from synapse.api.errors import Codes, SynapseError -from synapse.federation.transport.server._base import ( - Authenticator, - BaseFederationServlet, -) -from synapse.http.servlet import parse_string_from_args -from synapse.types import JsonDict, get_domain_from_id -from synapse.util.ratelimitutils import FederationRateLimiter - -if TYPE_CHECKING: - from synapse.server import HomeServer - - -class BaseGroupsServerServlet(BaseFederationServlet): - """Abstract base class for federation servlet classes which provides a groups server handler. - - See BaseFederationServlet for more information. - """ - - def __init__( - self, - hs: "HomeServer", - authenticator: Authenticator, - ratelimiter: FederationRateLimiter, - server_name: str, - ): - super().__init__(hs, authenticator, ratelimiter, server_name) - self.handler = hs.get_groups_server_handler() - - -class FederationGroupsProfileServlet(BaseGroupsServerServlet): - """Get/set the basic profile of a group on behalf of a user""" - - PATH = "/groups/(?P[^/]*)/profile" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_group_profile(group_id, requester_user_id) - - return 200, new_content - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - 
raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.update_group_profile( - group_id, requester_user_id, content - ) - - return 200, new_content - - -class FederationGroupsSummaryServlet(BaseGroupsServerServlet): - PATH = "/groups/(?P[^/]*)/summary" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_group_summary(group_id, requester_user_id) - - return 200, new_content - - -class FederationGroupsRoomsServlet(BaseGroupsServerServlet): - """Get the rooms in a group on behalf of a user""" - - PATH = "/groups/(?P[^/]*)/rooms" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_rooms_in_group(group_id, requester_user_id) - - return 200, new_content - - -class FederationGroupsAddRoomsServlet(BaseGroupsServerServlet): - """Add/remove room from group""" - - PATH = "/groups/(?P[^/]*)/room/(?P[^/]*)" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await 
self.handler.add_room_to_group( - group_id, requester_user_id, room_id, content - ) - - return 200, new_content - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.remove_room_from_group( - group_id, requester_user_id, room_id - ) - - return 200, new_content - - -class FederationGroupsAddRoomsConfigServlet(BaseGroupsServerServlet): - """Update room config in group""" - - PATH = ( - "/groups/(?P[^/]*)/room/(?P[^/]*)" - "/config/(?P[^/]*)" - ) - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - room_id: str, - config_key: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - result = await self.handler.update_room_in_group( - group_id, requester_user_id, room_id, config_key, content - ) - - return 200, result - - -class FederationGroupsUsersServlet(BaseGroupsServerServlet): - """Get the users in a group on behalf of a user""" - - PATH = "/groups/(?P[^/]*)/users" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_users_in_group(group_id, requester_user_id) - - return 200, new_content - - 
-class FederationGroupsInvitedUsersServlet(BaseGroupsServerServlet): - """Get the users that have been invited to a group""" - - PATH = "/groups/(?P[^/]*)/invited_users" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.get_invited_users_in_group( - group_id, requester_user_id - ) - - return 200, new_content - - -class FederationGroupsInviteServlet(BaseGroupsServerServlet): - """Ask a group server to invite someone to the group""" - - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/invite" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.invite_to_group( - group_id, user_id, requester_user_id, content - ) - - return 200, new_content - - -class FederationGroupsAcceptInviteServlet(BaseGroupsServerServlet): - """Accept an invitation from the group server""" - - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/accept_invite" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - if get_domain_from_id(user_id) != origin: - raise SynapseError(403, "user_id doesn't match origin") - - new_content = await self.handler.accept_invite(group_id, user_id, content) - - return 200, new_content - - -class FederationGroupsJoinServlet(BaseGroupsServerServlet): - """Attempt to 
join a group""" - - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/join" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - if get_domain_from_id(user_id) != origin: - raise SynapseError(403, "user_id doesn't match origin") - - new_content = await self.handler.join_group(group_id, user_id, content) - - return 200, new_content - - -class FederationGroupsRemoveUserServlet(BaseGroupsServerServlet): - """Leave or kick a user from the group""" - - PATH = "/groups/(?P[^/]*)/users/(?P[^/]*)/remove" - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.remove_user_from_group( - group_id, user_id, requester_user_id, content - ) - - return 200, new_content - - -class FederationGroupsSummaryRoomsServlet(BaseGroupsServerServlet): - """Add/remove a room from the group summary, with optional category. - - Matches both: - - /groups/:group/summary/rooms/:room_id - - /groups/:group/summary/categories/:category/rooms/:room_id - """ - - PATH = ( - "/groups/(?P[^/]*)/summary" - "(/categories/(?P[^/]+))?" 
- "/rooms/(?P[^/]*)" - ) - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError( - 400, "category_id cannot be empty string", Codes.INVALID_PARAM - ) - - if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH: - raise SynapseError( - 400, - "category_id may not be longer than %s characters" - % (MAX_GROUP_CATEGORYID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.update_group_summary_room( - group_id, - requester_user_id, - room_id=room_id, - category_id=category_id, - content=content, - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - room_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty string") - - resp = await self.handler.delete_group_summary_room( - group_id, requester_user_id, room_id=room_id, category_id=category_id - ) - - return 200, resp - - -class FederationGroupsCategoriesServlet(BaseGroupsServerServlet): - """Get all categories for a group""" - - PATH = "/groups/(?P[^/]*)/categories/?" 
- - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_categories(group_id, requester_user_id) - - return 200, resp - - -class FederationGroupsCategoryServlet(BaseGroupsServerServlet): - """Add/remove/get a category in a group""" - - PATH = "/groups/(?P[^/]*)/categories/(?P[^/]+)" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_category( - group_id, requester_user_id, category_id - ) - - return 200, resp - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty string") - - if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH: - raise SynapseError( - 400, - "category_id may not be longer than %s characters" - % (MAX_GROUP_CATEGORYID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.upsert_group_category( - group_id, requester_user_id, category_id, content - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - 
content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - category_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty string") - - resp = await self.handler.delete_group_category( - group_id, requester_user_id, category_id - ) - - return 200, resp - - -class FederationGroupsRolesServlet(BaseGroupsServerServlet): - """Get roles in a group""" - - PATH = "/groups/(?P[^/]*)/roles/?" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_roles(group_id, requester_user_id) - - return 200, resp - - -class FederationGroupsRoleServlet(BaseGroupsServerServlet): - """Add/remove/get a role in a group""" - - PATH = "/groups/(?P[^/]*)/roles/(?P[^/]+)" - - async def on_GET( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - resp = await self.handler.get_group_role(group_id, requester_user_id, role_id) - - return 200, resp - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = 
parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError( - 400, "role_id cannot be empty string", Codes.INVALID_PARAM - ) - - if len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.update_group_role( - group_id, requester_user_id, role_id, content - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - resp = await self.handler.delete_group_role( - group_id, requester_user_id, role_id - ) - - return 200, resp - - -class FederationGroupsSummaryUsersServlet(BaseGroupsServerServlet): - """Add/remove a user from the group summary, with optional role. - - Matches both: - - /groups/:group/summary/users/:user_id - - /groups/:group/summary/roles/:role/users/:user_id - """ - - PATH = ( - "/groups/(?P[^/]*)/summary" - "(/roles/(?P[^/]+))?" 
- "/users/(?P[^/]*)" - ) - - async def on_POST( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - if len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - resp = await self.handler.update_group_summary_user( - group_id, - requester_user_id, - user_id=user_id, - role_id=role_id, - content=content, - ) - - return 200, resp - - async def on_DELETE( - self, - origin: str, - content: Literal[None], - query: Dict[bytes, List[bytes]], - group_id: str, - role_id: str, - user_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise SynapseError(403, "requester_user_id doesn't match origin") - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty string") - - resp = await self.handler.delete_group_summary_user( - group_id, requester_user_id, user_id=user_id, role_id=role_id - ) - - return 200, resp - - -class FederationGroupsSettingJoinPolicyServlet(BaseGroupsServerServlet): - """Sets whether a group is joinable without an invite or knock""" - - PATH = "/groups/(?P[^/]*)/settings/m.join_policy" - - async def on_PUT( - self, - origin: str, - content: JsonDict, - query: Dict[bytes, List[bytes]], - group_id: str, - ) -> Tuple[int, JsonDict]: - requester_user_id = parse_string_from_args( - query, "requester_user_id", required=True - ) - if get_domain_from_id(requester_user_id) != origin: - raise 
SynapseError(403, "requester_user_id doesn't match origin") - - new_content = await self.handler.set_group_join_policy( - group_id, requester_user_id, content - ) - - return 200, new_content - - -GROUP_SERVER_SERVLET_CLASSES: Tuple[Type[BaseFederationServlet], ...] = ( - FederationGroupsProfileServlet, - FederationGroupsSummaryServlet, - FederationGroupsRoomsServlet, - FederationGroupsUsersServlet, - FederationGroupsInvitedUsersServlet, - FederationGroupsInviteServlet, - FederationGroupsAcceptInviteServlet, - FederationGroupsJoinServlet, - FederationGroupsRemoveUserServlet, - FederationGroupsSummaryRoomsServlet, - FederationGroupsCategoriesServlet, - FederationGroupsCategoryServlet, - FederationGroupsRolesServlet, - FederationGroupsRoleServlet, - FederationGroupsSummaryUsersServlet, - FederationGroupsAddRoomsServlet, - FederationGroupsAddRoomsConfigServlet, - FederationGroupsSettingJoinPolicyServlet, -) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index ea876c168de7..00662dc96114 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -1081,17 +1081,6 @@ async def transfer_room_state_on_room_upgrade( # Transfer alias mappings in the room directory await self.store.update_aliases_for_room(old_room_id, room_id) - # Check if any groups we own contain the predecessor room - local_group_ids = await self.store.get_local_groups_for_room(old_room_id) - for group_id in local_group_ids: - # Add new the new room to those groups - await self.store.add_room_to_group( - group_id, room_id, old_room is not None and old_room["is_public"] - ) - - # Remove the old room from those groups - await self.store.remove_room_from_group(group_id, old_room_id) - async def copy_user_state_on_room_upgrade( self, old_room_id: str, new_room_id: str, user_ids: Iterable[str] ) -> None: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 59b5d497be68..dcbb5ce921af 100644 --- a/synapse/handlers/sync.py +++ 
b/synapse/handlers/sync.py @@ -166,16 +166,6 @@ def __bool__(self) -> bool: return True -@attr.s(slots=True, frozen=True, auto_attribs=True) -class GroupsSyncResult: - join: JsonDict - invite: JsonDict - leave: JsonDict - - def __bool__(self) -> bool: - return bool(self.join or self.invite or self.leave) - - @attr.s(slots=True, auto_attribs=True) class _RoomChanges: """The set of room entries to include in the sync, plus the set of joined @@ -206,7 +196,6 @@ class SyncResult: for this device device_unused_fallback_key_types: List of key types that have an unused fallback key - groups: Group updates, if any """ next_batch: StreamToken @@ -220,7 +209,6 @@ class SyncResult: device_lists: DeviceListUpdates device_one_time_keys_count: JsonDict device_unused_fallback_key_types: List[str] - groups: Optional[GroupsSyncResult] def __bool__(self) -> bool: """Make the result appear empty if there are no updates. This is used @@ -236,7 +224,6 @@ def __bool__(self) -> bool: or self.account_data or self.to_device or self.device_lists - or self.groups ) @@ -1157,10 +1144,6 @@ async def generate_sync_result( await self.store.get_e2e_unused_fallback_key_types(user_id, device_id) ) - if self.hs_config.experimental.groups_enabled: - logger.debug("Fetching group data") - await self._generate_sync_entry_for_groups(sync_result_builder) - num_events = 0 # debug for /~https://github.com/matrix-org/synapse/issues/9424 @@ -1184,57 +1167,11 @@ async def generate_sync_result( archived=sync_result_builder.archived, to_device=sync_result_builder.to_device, device_lists=device_lists, - groups=sync_result_builder.groups, device_one_time_keys_count=one_time_key_counts, device_unused_fallback_key_types=unused_fallback_key_types, next_batch=sync_result_builder.now_token, ) - @measure_func("_generate_sync_entry_for_groups") - async def _generate_sync_entry_for_groups( - self, sync_result_builder: "SyncResultBuilder" - ) -> None: - user_id = sync_result_builder.sync_config.user.to_string() - 
since_token = sync_result_builder.since_token - now_token = sync_result_builder.now_token - - if since_token and since_token.groups_key: - results = await self.store.get_groups_changes_for_user( - user_id, since_token.groups_key, now_token.groups_key - ) - else: - results = await self.store.get_all_groups_for_user( - user_id, now_token.groups_key - ) - - invited = {} - joined = {} - left = {} - for result in results: - membership = result["membership"] - group_id = result["group_id"] - gtype = result["type"] - content = result["content"] - - if membership == "join": - if gtype == "membership": - # TODO: Add profile - content.pop("membership", None) - joined[group_id] = content["content"] - else: - joined.setdefault(group_id, {})[gtype] = content - elif membership == "invite": - if gtype == "membership": - content.pop("membership", None) - invited[group_id] = content["content"] - else: - if gtype == "membership": - left[group_id] = content["content"] - - sync_result_builder.groups = GroupsSyncResult( - join=joined, invite=invited, leave=left - ) - @measure_func("_generate_sync_entry_for_device_list") async def _generate_sync_entry_for_device_list( self, @@ -2333,7 +2270,6 @@ class SyncResultBuilder: invited knocked archived - groups to_device """ @@ -2349,7 +2285,6 @@ class SyncResultBuilder: invited: List[InvitedSyncResult] = attr.Factory(list) knocked: List[KnockedSyncResult] = attr.Factory(list) archived: List[ArchivedSyncResult] = attr.Factory(list) - groups: Optional[GroupsSyncResult] = None to_device: List[JsonDict] = attr.Factory(list) def calculate_user_changes(self) -> Tuple[Set[str], Set[str]]: diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index 57c4773edce6..b71221511209 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -26,7 +26,6 @@ directory, events, filter, - groups, initial_sync, keys, knock, @@ -118,8 +117,6 @@ def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None: 
thirdparty.register_servlets(hs, client_resource) sendtodevice.register_servlets(hs, client_resource) user_directory.register_servlets(hs, client_resource) - if hs.config.experimental.groups_enabled: - groups.register_servlets(hs, client_resource) room_upgrade_rest_servlet.register_servlets(hs, client_resource) room_batch.register_servlets(hs, client_resource) capabilities.register_servlets(hs, client_resource) diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index cb4d55c89d78..1aa08f8d95d0 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -47,7 +47,6 @@ DestinationRestServlet, ListDestinationsRestServlet, ) -from synapse.rest.admin.groups import DeleteGroupAdminRestServlet from synapse.rest.admin.media import ListMediaInRoom, register_servlets_for_media_repo from synapse.rest.admin.registration_tokens import ( ListRegistrationTokensRestServlet, @@ -293,8 +292,6 @@ def register_servlets_for_client_rest_resource( ResetPasswordRestServlet(hs).register(http_server) SearchUsersRestServlet(hs).register(http_server) UserRegisterServlet(hs).register(http_server) - if hs.config.experimental.groups_enabled: - DeleteGroupAdminRestServlet(hs).register(http_server) AccountValidityRenewServlet(hs).register(http_server) # Load the media repo ones if we're using them. Otherwise load the servlets which diff --git a/synapse/rest/admin/groups.py b/synapse/rest/admin/groups.py deleted file mode 100644 index cd697e180ef6..000000000000 --- a/synapse/rest/admin/groups.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -from http import HTTPStatus -from typing import TYPE_CHECKING, Tuple - -from synapse.api.errors import SynapseError -from synapse.http.servlet import RestServlet -from synapse.http.site import SynapseRequest -from synapse.rest.admin._base import admin_patterns, assert_user_is_admin -from synapse.types import JsonDict - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -class DeleteGroupAdminRestServlet(RestServlet): - """Allows deleting of local groups""" - - PATTERNS = admin_patterns("/delete_group/(?P[^/]*)$") - - def __init__(self, hs: "HomeServer"): - self.group_server = hs.get_groups_server_handler() - self.is_mine_id = hs.is_mine_id - self.auth = hs.get_auth() - - async def on_POST( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - await assert_user_is_admin(self.auth, requester.user) - - if not self.is_mine_id(group_id): - raise SynapseError(HTTPStatus.BAD_REQUEST, "Can only delete local groups") - - await self.group_server.delete_group(group_id, requester.user.to_string()) - return HTTPStatus.OK, {} diff --git a/synapse/rest/client/groups.py b/synapse/rest/client/groups.py deleted file mode 100644 index 7e1149c7f433..000000000000 --- a/synapse/rest/client/groups.py +++ /dev/null @@ -1,962 +0,0 @@ -# Copyright 2017 Vector Creations Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the 
License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from functools import wraps -from typing import TYPE_CHECKING, Any, Awaitable, Callable, Optional, Tuple - -from twisted.web.server import Request - -from synapse.api.constants import ( - MAX_GROUP_CATEGORYID_LENGTH, - MAX_GROUP_ROLEID_LENGTH, - MAX_GROUPID_LENGTH, -) -from synapse.api.errors import Codes, SynapseError -from synapse.handlers.groups_local import GroupsLocalHandler -from synapse.http.server import HttpServer -from synapse.http.servlet import ( - RestServlet, - assert_params_in_dict, - parse_json_object_from_request, -) -from synapse.http.site import SynapseRequest -from synapse.types import GroupID, JsonDict - -from ._base import client_patterns - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -def _validate_group_id( - f: Callable[..., Awaitable[Tuple[int, JsonDict]]] -) -> Callable[..., Awaitable[Tuple[int, JsonDict]]]: - """Wrapper to validate the form of the group ID. - - Can be applied to any on_FOO methods that accepts a group ID as a URL parameter. 
- """ - - @wraps(f) - def wrapper( - self: RestServlet, request: Request, group_id: str, *args: Any, **kwargs: Any - ) -> Awaitable[Tuple[int, JsonDict]]: - if not GroupID.is_valid(group_id): - raise SynapseError(400, "%s is not a legal group ID" % (group_id,)) - - return f(self, request, group_id, *args, **kwargs) - - return wrapper - - -class GroupServlet(RestServlet): - """Get the group profile""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/profile$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - group_description = await self.groups_handler.get_group_profile( - group_id, requester_user_id - ) - - return 200, group_description - - @_validate_group_id - async def on_POST( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert_params_in_dict( - content, ("name", "avatar_url", "short_description", "long_description") - ) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot create group profiles." 
- await self.groups_handler.update_group_profile( - group_id, requester_user_id, content - ) - - return 200, {} - - -class GroupSummaryServlet(RestServlet): - """Get the full group summary""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/summary$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - get_group_summary = await self.groups_handler.get_group_summary( - group_id, requester_user_id - ) - - return 200, get_group_summary - - -class GroupSummaryRoomsCatServlet(RestServlet): - """Update/delete a rooms entry in the summary. - - Matches both: - - /groups/:group/summary/rooms/:room_id - - /groups/:group/summary/categories/:category/rooms/:room_id - """ - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/summary" - "(/categories/(?P[^/]+))?" 
- "/rooms/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, - request: SynapseRequest, - group_id: str, - category_id: Optional[str], - room_id: str, - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - if category_id == "": - raise SynapseError(400, "category_id cannot be empty", Codes.INVALID_PARAM) - - if category_id and len(category_id) > MAX_GROUP_CATEGORYID_LENGTH: - raise SynapseError( - 400, - "category_id may not be longer than %s characters" - % (MAX_GROUP_CATEGORYID_LENGTH,), - Codes.INVALID_PARAM, - ) - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group summaries." - resp = await self.groups_handler.update_group_summary_room( - group_id, - requester_user_id, - room_id=room_id, - category_id=category_id, - content=content, - ) - - return 200, resp - - @_validate_group_id - async def on_DELETE( - self, request: SynapseRequest, group_id: str, category_id: str, room_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group profiles." 
- resp = await self.groups_handler.delete_group_summary_room( - group_id, requester_user_id, room_id=room_id, category_id=category_id - ) - - return 200, resp - - -class GroupCategoryServlet(RestServlet): - """Get/add/update/delete a group category""" - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/categories/(?P[^/]+)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str, category_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - category = await self.groups_handler.get_group_category( - group_id, requester_user_id, category_id=category_id - ) - - return 200, category - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, category_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - if not category_id: - raise SynapseError(400, "category_id cannot be empty", Codes.INVALID_PARAM) - - if len(category_id) > MAX_GROUP_CATEGORYID_LENGTH: - raise SynapseError( - 400, - "category_id may not be longer than %s characters" - % (MAX_GROUP_CATEGORYID_LENGTH,), - Codes.INVALID_PARAM, - ) - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group categories." 
- resp = await self.groups_handler.update_group_category( - group_id, requester_user_id, category_id=category_id, content=content - ) - - return 200, resp - - @_validate_group_id - async def on_DELETE( - self, request: SynapseRequest, group_id: str, category_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group categories." - resp = await self.groups_handler.delete_group_category( - group_id, requester_user_id, category_id=category_id - ) - - return 200, resp - - -class GroupCategoriesServlet(RestServlet): - """Get all group categories""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/categories/$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - category = await self.groups_handler.get_group_categories( - group_id, requester_user_id - ) - - return 200, category - - -class GroupRoleServlet(RestServlet): - """Get/add/update/delete a group role""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/roles/(?P[^/]+)$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str, role_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - category = await self.groups_handler.get_group_role( - group_id, 
requester_user_id, role_id=role_id - ) - - return 200, category - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, role_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - if not role_id: - raise SynapseError(400, "role_id cannot be empty", Codes.INVALID_PARAM) - - if len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group roles." - resp = await self.groups_handler.update_group_role( - group_id, requester_user_id, role_id=role_id, content=content - ) - - return 200, resp - - @_validate_group_id - async def on_DELETE( - self, request: SynapseRequest, group_id: str, role_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group roles." 
- resp = await self.groups_handler.delete_group_role( - group_id, requester_user_id, role_id=role_id - ) - - return 200, resp - - -class GroupRolesServlet(RestServlet): - """Get all group roles""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/roles/$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - category = await self.groups_handler.get_group_roles( - group_id, requester_user_id - ) - - return 200, category - - -class GroupSummaryUsersRoleServlet(RestServlet): - """Update/delete a user's entry in the summary. - - Matches both: - - /groups/:group/summary/users/:room_id - - /groups/:group/summary/roles/:role/users/:user_id - """ - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/summary" - "(/roles/(?P[^/]+))?" 
- "/users/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, - request: SynapseRequest, - group_id: str, - role_id: Optional[str], - user_id: str, - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - if role_id == "": - raise SynapseError(400, "role_id cannot be empty", Codes.INVALID_PARAM) - - if role_id and len(role_id) > MAX_GROUP_ROLEID_LENGTH: - raise SynapseError( - 400, - "role_id may not be longer than %s characters" - % (MAX_GROUP_ROLEID_LENGTH,), - Codes.INVALID_PARAM, - ) - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group summaries." - resp = await self.groups_handler.update_group_summary_user( - group_id, - requester_user_id, - user_id=user_id, - role_id=role_id, - content=content, - ) - - return 200, resp - - @_validate_group_id - async def on_DELETE( - self, request: SynapseRequest, group_id: str, role_id: str, user_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group summaries." 
- resp = await self.groups_handler.delete_group_summary_user( - group_id, requester_user_id, user_id=user_id, role_id=role_id - ) - - return 200, resp - - -class GroupRoomServlet(RestServlet): - """Get all rooms in a group""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/rooms$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - result = await self.groups_handler.get_rooms_in_group( - group_id, requester_user_id - ) - - return 200, result - - -class GroupUsersServlet(RestServlet): - """Get all users in a group""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/users$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - result = await self.groups_handler.get_users_in_group( - group_id, requester_user_id - ) - - return 200, result - - -class GroupInvitedUsersServlet(RestServlet): - """Get users invited to a group""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/invited_users$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_GET( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - 
requester_user_id = requester.user.to_string() - - result = await self.groups_handler.get_invited_users_in_group( - group_id, requester_user_id - ) - - return 200, result - - -class GroupSettingJoinPolicyServlet(RestServlet): - """Set group join policy""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/settings/m.join_policy$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group join policy." - result = await self.groups_handler.set_group_join_policy( - group_id, requester_user_id, content - ) - - return 200, result - - -class GroupCreateServlet(RestServlet): - """Create a group""" - - PATTERNS = client_patterns("/create_group$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - self.server_name = hs.hostname - - async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - # TODO: Create group on remote server - content = parse_json_object_from_request(request) - localpart = content.pop("localpart") - group_id = GroupID(localpart, self.server_name).to_string() - - if not localpart: - raise SynapseError(400, "Group ID cannot be empty", Codes.INVALID_PARAM) - - if len(group_id) > MAX_GROUPID_LENGTH: - raise SynapseError( - 400, - "Group ID may not be longer than %s characters" % (MAX_GROUPID_LENGTH,), - Codes.INVALID_PARAM, - ) - - assert isinstance( - 
self.groups_handler, GroupsLocalHandler - ), "Workers cannot create groups." - result = await self.groups_handler.create_group( - group_id, requester_user_id, content - ) - - return 200, result - - -class GroupAdminRoomsServlet(RestServlet): - """Add a room to the group""" - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/admin/rooms/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, room_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify rooms in a group." - result = await self.groups_handler.add_room_to_group( - group_id, requester_user_id, room_id, content - ) - - return 200, result - - @_validate_group_id - async def on_DELETE( - self, request: SynapseRequest, group_id: str, room_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group categories." 
- result = await self.groups_handler.remove_room_from_group( - group_id, requester_user_id, room_id - ) - - return 200, result - - -class GroupAdminRoomsConfigServlet(RestServlet): - """Update the config of a room in a group""" - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/admin/rooms/(?P[^/]*)" - "/config/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, room_id: str, config_key: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot modify group categories." - result = await self.groups_handler.update_room_in_group( - group_id, requester_user_id, room_id, config_key, content - ) - - return 200, result - - -class GroupAdminUsersInviteServlet(RestServlet): - """Invite a user to the group""" - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/admin/users/invite/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - self.store = hs.get_datastores().main - self.is_mine_id = hs.is_mine_id - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, user_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - config = content.get("config", {}) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot invite users to a group." 
- result = await self.groups_handler.invite( - group_id, user_id, requester_user_id, config - ) - - return 200, result - - -class GroupAdminUsersKickServlet(RestServlet): - """Kick a user from the group""" - - PATTERNS = client_patterns( - "/groups/(?P[^/]*)/admin/users/remove/(?P[^/]*)$" - ) - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str, user_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot kick users from a group." - result = await self.groups_handler.remove_user_from_group( - group_id, user_id, requester_user_id, content - ) - - return 200, result - - -class GroupSelfLeaveServlet(RestServlet): - """Leave a joined group""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/self/leave$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot leave a group for a users." 
- result = await self.groups_handler.remove_user_from_group( - group_id, requester_user_id, requester_user_id, content - ) - - return 200, result - - -class GroupSelfJoinServlet(RestServlet): - """Attempt to join a group, or knock""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/self/join$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot join a user to a group." - result = await self.groups_handler.join_group( - group_id, requester_user_id, content - ) - - return 200, result - - -class GroupSelfAcceptInviteServlet(RestServlet): - """Accept a group invite""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/self/accept_invite$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - assert isinstance( - self.groups_handler, GroupsLocalHandler - ), "Workers cannot accept an invite to a group." 
- result = await self.groups_handler.accept_invite( - group_id, requester_user_id, content - ) - - return 200, result - - -class GroupSelfUpdatePublicityServlet(RestServlet): - """Update whether we publicise a users membership of a group""" - - PATTERNS = client_patterns("/groups/(?P[^/]*)/self/update_publicity$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.store = hs.get_datastores().main - - @_validate_group_id - async def on_PUT( - self, request: SynapseRequest, group_id: str - ) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request) - requester_user_id = requester.user.to_string() - - content = parse_json_object_from_request(request) - publicise = content["publicise"] - await self.store.update_group_publicity(group_id, requester_user_id, publicise) - - return 200, {} - - -class PublicisedGroupsForUserServlet(RestServlet): - """Get the list of groups a user is advertising""" - - PATTERNS = client_patterns("/publicised_groups/(?P[^/]*)$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.store = hs.get_datastores().main - self.groups_handler = hs.get_groups_local_handler() - - async def on_GET( - self, request: SynapseRequest, user_id: str - ) -> Tuple[int, JsonDict]: - await self.auth.get_user_by_req(request, allow_guest=True) - - result = await self.groups_handler.get_publicised_groups_for_user(user_id) - - return 200, result - - -class PublicisedGroupsForUsersServlet(RestServlet): - """Get the list of groups a user is advertising""" - - PATTERNS = client_patterns("/publicised_groups$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.store = hs.get_datastores().main - self.groups_handler = hs.get_groups_local_handler() - - async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - 
await self.auth.get_user_by_req(request, allow_guest=True) - - content = parse_json_object_from_request(request) - user_ids = content["user_ids"] - - result = await self.groups_handler.bulk_get_publicised_groups(user_ids) - - return 200, result - - -class GroupsForUserServlet(RestServlet): - """Get all groups the logged in user is joined to""" - - PATTERNS = client_patterns("/joined_groups$") - - def __init__(self, hs: "HomeServer"): - super().__init__() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.groups_handler = hs.get_groups_local_handler() - - async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: - requester = await self.auth.get_user_by_req(request, allow_guest=True) - requester_user_id = requester.user.to_string() - - result = await self.groups_handler.get_joined_groups(requester_user_id) - - return 200, result - - -def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: - GroupServlet(hs).register(http_server) - GroupSummaryServlet(hs).register(http_server) - GroupInvitedUsersServlet(hs).register(http_server) - GroupUsersServlet(hs).register(http_server) - GroupRoomServlet(hs).register(http_server) - GroupSettingJoinPolicyServlet(hs).register(http_server) - GroupCreateServlet(hs).register(http_server) - GroupAdminRoomsServlet(hs).register(http_server) - GroupAdminRoomsConfigServlet(hs).register(http_server) - GroupAdminUsersInviteServlet(hs).register(http_server) - GroupAdminUsersKickServlet(hs).register(http_server) - GroupSelfLeaveServlet(hs).register(http_server) - GroupSelfJoinServlet(hs).register(http_server) - GroupSelfAcceptInviteServlet(hs).register(http_server) - GroupsForUserServlet(hs).register(http_server) - GroupCategoryServlet(hs).register(http_server) - GroupCategoriesServlet(hs).register(http_server) - GroupSummaryRoomsCatServlet(hs).register(http_server) - GroupRoleServlet(hs).register(http_server) - GroupRolesServlet(hs).register(http_server) - 
GroupSelfUpdatePublicityServlet(hs).register(http_server) - GroupSummaryUsersRoleServlet(hs).register(http_server) - PublicisedGroupsForUserServlet(hs).register(http_server) - PublicisedGroupsForUsersServlet(hs).register(http_server) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index e8772f86e72f..f596b792fa2d 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -298,14 +298,6 @@ async def encode_response( if archived: response["rooms"][Membership.LEAVE] = archived - if sync_result.groups is not None: - if sync_result.groups.join: - response["groups"][Membership.JOIN] = sync_result.groups.join - if sync_result.groups.invite: - response["groups"][Membership.INVITE] = sync_result.groups.invite - if sync_result.groups.leave: - response["groups"][Membership.LEAVE] = sync_result.groups.leave - return response @staticmethod diff --git a/tests/rest/admin/test_admin.py b/tests/rest/admin/test_admin.py index 40571b753a9a..82ac5991e6e4 100644 --- a/tests/rest/admin/test_admin.py +++ b/tests/rest/admin/test_admin.py @@ -14,7 +14,6 @@ import urllib.parse from http import HTTPStatus -from typing import List from parameterized import parameterized @@ -23,7 +22,7 @@ import synapse.rest.admin from synapse.http.server import JsonResource from synapse.rest.admin import VersionServlet -from synapse.rest.client import groups, login, room +from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.util import Clock @@ -49,93 +48,6 @@ def test_version_string(self) -> None: ) -class DeleteGroupTestCase(unittest.HomeserverTestCase): - servlets = [ - synapse.rest.admin.register_servlets_for_client_rest_resource, - login.register_servlets, - groups.register_servlets, - ] - - def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: - self.admin_user = self.register_user("admin", "pass", admin=True) - self.admin_user_tok = self.login("admin", "pass") - - self.other_user = 
self.register_user("user", "pass") - self.other_user_token = self.login("user", "pass") - - @unittest.override_config({"experimental_features": {"groups_enabled": True}}) - def test_delete_group(self) -> None: - # Create a new group - channel = self.make_request( - "POST", - b"/create_group", - access_token=self.admin_user_tok, - content={"localpart": "test"}, - ) - - self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - - group_id = channel.json_body["group_id"] - - self._check_group(group_id, expect_code=HTTPStatus.OK) - - # Invite/join another user - - url = "/groups/%s/admin/users/invite/%s" % (group_id, self.other_user) - channel = self.make_request( - "PUT", url.encode("ascii"), access_token=self.admin_user_tok, content={} - ) - self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - - url = "/groups/%s/self/accept_invite" % (group_id,) - channel = self.make_request( - "PUT", url.encode("ascii"), access_token=self.other_user_token, content={} - ) - self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - - # Check other user knows they're in the group - self.assertIn(group_id, self._get_groups_user_is_in(self.admin_user_tok)) - self.assertIn(group_id, self._get_groups_user_is_in(self.other_user_token)) - - # Now delete the group - url = "/_synapse/admin/v1/delete_group/" + group_id - channel = self.make_request( - "POST", - url.encode("ascii"), - access_token=self.admin_user_tok, - content={"localpart": "test"}, - ) - - self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - - # Check group returns HTTPStatus.NOT_FOUND - self._check_group(group_id, expect_code=HTTPStatus.NOT_FOUND) - - # Check users don't think they're in the group - self.assertNotIn(group_id, self._get_groups_user_is_in(self.admin_user_tok)) - self.assertNotIn(group_id, self._get_groups_user_is_in(self.other_user_token)) - - def _check_group(self, group_id: str, expect_code: int) -> None: - """Assert that trying to fetch the given 
group results in the given - HTTP status code - """ - - url = "/groups/%s/profile" % (group_id,) - channel = self.make_request( - "GET", url.encode("ascii"), access_token=self.admin_user_tok - ) - - self.assertEqual(expect_code, channel.code, msg=channel.json_body) - - def _get_groups_user_is_in(self, access_token: str) -> List[str]: - """Returns the list of groups the user is in (given their access token)""" - channel = self.make_request("GET", b"/joined_groups", access_token=access_token) - - self.assertEqual(HTTPStatus.OK, channel.code, msg=channel.json_body) - - return channel.json_body["groups"] - - class QuarantineMediaTestCase(unittest.HomeserverTestCase): """Test /quarantine_media admin API.""" diff --git a/tests/rest/client/test_groups.py b/tests/rest/client/test_groups.py deleted file mode 100644 index e067cf825c62..000000000000 --- a/tests/rest/client/test_groups.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2021 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from synapse.rest.client import groups, room - -from tests import unittest -from tests.unittest import override_config - - -class GroupsTestCase(unittest.HomeserverTestCase): - user_id = "@alice:test" - room_creator_user_id = "@bob:test" - - servlets = [room.register_servlets, groups.register_servlets] - - @override_config({"enable_group_creation": True}) - def test_rooms_limited_by_visibility(self) -> None: - group_id = "+spqr:test" - - # Alice creates a group - channel = self.make_request("POST", "/create_group", {"localpart": "spqr"}) - self.assertEqual(channel.code, 200, msg=channel.text_body) - self.assertEqual(channel.json_body, {"group_id": group_id}) - - # Bob creates a private room - room_id = self.helper.create_room_as(self.room_creator_user_id, is_public=False) - self.helper.auth_user_id = self.room_creator_user_id - self.helper.send_state( - room_id, "m.room.name", {"name": "bob's secret room"}, tok=None - ) - self.helper.auth_user_id = self.user_id - - # Alice adds the room to her group. - channel = self.make_request( - "PUT", f"/groups/{group_id}/admin/rooms/{room_id}", {} - ) - self.assertEqual(channel.code, 200, msg=channel.text_body) - self.assertEqual(channel.json_body, {}) - - # Alice now tries to retrieve the room list of the space. 
- channel = self.make_request("GET", f"/groups/{group_id}/rooms") - self.assertEqual(channel.code, 200, msg=channel.text_body) - self.assertEqual( - channel.json_body, {"chunk": [], "total_room_count_estimate": 0} - ) From 4660d9fdcffc833ae4774ac7d162e63769373dc5 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 25 May 2022 12:59:04 +0100 Subject: [PATCH 107/181] Fix up `state_store` naming (#12871) --- changelog.d/12871.misc | 1 + synapse/handlers/admin.py | 4 ++-- synapse/handlers/device.py | 6 ++++-- synapse/handlers/federation.py | 6 ++++-- synapse/handlers/federation_event.py | 10 +++++----- synapse/handlers/initial_sync.py | 6 +++--- synapse/handlers/message.py | 10 +++++----- synapse/handlers/pagination.py | 4 ++-- synapse/handlers/room.py | 4 ++-- synapse/handlers/room_batch.py | 4 ++-- synapse/handlers/search.py | 4 ++-- synapse/handlers/sync.py | 24 ++++++++++++++---------- synapse/push/mailer.py | 6 +++--- synapse/state/__init__.py | 14 +++++++------- tests/handlers/test_federation.py | 4 ++-- 15 files changed, 58 insertions(+), 49 deletions(-) create mode 100644 changelog.d/12871.misc diff --git a/changelog.d/12871.misc b/changelog.d/12871.misc new file mode 100644 index 000000000000..94bd6c497421 --- /dev/null +++ b/changelog.d/12871.misc @@ -0,0 +1 @@ +Fix up the variable `state_store` naming. 
diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index 96376963f239..50e34743b73d 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -31,7 +31,7 @@ class AdminHandler: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state async def get_whois(self, user: UserID) -> JsonDict: connections = [] @@ -233,7 +233,7 @@ async def export_user_data(self, user_id: str, writer: "ExfiltrationWriter") -> for event_id in extremities: if not event_to_unseen_prevs[event_id]: continue - state = await self.state_store.get_state_for_event(event_id) + state = await self.state_storage.get_state_for_event(event_id) writer.write_state(room_id, event_id, state) return writer.finished() diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index e59937fd755c..b21e46986543 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -70,7 +70,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.notifier = hs.get_notifier() self.state = hs.get_state_handler() - self.state_store = hs.get_storage().state + self.state_storage = hs.get_storage().state self._auth_handler = hs.get_auth_handler() self.server_name = hs.hostname @@ -203,7 +203,9 @@ async def get_user_ids_changed( continue # mapping from event_id -> state_dict - prev_state_ids = await self.state_store.get_state_ids_for_events(event_ids) + prev_state_ids = await self.state_storage.get_state_ids_for_events( + event_ids + ) # Check if we've joined the room? If so we just blindly add all the users to # the "possibly changed" users. 
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0386d0a07bba..c8233270d72c 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -126,7 +126,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state self.federation_client = hs.get_federation_client() self.state_handler = hs.get_state_handler() self.server_name = hs.hostname @@ -1027,7 +1027,9 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: if event.internal_metadata.outlier: raise NotFoundError("State not known at event %s" % (event_id,)) - state_groups = await self.state_store.get_state_groups_ids(room_id, [event_id]) + state_groups = await self.state_storage.get_state_groups_ids( + room_id, [event_id] + ) # get_state_groups_ids should return exactly one result assert len(state_groups) == 1 diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index ca82df8a6d9e..8ce7187bef4a 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -99,7 +99,7 @@ class FederationEventHandler: def __init__(self, hs: "HomeServer"): self._store = hs.get_datastores().main self._storage = hs.get_storage() - self._state_store = self._storage.state + self._state_storage = self._storage.state self._state_handler = hs.get_state_handler() self._event_creation_handler = hs.get_event_creation_handler() @@ -533,7 +533,7 @@ async def update_state_for_partial_state_event( ) return await self._store.update_state_for_partial_state_event(event, context) - self._state_store.notify_event_un_partial_stated(event.event_id) + self._state_storage.notify_event_un_partial_stated(event.event_id) async def backfill( self, dest: str, room_id: str, limit: int, extremities: Collection[str] @@ -832,7 +832,7 @@ async def _resolve_state_at_missing_prevs( 
event_map = {event_id: event} try: # Get the state of the events we know about - ours = await self._state_store.get_state_groups_ids(room_id, seen) + ours = await self._state_storage.get_state_groups_ids(room_id, seen) # state_maps is a list of mappings from (type, state_key) to event_id state_maps: List[StateMap[str]] = list(ours.values()) @@ -1626,7 +1626,7 @@ async def _check_for_soft_fail( # given state at the event. This should correctly handle cases # like bans, especially with state res v2. - state_sets_d = await self._state_store.get_state_groups( + state_sets_d = await self._state_storage.get_state_groups( event.room_id, extrem_ids ) state_sets: List[Iterable[EventBase]] = list(state_sets_d.values()) @@ -1895,7 +1895,7 @@ async def _update_context_for_auth_events( # create a new state group as a delta from the existing one. prev_group = context.state_group - state_group = await self._state_store.store_state_group( + state_group = await self._state_storage.store_state_group( event.event_id, event.room_id, prev_group=prev_group, diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index d79248ad905b..c06932a41acf 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -68,7 +68,7 @@ def __init__(self, hs: "HomeServer"): ] = ResponseCache(hs.get_clock(), "initial_sync_cache") self._event_serializer = hs.get_event_client_serializer() self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state async def snapshot_all_rooms( self, @@ -198,7 +198,7 @@ async def handle_room(event: RoomsForUser) -> None: event.stream_ordering, ) deferred_room_state = run_in_background( - self.state_store.get_state_for_events, [event.event_id] + self.state_storage.get_state_for_events, [event.event_id] ).addCallback( lambda states: cast(StateMap[EventBase], states[event.event_id]) ) @@ -355,7 +355,7 @@ async def _room_initial_sync_parted( member_event_id: str, is_peeking: 
bool, ) -> JsonDict: - room_state = await self.state_store.get_state_for_event(member_event_id) + room_state = await self.state_storage.get_state_for_event(member_event_id) limit = pagin_config.limit if pagin_config else None if limit is None: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index cb1bc4c06f1c..9501e7f1b7de 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -78,7 +78,7 @@ def __init__(self, hs: "HomeServer"): self.state = hs.get_state_handler() self.store = hs.get_datastores().main self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state self._event_serializer = hs.get_event_client_serializer() self._ephemeral_events_enabled = hs.config.server.enable_ephemeral_messages @@ -125,7 +125,7 @@ async def get_room_data( assert ( membership_event_id is not None ), "check_user_in_room_or_world_readable returned invalid data" - room_state = await self.state_store.get_state_for_events( + room_state = await self.state_storage.get_state_for_events( [membership_event_id], StateFilter.from_types([key]) ) data = room_state[membership_event_id].get(key) @@ -186,7 +186,7 @@ async def get_state_events( # check whether the user is in the room at that time to determine # whether they should be treated as peeking. 
- state_map = await self.state_store.get_state_for_event( + state_map = await self.state_storage.get_state_for_event( last_event.event_id, StateFilter.from_types([(EventTypes.Member, user_id)]), ) @@ -207,7 +207,7 @@ async def get_state_events( ) if visible_events: - room_state_events = await self.state_store.get_state_for_events( + room_state_events = await self.state_storage.get_state_for_events( [last_event.event_id], state_filter=state_filter ) room_state: Mapping[Any, EventBase] = room_state_events[ @@ -237,7 +237,7 @@ async def get_state_events( assert ( membership_event_id is not None ), "check_user_in_room_or_world_readable returned invalid data" - room_state_events = await self.state_store.get_state_for_events( + room_state_events = await self.state_storage.get_state_for_events( [membership_event_id], state_filter=state_filter ) room_state = room_state_events[membership_event_id] diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 19a440705027..6f4820c240cc 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -130,7 +130,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.store = hs.get_datastores().main self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state self.clock = hs.get_clock() self._server_name = hs.hostname self._room_shutdown_handler = hs.get_room_shutdown_handler() @@ -539,7 +539,7 @@ async def get_messages( (EventTypes.Member, event.sender) for event in events ) - state_ids = await self.state_store.get_state_ids_for_event( + state_ids = await self.state_storage.get_state_ids_for_event( events[0].event_id, state_filter=state_filter ) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 92e1de050071..e2775b34f10b 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -1193,7 +1193,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.store = 
hs.get_datastores().main self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state self._relations_handler = hs.get_relations_handler() async def get_event_context( @@ -1293,7 +1293,7 @@ async def filter_evts(events: List[EventBase]) -> List[EventBase]: # first? Shouldn't we be consistent with /sync? # /~https://github.com/matrix-org/matrix-doc/issues/687 - state = await self.state_store.get_state_for_events( + state = await self.state_storage.get_state_for_events( [last_event_id], state_filter=state_filter ) diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index fbfd7484065c..7ce32f2e9ce6 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -17,7 +17,7 @@ class RoomBatchHandler: def __init__(self, hs: "HomeServer"): self.hs = hs self.store = hs.get_datastores().main - self.state_store = hs.get_storage().state + self.state_storage = hs.get_storage().state self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() @@ -141,7 +141,7 @@ async def get_most_recent_full_state_ids_from_event_id_list( ) = await self.store.get_max_depth_of(event_ids) # mapping from (type, state_key) -> state_event_id assert most_recent_event_id is not None - prev_state_map = await self.state_store.get_state_ids_for_event( + prev_state_map = await self.state_storage.get_state_ids_for_event( most_recent_event_id ) # List of state event ID's diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index cd1c47dae8b1..e02c915248c1 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -56,7 +56,7 @@ def __init__(self, hs: "HomeServer"): self._event_serializer = hs.get_event_client_serializer() self._relations_handler = hs.get_relations_handler() self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state self.auth 
= hs.get_auth() async def get_old_rooms_from_upgraded_room(self, room_id: str) -> Iterable[str]: @@ -677,7 +677,7 @@ async def _calculate_event_contexts( [(EventTypes.Member, sender) for sender in senders] ) - state = await self.state_store.get_state_for_event( + state = await self.state_storage.get_state_for_event( last_event_id, state_filter ) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index dcbb5ce921af..c5c538e0c35e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -239,7 +239,7 @@ def __init__(self, hs: "HomeServer"): self.state = hs.get_state_handler() self.auth = hs.get_auth() self.storage = hs.get_storage() - self.state_store = self.storage.state + self.state_storage = self.storage.state # TODO: flush cache entries on subsequent sync request. # Once we get the next /sync request (ie, one with the same access token @@ -630,7 +630,7 @@ async def get_state_after_event( event: event of interest state_filter: The state filter used to fetch state from the database. 
""" - state_ids = await self.state_store.get_state_ids_for_event( + state_ids = await self.state_storage.get_state_ids_for_event( event.event_id, state_filter=state_filter or StateFilter.all() ) if event.is_state(): @@ -710,7 +710,7 @@ async def compute_summary( return None last_event = last_events[-1] - state_ids = await self.state_store.get_state_ids_for_event( + state_ids = await self.state_storage.get_state_ids_for_event( last_event.event_id, state_filter=StateFilter.from_types( [(EventTypes.Name, ""), (EventTypes.CanonicalAlias, "")] @@ -888,11 +888,13 @@ async def compute_state_delta( if full_state: if batch: - current_state_ids = await self.state_store.get_state_ids_for_event( - batch.events[-1].event_id, state_filter=state_filter + current_state_ids = ( + await self.state_storage.get_state_ids_for_event( + batch.events[-1].event_id, state_filter=state_filter + ) ) - state_ids = await self.state_store.get_state_ids_for_event( + state_ids = await self.state_storage.get_state_ids_for_event( batch.events[0].event_id, state_filter=state_filter ) @@ -913,7 +915,7 @@ async def compute_state_delta( elif batch.limited: if batch: state_at_timeline_start = ( - await self.state_store.get_state_ids_for_event( + await self.state_storage.get_state_ids_for_event( batch.events[0].event_id, state_filter=state_filter ) ) @@ -947,8 +949,10 @@ async def compute_state_delta( ) if batch: - current_state_ids = await self.state_store.get_state_ids_for_event( - batch.events[-1].event_id, state_filter=state_filter + current_state_ids = ( + await self.state_storage.get_state_ids_for_event( + batch.events[-1].event_id, state_filter=state_filter + ) ) else: # Its not clear how we get here, but empirically we do @@ -978,7 +982,7 @@ async def compute_state_delta( # So we fish out all the member events corresponding to the # timeline here, and then dedupe any redundant ones below. 
- state_ids = await self.state_store.get_state_ids_for_event( + state_ids = await self.state_storage.get_state_ids_for_event( batch.events[0].event_id, # we only want members! state_filter=StateFilter.from_types( diff --git a/synapse/push/mailer.py b/synapse/push/mailer.py index 5ccdd88364d7..84124af96527 100644 --- a/synapse/push/mailer.py +++ b/synapse/push/mailer.py @@ -114,7 +114,7 @@ def __init__( self.send_email_handler = hs.get_send_email_handler() self.store = self.hs.get_datastores().main - self.state_store = self.hs.get_storage().state + self.state_storage = self.hs.get_storage().state self.macaroon_gen = self.hs.get_macaroon_generator() self.state_handler = self.hs.get_state_handler() self.storage = hs.get_storage() @@ -494,7 +494,7 @@ async def _get_message_vars( ) else: # Attempt to check the historical state for the room. - historical_state = await self.state_store.get_state_for_event( + historical_state = await self.state_storage.get_state_for_event( event.event_id, StateFilter.from_types((type_state_key,)) ) sender_state_event = historical_state.get(type_state_key) @@ -767,7 +767,7 @@ async def _make_summary_text_from_member_events( member_event_ids.append(sender_state_event_id) else: # Attempt to check the historical state for the room. 
- historical_state = await self.state_store.get_state_for_event( + historical_state = await self.state_storage.get_state_for_event( event_id, StateFilter.from_types((type_state_key,)) ) sender_state_event = historical_state.get(type_state_key) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 4b4ed42cff33..536564b7ffe6 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -127,7 +127,7 @@ class StateHandler: def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self.store = hs.get_datastores().main - self.state_store = hs.get_storage().state + self.state_storage = hs.get_storage().state self.hs = hs self._state_resolution_handler = hs.get_state_resolution_handler() self._storage = hs.get_storage() @@ -339,7 +339,7 @@ async def compute_event_context( # if not state_group_before_event: - state_group_before_event = await self.state_store.store_state_group( + state_group_before_event = await self.state_storage.store_state_group( event.event_id, event.room_id, prev_group=state_group_before_event_prev_group, @@ -384,7 +384,7 @@ async def compute_event_context( state_ids_after_event[key] = event.event_id delta_ids = {key: event.event_id} - state_group_after_event = await self.state_store.store_state_group( + state_group_after_event = await self.state_storage.store_state_group( event.event_id, event.room_id, prev_group=state_group_before_event, @@ -418,7 +418,7 @@ async def resolve_state_groups_for_events( """ logger.debug("resolve_state_groups event_ids %s", event_ids) - state_groups = await self.state_store.get_state_group_for_events(event_ids) + state_groups = await self.state_storage.get_state_group_for_events(event_ids) state_group_ids = state_groups.values() @@ -426,8 +426,8 @@ async def resolve_state_groups_for_events( state_group_ids_set = set(state_group_ids) if len(state_group_ids_set) == 1: (state_group_id,) = state_group_ids_set - state = await self.state_store.get_state_for_groups(state_group_ids_set) - 
prev_group, delta_ids = await self.state_store.get_state_group_delta( + state = await self.state_storage.get_state_for_groups(state_group_ids_set) + prev_group, delta_ids = await self.state_storage.get_state_group_delta( state_group_id ) return _StateCacheEntry( @@ -441,7 +441,7 @@ async def resolve_state_groups_for_events( room_version = await self.store.get_room_version_id(room_id) - state_to_resolve = await self.state_store.get_state_for_groups( + state_to_resolve = await self.state_storage.get_state_for_groups( state_group_ids_set ) diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index e95dfdce2086..bef6c2b77609 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -50,7 +50,7 @@ def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: hs = self.setup_test_homeserver(federation_http_client=None) self.handler = hs.get_federation_handler() self.store = hs.get_datastores().main - self.state_store = hs.get_storage().state + self.state_storage = hs.get_storage().state self._event_auth_handler = hs.get_event_auth_handler() return hs @@ -334,7 +334,7 @@ def test_backfill_floating_outlier_membership_auth(self) -> None: # mapping from (type, state_key) -> state_event_id assert most_recent_prev_event_id is not None prev_state_map = self.get_success( - self.state_store.get_state_ids_for_event(most_recent_prev_event_id) + self.state_storage.get_state_ids_for_event(most_recent_prev_event_id) ) # List of state event ID's prev_state_ids = list(prev_state_map.values()) From 1b338476afbcb83918c5df285975878032bbce75 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Wed, 25 May 2022 23:24:28 +0200 Subject: [PATCH 108/181] Allow bigger responses to `/federation/v1/state` (#12877) * Refactor HTTP response size limits Rather than passing a separate `max_response_size` down the stack, make it an attribute of the `parser`. 
* Allow bigger responses on `federation/v1/state` `/state` can return huge responses, so we need to handle that. --- changelog.d/12877.bugfix | 1 + synapse/federation/transport/client.py | 15 ++++++------- synapse/http/matrixfederationclient.py | 29 +++++++------------------- tests/http/test_fedclient.py | 6 +++--- 4 files changed, 19 insertions(+), 32 deletions(-) create mode 100644 changelog.d/12877.bugfix diff --git a/changelog.d/12877.bugfix b/changelog.d/12877.bugfix new file mode 100644 index 000000000000..1ecf448baffd --- /dev/null +++ b/changelog.d/12877.bugfix @@ -0,0 +1 @@ +Fix a bug introduced in Synapse 1.54 which could sometimes cause exceptions when handling federated traffic. diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 9ce06dfa28ba..25df1905c672 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -49,11 +49,6 @@ logger = logging.getLogger(__name__) -# Send join responses can be huge, so we set a separate limit here. The response -# is parsed in a streaming manner, which helps alleviate the issue of memory -# usage a bit. -MAX_RESPONSE_SIZE_SEND_JOIN = 500 * 1024 * 1024 - class TransportLayerClient: """Sends federation HTTP requests to other servers""" @@ -349,7 +344,6 @@ async def send_join_v1( path=path, data=content, parser=SendJoinParser(room_version, v1_api=True), - max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN, ) async def send_join_v2( @@ -372,7 +366,6 @@ async def send_join_v2( args=query_params, data=content, parser=SendJoinParser(room_version, v1_api=False), - max_response_size=MAX_RESPONSE_SIZE_SEND_JOIN, ) async def send_leave_v1( @@ -1360,6 +1353,11 @@ class SendJoinParser(ByteParser[SendJoinResponse]): CONTENT_TYPE = "application/json" + # /send_join responses can be huge, so we override the size limit here. The response + # is parsed in a streaming manner, which helps alleviate the issue of memory + # usage a bit. 
+ MAX_RESPONSE_SIZE = 500 * 1024 * 1024 + def __init__(self, room_version: RoomVersion, v1_api: bool): self._response = SendJoinResponse([], [], event_dict={}) self._room_version = room_version @@ -1427,6 +1425,9 @@ class _StateParser(ByteParser[StateRequestResponse]): CONTENT_TYPE = "application/json" + # As with /send_join, /state responses can be huge. + MAX_RESPONSE_SIZE = 500 * 1024 * 1024 + def __init__(self, room_version: RoomVersion): self._response = StateRequestResponse([], []) self._room_version = room_version diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 0b9475debdb1..db44721ef509 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -92,9 +92,6 @@ "synapse_http_matrixfederationclient_responses", "", ["method", "code"] ) -# a federation response can be rather large (eg a big state_ids is 50M or so), so we -# need a generous limit here. -MAX_RESPONSE_SIZE = 100 * 1024 * 1024 MAX_LONG_RETRIES = 10 MAX_SHORT_RETRIES = 3 @@ -116,6 +113,11 @@ class ByteParser(ByteWriteable, Generic[T], abc.ABC): the content type doesn't match we fail the request. """ + # a federation response can be rather large (eg a big state_ids is 50M or so), so we + # need a generous limit here. + MAX_RESPONSE_SIZE: int = 100 * 1024 * 1024 + """The largest response this parser will accept.""" + @abc.abstractmethod def finish(self) -> T: """Called when response has finished streaming and the parser should @@ -203,7 +205,6 @@ async def _handle_response( response: IResponse, start_ms: int, parser: ByteParser[T], - max_response_size: Optional[int] = None, ) -> T: """ Reads the body of a response with a timeout and sends it to a parser @@ -215,15 +216,12 @@ async def _handle_response( response: response to the request start_ms: Timestamp when request was made parser: The parser for the response - max_response_size: The maximum size to read from the response, if None - uses the default. 
Returns: The parsed response """ - if max_response_size is None: - max_response_size = MAX_RESPONSE_SIZE + max_response_size = parser.MAX_RESPONSE_SIZE try: check_content_type_is(response.headers, parser.CONTENT_TYPE) @@ -240,7 +238,7 @@ async def _handle_response( "{%s} [%s] JSON response exceeded max size %i - %s %s", request.txn_id, request.destination, - MAX_RESPONSE_SIZE, + max_response_size, request.method, request.uri.decode("ascii"), ) @@ -772,7 +770,6 @@ async def put_json( backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Literal[None] = None, - max_response_size: Optional[int] = None, ) -> Union[JsonDict, list]: ... @@ -790,7 +787,6 @@ async def put_json( backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Optional[ByteParser[T]] = None, - max_response_size: Optional[int] = None, ) -> T: ... @@ -807,7 +803,6 @@ async def put_json( backoff_on_404: bool = False, try_trailing_slash_on_400: bool = False, parser: Optional[ByteParser] = None, - max_response_size: Optional[int] = None, ): """Sends the specified json data using PUT @@ -843,8 +838,6 @@ async def put_json( enabled. parser: The parser to use to decode the response. Defaults to parsing as JSON. - max_response_size: The maximum size to read from the response, if None - uses the default. Returns: Succeeds when we get a 2xx HTTP response. The @@ -895,7 +888,6 @@ async def put_json( response, start_ms, parser=parser, - max_response_size=max_response_size, ) return body @@ -984,7 +976,6 @@ async def get_json( ignore_backoff: bool = False, try_trailing_slash_on_400: bool = False, parser: Literal[None] = None, - max_response_size: Optional[int] = None, ) -> Union[JsonDict, list]: ... @@ -999,7 +990,6 @@ async def get_json( ignore_backoff: bool = ..., try_trailing_slash_on_400: bool = ..., parser: ByteParser[T] = ..., - max_response_size: Optional[int] = ..., ) -> T: ... 
@@ -1013,7 +1003,6 @@ async def get_json( ignore_backoff: bool = False, try_trailing_slash_on_400: bool = False, parser: Optional[ByteParser] = None, - max_response_size: Optional[int] = None, ): """GETs some json from the given host homeserver and path @@ -1043,9 +1032,6 @@ async def get_json( parser: The parser to use to decode the response. Defaults to parsing as JSON. - max_response_size: The maximum size to read from the response. If None, - uses the default. - Returns: Succeeds when we get a 2xx HTTP response. The result will be the decoded JSON body. @@ -1090,7 +1076,6 @@ async def get_json( response, start_ms, parser=parser, - max_response_size=max_response_size, ) return body diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index 638babae6995..006dbab093d5 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -26,7 +26,7 @@ from synapse.api.errors import RequestSendFailed from synapse.http.matrixfederationclient import ( - MAX_RESPONSE_SIZE, + JsonParser, MatrixFederationHttpClient, MatrixFederationRequest, ) @@ -609,9 +609,9 @@ def test_too_big(self): while not test_d.called: protocol.dataReceived(b"a" * chunk_size) sent += chunk_size - self.assertLessEqual(sent, MAX_RESPONSE_SIZE) + self.assertLessEqual(sent, JsonParser.MAX_RESPONSE_SIZE) - self.assertEqual(sent, MAX_RESPONSE_SIZE) + self.assertEqual(sent, JsonParser.MAX_RESPONSE_SIZE) f = self.failureResultOf(test_d) self.assertIsInstance(f.value, RequestSendFailed) From b83bc5fab57b37f75a79d02213d6032c586fd36e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 26 May 2022 10:48:12 +0100 Subject: [PATCH 109/181] Pull out less state when handling gaps mk2 (#12852) --- changelog.d/12852.misc | 1 + synapse/handlers/federation_event.py | 178 +++++++++++------------- synapse/handlers/message.py | 40 +++++- synapse/state/__init__.py | 22 ++- synapse/storage/databases/main/state.py | 59 ++++++++ tests/handlers/test_federation.py | 6 +- 
tests/storage/test_events.py | 43 ++++-- tests/test_state.py | 14 +- 8 files changed, 236 insertions(+), 127 deletions(-) create mode 100644 changelog.d/12852.misc diff --git a/changelog.d/12852.misc b/changelog.d/12852.misc new file mode 100644 index 000000000000..afca32471fb1 --- /dev/null +++ b/changelog.d/12852.misc @@ -0,0 +1 @@ +Pull out less state when handling gaps in room DAG. diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 8ce7187bef4a..a1361af2727d 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -274,7 +274,7 @@ async def on_receive_pdu(self, origin: str, pdu: EventBase) -> None: affected=pdu.event_id, ) - await self._process_received_pdu(origin, pdu, state=None) + await self._process_received_pdu(origin, pdu, state_ids=None) async def on_send_membership_event( self, origin: str, event: EventBase @@ -463,7 +463,9 @@ async def process_remote_join( with nested_logging_context(suffix=event.event_id): context = await self._state_handler.compute_event_context( event, - old_state=state, + state_ids_before_event={ + (e.type, e.state_key): e.event_id for e in state + }, partial_state=partial_state, ) @@ -512,12 +514,12 @@ async def update_state_for_partial_state_event( # # This is the same operation as we do when we receive a regular event # over federation. 
- state = await self._resolve_state_at_missing_prevs(destination, event) + state_ids = await self._resolve_state_at_missing_prevs(destination, event) # build a new state group for it if need be context = await self._state_handler.compute_event_context( event, - old_state=state, + state_ids_before_event=state_ids, ) if context.partial_state: # this can happen if some or all of the event's prev_events still have @@ -767,11 +769,12 @@ async def _process_pulled_event( return try: - state = await self._resolve_state_at_missing_prevs(origin, event) + state_ids = await self._resolve_state_at_missing_prevs(origin, event) # TODO(faster_joins): make sure that _resolve_state_at_missing_prevs does # not return partial state + await self._process_received_pdu( - origin, event, state=state, backfilled=backfilled + origin, event, state_ids=state_ids, backfilled=backfilled ) except FederationError as e: if e.code == 403: @@ -781,7 +784,7 @@ async def _process_pulled_event( async def _resolve_state_at_missing_prevs( self, dest: str, event: EventBase - ) -> Optional[Iterable[EventBase]]: + ) -> Optional[StateMap[str]]: """Calculate the state at an event with missing prev_events. This is used when we have pulled a batch of events from a remote server, and @@ -808,8 +811,8 @@ async def _resolve_state_at_missing_prevs( event: an event to check for missing prevs. Returns: - if we already had all the prev events, `None`. Otherwise, returns a list of - the events in the state at `event`. + if we already had all the prev events, `None`. Otherwise, returns + the event ids of the state at `event`. """ room_id = event.room_id event_id = event.event_id @@ -829,7 +832,7 @@ async def _resolve_state_at_missing_prevs( ) # Calculate the state after each of the previous events, and # resolve them to find the correct state at the current event. 
- event_map = {event_id: event} + try: # Get the state of the events we know about ours = await self._state_storage.get_state_groups_ids(room_id, seen) @@ -849,40 +852,23 @@ async def _resolve_state_at_missing_prevs( # note that if any of the missing prevs share missing state or # auth events, the requests to fetch those events are deduped # by the get_pdu_cache in federation_client. - remote_state = await self._get_state_after_missing_prev_event( - dest, room_id, p + remote_state_map = ( + await self._get_state_ids_after_missing_prev_event( + dest, room_id, p + ) ) - remote_state_map = { - (x.type, x.state_key): x.event_id for x in remote_state - } state_maps.append(remote_state_map) - for x in remote_state: - event_map[x.event_id] = x - room_version = await self._store.get_room_version_id(room_id) state_map = await self._state_resolution_handler.resolve_events_with_store( room_id, room_version, state_maps, - event_map, + event_map={event_id: event}, state_res_store=StateResolutionStore(self._store), ) - # We need to give _process_received_pdu the actual state events - # rather than event ids, so generate that now. - - # First though we need to fetch all the events that are in - # state_map, so we can build up the state below. - evs = await self._store.get_events( - list(state_map.values()), - get_prev_content=False, - redact_behaviour=EventRedactBehaviour.as_is, - ) - event_map.update(evs) - - state = [event_map[e] for e in state_map.values()] except Exception: logger.warning( "Error attempting to resolve state at missing prev_events", @@ -894,14 +880,14 @@ async def _resolve_state_at_missing_prevs( "We can't get valid state history.", affected=event_id, ) - return state + return state_map - async def _get_state_after_missing_prev_event( + async def _get_state_ids_after_missing_prev_event( self, destination: str, room_id: str, event_id: str, - ) -> List[EventBase]: + ) -> StateMap[str]: """Requests all of the room state at a given event from a remote homeserver. 
Args: @@ -910,7 +896,7 @@ async def _get_state_after_missing_prev_event( event_id: The id of the event we want the state at. Returns: - A list of events in the state, including the event itself + The event ids of the state *after* the given event. """ ( state_event_ids, @@ -925,19 +911,17 @@ async def _get_state_after_missing_prev_event( len(auth_event_ids), ) - # start by just trying to fetch the events from the store + # Start by checking events we already have in the DB desired_events = set(state_event_ids) desired_events.add(event_id) logger.debug("Fetching %i events from cache/store", len(desired_events)) - fetched_events = await self._store.get_events( - desired_events, allow_rejected=True - ) + have_events = await self._store.have_seen_events(room_id, desired_events) - missing_desired_events = desired_events - fetched_events.keys() + missing_desired_events = desired_events - have_events logger.debug( "We are missing %i events (got %i)", len(missing_desired_events), - len(fetched_events), + len(have_events), ) # We probably won't need most of the auth events, so let's just check which @@ -948,7 +932,7 @@ async def _get_state_after_missing_prev_event( # already have a bunch of the state events. It would be nice if the # federation api gave us a way of finding out which we actually need. - missing_auth_events = set(auth_event_ids) - fetched_events.keys() + missing_auth_events = set(auth_event_ids) - have_events missing_auth_events.difference_update( await self._store.have_seen_events(room_id, missing_auth_events) ) @@ -974,47 +958,51 @@ async def _get_state_after_missing_prev_event( destination=destination, room_id=room_id, event_ids=missing_events ) - # we need to make sure we re-load from the database to get the rejected - # state correct. 
- fetched_events.update( - await self._store.get_events(missing_desired_events, allow_rejected=True) - ) + # We now need to fill out the state map, which involves fetching the + # type and state key for each event ID in the state. + state_map = {} - # check for events which were in the wrong room. - # - # this can happen if a remote server claims that the state or - # auth_events at an event in room A are actually events in room B - - bad_events = [ - (event_id, event.room_id) - for event_id, event in fetched_events.items() - if event.room_id != room_id - ] + event_metadata = await self._store.get_metadata_for_events(state_event_ids) + for state_event_id, metadata in event_metadata.items(): + if metadata.room_id != room_id: + # This is a bogus situation, but since we may only discover it a long time + # after it happened, we try our best to carry on, by just omitting the + # bad events from the returned state set. + # + # This can happen if a remote server claims that the state or + # auth_events at an event in room A are actually events in room B + logger.warning( + "Remote server %s claims event %s in room %s is an auth/state " + "event in room %s", + destination, + state_event_id, + metadata.room_id, + room_id, + ) + continue - for bad_event_id, bad_room_id in bad_events: - # This is a bogus situation, but since we may only discover it a long time - # after it happened, we try our best to carry on, by just omitting the - # bad events from the returned state set. - logger.warning( - "Remote server %s claims event %s in room %s is an auth/state " - "event in room %s", - destination, - bad_event_id, - bad_room_id, - room_id, - ) + if metadata.state_key is None: + logger.warning( + "Remote server gave us non-state event in state: %s", state_event_id + ) + continue - del fetched_events[bad_event_id] + state_map[(metadata.event_type, metadata.state_key)] = state_event_id # if we couldn't get the prev event in question, that's a problem. 
- remote_event = fetched_events.get(event_id) + remote_event = await self._store.get_event( + event_id, + allow_none=True, + allow_rejected=True, + redact_behaviour=EventRedactBehaviour.as_is, + ) if not remote_event: raise Exception("Unable to get missing prev_event %s" % (event_id,)) # missing state at that event is a warning, not a blocker # XXX: this doesn't sound right? it means that we'll end up with incomplete # state. - failed_to_fetch = desired_events - fetched_events.keys() + failed_to_fetch = desired_events - event_metadata.keys() if failed_to_fetch: logger.warning( "Failed to fetch missing state events for %s %s", @@ -1022,14 +1010,12 @@ async def _get_state_after_missing_prev_event( failed_to_fetch, ) - remote_state = [ - fetched_events[e_id] for e_id in state_event_ids if e_id in fetched_events - ] - if remote_event.is_state() and remote_event.rejected_reason is None: - remote_state.append(remote_event) + state_map[ + (remote_event.type, remote_event.state_key) + ] = remote_event.event_id - return remote_state + return state_map async def _get_state_and_persist( self, destination: str, room_id: str, event_id: str @@ -1056,7 +1042,7 @@ async def _process_received_pdu( self, origin: str, event: EventBase, - state: Optional[Iterable[EventBase]], + state_ids: Optional[StateMap[str]], backfilled: bool = False, ) -> None: """Called when we have a new non-outlier event. 
@@ -1078,7 +1064,7 @@ async def _process_received_pdu( event: event to be persisted - state: Normally None, but if we are handling a gap in the graph + state_ids: Normally None, but if we are handling a gap in the graph (ie, we are missing one or more prev_events), the resolved state at the event @@ -1090,7 +1076,8 @@ async def _process_received_pdu( try: context = await self._state_handler.compute_event_context( - event, old_state=state + event, + state_ids_before_event=state_ids, ) context = await self._check_event_auth( origin, @@ -1107,7 +1094,7 @@ async def _process_received_pdu( # For new (non-backfilled and non-outlier) events we check if the event # passes auth based on the current state. If it doesn't then we # "soft-fail" the event. - await self._check_for_soft_fail(event, state, origin=origin) + await self._check_for_soft_fail(event, state_ids, origin=origin) await self._run_push_actions_and_persist_event(event, context, backfilled) @@ -1589,7 +1576,7 @@ async def _maybe_kick_guest_users(self, event: EventBase) -> None: async def _check_for_soft_fail( self, event: EventBase, - state: Optional[Iterable[EventBase]], + state_ids: Optional[StateMap[str]], origin: str, ) -> None: """Checks if we should soft fail the event; if so, marks the event as @@ -1597,7 +1584,7 @@ async def _check_for_soft_fail( Args: event - state: The state at the event if we don't have all the event's prev events + state_ids: The state at the event if we don't have all the event's prev events origin: The host the event originates from. """ extrem_ids_list = await self._store.get_latest_event_ids_in_room(event.room_id) @@ -1613,7 +1600,7 @@ async def _check_for_soft_fail( room_version_obj = KNOWN_ROOM_VERSIONS[room_version] # Calculate the "current state". - if state is not None: + if state_ids is not None: # If we're explicitly given the state then we won't have all the # prev events, and so we have a gap in the graph. 
In this case # we want to be a little careful as we might have been down for @@ -1626,17 +1613,20 @@ async def _check_for_soft_fail( # given state at the event. This should correctly handle cases # like bans, especially with state res v2. - state_sets_d = await self._state_storage.get_state_groups( + state_sets_d = await self._state_storage.get_state_groups_ids( event.room_id, extrem_ids ) - state_sets: List[Iterable[EventBase]] = list(state_sets_d.values()) - state_sets.append(state) - current_states = await self._state_handler.resolve_events( - room_version, state_sets, event + state_sets: List[StateMap[str]] = list(state_sets_d.values()) + state_sets.append(state_ids) + current_state_ids = ( + await self._state_resolution_handler.resolve_events_with_store( + event.room_id, + room_version, + state_sets, + event_map=None, + state_res_store=StateResolutionStore(self._store), + ) ) - current_state_ids: StateMap[str] = { - k: e.event_id for k, e in current_states.items() - } else: current_state_ids = await self._state_handler.get_current_state_ids( event.room_id, latest_event_ids=extrem_ids diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9501e7f1b7de..7ca126dbd171 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -55,7 +55,14 @@ from synapse.replication.http.send_event import ReplicationSendEventRestServlet from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter -from synapse.types import Requester, RoomAlias, StreamToken, UserID, create_requester +from synapse.types import ( + MutableStateMap, + Requester, + RoomAlias, + StreamToken, + UserID, + create_requester, +) from synapse.util import json_decoder, json_encoder, log_failure, unwrapFirstError from synapse.util.async_helpers import Linearizer, gather_results from synapse.util.caches.expiringcache import ExpiringCache @@ -1022,8 +1029,35 @@ async def create_new_client_event( # # 
TODO(faster_joins): figure out how this works, and make sure that the # old state is complete. - old_state = await self.store.get_events_as_list(state_event_ids) - context = await self.state.compute_event_context(event, old_state=old_state) + metadata = await self.store.get_metadata_for_events(state_event_ids) + + state_map_for_event: MutableStateMap[str] = {} + for state_id in state_event_ids: + data = metadata.get(state_id) + if data is None: + # We're trying to persist a new historical batch of events + # with the given state, e.g. via + # `RoomBatchSendEventRestServlet`. The state can be inferred + # by Synapse or set directly by the client. + # + # Either way, we should have persisted all the state before + # getting here. + raise Exception( + f"State event {state_id} not found in DB," + " Synapse should have persisted it before using it." + ) + + if data.state_key is None: + raise Exception( + f"Trying to set non-state event {state_id} as state" + ) + + state_map_for_event[(data.event_type, data.state_key)] = state_id + + context = await self.state.compute_event_context( + event, + state_ids_before_event=state_map_for_event, + ) else: context = await self.state.compute_event_context(event) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 536564b7ffe6..9c9d946f38c0 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -261,7 +261,7 @@ async def get_hosts_in_room_at_events( async def compute_event_context( self, event: EventBase, - old_state: Optional[Iterable[EventBase]] = None, + state_ids_before_event: Optional[StateMap[str]] = None, partial_state: bool = False, ) -> EventContext: """Build an EventContext structure for a non-outlier event. @@ -273,12 +273,12 @@ async def compute_event_context( Args: event: - old_state: The state at the event if it can't be - calculated from existing events. This is normally only specified - when receiving an event from federation where we don't have the - prev events for, e.g. 
when backfilling. - partial_state: True if `old_state` is partial and omits non-critical - membership events + state_ids_before_event: The event ids of the state before the event if + it can't be calculated from existing events. This is normally + only specified when receiving an event from federation where we + don't have the prev events, e.g. when backfilling. + partial_state: True if `state_ids_before_event` is partial and omits + non-critical membership events Returns: The event context. """ @@ -286,13 +286,11 @@ async def compute_event_context( assert not event.internal_metadata.is_outlier() # - # first of all, figure out the state before the event + # first of all, figure out the state before the event, unless we + # already have it. # - if old_state: + if state_ids_before_event: # if we're given the state before the event, then we use that - state_ids_before_event: StateMap[str] = { - (s.type, s.state_key): s.event_id for s in old_state - } state_group_before_event = None state_group_before_event_prev_group = None deltas_to_state_group_before_event = None diff --git a/synapse/storage/databases/main/state.py b/synapse/storage/databases/main/state.py index 18ae8aee295d..ea5cbdac08eb 100644 --- a/synapse/storage/databases/main/state.py +++ b/synapse/storage/databases/main/state.py @@ -16,6 +16,8 @@ import logging from typing import TYPE_CHECKING, Collection, Dict, Iterable, Optional, Set, Tuple +import attr + from synapse.api.constants import EventTypes, Membership from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion @@ -26,6 +28,7 @@ DatabasePool, LoggingDatabaseConnection, LoggingTransaction, + make_in_list_sql_clause, ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.roommember import RoomMemberWorkerStore @@ -33,6 +36,7 @@ from synapse.types import JsonDict, JsonMapping, StateMap from synapse.util.caches 
import intern_string from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.iterutils import batch_iter if TYPE_CHECKING: from synapse.server import HomeServer @@ -43,6 +47,15 @@ MAX_STATE_DELTA_HOPS = 100 +@attr.s(slots=True, frozen=True, auto_attribs=True) +class EventMetadata: + """Returned by `get_metadata_for_events`""" + + room_id: str + event_type: str + state_key: Optional[str] + + def _retrieve_and_check_room_version(room_id: str, room_version_id: str) -> RoomVersion: v = KNOWN_ROOM_VERSIONS.get(room_version_id) if not v: @@ -133,6 +146,52 @@ def get_room_version_id_txn(self, txn: LoggingTransaction, room_id: str) -> str: return room_version + async def get_metadata_for_events( + self, event_ids: Collection[str] + ) -> Dict[str, EventMetadata]: + """Get some metadata (room_id, type, state_key) for the given events. + + This method is a faster alternative than fetching the full events from + the DB, and should be used when the full event is not needed. + + Returns metadata for rejected and redacted events. Events that have not + been persisted are omitted from the returned dict. 
+ """ + + def get_metadata_for_events_txn( + txn: LoggingTransaction, + batch_ids: Collection[str], + ) -> Dict[str, EventMetadata]: + clause, args = make_in_list_sql_clause( + self.database_engine, "e.event_id", batch_ids + ) + + sql = f""" + SELECT e.event_id, e.room_id, e.type, e.state_key FROM events AS e + LEFT JOIN state_events USING (event_id) + WHERE {clause} + """ + + txn.execute(sql, args) + return { + event_id: EventMetadata( + room_id=room_id, event_type=event_type, state_key=state_key + ) + for event_id, room_id, event_type, state_key in txn + } + + result_map: Dict[str, EventMetadata] = {} + for batch_ids in batch_iter(event_ids, 1000): + result_map.update( + await self.db_pool.runInteraction( + "get_metadata_for_events", + get_metadata_for_events_txn, + batch_ids=batch_ids, + ) + ) + + return result_map + async def get_room_predecessor(self, room_id: str) -> Optional[JsonMapping]: """Get the predecessor of an upgraded room if it exists. Otherwise return None. diff --git a/tests/handlers/test_federation.py b/tests/handlers/test_federation.py index bef6c2b77609..ec0090062166 100644 --- a/tests/handlers/test_federation.py +++ b/tests/handlers/test_federation.py @@ -276,7 +276,11 @@ def test_backfill_with_many_backward_extremities(self) -> None: # federation handler wanting to backfill the fake event. 
self.get_success( federation_event_handler._process_received_pdu( - self.OTHER_SERVER_NAME, event, state=current_state + self.OTHER_SERVER_NAME, + event, + state_ids={ + (e.type, e.state_key): e.event_id for e in current_state + }, ) ) diff --git a/tests/storage/test_events.py b/tests/storage/test_events.py index ef5e25873c22..aaa3189b16ef 100644 --- a/tests/storage/test_events.py +++ b/tests/storage/test_events.py @@ -69,7 +69,7 @@ def prepare(self, reactor, clock, homeserver): def persist_event(self, event, state=None): """Persist the event, with optional state""" context = self.get_success( - self.state.compute_event_context(event, old_state=state) + self.state.compute_event_context(event, state_ids_before_event=state) ) self.get_success(self.persistence.persist_event(event, context)) @@ -103,9 +103,11 @@ def test_prune_gap(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. self.assert_extremities([remote_event_2.event_id]) @@ -135,13 +137,14 @@ def test_do_not_prune_gap_if_state_different(self): # setting. The state resolution across the old and new event will then # include it, and so the resolved state won't match the new state. 
state_before_gap = dict( - self.get_success(self.state.get_current_state(self.room_id)) + self.get_success(self.state.get_current_state_ids(self.room_id)) ) state_before_gap.pop(("m.room.history_visibility", "")) context = self.get_success( self.state.compute_event_context( - remote_event_2, old_state=state_before_gap.values() + remote_event_2, + state_ids_before_event=state_before_gap, ) ) @@ -177,9 +180,11 @@ def test_prune_gap_if_old(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. self.assert_extremities([remote_event_2.event_id]) @@ -207,9 +212,11 @@ def test_do_not_prune_gap_if_other_server(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. self.assert_extremities([self.remote_event_1.event_id, remote_event_2.event_id]) @@ -247,9 +254,11 @@ def test_prune_gap_if_dummy_remote(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. 
self.assert_extremities([remote_event_2.event_id]) @@ -289,9 +298,11 @@ def test_prune_gap_if_dummy_local(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. self.assert_extremities([remote_event_2.event_id, local_message_event_id]) @@ -323,9 +334,11 @@ def test_do_not_prune_gap_if_not_dummy(self): RoomVersions.V6, ) - state_before_gap = self.get_success(self.state.get_current_state(self.room_id)) + state_before_gap = self.get_success( + self.state.get_current_state_ids(self.room_id) + ) - self.persist_event(remote_event_2, state=state_before_gap.values()) + self.persist_event(remote_event_2, state=state_before_gap) # Check the new extremity is just the new remote event. 
self.assert_extremities([local_message_event_id, remote_event_2.event_id]) diff --git a/tests/test_state.py b/tests/test_state.py index c6baea3d7604..84694d368d8b 100644 --- a/tests/test_state.py +++ b/tests/test_state.py @@ -442,7 +442,12 @@ def test_annotate_with_old_message(self): ] context = yield defer.ensureDeferred( - self.state.compute_event_context(event, old_state=old_state) + self.state.compute_event_context( + event, + state_ids_before_event={ + (e.type, e.state_key): e.event_id for e in old_state + }, + ) ) prev_state_ids = yield defer.ensureDeferred(context.get_prev_state_ids()) @@ -467,7 +472,12 @@ def test_annotate_with_old_state(self): ] context = yield defer.ensureDeferred( - self.state.compute_event_context(event, old_state=old_state) + self.state.compute_event_context( + event, + state_ids_before_event={ + (e.type, e.state_key): e.event_id for e in old_state + }, + ) ) prev_state_ids = yield defer.ensureDeferred(context.get_prev_state_ids()) From b5707ceabad79267928b1f5e0bff582b09488847 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 May 2022 07:09:16 -0400 Subject: [PATCH 110/181] Avoid attempting to delete push actions for remote users. (#12879) Remote users will never have push actions, so we can avoid a database round-trip/transaction completely. --- changelog.d/12879.misc | 1 + synapse/federation/sender/per_destination_queue.py | 2 +- synapse/storage/databases/main/event_push_actions.py | 2 +- synapse/storage/databases/main/receipts.py | 5 ++++- synapse/storage/persist_events.py | 2 +- 5 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 changelog.d/12879.misc diff --git a/changelog.d/12879.misc b/changelog.d/12879.misc new file mode 100644 index 000000000000..24fa0d0de05f --- /dev/null +++ b/changelog.d/12879.misc @@ -0,0 +1 @@ +Avoid running queries which will never result in deletions. 
diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index d80f0ac5e8c3..8983b5a53d8a 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -223,7 +223,7 @@ def mark_new_data(self) -> None: """Marks that the destination has new data to send, without starting a new transaction. - If a transaction loop is already in progress then a new transcation will + If a transaction loop is already in progress then a new transaction will be attempted when the current one finishes. """ diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index b7c4c62222bd..b019979350e3 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -938,7 +938,7 @@ def _remove_old_push_actions_before_txn( users can still get a list of recent highlights. Args: - txn: The transcation + txn: The transaction room_id: Room ID to delete from user_id: user ID to delete for stream_ordering: The lowest stream ordering which will diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index d035969a3178..cfa4d4924d54 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -673,8 +673,11 @@ def insert_linearized_receipt_txn( lock=False, ) + # When updating a local users read receipt, remove any push actions + # which resulted from the receipt's event and all earlier events. 
if ( - receipt_type in (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE) + self.hs.is_mine_id(user_id) + and receipt_type in (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE) and stream_ordering is not None ): self._remove_old_push_actions_before_txn( # type: ignore[attr-defined] diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 0fc282866bc5..a21dea91c852 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -313,7 +313,7 @@ async def persist_events( List of events persisted, the current position room stream position. The list of events persisted may not be the same as those passed in if they were deduplicated due to an event already existing that - matched the transcation ID; the existing event is returned in such + matched the transaction ID; the existing event is returned in such a case. """ partitioned: Dict[str, List[Tuple[EventBase, EventContext]]] = {} From 1885ee011395f9c1f121f8045ac6d47a74c4cc24 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 May 2022 07:10:28 -0400 Subject: [PATCH 111/181] Remove unstable APIs for /hierarchy. (#12851) Removes the unstable endpoint as well as a duplicated field which was modified during stabilization. --- changelog.d/12851.misc | 1 + docs/workers.md | 6 +++--- .../federation/transport/server/federation.py | 5 ----- synapse/handlers/room_summary.py | 5 +---- synapse/rest/client/room.py | 7 +------ tests/handlers/test_room_summary.py | 20 +++++++++---------- 6 files changed, 16 insertions(+), 28 deletions(-) create mode 100644 changelog.d/12851.misc diff --git a/changelog.d/12851.misc b/changelog.d/12851.misc new file mode 100644 index 000000000000..ca6f48c36943 --- /dev/null +++ b/changelog.d/12851.misc @@ -0,0 +1 @@ +Remove the unstable `/hierarchy` endpoint from [MSC2946](/~https://github.com/matrix-org/matrix-doc/pull/2946). 
diff --git a/docs/workers.md b/docs/workers.md index 6a76f43fa1d2..78973a498c45 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -193,7 +193,7 @@ information. ^/_matrix/federation/v1/user/devices/ ^/_matrix/federation/v1/get_groups_publicised$ ^/_matrix/key/v2/query - ^/_matrix/federation/(v1|unstable/org.matrix.msc2946)/hierarchy/ + ^/_matrix/federation/v1/hierarchy/ # Inbound federation transaction request ^/_matrix/federation/v1/send/ @@ -205,8 +205,8 @@ information. ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/context/.*$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/members$ ^/_matrix/client/(api/v1|r0|v3|unstable)/rooms/.*/state$ - ^/_matrix/client/(v1|unstable/org.matrix.msc2946)/rooms/.*/hierarchy$ - ^/_matrix/client/(v1|unstable/org.matrix.msc2716)/rooms/.*/batch_send$ + ^/_matrix/client/v1/rooms/.*/hierarchy$ + ^/_matrix/client/unstable/org.matrix.msc2716/rooms/.*/batch_send$ ^/_matrix/client/unstable/im.nheko.summary/rooms/.*/summary$ ^/_matrix/client/(r0|v3|unstable)/account/3pid$ ^/_matrix/client/(r0|v3|unstable)/account/whoami$ diff --git a/synapse/federation/transport/server/federation.py b/synapse/federation/transport/server/federation.py index 6fbc7b5f15a7..57e8fb21b0c9 100644 --- a/synapse/federation/transport/server/federation.py +++ b/synapse/federation/transport/server/federation.py @@ -650,10 +650,6 @@ async def on_GET( ) -class FederationRoomHierarchyUnstableServlet(FederationRoomHierarchyServlet): - PREFIX = FEDERATION_UNSTABLE_PREFIX + "/org.matrix.msc2946" - - class RoomComplexityServlet(BaseFederationServlet): """ Indicates to other servers how complex (and therefore likely @@ -752,7 +748,6 @@ async def on_POST( FederationVersionServlet, RoomComplexityServlet, FederationRoomHierarchyServlet, - FederationRoomHierarchyUnstableServlet, FederationV1SendKnockServlet, FederationMakeKnockServlet, FederationAccountStatusServlet, diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index 
1dd74912fa95..75aee6a11165 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -662,7 +662,7 @@ async def _is_remote_room_accessible( # The API doesn't return the room version so assume that a # join rule of knock is valid. if ( - room.get("join_rules") + room.get("join_rule") in (JoinRules.PUBLIC, JoinRules.KNOCK, JoinRules.KNOCK_RESTRICTED) or room.get("world_readable") is True ): @@ -714,9 +714,6 @@ async def _build_room_entry(self, room_id: str, for_federation: bool) -> JsonDic "canonical_alias": stats["canonical_alias"], "num_joined_members": stats["joined_members"], "avatar_url": stats["avatar"], - # plural join_rules is a documentation error but kept for historical - # purposes. Should match /publicRooms. - "join_rules": stats["join_rules"], "join_rule": stats["join_rules"], "world_readable": ( stats["history_visibility"] == HistoryVisibility.WORLD_READABLE diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 5a2361a2e691..7a5ce8ad0e24 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -1193,12 +1193,7 @@ async def on_GET( class RoomHierarchyRestServlet(RestServlet): - PATTERNS = ( - re.compile( - "^/_matrix/client/(v1|unstable/org.matrix.msc2946)" - "/rooms/(?P[^/]*)/hierarchy$" - ), - ) + PATTERNS = (re.compile("^/_matrix/client/v1/rooms/(?P[^/]*)/hierarchy$"),) def __init__(self, hs: "HomeServer"): super().__init__() diff --git a/tests/handlers/test_room_summary.py b/tests/handlers/test_room_summary.py index e74eb71774df..0546655690fd 100644 --- a/tests/handlers/test_room_summary.py +++ b/tests/handlers/test_room_summary.py @@ -179,7 +179,7 @@ def _assert_hierarchy( result_children_ids.append( [ (cs["room_id"], cs["state_key"]) - for cs in result_room.get("children_state") + for cs in result_room["children_state"] ] ) @@ -772,7 +772,7 @@ def test_fed_filtering(self): { "room_id": public_room, "world_readable": False, - "join_rules": JoinRules.PUBLIC, + "join_rule": 
JoinRules.PUBLIC, }, ), ( @@ -780,7 +780,7 @@ def test_fed_filtering(self): { "room_id": knock_room, "world_readable": False, - "join_rules": JoinRules.KNOCK, + "join_rule": JoinRules.KNOCK, }, ), ( @@ -788,7 +788,7 @@ def test_fed_filtering(self): { "room_id": not_invited_room, "world_readable": False, - "join_rules": JoinRules.INVITE, + "join_rule": JoinRules.INVITE, }, ), ( @@ -796,7 +796,7 @@ def test_fed_filtering(self): { "room_id": invited_room, "world_readable": False, - "join_rules": JoinRules.INVITE, + "join_rule": JoinRules.INVITE, }, ), ( @@ -804,7 +804,7 @@ def test_fed_filtering(self): { "room_id": restricted_room, "world_readable": False, - "join_rules": JoinRules.RESTRICTED, + "join_rule": JoinRules.RESTRICTED, "allowed_room_ids": [], }, ), @@ -813,7 +813,7 @@ def test_fed_filtering(self): { "room_id": restricted_accessible_room, "world_readable": False, - "join_rules": JoinRules.RESTRICTED, + "join_rule": JoinRules.RESTRICTED, "allowed_room_ids": [self.room], }, ), @@ -822,7 +822,7 @@ def test_fed_filtering(self): { "room_id": world_readable_room, "world_readable": True, - "join_rules": JoinRules.INVITE, + "join_rule": JoinRules.INVITE, }, ), ( @@ -830,7 +830,7 @@ def test_fed_filtering(self): { "room_id": joined_room, "world_readable": False, - "join_rules": JoinRules.INVITE, + "join_rule": JoinRules.INVITE, }, ), ) @@ -911,7 +911,7 @@ def test_fed_invited(self): { "room_id": fed_room, "world_readable": False, - "join_rules": JoinRules.INVITE, + "join_rule": JoinRules.INVITE, }, ) From e76864436867deba7fc6b740d1f8d80f4717f44b Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 26 May 2022 12:19:01 +0100 Subject: [PATCH 112/181] Fix ambiguous column name that would prevent use of MSC2716 History Import when using Postgres as a database. 
(#12843) --- changelog.d/12843.bugfix | 1 + synapse/storage/databases/main/event_federation.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12843.bugfix diff --git a/changelog.d/12843.bugfix b/changelog.d/12843.bugfix new file mode 100644 index 000000000000..f87c0799a070 --- /dev/null +++ b/changelog.d/12843.bugfix @@ -0,0 +1 @@ +Fix bug where servers using a Postgres database would fail to backfill from an insertion event when MSC2716 is enabled (`experimental_features.msc2716_enabled`). diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index dcfe8caf473a..562dcbe94d3a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1057,7 +1057,7 @@ def _get_connected_batch_event_backfill_results_txn( INNER JOIN batch_events AS c ON i.next_batch_id = c.batch_id /* Get the depth of the batch start event from the events table */ - INNER JOIN events AS e USING (event_id) + INNER JOIN events AS e ON c.event_id = e.event_id /* Find an insertion event which matches the given event_id */ WHERE i.event_id = ? LIMIT ? From 1cba285a7971eb88f41139ff466918332a98b479 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 May 2022 12:42:21 +0000 Subject: [PATCH 113/181] Bump pyjwt from 2.3.0 to 2.4.0 (#12865) Bumps [pyjwt](/~https://github.com/jpadilla/pyjwt) from 2.3.0 to 2.4.0. - [Release notes](/~https://github.com/jpadilla/pyjwt/releases) - [Changelog](/~https://github.com/jpadilla/pyjwt/blob/master/CHANGELOG.rst) - [Commits](/~https://github.com/jpadilla/pyjwt/compare/2.3.0...2.4.0) --- updated-dependencies: - dependency-name: pyjwt dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- changelog.d/12865.misc | 1 + poetry.lock | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/12865.misc diff --git a/changelog.d/12865.misc b/changelog.d/12865.misc new file mode 100644 index 000000000000..d982ca762250 --- /dev/null +++ b/changelog.d/12865.misc @@ -0,0 +1 @@ +Update `pyjwt` dependency to [2.4.0](/~https://github.com/jpadilla/pyjwt/releases/tag/2.4.0). diff --git a/poetry.lock b/poetry.lock index 49a912a58962..f64d70941e77 100644 --- a/poetry.lock +++ b/poetry.lock @@ -813,7 +813,7 @@ python-versions = ">=3.5" [[package]] name = "pyjwt" -version = "2.3.0" +version = "2.4.0" description = "JSON Web Token implementation in Python" category = "main" optional = false @@ -2264,8 +2264,8 @@ pygments = [ {file = "Pygments-2.11.2.tar.gz", hash = "sha256:4e426f72023d88d03b2fa258de560726ce890ff3b630f88c21cbb8b2503b8c6a"}, ] pyjwt = [ - {file = "PyJWT-2.3.0-py3-none-any.whl", hash = "sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"}, - {file = "PyJWT-2.3.0.tar.gz", hash = "sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41"}, + {file = "PyJWT-2.4.0-py3-none-any.whl", hash = "sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf"}, + {file = "PyJWT-2.4.0.tar.gz", hash = "sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"}, ] pymacaroons = [ {file = "pymacaroons-0.13.0-py2.py3-none-any.whl", hash = "sha256:3e14dff6a262fdbf1a15e769ce635a8aea72e6f8f91e408f9a97166c53b91907"}, From 49f06866e4db2e19467a3733b2909ba397da265e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 26 May 2022 09:04:34 -0400 Subject: [PATCH 114/181] Remove backing code for groups/communities (#12558) Including handlers, configuration code, appservice support, and the GroupID construct. 
--- changelog.d/12558.removal | 1 + synapse/appservice/__init__.py | 43 +- synapse/config/_base.pyi | 2 - synapse/config/groups.py | 27 - synapse/config/homeserver.py | 2 - synapse/groups/__init__.py | 0 synapse/groups/attestations.py | 218 ------ synapse/groups/groups_server.py | 1019 --------------------------- synapse/handlers/groups_local.py | 503 ------------- synapse/server.py | 39 +- synapse/types.py | 23 - tests/appservice/test_appservice.py | 2 +- tests/test_types.py | 21 +- 13 files changed, 6 insertions(+), 1894 deletions(-) create mode 100644 changelog.d/12558.removal delete mode 100644 synapse/config/groups.py delete mode 100644 synapse/groups/__init__.py delete mode 100644 synapse/groups/attestations.py delete mode 100644 synapse/groups/groups_server.py delete mode 100644 synapse/handlers/groups_local.py diff --git a/changelog.d/12558.removal b/changelog.d/12558.removal new file mode 100644 index 000000000000..41f6fae5da91 --- /dev/null +++ b/changelog.d/12558.removal @@ -0,0 +1 @@ +Remove support for the non-standard groups/communities feature from Synapse. 
diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index a610fb785d38..ed92c2e91012 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -23,13 +23,7 @@ from synapse.api.constants import EventTypes from synapse.events import EventBase -from synapse.types import ( - DeviceListUpdates, - GroupID, - JsonDict, - UserID, - get_domain_from_id, -) +from synapse.types import DeviceListUpdates, JsonDict, UserID from synapse.util.caches.descriptors import _CacheContext, cached if TYPE_CHECKING: @@ -55,7 +49,6 @@ class ApplicationServiceState(Enum): @attr.s(slots=True, frozen=True, auto_attribs=True) class Namespace: exclusive: bool - group_id: Optional[str] regex: Pattern[str] @@ -141,30 +134,13 @@ def _check_namespaces( exclusive = regex_obj.get("exclusive") if not isinstance(exclusive, bool): raise ValueError("Expected bool for 'exclusive' in ns '%s'" % ns) - group_id = regex_obj.get("group_id") - if group_id: - if not isinstance(group_id, str): - raise ValueError( - "Expected string for 'group_id' in ns '%s'" % ns - ) - try: - GroupID.from_string(group_id) - except Exception: - raise ValueError( - "Expected valid group ID for 'group_id' in ns '%s'" % ns - ) - - if get_domain_from_id(group_id) != self.server_name: - raise ValueError( - "Expected 'group_id' to be this host in ns '%s'" % ns - ) regex = regex_obj.get("regex") if not isinstance(regex, str): raise ValueError("Expected string for 'regex' in ns '%s'" % ns) # Pre-compile regex. - result[ns].append(Namespace(exclusive, group_id, re.compile(regex))) + result[ns].append(Namespace(exclusive, re.compile(regex))) return result @@ -369,21 +345,6 @@ def get_exclusive_user_regexes(self) -> List[Pattern[str]]: if namespace.exclusive ] - def get_groups_for_user(self, user_id: str) -> Iterable[str]: - """Get the groups that this user is associated with by this AS - - Args: - user_id: The ID of the user. - - Returns: - An iterable that yields group_id strings. 
- """ - return ( - namespace.group_id - for namespace in self.namespaces[ApplicationService.NS_USERS] - if namespace.group_id and namespace.regex.match(user_id) - ) - def is_rate_limited(self) -> bool: return self.rate_limited diff --git a/synapse/config/_base.pyi b/synapse/config/_base.pyi index 71d6655fda4e..01ea2b4dab56 100644 --- a/synapse/config/_base.pyi +++ b/synapse/config/_base.pyi @@ -32,7 +32,6 @@ from synapse.config import ( emailconfig, experimental, federation, - groups, jwt, key, logger, @@ -107,7 +106,6 @@ class RootConfig: push: push.PushConfig spamchecker: spam_checker.SpamCheckerConfig room: room.RoomConfig - groups: groups.GroupsConfig userdirectory: user_directory.UserDirectoryConfig consent: consent.ConsentConfig stats: stats.StatsConfig diff --git a/synapse/config/groups.py b/synapse/config/groups.py deleted file mode 100644 index baa051fdd47f..000000000000 --- a/synapse/config/groups.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2017 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Any - -from synapse.types import JsonDict - -from ._base import Config - - -class GroupsConfig(Config): - section = "groups" - - def read_config(self, config: JsonDict, **kwargs: Any) -> None: - self.enable_group_creation = config.get("enable_group_creation", False) - self.group_creation_prefix = config.get("group_creation_prefix", "") diff --git a/synapse/config/homeserver.py b/synapse/config/homeserver.py index a4ec70690802..4d2b298a70be 100644 --- a/synapse/config/homeserver.py +++ b/synapse/config/homeserver.py @@ -25,7 +25,6 @@ from .emailconfig import EmailConfig from .experimental import ExperimentalConfig from .federation import FederationConfig -from .groups import GroupsConfig from .jwt import JWTConfig from .key import KeyConfig from .logger import LoggingConfig @@ -89,7 +88,6 @@ class HomeServerConfig(RootConfig): PushConfig, SpamCheckerConfig, RoomConfig, - GroupsConfig, UserDirectoryConfig, ConsentConfig, StatsConfig, diff --git a/synapse/groups/__init__.py b/synapse/groups/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py deleted file mode 100644 index ed26d6a6ce72..000000000000 --- a/synapse/groups/attestations.py +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright 2017 Vector Creations Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Attestations ensure that users and groups can't lie about their memberships. 
- -When a user joins a group the HS and GS swap attestations, which allow them -both to independently prove to third parties their membership.These -attestations have a validity period so need to be periodically renewed. - -If a user leaves (or gets kicked out of) a group, either side can still use -their attestation to "prove" their membership, until the attestation expires. -Therefore attestations shouldn't be relied on to prove membership in important -cases, but can for less important situations, e.g. showing a users membership -of groups on their profile, showing flairs, etc. - -An attestation is a signed blob of json that looks like: - - { - "user_id": "@foo:a.example.com", - "group_id": "+bar:b.example.com", - "valid_until_ms": 1507994728530, - "signatures":{"matrix.org":{"ed25519:auto":"..."}} - } -""" - -import logging -import random -from typing import TYPE_CHECKING, Optional, Tuple - -from signedjson.sign import sign_json - -from twisted.internet.defer import Deferred - -from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError -from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.types import JsonDict, get_domain_from_id - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -# Default validity duration for new attestations we create -DEFAULT_ATTESTATION_LENGTH_MS = 3 * 24 * 60 * 60 * 1000 - -# We add some jitter to the validity duration of attestations so that if we -# add lots of users at once we don't need to renew them all at once. 
-# The jitter is a multiplier picked randomly between the first and second number -DEFAULT_ATTESTATION_JITTER = (0.9, 1.3) - -# Start trying to update our attestations when they come this close to expiring -UPDATE_ATTESTATION_TIME_MS = 1 * 24 * 60 * 60 * 1000 - - -class GroupAttestationSigning: - """Creates and verifies group attestations.""" - - def __init__(self, hs: "HomeServer"): - self.keyring = hs.get_keyring() - self.clock = hs.get_clock() - self.server_name = hs.hostname - self.signing_key = hs.signing_key - - async def verify_attestation( - self, - attestation: JsonDict, - group_id: str, - user_id: str, - server_name: Optional[str] = None, - ) -> None: - """Verifies that the given attestation matches the given parameters. - - An optional server_name can be supplied to explicitly set which server's - signature is expected. Otherwise assumes that either the group_id or user_id - is local and uses the other's server as the one to check. - """ - - if not server_name: - if get_domain_from_id(group_id) == self.server_name: - server_name = get_domain_from_id(user_id) - elif get_domain_from_id(user_id) == self.server_name: - server_name = get_domain_from_id(group_id) - else: - raise Exception("Expected either group_id or user_id to be local") - - if user_id != attestation["user_id"]: - raise SynapseError(400, "Attestation has incorrect user_id") - - if group_id != attestation["group_id"]: - raise SynapseError(400, "Attestation has incorrect group_id") - valid_until_ms = attestation["valid_until_ms"] - - # TODO: We also want to check that *new* attestations that people give - # us to store are valid for at least a little while. 
- now = self.clock.time_msec() - if valid_until_ms < now: - raise SynapseError(400, "Attestation expired") - - assert server_name is not None - await self.keyring.verify_json_for_server( - server_name, - attestation, - now, - ) - - def create_attestation(self, group_id: str, user_id: str) -> JsonDict: - """Create an attestation for the group_id and user_id with default - validity length. - """ - validity_period = DEFAULT_ATTESTATION_LENGTH_MS * random.uniform( - *DEFAULT_ATTESTATION_JITTER - ) - valid_until_ms = int(self.clock.time_msec() + validity_period) - - return sign_json( - { - "group_id": group_id, - "user_id": user_id, - "valid_until_ms": valid_until_ms, - }, - self.server_name, - self.signing_key, - ) - - -class GroupAttestionRenewer: - """Responsible for sending and receiving attestation updates.""" - - def __init__(self, hs: "HomeServer"): - self.clock = hs.get_clock() - self.store = hs.get_datastores().main - self.assestations = hs.get_groups_attestation_signing() - self.transport_client = hs.get_federation_transport_client() - self.is_mine_id = hs.is_mine_id - self.attestations = hs.get_groups_attestation_signing() - - if not hs.config.worker.worker_app: - self._renew_attestations_loop = self.clock.looping_call( - self._start_renew_attestations, 30 * 60 * 1000 - ) - - async def on_renew_attestation( - self, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: - """When a remote updates an attestation""" - attestation = content["attestation"] - - if not self.is_mine_id(group_id) and not self.is_mine_id(user_id): - raise SynapseError(400, "Neither user not group are on this server") - - await self.attestations.verify_attestation( - attestation, user_id=user_id, group_id=group_id - ) - - await self.store.update_remote_attestion(group_id, user_id, attestation) - - return {} - - def _start_renew_attestations(self) -> "Deferred[None]": - return run_as_background_process("renew_attestations", self._renew_attestations) - - async def 
_renew_attestations(self) -> None: - """Called periodically to check if we need to update any of our attestations""" - - now = self.clock.time_msec() - - rows = await self.store.get_attestations_need_renewals( - now + UPDATE_ATTESTATION_TIME_MS - ) - - async def _renew_attestation(group_user: Tuple[str, str]) -> None: - group_id, user_id = group_user - try: - if not self.is_mine_id(group_id): - destination = get_domain_from_id(group_id) - elif not self.is_mine_id(user_id): - destination = get_domain_from_id(user_id) - else: - logger.warning( - "Incorrectly trying to do attestations for user: %r in %r", - user_id, - group_id, - ) - await self.store.remove_attestation_renewal(group_id, user_id) - return - - attestation = self.attestations.create_attestation(group_id, user_id) - - await self.transport_client.renew_group_attestation( - destination, group_id, user_id, content={"attestation": attestation} - ) - - await self.store.update_attestation_renewal( - group_id, user_id, attestation - ) - except (RequestSendFailed, HttpResponseException) as e: - logger.warning( - "Failed to renew attestation of %r in %r: %s", user_id, group_id, e - ) - except Exception: - logger.exception( - "Error renewing attestation of %r in %r", user_id, group_id - ) - - for row in rows: - await _renew_attestation((row["group_id"], row["user_id"])) diff --git a/synapse/groups/groups_server.py b/synapse/groups/groups_server.py deleted file mode 100644 index dfd24af695ab..000000000000 --- a/synapse/groups/groups_server.py +++ /dev/null @@ -1,1019 +0,0 @@ -# Copyright 2017 Vector Creations Ltd -# Copyright 2018 New Vector Ltd -# Copyright 2019 Michael Telatynski <7t3chguy@gmail.com> -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import TYPE_CHECKING, Optional - -from synapse.api.errors import Codes, SynapseError -from synapse.handlers.groups_local import GroupsLocalHandler -from synapse.handlers.profile import MAX_AVATAR_URL_LEN, MAX_DISPLAYNAME_LEN -from synapse.types import GroupID, JsonDict, RoomID, UserID, get_domain_from_id -from synapse.util.async_helpers import concurrently_execute - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -# TODO: Allow users to "knock" or simply join depending on rules -# TODO: Federation admin APIs -# TODO: is_privileged flag to users and is_public to users and rooms -# TODO: Audit log for admins (profile updates, membership changes, users who tried -# to join but were rejected, etc) -# TODO: Flairs - - -# Note that the maximum lengths are somewhat arbitrary. 
-MAX_SHORT_DESC_LEN = 1000 -MAX_LONG_DESC_LEN = 10000 - - -class GroupsServerWorkerHandler: - def __init__(self, hs: "HomeServer"): - self.hs = hs - self.store = hs.get_datastores().main - self.room_list_handler = hs.get_room_list_handler() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.keyring = hs.get_keyring() - self.is_mine_id = hs.is_mine_id - self.signing_key = hs.signing_key - self.server_name = hs.hostname - self.attestations = hs.get_groups_attestation_signing() - self.transport_client = hs.get_federation_transport_client() - self.profile_handler = hs.get_profile_handler() - - async def check_group_is_ours( - self, - group_id: str, - requester_user_id: str, - and_exists: bool = False, - and_is_admin: Optional[str] = None, - ) -> Optional[dict]: - """Check that the group is ours, and optionally if it exists. - - If group does exist then return group. - - Args: - group_id: The group ID to check. - requester_user_id: The user ID of the requester. - and_exists: whether to also check if group exists - and_is_admin: whether to also check if given str is a user_id - that is an admin - """ - if not self.is_mine_id(group_id): - raise SynapseError(400, "Group not on this server") - - group = await self.store.get_group(group_id) - if and_exists and not group: - raise SynapseError(404, "Unknown group") - - is_user_in_group = await self.store.is_user_in_group( - requester_user_id, group_id - ) - if group and not is_user_in_group and not group["is_public"]: - raise SynapseError(404, "Unknown group") - - if and_is_admin: - is_admin = await self.store.is_user_admin_in_group(group_id, and_is_admin) - if not is_admin: - raise SynapseError(403, "User is not admin in group") - - return group - - async def get_group_summary( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the summary for a group as seen by requester_user_id. - - The group summary consists of the profile of the room, and a curated - list of users and rooms. 
These list *may* be organised by role/category. - The roles/categories are ordered, and so are the users/rooms within them. - - A user/room may appear in multiple roles/categories. - """ - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = await self.store.is_user_in_group( - requester_user_id, group_id - ) - - profile = await self.get_group_profile(group_id, requester_user_id) - - users, roles = await self.store.get_users_for_summary_by_role( - group_id, include_private=is_user_in_group - ) - - # TODO: Add profiles to users - - rooms, categories = await self.store.get_rooms_for_summary_by_category( - group_id, include_private=is_user_in_group - ) - - for room_entry in rooms: - room_id = room_entry["room_id"] - joined_users = await self.store.get_users_in_room(room_id) - entry = await self.room_list_handler.generate_room_entry( - room_id, len(joined_users), with_alias=False, allow_private=True - ) - if entry is None: - continue - entry = dict(entry) # so we don't change what's cached - entry.pop("room_id", None) - - room_entry["profile"] = entry - - rooms.sort(key=lambda e: e.get("order", 0)) - - for user in users: - user_id = user["user_id"] - - if not self.is_mine_id(requester_user_id): - attestation = await self.store.get_remote_attestation(group_id, user_id) - if not attestation: - continue - - user["attestation"] = attestation - else: - user["attestation"] = self.attestations.create_attestation( - group_id, user_id - ) - - user_profile = await self.profile_handler.get_profile_from_cache(user_id) - user.update(user_profile) - - users.sort(key=lambda e: e.get("order", 0)) - - membership_info = await self.store.get_users_membership_info_in_group( - group_id, requester_user_id - ) - - return { - "profile": profile, - "users_section": { - "users": users, - "roles": roles, - "total_user_count_estimate": 0, # TODO - }, - "rooms_section": { - "rooms": rooms, - "categories": categories, - "total_room_count_estimate": 0, 
# TODO - }, - "user": membership_info, - } - - async def get_group_categories( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get all categories in a group (as seen by user)""" - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - categories = await self.store.get_group_categories(group_id=group_id) - return {"categories": categories} - - async def get_group_category( - self, group_id: str, requester_user_id: str, category_id: str - ) -> JsonDict: - """Get a specific category in a group (as seen by user)""" - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - return await self.store.get_group_category( - group_id=group_id, category_id=category_id - ) - - async def get_group_roles(self, group_id: str, requester_user_id: str) -> JsonDict: - """Get all roles in a group (as seen by user)""" - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - roles = await self.store.get_group_roles(group_id=group_id) - return {"roles": roles} - - async def get_group_role( - self, group_id: str, requester_user_id: str, role_id: str - ) -> JsonDict: - """Get a specific role in a group (as seen by user)""" - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - return await self.store.get_group_role(group_id=group_id, role_id=role_id) - - async def get_group_profile( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the group profile as seen by requester_user_id""" - - await self.check_group_is_ours(group_id, requester_user_id) - - group = await self.store.get_group(group_id) - - if group: - cols = [ - "name", - "short_description", - "long_description", - "avatar_url", - "is_public", - ] - group_description = {key: group[key] for key in cols} - group_description["is_openly_joinable"] = group["join_policy"] == "open" - - return group_description - else: - raise SynapseError(404, "Unknown group") - - async def get_users_in_group( - 
self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the users in group as seen by requester_user_id. - - The ordering is arbitrary at the moment - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = await self.store.is_user_in_group( - requester_user_id, group_id - ) - - user_results = await self.store.get_users_in_group( - group_id, include_private=is_user_in_group - ) - - chunk = [] - for user_result in user_results: - g_user_id = user_result["user_id"] - is_public = user_result["is_public"] - is_privileged = user_result["is_admin"] - - entry = {"user_id": g_user_id} - - profile = await self.profile_handler.get_profile_from_cache(g_user_id) - entry.update(profile) - - entry["is_public"] = bool(is_public) - entry["is_privileged"] = bool(is_privileged) - - if not self.is_mine_id(g_user_id): - attestation = await self.store.get_remote_attestation( - group_id, g_user_id - ) - if not attestation: - continue - - entry["attestation"] = attestation - else: - entry["attestation"] = self.attestations.create_attestation( - group_id, g_user_id - ) - - chunk.append(entry) - - # TODO: If admin add lists of users whose attestations have timed out - - return {"chunk": chunk, "total_user_count_estimate": len(user_results)} - - async def get_invited_users_in_group( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the users that have been invited to a group as seen by requester_user_id. 
- - The ordering is arbitrary at the moment - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = await self.store.is_user_in_group( - requester_user_id, group_id - ) - - if not is_user_in_group: - raise SynapseError(403, "User not in group") - - invited_users = await self.store.get_invited_users_in_group(group_id) - - user_profiles = [] - - for user_id in invited_users: - user_profile = {"user_id": user_id} - try: - profile = await self.profile_handler.get_profile_from_cache(user_id) - user_profile.update(profile) - except Exception as e: - logger.warning("Error getting profile for %s: %s", user_id, e) - user_profiles.append(user_profile) - - return {"chunk": user_profiles, "total_user_count_estimate": len(invited_users)} - - async def get_rooms_in_group( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the rooms in group as seen by requester_user_id - - This returns rooms in order of decreasing number of joined users - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_user_in_group = await self.store.is_user_in_group( - requester_user_id, group_id - ) - - # Note! room_results["is_public"] is about whether the room is considered - # public from the group's point of view. (i.e. whether non-group members - # should be able to see the room is in the group). - # This is not the same as whether the room itself is public (in the sense - # of being visible in the room directory). - # As such, room_results["is_public"] itself is not sufficient to determine - # whether any given user is permitted to see the room's metadata. 
- room_results = await self.store.get_rooms_in_group( - group_id, include_private=is_user_in_group - ) - - chunk = [] - for room_result in room_results: - room_id = room_result["room_id"] - - joined_users = await self.store.get_users_in_room(room_id) - - # check the user is actually allowed to see the room before showing it to them - allow_private = requester_user_id in joined_users - - entry = await self.room_list_handler.generate_room_entry( - room_id, - len(joined_users), - with_alias=False, - allow_private=allow_private, - ) - - if not entry: - continue - - entry["is_public"] = bool(room_result["is_public"]) - - chunk.append(entry) - - chunk.sort(key=lambda e: -e["num_joined_members"]) - - return {"chunk": chunk, "total_room_count_estimate": len(chunk)} - - -class GroupsServerHandler(GroupsServerWorkerHandler): - def __init__(self, hs: "HomeServer"): - super().__init__(hs) - - # Ensure attestations get renewed - hs.get_groups_attestation_renewer() - - async def update_group_summary_room( - self, - group_id: str, - requester_user_id: str, - room_id: str, - category_id: str, - content: JsonDict, - ) -> JsonDict: - """Add/update a room to the group summary""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - RoomID.from_string(room_id) # Ensure valid room id - - order = content.get("order", None) - - is_public = _parse_visibility_from_contents(content) - - await self.store.add_room_to_summary( - group_id=group_id, - room_id=room_id, - category_id=category_id, - order=order, - is_public=is_public, - ) - - return {} - - async def delete_group_summary_room( - self, group_id: str, requester_user_id: str, room_id: str, category_id: str - ) -> JsonDict: - """Remove a room from the summary""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - await self.store.remove_room_from_summary( - group_id=group_id, room_id=room_id, 
category_id=category_id - ) - - return {} - - async def set_group_join_policy( - self, group_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """Sets the group join policy. - - Currently supported policies are: - - "invite": an invite must be received and accepted in order to join. - - "open": anyone can join. - """ - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - join_policy = _parse_join_policy_from_contents(content) - if join_policy is None: - raise SynapseError(400, "No value specified for 'm.join_policy'") - - await self.store.set_group_join_policy(group_id, join_policy=join_policy) - - return {} - - async def update_group_category( - self, group_id: str, requester_user_id: str, category_id: str, content: JsonDict - ) -> JsonDict: - """Add/Update a group category""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - is_public = _parse_visibility_from_contents(content) - profile = content.get("profile") - - await self.store.upsert_group_category( - group_id=group_id, - category_id=category_id, - is_public=is_public, - profile=profile, - ) - - return {} - - async def delete_group_category( - self, group_id: str, requester_user_id: str, category_id: str - ) -> JsonDict: - """Delete a group category""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - await self.store.remove_group_category( - group_id=group_id, category_id=category_id - ) - - return {} - - async def update_group_role( - self, group_id: str, requester_user_id: str, role_id: str, content: JsonDict - ) -> JsonDict: - """Add/update a role in a group""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - is_public = _parse_visibility_from_contents(content) - - profile = content.get("profile") - - await 
self.store.upsert_group_role( - group_id=group_id, role_id=role_id, is_public=is_public, profile=profile - ) - - return {} - - async def delete_group_role( - self, group_id: str, requester_user_id: str, role_id: str - ) -> JsonDict: - """Remove role from group""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - await self.store.remove_group_role(group_id=group_id, role_id=role_id) - - return {} - - async def update_group_summary_user( - self, - group_id: str, - requester_user_id: str, - user_id: str, - role_id: str, - content: JsonDict, - ) -> JsonDict: - """Add/update a users entry in the group summary""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - order = content.get("order", None) - - is_public = _parse_visibility_from_contents(content) - - await self.store.add_user_to_summary( - group_id=group_id, - user_id=user_id, - role_id=role_id, - order=order, - is_public=is_public, - ) - - return {} - - async def delete_group_summary_user( - self, group_id: str, requester_user_id: str, user_id: str, role_id: str - ) -> JsonDict: - """Remove a user from the group summary""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - await self.store.remove_user_from_summary( - group_id=group_id, user_id=user_id, role_id=role_id - ) - - return {} - - async def update_group_profile( - self, group_id: str, requester_user_id: str, content: JsonDict - ) -> None: - """Update the group profile""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - profile = {} - for keyname, max_length in ( - ("name", MAX_DISPLAYNAME_LEN), - ("avatar_url", MAX_AVATAR_URL_LEN), - ("short_description", MAX_SHORT_DESC_LEN), - ("long_description", MAX_LONG_DESC_LEN), - ): - if keyname in content: - value = content[keyname] - 
if not isinstance(value, str): - raise SynapseError( - 400, - "%r value is not a string" % (keyname,), - errcode=Codes.INVALID_PARAM, - ) - if len(value) > max_length: - raise SynapseError( - 400, - "Invalid %s parameter" % (keyname,), - errcode=Codes.INVALID_PARAM, - ) - profile[keyname] = value - - await self.store.update_group_profile(group_id, profile) - - async def add_room_to_group( - self, group_id: str, requester_user_id: str, room_id: str, content: JsonDict - ) -> JsonDict: - """Add room to group""" - RoomID.from_string(room_id) # Ensure valid room id - - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - is_public = _parse_visibility_from_contents(content) - - await self.store.add_room_to_group(group_id, room_id, is_public=is_public) - - return {} - - async def update_room_in_group( - self, - group_id: str, - requester_user_id: str, - room_id: str, - config_key: str, - content: JsonDict, - ) -> JsonDict: - """Update room in group""" - RoomID.from_string(room_id) # Ensure valid room id - - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - if config_key == "m.visibility": - is_public = _parse_visibility_dict(content) - - await self.store.update_room_in_group_visibility( - group_id, room_id, is_public=is_public - ) - else: - raise SynapseError(400, "Unknown config option") - - return {} - - async def remove_room_from_group( - self, group_id: str, requester_user_id: str, room_id: str - ) -> JsonDict: - """Remove room from group""" - await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - - await self.store.remove_room_from_group(group_id, room_id) - - return {} - - async def invite_to_group( - self, group_id: str, user_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """Invite user to group""" - - group = await self.check_group_is_ours( - 
group_id, requester_user_id, and_exists=True, and_is_admin=requester_user_id - ) - if not group: - raise SynapseError(400, "Group does not exist", errcode=Codes.BAD_STATE) - - # TODO: Check if user knocked - - invited_users = await self.store.get_invited_users_in_group(group_id) - if user_id in invited_users: - raise SynapseError( - 400, "User already invited to group", errcode=Codes.BAD_STATE - ) - - user_results = await self.store.get_users_in_group( - group_id, include_private=True - ) - if user_id in (user_result["user_id"] for user_result in user_results): - raise SynapseError(400, "User already in group") - - content = { - "profile": {"name": group["name"], "avatar_url": group["avatar_url"]}, - "inviter": requester_user_id, - } - - if self.hs.is_mine_id(user_id): - groups_local = self.hs.get_groups_local_handler() - assert isinstance( - groups_local, GroupsLocalHandler - ), "Workers cannot invites users to groups." - res = await groups_local.on_invite(group_id, user_id, content) - local_attestation = None - else: - local_attestation = self.attestations.create_attestation(group_id, user_id) - content.update({"attestation": local_attestation}) - - res = await self.transport_client.invite_to_group_notification( - get_domain_from_id(user_id), group_id, user_id, content - ) - - user_profile = res.get("user_profile", {}) - await self.store.add_remote_profile_cache( - user_id, - displayname=user_profile.get("displayname"), - avatar_url=user_profile.get("avatar_url"), - ) - - if res["state"] == "join": - if not self.hs.is_mine_id(user_id): - remote_attestation = res["attestation"] - - await self.attestations.verify_attestation( - remote_attestation, user_id=user_id, group_id=group_id - ) - else: - remote_attestation = None - - await self.store.add_user_to_group( - group_id, - user_id, - is_admin=False, - is_public=False, # TODO - local_attestation=local_attestation, - remote_attestation=remote_attestation, - ) - return {"state": "join"} - elif res["state"] == 
"invite": - await self.store.add_group_invite(group_id, user_id) - return {"state": "invite"} - elif res["state"] == "reject": - return {"state": "reject"} - else: - raise SynapseError(502, "Unknown state returned by HS") - - async def _add_user( - self, group_id: str, user_id: str, content: JsonDict - ) -> Optional[JsonDict]: - """Add a user to a group based on a content dict. - - See accept_invite, join_group. - """ - if not self.hs.is_mine_id(user_id): - local_attestation: Optional[ - JsonDict - ] = self.attestations.create_attestation(group_id, user_id) - - remote_attestation = content["attestation"] - - await self.attestations.verify_attestation( - remote_attestation, user_id=user_id, group_id=group_id - ) - else: - local_attestation = None - remote_attestation = None - - is_public = _parse_visibility_from_contents(content) - - await self.store.add_user_to_group( - group_id, - user_id, - is_admin=False, - is_public=is_public, - local_attestation=local_attestation, - remote_attestation=remote_attestation, - ) - - return local_attestation - - async def accept_invite( - self, group_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """User tries to accept an invite to the group. - - This is different from them asking to join, and so should error if no - invite exists (and they're not a member of the group) - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_invited = await self.store.is_user_invited_to_local_group( - group_id, requester_user_id - ) - if not is_invited: - raise SynapseError(403, "User not invited to group") - - local_attestation = await self._add_user(group_id, requester_user_id, content) - - return {"state": "join", "attestation": local_attestation} - - async def join_group( - self, group_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """User tries to join the group. 
- - This will error if the group requires an invite/knock to join - """ - - group_info = await self.check_group_is_ours( - group_id, requester_user_id, and_exists=True - ) - if not group_info: - raise SynapseError(404, "Group does not exist", errcode=Codes.NOT_FOUND) - if group_info["join_policy"] != "open": - raise SynapseError(403, "Group is not publicly joinable") - - local_attestation = await self._add_user(group_id, requester_user_id, content) - - return {"state": "join", "attestation": local_attestation} - - async def remove_user_from_group( - self, group_id: str, user_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """Remove a user from the group; either a user is leaving or an admin - kicked them. - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - is_kick = False - if requester_user_id != user_id: - is_admin = await self.store.is_user_admin_in_group( - group_id, requester_user_id - ) - if not is_admin: - raise SynapseError(403, "User is not admin in group") - - is_kick = True - - await self.store.remove_user_from_group(group_id, user_id) - - if is_kick: - if self.hs.is_mine_id(user_id): - groups_local = self.hs.get_groups_local_handler() - assert isinstance( - groups_local, GroupsLocalHandler - ), "Workers cannot remove users from groups." 
- await groups_local.user_removed_from_group(group_id, user_id, {}) - else: - await self.transport_client.remove_user_from_group_notification( - get_domain_from_id(user_id), group_id, user_id, {} - ) - - if not self.hs.is_mine_id(user_id): - await self.store.maybe_delete_remote_profile_cache(user_id) - - # Delete group if the last user has left - users = await self.store.get_users_in_group(group_id, include_private=True) - if not users: - await self.store.delete_group(group_id) - - return {} - - async def create_group( - self, group_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - logger.info("Attempting to create group with ID: %r", group_id) - - # parsing the id into a GroupID validates it. - group_id_obj = GroupID.from_string(group_id) - - group = await self.check_group_is_ours(group_id, requester_user_id) - if group: - raise SynapseError(400, "Group already exists") - - is_admin = await self.auth.is_server_admin( - UserID.from_string(requester_user_id) - ) - if not is_admin: - if not self.hs.config.groups.enable_group_creation: - raise SynapseError( - 403, "Only a server admin can create groups on this server" - ) - localpart = group_id_obj.localpart - if not localpart.startswith(self.hs.config.groups.group_creation_prefix): - raise SynapseError( - 400, - "Can only create groups with prefix %r on this server" - % (self.hs.config.groups.group_creation_prefix,), - ) - - profile = content.get("profile", {}) - name = profile.get("name") - avatar_url = profile.get("avatar_url") - short_description = profile.get("short_description") - long_description = profile.get("long_description") - user_profile = content.get("user_profile", {}) - - await self.store.create_group( - group_id, - requester_user_id, - name=name, - avatar_url=avatar_url, - short_description=short_description, - long_description=long_description, - ) - - if not self.hs.is_mine_id(requester_user_id): - remote_attestation = content["attestation"] - - await 
self.attestations.verify_attestation( - remote_attestation, user_id=requester_user_id, group_id=group_id - ) - - local_attestation: Optional[ - JsonDict - ] = self.attestations.create_attestation(group_id, requester_user_id) - else: - local_attestation = None - remote_attestation = None - - await self.store.add_user_to_group( - group_id, - requester_user_id, - is_admin=True, - is_public=True, # TODO - local_attestation=local_attestation, - remote_attestation=remote_attestation, - ) - - if not self.hs.is_mine_id(requester_user_id): - await self.store.add_remote_profile_cache( - requester_user_id, - displayname=user_profile.get("displayname"), - avatar_url=user_profile.get("avatar_url"), - ) - - return {"group_id": group_id} - - async def delete_group(self, group_id: str, requester_user_id: str) -> None: - """Deletes a group, kicking out all current members. - - Only group admins or server admins can call this request - - Args: - group_id: The group ID to delete. - requester_user_id: The user requesting to delete the group. - """ - - await self.check_group_is_ours(group_id, requester_user_id, and_exists=True) - - # Only server admins or group admins can delete groups. - - is_admin = await self.store.is_user_admin_in_group(group_id, requester_user_id) - - if not is_admin: - is_admin = await self.auth.is_server_admin( - UserID.from_string(requester_user_id) - ) - - if not is_admin: - raise SynapseError(403, "User is not an admin") - - # Before deleting the group lets kick everyone out of it - users = await self.store.get_users_in_group(group_id, include_private=True) - - async def _kick_user_from_group(user_id: str) -> None: - if self.hs.is_mine_id(user_id): - groups_local = self.hs.get_groups_local_handler() - assert isinstance( - groups_local, GroupsLocalHandler - ), "Workers cannot kick users from groups." 
- await groups_local.user_removed_from_group(group_id, user_id, {}) - else: - await self.transport_client.remove_user_from_group_notification( - get_domain_from_id(user_id), group_id, user_id, {} - ) - await self.store.maybe_delete_remote_profile_cache(user_id) - - # We kick users out in the order of: - # 1. Non-admins - # 2. Other admins - # 3. The requester - # - # This is so that if the deletion fails for some reason other admins or - # the requester still has auth to retry. - non_admins = [] - admins = [] - for u in users: - if u["user_id"] == requester_user_id: - continue - if u["is_admin"]: - admins.append(u["user_id"]) - else: - non_admins.append(u["user_id"]) - - await concurrently_execute(_kick_user_from_group, non_admins, 10) - await concurrently_execute(_kick_user_from_group, admins, 10) - await _kick_user_from_group(requester_user_id) - - await self.store.delete_group(group_id) - - -def _parse_join_policy_from_contents(content: JsonDict) -> Optional[str]: - """Given a content for a request, return the specified join policy or None""" - - join_policy_dict = content.get("m.join_policy") - if join_policy_dict: - return _parse_join_policy_dict(join_policy_dict) - else: - return None - - -def _parse_join_policy_dict(join_policy_dict: JsonDict) -> str: - """Given a dict for the "m.join_policy" config return the join policy specified""" - join_policy_type = join_policy_dict.get("type") - if not join_policy_type: - return "invite" - - if join_policy_type not in ("invite", "open"): - raise SynapseError(400, "Synapse only supports 'invite'/'open' join rule") - return join_policy_type - - -def _parse_visibility_from_contents(content: JsonDict) -> bool: - """Given a content for a request parse out whether the entity should be - public or not - """ - - visibility = content.get("m.visibility") - if visibility: - return _parse_visibility_dict(visibility) - else: - is_public = True - - return is_public - - -def _parse_visibility_dict(visibility: JsonDict) -> bool: - 
"""Given a dict for the "m.visibility" config return if the entity should - be public or not - """ - vis_type = visibility.get("type") - if not vis_type: - return True - - if vis_type not in ("public", "private"): - raise SynapseError(400, "Synapse only supports 'public'/'private' visibility") - return vis_type == "public" diff --git a/synapse/handlers/groups_local.py b/synapse/handlers/groups_local.py deleted file mode 100644 index e7a399787beb..000000000000 --- a/synapse/handlers/groups_local.py +++ /dev/null @@ -1,503 +0,0 @@ -# Copyright 2017 Vector Creations Ltd -# Copyright 2018 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import logging -from typing import TYPE_CHECKING, Any, Awaitable, Callable, Dict, Iterable, List, Set - -from synapse.api.errors import HttpResponseException, RequestSendFailed, SynapseError -from synapse.types import GroupID, JsonDict, get_domain_from_id - -if TYPE_CHECKING: - from synapse.server import HomeServer - -logger = logging.getLogger(__name__) - - -def _create_rerouter(func_name: str) -> Callable[..., Awaitable[JsonDict]]: - """Returns an async function that looks at the group id and calls the function - on federation or the local group server if the group is local - """ - - async def f( - self: "GroupsLocalWorkerHandler", group_id: str, *args: Any, **kwargs: Any - ) -> JsonDict: - if not GroupID.is_valid(group_id): - raise SynapseError(400, "%s is not a legal group ID" % (group_id,)) - - if self.is_mine_id(group_id): - return await getattr(self.groups_server_handler, func_name)( - group_id, *args, **kwargs - ) - else: - destination = get_domain_from_id(group_id) - - try: - return await getattr(self.transport_client, func_name)( - destination, group_id, *args, **kwargs - ) - except HttpResponseException as e: - # Capture errors returned by the remote homeserver and - # re-throw specific errors as SynapseErrors. This is so - # when the remote end responds with things like 403 Not - # In Group, we can communicate that to the client instead - # of a 500. 
- raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - return f - - -class GroupsLocalWorkerHandler: - def __init__(self, hs: "HomeServer"): - self.hs = hs - self.store = hs.get_datastores().main - self.room_list_handler = hs.get_room_list_handler() - self.groups_server_handler = hs.get_groups_server_handler() - self.transport_client = hs.get_federation_transport_client() - self.auth = hs.get_auth() - self.clock = hs.get_clock() - self.keyring = hs.get_keyring() - self.is_mine_id = hs.is_mine_id - self.signing_key = hs.signing_key - self.server_name = hs.hostname - self.notifier = hs.get_notifier() - self.attestations = hs.get_groups_attestation_signing() - - self.profile_handler = hs.get_profile_handler() - - # The following functions merely route the query to the local groups server - # or federation depending on if the group is local or remote - - get_group_profile = _create_rerouter("get_group_profile") - get_rooms_in_group = _create_rerouter("get_rooms_in_group") - get_invited_users_in_group = _create_rerouter("get_invited_users_in_group") - get_group_category = _create_rerouter("get_group_category") - get_group_categories = _create_rerouter("get_group_categories") - get_group_role = _create_rerouter("get_group_role") - get_group_roles = _create_rerouter("get_group_roles") - - async def get_group_summary( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get the group summary for a group. - - If the group is remote we check that the users have valid attestations. 
- """ - if self.is_mine_id(group_id): - res = await self.groups_server_handler.get_group_summary( - group_id, requester_user_id - ) - else: - try: - res = await self.transport_client.get_group_summary( - get_domain_from_id(group_id), group_id, requester_user_id - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - group_server_name = get_domain_from_id(group_id) - - # Loop through the users and validate the attestations. - chunk = res["users_section"]["users"] - valid_users = [] - for entry in chunk: - g_user_id = entry["user_id"] - attestation = entry.pop("attestation", {}) - try: - if get_domain_from_id(g_user_id) != group_server_name: - await self.attestations.verify_attestation( - attestation, - group_id=group_id, - user_id=g_user_id, - server_name=get_domain_from_id(g_user_id), - ) - valid_users.append(entry) - except Exception as e: - logger.info("Failed to verify user is in group: %s", e) - - res["users_section"]["users"] = valid_users - - res["users_section"]["users"].sort(key=lambda e: e.get("order", 0)) - res["rooms_section"]["rooms"].sort(key=lambda e: e.get("order", 0)) - - # Add `is_publicised` flag to indicate whether the user has publicised their - # membership of the group on their profile - result = await self.store.get_publicised_groups_for_user(requester_user_id) - is_publicised = group_id in result - - res.setdefault("user", {})["is_publicised"] = is_publicised - - return res - - async def get_users_in_group( - self, group_id: str, requester_user_id: str - ) -> JsonDict: - """Get users in a group""" - if self.is_mine_id(group_id): - return await self.groups_server_handler.get_users_in_group( - group_id, requester_user_id - ) - - group_server_name = get_domain_from_id(group_id) - - try: - res = await self.transport_client.get_users_in_group( - get_domain_from_id(group_id), group_id, requester_user_id - ) - except HttpResponseException as e: 
- raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - chunk = res["chunk"] - valid_entries = [] - for entry in chunk: - g_user_id = entry["user_id"] - attestation = entry.pop("attestation", {}) - try: - if get_domain_from_id(g_user_id) != group_server_name: - await self.attestations.verify_attestation( - attestation, - group_id=group_id, - user_id=g_user_id, - server_name=get_domain_from_id(g_user_id), - ) - valid_entries.append(entry) - except Exception as e: - logger.info("Failed to verify user is in group: %s", e) - - res["chunk"] = valid_entries - - return res - - async def get_joined_groups(self, user_id: str) -> JsonDict: - group_ids = await self.store.get_joined_groups(user_id) - return {"groups": group_ids} - - async def get_publicised_groups_for_user(self, user_id: str) -> JsonDict: - if self.hs.is_mine_id(user_id): - result = await self.store.get_publicised_groups_for_user(user_id) - - # Check AS associated groups for this user - this depends on the - # RegExps in the AS registration file (under `users`) - for app_service in self.store.get_app_services(): - result.extend(app_service.get_groups_for_user(user_id)) - - return {"groups": result} - else: - try: - bulk_result = await self.transport_client.bulk_get_publicised_groups( - get_domain_from_id(user_id), [user_id] - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - result = bulk_result.get("users", {}).get(user_id) - # TODO: Verify attestations - return {"groups": result} - - async def bulk_get_publicised_groups( - self, user_ids: Iterable[str], proxy: bool = True - ) -> JsonDict: - destinations: Dict[str, Set[str]] = {} - local_users = set() - - for user_id in user_ids: - if self.hs.is_mine_id(user_id): - local_users.add(user_id) - else: - destinations.setdefault(get_domain_from_id(user_id), set()).add(user_id) - - if not proxy 
and destinations: - raise SynapseError(400, "Some user_ids are not local") - - results = {} - failed_results: List[str] = [] - for destination, dest_user_ids in destinations.items(): - try: - r = await self.transport_client.bulk_get_publicised_groups( - destination, list(dest_user_ids) - ) - results.update(r["users"]) - except Exception: - failed_results.extend(dest_user_ids) - - for uid in local_users: - results[uid] = await self.store.get_publicised_groups_for_user(uid) - - # Check AS associated groups for this user - this depends on the - # RegExps in the AS registration file (under `users`) - for app_service in self.store.get_app_services(): - results[uid].extend(app_service.get_groups_for_user(uid)) - - return {"users": results} - - -class GroupsLocalHandler(GroupsLocalWorkerHandler): - def __init__(self, hs: "HomeServer"): - super().__init__(hs) - - # Ensure attestations get renewed - hs.get_groups_attestation_renewer() - - # The following functions merely route the query to the local groups server - # or federation depending on if the group is local or remote - - update_group_profile = _create_rerouter("update_group_profile") - - add_room_to_group = _create_rerouter("add_room_to_group") - update_room_in_group = _create_rerouter("update_room_in_group") - remove_room_from_group = _create_rerouter("remove_room_from_group") - - update_group_summary_room = _create_rerouter("update_group_summary_room") - delete_group_summary_room = _create_rerouter("delete_group_summary_room") - - update_group_category = _create_rerouter("update_group_category") - delete_group_category = _create_rerouter("delete_group_category") - - update_group_summary_user = _create_rerouter("update_group_summary_user") - delete_group_summary_user = _create_rerouter("delete_group_summary_user") - - update_group_role = _create_rerouter("update_group_role") - delete_group_role = _create_rerouter("delete_group_role") - - set_group_join_policy = _create_rerouter("set_group_join_policy") - - async 
def create_group( - self, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: - """Create a group""" - - logger.info("Asking to create group with ID: %r", group_id) - - if self.is_mine_id(group_id): - res = await self.groups_server_handler.create_group( - group_id, user_id, content - ) - local_attestation = None - remote_attestation = None - else: - raise SynapseError(400, "Unable to create remote groups") - - is_publicised = content.get("publicise", False) - token = await self.store.register_user_group_membership( - group_id, - user_id, - membership="join", - is_admin=True, - local_attestation=local_attestation, - remote_attestation=remote_attestation, - is_publicised=is_publicised, - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) - - return res - - async def join_group( - self, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: - """Request to join a group""" - if self.is_mine_id(group_id): - await self.groups_server_handler.join_group(group_id, user_id, content) - local_attestation = None - remote_attestation = None - else: - local_attestation = self.attestations.create_attestation(group_id, user_id) - content["attestation"] = local_attestation - - try: - res = await self.transport_client.join_group( - get_domain_from_id(group_id), group_id, user_id, content - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - remote_attestation = res["attestation"] - - await self.attestations.verify_attestation( - remote_attestation, - group_id=group_id, - user_id=user_id, - server_name=get_domain_from_id(group_id), - ) - - # TODO: Check that the group is public and we're being added publicly - is_publicised = content.get("publicise", False) - - token = await self.store.register_user_group_membership( - group_id, - user_id, - membership="join", - is_admin=False, - local_attestation=local_attestation, - 
remote_attestation=remote_attestation, - is_publicised=is_publicised, - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) - - return {} - - async def accept_invite( - self, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: - """Accept an invite to a group""" - if self.is_mine_id(group_id): - await self.groups_server_handler.accept_invite(group_id, user_id, content) - local_attestation = None - remote_attestation = None - else: - local_attestation = self.attestations.create_attestation(group_id, user_id) - content["attestation"] = local_attestation - - try: - res = await self.transport_client.accept_group_invite( - get_domain_from_id(group_id), group_id, user_id, content - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - remote_attestation = res["attestation"] - - await self.attestations.verify_attestation( - remote_attestation, - group_id=group_id, - user_id=user_id, - server_name=get_domain_from_id(group_id), - ) - - # TODO: Check that the group is public and we're being added publicly - is_publicised = content.get("publicise", False) - - token = await self.store.register_user_group_membership( - group_id, - user_id, - membership="join", - is_admin=False, - local_attestation=local_attestation, - remote_attestation=remote_attestation, - is_publicised=is_publicised, - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) - - return {} - - async def invite( - self, group_id: str, user_id: str, requester_user_id: str, config: JsonDict - ) -> JsonDict: - """Invite a user to a group""" - content = {"requester_user_id": requester_user_id, "config": config} - if self.is_mine_id(group_id): - res = await self.groups_server_handler.invite_to_group( - group_id, user_id, requester_user_id, content - ) - else: - try: - res = await self.transport_client.invite_to_group( - get_domain_from_id(group_id), - group_id, - 
user_id, - requester_user_id, - content, - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - return res - - async def on_invite( - self, group_id: str, user_id: str, content: JsonDict - ) -> JsonDict: - """One of our users were invited to a group""" - # TODO: Support auto join and rejection - - if not self.is_mine_id(user_id): - raise SynapseError(400, "User not on this server") - - local_profile = {} - if "profile" in content: - if "name" in content["profile"]: - local_profile["name"] = content["profile"]["name"] - if "avatar_url" in content["profile"]: - local_profile["avatar_url"] = content["profile"]["avatar_url"] - - token = await self.store.register_user_group_membership( - group_id, - user_id, - membership="invite", - content={"profile": local_profile, "inviter": content["inviter"]}, - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) - try: - user_profile = await self.profile_handler.get_profile(user_id) - except Exception as e: - logger.warning("No profile for user %s: %s", user_id, e) - user_profile = {} - - return {"state": "invite", "user_profile": user_profile} - - async def remove_user_from_group( - self, group_id: str, user_id: str, requester_user_id: str, content: JsonDict - ) -> JsonDict: - """Remove a user from a group""" - if user_id == requester_user_id: - token = await self.store.register_user_group_membership( - group_id, user_id, membership="leave" - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) - - # TODO: Should probably remember that we tried to leave so that we can - # retry if the group server is currently down. 
- - if self.is_mine_id(group_id): - res = await self.groups_server_handler.remove_user_from_group( - group_id, user_id, requester_user_id, content - ) - else: - content["requester_user_id"] = requester_user_id - try: - res = await self.transport_client.remove_user_from_group( - get_domain_from_id(group_id), - group_id, - requester_user_id, - user_id, - content, - ) - except HttpResponseException as e: - raise e.to_synapse_error() - except RequestSendFailed: - raise SynapseError(502, "Failed to contact group server") - - return res - - async def user_removed_from_group( - self, group_id: str, user_id: str, content: JsonDict - ) -> None: - """One of our users was removed/kicked from a group""" - # TODO: Check if user in group - token = await self.store.register_user_group_membership( - group_id, user_id, membership="leave" - ) - self.notifier.on_new_event("groups_key", token, users=[user_id]) diff --git a/synapse/server.py b/synapse/server.py index ee60cce8ebce..3fd23aaf52cd 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -21,17 +21,7 @@ import abc import functools import logging -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - List, - Optional, - TypeVar, - Union, - cast, -) +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypeVar, cast from twisted.internet.interfaces import IOpenSSLContextFactory from twisted.internet.tcp import Port @@ -60,8 +50,6 @@ from synapse.federation.send_queue import FederationRemoteSendQueue from synapse.federation.sender import AbstractFederationSender, FederationSender from synapse.federation.transport.client import TransportLayerClient -from synapse.groups.attestations import GroupAttestationSigning, GroupAttestionRenewer -from synapse.groups.groups_server import GroupsServerHandler, GroupsServerWorkerHandler from synapse.handlers.account import AccountHandler from synapse.handlers.account_data import AccountDataHandler from synapse.handlers.account_validity import 
AccountValidityHandler @@ -79,7 +67,6 @@ from synapse.handlers.events import EventHandler, EventStreamHandler from synapse.handlers.federation import FederationHandler from synapse.handlers.federation_event import FederationEventHandler -from synapse.handlers.groups_local import GroupsLocalHandler, GroupsLocalWorkerHandler from synapse.handlers.identity import IdentityHandler from synapse.handlers.initial_sync import InitialSyncHandler from synapse.handlers.message import EventCreationHandler, MessageHandler @@ -651,30 +638,6 @@ def get_bulk_push_rule_evaluator(self) -> BulkPushRuleEvaluator: def get_user_directory_handler(self) -> UserDirectoryHandler: return UserDirectoryHandler(self) - @cache_in_self - def get_groups_local_handler( - self, - ) -> Union[GroupsLocalWorkerHandler, GroupsLocalHandler]: - if self.config.worker.worker_app: - return GroupsLocalWorkerHandler(self) - else: - return GroupsLocalHandler(self) - - @cache_in_self - def get_groups_server_handler(self): - if self.config.worker.worker_app: - return GroupsServerWorkerHandler(self) - else: - return GroupsServerHandler(self) - - @cache_in_self - def get_groups_attestation_signing(self) -> GroupAttestationSigning: - return GroupAttestationSigning(self) - - @cache_in_self - def get_groups_attestation_renewer(self) -> GroupAttestionRenewer: - return GroupAttestionRenewer(self) - @cache_in_self def get_stats_handler(self) -> StatsHandler: return StatsHandler(self) diff --git a/synapse/types.py b/synapse/types.py index 6f7128ddd604..091cc611ab61 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -320,29 +320,6 @@ class EventID(DomainSpecificString): SIGIL = "$" -@attr.s(slots=True, frozen=True, repr=False) -class GroupID(DomainSpecificString): - """Structure representing a group ID.""" - - SIGIL = "+" - - @classmethod - def from_string(cls: Type[DS], s: str) -> DS: - group_id: DS = super().from_string(s) # type: ignore - - if not group_id.localpart: - raise SynapseError(400, "Group ID cannot be 
empty", Codes.INVALID_PARAM) - - if contains_invalid_mxid_characters(group_id.localpart): - raise SynapseError( - 400, - "Group ID can only contain characters a-z, 0-9, or '=_-./'", - Codes.INVALID_PARAM, - ) - - return group_id - - mxid_localpart_allowed_characters = set( "_-./=" + string.ascii_lowercase + string.digits ) diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py index edc584d0cf50..7135362f7618 100644 --- a/tests/appservice/test_appservice.py +++ b/tests/appservice/test_appservice.py @@ -23,7 +23,7 @@ def _regex(regex: str, exclusive: bool = True) -> Namespace: - return Namespace(exclusive, None, re.compile(regex)) + return Namespace(exclusive, re.compile(regex)) class ApplicationServiceTestCase(unittest.TestCase): diff --git a/tests/test_types.py b/tests/test_types.py index 80888a744d1b..0b10dae84839 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -13,7 +13,7 @@ # limitations under the License. from synapse.api.errors import SynapseError -from synapse.types import GroupID, RoomAlias, UserID, map_username_to_mxid_localpart +from synapse.types import RoomAlias, UserID, map_username_to_mxid_localpart from tests import unittest @@ -62,25 +62,6 @@ def test_validate(self): self.assertFalse(RoomAlias.is_valid(id_string)) -class GroupIDTestCase(unittest.TestCase): - def test_parse(self): - group_id = GroupID.from_string("+group/=_-.123:my.domain") - self.assertEqual("group/=_-.123", group_id.localpart) - self.assertEqual("my.domain", group_id.domain) - - def test_validate(self): - bad_ids = ["$badsigil:domain", "+:empty"] + [ - "+group" + c + ":domain" for c in "A%?æ£" - ] - for id_string in bad_ids: - try: - GroupID.from_string(id_string) - self.fail("Parsing '%s' should raise exception" % id_string) - except SynapseError as exc: - self.assertEqual(400, exc.code) - self.assertEqual("M_INVALID_PARAM", exc.errcode) - - class MapUsernameTestCase(unittest.TestCase): def testPassThrough(self): 
self.assertEqual(map_username_to_mxid_localpart("test1234"), "test1234") From 317248d42cb05ffa39119d6fefb7da286cb46225 Mon Sep 17 00:00:00 2001 From: reivilibre Date: Thu, 26 May 2022 16:07:27 +0100 Subject: [PATCH 115/181] Improve URL previews by not including the content of media tags in the generated description. (#12887) --- changelog.d/12887.misc | 1 + synapse/rest/media/v1/preview_html.py | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 changelog.d/12887.misc diff --git a/changelog.d/12887.misc b/changelog.d/12887.misc new file mode 100644 index 000000000000..7f6f7318329f --- /dev/null +++ b/changelog.d/12887.misc @@ -0,0 +1 @@ +Improve URL previews by not including the content of media tags in the generated description. \ No newline at end of file diff --git a/synapse/rest/media/v1/preview_html.py b/synapse/rest/media/v1/preview_html.py index ca73965fc28f..0358c68a6452 100644 --- a/synapse/rest/media/v1/preview_html.py +++ b/synapse/rest/media/v1/preview_html.py @@ -246,7 +246,9 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]: Grabs any text nodes which are inside the tag, unless they are within an HTML5 semantic markup tag (
,