From 2674aeb96a1b75583dc3ea514cbce580e8ae35c8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Feb 2015 16:16:47 +0000 Subject: [PATCH 1/6] Factor out ExpiringCache from StateHandler --- synapse/state.py | 46 +++----------- synapse/util/expiringcache.py | 115 ++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 37 deletions(-) create mode 100644 synapse/util/expiringcache.py diff --git a/synapse/state.py b/synapse/state.py index fe5f3dc84b47..80cced351d0a 100644 --- a/synapse/state.py +++ b/synapse/state.py @@ -18,6 +18,7 @@ from synapse.util.logutils import log_function from synapse.util.async import run_on_reactor +from synapse.util.expiringcache import ExpiringCache from synapse.api.constants import EventTypes from synapse.api.errors import AuthError from synapse.events.snapshot import EventContext @@ -51,7 +52,6 @@ class _StateCacheEntry(object): def __init__(self, state, state_group, ts): self.state = state self.state_group = state_group - self.ts = ts class StateHandler(object): @@ -69,12 +69,15 @@ def __init__(self, hs): def start_caching(self): logger.debug("start_caching") - self._state_cache = {} - - def f(): - self._prune_cache() + self._state_cache = ExpiringCache( + cache_name="state_cache", + clock=self.clock, + max_len=SIZE_OF_CACHE, + expiry_ms=EVICTION_TIMEOUT_SECONDS*1000, + reset_expiry_on_get=True, + ) - self.clock.looping_call(f, 5*1000) + self._state_cache.start() @defer.inlineCallbacks def get_current_state(self, room_id, event_type=None, state_key=""): @@ -409,34 +412,3 @@ def key_func(e): return -int(e.depth), hashlib.sha1(e.event_id).hexdigest() return sorted(events, key=key_func) - - def _prune_cache(self): - logger.debug( - "_prune_cache. before len: %d", - len(self._state_cache.keys()) - ) - - now = self.clock.time_msec() - - if len(self._state_cache.keys()) > SIZE_OF_CACHE: - sorted_entries = sorted( - self._state_cache.items(), - key=lambda k, v: v.ts, - ) - - for k, _ in sorted_entries[SIZE_OF_CACHE:]: - self._state_cache.pop(k) - - keys_to_delete = set() - - for key, cache_entry in self._state_cache.items(): - if now - cache_entry.ts > EVICTION_TIMEOUT_SECONDS*1000: - keys_to_delete.add(key) - - for k in keys_to_delete: - self._state_cache.pop(k) - - logger.debug( - "_prune_cache. after len: %d", - len(self._state_cache.keys()) - ) diff --git a/synapse/util/expiringcache.py b/synapse/util/expiringcache.py new file mode 100644 index 000000000000..fb5b27ad1025 --- /dev/null +++ b/synapse/util/expiringcache.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + + +logger = logging.getLogger(__name__) + + +class ExpiringCache(object): + def __init__(self, cache_name, clock, max_len=0, expiry_ms=0, + reset_expiry_on_get=False): + """ + Args: + cache_name (str): Name of this cache, used for logging. + clock (Clock) + max_len (int): Max size of dict. If the dict grows larger than this + then the oldest items get automatically evicted. Default is 0, + which indicates there is no max limit. + expiry_ms (int): How long before an item is evicted from the cache + in milliseconds. Default is 0, indicating items never get + evicted based on time. + reset_expiry_on_get (bool): If true, will reset the expiry time for + an item on access. Defaults to False. + + """ + self._cache_name = cache_name + + self._clock = clock + + self._max_len = max_len + self._expiry_ms = expiry_ms + + self._reset_expiry_on_get = reset_expiry_on_get + + self._cache = {} + + def start(self): + if not self._expiry_ms: + # Don't bother starting the loop if things never expire + return + + def f(): + self._prune_cache() + + self._clock.looping_call(f, self._expiry_ms) + + def __setitem__(self, key, value): + now = self._clock.time_msec() + self._cache[key] = _CacheEntry(now, value) + + # Evict if there are now too many items + if self._max_len and len(self._cache.keys()) > self._max_len: + sorted_entries = sorted( + self._cache.items(), + key=lambda k, v: v.time, + ) + + for k, _ in sorted_entries[self._max_len:]: + self._cache.pop(k) + + def __getitem__(self, key): + entry = self._cache[key] + + if self._reset_expiry_on_get: + entry.time = self._clock.time_msec() + + return entry.value + + def get(self, key, default=None): + try: + return self[key] + except KeyError: + return default + + def _purge_cache(self): + if not self._expiry_ms: + # zero expiry time means don't expire. This should never get called + # since we have this check in start too. + return + begin_length = len(self._cache) + + now = self._clock.time_msec() + + keys_to_delete = set() + + for key, cache_entry in self._cache.items(): + if now - cache_entry.time > self._expiry_ms: + keys_to_delete.add(key) + + for k in keys_to_delete: + self._cache.pop(k) + + logger.debug( + "[%s] _prune_cache before: %d, after len: %d", + self._cache_name, begin_length, len(self._cache.keys()) + ) + + +class _CacheEntry(object): + def __init__(self, time, value): + self.time = time + self.value = value From baa5b9a97582d4b3c825be1225aba7863230c047 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 16 Feb 2015 18:02:39 +0000 Subject: [PATCH 2/6] Cache results of get_pdu. --- synapse/federation/federation_client.py | 42 +++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 70c9a6f46bbf..83b4947b992e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -19,7 +19,8 @@ from .federation_base import FederationBase from .units import Edu -from synapse.api.errors import CodeMessageException +from synapse.api.errors import CodeMessageException, SynapseError +from synapse.util.expiringcache import ExpiringCache from synapse.util.logutils import log_function from synapse.events import FrozenEvent @@ -30,6 +31,20 @@ class FederationClient(FederationBase): + def __init__(self): + self._fail_fetch_pdu_cache = None + + def start_pdu_fail_cache(self): + self._fail_fetch_pdu_cache = ExpiringCache( + cache_name="get_pdu_cache", + clock=self._clock, + max_len=1000, + expiry_ms=120*1000, + reset_expiry_on_get=False, + ) + + self._fail_fetch_pdu_cache.start() + @log_function def send_pdu(self, pdu, destinations): """Informs the replication layer about a new PDU generated within the @@ -160,6 +175,11 @@ def get_pdu(self, destinations, event_id, outlier=False): # TODO: Rate limit the number of times we try and get the same event. + if self._fail_fetch_pdu_cache: + e = self._fail_fetch_pdu_cache.get(event_id) + if e: + defer.returnValue(e) + pdu = None for destination in destinations: try: @@ -181,8 +201,21 @@ def get_pdu(self, destinations, event_id, outlier=False): pdu = yield self._check_sigs_and_hash(pdu) break - except CodeMessageException: - raise + except SynapseError: + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, destination, e, + ) + continue + except CodeMessageException as e: + if 400 <= e.code < 500: + raise + + logger.info( + "Failed to get PDU %s from %s because %s", + event_id, destination, e, + ) + continue except Exception as e: logger.info( "Failed to get PDU %s from %s because %s", @@ -190,6 +223,9 @@ def get_pdu(self, destinations, event_id, outlier=False): ) continue + if self._fail_fetch_pdu_cache is not None: + self._fail_fetch_pdu_cache[event_id] = pdu + defer.returnValue(pdu) @defer.inlineCallbacks From 8b919c00f30c03ddae257f0129f58f2d0285723c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Feb 2015 15:44:01 +0000 Subject: [PATCH 3/6] Start the get_pdu cache --- synapse/app/homeserver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 27b478a1c38e..7565d94449cd 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -275,6 +275,7 @@ def setup(): hs.get_pusherpool().start() hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() + hs.get_replication_layer().start_pdu_fail_cache() if config.daemonize: print config.pid_file From e7e20417ca468c1afe2e27162b9790f860d2da51 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Feb 2015 15:44:26 +0000 Subject: [PATCH 4/6] ExpiringCache: purge every 1/2 interval --- synapse/util/expiringcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/expiringcache.py b/synapse/util/expiringcache.py index fb5b27ad1025..3c8409b164bf 100644 --- a/synapse/util/expiringcache.py +++ b/synapse/util/expiringcache.py @@ -55,7 +55,7 @@ def start(self): def f(): self._prune_cache() - self._clock.looping_call(f, self._expiry_ms) + self._clock.looping_call(f, self._expiry_ms/2) def __setitem__(self, key, value): now = self._clock.time_msec() From 964bb43fbe15105c58f9550f376fa76709734cd8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Feb 2015 15:44:41 +0000 Subject: [PATCH 5/6] Fix typo in function name --- synapse/util/expiringcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/util/expiringcache.py b/synapse/util/expiringcache.py index 3c8409b164bf..1c7859297a1c 100644 --- a/synapse/util/expiringcache.py +++ b/synapse/util/expiringcache.py @@ -85,7 +85,7 @@ def get(self, key, default=None): except KeyError: return default - def _purge_cache(self): + def _prune_cache(self): if not self._expiry_ms: # zero expiry time means don't expire. This should never get called # since we have this check in start too. From ec847059f3e9b9b5de62aa2f7ad2366c4e883fac Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 18 Feb 2015 10:14:10 +0000 Subject: [PATCH 6/6] Rename _fail_fetch_pdu_cache to _get_pdu_cache --- synapse/app/homeserver.py | 2 +- synapse/federation/federation_client.py | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 7565d94449cd..7be82d0576d2 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -275,7 +275,7 @@ def setup(): hs.get_pusherpool().start() hs.get_state_handler().start_caching() hs.get_datastore().start_profiling() - hs.get_replication_layer().start_pdu_fail_cache() + hs.get_replication_layer().start_get_pdu_cache() if config.daemonize: print config.pid_file diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 83b4947b992e..6042e366bd7e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -32,10 +32,10 @@ class FederationClient(FederationBase): def __init__(self): - self._fail_fetch_pdu_cache = None + self._get_pdu_cache = None - def start_pdu_fail_cache(self): - self._fail_fetch_pdu_cache = ExpiringCache( + def start_get_pdu_cache(self): + self._get_pdu_cache = ExpiringCache( cache_name="get_pdu_cache", clock=self._clock, max_len=1000, @@ -43,7 +43,7 @@ def start_pdu_fail_cache(self): reset_expiry_on_get=False, ) - self._fail_fetch_pdu_cache.start() + self._get_pdu_cache.start() @log_function def send_pdu(self, pdu, destinations): @@ -175,8 +175,8 @@ def get_pdu(self, destinations, event_id, outlier=False): # TODO: Rate limit the number of times we try and get the same event. - if self._fail_fetch_pdu_cache: - e = self._fail_fetch_pdu_cache.get(event_id) + if self._get_pdu_cache: + e = self._get_pdu_cache.get(event_id) if e: defer.returnValue(e) @@ -223,8 +223,8 @@ def get_pdu(self, destinations, event_id, outlier=False): ) continue - if self._fail_fetch_pdu_cache is not None: - self._fail_fetch_pdu_cache[event_id] = pdu + if self._get_pdu_cache is not None: + self._get_pdu_cache[event_id] = pdu defer.returnValue(pdu)