Skip to content

Commit

Permalink
Filter out rooms from the room directory being served to other homese…
Browse files Browse the repository at this point in the history
…rvers when those rooms block that homeserver by their Access Control Lists. (#16759)

The idea here being that the directory server shouldn't advertise rooms
to a requesting server is the requesting server would not be allowed to
join or participate in the room.

<!--
Fixes: # <!-- -->
<!--
Supersedes: # <!-- -->
<!--
Follows: # <!-- -->
<!--
Part of: # <!-- -->
Base: `develop` <!-- git-stack-base-branch:develop -->

<!--
This pull request is commit-by-commit review friendly. <!-- -->
<!--
This pull request is intended for commit-by-commit review. <!-- -->

Original commit schedule, with full messages:

<ol>
<li>

Pass `from_federation_origin` down into room list retrieval code 

</li>
<li>

Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

fixup! Don't cache /publicRooms response for inbound federated requests 

</li>
<li>

Cap the number of /publicRooms entries to 100 

</li>
<li>

Simplify code now that you can't request unlimited rooms 

</li>
<li>

Filter out rooms from federated requests that don't have the correct ACL

</li>
<li>

Request a handful more when filtering ACLs so that we can try to avoid
shortchanging the requester

</li>
</ol>

---------

Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>
  • Loading branch information
reivilibre authored Jan 8, 2024
1 parent 5d3850b commit a83a337
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 52 deletions.
1 change: 1 addition & 0 deletions changelog.d/16759.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.
7 changes: 5 additions & 2 deletions synapse/federation/transport/server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,10 @@ async def on_GET(
limit = None

data = await self.handler.get_local_public_room_list(
limit, since_token, network_tuple=network_tuple, from_federation=True
limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=origin,
)
return 200, data

Expand Down Expand Up @@ -195,7 +198,7 @@ async def on_POST(
since_token=since_token,
search_filter=search_filter,
network_tuple=network_tuple,
from_federation=True,
from_federation_origin=origin,
)

return 200, data
Expand Down
177 changes: 127 additions & 50 deletions synapse/handlers/room_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#

import logging
from typing import TYPE_CHECKING, Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import attr
import msgpack
Expand Down Expand Up @@ -54,6 +54,9 @@
# This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)

# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100


class RoomListHandler:
def __init__(self, hs: "HomeServer"):
Expand All @@ -74,7 +77,7 @@ async def get_local_public_room_list(
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.
Expand All @@ -89,7 +92,8 @@ async def get_local_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: true iff the request comes from the federation API
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
Expand All @@ -102,36 +106,43 @@ async def get_local_public_room_list(
network_tuple,
)

if search_filter:
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)

if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
logger.info("Bypassing cache as search request.")
# We also don't bother caching requests from federated homeservers.
logger.debug("Bypassing cache as search or federation request.")

return await self._get_public_room_list(
limit,
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

key = (limit, since_token, network_tuple)
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
limit,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

async def _get_public_room_list(
self,
limit: Optional[int] = None,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
Expand All @@ -142,8 +153,8 @@ async def _get_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: Whether this request originated from a
federating server or a client. Used for room filtering.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""

# Pagination tokens work by storing the room ID sent in the last batch,
Expand All @@ -165,16 +176,24 @@ async def _get_public_room_list(
forwards = True
has_batch_token = False

# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1 if limit is not None else None
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best easy effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10

results = await self.store.get_largest_public_rooms(
network_tuple,
search_filter,
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
)

def build_room_entry(room: LargestRoomStats) -> JsonDict:
Expand All @@ -195,59 +214,117 @@ def build_room_entry(room: LargestRoomStats) -> JsonDict:
# Filter out Nones – rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}

response: JsonDict = {}
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)

# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False

for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break

if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result

if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)

if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue

room_entries.append(build_room_entry(room_result))

if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)

response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
if limit is not None:
more_to_come = num_results == probing_limit

# Depending on direction we trim either the front or back.
if forwards:
results = results[:limit]
more_to_come_from_database = num_results == probing_limit

if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
results = results[-limit:]
else:
more_to_come = False
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# But since `first_considered_room` is None, we know that we have
# reached the end of the results.
# So we can use a token of (0, empty room ID) to paginate from the end
# next time.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=0,
last_room_id="",
direction_is_forward=False,
).to_token()

if num_results > 0:
final_entry = results[-1]
initial_entry = results[0]

assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if has_batch_token:
# If there was a token given then we assume that there
# must be previous results.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
direction_is_forward=False,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else:
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()

response["chunk"] = [build_room_entry(r) for r in results]

# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)

Expand Down
88 changes: 88 additions & 0 deletions tests/handlers/test_room_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from http import HTTPStatus
from typing import Optional, Set

from synapse.rest import admin
from synapse.rest.client import directory, login, room
from synapse.types import JsonDict

from tests import unittest


class RoomListHandlerTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
room.register_servlets,
directory.register_servlets,
]

def _create_published_room(
self, tok: str, extra_content: Optional[JsonDict] = None
) -> str:
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
channel = self.make_request(
"PUT",
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
content={
"visibility": "public",
},
)
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
return room_id

def test_acls_applied_to_room_directory_results(self) -> None:
"""
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
`test` and `test2` to access it.
We then simulate `test2` and `test3` requesting the room directory and assert
that `test3` does not see room 2, but `test2` sees all 3.
"""
self.register_user("u1", "p1")
u1tok = self.login("u1", "p1")
room1 = self._create_published_room(u1tok)

room2 = self._create_published_room(
u1tok,
extra_content={
"initial_state": [
{
"type": "m.room.server_acl",
"content": {
"allow": ["test", "test2"],
},
}
]
},
)

room3 = self._create_published_room(u1tok)

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test2"
)
)
room_ids_in_test2_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test3"
)
)
room_ids_in_test3_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

self.assertEqual(
room_ids_in_test2_list,
{room1, room2, room3},
"test2 should be able to see all 3 rooms",
)
self.assertEqual(
room_ids_in_test3_list,
{room1, room3},
"test3 should be able to see only 2 rooms",
)

0 comments on commit a83a337

Please sign in to comment.