Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Treat "\u0000" as "\u0020" for the purposes of message search (message indexing) #10820

Merged
merged 27 commits into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
177ca9a
add test to check if null code points are being inserted
H-Shay Sep 13, 2021
7ae5ccf
add logic to detect and replace null code points before insertion int…
H-Shay Sep 13, 2021
c86d8b5
lints
H-Shay Sep 13, 2021
f681896
add license to test
H-Shay Sep 14, 2021
534f141
change approach to null substitution
H-Shay Sep 14, 2021
4574d66
add type hint for SearchEntry
H-Shay Sep 14, 2021
fb1f9a0
Merge branch 'develop' into test_homeserver
H-Shay Sep 14, 2021
0628de7
Add changelog entry
H-Shay Sep 14, 2021
b0b5792
updated changelog
H-Shay Sep 14, 2021
49fc935
update chanelog message
H-Shay Sep 14, 2021
8de418a
remove duplicate changelog
H-Shay Sep 14, 2021
b2c7c88
Update synapse/storage/databases/main/events.py remove extra space
H-Shay Sep 14, 2021
163f569
rename and move test file, update tests, delete old test file
H-Shay Sep 16, 2021
0d2c316
fix typo in comments
H-Shay Sep 16, 2021
7e7de36
update _find_highlights_in_postgres to replace null byte with space
H-Shay Sep 16, 2021
584ccd5
replace null byte in sqlite search insertion
H-Shay Sep 17, 2021
e922659
beef up and reorganize test for this pr
H-Shay Sep 17, 2021
1db26b6
Merge branch 'test_homeserver' of /~https://github.com/H-Shay/synapse i…
H-Shay Sep 17, 2021
baf2250
update changelog
H-Shay Sep 17, 2021
54ca415
add type hints and update docstring
H-Shay Sep 17, 2021
0cad2fe
check db engine directly vs using env variable
H-Shay Sep 17, 2021
c6f6fc2
refactor tests to be less repetetive
H-Shay Sep 20, 2021
3247106
move rplace logic into seperate function
H-Shay Sep 20, 2021
103125f
requested changes
H-Shay Sep 21, 2021
2639029
Fix typo.
clokep Sep 21, 2021
7c1679b
Update synapse/storage/databases/main/search.py
H-Shay Sep 22, 2021
88ad982
Update changelog.d/10820.misc
H-Shay Sep 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10820.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug where sending an `m.room.text` event containing a null byte would cause an internal server error.
clokep marked this conversation as resolved.
Show resolved Hide resolved
25 changes: 16 additions & 9 deletions synapse/storage/databases/main/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
import logging
import re
from collections import namedtuple
from typing import Collection, List, Optional, Set
from typing import Collection, Iterable, List, Optional, Set

from synapse.api.errors import SynapseError
from synapse.events import EventBase
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
from synapse.storage.database import DatabasePool
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.engines import PostgresEngine, Sqlite3Engine

Expand All @@ -33,13 +33,14 @@


class SearchWorkerStore(SQLBaseStore):
def store_search_entries_txn(self, txn, entries):
def store_search_entries_txn(
self, txn: LoggingTransaction, entries: Iterable[SearchEntry]
) -> None:
"""Add entries to the search table

Args:
txn (cursor):
entries (iterable[SearchEntry]):
entries to be added to the table
txn:
entries: entries to be added to the table
"""
if not self.hs.config.enable_search:
return
Expand All @@ -55,7 +56,7 @@ def store_search_entries_txn(self, txn, entries):
entry.event_id,
entry.room_id,
entry.key,
entry.value,
entry.value.replace("\u0000", " "),
clokep marked this conversation as resolved.
Show resolved Hide resolved
entry.stream_ordering,
entry.origin_server_ts,
)
Expand All @@ -70,11 +71,16 @@ def store_search_entries_txn(self, txn, entries):
" VALUES (?,?,?,?)"
)
args = (
(entry.event_id, entry.room_id, entry.key, entry.value)
(
entry.event_id,
entry.room_id,
entry.key,
entry.value.replace("\u0000", " "),
)
for entry in entries
)

txn.execute_batch(sql, args)

else:
# This should be unreachable.
raise Exception("Unrecognized database engine")
Expand Down Expand Up @@ -646,6 +652,7 @@ def f(txn):
for key in ("body", "name", "topic"):
v = event.content.get(key, None)
if v:
v = v.replace("\u0000", " ")
values.append(v)

if not values:
Expand Down
80 changes: 80 additions & 0 deletions tests/storage/test_room_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import synapse.rest.admin
from synapse.rest.client import login, room
from synapse.storage.engines import PostgresEngine

from tests.unittest import HomeserverTestCase


class NullByteInsertionTest(HomeserverTestCase):
servlets = [
synapse.rest.admin.register_servlets_for_client_rest_resource,
login.register_servlets,
room.register_servlets,
]

def test_null_byte(self):
"""
Postgres/SQLite don't like null bytes going into the search tables. Internally
we replace those with a space.
clokep marked this conversation as resolved.
Show resolved Hide resolved

Ensure this doesn't break anything.
"""

# register a user and create a room, create some messages
self.register_user("alice", "password")
access_token = self.login("alice", "password")
room_id = self.helper.create_room_as("alice", True, "1", access_token)
clokep marked this conversation as resolved.
Show resolved Hide resolved
body1 = "hi\u0000bob"
body2 = "another message"
body3 = "hi alice"

# send messages and ensure they don't cause an internal server
# error
resp1 = self.helper.send(room_id, body1, "1", access_token)
resp2 = self.helper.send(room_id, body2, "2", access_token)
resp3 = self.helper.send(room_id, body3, "3", access_token)
self.assertTrue("event_id" in resp1)
self.assertTrue("event_id" in resp2)
self.assertTrue("event_id" in resp3)
clokep marked this conversation as resolved.
Show resolved Hide resolved

# check that search still works with the message where the null byte was replaced
clokep marked this conversation as resolved.
Show resolved Hide resolved
store = self.hs.get_datastore()
res1 = self.get_success(
store.search_msgs([room_id], "hi bob", ["content.body"])
)
self.assertEquals(res1.get("count"), 1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be clearer to put the highlights checks here after the request. We won't need to keep as much state in our head as we move down through the test then.

(And we can use a single response variable that gets re-used instead of storing them separately).


# check that search still works with another unrelated message
clokep marked this conversation as resolved.
Show resolved Hide resolved
res2 = self.get_success(
store.search_msgs([room_id], "another", ["content.body"])
)
self.assertEquals(res2.get("count"), 1)

# check that search still works when given a search term that overlaps
# with the message that we replaced the null byte in and an unrelated one
clokep marked this conversation as resolved.
Show resolved Hide resolved
res3 = self.get_success(store.search_msgs([room_id], "hi", ["content.body"]))
self.assertEquals(res3.get("count"), 2)
res4 = self.get_success(
store.search_msgs([room_id], "hi alice", ["content.body"])
)

# check content of highlights if we are using postgres
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("bob", res1.get("highlights"))
self.assertIn("hi", res1.get("highlights"))
self.assertIn("another", res2.get("highlights"))
self.assertIn("alice", res4.get("highlights"))