diff --git a/test_runner/fixtures/compute_reconfigure.py b/test_runner/fixtures/compute_reconfigure.py index 33f01f80fbb9..425abef93504 100644 --- a/test_runner/fixtures/compute_reconfigure.py +++ b/test_runner/fixtures/compute_reconfigure.py @@ -69,7 +69,10 @@ def handler(request: Request) -> Response: # This causes the endpoint to query storage controller for its location, which # is redundant since we already have it here, but this avoids extending the # neon_local CLI to take full lists of locations - reconfigure_threads.submit(lambda workload=workload: workload.reconfigure()) # type: ignore[misc] + fut = reconfigure_threads.submit(lambda workload=workload: workload.reconfigure()) # type: ignore[misc] + + # To satisfy semantics of notify-attach API, we must wait for the change to be applied before returning 200 + fut.result() return Response(status=200) diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py index 33d422c590ed..9f13cf753254 100644 --- a/test_runner/fixtures/neon_cli.py +++ b/test_runner/fixtures/neon_cli.py @@ -486,6 +486,7 @@ def endpoint_create( lsn: Lsn | None = None, pageserver_id: int | None = None, allow_multiple=False, + update_catalog: bool = False, ) -> subprocess.CompletedProcess[str]: args = [ "endpoint", @@ -511,6 +512,8 @@ def endpoint_create( args.extend(["--pageserver-id", str(pageserver_id)]) if allow_multiple: args.extend(["--allow-multiple"]) + if update_catalog: + args.extend(["--update-catalog"]) res = self.raw_cli(args) res.check_returncode() diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 3d3a445b9718..8ef3e3226d8b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3846,6 +3846,7 @@ def create( config_lines: list[str] | None = None, pageserver_id: int | None = None, allow_multiple: bool = False, + update_catalog: bool = False, ) -> Self: """ Create a new Postgres endpoint. 
@@ -3870,6 +3871,7 @@ def create( pg_version=self.env.pg_version, pageserver_id=pageserver_id, allow_multiple=allow_multiple, + update_catalog=update_catalog, ) path = Path("endpoints") / self.endpoint_id / "pgdata" self.pgdata_dir = self.env.repo_dir / path @@ -4283,6 +4285,7 @@ def create( hot_standby: bool = False, config_lines: list[str] | None = None, pageserver_id: int | None = None, + update_catalog: bool = False, ) -> Endpoint: ep = Endpoint( self.env, @@ -4303,6 +4306,7 @@ def create( hot_standby=hot_standby, config_lines=config_lines, pageserver_id=pageserver_id, + update_catalog=update_catalog, ) def stop_all(self, fail_on_error=True) -> Self: diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py index eea0ec2b95c3..1947a9c3fbe0 100644 --- a/test_runner/fixtures/workload.py +++ b/test_runner/fixtures/workload.py @@ -53,6 +53,8 @@ def __init__( self._endpoint: Endpoint | None = None self._endpoint_opts = endpoint_opts or {} + self._configured_pageserver: int | None = None + def branch( self, timeline_id: TimelineId, @@ -92,8 +94,12 @@ def endpoint(self, pageserver_id: int | None = None) -> Endpoint: **self._endpoint_opts, ) self._endpoint.start(pageserver_id=pageserver_id) + self._configured_pageserver = pageserver_id else: - self._endpoint.reconfigure(pageserver_id=pageserver_id) + # Only reconfigure when the requested pageserver differs from the one last configured + if self._configured_pageserver != pageserver_id: + self._configured_pageserver = pageserver_id + self._endpoint.reconfigure(pageserver_id=pageserver_id) connstring = self._endpoint.safe_psql( "SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'" @@ -122,6 +128,7 @@ def init(self, pageserver_id: int | None = None, allow_recreate=False): def write_rows(self, n: int, pageserver_id: int | None = None, upload: bool = True): endpoint = self.endpoint(pageserver_id) + start = self.expect_rows end = start + n - 1 self.expect_rows += n diff --git a/test_runner/regress/test_compaction.py 
b/test_runner/regress/test_compaction.py index f3347b594e32..f10872590c9b 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -689,9 +689,7 @@ def test_pageserver_compaction_circuit_breaker(neon_env_builder: NeonEnvBuilder) env.pageserver.http_client().configure_failpoints((FAILPOINT, "return")) # Write some data to trigger compaction - workload.write_rows(1024, upload=False) - workload.write_rows(1024, upload=False) - workload.write_rows(1024, upload=False) + workload.write_rows(32768, upload=False) def assert_broken(): env.pageserver.assert_log_contains(BROKEN_LOG) diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index 6f8070e2ba15..891087369045 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -91,7 +91,7 @@ def get_sizes(): workload.init() sizes_before = get_sizes() - workload.write_rows(256) + workload.write_rows(65536) # Test that we can read data back from a sharded tenant workload.validate() @@ -1368,6 +1368,7 @@ def test_sharding_split_failures( workload = Workload(env, tenant_id, timeline_id) workload.init() workload.write_rows(100) + compute_reconfigure_listener.register_workload(workload) # Put the environment into a failing state (exact meaning depends on `failure`) failure.apply(env) @@ -1546,6 +1547,9 @@ def shards_info(): # Tip: set to 100MB to make the test fail "max_replication_write_lag=1MB", ], + # We need `neon` extension for calling backpressure functions, + # this flag instructs `compute_ctl` to pre-install it. + "update_catalog": True, }, ) workload.init() @@ -1815,6 +1819,9 @@ def test_sharding_gc( # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. 
# TODO: remove when /~https://github.com/neondatabase/neon/issues/10720 is fixed - ps.allowed_errors.append( - ".*could not find data for key 020000000000000000000000000000000000.*" + ps.allowed_errors.extend( + [ + ".*could not find data for key 020000000000000000000000000000000000.*", + ".*could not ingest record.*", + ] ) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index 46038ccbbb49..b8253fb125be 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -316,8 +316,11 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. # TODO: remove when /~https://github.com/neondatabase/neon/issues/10720 is fixed - ps.allowed_errors.append( - ".*could not find data for key 020000000000000000000000000000000000.*" + ps.allowed_errors.extend( + [ + ".*could not find data for key 020000000000000000000000000000000000.*", + ".*could not ingest record.*", + ] )