Skip to content

Commit

Permalink
Merge 'Fix for missing cycles column and more' from Travis Downs
Browse files Browse the repository at this point in the history
In this series:

 - perf_tests_perf: minimal self-benches for PERF_TEST stuff
 - De-duplicate perf_tests main loop, which also fixes #2587

Example results for `perf_tests_perf` after the fix:

```
test                             iterations      median         mad         min         max      allocs       tasks        inst      cycles
perf_tests.test_simple_1          774019546     1.313ns     0.019ns     1.259ns     1.333ns       0.000       0.000         7.0         5.0
perf_tests.test_simple_n         3188188100     0.330ns     0.001ns     0.308ns     0.387ns       0.000       0.000         3.1         1.3
perf_tests.test_ready_async_1    3638567766     0.280ns     0.005ns     0.271ns     0.290ns       0.000       0.000         4.0         1.1
perf_tests.test_ready_async_n    3179866200     0.312ns     0.006ns     0.305ns     0.435ns       0.000       0.000         3.1         1.3
perf_tests.test_unready_async_1    28622990    34.253ns     0.303ns    33.950ns    36.198ns       1.000       2.000       360.1       125.3
perf_tests.test_unready_async_n  1082973200     0.942ns     0.011ns     0.897ns     1.031ns       0.020       0.030         9.3         3.5
fixture.test_fixture_1            781121567     1.303ns     0.006ns     1.282ns     1.309ns       0.000       0.000         7.0         5.0
fixture.test_fixture_n           3096749700     0.330ns     0.005ns     0.318ns     0.363ns       0.000       0.000         3.1         1.3
fixture.test_coro_1                71657240    13.640ns     0.179ns    13.461ns    14.588ns       1.000       0.000       150.1        49.2
fixture.test_coro_n              2214644700     0.475ns     0.019ns     0.453ns     0.722ns       0.010       0.000         4.6         1.9
```

Closes #2588

* https://github.com/scylladb/seastar:
  perf_tests: coroutinize main loop
  add perf_tests_perf
  • Loading branch information
xemul committed Jan 15, 2025
2 parents 57d7d48 + ff20df3 commit 96098fb
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 65 deletions.
100 changes: 35 additions & 65 deletions include/seastar/testing/perf_tests.hh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include <fmt/format.h>

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>
#include <seastar/core/loop.hh>
#include <seastar/testing/linux_perf_event.hh>
Expand Down Expand Up @@ -222,39 +223,17 @@ public:

extern time_measurement measure_time;

// Helpers that pick one of two callables at compile time:
// `if_constexpr_<Condition>(true_fn, false_fn)` returns an object whose
// operator() runs true_fn when Condition is true and false_fn otherwise.
namespace {

// Primary template, used when the <true, ...> specialization below does not
// match: inherits from FalseFn and discards the TrueFn argument.
template<bool Condition, typename TrueFn, typename FalseFn>
struct do_if_constexpr_ : FalseFn {
do_if_constexpr_(TrueFn, FalseFn false_fn) : FalseFn(std::move(false_fn)) { }
// Calls the stored FalseFn, passing a single dummy argument (0).
decltype(auto) operator()() const {
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64095
return FalseFn::operator()(0);
}
};
// Specialization for Condition == true: inherits from TrueFn and discards
// the FalseFn argument.
template<typename TrueFn, typename FalseFn>
struct do_if_constexpr_<true, TrueFn, FalseFn> : TrueFn {
do_if_constexpr_(TrueFn true_fn, FalseFn) : TrueFn(std::move(true_fn)) { }
// Calls the stored TrueFn, passing a single dummy argument (0).
decltype(auto) operator()() const { return TrueFn::operator()(0); }
};

// Factory that builds the matching do_if_constexpr_ from the two callables,
// forwarding both; the result is invoked with no arguments.
template<bool Condition, typename TrueFn, typename FalseFn>
do_if_constexpr_<Condition, TrueFn, FalseFn> if_constexpr_(TrueFn&& true_fn, FalseFn&& false_fn)
{
return do_if_constexpr_<Condition, TrueFn, FalseFn>(std::forward<TrueFn>(true_fn),
std::forward<FalseFn>(false_fn));
}

}

template<typename Test>
class concrete_performance_test final : public performance_test {
std::optional<Test> _test;

using test_ret_type = decltype(_test->run());
// true iff the test method returns future<...>
static constexpr bool is_async_test = is_future<test_ret_type>::value;
// true iff the test returns the number of iterations run, otherwise it returns
// void and we consider each invocation to be 1 iteration
static constexpr bool is_iteration_returning = !(std::is_same_v<test_ret_type, future<>> || std::is_void_v<test_ret_type>);
private:
template<typename... Args>
auto run_test(Args&&...) {
return _test->run();
}

protected:
virtual void set_up() override {
Expand All @@ -267,45 +246,36 @@ protected:

[[gnu::hot]]
virtual future<run_result> do_single_run() override {
// Redundant 'this->'s courtesy of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61636
_instructions_retired_counter.enable();
_cpu_cycles_retired_counter.enable();
return if_constexpr_<is_future<decltype(_test->run())>::value>([&] (auto&&...) {
measure_time.start_run(&_instructions_retired_counter);
return do_until([this] { return this->stop_iteration(); }, [this] {
return if_constexpr_<std::is_same_v<decltype(_test->run()), future<>>>([&] (auto&&...) {
this->next_iteration(1);
return _test->run();
}, [&] (auto&&... dependency) {
// We need `dependency` to make sure the compiler won't be able to instantiate anything
// (and notice that the code does not compile) if this part of if_constexpr_ is not active.
return run_test(dependency...).then([&] (size_t n) {
this->next_iteration(n);
});
})();
}).then([] {
return measure_time.stop_run();
}).finally([this] {
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
});
}, [&] (auto&&...) {
measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
while (!stop_iteration()) {
if_constexpr_<std::is_void_v<decltype(_test->run())>>([&] (auto&&...) {
(void)_test->run();
this->next_iteration(1);
}, [&] (auto&&... dependency) {
// We need `dependency` to make sure the compiler won't be able to instantiate anything
// (and notice that the code does not compile) if this part of if_constexpr_ is not active.
this->next_iteration(run_test(dependency...));
})();
measure_time.start_run(&_instructions_retired_counter, &_cpu_cycles_retired_counter);
while (!stop_iteration()) {
if constexpr (is_async_test) {
if constexpr (is_iteration_returning) {
auto f = _test->run();
next_iteration(f.available() ? std::move(f).get() : co_await std::move(f));
} else {
auto f = _test->run();
// The available() check is functionally redundant, but is significantly faster
// than invoking the co_await machinery on a future-returning function.
if (!f.available()) {
co_await std::move(f);
}
next_iteration(1);
}
} else {
if constexpr (is_iteration_returning) {
next_iteration(_test->run());
} else {
_test->run();
next_iteration(1);
}
}
auto ret = measure_time.stop_run();
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
return make_ready_future<run_result>(std::move(ret));
})();
}
auto ret = measure_time.stop_run();
_instructions_retired_counter.disable();
_cpu_cycles_retired_counter.disable();
co_return ret;
}
public:
using performance_test::performance_test;
Expand Down
3 changes: 3 additions & 0 deletions tests/perf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,6 @@ seastar_add_test (container
seastar_add_test (http_client
SOURCES http_client_perf.cc linux_perf_event.cc
NO_SEASTAR_PERF_TESTING_LIBRARY)

# Self-benchmarks of the PERF_TEST harness itself (source: perf_tests_perf.cc).
seastar_add_test (perf_tests
SOURCES perf_tests_perf.cc)
68 changes: 68 additions & 0 deletions tests/perf/perf_tests_perf.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* This file is open source software, licensed to you under the terms
* of the Apache License, Version 2.0 (the "License"). See the NOTICE file
* distributed with this work for additional information regarding copyright
* ownership. You may not use this file except in compliance with the License.
*
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <seastar/core/future.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <seastar/util/later.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/testing/perf_tests.hh>

// Benchmarks that test raw overhead of almost empty perf tests
// in all the basic variations.

namespace {

// Object handed to do_not_optimize() by test_simple_1 and test_fixture_1.
volatile int sink;

// Number of steps loop() performs, and the value it returns.
constexpr auto ITER_COUNT = 100;

// Empty fixture type used by the fixture-based test macros in this file.
struct fixture { };

// Passes every index in [0, ITER_COUNT) through do_not_optimize() and then
// returns ITER_COUNT.
auto loop() {
    size_t step = 0;
    while (step < ITER_COUNT) {
        perf_tests::do_not_optimize(step);
        ++step;
    }
    return ITER_COUNT;
}

} // namespace

// Synchronous test with no return statement: a single do_not_optimize() call on `sink`.
PERF_TEST(perf_tests, test_simple_1) { perf_tests::do_not_optimize(sink); }

// Synchronous test that runs loop() and returns its result directly.
PERF_TEST(perf_tests, test_simple_n) { return loop(); }

// Test whose body just returns now().
// NOTE(review): presumably an already-resolved future<>, as the "ready" in the name suggests — confirm.
PERF_TEST(perf_tests, test_ready_async_1) { return now(); }

// Test that runs loop() and returns its result wrapped by as_ready_future().
PERF_TEST(perf_tests, test_ready_async_n) { return as_ready_future(loop()); }

// Test whose body just returns yield().
// NOTE(review): presumably a future that is not immediately available, as "unready" suggests — confirm.
PERF_TEST(perf_tests, test_unready_async_1) { return yield(); }

// Multi-iteration counterpart of test_unready_async_1: runs loop() first,
// then returns a future chained on yield() that resolves to loop()'s result.
// The value is captured by copy so the continuation does not refer to a
// local of a function that has already returned.
PERF_TEST(perf_tests, test_unready_async_n) {
    auto i = loop();
    return yield().then([=] { return i; });
}

// Fixture-based (PERF_TEST_F) variant of test_simple_1: same body, declared against `fixture`.
PERF_TEST_F(fixture, test_fixture_1) { perf_tests::do_not_optimize(sink); }

// Fixture-based (PERF_TEST_F) variant of test_simple_n: returns loop()'s result.
PERF_TEST_F(fixture, test_fixture_n) { return loop(); }

// Coroutine test (PERF_TEST_C) on `fixture` whose only statement awaits
// coroutine::maybe_yield(); it produces no value.
PERF_TEST_C(fixture, test_coro_1) {
// Without the next line, the compiler will optimize away the coroutine nature of
// this function and compile/inline it as a regular function.
co_await coroutine::maybe_yield();
}

// Coroutine test (PERF_TEST_CN) on `fixture`: awaits coroutine::maybe_yield()
// and then co_returns loop()'s result.
PERF_TEST_CN(fixture, test_coro_n) {
co_await coroutine::maybe_yield();
co_return loop();
}

0 comments on commit 96098fb

Please sign in to comment.