Skip to content

Commit

Permalink
Merge pull request #22714 from rockwotj/protobuf
Browse files Browse the repository at this point in the history
Seastar friendly protobuf parser
  • Loading branch information
rockwotj authored Aug 16, 2024
2 parents 24d6aa2 + c0c0dc6 commit 6a68058
Show file tree
Hide file tree
Showing 16 changed files with 1,869 additions and 22 deletions.
1 change: 1 addition & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use_repo(non_module_dependencies, "hwloc")
use_repo(non_module_dependencies, "jsoncons")
use_repo(non_module_dependencies, "krb5")
use_repo(non_module_dependencies, "libpciaccess")
use_repo(non_module_dependencies, "libprotobuf_mutator")
use_repo(non_module_dependencies, "libxml2")
use_repo(non_module_dependencies, "lksctp")
use_repo(non_module_dependencies, "numactl")
Expand Down
205 changes: 204 additions & 1 deletion MODULE.bazel.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions bazel/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ def data_dependency():
url = "https://gitlab.freedesktop.org/xorg/lib/libpciaccess/-/archive/2ec2576cabefef1eaa5dd9307c97de2e887fc347/libpciaccess-2ec2576cabefef1eaa5dd9307c97de2e887fc347.tar.gz",
)

# libprotobuf-mutator, pinned to a single upstream commit. The same commit
# hash appears in both `strip_prefix` and `url` and must be updated together
# with `integrity` whenever the pin is bumped.
http_archive(
    name = "libprotobuf_mutator",
    # Upstream is built with the BUILD file kept under //bazel/thirdparty.
    build_file = "//bazel/thirdparty:libprotobuf-mutator.BUILD",
    integrity = "sha256-KWUbFgNpDJtAO6Kr0eTo1v6iczEOta72jSle9oivFhg=",
    strip_prefix = "libprotobuf-mutator-b922c8ab9004ef9944982e4f165e2747b13223fa",
    # Must be an absolute https:// URL; a "/~https://" prefix is not fetchable.
    url = "https://github.com/google/libprotobuf-mutator/archive/b922c8ab9004ef9944982e4f165e2747b13223fa.zip",
)

http_archive(
name = "libxml2",
build_file = "//bazel/thirdparty:libxml2.BUILD",
Expand Down
25 changes: 25 additions & 0 deletions bazel/thirdparty/libprotobuf-mutator.BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# See google/libprotobuf-mutator#91

# Test-only build of libprotobuf-mutator's core library.
#
# Everything under src/ except the unit tests is compiled in; only
# src/mutator.h is exported, and the prefix rewriting below makes it
# includable as "protobuf_mutator/mutator.h".
cc_library(
    name = "libprotobuf_mutator",
    testonly = True,
    srcs = glob(
        ["src/*.cc", "src/*.h"],
        exclude = [
            # Upstream unit tests are not part of the library.
            "**/*_test.cc",
            # Listed in `hdrs` instead, so it must not also appear in `srcs`.
            "src/mutator.h",
        ],
    ) + ["port/protobuf.h"],
    hdrs = ["src/mutator.h"],
    # strip "src", then prepend "protobuf_mutator" to the public header path.
    include_prefix = "protobuf_mutator",
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = ["@protobuf"],
)
7 changes: 7 additions & 0 deletions src/v/bytes/iobuf_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ class iobuf_parser_base {
return iobuf_copy(in, len);
}

// Returns the next `n` bytes of input as a `bytes` value without advancing
// this parser.
//
// The read goes through a scratch copy of `_in`, so `_in` itself is never
// modified (the method is `const`).
// NOTE(review): presumably the caller must ensure at least `n` bytes remain;
// how `consume_to` reacts to a short read is not visible here - confirm.
bytes peek_bytes(size_t n) const {
    // Destination buffer of exactly `n` bytes; contents are filled below.
    auto peeked = ss::uninitialized_string<bytes>(n);
    // Consume from a copy so the parser's own position stays put.
    auto cursor = _in;
    cursor.consume_to(n, peeked.begin());
    return peeked;
}

protected:
iobuf& ref() { return *std::get<owned_buf>(_buf); }

Expand Down
27 changes: 15 additions & 12 deletions src/v/container/fragmented_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ class fragmented_vector {
using this_type = fragmented_vector<T, fragment_size_bytes>;
using backing_type = std::vector<std::vector<T>>;
using value_type = T;
using reference = T&;
using const_reference = const T&;
using reference = std::conditional_t<std::is_same_v<T, bool>, bool, T&>;
using const_reference
= std::conditional_t<std::is_same_v<T, bool>, bool, const T&>;
using size_type = size_t;
using allocator_type = backing_type::allocator_type;
using difference_type = backing_type::difference_type;
Expand Down Expand Up @@ -211,26 +212,26 @@ class fragmented_vector {
update_generation();
}

const T& at(size_t index) const {
const_reference at(size_t index) const {
return _frags.at(index / elems_per_frag).at(index % elems_per_frag);
}

T& at(size_t index) {
reference at(size_t index) {
return _frags.at(index / elems_per_frag).at(index % elems_per_frag);
}

const T& operator[](size_t index) const {
const_reference operator[](size_t index) const {
return _frags[index / elems_per_frag][index % elems_per_frag];
}

T& operator[](size_t index) {
reference operator[](size_t index) {
return _frags[index / elems_per_frag][index % elems_per_frag];
}

const T& front() const { return _frags.front().front(); }
const T& back() const { return _frags.back().back(); }
T& front() { return _frags.front().front(); }
T& back() { return _frags.back().back(); }
const_reference front() const { return _frags.front().front(); }
const_reference back() const { return _frags.back().back(); }
reference front() { return _frags.front().front(); }
reference back() { return _frags.back().back(); }
bool empty() const noexcept { return _size == 0; }
size_t size() const noexcept { return _size; }
size_t capacity() const noexcept { return _capacity; }
Expand Down Expand Up @@ -333,8 +334,10 @@ class fragmented_vector {
using iterator_category = std::random_access_iterator_tag;
using value_type = typename std::conditional_t<C, const T, T>;
using difference_type = std::ptrdiff_t;
using pointer = value_type*;
using reference = value_type&;
using pointer
= std::conditional_t<std::is_same_v<T, bool>, bool, value_type*>;
using reference
= std::conditional_t<std::is_same_v<T, bool>, bool, value_type&>;

iter() = default;

Expand Down
21 changes: 21 additions & 0 deletions src/v/serde/protobuf/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
load("//bazel:build.bzl", "redpanda_cc_library")

# Protobuf parser library for this directory (parser.cc / parser.h).
# NOTE(review): presumably `include_prefix` exposes the header as
# "serde/protobuf/parser.h", as it would for cc_library - confirm against
# the redpanda_cc_library macro.
redpanda_cc_library(
    name = "protobuf",
    srcs = ["parser.cc"],
    hdrs = ["parser.h"],
    include_prefix = "serde/protobuf",
    visibility = ["//visibility:public"],
    deps = [
        # In-tree dependencies.
        "//src/v/bytes:iobuf",
        "//src/v/bytes:iobuf_parser",
        "//src/v/container:chunked_hash_map",
        "//src/v/container:fragmented_vector",
        "//src/v/utils:vint",
        # External repository.
        "@protobuf",
    ],
)
19 changes: 19 additions & 0 deletions src/v/serde/protobuf/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Protobuf Parser

This directory contains a seastar friendly protobuf parser.
It should adhere to the same compatibility guarantees as the official C++ library
but has a few notable differences:

1. Does not make contiguous allocations of repeated fields, maps or strings/bytes types
2. Is reactor friendly on deeply nested or large protobufs in that it will yield control
3. Is a stackless parser, so it is not bound by the smallish 1MB stacks that seastar uses for threads

## Development

If you are tasked with updating this code, here are a few helpful links:

1. [Encoding spec](https://protobuf.dev/programming-guides/encoding/) (note this elides some important details about how invalid/corrupted data is handled)
2. [Golang protobuf parser](https://github.com/protocolbuffers/protobuf-go/blob/master/proto/decode.go)
3. [Java protobuf parser](https://github.com/protocolbuffers/protobuf/tree/main/java/core/src/main/java/com/google/protobuf)
4. [C++ protobuf parser](https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/wire_format_lite.cc)
5. [Protobuf Zero](https://github.com/mapbox/protozero/blob/master/include/protozero/pbf_reader.hpp)
Loading

0 comments on commit 6a68058

Please sign in to comment.