Skip to content

Commit

Permalink
Add safe encoding/decoding mode with buffer overflow control
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry Grudin committed Nov 25, 2024
1 parent 094a886 commit aa4fd38
Show file tree
Hide file tree
Showing 29 changed files with 1,202 additions and 252 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ jobs:
- uses: actions/checkout@v2
- name: install build tools (gcc)
run: |
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
sudo apt install g++-11 -y
echo "CC=gcc-11" >> $GITHUB_ENV
echo "CXX=g++-11" >> $GITHUB_ENV
Expand Down
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ using Class = message<
>;
```
With these definitions in place, both serialization and deserialization are straightforward:
- Mode without buffer overflow control
```c++
// serialization
Student twice {123, "twice"}, tom{456, "tom"}, jerry{123456, "jerry"};
Expand All @@ -71,6 +72,30 @@ assert(yourClass["students"_f][1] == (Student{123456, "jerry"}));
assert(yourClass == myClass);
assert(begin_diff(bufferEnd2, bufferEnd) == 0);
```
- Mode with buffer overflow control (safe mode)
```c++
// serialization
Student twice {123, "twice"}, tom{456, "tom"}, jerry{123456, "jerry"};
Class myClass {"class 101", {tom, jerry}};
myClass["students"_f].push_back(twice);
array<byte, 64> buffer{};
auto result = message_coder<Class>::encode<true>(myClass, buffer);
assert (result.has_value());
const auto& bufferEnd = *result;
assert(begin_diff(bufferEnd, buffer) == 45);
// deserialization
auto result2 = message_coder<Class>::decode<true>(buffer);
assert (result2.has_value());
const auto& [yourClass, bufferEnd2] = *result2;
assert(yourClass["name"_f] == "class 101");
assert(yourClass["students"_f][2]["name"_f] == "twice");
assert(yourClass["students"_f][2]["id"_f] == 123);
assert(yourClass["students"_f][1] == (Student{123456, "jerry"}));
assert(yourClass == myClass);
assert(begin_diff(bufferEnd2, bufferEnd) == 0);
```
More examples can be found in our test cases ([test/message.cpp](/~https://github.com/PragmaTwice/protopuf/blob/master/test/message.cpp)).

## Supported Field Types
Expand Down
56 changes: 43 additions & 13 deletions include/protopuf/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,33 +44,53 @@ namespace pp {

array_coder() = delete;

static constexpr bytes encode(const R& con, bytes b) {
template <coder_mode Mode = unsafe_mode>
static constexpr encode_result<Mode> encode(const R& con, bytes b) {
uint<8> n = 0;

for(const auto &i : con) {
n += skipper<C>::encode_skip(i);
}

b = varint_coder<uint<8>>::encode(n, b);
bytes safe_b;
if (!Mode::get_value_from_result(varint_coder<uint<8>>::encode<Mode>(n, b), safe_b)) {
return {};
}

for(const auto& i : con) {
b = C::encode(i, b);
if (!Mode::get_value_from_result(C::template encode<Mode>(i, safe_b), safe_b)) {
return {};
}
}

return b;
return encode_result<Mode>{safe_b};
}

static constexpr decode_result<R> decode(bytes b) {
template <coder_mode Mode = unsafe_mode>
static constexpr decode_result<R, Mode> decode(bytes b) {
decode_value<uint<8>> decode_len;
if (!Mode::get_value_from_result(varint_coder<uint<8>>::decode<Mode>(b), decode_len)) {
return {};
}

uint<8> len = 0;
std::tie(len, b) = varint_coder<uint<8>>::decode(b);
std::tie(len, b) = decode_len;
R con;

auto origin_b = b;
if constexpr (requires { con.reserve(); }) {
con.reserve(len);
}

const auto origin_b = b;
decode_value<typename C::value_type> decode_v;
while(begin_diff(b, origin_b) < len) {
std::tie(*std::inserter(con, con.end()), b) = C::decode(b);
if (Mode::get_value_from_result(C::template decode<Mode>(b), decode_v)) {
std::tie(*std::inserter(con, con.end()), b) = std::move(decode_v);
} else {
return {};
}
}

return {con, b};
return Mode::template make_result<decode_result<R, Mode>>(std::move(con), b);
}
};

Expand All @@ -91,11 +111,21 @@ namespace pp {
return n;
}

static constexpr bytes decode_skip(bytes b) {
template <coder_mode Mode = unsafe_mode>
static constexpr decode_skip_result<Mode> decode_skip(bytes b) {
decode_value<uint<8>> decode_len;
if (!Mode::get_value_from_result(varint_coder<uint<8>>::decode<Mode>(b), decode_len)) {
return {};
}

uint<8> n = 0;
std::tie(n, b) = varint_coder<uint<8>>::decode(b);
std::tie(n, b) = decode_len;

if (!Mode::check_bytes_span(b, n)) {
return {};
}

return b.subspan(n);
return Mode::template make_result<decode_skip_result<Mode>>(b.subspan(n));
}
};

Expand Down
12 changes: 7 additions & 5 deletions include/protopuf/bool.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include "coder.h"
#include "byte.h"
#include "varint.h"
#include "int.h"

namespace pp {

Expand All @@ -27,12 +27,14 @@ namespace pp {

bool_coder() = delete;

static constexpr bytes encode(bool i, bytes b) {
return integer_coder<uint<1>>::encode(i, b);
template <coder_mode Mode = unsafe_mode>
static constexpr encode_result<Mode> encode(bool i, bytes b) {
return integer_coder<uint<1>>::encode<Mode>(i, b);
}

static constexpr decode_result<bool> decode(bytes b) {
return integer_coder<uint<1>>::decode(b);
template <coder_mode Mode = unsafe_mode>
static constexpr decode_result<bool, Mode> decode(bytes b) {
return integer_coder<uint<1>>::decode<Mode>(b);
}
};

Expand Down
2 changes: 1 addition & 1 deletion include/protopuf/byte.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace pp {
/// Returns the byte-distance between `begin(a)` and `begin(b)`.
inline constexpr std::size_t begin_diff(bytes a, bytes b) {
// `std::to_address` is used here for MSVC, ref to /~https://github.com/microsoft/STL/issues/1435
return std::to_address(a.begin()) - std::to_address(b.begin());
return static_cast<std::size_t>(std::to_address(a.begin()) - std::to_address(b.begin()));
}
}

Expand Down
26 changes: 21 additions & 5 deletions include/protopuf/coder.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,58 @@

#include <utility>
#include "byte.h"
#include "coder_mode.h"

namespace pp {

/// @brief A type which `encoder`'s `encode` returns.
/// @param Mode the encoding mode
template<coder_mode Mode>
using encode_result = typename Mode::template result_type<bytes>;

/// @brief A pair type which `decoder`'s `decode` returns.
/// - Left type of pair `T`: the type of decoded object.
/// - Right type of pair `bytes`: the `bytes` which remains not decoded after finishing `decode`.
template<typename T>
using decode_result = std::pair<T, bytes>;
using decode_value = std::pair<T, bytes>;

/// @brief A type which `decoder`'s `decode` returns.
/// @param T the type of decoded object
/// @param Mode the decoding mode
template<typename T, coder_mode Mode>
using decode_result = typename Mode::template result_type<decode_value<T>>;

/// @brief Describes a type with static member function `encode`, which serializes an object to `bytes` (no ownership).
///
/// Encoding can be performed in different modes.
/// Type alias `value_type` describes type of the object to be encoded.
/// Static member function `encode`:
/// @param v the object to be encoded (source object).
/// @param s the bytes which the object `v` is encoded into (target bytes).
/// @returns a bytes from `begin(s) + encoding_length(v)` to `end(s)`, where `encoding_length` is the length of
/// @returns the @ref encode_result which depends on the encoding mode.
/// The result contains a bytes from `begin(s) + encoding_length(v)` to `end(s)`, where `encoding_length` is the length of
/// encoded object (bytes form), representing the left bytes which remains not used yet.
template<typename T>
concept encoder = requires(typename T::value_type v, bytes s) {
typename T::value_type;
{ T::encode(v, s) } -> std::same_as<bytes>;
{ T::template encode<unsafe_mode>(v, s) } -> std::same_as<encode_result<unsafe_mode>>;
};

/// @brief Describes a type with static member function `decode`, which deserializes some `bytes` to an object.
///
/// Decoding can be performed in different modes.
/// Type alias `value_type` describes type of the object to be decoded.
/// Static member function `decode`:
/// @param s the bytes which the object is decoded from (source bytes).
/// @returns the @ref decode_result which is a pair including:
/// @returns the @ref decode_result which depends on the encoding mode.
/// The result contains a pair including:
/// - the decoded object `v`;
/// - the bytes from `begin(s) + decoding_length(v)` to `end(s)`, where `decoding_length` is the length of
/// decoded object (bytes form), representing the left bytes which remains not used yet.
template<typename T>
concept decoder = requires(bytes s) {
typename T::value_type;
{ T::decode(s) } -> std::same_as<decode_result<typename T::value_type>>;
{ T::template decode<unsafe_mode>(s) } -> std::same_as<decode_result<typename T::value_type, unsafe_mode>>;
};

/// @brief Describes a type which is both @ref encoder and @ref decoder.
Expand Down
127 changes: 127 additions & 0 deletions include/protopuf/coder_mode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright 2020-2024 PragmaTwice
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PROTOPUF_CODER_MODE_H
#define PROTOPUF_CODER_MODE_H

#include <optional>
#include "byte.h"

namespace pp {

/// @brief Describes a coding mode `T` that can build an encoding/decoding result.
///
/// `T` must provide:
/// - a member alias template `result_type<V>`: the mode's result type for a value of type `V`;
/// - a static member function template `make_result<R>(args...)` that takes the result type `R`
///   as its explicit template argument and whose return type is exactly `R`.
///
/// The requirement is probed with `V = std::pair<int, int>` and the arguments `(0, 0)`.
template<typename T>
concept coder_result_maker = requires(std::pair<int, int> v) {
// `v` is never evaluated; it only supplies the probe type through `decltype(v)`.
typename T::template result_type<decltype(v)>;

// `make_result` must hand back exactly the result type it was instantiated with.
{ T::template make_result<typename T::template result_type<decltype(v)>>(0, 0) } ->
std::same_as<typename T::template result_type<decltype(v)>>;
};

/// @brief Describes a coding mode `T` with static member function `get_value_from_result`,
/// which extracts the value from an encoding/decoding result of that mode.
///
/// The call `T::get_value_from_result<R>(std::move(r), v)` must be valid where
/// - `r` is a result of type `R = T::result_type<V>`, passed as an rvalue;
/// - `v` is an lvalue of type `V` that receives the extracted value;
/// and the call must return exactly `bool`: true if the value is extracted, otherwise false.
///
/// The requirement is probed with `V = std::pair<int, int>`.
template<typename T>
concept coder_result_value_getter = requires(std::pair<int, int> v, typename T::template result_type<decltype(v)> r) {
typename T::template result_type<decltype(v)>;

// The result type is given explicitly, so the first parameter is checked against an rvalue result.
{ T::template get_value_from_result<typename T::template result_type<decltype(v)>>(std::move(r), v) } -> std::same_as<bool>;
};

/// @brief Describes a coding mode `T` with static member function `check_iterator`,
/// which reports whether an iterator is valid under that encoding/decoding mode.
///
/// Static member function `check_iterator`:
/// @param itr the iterator to be checked for validity.
/// @param end the iterator to the element following the last element.
/// @returns exactly `bool`: true if the iterator is valid, otherwise false.
template<typename T>
concept iterator_checker = requires(bytes::iterator itr, bytes::iterator end) {
{ T::check_iterator(itr, end) } -> std::same_as<bool>;
};

/// @brief Describes a coding mode `T` with static member function `check_bytes_span`,
/// which reports whether an offset into a byte span is valid under that encoding/decoding mode.
///
/// Static member function `check_bytes_span`:
/// @param b the byte span.
/// @param offset the offset into `b` to be checked for validity.
/// @returns exactly `bool`: true if the offset is valid, otherwise false.
template<typename T>
concept bytes_span_checker = requires(bytes b, std::size_t offset) {
{ T::check_bytes_span(b, offset) } -> std::same_as<bool>;
};

/// @brief Describes a type for the coder operating mode.
///
/// A coder mode must satisfy all of:
/// - @ref coder_result_maker (`result_type`, `make_result`);
/// - @ref coder_result_value_getter (`get_value_from_result`);
/// - @ref iterator_checker (`check_iterator`);
/// - @ref bytes_span_checker (`check_bytes_span`).
template<typename T>
concept coder_mode = coder_result_maker<T> && coder_result_value_getter<T> && iterator_checker<T> && bytes_span_checker<T>;

/// @brief Unsafe @ref coder_mode: coding is performed without any buffer overflow checking.
///
/// Results are plain values and every check unconditionally reports success.
struct unsafe_mode {
    /// The result of a coding step is the value type itself, with any reference stripped.
    template<typename Value>
    using result_type = typename std::remove_reference<Value>::type;

    /// Builds a result of type `R` by brace-initializing it from the forwarded `args`.
    template<typename R, typename... Args>
    static constexpr auto make_result(Args&&... args) -> R {
        return R{std::forward<Args>(args)...};
    }

    /// Assigns the forwarded `result` to `value`.
    /// @returns always `true`.
    template<typename T, typename Out>
    static constexpr auto get_value_from_result(T&& result, Out& value) -> bool {
        value = std::forward<T>(result);
        return true;
    }

    /// Performs no check on the iterator.
    /// @returns always `true`.
    static constexpr auto check_iterator(bytes::iterator /*itr*/, bytes::iterator /*end*/) -> bool {
        return true;
    }

    /// Performs no check on the offset.
    /// @returns always `true`.
    static constexpr auto check_bytes_span(bytes /*b*/, std::size_t /*offset*/) -> bool {
        return true;
    }
};

/// @brief Safe @ref coder_mode to perform coding with buffer overflow checking
/// (the coding result is wrapped into std::optional; an empty optional signals failure).
struct safe_mode {
    /// The result of a coding step: an optional holding the value type with any reference stripped.
    template<typename T>
    using result_type = std::optional<std::remove_reference_t<T>>;

    /// Builds an engaged result of type `R`, constructing its payload in place from `args`.
    template<typename R, typename... Args>
    static constexpr R make_result(Args&&... args) {
        return R{std::in_place, std::forward<Args>(args)...};
    }

    /// Extracts the payload of `result` into `value`.
    /// @param result the optional result to unwrap; its payload is moved from when `result`
    ///               is passed as an rvalue and copied when it is passed as an lvalue.
    /// @param value  receives the payload; left untouched when `result` is empty.
    /// @returns true if `result` held a value, otherwise false.
    template<typename T>
    static constexpr bool get_value_from_result(T&& result, auto& value) {
        if (!result.has_value()) {
            return false;
        }
        // Dereference the *forwarded* optional. `result` is a named parameter, so `*result`
        // is always an lvalue and would copy the payload even for temporaries; forwarding
        // first selects the rvalue overload of `operator*` when the caller passed an rvalue.
        value = *std::forward<T>(result);
        return true;
    }

    /// @returns true if `iter` has not reached `end`, otherwise false.
    static constexpr bool check_iterator(bytes::iterator iter, bytes::iterator end) {
        return iter != end;
    }

    /// @returns true if `b` holds at least `offset` bytes, otherwise false.
    static constexpr bool check_bytes_span(bytes b, std::size_t offset) {
        return b.size() >= offset;
    }
};

}

#endif //PROTOPUF_CODER_MODE_H
16 changes: 11 additions & 5 deletions include/protopuf/enum.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,19 @@ namespace pp {

enum_coder() = delete;

static constexpr bytes encode(T i, bytes b) {
return varint_coder<std::underlying_type_t<T>>::encode(static_cast<std::underlying_type_t<T>>(i), b);
template<coder_mode Mode>
static constexpr encode_result<Mode> encode(T i, bytes b) {
return varint_coder<std::underlying_type_t<T>>::template encode<Mode>(static_cast<std::underlying_type_t<T>>(i), b);
}

static constexpr decode_result<T> decode(bytes b) {
auto [res, bytes] = varint_coder<std::underlying_type_t<T>>::decode(b);
return {static_cast<T>(res), bytes};
template<coder_mode Mode>
static constexpr decode_result<T, Mode> decode(bytes b) {
decode_value<std::underlying_type_t<T>> decode_v;
if (Mode::get_value_from_result(varint_coder<std::underlying_type_t<T>>::template decode<Mode>(b), decode_v)) {
return Mode::template make_result<decode_result<T, Mode>>(static_cast<T>(decode_v.first), decode_v.second);
}

return {};
}
};

Expand Down
1 change: 1 addition & 0 deletions include/protopuf/fixed_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstddef>
#include <algorithm>
#include <string_view>

namespace pp {

Expand Down
Loading

0 comments on commit aa4fd38

Please sign in to comment.