Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve dump_integer performance #1411

Merged
merged 3 commits into from
Jan 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ file(COPY ${CMAKE_SOURCE_DIR}/data DESTINATION .)
file(COPY ${CMAKE_SOURCE_DIR}/../test/data/regression/floats.json
${CMAKE_SOURCE_DIR}/../test/data/regression/unsigned_ints.json
${CMAKE_SOURCE_DIR}/../test/data/regression/signed_ints.json
${CMAKE_SOURCE_DIR}/../test/data/regression/small_signed_ints.json
DESTINATION data/numbers)

# benchmark binary
Expand Down
33 changes: 18 additions & 15 deletions benchmarks/src/benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ static void ParseFile(benchmark::State& state, const char* filename)
std::ifstream file(filename, std::ios::binary | std::ios::ate);
state.SetBytesProcessed(state.iterations() * file.tellg());
}
BENCHMARK_CAPTURE(ParseFile, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseFile, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseFile, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseFile, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseFile, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");

BENCHMARK_CAPTURE(ParseFile, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseFile, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseFile, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseFile, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseFile, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseFile, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseFile, unsigned_ints, "data/numbers/unsigned_ints.json");
BENCHMARK_CAPTURE(ParseFile, small_signed_ints, "data/numbers/small_signed_ints.json");

//////////////////////////////////////////////////////////////////////////////
// parse JSON from string
Expand All @@ -61,13 +61,14 @@ static void ParseString(benchmark::State& state, const char* filename)

state.SetBytesProcessed(state.iterations() * str.size());
}
BENCHMARK_CAPTURE(ParseString, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseString, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseString, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseString, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
BENCHMARK_CAPTURE(ParseString, jeopardy, "data/jeopardy/jeopardy.json");
BENCHMARK_CAPTURE(ParseString, canada, "data/nativejson-benchmark/canada.json");
BENCHMARK_CAPTURE(ParseString, citm_catalog, "data/nativejson-benchmark/citm_catalog.json");
BENCHMARK_CAPTURE(ParseString, twitter, "data/nativejson-benchmark/twitter.json");
BENCHMARK_CAPTURE(ParseString, floats, "data/numbers/floats.json");
BENCHMARK_CAPTURE(ParseString, signed_ints, "data/numbers/signed_ints.json");
BENCHMARK_CAPTURE(ParseString, unsigned_ints, "data/numbers/unsigned_ints.json");
BENCHMARK_CAPTURE(ParseString, small_signed_ints, "data/numbers/small_signed_ints.json");


//////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -101,6 +102,8 @@ BENCHMARK_CAPTURE(Dump, signed_ints / -, "data/numbers/signed_ints.json",
BENCHMARK_CAPTURE(Dump, signed_ints / 4, "data/numbers/signed_ints.json", 4);
BENCHMARK_CAPTURE(Dump, unsigned_ints / -, "data/numbers/unsigned_ints.json", -1);
BENCHMARK_CAPTURE(Dump, unsigned_ints / 4, "data/numbers/unsigned_ints.json", 4);
BENCHMARK_CAPTURE(Dump, small_signed_ints / -, "data/numbers/small_signed_ints.json", -1);
BENCHMARK_CAPTURE(Dump, small_signed_ints / 4, "data/numbers/small_signed_ints.json", 4);


BENCHMARK_MAIN();
108 changes: 94 additions & 14 deletions include/nlohmann/detail/output/serializer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,40 @@ class serializer
}
}

/*!
@brief count digits

Count the number of decimal (base 10) digits for an input unsigned integer.

@param[in] x unsigned integer number to count its digits
@return number of decimal digits
*/
inline unsigned int count_digits(number_unsigned_t x) noexcept
{
unsigned int n_digits = 1;
for (;;)
{
if (x < 10)
{
return n_digits;
}
if (x < 100)
{
return n_digits + 1;
}
if (x < 1000)
{
return n_digits + 2;
}
if (x < 10000)
{
return n_digits + 3;
}
x = x / 10000u;
n_digits += 4;
}
}

/*!
@brief dump an integer

Expand All @@ -542,35 +576,81 @@ class serializer
int> = 0>
void dump_integer(NumberType x)
{
static constexpr std::array<std::array<char, 2>, 100> digits_to_99
{
{
{'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
{'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
{'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
{'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
{'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
{'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
{'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
{'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
{'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
{'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'},
}
};

// special case for "0"
if (x == 0)
{
o->write_character('0');
return;
}

const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755
std::size_t i = 0;
// use a pointer to fill the buffer
auto buffer_ptr = begin(number_buffer);

const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
number_unsigned_t abs_value;

unsigned int n_chars;

while (x != 0)
if (is_negative)
{
*buffer_ptr = '-';
abs_value = static_cast<number_unsigned_t>(0 - x);

// account one more byte for the minus sign
n_chars = 1 + count_digits(abs_value);
}
else
{
// spare 1 byte for '\0'
assert(i < number_buffer.size() - 1);
abs_value = static_cast<number_unsigned_t>(x);
n_chars = count_digits(abs_value);
}

// spare 1 byte for '\0'
assert(n_chars < number_buffer.size() - 1);

const auto digit = std::labs(static_cast<long>(x % 10));
number_buffer[i++] = static_cast<char>('0' + digit);
x /= 10;
// jump to the end to generate the string from backward
// so we later avoid reversing the result
buffer_ptr += n_chars;

// Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
// See: https://www.youtube.com/watch?v=o4-CwDo2zpg
const auto buffer_end = buffer_ptr;
while (abs_value >= 100)
{
const auto digits_index = static_cast<unsigned>((abs_value % 100));
abs_value /= 100;
*(--buffer_ptr) = digits_to_99[digits_index][1];
*(--buffer_ptr) = digits_to_99[digits_index][0];
}

if (is_negative)
if (abs_value >= 10)
{
const auto digits_index = static_cast<unsigned>(abs_value);
*(--buffer_ptr) = digits_to_99[digits_index][1];
*(--buffer_ptr) = digits_to_99[digits_index][0];
}
else
{
// make sure there is capacity for the '-'
assert(i < number_buffer.size() - 2);
number_buffer[i++] = '-';
*(--buffer_ptr) = static_cast<char>('0' + abs_value);
}

std::reverse(number_buffer.begin(), number_buffer.begin() + i);
o->write_characters(number_buffer.data(), i);
o->write_characters(number_buffer.data(), n_chars);
}

/*!
Expand Down
108 changes: 94 additions & 14 deletions single_include/nlohmann/json.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11410,6 +11410,40 @@ class serializer
}
}

/*!
@brief count digits

Count the number of decimal (base 10) digits for an input unsigned integer.

@param[in] x unsigned integer number to count its digits
@return number of decimal digits
*/
inline unsigned int count_digits(number_unsigned_t x) noexcept
{
unsigned int n_digits = 1;
for (;;)
{
if (x < 10)
{
return n_digits;
}
if (x < 100)
{
return n_digits + 1;
}
if (x < 1000)
{
return n_digits + 2;
}
if (x < 10000)
{
return n_digits + 3;
}
x = x / 10000u;
n_digits += 4;
}
}

/*!
@brief dump an integer

Expand All @@ -11425,35 +11459,81 @@ class serializer
int> = 0>
void dump_integer(NumberType x)
{
static constexpr std::array<std::array<char, 2>, 100> digits_to_99
{
{
{'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
{'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
{'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
{'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
{'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
{'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
{'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
{'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
{'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
{'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'},
}
};

// special case for "0"
if (x == 0)
{
o->write_character('0');
return;
}

const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755
std::size_t i = 0;
// use a pointer to fill the buffer
auto buffer_ptr = begin(number_buffer);

const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not(x >= 0); // see issue #755
number_unsigned_t abs_value;

unsigned int n_chars;

while (x != 0)
if (is_negative)
{
*buffer_ptr = '-';
abs_value = static_cast<number_unsigned_t>(0 - x);

// account one more byte for the minus sign
n_chars = 1 + count_digits(abs_value);
}
else
{
// spare 1 byte for '\0'
assert(i < number_buffer.size() - 1);
abs_value = static_cast<number_unsigned_t>(x);
n_chars = count_digits(abs_value);
}

// spare 1 byte for '\0'
assert(n_chars < number_buffer.size() - 1);

const auto digit = std::labs(static_cast<long>(x % 10));
number_buffer[i++] = static_cast<char>('0' + digit);
x /= 10;
// jump to the end to generate the string from backward
// so we later avoid reversing the result
buffer_ptr += n_chars;

// Fast int2ascii implementation inspired by "Fastware" talk by Andrei Alexandrescu
// See: https://www.youtube.com/watch?v=o4-CwDo2zpg
const auto buffer_end = buffer_ptr;
while (abs_value >= 100)
{
const auto digits_index = static_cast<unsigned>((abs_value % 100));
abs_value /= 100;
*(--buffer_ptr) = digits_to_99[digits_index][1];
*(--buffer_ptr) = digits_to_99[digits_index][0];
}

if (is_negative)
if (abs_value >= 10)
{
const auto digits_index = static_cast<unsigned>(abs_value);
*(--buffer_ptr) = digits_to_99[digits_index][1];
*(--buffer_ptr) = digits_to_99[digits_index][0];
}
else
{
// make sure there is capacity for the '-'
assert(i < number_buffer.size() - 2);
number_buffer[i++] = '-';
*(--buffer_ptr) = static_cast<char>('0' + abs_value);
}

std::reverse(number_buffer.begin(), number_buffer.begin() + i);
o->write_characters(number_buffer.data(), i);
o->write_characters(number_buffer.data(), n_chars);
}

/*!
Expand Down
Loading