diff --git a/deps/ada/ada.cpp b/deps/ada/ada.cpp index 909fd50034d4fe..449046ff41d3ca 100644 --- a/deps/ada/ada.cpp +++ b/deps/ada/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-10-22 19:50:50 -0400. Do not edit! */ +/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -7,62 +7,79 @@ namespace ada::checkers { ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { - size_t last_dot = view.rfind('.'); - if (last_dot == view.size() - 1) { + // The string is not empty and does not contain upper case ASCII characters. + // + // Optimization. To be considered as a possible ipv4, the string must end + // with 'x' or a lowercase hex character. + // Most of the time, this will be false so this simple check will save a lot + // of effort. + char last_char = view.back(); + // If the address ends with a dot, we need to prune it (special case). + if (last_char == '.') { view.remove_suffix(1); - last_dot = view.rfind('.'); + if (view.empty()) { + return false; + } + last_char = view.back(); } - std::string_view number = - (last_dot == std::string_view::npos) ? view : view.substr(last_dot + 1); - if (number.empty()) { + bool possible_ipv4 = (last_char >= '0' && last_char <= '9') || + (last_char >= 'a' && last_char <= 'f') || + last_char == 'x'; + if (!possible_ipv4) { return false; } + // From the last character, find the last dot. + size_t last_dot = view.rfind('.'); + if (last_dot != std::string_view::npos) { + // We have at least one dot. + view = view.substr(last_dot + 1); + } /** Optimization opportunity: we have basically identified the last number of the ipv4 if we return true here. We might as well parse it and have at least one number parsed when we get to parse_ipv4. */ - if (std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) { + if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) { return true; } - return (checkers::has_hex_prefix(number) && - std::all_of(number.begin() + 2, number.end(), - ada::unicode::is_lowercase_hex)); + // It could be hex (0x), but not if there is a single character. + if (view.size() == 1) { + return false; + } + // It must start with 0x. + if (!std::equal(view.begin(), view.begin() + 2, "0x")) { + return false; + } + // We must allow "0x". + if (view.size() == 2) { + return true; + } + // We have 0x followed by some characters, we need to check that they are + // hexadecimals. + return std::all_of(view.begin() + 2, view.end(), + ada::unicode::is_lowercase_hex); } // for use with path_signature, we include all characters that need percent // encoding. -static constexpr uint8_t path_signature_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('`')] == 1); -static_assert(path_signature_table[uint8_t('{')] == 1); -static_assert(path_signature_table[uint8_t('}')] == 1); -// -static_assert(path_signature_table[uint8_t(' ')] == 1); -static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('"')] == 1); -static_assert(path_signature_table[uint8_t('#')] == 1); -static_assert(path_signature_table[uint8_t('<')] == 1); -static_assert(path_signature_table[uint8_t('>')] == 1); -static_assert(path_signature_table[uint8_t('\\')] == 2); -static_assert(path_signature_table[uint8_t('.')] == 4); -static_assert(path_signature_table[uint8_t('%')] == 8); - -// -static_assert(path_signature_table[0] == 1); -static_assert(path_signature_table[31] == 1); -static_assert(path_signature_table[127] == 1); -static_assert(path_signature_table[128] == 1); -static_assert(path_signature_table[255] == 1); +static constexpr std::array path_signature_table = + []() constexpr { + std::array result{}; + for (size_t i = 0; i < 256; i++) { + if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e || + i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7b || i == 0x7d || + i > 0x7e) { + result[i] = 1; + } else if (i == 0x25) { + result[i] = 8; + } else if (i == 0x2e) { + result[i] = 4; + } else if (i == 0x5c) { + result[i] = 2; + } else { + result[i] = 0; + } + } + return result; + }(); ada_really_inline constexpr uint8_t path_signature( std::string_view input) noexcept { @@ -9912,56 +9929,36 @@ ada_really_inline bool has_tabs_or_newline( // U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), // U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or // U+007C (|). -constexpr static bool is_forbidden_host_code_point_table[] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -static_assert(sizeof(is_forbidden_host_code_point_table) == 256); +constexpr static std::array is_forbidden_host_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|'}) { + result[c] = true; + } + return result; + }(); ada_really_inline constexpr bool is_forbidden_host_code_point( const char c) noexcept { return is_forbidden_host_code_point_table[uint8_t(c)]; } -static_assert(unicode::is_forbidden_host_code_point('\0')); -static_assert(unicode::is_forbidden_host_code_point('\t')); -static_assert(unicode::is_forbidden_host_code_point('\n')); -static_assert(unicode::is_forbidden_host_code_point('\r')); -static_assert(unicode::is_forbidden_host_code_point(' ')); -static_assert(unicode::is_forbidden_host_code_point('#')); -static_assert(unicode::is_forbidden_host_code_point('/')); -static_assert(unicode::is_forbidden_host_code_point(':')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('@')); -static_assert(unicode::is_forbidden_host_code_point('[')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('<')); -static_assert(unicode::is_forbidden_host_code_point('>')); -static_assert(unicode::is_forbidden_host_code_point('\\')); -static_assert(unicode::is_forbidden_host_code_point(']')); -static_assert(unicode::is_forbidden_host_code_point('^')); -static_assert(unicode::is_forbidden_host_code_point('|')); - -constexpr static uint8_t is_forbidden_domain_code_point_table[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +constexpr static std::array is_forbidden_domain_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = true; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = true; + } + for (size_t c = 127; c < 255; c++) { + result[c] = true; + } + return result; + }(); static_assert(sizeof(is_forbidden_domain_code_point_table) == 256); @@ -9986,22 +9983,24 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point( return accumulator; } -constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2); +constexpr static std::array + is_forbidden_domain_code_point_table_or_upper = []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = 1; + } + for (uint8_t c = 'A'; c <= 'Z'; c++) { + result[c] = 2; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = 1; + } + for (size_t c = 127; c < 255; c++) { + result[c] = 1; + } + return result; + }(); ada_really_inline constexpr uint8_t contains_forbidden_domain_code_point_or_upper(const char* input, @@ -10025,41 +10024,22 @@ contains_forbidden_domain_code_point_or_upper(const char* input, return accumulator; } -static_assert(unicode::is_forbidden_domain_code_point('%')); -static_assert(unicode::is_forbidden_domain_code_point('\x7f')); -static_assert(unicode::is_forbidden_domain_code_point('\0')); -static_assert(unicode::is_forbidden_domain_code_point('\t')); -static_assert(unicode::is_forbidden_domain_code_point('\n')); -static_assert(unicode::is_forbidden_domain_code_point('\r')); -static_assert(unicode::is_forbidden_domain_code_point(' ')); -static_assert(unicode::is_forbidden_domain_code_point('#')); -static_assert(unicode::is_forbidden_domain_code_point('/')); -static_assert(unicode::is_forbidden_domain_code_point(':')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('@')); -static_assert(unicode::is_forbidden_domain_code_point('[')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('<')); -static_assert(unicode::is_forbidden_domain_code_point('>')); -static_assert(unicode::is_forbidden_domain_code_point('\\')); -static_assert(unicode::is_forbidden_domain_code_point(']')); -static_assert(unicode::is_forbidden_domain_code_point('^')); -static_assert(unicode::is_forbidden_domain_code_point('|')); - -constexpr static bool is_alnum_plus_table[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -static_assert(sizeof(is_alnum_plus_table) == 256); +// std::isalnum(c) || c == '+' || c == '-' || c == '.') is true for +constexpr static std::array is_alnum_plus_table = []() constexpr { + std::array result{}; + for (size_t c = 0; c < 256; c++) { + if (c >= '0' && c <= '9') { + result[c] = true; + } else if (c >= 'a' && c <= 'z') { + result[c] = true; + } else if (c >= 'A' && c <= 'Z') { + result[c] = true; + } else if (c == '+' || c == '-' || c == '.') { + result[c] = true; + } + } + return result; +}(); ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { return is_alnum_plus_table[uint8_t(c)]; @@ -10067,13 +10047,6 @@ ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { // following under most compilers: return // return (std::isalnum(c) || c == '+' || c == '-' || c == '.'); } -static_assert(unicode::is_alnum_plus('+')); -static_assert(unicode::is_alnum_plus('-')); -static_assert(unicode::is_alnum_plus('.')); -static_assert(unicode::is_alnum_plus('0')); -static_assert(unicode::is_alnum_plus('1')); -static_assert(unicode::is_alnum_plus('a')); -static_assert(unicode::is_alnum_plus('b')); ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || @@ -10617,155 +10590,302 @@ ada_really_inline void resize(std::string_view& input, size_t pos) noexcept { input.remove_suffix(input.size() - pos); } -// Reverse the byte order. -ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept { - // performance: this often compiles to a single instruction (e.g., bswap) - return ((((val)&0xff00000000000000ull) >> 56) | - (((val)&0x00ff000000000000ull) >> 40) | - (((val)&0x0000ff0000000000ull) >> 24) | - (((val)&0x000000ff00000000ull) >> 8) | - (((val)&0x00000000ff000000ull) << 8) | - (((val)&0x0000000000ff0000ull) << 24) | - (((val)&0x000000000000ff00ull) << 40) | - (((val)&0x00000000000000ffull) << 56)); -} - -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept { - // performance: under little-endian systems (most systems), this function - // is free (just returns the input). -#if ADA_IS_BIG_ENDIAN - return swap_bytes(val); -#else - return val; // unchanged (trivial) -#endif +// computes the number of trailing zeroes +// this is a private inline function only defined in this source file. +ada_really_inline int trailing_zeroes(uint32_t input_num) noexcept { +#ifdef ADA_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward(&ret, input_num); + return (int)ret; +#else // ADA_REGULAR_VISUAL_STUDIO + return __builtin_ctzl(input_num); +#endif // ADA_REGULAR_VISUAL_STUDIO } // starting at index location, this finds the next location of a character // :, /, \\, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON +// The ada_make_uint8x16_t macro is necessary because Visual Studio does not +// support direct initialization of uint8x16_t. See +// https://developercommunity.visualstudio.com/t/error-C2078:-too-many-initializers-whe/402911?q=backend+neon +#ifndef ada_make_uint8x16_t +#define ada_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + static uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif + ada_really_inline size_t find_next_host_delimiter_special( std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' || view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('\\'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x04, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); } } + if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); } } - return view.size(); + return size_t(view.size()); } +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' || view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask3 = _mm_set1_epi8('\\'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); + } + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ \\ ? +static constexpr std::array special_host_delimiters = + []() constexpr { + std::array result{}; + for (int i : {':', '/', '[', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (special_host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; + } + } + return size_t(view.size()); +} +#endif // starting at index location, this finds the next location of a character // :, /, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' || + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); + } + } + + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); + } + } + return size_t(view.size()); +} +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' || + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); } } if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. - memcpy(&word, view.data() + i, view.size() - i); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ ? +static constexpr std::array host_delimiters = []() constexpr { + std::array result{}; + for (int i : {':', '/', '?', '['}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; } } - return view.size(); + return size_t(view.size()); } +#endif ada_really_inline std::pair get_host_delimiter_location( const bool is_special, std::string_view& view) noexcept { @@ -11040,101 +11160,47 @@ ada_really_inline void strip_trailing_spaces_from_opaque_path( url.update_base_pathname(path); } +// @ / \\ ? +static constexpr std::array authority_delimiter_special = + []() constexpr { + std::array result{}; + for (int i : {'@', '/', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - uint64_t mask4 = broadcast('\\'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter_special[(uint8_t)*pos]) { + return pos - view.begin(); } } - - return view.size(); + return size_t(view.size()); } +// @ / ? +static constexpr std::array authority_delimiter = []() constexpr { + std::array result{}; + for (int i : {'@', '/', '?'}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter[(uint8_t)*pos]) { + return pos - view.begin(); } } - - return view.size(); + return size_t(view.size()); } } // namespace ada::helpers @@ -11143,6 +11209,7 @@ namespace ada { ada_warn_unused std::string to_string(ada::state state) { return ada::helpers::get_state(state); } +#undef ada_make_uint8x16_t } // namespace ada /* end file src/helpers.cpp */ /* begin file src/url.cpp */ diff --git a/deps/ada/ada.h b/deps/ada/ada.h index 6d98b37075f892..a4aa39d17046ad 100644 --- a/deps/ada/ada.h +++ b/deps/ada/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-10-22 19:50:50 -0400. Do not edit! */ +/* auto-generated on 2023-11-19 13:35:02 -0500. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -1670,18 +1670,6 @@ template ada_really_inline void strip_trailing_spaces_from_opaque_path( url_type& url) noexcept; -/** - * @private - * Reverse the order of the bytes. - */ -ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept; - -/** - * @private - * Reverse the order of the bytes but only if the system is big endian - */ -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept; - /** * @private * Finds the delimiter of a view in authority state. @@ -5032,7 +5020,9 @@ ada_really_inline bool begins_with(std::string_view view, std::string_view prefix); /** - * Returns true if an input is an ipv4 address. + * Returns true if an input is an ipv4 address. It is assumed that the string + * does not contain uppercase ASCII characters (the input should have been + * lowered cased before calling this function) and is not empty. */ ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; @@ -7088,14 +7078,14 @@ url_search_params_entries_iter::next() { #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.7.2" +#define ADA_VERSION "2.7.4" namespace ada { enum { ADA_VERSION_MAJOR = 2, ADA_VERSION_MINOR = 7, - ADA_VERSION_REVISION = 2, + ADA_VERSION_REVISION = 4, }; } // namespace ada diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index bc4c73f8105a9e..74791922b655fe 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -9,7 +9,7 @@ All dependencies are located within the `deps` directory. This a list of all the dependencies: * [acorn 8.11.2][] -* [ada 2.7.2][] +* [ada 2.7.4][] * [base64 0.5.1][] * [brotli 1.0.9][] * [c-ares 1.20.1][] @@ -151,7 +151,7 @@ The [acorn](/~https://github.com/acornjs/acorn) dependency is a JavaScript parser. [acorn-walk](/~https://github.com/acornjs/acorn/tree/master/acorn-walk) is an abstract syntax tree walker for the ESTree format. -### ada 2.7.2 +### ada 2.7.4 The [ada](/~https://github.com/ada-url/ada) dependency is a fast and spec-compliant URL parser written in C++. @@ -325,7 +325,7 @@ it comes from the Chromium team's zlib fork which incorporated performance improvements not currently available in standard zlib. [acorn 8.11.2]: #acorn-8112 -[ada 2.7.2]: #ada-272 +[ada 2.7.4]: #ada-274 [base64 0.5.1]: #base64-051 [brotli 1.0.9]: #brotli-109 [c-ares 1.20.1]: #c-ares-1201