diff --git a/tools/inspector_protocol/code_generator.py b/tools/inspector_protocol/code_generator.py index c1f78dc7492d78..0b8baea0ae710e 100755 --- a/tools/inspector_protocol/code_generator.py +++ b/tools/inspector_protocol/code_generator.py @@ -103,6 +103,17 @@ def init_defaults(config_tuple, path, defaults): ".lib": False, ".lib.export_macro": "", ".lib.export_header": False, + # The encoding lib consists of encoding/encoding.h and + # encoding/encoding.cc in its subdirectory, which binaries + # may link / depend on, instead of relying on the + # JINJA2 templates lib/encoding_{h,cc}.template. + # In that case, |header| identifies the include file + # and |namespace| is the namespace it's using. Usually + # inspector_protocol_encoding but for v8's copy it's + # v8_inspector_protocol_encoding. + # TODO(johannes): Migrate away from lib/encoding_{h,cc}.template + # in favor of this. + ".encoding_lib": { "header": "", "namespace": []}, } for key_value in config_values: parts = key_value.split("=") diff --git a/tools/inspector_protocol/encoding/encoding.cc b/tools/inspector_protocol/encoding/encoding.cc index 353316a555373d..1513767a85592b 100644 --- a/tools/inspector_protocol/encoding/encoding.cc +++ b/tools/inspector_protocol/encoding/encoding.cc @@ -185,11 +185,10 @@ namespace internals { // |type| is the major type as specified in RFC 7049 Section 2.1. // |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size // (e.g. for BYTE_STRING). -// If successful, returns the number of bytes read. Otherwise returns -1. -// TODO(johannes): change return type to size_t and use 0 for error. -int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { +// If successful, returns the number of bytes read. Otherwise returns 0. 
+size_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { if (bytes.empty()) - return -1; + return 0; uint8_t initial_byte = bytes[0]; *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); @@ -203,32 +202,32 @@ int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { if (additional_information == kAdditionalInformation1Byte) { // Values 24-255 are encoded with one initial byte, followed by the value. if (bytes.size() < 2) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 2; } if (additional_information == kAdditionalInformation2Bytes) { // Values 256-65535: 1 initial byte + 2 bytes payload. if (bytes.size() < 1 + sizeof(uint16_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 3; } if (additional_information == kAdditionalInformation4Bytes) { // 32 bit uint: 1 initial byte + 4 bytes payload. if (bytes.size() < 1 + sizeof(uint32_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 5; } if (additional_information == kAdditionalInformation8Bytes) { // 64 bit uint: 1 initial byte + 8 bytes payload. if (bytes.size() < 1 + sizeof(uint64_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 9; } - return -1; + return 0; } // Writes the start of a token with |type|. 
The |value| may indicate the size, @@ -770,10 +769,10 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { SetToken(CBORTokenTag::NULL_VALUE, 1); return; case kExpectedConversionToBase64Tag: { // BINARY - const int8_t bytes_read = internals::ReadTokenStart( + const size_t bytes_read = internals::ReadTokenStart( bytes_.subspan(status_.pos + 1), &token_start_type_, &token_start_internal_value_); - if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING || + if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING || token_start_internal_value_ > kMaxValidLength) { SetError(Error::CBOR_INVALID_BINARY); return; @@ -823,48 +822,47 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { return; } default: { - const int8_t token_start_length = internals::ReadTokenStart( + const size_t bytes_read = internals::ReadTokenStart( bytes_.subspan(status_.pos), &token_start_type_, &token_start_internal_value_); - const bool success = token_start_length >= 0; switch (token_start_type_) { case MajorType::UNSIGNED: // INT32. // INT32 is a signed int32 (int32 makes sense for the // inspector_protocol, it's not a CBOR limitation), so we check // against the signed max, so that the allowable values are // 0, 1, 2, ... 2^31 - 1. - if (!success || - static_cast(std::numeric_limits::max()) < - static_cast(token_start_internal_value_)) { + if (!bytes_read || std::numeric_limits::max() < + token_start_internal_value_) { SetError(Error::CBOR_INVALID_INT32); return; } - SetToken(CBORTokenTag::INT32, token_start_length); + SetToken(CBORTokenTag::INT32, bytes_read); return; case MajorType::NEGATIVE: { // INT32. // INT32 is a signed int32 (int32 makes sense for the // inspector_protocol, it's not a CBOR limitation); in CBOR, the // negative values for INT32 are represented as NEGATIVE, that is, -1 // INT32 is represented as 1 << 5 | 0 (major type 1, additional info - // value 0). The minimal allowed INT32 value in our protocol is - // std::numeric_limits::min(). 
We check for it by directly - // checking the payload against the maximal allowed signed (!) int32 - // value. - if (!success || token_start_internal_value_ > - std::numeric_limits::max()) { + // value 0). + // The represented allowed values range is -1 to -2^31. + // They are mapped into the encoded range of 0 to 2^31-1. + // We check the payload in token_start_internal_value_ against + // that range (2^31-1 is also known as + // std::numeric_limits::max()). + if (!bytes_read || token_start_internal_value_ > + std::numeric_limits::max()) { SetError(Error::CBOR_INVALID_INT32); return; } - SetToken(CBORTokenTag::INT32, token_start_length); + SetToken(CBORTokenTag::INT32, bytes_read); return; } case MajorType::STRING: { // STRING8. - if (!success || token_start_internal_value_ > kMaxValidLength) { + if (!bytes_read || token_start_internal_value_ > kMaxValidLength) { SetError(Error::CBOR_INVALID_STRING8); return; } - uint64_t token_byte_length = - token_start_internal_value_ + token_start_length; + uint64_t token_byte_length = token_start_internal_value_ + bytes_read; if (token_byte_length > remaining_bytes) { SetError(Error::CBOR_INVALID_STRING8); return; @@ -876,13 +874,12 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { case MajorType::BYTE_STRING: { // STRING16. // Length must be divisible by 2 since UTF16 is 2 bytes per // character, hence the &1 check. 
- if (!success || token_start_internal_value_ > kMaxValidLength || + if (!bytes_read || token_start_internal_value_ > kMaxValidLength || token_start_internal_value_ & 1) { SetError(Error::CBOR_INVALID_STRING16); return; } - uint64_t token_byte_length = - token_start_internal_value_ + token_start_length; + uint64_t token_byte_length = token_start_internal_value_ + bytes_read; if (token_byte_length > remaining_bytes) { SetError(Error::CBOR_INVALID_STRING16); return; diff --git a/tools/inspector_protocol/encoding/encoding.h b/tools/inspector_protocol/encoding/encoding.h index 90916d42b36dae..08596e9e1e43f0 100644 --- a/tools/inspector_protocol/encoding/encoding.h +++ b/tools/inspector_protocol/encoding/encoding.h @@ -427,7 +427,7 @@ Status AppendString8EntryToCBORMap(span string8_key, std::string* cbor); namespace internals { // Exposed only for writing tests. -int8_t ReadTokenStart(span bytes, +size_t ReadTokenStart(span bytes, cbor::MajorType* type, uint64_t* value); diff --git a/tools/inspector_protocol/encoding/encoding_test.cc b/tools/inspector_protocol/encoding/encoding_test.cc index 338d1ece10b87f..6893fe2581683c 100644 --- a/tools/inspector_protocol/encoding/encoding_test.cc +++ b/tools/inspector_protocol/encoding/encoding_test.cc @@ -235,7 +235,9 @@ TEST(EncodeDecodeInt32Test, RoundtripsInt32Max) { } TEST(EncodeDecodeInt32Test, RoundtripsInt32Min) { - // std::numeric_limits is encoded as a uint32 after the initial byte. + // std::numeric_limits is encoded as a uint32 (4 unsigned bytes) + // after the initial byte, which effectively carries the sign by + // designating the token as NEGATIVE. std::vector encoded; EncodeInt32(std::numeric_limits::min(), &encoded); // 1 for initial byte, 4 for the uint32. 
@@ -248,6 +250,10 @@ TEST(EncodeDecodeInt32Test, RoundtripsInt32Min) { CBORTokenizer tokenizer(SpanFrom(encoded)); EXPECT_EQ(CBORTokenTag::INT32, tokenizer.TokenTag()); EXPECT_EQ(std::numeric_limits::min(), tokenizer.GetInt32()); + // It's nice to see how the min int32 value reads in hex: + // That is, -1 minus the unsigned payload (0x7fffffff, see above). + int32_t expected = -1 - 0x7fffffff; + EXPECT_EQ(expected, tokenizer.GetInt32()); tokenizer.Next(); EXPECT_EQ(CBORTokenTag::DONE, tokenizer.TokenTag()); } diff --git a/tools/inspector_protocol/lib/Values_cpp.template b/tools/inspector_protocol/lib/Values_cpp.template index 17c69255851ee7..8b4dfc91e3b9c9 100644 --- a/tools/inspector_protocol/lib/Values_cpp.template +++ b/tools/inspector_protocol/lib/Values_cpp.template @@ -6,6 +6,10 @@ //#include "Values.h" +{% if config.encoding_lib.header %} +#include "{{config.encoding_lib.header}}" +{% endif %} + {% for namespace in config.protocol.namespace %} namespace {{namespace}} { {% endfor %} @@ -64,6 +68,30 @@ void escapeStringForJSONInternal(const Char* str, unsigned len, // to this constant. 
static constexpr int kStackLimitValues = 1000; +{% if config.encoding_lib.namespace %} +using {{"::".join(config.encoding_lib.namespace)}}::Error; +using {{"::".join(config.encoding_lib.namespace)}}::Status; +using {{"::".join(config.encoding_lib.namespace)}}::span; +namespace cbor { +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::CBORTokenTag; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::CBORTokenizer; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeBinary; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeDouble; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFalse; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFromLatin1; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeFromUTF16; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeIndefiniteLengthArrayStart; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeIndefiniteLengthMapStart; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeInt32; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeNull; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeStop; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeString8; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EncodeTrue; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::EnvelopeEncoder; +using {{"::".join(config.encoding_lib.namespace + ['cbor'])}}::InitialByteForEnvelope; +} // namespace cbor +{% endif %} + // Below are three parsing routines for CBOR, which cover enough // to roundtrip JSON messages. 
std::unique_ptr parseMap(int32_t stack_depth, cbor::CBORTokenizer* tokenizer); diff --git a/tools/inspector_protocol/lib/encoding_cpp.template b/tools/inspector_protocol/lib/encoding_cpp.template index 2fc7dd623fdcc3..e950acd6a6f34d 100644 --- a/tools/inspector_protocol/lib/encoding_cpp.template +++ b/tools/inspector_protocol/lib/encoding_cpp.template @@ -5,6 +5,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +{% if config.encoding_lib.header == "" %} #include #include @@ -192,11 +193,10 @@ namespace internals { // |type| is the major type as specified in RFC 7049 Section 2.1. // |value| is the payload (e.g. for MajorType::UNSIGNED) or is the size // (e.g. for BYTE_STRING). -// If successful, returns the number of bytes read. Otherwise returns -1. -// TODO(johannes): change return type to size_t and use 0 for error. -int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { +// If successful, returns the number of bytes read. Otherwise returns 0. +size_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { if (bytes.empty()) - return -1; + return 0; uint8_t initial_byte = bytes[0]; *type = MajorType((initial_byte & kMajorTypeMask) >> kMajorTypeBitShift); @@ -210,32 +210,32 @@ int8_t ReadTokenStart(span bytes, MajorType* type, uint64_t* value) { if (additional_information == kAdditionalInformation1Byte) { // Values 24-255 are encoded with one initial byte, followed by the value. if (bytes.size() < 2) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 2; } if (additional_information == kAdditionalInformation2Bytes) { // Values 256-65535: 1 initial byte + 2 bytes payload. if (bytes.size() < 1 + sizeof(uint16_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 3; } if (additional_information == kAdditionalInformation4Bytes) { // 32 bit uint: 1 initial byte + 4 bytes payload. 
if (bytes.size() < 1 + sizeof(uint32_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 5; } if (additional_information == kAdditionalInformation8Bytes) { // 64 bit uint: 1 initial byte + 8 bytes payload. if (bytes.size() < 1 + sizeof(uint64_t)) - return -1; + return 0; *value = ReadBytesMostSignificantByteFirst(bytes.subspan(1)); return 9; } - return -1; + return 0; } // Writes the start of a token with |type|. The |value| may indicate the size, @@ -777,10 +777,10 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { SetToken(CBORTokenTag::NULL_VALUE, 1); return; case kExpectedConversionToBase64Tag: { // BINARY - const int8_t bytes_read = internals::ReadTokenStart( + const size_t bytes_read = internals::ReadTokenStart( bytes_.subspan(status_.pos + 1), &token_start_type_, &token_start_internal_value_); - if (bytes_read < 0 || token_start_type_ != MajorType::BYTE_STRING || + if (!bytes_read || token_start_type_ != MajorType::BYTE_STRING || token_start_internal_value_ > kMaxValidLength) { SetError(Error::CBOR_INVALID_BINARY); return; @@ -830,47 +830,47 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { return; } default: { - const int8_t token_start_length = internals::ReadTokenStart( + const size_t bytes_read = internals::ReadTokenStart( bytes_.subspan(status_.pos), &token_start_type_, &token_start_internal_value_); - const bool success = token_start_length >= 0; switch (token_start_type_) { case MajorType::UNSIGNED: // INT32. // INT32 is a signed int32 (int32 makes sense for the // inspector_protocol, it's not a CBOR limitation), so we check // against the signed max, so that the allowable values are // 0, 1, 2, ... 2^31 - 1. 
- if (!success || - static_cast(std::numeric_limits::max()) < - static_cast(token_start_internal_value_)) { + if (!bytes_read || std::numeric_limits::max() < + token_start_internal_value_) { SetError(Error::CBOR_INVALID_INT32); return; } - SetToken(CBORTokenTag::INT32, token_start_length); + SetToken(CBORTokenTag::INT32, bytes_read); return; case MajorType::NEGATIVE: { // INT32. // INT32 is a signed int32 (int32 makes sense for the - // inspector_protocol, it's not a CBOR limitation); in CBOR, - // the negative values for INT32 are represented as NEGATIVE, - // that is, -1 INT32 is represented as 1 << 5 | 0 (major type 1, - // additional info value 0). So here, we compute the INT32 value - // and then check it against the INT32 min. - int64_t actual_value = - -static_cast(token_start_internal_value_) - 1; - if (!success || actual_value < std::numeric_limits::min()) { + // inspector_protocol, it's not a CBOR limitation); in CBOR, the + // negative values for INT32 are represented as NEGATIVE, that is, -1 + // INT32 is represented as 1 << 5 | 0 (major type 1, additional info + // value 0). + // The represented allowed values range is -1 to -2^31. + // They are mapped into the encoded range of 0 to 2^31-1. + // We check the payload in token_start_internal_value_ against + // that range (2^31-1 is also known as + // std::numeric_limits::max()). + if (!bytes_read || token_start_internal_value_ > + std::numeric_limits::max()) { SetError(Error::CBOR_INVALID_INT32); return; } - SetToken(CBORTokenTag::INT32, token_start_length); + SetToken(CBORTokenTag::INT32, bytes_read); return; } case MajorType::STRING: { // STRING8. 
- if (!success || token_start_internal_value_ > kMaxValidLength) { + if (!bytes_read || token_start_internal_value_ > kMaxValidLength) { SetError(Error::CBOR_INVALID_STRING8); return; } - uint64_t token_byte_length = - token_start_internal_value_ + token_start_length; + uint64_t token_byte_length = token_start_internal_value_ + bytes_read; if (token_byte_length > remaining_bytes) { SetError(Error::CBOR_INVALID_STRING8); return; @@ -882,13 +882,12 @@ void CBORTokenizer::ReadNextToken(bool enter_envelope) { case MajorType::BYTE_STRING: { // STRING16. // Length must be divisible by 2 since UTF16 is 2 bytes per // character, hence the &1 check. - if (!success || token_start_internal_value_ > kMaxValidLength || + if (!bytes_read || token_start_internal_value_ > kMaxValidLength || token_start_internal_value_ & 1) { SetError(Error::CBOR_INVALID_STRING16); return; } - uint64_t token_byte_length = - token_start_internal_value_ + token_start_length; + uint64_t token_byte_length = token_start_internal_value_ + bytes_read; if (token_byte_length > remaining_bytes) { SetError(Error::CBOR_INVALID_STRING16); return; @@ -1864,7 +1863,7 @@ class JsonParser { // If the |Char| we're dealing with is really a byte, then // we have utf8 here, and we need to check for multibyte characters // and transcode them to utf16 (either one or two utf16 chars). - if (sizeof(Char) == sizeof(uint8_t) && c >= 0x7f) { + if (sizeof(Char) == sizeof(uint8_t) && c > 0x7f) { // Inspect the leading byte to figure out how long the utf8 // byte sequence is; while doing this initialize |codepoint| // with the first few bits. @@ -1903,7 +1902,7 @@ class JsonParser { // Disallow overlong encodings for ascii characters, as these // would include " and other characters significant to JSON // string termination / control. - if (codepoint < 0x7f) + if (codepoint <= 0x7f) return false; // Invalid in UTF8, and can't be represented in UTF16 anyway. 
if (codepoint > 0x10ffff) @@ -2198,3 +2197,5 @@ Status ConvertJSONToCBOR(const Platform& platform, {% for namespace in config.protocol.namespace %} } // namespace {{namespace}} {% endfor %} + +{% endif %} diff --git a/tools/inspector_protocol/lib/encoding_h.template b/tools/inspector_protocol/lib/encoding_h.template index f1a52a1958a14d..2c6cfc10d594c2 100644 --- a/tools/inspector_protocol/lib/encoding_h.template +++ b/tools/inspector_protocol/lib/encoding_h.template @@ -5,6 +5,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +{% if config.encoding_lib.header == "" %} #ifndef {{"_".join(config.protocol.namespace)}}_encoding_h #define {{"_".join(config.protocol.namespace)}}_encoding_h @@ -435,7 +436,7 @@ Status AppendString8EntryToCBORMap(span string8_key, std::string* cbor); namespace internals { // Exposed only for writing tests. -int8_t ReadTokenStart(span bytes, +size_t ReadTokenStart(span bytes, cbor::MajorType* type, uint64_t* value); @@ -518,3 +519,4 @@ Status ConvertJSONToCBOR(const Platform& platform, } // namespace {{namespace}} {% endfor %} #endif // !defined({{"_".join(config.protocol.namespace)}}_encoding_h) +{% endif %}