Skip to content

Commit

Permalink
util: add fast path for text-decoder fatal flag
Browse files (browse the repository at this point in the history)
PR-URL: #45803
Reviewed-By: Robert Nagy <ronagy@icloud.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Michael Dawson <midawson@redhat.com>
  • Loading branch information
anonrig authored and RafaelGSS committed Jan 4, 2023
1 parent f537eac commit 73209f0
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 9 deletions.
11 changes: 8 additions & 3 deletions benchmark/util/text-decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ const common = require('../common.js');
const bench = common.createBenchmark(main, {
encoding: ['utf-8', 'latin1', 'iso-8859-3'],
ignoreBOM: [0, 1],
fatal: [0, 1],
len: [256, 1024 * 16, 1024 * 512],
n: [1e2],
type: ['SharedArrayBuffer', 'ArrayBuffer', 'Buffer']
});

function main({ encoding, len, n, ignoreBOM, type }) {
const decoder = new TextDecoder(encoding, { ignoreBOM });
function main({ encoding, len, n, ignoreBOM, type, fatal }) {
const decoder = new TextDecoder(encoding, { ignoreBOM, fatal });
let buf;

switch (type) {
Expand All @@ -31,7 +32,11 @@ function main({ encoding, len, n, ignoreBOM, type }) {

bench.start();
for (let i = 0; i < n; i++) {
decoder.decode(buf);
try {
decoder.decode(buf);
} catch {
// eslint-disable no-empty
}
}
bench.end(n);
}
12 changes: 6 additions & 6 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

Expand Down Expand Up @@ -396,17 +397,16 @@ function makeTextDecoderICU() {
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

// Only support fast path for UTF-8 without FATAL flag
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);

this[kDecoder] = true;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kUTF8FastPath] = fastPathAvailable;
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kHandle] = undefined;

if (!fastPathAvailable) {
if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -425,7 +425,7 @@ function makeTextDecoderICU() {
this[kUTF8FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM]);
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

this.#prepareConverter();
Expand Down
11 changes: 11 additions & 0 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "node_internals.h"

#include "env-inl.h"
#include "simdutf.h"
#include "string_bytes.h"
#include "string_search.h"
#include "util-inl.h"
Expand Down Expand Up @@ -583,10 +584,20 @@ void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
ArrayBufferViewContents<char> buffer(args[0]);

bool ignore_bom = args[1]->IsTrue();
bool has_fatal = args[2]->IsTrue();

const char* data = buffer.data();
size_t length = buffer.length();

if (has_fatal) {
auto result = simdutf::validate_utf8_with_errors(data, length);

if (result.error) {
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
env->isolate(), "The encoded data was not valid for encoding utf-8");
}
}

if (!ignore_bom && length >= 3) {
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
data += 3;
Expand Down

0 comments on commit 73209f0

Please sign in to comment.