-
Notifications
You must be signed in to change notification settings - Fork 2.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Carry over node isIPv4/isIPv6 performance improvements #8271
feat: Carry over node isIPv4/isIPv6 performance improvements #8271
Conversation
Copy over the `isIPv4`/`isIPv6` performance improvements from <nodejs/node#49568>.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
JavaScriptCore is a different engine, so the performance may be different. Do you have a benchmark that shows this regex is faster than the previous?
Sure, that makes sense, since the regex engine is likely to be quite different. The new code is also faster in Bun, except in some specific cases. Here are mitata-based benchmarks:

import { baseline, bench, group, run } from 'mitata';
import assert from 'node:assert';
// Baseline ("OLD") patterns that the optimized variants are compared against.

// One dotted-decimal octet; the single-digit alternative is tried first.
const v4Seg_OLD = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
// Four octets separated by dots; the repeated "octet + dot" part is a capturing group.
const v4Str_OLD = `(${v4Seg_OLD}[.]){3}${v4Seg_OLD}`;
const IPv4Reg_OLD = new RegExp(`^${v4Str_OLD}$`);

// One IPv6 group: one to four hex digits.
const v6Seg_OLD = '(?:[0-9a-fA-F]{1,4})';
// Alternatives ordered by the number of leading "group:" pairs (7 down to 1);
// the last entry covers inputs that start with ':'.
const v6Forms_OLD = [
  `(?:${v6Seg_OLD}:){7}(?:${v6Seg_OLD}|:)`,
  `(?:${v6Seg_OLD}:){6}(?:${v4Str_OLD}|:${v6Seg_OLD}|:)`,
  `(?:${v6Seg_OLD}:){5}(?::${v4Str_OLD}|(:${v6Seg_OLD}){1,2}|:)`,
  `(?:${v6Seg_OLD}:){4}(?:(:${v6Seg_OLD}){0,1}:${v4Str_OLD}|(:${v6Seg_OLD}){1,3}|:)`,
  `(?:${v6Seg_OLD}:){3}(?:(:${v6Seg_OLD}){0,2}:${v4Str_OLD}|(:${v6Seg_OLD}){1,4}|:)`,
  `(?:${v6Seg_OLD}:){2}(?:(:${v6Seg_OLD}){0,3}:${v4Str_OLD}|(:${v6Seg_OLD}){1,5}|:)`,
  `(?:${v6Seg_OLD}:){1}(?:(:${v6Seg_OLD}){0,4}:${v4Str_OLD}|(:${v6Seg_OLD}){1,6}|:)`,
  `(?::((?::${v6Seg_OLD}){0,5}:${v4Str_OLD}|(?::${v6Seg_OLD}){1,7}|:))`,
];
// The address part and the optional '%...' suffix are both capturing groups here.
const IPv6Reg_OLD = new RegExp(
  `^(${v6Forms_OLD.join('|')})(%[0-9a-zA-Z-.:]{1,})?$`,
);
/**
 * Tests `s` against the baseline IPv4 pattern `IPv4Reg_OLD`.
 * @param {string} s - Candidate address string.
 * @returns {boolean} Whether the whole string matched.
 */
function isIPv4_OLD(s) {
  const matched = IPv4Reg_OLD.test(s);
  return matched;
}
/**
 * Tests `s` against the baseline IPv6 pattern `IPv6Reg_OLD`.
 * @param {string} s - Candidate address string.
 * @returns {boolean} Whether the whole string matched.
 */
function isIPv6_OLD(s) {
  const matched = IPv6Reg_OLD.test(s);
  return matched;
}
/**
 * Classifies `s` using the baseline matchers.
 * The IPv4 check runs first; the IPv6 check runs only when it fails.
 * @param {string} s - Candidate address string.
 * @returns {0 | 4 | 6} 4 for IPv4, 6 for IPv6, 0 for neither.
 */
function isIP_OLD(s) {
  if (isIPv4_OLD(s)) {
    return 4;
  }
  return isIPv6_OLD(s) ? 6 : 0;
}
// Candidate ("NEW") patterns: every group is non-capturing.

// One dotted-decimal octet; the three-digit alternatives are tried first.
const v4Seg_NEW = '(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])';
// Four octets separated by an escaped literal dot.
const v4Str_NEW = `(?:${v4Seg_NEW}\\.){3}${v4Seg_NEW}`;
const IPv4Reg_NEW = new RegExp(`^${v4Str_NEW}$`);

// One IPv6 group: one to four hex digits.
const v6Seg_NEW = '(?:[0-9a-fA-F]{1,4})';
// Alternatives ordered by the number of leading "group:" pairs (7 down to 1);
// the last entry covers inputs that start with ':'.
const v6Forms_NEW = [
  `(?:${v6Seg_NEW}:){7}(?:${v6Seg_NEW}|:)`,
  `(?:${v6Seg_NEW}:){6}(?:${v4Str_NEW}|:${v6Seg_NEW}|:)`,
  `(?:${v6Seg_NEW}:){5}(?::${v4Str_NEW}|(?::${v6Seg_NEW}){1,2}|:)`,
  `(?:${v6Seg_NEW}:){4}(?:(?::${v6Seg_NEW}){0,1}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,3}|:)`,
  `(?:${v6Seg_NEW}:){3}(?:(?::${v6Seg_NEW}){0,2}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,4}|:)`,
  `(?:${v6Seg_NEW}:){2}(?:(?::${v6Seg_NEW}){0,3}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,5}|:)`,
  `(?:${v6Seg_NEW}:){1}(?:(?::${v6Seg_NEW}){0,4}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,6}|:)`,
  `(?::(?:(?::${v6Seg_NEW}){0,5}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,7}|:))`,
];
// Address part followed by an optional '%...' suffix, neither of them captured.
const IPv6Reg2_NEW = new RegExp(
  `^(?:${v6Forms_NEW.join('|')})(?:%[0-9a-zA-Z-.:]{1,})?$`,
);
/**
 * Tests `s` against the candidate IPv4 pattern `IPv4Reg_NEW`.
 * @param {string} s - Candidate address string.
 * @returns {boolean} Whether the whole string matched.
 */
function isIPv4_NEW(s) {
  const matched = IPv4Reg_NEW.test(s);
  return matched;
}
/**
 * Tests `s` against the candidate IPv6 pattern `IPv6Reg2_NEW`.
 * @param {string} s - Candidate address string.
 * @returns {boolean} Whether the whole string matched.
 */
function isIPv6_NEW(s) {
  const matched = IPv6Reg2_NEW.test(s);
  return matched;
}
/**
 * Classifies `s` using the candidate matchers.
 * The IPv4 check runs first; the IPv6 check runs only when it fails.
 * @param {string} s - Candidate address string.
 * @returns {0 | 4 | 6} 4 for IPv4, 6 for IPv6, 0 for neither.
 */
function isIP_NEW(s) {
  if (isIPv4_NEW(s)) {
    return 4;
  }
  return isIPv6_NEW(s) ? 6 : 0;
}
// Sample inputs shared by every benchmark group below.

// IPv4 address strings.
const v4_ips = ['0.0.0.0', '127.0.0.1', '255.255.255.255', '192.168.0.1', '192.0.2.33'];

// IPv6 address strings; the last one ends in a dotted-decimal IPv4 part.
const v6_ips = [
  '::1',
  'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff',
  '2001:db8:85a3:0:0:8a2e:370:7334',
  '::ffff:192.0.2.33',
];

// Strings used as the "invalid" inputs.
const invalid_ips = [
  '',
  '.',
  '0.0.0',
  '10.168.209.250.1',
  'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg',
];
// Register a no-op baseline first to prevent some first-run bias.
baseline('baseline', () => 0);

// Each group checks that both implementations agree on the expected result
// before registering the benchmark pair for that input.

group('isIPv4 true', () => {
  v4_ips.forEach((addr) => {
    assert.ok(isIPv4_NEW(addr));
    assert.ok(isIPv4_OLD(addr));
    bench(`NEW ${addr}`, () => isIPv4_NEW(addr));
    bench(`OLD ${addr}`, () => isIPv4_OLD(addr));
  });
});

group('isIPv4 false', () => {
  v6_ips.concat(invalid_ips).forEach((addr) => {
    assert.ok(!isIPv4_NEW(addr));
    assert.ok(!isIPv4_OLD(addr));
    bench(`OLD ${addr}`, () => isIPv4_OLD(addr));
    bench(`NEW ${addr}`, () => isIPv4_NEW(addr));
  });
});

group('isIPv6 true', () => {
  v6_ips.forEach((addr) => {
    assert.ok(isIPv6_NEW(addr));
    assert.ok(isIPv6_OLD(addr));
    bench(`NEW ${addr}`, () => isIPv6_NEW(addr));
    bench(`OLD ${addr}`, () => isIPv6_OLD(addr));
  });
});

group('isIPv6 false', () => {
  v4_ips.concat(invalid_ips).forEach((addr) => {
    assert.ok(!isIPv6_NEW(addr));
    assert.ok(!isIPv6_OLD(addr));
    bench(`OLD ${addr}`, () => isIPv6_OLD(addr));
    bench(`NEW ${addr}`, () => isIPv6_NEW(addr));
  });
});

group('isIP true', () => {
  // IPv4 inputs are registered first, then IPv6 inputs.
  v4_ips.forEach((addr) => {
    assert.strictEqual(isIP_NEW(addr), 4);
    assert.strictEqual(isIP_OLD(addr), 4);
    bench(`NEW ${addr}`, () => isIP_NEW(addr));
    bench(`OLD ${addr}`, () => isIP_OLD(addr));
  });
  v6_ips.forEach((addr) => {
    assert.strictEqual(isIP_NEW(addr), 6);
    assert.strictEqual(isIP_OLD(addr), 6);
    bench(`NEW ${addr}`, () => isIP_NEW(addr));
    bench(`OLD ${addr}`, () => isIP_OLD(addr));
  });
});

group('isIP false', () => {
  invalid_ips.forEach((addr) => {
    assert.strictEqual(isIP_NEW(addr), 0);
    assert.strictEqual(isIP_OLD(addr), 0);
    bench(`NEW ${addr}`, () => isIP_NEW(addr));
    bench(`OLD ${addr}`, () => isIP_OLD(addr));
  });
});
await run();

And the mitata results:
I sometimes get unreliable results when using mitata. Here are `benchmark`-based benchmarks:

import Benchmark from 'benchmark';
import assert from 'node:assert';
// Original regexes

// One dotted-decimal octet; the single-digit alternative is tried first.
const v4Seg_OLD = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
// Four octets separated by dots; the repeated "octet + dot" part is a capturing group.
const v4Str_OLD = `(${v4Seg_OLD}[.]){3}${v4Seg_OLD}`;
const IPv4Reg_OLD = new RegExp(`^${v4Str_OLD}$`);

// One IPv6 group: one to four hex digits.
const v6Seg_OLD = '(?:[0-9a-fA-F]{1,4})';
// Alternatives ordered by the number of leading "group:" pairs (7 down to 1);
// the last entry covers inputs that start with ':'.
const v6Forms_OLD = [
  `(?:${v6Seg_OLD}:){7}(?:${v6Seg_OLD}|:)`,
  `(?:${v6Seg_OLD}:){6}(?:${v4Str_OLD}|:${v6Seg_OLD}|:)`,
  `(?:${v6Seg_OLD}:){5}(?::${v4Str_OLD}|(:${v6Seg_OLD}){1,2}|:)`,
  `(?:${v6Seg_OLD}:){4}(?:(:${v6Seg_OLD}){0,1}:${v4Str_OLD}|(:${v6Seg_OLD}){1,3}|:)`,
  `(?:${v6Seg_OLD}:){3}(?:(:${v6Seg_OLD}){0,2}:${v4Str_OLD}|(:${v6Seg_OLD}){1,4}|:)`,
  `(?:${v6Seg_OLD}:){2}(?:(:${v6Seg_OLD}){0,3}:${v4Str_OLD}|(:${v6Seg_OLD}){1,5}|:)`,
  `(?:${v6Seg_OLD}:){1}(?:(:${v6Seg_OLD}){0,4}:${v4Str_OLD}|(:${v6Seg_OLD}){1,6}|:)`,
  `(?::((?::${v6Seg_OLD}){0,5}:${v4Str_OLD}|(?::${v6Seg_OLD}){1,7}|:))`,
];
// The address part and the optional '%...' suffix are both capturing groups here.
const IPv6Reg_OLD = new RegExp(
  `^(${v6Forms_OLD.join('|')})(%[0-9a-zA-Z-.:]{1,})?$`,
);
/**
 * Checks `s` with the original IPv4 regex (`IPv4Reg_OLD`).
 * @param {string} s - String to test.
 * @returns {boolean} `true` when the regex matches.
 */
function isIPv4_OLD(s) {
  const isMatch = IPv4Reg_OLD.test(s);
  return isMatch;
}
/**
 * Checks `s` with the original IPv6 regex (`IPv6Reg_OLD`).
 * @param {string} s - String to test.
 * @returns {boolean} `true` when the regex matches.
 */
function isIPv6_OLD(s) {
  const isMatch = IPv6Reg_OLD.test(s);
  return isMatch;
}
/**
 * Returns the IP version of `s` according to the original matchers.
 * `isIPv4_OLD` is consulted first; `isIPv6_OLD` only if that returns false.
 * @param {string} s - String to classify.
 * @returns {0 | 4 | 6} 4 or 6 for a match, 0 otherwise.
 */
function isIP_OLD(s) {
  if (isIPv4_OLD(s)) {
    return 4;
  }
  return isIPv6_OLD(s) ? 6 : 0;
}
// With performance optimizations from node.js project PR

// One dotted-decimal octet; the three-digit alternatives are tried first.
const v4Seg_NEW = '(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])';
// Four octets separated by an escaped literal dot; no capturing groups.
const v4Str_NEW = `(?:${v4Seg_NEW}\\.){3}${v4Seg_NEW}`;
const IPv4Reg_NEW = new RegExp(`^${v4Str_NEW}$`);

// One IPv6 group: one to four hex digits.
const v6Seg_NEW = '(?:[0-9a-fA-F]{1,4})';
// Alternatives ordered by the number of leading "group:" pairs (7 down to 1);
// the last entry covers inputs that start with ':'.
const v6Forms_NEW = [
  `(?:${v6Seg_NEW}:){7}(?:${v6Seg_NEW}|:)`,
  `(?:${v6Seg_NEW}:){6}(?:${v4Str_NEW}|:${v6Seg_NEW}|:)`,
  `(?:${v6Seg_NEW}:){5}(?::${v4Str_NEW}|(?::${v6Seg_NEW}){1,2}|:)`,
  `(?:${v6Seg_NEW}:){4}(?:(?::${v6Seg_NEW}){0,1}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,3}|:)`,
  `(?:${v6Seg_NEW}:){3}(?:(?::${v6Seg_NEW}){0,2}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,4}|:)`,
  `(?:${v6Seg_NEW}:){2}(?:(?::${v6Seg_NEW}){0,3}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,5}|:)`,
  `(?:${v6Seg_NEW}:){1}(?:(?::${v6Seg_NEW}){0,4}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,6}|:)`,
  `(?::(?:(?::${v6Seg_NEW}){0,5}:${v4Str_NEW}|(?::${v6Seg_NEW}){1,7}|:))`,
];
// Address part followed by an optional '%...' suffix, neither of them captured.
const IPv6Reg2_NEW = new RegExp(
  `^(?:${v6Forms_NEW.join('|')})(?:%[0-9a-zA-Z-.:]{1,})?$`,
);
/**
 * Checks `s` with the optimized IPv4 regex (`IPv4Reg_NEW`).
 * @param {string} s - String to test.
 * @returns {boolean} `true` when the regex matches.
 */
function isIPv4_NEW(s) {
  const isMatch = IPv4Reg_NEW.test(s);
  return isMatch;
}
/**
 * Checks `s` with the optimized IPv6 regex (`IPv6Reg2_NEW`).
 * @param {string} s - String to test.
 * @returns {boolean} `true` when the regex matches.
 */
function isIPv6_NEW(s) {
  const isMatch = IPv6Reg2_NEW.test(s);
  return isMatch;
}
/**
 * Returns the IP version of `s` according to the optimized matchers.
 * `isIPv4_NEW` is consulted first; `isIPv6_NEW` only if that returns false.
 * @param {string} s - String to classify.
 * @returns {0 | 4 | 6} 4 or 6 for a match, 0 otherwise.
 */
function isIP_NEW(s) {
  if (isIPv4_NEW(s)) {
    return 4;
  }
  return isIPv6_NEW(s) ? 6 : 0;
}
/**
 * Builds and runs one `Benchmark.Suite` per entry of `group`.
 *
 * Every `[name, fn]` pair in an entry is added to that entry's suite. Each
 * finished benchmark is logged on 'cycle', and the fastest name(s) are logged
 * on 'complete'.
 *
 * @param {Iterable<Iterable<[string, Function]>>} group - Collections of
 *   `[name, fn]` pairs; each collection becomes its own suite.
 * @returns {void}
 */
function run(group) {
  // Logs the string form of the benchmark that just finished.
  const reportCycle = function (event) {
    console.log(String(event.target));
  };

  // Stays a regular function because it reads `this`
  // (presumably the suite, as bound by Benchmark.js; confirm against its docs).
  const reportFastest = function () {
    const winners = this.filter('fastest').map('name');
    console.log('Fastest is ' + winners + '\n');
  };

  for (const entries of group) {
    const suite = new Benchmark.Suite();
    for (const [label, task] of entries) {
      suite.add(label, task);
    }
    suite.on('cycle', reportCycle).on('complete', reportFastest).run();
  }
}
// Inputs fed to every benchmark case.

// IPv4 address strings.
const v4_ips = ['0.0.0.0', '127.0.0.1', '255.255.255.255', '192.168.0.1', '192.0.2.33'];

// IPv6 address strings; the last one ends in a dotted-decimal IPv4 part.
const v6_ips = [
  '::1',
  'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff',
  '2001:db8:85a3:0:0:8a2e:370:7334',
  '::ffff:192.0.2.33',
];

// Strings used as the "invalid" inputs.
const invalid_ips = [
  '',
  '.',
  '0.0.0',
  '10.168.209.250.1',
  'ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg',
];
/**
 * Benchmark cases grouped into five buckets, one suite per bucket:
 *   [0] isIPv4 on IPv4 inputs      [1] isIPv6 on IPv6 inputs
 *   [2] isIPv4/isIPv6 on invalid   [3] isIP on IPv4 and IPv6 inputs
 *   [4] isIP on invalid inputs
 * @type {[string, ()=>void][][]}
 */
const cases = [[], [], [], [], []];

// Both implementations must agree on each input before it is benchmarked.
for (const addr of v4_ips) {
  assert.ok(isIPv4_OLD(addr));
  assert.ok(isIPv4_NEW(addr));
  assert.strictEqual(isIP_OLD(addr), 4);
  assert.strictEqual(isIP_NEW(addr), 4);
  cases[0].push(
    [`isIPv4 ${addr} OLD`, () => isIPv4_OLD(addr)],
    [`isIPv4 ${addr} NEW`, () => isIPv4_NEW(addr)],
  );
  cases[3].push(
    [`isIP ${addr} OLD`, () => isIP_OLD(addr)],
    [`isIP ${addr} NEW`, () => isIP_NEW(addr)],
  );
}

for (const addr of v6_ips) {
  assert.ok(isIPv6_OLD(addr));
  assert.ok(isIPv6_NEW(addr));
  assert.strictEqual(isIP_OLD(addr), 6);
  assert.strictEqual(isIP_NEW(addr), 6);
  cases[1].push(
    [`isIPv6 ${addr} OLD`, () => isIPv6_OLD(addr)],
    [`isIPv6 ${addr} NEW`, () => isIPv6_NEW(addr)],
  );
  cases[3].push(
    [`isIP ${addr} OLD`, () => isIP_OLD(addr)],
    [`isIP ${addr} NEW`, () => isIP_NEW(addr)],
  );
}

for (const addr of invalid_ips) {
  assert.ok(!isIPv4_OLD(addr));
  assert.ok(!isIPv4_NEW(addr));
  assert.ok(!isIPv6_OLD(addr));
  assert.ok(!isIPv6_NEW(addr));
  assert.strictEqual(isIP_OLD(addr), 0);
  assert.strictEqual(isIP_NEW(addr), 0);
  cases[2].push(
    [`isIPv4 ${addr} OLD`, () => isIPv4_OLD(addr)],
    [`isIPv4 ${addr} NEW`, () => isIPv4_NEW(addr)],
    [`isIPv6 ${addr} OLD`, () => isIPv6_OLD(addr)],
    [`isIPv6 ${addr} NEW`, () => isIPv6_NEW(addr)],
  );
  cases[4].push(
    [`isIP ${addr} OLD`, () => isIP_OLD(addr)],
    [`isIP ${addr} NEW`, () => isIP_NEW(addr)],
  );
}
run(cases);

And the benchmark results:
The performance depends on the specific string that's passed to the functions. For the IP strings I benchmarked, only |
Since the change turns the capturing groups into non-capturing ones, I suppose it would also use less memory?
Awesome. Thank you . |
Well... #4691 This will not motivate people to participate... |
@Uzlopak sorry, that's my bad. I did look for an existing issue but it seems I didn't search through the existing PRs! |
…#8271) Copy over the `isIPv4`/`isIPv6` performance improvements from <nodejs/node#49568>.
What does this PR do?
Copy over the `isIPv4`/`isIPv6` performance improvements from nodejs/node#49568.
How did you verify your code works?
Run relevant tests:
./build/bun-debug test test/js/bun/dns
./build/bun-debug test test/js/node/net