diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index 88128ab1ad9ec..a123c2da804dd 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -583,6 +583,7 @@ graph LR; npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"]; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->json-stringify-nice; + npmcli-arborist-->lru-cache; npmcli-arborist-->minify-registry-metadata; npmcli-arborist-->minimatch; npmcli-arborist-->nock; diff --git a/smoke-tests/test/fixtures/setup.js b/smoke-tests/test/fixtures/setup.js index d2500507119a8..91e0ddec2415f 100644 --- a/smoke-tests/test/fixtures/setup.js +++ b/smoke-tests/test/fixtures/setup.js @@ -74,7 +74,7 @@ const getCleanPaths = async () => { } module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy = false } = {}) => { - const debugLog = debug || CI ? (...a) => console.error(...a) : () => {} + const debugLog = debug || CI ? (...a) => t.comment(...a) : () => {} const cleanPaths = await getCleanPaths() // setup fixtures @@ -158,7 +158,7 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy log(`${spawnCmd} ${spawnArgs.join(' ')}`) log('-'.repeat(40)) - const { stderr, stdout } = await spawn(spawnCmd, spawnArgs, { + const p = spawn(spawnCmd, spawnArgs, { cwd, env: { ...getEnvPath(), @@ -169,10 +169,20 @@ module.exports = async (t, { testdir = {}, debug, mockRegistry = true, useProxy ...opts, }) - log(stderr) - log('-'.repeat(40)) - log(stdout) - log('='.repeat(40)) + // In debug mode, stream stdout and stderr to console so we can debug hanging processes + if (debug) { + p.process.stdout.on('data', (c) => log('STDOUT: ' + c.toString().trim())) + p.process.stderr.on('data', (c) => log('STDERR: ' + c.toString().trim())) + } + + const { stdout, stderr } = await p + // If not in debug mode, print full stderr and stdout contents separately + if (!debug) { + log(stderr) + log('-'.repeat(40)) + log(stdout) + log('='.repeat(40)) + } return { stderr, stdout } } diff --git a/smoke-tests/test/large-install.js b/smoke-tests/test/large-install.js index a44579904e6fd..a9260c8d3f0f1 100644 --- a/smoke-tests/test/large-install.js +++ b/smoke-tests/test/large-install.js @@ -5,18 +5,20 @@ const setup = require('./fixtures/setup.js') const getFixture = (p) => require( path.resolve(__dirname, './fixtures/large-install', p)) -const runTest = async (t) => { +const runTest = async (t, { lowMemory } = {}) => { const { npm } = await setup(t, { // This test relies on the actual production registry mockRegistry: false, testdir: { project: { 'package.json': getFixture('package.json'), - 'package-lock.json': getFixture('package-lock.json'), + ...lowMemory ? {} : { 'package-lock.json': getFixture('package-lock.json') }, }, }, }) - return npm('install', '--no-audit', '--no-fund') + return npm('install', '--no-audit', '--no-fund', { + env: lowMemory ? { NODE_OPTIONS: '--max-old-space-size=500' } : {}, + }) } // This test was originally created for /~https://github.com/npm/cli/issues/6763 @@ -31,3 +33,9 @@ t.test('large install', async t => { // installs the same number of packages. t.match(stdout, `added 126${process.platform === 'linux' ? 4 : 5} packages in`) }) + +t.test('large install, no lock and low memory', async t => { + // Run the same install but with no lockfile and constrained max-old-space-size + const { stdout } = await runTest(t, { lowMemory: true }) + t.match(stdout, /added \d+ packages/) +}) diff --git a/smoke-tests/test/npm-replace-global.js b/smoke-tests/test/npm-replace-global.js index 03a976ab6ccf3..12364e6899d9f 100644 --- a/smoke-tests/test/npm-replace-global.js +++ b/smoke-tests/test/npm-replace-global.js @@ -125,7 +125,7 @@ t.test('publish and replace global self', async t => { await npmPackage({ manifest: { packuments: [publishedPackument] }, tarballs: { [version]: tarball }, - times: 2, + times: 3, }) await fs.rm(cache, { recursive: true, force: true }) await useNpm('install', 'npm@latest', '--global') diff --git a/test/lib/commands/audit.js b/test/lib/commands/audit.js index 701d374ade985..d8714cb61912a 100644 --- a/test/lib/commands/audit.js +++ b/test/lib/commands/audit.js @@ -184,6 +184,7 @@ t.test('audit fix - bulk endpoint', async t => { tarballs: { '1.0.1': path.join(npm.prefix, 'test-dep-a-fixed'), }, + times: 2, }) const advisory = registry.advisory({ id: 100, vulnerable_versions: '1.0.0' }) registry.nock.post('/-/npm/v1/security/advisories/bulk', body => { diff --git a/workspaces/arborist/lib/arborist/index.js b/workspaces/arborist/lib/arborist/index.js index ba180f354708a..b348d490def65 100644 --- a/workspaces/arborist/lib/arborist/index.js +++ b/workspaces/arborist/lib/arborist/index.js @@ -31,10 +31,10 @@ const { homedir } = require('os') const { depth } = require('treeverse') const mapWorkspaces = require('@npmcli/map-workspaces') const { log, time } = require('proc-log') - const { saveTypeMap } = require('../add-rm-pkg-deps.js') const AuditReport = require('../audit-report.js') const relpath = require('../relpath.js') +const PackumentCache = require('../packument-cache.js') const mixins = [ require('../tracker.js'), @@ -82,7 +82,7 @@ class Arborist extends Base { installStrategy: options.global ? 'shallow' : (options.installStrategy ? options.installStrategy : 'hoisted'), lockfileVersion: lockfileVersion(options.lockfileVersion), packageLockOnly: !!options.packageLockOnly, - packumentCache: options.packumentCache || new Map(), + packumentCache: options.packumentCache || new PackumentCache(), path: options.path || '.', rebuildBundle: 'rebuildBundle' in options ? !!options.rebuildBundle : true, replaceRegistryHost: options.replaceRegistryHost, diff --git a/workspaces/arborist/lib/node.js b/workspaces/arborist/lib/node.js index bdc021b7c12a9..d5c5a478cce0b 100644 --- a/workspaces/arborist/lib/node.js +++ b/workspaces/arborist/lib/node.js @@ -119,6 +119,8 @@ class Node { // package's dependencies in a virtual root. this.sourceReference = sourceReference + // TODO if this came from pacote.manifest we don't have to do this, + // we can be told to skip this step const pkg = sourceReference ? sourceReference.package : normalize(options.pkg || {}) diff --git a/workspaces/arborist/lib/packument-cache.js b/workspaces/arborist/lib/packument-cache.js new file mode 100644 index 0000000000000..26e463eb9d334 --- /dev/null +++ b/workspaces/arborist/lib/packument-cache.js @@ -0,0 +1,77 @@ +const { LRUCache } = require('lru-cache') +const { getHeapStatistics } = require('node:v8') +const { log } = require('proc-log') + +// This is an in-memory cache that Pacote uses for packuments. +// Packuments are usually cached on disk. This allows for rapid re-requests +// of the same packument to bypass disk reads. The tradeoff here is memory +// usage for disk reads. +class PackumentCache extends LRUCache { + static #heapLimit = Math.floor(getHeapStatistics().heap_size_limit) + + #sizeKey + #disposed = new Set() + + #log (...args) { + log.silly('packumentCache', ...args) + } + + constructor ({ + // How much of this.#heapLimit to take up + heapFactor = 0.25, + // How much of this.#maxSize we allow any one packument to take up + // Anything over this is not cached + maxEntryFactor = 0.5, + sizeKey = '_contentLength', + } = {}) { + const maxSize = Math.floor(PackumentCache.#heapLimit * heapFactor) + const maxEntrySize = Math.floor(maxSize * maxEntryFactor) + super({ + maxSize, + maxEntrySize, + sizeCalculation: (p) => { + // Don't cache if we dont know the size + // Some versions of pacote set this to `0`, newer versions set it to `null` + if (!p[sizeKey]) { + return maxEntrySize + 1 + } + if (p[sizeKey] < 10_000) { + return p[sizeKey] * 2 + } + if (p[sizeKey] < 1_000_000) { + return Math.floor(p[sizeKey] * 1.5) + } + // It is less beneficial to store a small amount of super large things + // at the cost of all other packuments. + return maxEntrySize + 1 + }, + dispose: (v, k) => { + this.#disposed.add(k) + this.#log(k, 'dispose') + }, + }) + this.#sizeKey = sizeKey + this.#log(`heap:${PackumentCache.#heapLimit} maxSize:${maxSize} maxEntrySize:${maxEntrySize}`) + } + + set (k, v, ...args) { + // we use disposed only for a logging signal if we are setting packuments that + // have already been evicted from the cache previously. logging here could help + // us tune this in the future. + const disposed = this.#disposed.has(k) + /* istanbul ignore next - this doesnt happen consistently so hard to test without resorting to unit tests */ + if (disposed) { + this.#disposed.delete(k) + } + this.#log(k, 'set', `size:${v[this.#sizeKey]} disposed:${disposed}`) + return super.set(k, v, ...args) + } + + has (k, ...args) { + const has = super.has(k, ...args) + this.#log(k, `cache-${has ? 'hit' : 'miss'}`) + return has + } +} + +module.exports = PackumentCache diff --git a/workspaces/arborist/test/arborist/rebuild.js b/workspaces/arborist/test/arborist/rebuild.js index 2843155f2809e..7cf6c381197d2 100644 --- a/workspaces/arborist/test/arborist/rebuild.js +++ b/workspaces/arborist/test/arborist/rebuild.js @@ -197,7 +197,7 @@ t.test('verify dep flags in script environments', async t => { const file = resolve(path, 'node_modules', pkg, 'env') t.strictSame(flags, fs.readFileSync(file, 'utf8').split('\n'), pkg) } - t.strictSame(checkLogs().sort((a, b) => + t.strictSame(checkLogs().filter(l => l[0] === 'info' && l[1] === 'run').sort((a, b) => localeCompare(a[2], b[2]) || (typeof a[4] === 'string' ? -1 : 1)), [ ['info', 'run', 'devdep@1.0.0', 'postinstall', 'node_modules/devdep', 'node ../../env.js'], ['info', 'run', 'devdep@1.0.0', 'postinstall', { code: 0, signal: null }], diff --git a/workspaces/arborist/test/audit-report.js b/workspaces/arborist/test/audit-report.js index de2cd429c96cc..18751b9cd96a5 100644 --- a/workspaces/arborist/test/audit-report.js +++ b/workspaces/arborist/test/audit-report.js @@ -222,7 +222,7 @@ t.test('audit returns an error', async t => { const report = await AuditReport.load(tree, arb.options) t.equal(report.report, null, 'did not get audit response') t.equal(report.size, 0, 'did not find any vulnerabilities') - t.match(logs, [ + t.match(logs.filter(l => l[1].includes('audit')), [ [ 'silly', 'audit',