From 951d22ed4a28c0182604557f4dae74658248078b Mon Sep 17 00:00:00 2001 From: Luke Karrys Date: Wed, 26 Oct 2022 15:33:28 -0700 Subject: [PATCH] fix: use hosted-git-info to parse registry urls Previously this was using `new URL` which would fail on some urls that `hosted-git-info` is able to parse. But if we still get a url that can't be parsed, we now set it to be removed from the tree instead of erroring. Fixes: #5278 --- DEPENDENCIES.md | 2 + package-lock.json | 1 + workspaces/arborist/lib/arborist/reify.js | 27 ++++++-- workspaces/arborist/package.json | 1 + workspaces/arborist/test/arborist/reify.js | 79 +++++++++++++++++++++- 5 files changed, 103 insertions(+), 7 deletions(-) diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index ce0a5d454fd94..b30376c1908d1 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -157,6 +157,7 @@ graph LR; npm-registry-fetch-->proc-log; npmcli-arborist-->bin-links; npmcli-arborist-->cacache; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->minify-registry-metadata; npmcli-arborist-->nopt; @@ -791,6 +792,7 @@ graph LR; npmcli-arborist-->cacache; npmcli-arborist-->chalk; npmcli-arborist-->common-ancestor-path; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"]; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->json-stringify-nice; diff --git a/package-lock.json b/package-lock.json index e84cc520f81aa..57be1d9fa086a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14053,6 +14053,7 @@ "bin-links": "^4.0.1", "cacache": "^17.0.1", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^6.1.0", "json-parse-even-better-errors": "^3.0.0", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 03b5c378d5052..dcbe42d59a4cf 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -9,6 +9,7 @@ const semver = require('semver') const debug = require('../debug.js') const walkUp = require('walk-up-path') const log = require('proc-log') +const hgi = require('hosted-git-info') const { dirname, resolve, relative } = require('path') const { depth: dfwalk } = require('treeverse') @@ -640,10 +641,15 @@ module.exports = cls => class Reifier extends cls { // and no 'bundled: true' setting. // Do the best with what we have, or else remove it from the tree // entirely, since we can't possibly reify it. - const res = node.resolved ? `${node.name}@${this[_registryResolved](node.resolved)}` - : node.packageName && node.version - ? `${node.packageName}@${node.version}` - : null + let res = null + if (node.resolved) { + const registryResolved = this[_registryResolved](node.resolved) + if (registryResolved) { + res = `${node.name}@${registryResolved}` + } + } else if (node.packageName && node.version) { + res = `${node.packageName}@${node.version}` + } // no idea what this thing is. remove it from the tree. if (!res) { @@ -721,12 +727,21 @@ module.exports = cls => class Reifier extends cls { // ${REGISTRY} or something. This has to be threaded through the // Shrinkwrap and Node classes carefully, so for now, just treat // the default reg as the magical animal that it has been. - const resolvedURL = new URL(resolved) + // const resolvedURL = new URL(resolved) + const resolvedURL = hgi.parseUrl(resolved) + + if (!resolvedURL) { + // if we could not parse the url at all then returning nothing + // here means it will get removed from the tree in the next step + return + } + if ((this.options.replaceRegistryHost === resolvedURL.hostname) || this.options.replaceRegistryHost === 'always') { // this.registry always has a trailing slash - resolved = `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` + return `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` } + return resolved } diff --git a/workspaces/arborist/package.json b/workspaces/arborist/package.json index e51a162eaa614..ec1534eb2d687 100644 --- a/workspaces/arborist/package.json +++ b/workspaces/arborist/package.json @@ -16,6 +16,7 @@ "bin-links": "^4.0.1", "cacache": "^17.0.1", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^6.1.0", "json-parse-even-better-errors": "^3.0.0", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/workspaces/arborist/test/arborist/reify.js b/workspaces/arborist/test/arborist/reify.js index 0c68bdd4dd748..ea7c3f0c66e52 100644 --- a/workspaces/arborist/test/arborist/reify.js +++ b/workspaces/arborist/test/arborist/reify.js @@ -2936,7 +2936,20 @@ t.test('installLinks', (t) => { }) t.only('should preserve exact ranges, missing actual tree', async (t) => { - const Arborist = require('../../lib/index.js') + const Pacote = require('pacote') + const Arborist = t.mock('../../lib/arborist', { + pacote: { + ...Pacote, + extract: async (...args) => { + if (args[0].startsWith('gitssh')) { + // we just want to test that this url is handled properly + // but its not a real git url we can clone so return early + return true + } + return Pacote.extract(...args) + }, + }, + }) const abbrev = resolve(__dirname, '../fixtures/registry-mocks/content/abbrev/-/abbrev-1.1.1.tgz') const abbrevTGZ = fs.readFileSync(abbrev) @@ -2973,6 +2986,40 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { }, }) + const gitSshPackument = JSON.stringify({ + _id: 'gitssh', + _rev: 'lkjadflkjasdf', + name: 'gitssh', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'gitssh', + version: '1.1.1', + dist: { + // this is a url that `new URL()` cant parse + // /~https://github.com/npm/cli/issues/5278 + tarball: 'git+ssh://git@github.com:a/b/c.git#lkjadflkjasdf', + }, + }, + }, + }) + + const notAUrlPackument = JSON.stringify({ + _id: 'notaurl', + _rev: 'lkjadflkjasdf', + name: 'notaurl', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'notaurl', + version: '1.1.1', + dist: { + tarball: 'hey been trying to break this test', + }, + }, + }, + }) + t.only('host should not be replaced replaceRegistryHost=never', async (t) => { const testdir = t.testdir({ project: { @@ -2981,6 +3028,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -2994,6 +3043,14 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3011,6 +3068,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3020,10 +3079,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3041,6 +3108,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3050,10 +3119,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument2) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com',