Skip to content

Commit

Permalink
Companion: rewrite request and purest to got (transloadit#3953)
Browse files Browse the repository at this point in the history
* rewrite to async

* rewrite box and dropbox to got

(not yet working due to jest esm issues)

* downgrade got

* update developer notes

* rewrite

- rewrite remaining providers to got
- rewrite to async/await
- pull out adapt code into adapters
- provider/companion tests still todo

* add zoom to dev dashboard

* rewrites

- rewrite remaining providers to got and reuse code
- port tests
- remove request
- remove purest
- rewrite periodic ping job to got
- rewrite uploader to got
- rewrite "url" to got
- rewrite getRedirectEvaluator/request to got
- rewrite http/https agent/request to got
- rewrite credentials.js to got
- fix "todo: handle failures differently to return 400 for this case instead"
- add test for http/https agent
- improve test for credentials (remote/local)
- make /zoom/logout return 424 instead of 500 on credentials error
- remove useless http-agent tests
- fix various eslint warnings

* work around ts error

* remove forgotten change
  • Loading branch information
mifi authored Aug 16, 2022
1 parent 4dc26a1 commit 15b6b01
Show file tree
Hide file tree
Showing 43 changed files with 1,694 additions and 2,258 deletions.
4 changes: 1 addition & 3 deletions ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,12 @@ These controllers are generalized to work for any provider. The provider specifi

To add a new provider to Companion, you need to do two things: add the provider config to `config/grant.js`, and then create a new file in `server/providers` that describes how to interface with the provider’s API.

We are using a library called [purest](/~https://github.com/simov/purest) to make it easier to interface with third party APIs. Instead of dealing with each single provider’s client library/SDK, we use Purest, a “generic REST API client library” that gives us a consistent, “generic” API to interface with any provider. This makes life a lot easier.
We are using a library called [got](/~https://github.com/sindresorhus/got) to make it easier to interface with third party APIs.

Since each API works differently, we need to describe how to `download` and `list` files from the provider in a file within `server/providers`. The name of the file should be the same as what endpoint it will use. For example, `server/providers/foobar.js` if the client requests a list of files from `https://our-server/foobar/list`.

**Note:** As of right now, you only need to implement `YourProvider.prototype.list` and `YourProvider.prototype.download` for each provider, I believe. `stats` seems to be used by Dropbox to get a list of files, so that’s required there, but `upload` is optional unless you all decide to allow uploading to third parties. I got that code from an example.

This whole approach was inspired by an example from `purest 2.x`. Keep in mind that we’re using `3.x`, so the API is different, but here is the example for reference: </~https://github.com/simov/purest/tree/2.x/examples/storage>

## WebSockets

Companion uses WebSockets to transfer `progress` events to the client during file transfers. It’s only set up to transfer progress during Tus uploads to the target server.
Expand Down
5 changes: 2 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
],
"bin": "./bin/companion",
"dependencies": {
"@purest/providers": "1.0.1",
"atob": "2.1.2",
"aws-sdk": "^2.1038.0",
"body-parser": "1.19.0",
Expand All @@ -44,6 +43,8 @@
"express-prom-bundle": "6.3.0",
"express-request-id": "1.4.1",
"express-session": "1.17.1",
"form-data": "^3.0.0",
"got": "11",
"grant": "4.7.0",
"helmet": "^4.6.0",
"ipaddr.js": "^2.0.1",
Expand All @@ -56,9 +57,7 @@
"ms": "2.1.2",
"node-schedule": "1.3.2",
"prom-client": "12.0.0",
"purest": "3.1.0",
"redis": "4.2.0",
"request": "2.88.2",
"semver": "6.3.0",
"serialize-error": "^2.1.0",
"serialize-javascript": "^6.0.0",
Expand Down
102 changes: 52 additions & 50 deletions src/server/Uploader.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
const tus = require('tus-js-client')
const { randomUUID } = require('node:crypto')
const validator = require('validator')
const request = require('request')
const got = require('got').default
const { pipeline: pipelineCb } = require('node:stream')
const { join } = require('node:path')
const fs = require('node:fs')
const { promisify } = require('node:util')
const FormData = require('form-data')

// TODO move to `require('streams/promises').pipeline` when dropping support for Node.js 14.x.
const pipeline = promisify(pipelineCb)
Expand Down Expand Up @@ -557,73 +558,74 @@ class Uploader {
throw new Error('No multipart endpoint set')
}

function getRespObj (response) {
// remove browser forbidden headers
const { 'set-cookie': deleted, 'set-cookie2': deleted2, ...responseHeaders } = response.headers

return {
responseText: response.body,
status: response.statusCode,
statusText: response.statusMessage,
headers: responseHeaders,
}
}

// upload progress
let bytesUploaded = 0
stream.on('data', (data) => {
bytesUploaded += data.length
this.onProgress(bytesUploaded, undefined)
})

const httpMethod = (this.options.httpMethod || '').toLowerCase() === 'put' ? 'put' : 'post'
const headers = headerSanitize(this.options.headers)
const reqOptions = { url: this.options.endpoint, headers, encoding: null }
const runRequest = request[httpMethod]
const url = this.options.endpoint
const reqOptions = {
headers: headerSanitize(this.options.headers),
}

if (this.options.useFormData) {
reqOptions.formData = {
...this.options.metadata,
[this.options.fieldname]: {
value: stream,
options: {
filename: this.uploadFileName,
contentType: this.options.metadata.type,
knownLength: this.size,
},
},
}
// todo refactor once upgraded to got 12
const formData = new FormData()

Object.entries(this.options.metadata).forEach(([key, value]) => formData.append(key, value))

formData.append(this.options.fieldname, stream, {
filename: this.uploadFileName,
contentType: this.options.metadata.type,
knownLength: this.size,
})

reqOptions.body = formData
} else {
reqOptions.headers['content-length'] = this.size
reqOptions.body = stream
}

const { response, body } = await new Promise((resolve, reject) => {
runRequest(reqOptions, (error, response2, body2) => {
if (error) {
logger.error(error, 'upload.multipart.error')
reject(error)
return
}

resolve({ response: response2, body: body2 })
})
})

// remove browser forbidden headers
delete response.headers['set-cookie']
delete response.headers['set-cookie2']
try {
const httpMethod = (this.options.httpMethod || '').toLowerCase() === 'put' ? 'put' : 'post'
const runRequest = got[httpMethod]

const respObj = {
responseText: body.toString(),
status: response.statusCode,
statusText: response.statusMessage,
headers: response.headers,
}
const response = await runRequest(url, reqOptions)

if (response.statusCode >= 400) {
logger.error(`upload failed with status: ${response.statusCode}`, 'upload.multipart.error')
const err = new Error(response.statusMessage)
// @ts-ignore
err.extraData = respObj
throw err
}
if (bytesUploaded !== this.size) {
const errMsg = `uploaded only ${bytesUploaded} of ${this.size} with status: ${response.statusCode}`
logger.error(errMsg, 'upload.multipart.mismatch.error')
throw new Error(errMsg)
}

if (bytesUploaded !== this.size) {
const errMsg = `uploaded only ${bytesUploaded} of ${this.size} with status: ${response.statusCode}`
logger.error(errMsg, 'upload.multipart.mismatch.error')
throw new Error(errMsg)
return {
url: null,
extraData: { response: getRespObj(response), bytesUploaded },
}
} catch (err) {
logger.error(err, 'upload.multipart.error')
const statusCode = err.response?.statusCode
if (statusCode != null) {
throw Object.assign(new Error(err.statusMessage), {
extraData: getRespObj(err.response),
})
}
throw new Error('Unknown multipart upload error')
}

return { url: null, extraData: { response: respObj, bytesUploaded } }
}

/**
Expand Down
3 changes: 1 addition & 2 deletions src/server/controllers/thumbnail.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
* @param {object} res
*/
async function thumbnail (req, res, next) {
const { providerName } = req.params
const { id } = req.params
const { providerName, id } = req.params
const token = req.companion.providerTokens[providerName]
const { provider } = req.companion

Expand Down
38 changes: 10 additions & 28 deletions src/server/controllers/url.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
const router = require('express').Router
const request = require('request')
const { URL } = require('node:url')
const validator = require('validator')

const { startDownUpload } = require('../helpers/upload')
const { getURLMeta, getRedirectEvaluator, getProtectedHttpAgent } = require('../helpers/request')
const { prepareStream } = require('../helpers/utils')
const { getURLMeta, getProtectedGot } = require('../helpers/request')
const logger = require('../logger')

/**
Expand Down Expand Up @@ -46,32 +45,15 @@ const validateURL = (url, ignoreTld) => {
* @returns {Promise}
*/
const downloadURL = async (url, blockLocalIPs, traceId) => {
const opts = {
uri: url,
method: 'GET',
followRedirect: getRedirectEvaluator(url, blockLocalIPs),
agentClass: getProtectedHttpAgent((new URL(url)).protocol, blockLocalIPs),
try {
const protectedGot = getProtectedGot({ url, blockLocalIPs })
const stream = protectedGot.stream.get(url, { responseType: 'json' })
await prepareStream(stream)
return stream
} catch (err) {
logger.error(err, 'controller.url.download.error', traceId)
throw err
}

return new Promise((resolve, reject) => {
const req = request(opts)
.on('response', (resp) => {
if (resp.statusCode >= 300) {
req.abort() // No need to keep request
reject(new Error(`URL server responded with status: ${resp.statusCode}`))
return
}

// Don't allow any more data to flow yet.
// /~https://github.com/request/request/issues/1990#issuecomment-184712275
resp.pause()
resolve(resp)
})
.on('error', (err) => {
logger.error(err, 'controller.url.download.error', traceId)
reject(err)
})
})
}

/**
Expand Down
87 changes: 50 additions & 37 deletions src/server/helpers/request.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ const http = require('node:http')
const https = require('node:https')
const { URL } = require('node:url')
const dns = require('node:dns')
const request = require('request')
const ipaddr = require('ipaddr.js')
const got = require('got').default

const logger = require('../logger')

Expand All @@ -17,16 +17,15 @@ const isDisallowedIP = (ipAddress) => ipaddr.parse(ipAddress).range() !== 'unica

module.exports.FORBIDDEN_IP_ADDRESS = FORBIDDEN_IP_ADDRESS

module.exports.getRedirectEvaluator = (rawRequestURL, blockPrivateIPs) => {
module.exports.getRedirectEvaluator = (rawRequestURL, isEnabled) => {
const requestURL = new URL(rawRequestURL)
return (res) => {
if (!blockPrivateIPs) {
return true
}

return ({ headers }) => {
if (!isEnabled) return true

let redirectURL = null
try {
redirectURL = new URL(res.headers.location, requestURL)
redirectURL = new URL(headers.location, requestURL)
} catch (err) {
return false
}
Expand Down Expand Up @@ -87,16 +86,30 @@ class HttpsAgent extends https.Agent {
* Returns http Agent that will prevent requests to private IPs (to preven SSRF)
*
* @param {string} protocol http or http: or https: or https protocol needed for the request
* @param {boolean} blockPrivateIPs if set to false, this protection will be disabled
*/
module.exports.getProtectedHttpAgent = (protocol, blockPrivateIPs) => {
if (blockPrivateIPs) {
return protocol.startsWith('https') ? HttpsAgent : HttpAgent
module.exports.getProtectedHttpAgent = (protocol) => {
return protocol.startsWith('https') ? HttpsAgent : HttpAgent
}

function getProtectedGot ({ url, blockLocalIPs }) {
const httpAgent = new (module.exports.getProtectedHttpAgent('http'))()
const httpsAgent = new (module.exports.getProtectedHttpAgent('https'))()

const redirectEvaluator = module.exports.getRedirectEvaluator(url, blockLocalIPs)

const beforeRedirect = (options, response) => {
const allowRedirect = redirectEvaluator(response)
if (!allowRedirect) {
throw new Error(`Redirect evaluator does not allow the redirect to ${response.headers.location}`)
}
}

return protocol.startsWith('https') ? https.Agent : http.Agent
// @ts-ignore
return got.extend({ hooks: { beforeRedirect: [beforeRedirect] }, agent: { http: httpAgent, https: httpsAgent } })
}

module.exports.getProtectedGot = getProtectedGot

/**
* Gets the size and content type of a url's content
*
Expand All @@ -105,31 +118,30 @@ module.exports.getProtectedHttpAgent = (protocol, blockPrivateIPs) => {
* @returns {Promise<{type: string, size: number}>}
*/
exports.getURLMeta = async (url, blockLocalIPs = false) => {
const requestWithMethod = async (method) => new Promise((resolve, reject) => {
const opts = {
uri: url,
method,
followRedirect: exports.getRedirectEvaluator(url, blockLocalIPs),
agentClass: exports.getProtectedHttpAgent((new URL(url)).protocol, blockLocalIPs),
}

const req = request(opts, (err) => {
if (err) reject(err)
})
req.on('response', (response) => {
// Can be undefined for unknown length URLs, e.g. transfer-encoding: chunked
const contentLength = parseInt(response.headers['content-length'], 10)

// No need to get the rest of the response, as we only want header (not really relevant for HEAD, but why not)
req.abort()

resolve({
type: response.headers['content-type'],
size: Number.isNaN(contentLength) ? null : contentLength,
statusCode: response.statusCode,
})
})
})
async function requestWithMethod (method) {
const protectedGot = getProtectedGot({ url, blockLocalIPs })
const stream = protectedGot.stream(url, { method, throwHttpErrors: false })

return new Promise((resolve, reject) => (
stream
.on('response', (response) => {
// Can be undefined for unknown length URLs, e.g. transfer-encoding: chunked
const contentLength = parseInt(response.headers['content-length'], 10)

// No need to get the rest of the response, as we only want header (not really relevant for HEAD, but why not)
stream.destroy()

resolve({
type: response.headers['content-type'],
size: Number.isNaN(contentLength) ? null : contentLength,
statusCode: response.statusCode,
})
})
.on('error', (err) => {
reject(err)
})
))
}

// We prefer to use a HEAD request, as it doesn't download the content. If the URL doesn't
// support HEAD, or doesn't follow the spec and provide the correct Content-Length, we
Expand All @@ -140,6 +152,7 @@ exports.getURLMeta = async (url, blockLocalIPs = false) => {
// (e.g. HEAD doesn't work on signed S3 URLs)
// We look for status codes in the 400 and 500 ranges here, as 3xx errors are
// unlikely to have to do with our choice of method
// todo add unit test for this
if (urlMeta.statusCode >= 400 || urlMeta.size === 0 || urlMeta.size == null) {
urlMeta = await requestWithMethod('GET')
}
Expand Down
Loading

0 comments on commit 15b6b01

Please sign in to comment.