Skip to content

Commit

Permalink
fix(gatsby-source-filesystem): fix broken stream with gzipped files (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
wardpeet authored Jan 8, 2021
1 parent 638ac0a commit a8b516f
Show file tree
Hide file tree
Showing 7 changed files with 413 additions and 11 deletions.
3 changes: 2 additions & 1 deletion packages/gatsby-source-filesystem/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
"@babel/cli": "^7.12.1",
"@babel/core": "^7.12.3",
"babel-preset-gatsby-package": "^0.11.0-next.0",
"cross-env": "^7.0.3"
"cross-env": "^7.0.3",
"msw": "^0.25.0"
},
"homepage": "/~https://github.com/gatsbyjs/gatsby/tree/master/packages/gatsby-source-filesystem#readme",
"keywords": [
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
import * as path from "path"
import * as zlib from "zlib"
import * as os from "os"
import { rest } from "msw"
import { setupServer } from "msw/node"
import { Writable } from "stream"
import got from "got"
import createRemoteFileNode from "../create-remote-file-node"

const fs = jest.requireActual(`fs-extra`)

const gotStream = jest.spyOn(got, `stream`)
const urlCount = new Map()

async function getFileSize(file) {
const stat = await fs.stat(file)

return stat.size
}

/**
* A utility to help create file responses
* - Url with attempts will use maxBytes for x amount of time until it delivers the full response
* - MaxBytes indicates how much bytes we'll be sending
*
* @param {string} file File path on disk
* @param {Object} req Is the request object from msw
* @param {{ compress: boolean}} options Options for the getFilecontent (use gzip or not)
*/
async function getFileContent(file, req, options = {}) {
const cacheKey = req.url.origin + req.url.pathname
const maxRetry = req.url.searchParams.get(`attempts`)
const maxBytes = req.url.searchParams.get(`maxBytes`)
const currentRetryCount = urlCount.get(cacheKey) || 0
urlCount.set(cacheKey, currentRetryCount + 1)

let fileContentBuffer = await fs.readFile(file)
if (options.compress) {
fileContentBuffer = zlib.deflateSync(fileContentBuffer)
}

const content = await new Promise(resolve => {
const fileStream = fs.createReadStream(file, {
end:
currentRetryCount < Number(maxRetry)
? Number(maxBytes)
: Number.MAX_SAFE_INTEGER,
})

const writableStream = new Writable()
const result = []
writableStream._write = (chunk, encoding, next) => {
result.push(chunk)

next()
}

writableStream.on(`finish`, () => {
resolve(Buffer.concat(result))
})

// eslint-disable-next-line no-unused-vars
let stream = fileStream
if (options.compress) {
stream = stream.pipe(zlib.createDeflate())
}

stream.pipe(writableStream)
})

return {
content,
contentLength:
req.url.searchParams.get(`contentLength`) === `false`
? undefined
: fileContentBuffer.length,
}
}

const server = setupServer(
rest.get(`http://external.com/logo.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/logo-gzip.svg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/gatsby-logo.svg`),
req,
{
compress: true,
}
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`content-encoding`, `gzip`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
}),
rest.get(`http://external.com/dog.jpg`, async (req, res, ctx) => {
const { content, contentLength } = await getFileContent(
path.join(__dirname, `./fixtures/dog-thumbnail.jpg`),
req
)

return res(
ctx.set(`Content-Type`, `image/svg+xml`),
ctx.set(`Content-Length`, contentLength),
ctx.status(200),
ctx.body(content)
)
})
)

function createMockCache() {
const tmpDir = fs.mkdtempSync(
path.join(os.tmpdir(), `gatsby-source-filesystem-`)
)

return {
get: jest.fn(),
set: jest.fn(),
directory: tmpDir,
}
}

const reporter = jest.fn(() => {
return {}
})

describe(`create-remote-file-node`, () => {
let cache

beforeAll(() => {
cache = createMockCache()
// Establish requests interception layer before all tests.
server.listen()
})
afterAll(() => {
if (cache) {
fs.removeSync(cache.directory)
}

// Clean up after all tests are done, preventing this
// interception layer from affecting irrelevant tests.
server.close()
})

beforeEach(() => {
gotStream.mockClear()
urlCount.clear()
})

it(`downloads and create a file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo.svg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`downloads and create a gzip file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`downloads and create a file`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/dog.jpg`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`dog.jpg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/dog-thumbnail.jpg`))
)
expect(gotStream).toBeCalledTimes(1)
})

it(`doesn't retry when no content-length is given`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300&contentLength=false`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).not.toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(1)
})

describe(`retries the download`, () => {
it(`Retries when gzip compression file is incomplete`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/logo-gzip.svg?attempts=1&maxBytes=300`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`logo-gzip.svg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/gatsby-logo.svg`))
)
expect(gotStream).toBeCalledTimes(2)
})

it(`Retries when binary file is incomplete`, async () => {
const fileNode = await createRemoteFileNode({
url: `http://external.com/dog.jpg?attempts=1&maxBytes=300`,
store: {},
getCache: () => cache,
createNode: jest.fn(),
createNodeId: jest.fn(),
reporter,
})

expect(fileNode.base).toBe(`dog.jpg`)
expect(fileNode.size).toBe(
await getFileSize(path.join(__dirname, `./fixtures/dog-thumbnail.jpg`))
)
expect(gotStream).toBeCalledTimes(2)
})
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,7 @@ describe(`create-remote-file-node`, () => {
describe(`valid url`, () => {
let uuid = 0

const setup = (
args = {},
type = `response`,
response = { statusCode: 200 }
) => {
const setup = (args = {}, response = { statusCode: 200 }) => {
const url = `https://images.whatever.com/real-image-trust-me-${uuid}.png`

const gotMock = {
Expand All @@ -121,14 +117,21 @@ describe(`create-remote-file-node`, () => {
got.stream.mockReturnValueOnce({
pipe: jest.fn(() => gotMock),
on: jest.fn((mockType, mockCallback) => {
if (mockType === type) {
if (mockType === `response`) {
// got throws on 404/500 so we mimic this behaviour
if (response.statusCode === 404) {
throw new Error(`Response code 404 (Not Found)`)
}

mockCallback(response)
}
if (mockType === `downloadProgress`) {
mockCallback({
progress: 1,
transferred: 1,
total: 1,
})
}

return gotMock
}),
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 11 additions & 3 deletions packages/gatsby-source-filesystem/src/create-remote-file-node.js
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>
},
...httpOpts,
})

let haveAllBytesBeenWritten = false
responseStream.on(`downloadProgress`, progress => {
if (progress.transferred === progress.total || progress.total === null) {
haveAllBytesBeenWritten = true
}
})

const fsWriteStream = fs.createWriteStream(tmpFilename)
responseStream.pipe(fsWriteStream)

Expand All @@ -180,12 +188,12 @@ const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) =>

responseStream.on(`response`, response => {
resetTimeout()
const contentLength =
response.headers && Number(response.headers[`content-length`])

fsWriteStream.on(`finish`, () => {
fsWriteStream.close()

// We have an incomplete download
if (contentLength && contentLength !== fsWriteStream.bytesWritten) {
if (!haveAllBytesBeenWritten) {
fs.removeSync(tmpFilename)

if (attempt < INCOMPLETE_RETRY_LIMIT) {
Expand Down
Loading

0 comments on commit a8b516f

Please sign in to comment.