Skip to content

Commit

Permalink
zlib: detect gzip files when using unzip*
Browse files Browse the repository at this point in the history
Detect whether a gzip file is being passed to `unzip*` by
testing the first bytes for the gzip magic bytes, and setting
the decompression mode to `GUNZIP` or `INFLATE` according to
the result.

This enables gzip-only features like multi-member support
to be used together with the `unzip*` autodetection support
and thereby makes `gunzip*` and `unzip*` return identical
results for gzip input again.

Add a simple test for checking that features specific to
`zlib.gunzip`, notably support for multiple members, also work
when using `zlib.unzip`.

PR-URL: #5884
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: James M Snell <jasnell@gmail.com>
  • Loading branch information
addaleax authored and bnoordhuis committed Apr 5, 2016
1 parent 0d41463 commit 2d7e316
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 1 deletion.
50 changes: 49 additions & 1 deletion src/node_zlib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ class ZCtx : public AsyncWrap {
windowBits_(0),
write_in_progress_(false),
pending_close_(false),
refs_(0) {
refs_(0),
gzip_id_bytes_read_(0) {
MakeWeak<ZCtx>(this);
}

Expand Down Expand Up @@ -225,6 +226,8 @@ class ZCtx : public AsyncWrap {
static void Process(uv_work_t* work_req) {
ZCtx *ctx = ContainerOf(&ZCtx::work_req_, work_req);

const Bytef* next_expected_header_byte = nullptr;

// If the avail_out is left at 0, then it means that it ran out
// of room. If there was avail_out left over, then it means
// that all of the input was consumed.
Expand All @@ -235,6 +238,50 @@ class ZCtx : public AsyncWrap {
ctx->err_ = deflate(&ctx->strm_, ctx->flush_);
break;
case UNZIP:
if (ctx->strm_.avail_in > 0) {
next_expected_header_byte = ctx->strm_.next_in;
}

switch (ctx->gzip_id_bytes_read_) {
case 0:
if (next_expected_header_byte == nullptr) {
break;
}

if (*next_expected_header_byte == GZIP_HEADER_ID1) {
ctx->gzip_id_bytes_read_ = 1;
next_expected_header_byte++;

if (ctx->strm_.avail_in == 1) {
// The only available byte was already read.
break;
}
} else {
ctx->mode_ = INFLATE;
break;
}

// fallthrough
case 1:
if (next_expected_header_byte == nullptr) {
break;
}

if (*next_expected_header_byte == GZIP_HEADER_ID2) {
ctx->gzip_id_bytes_read_ = 2;
ctx->mode_ = GUNZIP;
} else {
// There is no actual difference between INFLATE and INFLATERAW
// (after initialization).
ctx->mode_ = INFLATE;
}

break;
default:
CHECK(0 && "invalid number of gzip magic number bytes read");
}

// fallthrough
case INFLATE:
case GUNZIP:
case INFLATERAW:
Expand Down Expand Up @@ -591,6 +638,7 @@ class ZCtx : public AsyncWrap {
bool write_in_progress_;
bool pending_close_;
unsigned int refs_;
unsigned int gzip_id_bytes_read_;
};


Expand Down
14 changes: 14 additions & 0 deletions test/parallel/test-zlib-from-concatenated-gzip.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ zlib.gunzip(data, common.mustCall((err, result) => {
assert.equal(result, 'abcdef', 'result should match original string');
}));

// `unzip` auto-detects gzip input, so it is expected to decode every member
// of the concatenated gzip data, just like the `gunzip` call above.
zlib.unzip(data, common.mustCall((error, output) => {
  assert.ifError(error);
  assert.equal(output, 'abcdef', 'result should match original string');
}));

// Multi-member support does not apply to zlib inflate/deflate.
zlib.unzip(
  Buffer.concat([zlib.deflateSync('abc'), zlib.deflateSync('def')]),
  common.mustCall((error, output) => {
    assert.ifError(error);
    assert.equal(output, 'abc',
                 'result should match contents of first "member"');
  })
);

// files that have the "right" magic bytes for starting a new gzip member
// in the middle of themselves, even if they are part of a single
// regularly compressed member
Expand Down
28 changes: 28 additions & 0 deletions test/parallel/test-zlib-unzip-one-byte-chunks.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'use strict';
const common = require('../common');
const assert = require('assert');
const zlib = require('zlib');

// Two gzip members back to back; the expected decoded output is the
// concatenation of both payloads.
const concatenated = Buffer.concat(
  ['abc', 'def'].map((text) => zlib.gzipSync(text))
);

// Decompressed chunks, collected in arrival order.
const chunks = [];

const unzip = zlib.createUnzip();

unzip.on('error', (err) => {
  assert.ifError(err);
});

unzip.on('data', (chunk) => chunks.push(chunk));

unzip.on('finish', common.mustCall(() => {
  const decoded = Buffer.concat(chunks).toString();
  assert.deepStrictEqual(decoded, 'abcdef',
                         'result should match original string');
}));

// Write each single byte individually, so that the gzip magic bytes reach
// the stream in separate writes.
for (const byte of concatenated) {
  unzip.write(Buffer.from([byte]));
}

unzip.end();

0 comments on commit 2d7e316

Please sign in to comment.