Skip to content

Commit

Permalink
file inference improvements for .tbr and .tgz
Browse files Browse the repository at this point in the history
When unpacking, only infer brotli compression from the filename if the
first 512 bytes are not a valid tar header (or the stream is shorter than
512 bytes).

While Brotli doesn't give us magic header bytes the way gzip does, a .tbr
file starting with 512 bytes of valid tar data is almost certainly not a
brotli-compressed archive.

And a .tbr file starting with the magic gzip bytes is almost certainly a
gzip archive, and not brotli, despite what the filename says.

In all cases, if explicit boolean or object values appear in the options
for either gzip or brotli, we respect that, and ignore the filename.
  • Loading branch information
isaacs committed Sep 5, 2023
1 parent 336fa8f commit db6f539
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 5 deletions.
1 change: 1 addition & 0 deletions lib/pack.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ const Pack = warner(class Pack extends Minipass {

this.portable = !!opt.portable
this.zip = null

if (opt.gzip || opt.brotli) {
if (opt.gzip && opt.brotli) {
throw new TypeError('gzip and brotli are mutually exclusive')
Expand Down
45 changes: 42 additions & 3 deletions lib/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,13 @@ module.exports = warner(class Parser extends EE {
// Unlike gzip, brotli doesn't have any magic bytes to identify it
// Users need to explicitly tell us they're extracting a brotli file
// Or we infer from the file extension
this.brotli = opt.brotli || (opt.file && (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
const isTBR = (opt.file && (
opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr')))
// if it's a tbr file it MIGHT be brotli, but we don't know until
// we look at it and verify it's not a valid tar file.
this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli
: isTBR ? undefined
: false

// have to set this so that streams are ok piping into it
this.writable = true
Expand Down Expand Up @@ -351,7 +357,9 @@ module.exports = warner(class Parser extends EE {
}

// first write, might be gzipped
if (this[UNZIP] === null && chunk) {
const needSniff = this[UNZIP] === null ||
this.brotli === undefined && this[UNZIP] === false
if (needSniff && chunk) {
if (this[BUFFER]) {
chunk = Buffer.concat([this[BUFFER], chunk])
this[BUFFER] = null
Expand All @@ -360,15 +368,45 @@ module.exports = warner(class Parser extends EE {
this[BUFFER] = chunk
return true
}

// look for gzip header
for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) {
if (chunk[i] !== gzipHeader[i]) {
this[UNZIP] = false
}
}

const maybeBrotli = this.brotli === undefined
if (this[UNZIP] === false && maybeBrotli) {
// read the first header to see if it's a valid tar file. If so,
// we can safely assume that it's not actually brotli, despite the
// .tbr or .tar.br file extension.
// if we ended before getting a full chunk, yes, def brotli
if (chunk.length < 512) {
if (this[ENDED]) {
this.brotli = true
} else {
this[BUFFER] = chunk
return true
}
} else {
// if it's tar, it's pretty reliably not brotli, chances of
// that happening are astronomical.
try {
new Header(chunk.slice(0, 512))
this.brotli = false
} catch (_) {
this.brotli = true
}
}
}

if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) {
const ended = this[ENDED]
this[ENDED] = false
this[UNZIP] = this.brotli ? new zlib.BrotliDecompress() : new zlib.Unzip()
this[UNZIP] = this[UNZIP] === null
? new zlib.Unzip()
: new zlib.BrotliDecompress()
this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk))
this[UNZIP].on('error', er => this.abort(er))
this[UNZIP].on('end', _ => {
Expand Down Expand Up @@ -506,6 +544,7 @@ module.exports = warner(class Parser extends EE {
this[UNZIP].end(chunk)
} else {
this[ENDED] = true
if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0)
this.write(chunk)
}
}
Expand Down
53 changes: 51 additions & 2 deletions test/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ t.test('fixture tests', t => {
const eventsFile = parsedir + '/' + base + tail
const expect = require(eventsFile)

t.test('one byte at a time', t => {
t.test('uncompressed one byte at a time', t => {
const bs = new ByteStream()
const opt = (maxMeta || filter || strict) ? {
maxMetaEntrySize: maxMeta,
Expand All @@ -93,7 +93,7 @@ t.test('fixture tests', t => {
bs.end(tardata)
})

t.test('all at once', t => {
t.test('uncompressed all at once', t => {
const p = new Parse({
maxMetaEntrySize: maxMeta,
filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
Expand All @@ -103,6 +103,31 @@ t.test('fixture tests', t => {
p.end(tardata)
})

// Pipes the uncompressed tar fixture through a ByteStream into a parser
// whose `file` option carries a brotli-style extension (.tbr), then checks
// the parser's events against the fixture's expected events.
t.test('uncompressed one byte at a time, filename .tbr', t => {
  const bs = new ByteStream()
  // Always pass the options object. Previously the options collapsed to
  // `null` whenever maxMeta, filter and strict were all falsy, which also
  // dropped `file` -- so the .tbr filename was never given to the parser
  // for those fixtures. This mirrors the "all at once" sibling tests,
  // which pass the same option shape unconditionally.
  const bp = new Parse({
    maxMetaEntrySize: maxMeta,
    filter: filter ? (path, entry) => entry.size % 2 !== 0 : null,
    strict: strict,
    file: 'example.tbr',
  })
  trackEvents(t, expect, bp)
  bs.pipe(bp)
  bs.end(tardata)
})

// Hands the whole uncompressed tar fixture to the parser in a single end()
// call while the `file` option names a .tar.br archive, then tracks the
// parser's events against the fixture's expected events.
t.test('uncompressed all at once, filename .tar.br', t => {
  // When the fixture asks for filtering, keep only odd-sized entries.
  const oddSizedOnly = (path, entry) => entry.size % 2 !== 0
  const parseOptions = {
    maxMetaEntrySize: maxMeta,
    filter: filter ? oddSizedOnly : null,
    strict,
    file: 'example.tar.br',
  }
  const parser = new Parse(parseOptions)
  trackEvents(t, expect, parser)
  parser.end(tardata)
})

t.test('gzipped all at once', t => {
const p = new Parse({
maxMetaEntrySize: maxMeta,
Expand All @@ -113,6 +138,17 @@ t.test('fixture tests', t => {
p.end(zlib.gzipSync(tardata))
})

// Gzips the tar fixture and hands it to the parser in a single end() call
// while the `file` option names a .tbr archive, then tracks the parser's
// events against the fixture's expected events.
t.test('gzipped all at once, filename .tbr', t => {
  // When the fixture asks for filtering, keep only odd-sized entries.
  const oddSizedOnly = (path, entry) => entry.size % 2 !== 0
  const parseOptions = {
    maxMetaEntrySize: maxMeta,
    filter: filter ? oddSizedOnly : null,
    strict,
    file: 'example.tbr',
  }
  const parser = new Parse(parseOptions)
  trackEvents(t, expect, parser)
  const gzipped = zlib.gzipSync(tardata)
  parser.end(gzipped)
})

t.test('gzipped byte at a time', t => {
const bs = new ByteStream()
const bp = new Parse({
Expand Down Expand Up @@ -171,6 +207,19 @@ t.test('fixture tests', t => {
bs.end(zlib.brotliCompressSync(tardata))
})

// Brotli-compresses the tar fixture and pipes it through a ByteStream into
// a parser that is told only the .tbr filename (no explicit brotli option),
// then tracks the parser's events against the fixture's expected events.
t.test('compress with brotli .tbr byte at a time', t => {
  // When the fixture asks for filtering, keep only odd-sized entries.
  const oddSizedOnly = (path, entry) => entry.size % 2 !== 0
  const parseOptions = {
    maxMetaEntrySize: maxMeta,
    filter: filter ? oddSizedOnly : null,
    strict,
    file: 'example.tbr',
  }
  const byteSource = new ByteStream()
  const parser = new Parse(parseOptions)
  trackEvents(t, expect, parser)
  byteSource.pipe(parser)
  const compressed = zlib.brotliCompressSync(tardata)
  byteSource.end(compressed)
})

t.test('async chunks', t => {
const p = new Parse({
maxMetaEntrySize: maxMeta,
Expand Down

0 comments on commit db6f539

Please sign in to comment.