Skip to content

Commit

Permalink
pythongh-112346: Always set OS byte to 255, simpler gzip.compress fun…
Browse files Browse the repository at this point in the history
…ction. (pythonGH-120486)

This matches the output behavior in 3.10 and earlier; the optimization in 3.11 allowed the zlib library's "os" value to be filled in instead in the circumstance when mtime was 0.  this keeps things consistent.
  • Loading branch information
rhpvorderman authored and estyxx committed Jul 17, 2024
1 parent 9aa8bc4 commit 653dd3e
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 34 deletions.
8 changes: 5 additions & 3 deletions Doc/library/gzip.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,7 @@ The module defines the following items:

Compress the *data*, returning a :class:`bytes` object containing
the compressed data. *compresslevel* and *mtime* have the same meaning as in
the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this
function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``.
The zlib function is faster.
the :class:`GzipFile` constructor above.

.. versionadded:: 3.2
.. versionchanged:: 3.8
Expand All @@ -200,6 +198,10 @@ The module defines the following items:
streamed fashion. Calls with *mtime* set to ``0`` are delegated to
:func:`zlib.compress` for better speed.

.. versionchanged:: 3.13
The gzip header OS byte is guaranteed to be set to 255 when this function
is used as was the case in 3.10 and earlier.

.. function:: decompress(data)

Decompress the *data*, returning a :class:`bytes` object containing the
Expand Down
38 changes: 8 additions & 30 deletions Lib/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,43 +580,21 @@ def _rewind(self):
self._new_member = True


def _create_simple_gzip_header(compresslevel: int,
mtime = None) -> bytes:
"""
Write a simple gzip header with no extra fields.
:param compresslevel: Compresslevel used to determine the xfl bytes.
:param mtime: The mtime (must support conversion to a 32-bit integer).
:return: A bytes object representing the gzip header.
"""
if mtime is None:
mtime = time.time()
if compresslevel == _COMPRESS_LEVEL_BEST:
xfl = 2
elif compresslevel == _COMPRESS_LEVEL_FAST:
xfl = 4
else:
xfl = 0
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
# fields added to header), mtime, xfl and os (255 for unknown OS).
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)


def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
"""Compress data in one shot and return the compressed string.
compresslevel sets the compression level in range of 0-9.
mtime can be used to set the modification time. The modification time is
set to the current time by default.
"""
if mtime == 0:
# Use zlib as it creates the header with 0 mtime by default.
# This is faster and with less overhead.
return zlib.compress(data, level=compresslevel, wbits=31)
header = _create_simple_gzip_header(compresslevel, mtime)
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
# Wbits=-15 creates a raw deflate block.
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
trailer)
# Wbits=31 automatically includes a gzip header and trailer.
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
if mtime is None:
mtime = time.time()
# Reuse gzip header created by zlib, replace mtime and OS byte for
# consistency.
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
return header + gzip_data[10:]


def decompress(data):
Expand Down
12 changes: 11 additions & 1 deletion Lib/test/test_gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,14 +714,24 @@ def test_compress_mtime(self):
self.assertEqual(f.mtime, mtime)

def test_compress_correct_level(self):
# gzip.compress calls with mtime == 0 take a different code path.
for mtime in (0, 42):
with self.subTest(mtime=mtime):
nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime)
yescompress = gzip.compress(data1, compresslevel=1, mtime=mtime)
self.assertIn(data1, nocompress)
self.assertNotIn(data1, yescompress)

def test_issue112346(self):
# The OS byte should be 255, this should not change between Python versions.
for mtime in (0, 42):
with self.subTest(mtime=mtime):
compress = gzip.compress(data1, compresslevel=1, mtime=mtime)
self.assertEqual(
struct.unpack("<IxB", compress[4:10]),
(mtime, 255),
"Gzip header does not properly set either mtime or OS byte."
)

def test_decompress(self):
for data in (data1, data2):
buf = io.BytesIO()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
The OS byte in gzip headers is now always set to 255 when using
:func:`gzip.compress`.

0 comments on commit 653dd3e

Please sign in to comment.