@@ -217,12 +217,15 @@ def __init__(self, filename=None, mode=None,
217
217
FutureWarning , 2 )
218
218
self .mode = WRITE
219
219
self ._init_write (filename )
220
+ if mtime is None :
221
+ mtime = int (time .time ())
220
222
self .compress = zlib .compressobj (compresslevel ,
221
223
zlib .DEFLATED ,
222
- - zlib .MAX_WBITS ,
224
+ 16 + zlib .MAX_WBITS ,
223
225
zlib .DEF_MEM_LEVEL ,
224
- 0 )
225
- self ._write_mtime = mtime
226
+ 0 ,
227
+ mtime = mtime ,
228
+ fname = self ._encode_fname ())
226
229
self ._buffer_size = _WRITE_BUFFER_SIZE
227
230
self ._buffer = io .BufferedWriter (_WriteBufferStream (self ),
228
231
buffer_size = self ._buffer_size )
@@ -231,9 +234,6 @@ def __init__(self, filename=None, mode=None,
231
234
232
235
self .fileobj = fileobj
233
236
234
- if self .mode == WRITE :
235
- self ._write_gzip_header (compresslevel )
236
-
237
237
@property
238
238
def mtime (self ):
239
239
"""Last modification time read from stream, or None"""
@@ -245,7 +245,6 @@ def __repr__(self):
245
245
246
246
def _init_write (self , filename ):
247
247
self .name = filename
248
- self .crc = zlib .crc32 (b"" )
249
248
self .size = 0
250
249
self .writebuf = []
251
250
self .bufsize = 0
@@ -256,9 +255,7 @@ def tell(self):
256
255
self ._buffer .flush ()
257
256
return super ().tell ()
258
257
259
- def _write_gzip_header (self , compresslevel ):
260
- self .fileobj .write (b'\037 \213 ' ) # magic header
261
- self .fileobj .write (b'\010 ' ) # compression method
258
+ def _encode_fname (self ):
262
259
try :
263
260
# RFC 1952 requires the FNAME field to be Latin-1. Do not
264
261
# include filenames that cannot be represented that way.
@@ -269,24 +266,7 @@ def _write_gzip_header(self, compresslevel):
269
266
fname = fname [:- 3 ]
270
267
except UnicodeEncodeError :
271
268
fname = b''
272
- flags = 0
273
- if fname :
274
- flags = FNAME
275
- self .fileobj .write (chr (flags ).encode ('latin-1' ))
276
- mtime = self ._write_mtime
277
- if mtime is None :
278
- mtime = time .time ()
279
- write32u (self .fileobj , int (mtime ))
280
- if compresslevel == _COMPRESS_LEVEL_BEST :
281
- xfl = b'\002 '
282
- elif compresslevel == _COMPRESS_LEVEL_FAST :
283
- xfl = b'\004 '
284
- else :
285
- xfl = b'\000 '
286
- self .fileobj .write (xfl )
287
- self .fileobj .write (b'\377 ' )
288
- if fname :
289
- self .fileobj .write (fname + b'\000 ' )
269
+ return fname
290
270
291
271
def write (self ,data ):
292
272
self ._check_not_closed ()
@@ -311,7 +291,6 @@ def _write_raw(self, data):
311
291
if length > 0 :
312
292
self .fileobj .write (self .compress .compress (data ))
313
293
self .size += length
314
- self .crc = zlib .crc32 (data , self .crc )
315
294
self .offset += length
316
295
317
296
return length
@@ -355,9 +334,6 @@ def close(self):
355
334
if self .mode == WRITE :
356
335
self ._buffer .flush ()
357
336
fileobj .write (self .compress .flush ())
358
- write32u (fileobj , self .crc )
359
- # self.size may exceed 2 GiB, or even 4 GiB
360
- write32u (fileobj , self .size & 0xffffffff )
361
337
elif self .mode == READ :
362
338
self ._buffer .close ()
363
339
finally :
@@ -439,62 +415,17 @@ def _read_exact(fp, n):
439
415
return data
440
416
441
417
442
- def _read_gzip_header (fp ):
443
- '''Read a gzip header from `fp` and progress to the end of the header.
444
-
445
- Returns last mtime if header was present or None otherwise.
446
- '''
447
- magic = fp .read (2 )
448
- if magic == b'' :
449
- return None
450
-
451
- if magic != b'\037 \213 ' :
452
- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
453
-
454
- (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , _read_exact (fp , 8 ))
455
- if method != 8 :
456
- raise BadGzipFile ('Unknown compression method' )
457
-
458
- if flag & FEXTRA :
459
- # Read & discard the extra field, if present
460
- extra_len , = struct .unpack ("<H" , _read_exact (fp , 2 ))
461
- _read_exact (fp , extra_len )
462
- if flag & FNAME :
463
- # Read and discard a null-terminated string containing the filename
464
- while True :
465
- s = fp .read (1 )
466
- if not s or s == b'\000 ' :
467
- break
468
- if flag & FCOMMENT :
469
- # Read and discard a null-terminated string containing a comment
470
- while True :
471
- s = fp .read (1 )
472
- if not s or s == b'\000 ' :
473
- break
474
- if flag & FHCRC :
475
- _read_exact (fp , 2 ) # Read & discard the 16-bit header CRC
476
- return last_mtime
477
-
478
-
479
418
class _GzipReader (_compression .DecompressReader ):
480
419
def __init__ (self , fp ):
481
420
super ().__init__ (_PaddedFile (fp ), zlib ._ZlibDecompressor ,
482
- wbits = - zlib .MAX_WBITS )
421
+ wbits = 16 + zlib .MAX_WBITS )
483
422
# Set flag indicating start of a new member
484
423
self ._new_member = True
485
424
self ._last_mtime = None
486
425
487
426
def _init_read (self ):
488
- self ._crc = zlib .crc32 (b"" )
489
427
self ._stream_size = 0 # Decompressed size of unconcatenated stream
490
428
491
- def _read_gzip_header (self ):
492
- last_mtime = _read_gzip_header (self ._fp )
493
- if last_mtime is None :
494
- return False
495
- self ._last_mtime = last_mtime
496
- return True
497
-
498
429
def read (self , size = - 1 ):
499
430
if size < 0 :
500
431
return self .readall ()
@@ -508,33 +439,35 @@ def read(self, size=-1):
508
439
while True :
509
440
if self ._decompressor .eof :
510
441
# Ending case: we've come to the end of a member in the file,
511
- # so finish up this member, and read a new gzip header.
512
- # Check the CRC and file size, and set the flag so we read
513
- # a new member
442
+ # so finish up this member and set the flag, so that we read a
443
+ # new member
514
444
self ._read_eof ()
515
445
self ._new_member = True
516
446
self ._decompressor = self ._decomp_factory (
517
447
** self ._decomp_args )
518
448
519
- if self ._new_member :
520
- # If the _new_member flag is set, we have to
521
- # jump to the next member, if there is one.
522
- self ._init_read ()
523
- if not self ._read_gzip_header ():
524
- self ._size = self ._pos
525
- return b""
526
- self ._new_member = False
527
-
528
449
# Read a chunk of data from the file
529
450
if self ._decompressor .needs_input :
530
451
buf = self ._fp .read (READ_BUFFER_SIZE )
452
+ if self ._new_member :
453
+ # If the _new_member flag is set, we have to
454
+ # jump to the next member, if there is one.
455
+ self ._init_read ()
456
+ if not buf :
457
+ self ._size = self ._pos
458
+ return b""
459
+ self ._new_member = False
531
460
uncompress = self ._decompressor .decompress (buf , size )
532
461
else :
462
+ assert not self ._new_member
533
463
uncompress = self ._decompressor .decompress (b"" , size )
534
464
465
+ if self ._decompressor .gz_header_done :
466
+ self ._last_mtime = self ._decompressor .gz_header_mtime
467
+
535
468
if self ._decompressor .unused_data != b"" :
536
469
# Prepend the already read bytes to the fileobj so they can
537
- # be seen by _read_eof() and _read_gzip_header()
470
+ # be seen by _read_eof()
538
471
self ._fp .prepend (self ._decompressor .unused_data )
539
472
540
473
if uncompress != b"" :
@@ -543,23 +476,12 @@ def read(self, size=-1):
543
476
raise EOFError ("Compressed file ended before the "
544
477
"end-of-stream marker was reached" )
545
478
546
- self ._crc = zlib .crc32 (uncompress , self ._crc )
547
479
self ._stream_size += len (uncompress )
548
480
self ._pos += len (uncompress )
549
481
return uncompress
550
482
551
483
def _read_eof (self ):
552
484
# We've read to the end of the file
553
- # We check that the computed CRC and size of the
554
- # uncompressed data matches the stored values. Note that the size
555
- # stored is the true file size mod 2**32.
556
- crc32 , isize = struct .unpack ("<II" , _read_exact (self ._fp , 8 ))
557
- if crc32 != self ._crc :
558
- raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
559
- hex (self ._crc )))
560
- elif isize != (self ._stream_size & 0xffffffff ):
561
- raise BadGzipFile ("Incorrect length of data produced" )
562
-
563
485
# Gzip files can be padded with zeroes and still have archives.
564
486
# Consume all zero bytes and set the file position to the first
565
487
# non-zero byte. See http://www.gzip.org/#faq8
@@ -574,68 +496,32 @@ def _rewind(self):
574
496
self ._new_member = True
575
497
576
498
577
- def _create_simple_gzip_header (compresslevel : int ,
578
- mtime = None ) -> bytes :
579
- """
580
- Write a simple gzip header with no extra fields.
581
- :param compresslevel: Compresslevel used to determine the xfl bytes.
582
- :param mtime: The mtime (must support conversion to a 32-bit integer).
583
- :return: A bytes object representing the gzip header.
584
- """
585
- if mtime is None :
586
- mtime = time .time ()
587
- if compresslevel == _COMPRESS_LEVEL_BEST :
588
- xfl = 2
589
- elif compresslevel == _COMPRESS_LEVEL_FAST :
590
- xfl = 4
591
- else :
592
- xfl = 0
593
- # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
594
- # fields added to header), mtime, xfl and os (255 for unknown OS).
595
- return struct .pack ("<BBBBLBB" , 0x1f , 0x8b , 8 , 0 , int (mtime ), xfl , 255 )
596
-
597
-
598
499
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.

    compresslevel sets the compression level in range of 0-9.
    mtime can be used to set the modification time. The modification time is
    set to the current time by default. Any value that supports conversion
    to an integer (for example a float timestamp) is accepted.
    """
    # Coerce every mtime, not only the default: callers have always been able
    # to pass float timestamps, and the value is forwarded to zlib as-is.
    # NOTE(review): the mtime keyword of zlib.compress is introduced by the
    # accompanying zlib change; confirm its accepted types there.
    mtime = int(time.time() if mtime is None else mtime)
    return zlib.compress(data, level=compresslevel, wbits=31, mtime=mtime)
614
509
615
510
616
511
def decompress(data):
    """Decompress a gzip compressed string in one shot.

    data may hold several concatenated gzip members, optionally separated
    or followed by zero padding; the members are decompressed in order and
    their contents are joined.

    Return the decompressed string.

    Raise BadGzipFile if a member does not start with the gzip magic number
    and EOFError if the data ends before a member's end-of-stream marker.
    Header and trailer parsing is delegated to zlib, so other corruption is
    presumably reported by zlib itself (zlib.error).
    """
    decompressed_members = []
    while data:
        # Keep rejecting non-gzip input with BadGzipFile (an OSError), as
        # callers probing "is this gzip?" rely on that exception type.
        magic = bytes(data[:2])
        if magic != b'\037\213':
            raise BadGzipFile('Not a gzipped file (%r)' % magic)
        # wbits = 16 + MAX_WBITS makes zlib consume the gzip header and
        # trailer of one member in addition to the deflate stream.
        do = zlib.decompressobj(wbits=16 + zlib.MAX_WBITS)
        decompressed = do.decompress(data)
        if not do.eof:
            raise EOFError("Compressed file ended before the end-of-stream "
                           "marker was reached")
        decompressed_members.append(decompressed)
        # Whatever follows the member is either zero padding or the next
        # member; drop the padding and loop.
        data = do.unused_data.lstrip(b"\x00")
    return b"".join(decompressed_members)
639
525
640
526
641
527
def main ():
0 commit comments