@@ -200,20 +200,20 @@ def __init__(self, filename=None, mode=None,
200
200
FutureWarning , 2 )
201
201
self .mode = WRITE
202
202
self ._init_write (filename )
203
+ if mtime is None :
204
+ mtime = int (time .time ())
203
205
self .compress = zlib .compressobj (compresslevel ,
204
206
zlib .DEFLATED ,
205
- - zlib .MAX_WBITS ,
207
+ 16 + zlib .MAX_WBITS ,
206
208
zlib .DEF_MEM_LEVEL ,
207
- 0 )
208
- self ._write_mtime = mtime
209
+ 0 ,
210
+ mtime = mtime ,
211
+ fname = self ._encode_fname ())
209
212
else :
210
213
raise ValueError ("Invalid mode: {!r}" .format (mode ))
211
214
212
215
self .fileobj = fileobj
213
216
214
- if self .mode == WRITE :
215
- self ._write_gzip_header (compresslevel )
216
-
217
217
@property
218
218
def mtime (self ):
219
219
"""Last modification time read from stream, or None"""
@@ -225,15 +225,12 @@ def __repr__(self):
225
225
226
226
def _init_write (self , filename ):
227
227
self .name = filename
228
- self .crc = zlib .crc32 (b"" )
229
228
self .size = 0
230
229
self .writebuf = []
231
230
self .bufsize = 0
232
231
self .offset = 0 # Current file offset for seek(), tell(), etc
233
232
234
- def _write_gzip_header (self , compresslevel ):
235
- self .fileobj .write (b'\037 \213 ' ) # magic header
236
- self .fileobj .write (b'\010 ' ) # compression method
233
+ def _encode_fname (self ):
237
234
try :
238
235
# RFC 1952 requires the FNAME field to be Latin-1. Do not
239
236
# include filenames that cannot be represented that way.
@@ -244,24 +241,7 @@ def _write_gzip_header(self, compresslevel):
244
241
fname = fname [:- 3 ]
245
242
except UnicodeEncodeError :
246
243
fname = b''
247
- flags = 0
248
- if fname :
249
- flags = FNAME
250
- self .fileobj .write (chr (flags ).encode ('latin-1' ))
251
- mtime = self ._write_mtime
252
- if mtime is None :
253
- mtime = time .time ()
254
- write32u (self .fileobj , int (mtime ))
255
- if compresslevel == _COMPRESS_LEVEL_BEST :
256
- xfl = b'\002 '
257
- elif compresslevel == _COMPRESS_LEVEL_FAST :
258
- xfl = b'\004 '
259
- else :
260
- xfl = b'\000 '
261
- self .fileobj .write (xfl )
262
- self .fileobj .write (b'\377 ' )
263
- if fname :
264
- self .fileobj .write (fname + b'\000 ' )
244
+ return fname
265
245
266
246
def write (self ,data ):
267
247
self ._check_not_closed ()
@@ -282,7 +262,6 @@ def write(self,data):
282
262
if length > 0 :
283
263
self .fileobj .write (self .compress .compress (data ))
284
264
self .size += length
285
- self .crc = zlib .crc32 (data , self .crc )
286
265
self .offset += length
287
266
288
267
return length
@@ -326,9 +305,6 @@ def close(self):
326
305
try :
327
306
if self .mode == WRITE :
328
307
fileobj .write (self .compress .flush ())
329
- write32u (fileobj , self .crc )
330
- # self.size may exceed 2 GiB, or even 4 GiB
331
- write32u (fileobj , self .size & 0xffffffff )
332
308
elif self .mode == READ :
333
309
self ._buffer .close ()
334
310
finally :
@@ -409,62 +385,17 @@ def _read_exact(fp, n):
409
385
return data
410
386
411
387
412
- def _read_gzip_header (fp ):
413
- '''Read a gzip header from `fp` and progress to the end of the header.
414
-
415
- Returns last mtime if header was present or None otherwise.
416
- '''
417
- magic = fp .read (2 )
418
- if magic == b'' :
419
- return None
420
-
421
- if magic != b'\037 \213 ' :
422
- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
423
-
424
- (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , _read_exact (fp , 8 ))
425
- if method != 8 :
426
- raise BadGzipFile ('Unknown compression method' )
427
-
428
- if flag & FEXTRA :
429
- # Read & discard the extra field, if present
430
- extra_len , = struct .unpack ("<H" , _read_exact (fp , 2 ))
431
- _read_exact (fp , extra_len )
432
- if flag & FNAME :
433
- # Read and discard a null-terminated string containing the filename
434
- while True :
435
- s = fp .read (1 )
436
- if not s or s == b'\000 ' :
437
- break
438
- if flag & FCOMMENT :
439
- # Read and discard a null-terminated string containing a comment
440
- while True :
441
- s = fp .read (1 )
442
- if not s or s == b'\000 ' :
443
- break
444
- if flag & FHCRC :
445
- _read_exact (fp , 2 ) # Read & discard the 16-bit header CRC
446
- return last_mtime
447
-
448
-
449
388
class _GzipReader (_compression .DecompressReader ):
450
389
def __init__ (self , fp ):
451
390
super ().__init__ (_PaddedFile (fp ), zlib ._ZlibDecompressor ,
452
- wbits = - zlib .MAX_WBITS )
391
+ wbits = 16 + zlib .MAX_WBITS )
453
392
# Set flag indicating start of a new member
454
393
self ._new_member = True
455
394
self ._last_mtime = None
456
395
457
396
def _init_read (self ):
458
- self ._crc = zlib .crc32 (b"" )
459
397
self ._stream_size = 0 # Decompressed size of unconcatenated stream
460
398
461
- def _read_gzip_header (self ):
462
- last_mtime = _read_gzip_header (self ._fp )
463
- if last_mtime is None :
464
- return False
465
- self ._last_mtime = last_mtime
466
- return True
467
-
468
399
def read (self , size = - 1 ):
469
400
if size < 0 :
470
401
return self .readall ()
@@ -478,33 +409,35 @@ def read(self, size=-1):
478
409
while True :
479
410
if self ._decompressor .eof :
480
411
# Ending case: we've come to the end of a member in the file,
481
- # so finish up this member, and read a new gzip header.
482
- # Check the CRC and file size, and set the flag so we read
483
- # a new member
412
+ # so finish up this member and set the flag, so that we read a
413
+ # new member
484
414
self ._read_eof ()
485
415
self ._new_member = True
486
416
self ._decompressor = self ._decomp_factory (
487
417
** self ._decomp_args )
488
418
489
- if self ._new_member :
490
- # If the _new_member flag is set, we have to
491
- # jump to the next member, if there is one.
492
- self ._init_read ()
493
- if not self ._read_gzip_header ():
494
- self ._size = self ._pos
495
- return b""
496
- self ._new_member = False
497
-
498
419
# Read a chunk of data from the file
499
420
if self ._decompressor .needs_input :
500
421
buf = self ._fp .read (READ_BUFFER_SIZE )
422
+ if self ._new_member :
423
+ # If the _new_member flag is set, we have to
424
+ # jump to the next member, if there is one.
425
+ self ._init_read ()
426
+ if not buf :
427
+ self ._size = self ._pos
428
+ return b""
429
+ self ._new_member = False
501
430
uncompress = self ._decompressor .decompress (buf , size )
502
431
else :
432
+ assert not self ._new_member
503
433
uncompress = self ._decompressor .decompress (b"" , size )
504
434
435
+ if self ._decompressor .gz_header_done :
436
+ self ._last_mtime = self ._decompressor .gz_header_mtime
437
+
505
438
if self ._decompressor .unused_data != b"" :
506
439
# Prepend the already read bytes to the fileobj so they can
507
- # be seen by _read_eof() and _read_gzip_header()
440
+ # be seen by _read_eof()
508
441
self ._fp .prepend (self ._decompressor .unused_data )
509
442
510
443
if uncompress != b"" :
@@ -513,23 +446,12 @@ def read(self, size=-1):
513
446
raise EOFError ("Compressed file ended before the "
514
447
"end-of-stream marker was reached" )
515
448
516
- self ._crc = zlib .crc32 (uncompress , self ._crc )
517
449
self ._stream_size += len (uncompress )
518
450
self ._pos += len (uncompress )
519
451
return uncompress
520
452
521
453
def _read_eof (self ):
522
454
# We've read to the end of the file
523
- # We check that the computed CRC and size of the
524
- # uncompressed data matches the stored values. Note that the size
525
- # stored is the true file size mod 2**32.
526
- crc32 , isize = struct .unpack ("<II" , _read_exact (self ._fp , 8 ))
527
- if crc32 != self ._crc :
528
- raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
529
- hex (self ._crc )))
530
- elif isize != (self ._stream_size & 0xffffffff ):
531
- raise BadGzipFile ("Incorrect length of data produced" )
532
-
533
455
# Gzip files can be padded with zeroes and still have archives.
534
456
# Consume all zero bytes and set the file position to the first
535
457
# non-zero byte. See http://www.gzip.org/#faq8
@@ -544,68 +466,32 @@ def _rewind(self):
544
466
self ._new_member = True
545
467
546
468
547
- def _create_simple_gzip_header (compresslevel : int ,
548
- mtime = None ) -> bytes :
549
- """
550
- Write a simple gzip header with no extra fields.
551
- :param compresslevel: Compresslevel used to determine the xfl bytes.
552
- :param mtime: The mtime (must support conversion to a 32-bit integer).
553
- :return: A bytes object representing the gzip header.
554
- """
555
- if mtime is None :
556
- mtime = time .time ()
557
- if compresslevel == _COMPRESS_LEVEL_BEST :
558
- xfl = 2
559
- elif compresslevel == _COMPRESS_LEVEL_FAST :
560
- xfl = 4
561
- else :
562
- xfl = 0
563
- # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
564
- # fields added to header), mtime, xfl and os (255 for unknown OS).
565
- return struct .pack ("<BBBBLBB" , 0x1f , 0x8b , 8 , 0 , int (mtime ), xfl , 255 )
566
-
567
-
568
469
def compress (data , compresslevel = _COMPRESS_LEVEL_BEST , * , mtime = None ):
569
470
"""Compress data in one shot and return the compressed string.
570
471
571
472
compresslevel sets the compression level in range of 0-9.
572
473
mtime can be used to set the modification time. The modification time is
573
474
set to the current time by default.
574
475
"""
575
- if mtime == 0 :
576
- # Use zlib as it creates the header with 0 mtime by default.
577
- # This is faster and with less overhead.
578
- return zlib .compress (data , level = compresslevel , wbits = 31 )
579
- header = _create_simple_gzip_header (compresslevel , mtime )
580
- trailer = struct .pack ("<LL" , zlib .crc32 (data ), (len (data ) & 0xffffffff ))
581
- # Wbits=-15 creates a raw deflate block.
582
- return (header + zlib .compress (data , level = compresslevel , wbits = - 15 ) +
583
- trailer )
476
+ if mtime is None :
477
+ mtime = int (time .time ())
478
+ return zlib .compress (data , level = compresslevel , wbits = 31 , mtime = mtime )
584
479
585
480
586
481
def decompress (data ):
587
482
"""Decompress a gzip compressed string in one shot.
588
483
Return the decompressed string.
589
484
"""
590
485
decompressed_members = []
591
- while True :
592
- fp = io .BytesIO (data )
593
- if _read_gzip_header (fp ) is None :
594
- return b"" .join (decompressed_members )
595
- # Use a zlib raw deflate compressor
596
- do = zlib .decompressobj (wbits = - zlib .MAX_WBITS )
597
- # Read all the data except the header
598
- decompressed = do .decompress (data [fp .tell ():])
599
- if not do .eof or len (do .unused_data ) < 8 :
486
+ while data :
487
+ do = zlib .decompressobj (wbits = 16 + zlib .MAX_WBITS )
488
+ decompressed = do .decompress (data )
489
+ if not do .eof :
600
490
raise EOFError ("Compressed file ended before the end-of-stream "
601
491
"marker was reached" )
602
- crc , length = struct .unpack ("<II" , do .unused_data [:8 ])
603
- if crc != zlib .crc32 (decompressed ):
604
- raise BadGzipFile ("CRC check failed" )
605
- if length != (len (decompressed ) & 0xffffffff ):
606
- raise BadGzipFile ("Incorrect length of data produced" )
607
492
decompressed_members .append (decompressed )
608
- data = do .unused_data [8 :].lstrip (b"\x00 " )
493
+ data = do .unused_data .lstrip (b"\x00 " )
494
+ return b"" .join (decompressed_members )
609
495
610
496
611
497
def main ():
0 commit comments