@@ -217,12 +217,15 @@ def __init__(self, filename=None, mode=None,
217
217
FutureWarning , 2 )
218
218
self .mode = WRITE
219
219
self ._init_write (filename )
220
+ if mtime is None :
221
+ mtime = int (time .time ())
220
222
self .compress = zlib .compressobj (compresslevel ,
221
223
zlib .DEFLATED ,
222
- - zlib .MAX_WBITS ,
224
+ 16 + zlib .MAX_WBITS ,
223
225
zlib .DEF_MEM_LEVEL ,
224
- 0 )
225
- self ._write_mtime = mtime
226
+ 0 ,
227
+ mtime = mtime ,
228
+ fname = self ._encode_fname ())
226
229
self ._buffer_size = _WRITE_BUFFER_SIZE
227
230
self ._buffer = io .BufferedWriter (_WriteBufferStream (self ),
228
231
buffer_size = self ._buffer_size )
@@ -231,9 +234,6 @@ def __init__(self, filename=None, mode=None,
231
234
232
235
self .fileobj = fileobj
233
236
234
- if self .mode == WRITE :
235
- self ._write_gzip_header (compresslevel )
236
-
237
237
@property
238
238
def mtime (self ):
239
239
"""Last modification time read from stream, or None"""
@@ -245,7 +245,6 @@ def __repr__(self):
245
245
246
246
def _init_write (self , filename ):
247
247
self .name = filename
248
- self .crc = zlib .crc32 (b"" )
249
248
self .size = 0
250
249
self .writebuf = []
251
250
self .bufsize = 0
@@ -256,9 +255,7 @@ def tell(self):
256
255
self ._buffer .flush ()
257
256
return super ().tell ()
258
257
259
- def _write_gzip_header (self , compresslevel ):
260
- self .fileobj .write (b'\037 \213 ' ) # magic header
261
- self .fileobj .write (b'\010 ' ) # compression method
258
+ def _encode_fname (self ):
262
259
try :
263
260
# RFC 1952 requires the FNAME field to be Latin-1. Do not
264
261
# include filenames that cannot be represented that way.
@@ -269,24 +266,7 @@ def _write_gzip_header(self, compresslevel):
269
266
fname = fname [:- 3 ]
270
267
except UnicodeEncodeError :
271
268
fname = b''
272
- flags = 0
273
- if fname :
274
- flags = FNAME
275
- self .fileobj .write (chr (flags ).encode ('latin-1' ))
276
- mtime = self ._write_mtime
277
- if mtime is None :
278
- mtime = time .time ()
279
- write32u (self .fileobj , int (mtime ))
280
- if compresslevel == _COMPRESS_LEVEL_BEST :
281
- xfl = b'\002 '
282
- elif compresslevel == _COMPRESS_LEVEL_FAST :
283
- xfl = b'\004 '
284
- else :
285
- xfl = b'\000 '
286
- self .fileobj .write (xfl )
287
- self .fileobj .write (b'\377 ' )
288
- if fname :
289
- self .fileobj .write (fname + b'\000 ' )
269
+ return fname
290
270
291
271
def write (self ,data ):
292
272
self ._check_not_closed ()
@@ -311,7 +291,6 @@ def _write_raw(self, data):
311
291
if length > 0 :
312
292
self .fileobj .write (self .compress .compress (data ))
313
293
self .size += length
314
- self .crc = zlib .crc32 (data , self .crc )
315
294
self .offset += length
316
295
317
296
return length
@@ -355,9 +334,6 @@ def close(self):
355
334
if self .mode == WRITE :
356
335
self ._buffer .flush ()
357
336
fileobj .write (self .compress .flush ())
358
- write32u (fileobj , self .crc )
359
- # self.size may exceed 2 GiB, or even 4 GiB
360
- write32u (fileobj , self .size & 0xffffffff )
361
337
elif self .mode == READ :
362
338
self ._buffer .close ()
363
339
finally :
@@ -427,78 +403,17 @@ def readline(self, size=-1):
427
403
return self ._buffer .readline (size )
428
404
429
405
430
- def _read_exact (fp , n ):
431
- '''Read exactly *n* bytes from `fp`
432
-
433
- This method is required because fp may be unbuffered,
434
- i.e. return short reads.
435
- '''
436
- data = fp .read (n )
437
- while len (data ) < n :
438
- b = fp .read (n - len (data ))
439
- if not b :
440
- raise EOFError ("Compressed file ended before the "
441
- "end-of-stream marker was reached" )
442
- data += b
443
- return data
444
-
445
-
446
- def _read_gzip_header (fp ):
447
- '''Read a gzip header from `fp` and progress to the end of the header.
448
-
449
- Returns last mtime if header was present or None otherwise.
450
- '''
451
- magic = fp .read (2 )
452
- if magic == b'' :
453
- return None
454
-
455
- if magic != b'\037 \213 ' :
456
- raise BadGzipFile ('Not a gzipped file (%r)' % magic )
457
-
458
- (method , flag , last_mtime ) = struct .unpack ("<BBIxx" , _read_exact (fp , 8 ))
459
- if method != 8 :
460
- raise BadGzipFile ('Unknown compression method' )
461
-
462
- if flag & FEXTRA :
463
- # Read & discard the extra field, if present
464
- extra_len , = struct .unpack ("<H" , _read_exact (fp , 2 ))
465
- _read_exact (fp , extra_len )
466
- if flag & FNAME :
467
- # Read and discard a null-terminated string containing the filename
468
- while True :
469
- s = fp .read (1 )
470
- if not s or s == b'\000 ' :
471
- break
472
- if flag & FCOMMENT :
473
- # Read and discard a null-terminated string containing a comment
474
- while True :
475
- s = fp .read (1 )
476
- if not s or s == b'\000 ' :
477
- break
478
- if flag & FHCRC :
479
- _read_exact (fp , 2 ) # Read & discard the 16-bit header CRC
480
- return last_mtime
481
-
482
-
483
406
class _GzipReader (_compression .DecompressReader ):
484
407
def __init__ (self , fp ):
485
408
super ().__init__ (_PaddedFile (fp ), zlib ._ZlibDecompressor ,
486
- wbits = - zlib .MAX_WBITS )
409
+ wbits = 16 + zlib .MAX_WBITS )
487
410
# Set flag indicating start of a new member
488
411
self ._new_member = True
489
412
self ._last_mtime = None
490
413
491
414
def _init_read (self ):
492
- self ._crc = zlib .crc32 (b"" )
493
415
self ._stream_size = 0 # Decompressed size of unconcatenated stream
494
416
495
- def _read_gzip_header (self ):
496
- last_mtime = _read_gzip_header (self ._fp )
497
- if last_mtime is None :
498
- return False
499
- self ._last_mtime = last_mtime
500
- return True
501
-
502
417
def read (self , size = - 1 ):
503
418
if size < 0 :
504
419
return self .readall ()
@@ -512,33 +427,41 @@ def read(self, size=-1):
512
427
while True :
513
428
if self ._decompressor .eof :
514
429
# Ending case: we've come to the end of a member in the file,
515
- # so finish up this member, and read a new gzip header.
516
- # Check the CRC and file size, and set the flag so we read
517
- # a new member
430
+ # so finish up this member and set the flag, so that we read a
431
+ # new member
518
432
self ._read_eof ()
519
433
self ._new_member = True
520
434
self ._decompressor = self ._decomp_factory (
521
435
** self ._decomp_args )
522
436
523
- if self ._new_member :
524
- # If the _new_member flag is set, we have to
525
- # jump to the next member, if there is one.
526
- self ._init_read ()
527
- if not self ._read_gzip_header ():
528
- self ._size = self ._pos
529
- return b""
530
- self ._new_member = False
531
-
532
437
# Read a chunk of data from the file
533
438
if self ._decompressor .needs_input :
534
439
buf = self ._fp .read (READ_BUFFER_SIZE )
535
- uncompress = self ._decompressor .decompress (buf , size )
440
+ if self ._new_member :
441
+ # If the _new_member flag is set, we have to
442
+ # jump to the next member, if there is one.
443
+ self ._init_read ()
444
+ if not buf :
445
+ self ._size = self ._pos
446
+ return b""
447
+ self ._new_member = False
448
+ compressed_buf = buf
536
449
else :
537
- uncompress = self ._decompressor .decompress (b"" , size )
450
+ assert not self ._new_member
451
+ compressed_buf = b""
452
+ try :
453
+ uncompress = self ._decompressor .decompress (compressed_buf , size )
454
+ except zlib .error :
455
+ if self ._decompressor .gz_header_done == 1 :
456
+ raise
457
+ raise BadGzipFile ()
458
+
459
+ if self ._decompressor .gz_header_done == 1 :
460
+ self ._last_mtime = self ._decompressor .gz_header_mtime
538
461
539
462
if self ._decompressor .unused_data != b"" :
540
463
# Prepend the already read bytes to the fileobj so they can
541
- # be seen by _read_eof() and _read_gzip_header()
464
+ # be seen by _read_eof()
542
465
self ._fp .prepend (self ._decompressor .unused_data )
543
466
544
467
if uncompress != b"" :
@@ -547,23 +470,12 @@ def read(self, size=-1):
547
470
raise EOFError ("Compressed file ended before the "
548
471
"end-of-stream marker was reached" )
549
472
550
- self ._crc = zlib .crc32 (uncompress , self ._crc )
551
473
self ._stream_size += len (uncompress )
552
474
self ._pos += len (uncompress )
553
475
return uncompress
554
476
555
477
def _read_eof (self ):
556
478
# We've read to the end of the file
557
- # We check that the computed CRC and size of the
558
- # uncompressed data matches the stored values. Note that the size
559
- # stored is the true file size mod 2**32.
560
- crc32 , isize = struct .unpack ("<II" , _read_exact (self ._fp , 8 ))
561
- if crc32 != self ._crc :
562
- raise BadGzipFile ("CRC check failed %s != %s" % (hex (crc32 ),
563
- hex (self ._crc )))
564
- elif isize != (self ._stream_size & 0xffffffff ):
565
- raise BadGzipFile ("Incorrect length of data produced" )
566
-
567
479
# Gzip files can be padded with zeroes and still have archives.
568
480
# Consume all zero bytes and set the file position to the first
569
481
# non-zero byte. See http://www.gzip.org/#faq8
@@ -578,68 +490,32 @@ def _rewind(self):
578
490
self ._new_member = True
579
491
580
492
581
- def _create_simple_gzip_header (compresslevel : int ,
582
- mtime = None ) -> bytes :
583
- """
584
- Write a simple gzip header with no extra fields.
585
- :param compresslevel: Compresslevel used to determine the xfl bytes.
586
- :param mtime: The mtime (must support conversion to a 32-bit integer).
587
- :return: A bytes object representing the gzip header.
588
- """
589
- if mtime is None :
590
- mtime = time .time ()
591
- if compresslevel == _COMPRESS_LEVEL_BEST :
592
- xfl = 2
593
- elif compresslevel == _COMPRESS_LEVEL_FAST :
594
- xfl = 4
595
- else :
596
- xfl = 0
597
- # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
598
- # fields added to header), mtime, xfl and os (255 for unknown OS).
599
- return struct .pack ("<BBBBLBB" , 0x1f , 0x8b , 8 , 0 , int (mtime ), xfl , 255 )
600
-
601
-
602
493
def compress (data , compresslevel = _COMPRESS_LEVEL_BEST , * , mtime = None ):
603
494
"""Compress data in one shot and return the compressed string.
604
495
605
496
compresslevel sets the compression level in range of 0-9.
606
497
mtime can be used to set the modification time. The modification time is
607
498
set to the current time by default.
608
499
"""
609
- if mtime == 0 :
610
- # Use zlib as it creates the header with 0 mtime by default.
611
- # This is faster and with less overhead.
612
- return zlib .compress (data , level = compresslevel , wbits = 31 )
613
- header = _create_simple_gzip_header (compresslevel , mtime )
614
- trailer = struct .pack ("<LL" , zlib .crc32 (data ), (len (data ) & 0xffffffff ))
615
- # Wbits=-15 creates a raw deflate block.
616
- return (header + zlib .compress (data , level = compresslevel , wbits = - 15 ) +
617
- trailer )
500
+ if mtime is None :
501
+ mtime = int (time .time ())
502
+ return zlib .compress (data , level = compresslevel , wbits = 31 , mtime = mtime )
618
503
619
504
620
505
def decompress (data ):
621
506
"""Decompress a gzip compressed string in one shot.
622
507
Return the decompressed string.
623
508
"""
624
509
decompressed_members = []
625
- while True :
626
- fp = io .BytesIO (data )
627
- if _read_gzip_header (fp ) is None :
628
- return b"" .join (decompressed_members )
629
- # Use a zlib raw deflate compressor
630
- do = zlib .decompressobj (wbits = - zlib .MAX_WBITS )
631
- # Read all the data except the header
632
- decompressed = do .decompress (data [fp .tell ():])
633
- if not do .eof or len (do .unused_data ) < 8 :
510
+ while data :
511
+ do = zlib .decompressobj (wbits = 16 + zlib .MAX_WBITS )
512
+ decompressed = do .decompress (data )
513
+ if not do .eof :
634
514
raise EOFError ("Compressed file ended before the end-of-stream "
635
515
"marker was reached" )
636
- crc , length = struct .unpack ("<II" , do .unused_data [:8 ])
637
- if crc != zlib .crc32 (decompressed ):
638
- raise BadGzipFile ("CRC check failed" )
639
- if length != (len (decompressed ) & 0xffffffff ):
640
- raise BadGzipFile ("Incorrect length of data produced" )
641
516
decompressed_members .append (decompressed )
642
- data = do .unused_data [8 :].lstrip (b"\x00 " )
517
+ data = do .unused_data .lstrip (b"\x00 " )
518
+ return b"" .join (decompressed_members )
643
519
644
520
645
521
def main ():
0 commit comments