Skip to content

Commit 243b95a

Browse files
jjollythatch
authored andcommitted
bpo-28494: Improve zipfile.is_zipfile reliability
The zipfile.is_zipfile function would only search for the EndOfZipfile section header. This failed to correctly identify non-zipfiles that contained this header. Now the zipfile.is_zipfile function verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify zipfile catalog * Added tests to validate failure of binary non-zipfiles
1 parent de70614 commit 243b95a

File tree

3 files changed

+44
-2
lines changed

3 files changed

+44
-2
lines changed

Lib/test/test_zipfile/test_core.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1991,6 +1991,25 @@ def test_is_zip_erroneous_file(self):
19911991
self.assertFalse(zipfile.is_zipfile(fp))
19921992
fp.seek(0, 0)
19931993
self.assertFalse(zipfile.is_zipfile(fp))
1994+
# - passing non-zipfile with ZIP header elements
1995+
# data created using pyPNG like so:
1996+
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
1997+
# w = png.Writer(1,2,alpha=True,compression=0)
1998+
# f = open('onepix.png', 'wb')
1999+
# w.write(f, d)
2000+
# w.close()
2001+
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
2002+
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
2003+
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
2004+
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
2005+
# - passing a filename
2006+
with open(TESTFN, "wb") as fp:
2007+
fp.write(data)
2008+
self.assertFalse(zipfile.is_zipfile(TESTFN))
2009+
# - passing a file-like object
2010+
fp = io.BytesIO()
2011+
fp.write(data)
2012+
self.assertFalse(zipfile.is_zipfile(fp))
19942013

19952014
def test_damaged_zipfile(self):
19962015
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""

Lib/zipfile/__init__.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,18 @@ def strip(cls, data, xids):
234234

235235
def _check_zipfile(fp):
236236
try:
237-
if _EndRecData(fp):
238-
return True # file has correct magic number
237+
endrec = _EndRecData(fp)
238+
if endrec:
239+
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
240+
return True # Empty zipfiles are still zipfiles
241+
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
242+
fp.seek(endrec[_ECD_OFFSET]) # Central directory is on the same disk
243+
if fp.tell() == endrec[_ECD_OFFSET] and endrec[_ECD_SIZE] >= sizeCentralDir:
244+
data = fp.read(sizeCentralDir) # CD is where we expect it to be
245+
if len(data) == sizeCentralDir:
246+
centdir = struct.unpack(structCentralDir, data) # CD is the right size
247+
if centdir[_CD_SIGNATURE] == stringCentralDir:
248+
return True # First central directory entry has correct magic number
239249
except OSError:
240250
pass
241251
return False
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Improve zipfile validation in `zipfile.is_zipfile`.
2+
3+
Before this change `zipfile.is_zipfile()` only checked the End Central Directory
4+
signature. If the signature could be found in the last 64k of the file,
5+
success! This produced false positives on any file with `'PK\x05\x06'` in the
6+
last 64k of the file - including PDFs and PNGs.
7+
8+
This is now corrected by actually validating the Central Directory location
9+
and size based on the information provided by the End Central Directory
10+
along with verifying the Central Directory signature of the first entry.
11+
12+
This should be sufficient for the vast number of zipfiles with fewer
13+
false positives.

0 commit comments

Comments
 (0)