Skip to content

Commit 632a408

Browse files
committed
Extract archive members using an auto-incrementing integer, avoiding the need to sanitise filenames. (Closes: #854723)
Signed-off-by: Chris Lamb <lamby@debian.org>
1 parent b468a28 commit 632a408

1 file changed

Lines changed: 14 additions & 27 deletions

File tree

diffoscope/comparators/utils/libarchive.py

Lines changed: 14 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import ctypes
2424
import logging
2525
import libarchive
26+
import collections
2627

2728
from diffoscope.tempfiles import get_temporary_directory
2829

@@ -168,11 +169,11 @@ def close_archive(self):
168169

169170
def get_member_names(self):
170171
self.ensure_unpacked()
171-
return self._member_names
172+
return self._members.keys()
172173

173174
def extract(self, member_name, dest_dir):
174175
self.ensure_unpacked()
175-
return os.path.join(self._unpacked, member_name)
176+
return self._members[member_name]
176177

177178
def get_member(self, member_name):
178179
with libarchive.file_reader(self.source.path) as archive:
@@ -197,45 +198,31 @@ def get_subclass(self, entry):
197198
return LibarchiveMember(self, entry)
198199

199200
def ensure_unpacked(self):
200-
if hasattr(self, '_unpacked'):
201+
if hasattr(self, '_members'):
201202
return
202203

203-
self._unpacked = get_temporary_directory().name
204-
self._member_names = []
204+
tmpdir = get_temporary_directory().name
205+
self._members = collections.OrderedDict()
205206

206-
logger.debug("Extracting %s to %s", self.source.path, self._unpacked)
207+
logger.debug("Extracting %s to %s", self.source.path, tmpdir)
207208

208209
with libarchive.file_reader(self.source.path) as archive:
209-
for entry in archive:
210-
self._member_names.append(entry.pathname)
210+
for idx, entry in enumerate(archive):
211+
# Maintain a mapping of archive path to the extracted path,
212+
# avoiding the need to sanitise filenames.
213+
dst = os.path.join(tmpdir, '{}'.format(idx))
214+
self._members[entry.pathname] = dst
211215

212216
if entry.isdir:
213217
continue
214218

215-
# All extracted locations must be underneath self._unpacked
216-
force_prefix = os.path.join(self._unpacked, "")
217-
218-
# Try to pick a safe and reasonable candidate name
219-
candidate_name = os.path.normpath(entry.pathname.rstrip('/' + os.sep))
220-
if os.path.isabs(candidate_name):
221-
candidate_name = os.path.relpath(candidate_name, os.path.join(os.path.sep))
222-
223-
dst = os.path.normpath(os.path.join(self._unpacked, candidate_name))
224-
if not dst.startswith(force_prefix):
225-
logger.warn("Skipping member because we could not make a safe name to extract it to: '%s'",
226-
entry.pathname)
227-
continue
228-
229-
# TODO: need to fix reading these cleaned members. currently
230-
# reading will still try to use the uncleaned name.
231-
#logging.debug("Extracting %s to %s", entry.pathname, dst)
232-
os.makedirs(os.path.dirname(dst), exist_ok=True)
219+
logger.debug("Extracting %s to %s", entry.pathname, dst)
233220

234221
with open(dst, 'wb') as f:
235222
for block in entry.get_blocks():
236223
f.write(block)
237224

238225
logger.debug(
239226
"Extracted %d entries from %s to %s",
240-
len(self._member_names), self.source.path, self._unpacked,
227+
len(self._members), self.source.path, tmpdir,
241228
)

0 commit comments

Comments
 (0)