2323import ctypes
2424import logging
2525import libarchive
26+ import collections
2627
2728from diffoscope .tempfiles import get_temporary_directory
2829
@@ -168,11 +169,11 @@ def close_archive(self):
168169
def get_member_names(self):
    """Return the archive-internal path names of all members.

    Triggers extraction first (via ``ensure_unpacked``) so that the
    ``_members`` mapping exists, then returns its keys, i.e. the
    ``entry.pathname`` values recorded while unpacking.
    """
    self.ensure_unpacked()
    return self._members.keys()
172173
def extract(self, member_name, dest_dir):
    """Return the on-disk path that ``member_name`` was extracted to.

    The archive is unpacked on first use (via ``ensure_unpacked``); the
    result is looked up in the ``_members`` mapping of archive path to
    extracted path rather than being derived from the member name.

    ``dest_dir`` is accepted for interface compatibility but is not used
    by this implementation.

    Raises ``KeyError`` if ``member_name`` is not a key of ``_members``.
    """
    self.ensure_unpacked()
    return self._members[member_name]
176177
177178 def get_member (self , member_name ):
178179 with libarchive .file_reader (self .source .path ) as archive :
@@ -197,45 +198,31 @@ def get_subclass(self, entry):
197198 return LibarchiveMember (self , entry )
198199
def ensure_unpacked(self):
    """Extract every archive member to a temporary directory, once.

    On the first call this populates ``self._members``, an ordered
    mapping of each entry's archive path (``entry.pathname``) to the
    path it was written to. Later calls return immediately because the
    attribute already exists.

    Extracted files are named after the entry's position in the archive
    instead of its path, so member names never need to be sanitised
    before being used as filesystem paths.
    """
    # The attribute's presence is the "already unpacked" marker.
    if hasattr(self, '_members'):
        return

    tmpdir = get_temporary_directory().name
    self._members = collections.OrderedDict()

    logger.debug("Extracting %s to %s", self.source.path, tmpdir)

    with libarchive.file_reader(self.source.path) as archive:
        for idx, entry in enumerate(archive):
            # Maintain a mapping of archive path to the extracted path,
            # avoiding the need to sanitise filenames.
            dst = os.path.join(tmpdir, str(idx))
            self._members[entry.pathname] = dst

            # Directories are recorded in the mapping but nothing is
            # written to disk for them.
            if entry.isdir:
                continue

            logger.debug("Extracting %s to %s", entry.pathname, dst)

            with open(dst, 'wb') as f:
                for block in entry.get_blocks():
                    f.write(block)

    logger.debug(
        "Extracted %d entries from %s to %s",
        len(self._members), self.source.path, tmpdir,
    )
0 commit comments