@@ -149,39 +149,22 @@ def getitem(self, idx):
149
149
150
150
return frame
151
151
152
- def prepare (self ):
153
- """Prepare the dataset. The human crowd dataset has a lot of huge 4k images that drasticly slow down
154
- the training. To be more effective, this method will go through all images from the dataset and will
155
- save a new version of the dataset under `{self.dataset_dir_prepared}`. Once the dataset is prepared,
156
- the path to the dir in /.aloception/alodataset_config.json will be replace by the new prepared one.
157
-
158
- Notes
159
- -----
160
- If the dataset is already prepared, this method will simply check that all file
161
- are prepared and stored into the prepared folder. Otherwise, if the original directory is no longer
162
- on the disk, the method will simply use the prepared dir as it is and the prepare step will be skiped.
163
- """
152
+ def _prepare (self , img_folder , ann_file , dataset_dir , idx ):
164
153
from alodataset import transforms as T
165
154
166
- if self .sample is not None : # Nothing to do. Samples are ready
167
- return
168
-
169
- if self .dataset_dir .endswith ("_prepared" ) and not os .path .exists (self .dataset_dir .replace ("_prepared" , "" )):
170
- return
171
-
172
- dataset_dir_name = os .path .basename (os .path .normpath (self .dataset_dir ))
155
+ dataset_dir_name = os .path .basename (os .path .normpath (dataset_dir ))
173
156
if "_prepared" not in dataset_dir_name :
174
157
wip_dir = f".wip_{ dataset_dir_name } _prepared"
175
158
prepared_dir = f"{ dataset_dir_name } _prepared"
176
- img_folder = self . img_folder
177
- ann_file = self . ann_file
159
+ img_folder = img_folder
160
+ ann_file = ann_file
178
161
else :
179
162
wip_dir = f".wip_{ dataset_dir_name } "
180
163
prepared_dir = dataset_dir_name
181
- img_folder = os .path .join (self . dataset_dir .replace ("_prepared" , "" ), self . _img_folder , "Images" )
182
- ann_file = os .path .join (self . dataset_dir .replace ("_prepared" , "" ), self . _ann_file )
164
+ img_folder = os .path .join (dataset_dir .replace ("_prepared" , "" ), img_folder , "Images" )
165
+ ann_file = os .path .join (dataset_dir .replace ("_prepared" , "" ), ann_file )
183
166
184
- base_datadir = Path (os .path .normpath (self . dataset_dir )).parent
167
+ base_datadir = Path (os .path .normpath (dataset_dir )).parent
185
168
186
169
# Setup a new directory to work with to prepare the dataset
187
170
n_wip_dir = os .path .join (base_datadir , wip_dir )
@@ -194,7 +177,7 @@ def prepare(self):
194
177
if not os .path .exists (n_wip_dir ):
195
178
os .makedirs (n_wip_dir )
196
179
197
- p = Path (self . dataset_dir )
180
+ p = Path (dataset_dir )
198
181
p_parts = list (p .parts )
199
182
p_parts [p_parts .index (dataset_dir_name )] = wip_dir
200
183
@@ -236,7 +219,7 @@ def prepare(self):
236
219
237
220
if not os .path .exists (tgt_ann_file ) and not os .path .exists (final_tgt_ann_file ):
238
221
# Write back the file with all boxes in relative position instead of absolute.
239
- content = self .load_json_lines (ann_file )
222
+ content = self .load_json_lines (ann_file , idx )
240
223
nb_line = len (content )
241
224
for c in range (len (content )):
242
225
line = content [c ]
@@ -270,16 +253,48 @@ def prepare(self):
270
253
271
254
print ("Preparing dataset: Moving the whole structure into the final prepared directory (if needed)" )
272
255
fs .move_and_replace (n_wip_dir , prepared_dir )
273
-
274
256
self .set_dataset_dir (prepared_dir )
275
- self .ann_file = final_tgt_ann_file
276
- self .img_folder = final_tgt_image_dir
277
- self .items = self .load_json_lines (self .ann_file )
257
+
258
+ return final_tgt_image_dir , final_tgt_ann_file
259
+
260
    def prepare(self):
        """Prepare the dataset.

        The CrowdHuman dataset has a lot of huge 4k images that drastically slow down
        the training. To be more efficient, this method will go through all images from the
        dataset and will save a new version of the dataset under `{self.dataset_dir}_prepared`.
        Once the dataset is prepared, the path to the dir in /.aloception/alodataset_config.json
        will be replaced by the new prepared one.

        Notes
        -----
        If the dataset is already prepared, this method will simply check that all files
        are prepared and stored into the prepared folder. Otherwise, if the original directory
        is no longer on the disk, the method will simply use the prepared dir as it is and the
        prepare step will be skipped.
        """
        # `is not False` guard: a sample flag of False means "no sample", so only a
        # truthy/real sample value short-circuits the preparation.
        if self.sample is not None and self.sample is not False:  # Nothing to do. Samples are ready
            return

        # Already prepared AND the original (un-prepared) dir is gone: nothing to re-prepare from.
        if self.dataset_dir.endswith("_prepared") and not os.path.exists(self.dataset_dir.replace("_prepared", "")):
            return

        dataset_dir = self.dataset_dir
        dataset_dir_name = os.path.basename(os.path.normpath(self.dataset_dir))
        # self.img_folder / self.ann_file are parallel lists (one entry per split);
        # each pair is prepared independently and rewritten in place.
        for idx, (img_folder, ann_file) in enumerate(zip(self.img_folder, self.ann_file)):
            if "_prepared" not in dataset_dir_name:
                n_img_folder, n_ann_file = self._prepare(img_folder, ann_file, dataset_dir, idx)
            else:
                # Dataset dir already points at the prepared tree: fall back to the
                # original folder/file names kept in self._img_folder / self._ann_file.
                # NOTE(review): assumes _img_folder/_ann_file are parallel to
                # img_folder/ann_file — confirm against __init__.
                n_img_folder, n_ann_file = self._prepare(self._img_folder[idx], self._ann_file[idx], dataset_dir, idx)
            self.img_folder[idx] = n_img_folder
            self.ann_file[idx] = n_ann_file

        # Set back the items with the annotation files
        self.items = []
        for a, ann_file in enumerate(self.ann_file):
            line = self.load_json_lines(ann_file, a)
            self.items += line
278
293
279
294
280
295
def main ():
281
296
"""Main"""
282
- crowd_human_dataset = CrowdHumanDataset (sample = True )
297
+ crowd_human_dataset = CrowdHumanDataset (img_folder = "CrowdHuman_train" , ann_file = "annotation_train.odgt" )
283
298
284
299
crowd_human_dataset .prepare ()
285
300
for i , frames in enumerate (crowd_human_dataset .train_loader (batch_size = 2 , sampler = None , num_workers = 0 )):
0 commit comments