Skip to content

Commit cf91d5c

Browse files
authored
Merge pull request #97 from seth-planet/tpu_runner3
Coral Object Detection: Various optimizations & fix YOLOv8 image for value scaling bug
2 parents dc972c8 + 3759bce commit cf91d5c

File tree

6 files changed

+170
-368
lines changed

6 files changed

+170
-368
lines changed

src/modules/ObjectDetectionCoral/modulesettings.json

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"RuntimeLocation": "Local", // Can be Local, Shared or System
2626
"PostStartPauseSecs": 1, // Generally 1 if using GPU, 0 for CPU
2727
"Queue": "objectdetection_queue", // We make all Object detectors use the same queue.
28-
"Parallelism": 1 // 0 = Default = number of CPUs / 2
28+
"Parallelism": 16 // Should probably be TPU count * 2; I don't see harm in overprovisioning threads
2929
},
3030

3131
"ModelRequirements" : [{
@@ -36,7 +36,7 @@
3636

3737
"EnvironmentVariables": {
3838
"CPAI_CORAL_MULTI_TPU": "true",
39-
"CPAI_CORAL_MODEL_NAME": "MobileNet SSD", // "MobileNet SSD", "EfficientDet-Lite", "YOLOv5"
39+
"CPAI_CORAL_MODEL_NAME": "MobileNet SSD", // "MobileNet SSD", "EfficientDet-Lite", "YOLOv5", "YOLOv8"
4040

4141
"MODELS_DIR": "%CURRENT_MODULE_PATH%/assets",
4242
"MODEL_SIZE": "Small"
@@ -85,16 +85,16 @@
8585
],
8686
"DownloadableModels":[
8787

88-
{ "Name": "EfficientDet Large", "Filename": "objectdetection-efficientdet-large-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Large", "FileSizeKb": 275800, "PreInstall": false },
89-
{ "Name": "EfficientDet Medium", "Filename": "objectdetection-efficientdet-medium-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Medium", "FileSizeKb": 275800, "PreInstall": false },
90-
{ "Name": "EfficientDet Small", "Filename": "objectdetection-efficientdet-small-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Small", "FileSizeKb": 275800, "PreInstall": false },
91-
{ "Name": "EfficientDet Tiny", "Filename": "objectdetection-efficientdet-tiny-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Tiny", "FileSizeKb": 275800, "PreInstall": false },
92-
9388
{ "Name": "MobileNet Large", "Filename": "objectdetection-mobilenet-large-edgetpu.zip", "Folder": "assets", "Description": "MobileNet object detection, Large", "FileSizeKb": 275800, "PreInstall": true },
9489
{ "Name": "MobileNet Medium", "Filename": "objectdetection-mobilenet-medium-edgetpu.zip", "Folder": "assets", "Description": "MobileNet object detection, Medium", "FileSizeKb": 275800, "PreInstall": true },
9590
{ "Name": "MobileNet Small", "Filename": "objectdetection-mobilenet-small-edgetpu.zip", "Folder": "assets", "Description": "MobileNet object detection, Small", "FileSizeKb": 275800, "PreInstall": true },
9691
{ "Name": "MobileNet Tiny", "Filename": "objectdetection-mobilenet-tiny-edgetpu.zip", "Folder": "assets", "Description": "MobileNet object detection, Tiny", "FileSizeKb": 275800, "PreInstall": true },
9792

93+
{ "Name": "EfficientDet Large", "Filename": "objectdetection-efficientdet-large-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Large", "FileSizeKb": 275800, "PreInstall": false },
94+
{ "Name": "EfficientDet Medium", "Filename": "objectdetection-efficientdet-medium-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Medium", "FileSizeKb": 275800, "PreInstall": false },
95+
{ "Name": "EfficientDet Small", "Filename": "objectdetection-efficientdet-small-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Small", "FileSizeKb": 275800, "PreInstall": false },
96+
{ "Name": "EfficientDet Tiny", "Filename": "objectdetection-efficientdet-tiny-edgetpu.zip", "Folder": "assets", "Description": "EfficientDet object detection, Tiny", "FileSizeKb": 275800, "PreInstall": false },
97+
9898
{ "Name": "YOLOv5 Large", "Filename": "objectdetection-yolov5-large-edgetpu.zip", "Folder": "assets", "Description": "YOLOv5 object detection, Large", "FileSizeKb": 275800, "PreInstall": false },
9999
{ "Name": "YOLOv5 Medium", "Filename": "objectdetection-yolov5-medium-edgetpu.zip", "Folder": "assets", "Description": "YOLOv5 object detection, Medium", "FileSizeKb": 275800, "PreInstall": false },
100100
{ "Name": "YOLOv5 Small", "Filename": "objectdetection-yolov5-small-edgetpu.zip", "Folder": "assets", "Description": "YOLOv5 object detection, Small", "FileSizeKb": 275800, "PreInstall": false },
@@ -112,9 +112,9 @@
112112
"Label": "Model",
113113
"Options": [
114114
{ "Label": "MobileNet SSD", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "MobileNet SSD" },
115+
{ "Label": "EfficientDet Lite", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "EfficientDet-Lite" },
115116
{ "Label": "YOLOv5", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "YOLOv5" },
116-
{ "Label": "YOLOv8", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "YOLOv8" },
117-
{ "Label": "EfficientDet Lite", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "EfficientDet-Lite" }
117+
{ "Label": "YOLOv8", "Setting": "CPAI_CORAL_MODEL_NAME", "Value": "YOLOv8" }
118118
]
119119
},
120120
{

src/modules/ObjectDetectionCoral/objectdetection_coral_multitpu.py

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -231,20 +231,28 @@ def main():
231231

232232
thread_cnt = 16
233233
tot_infr_time = 0
234+
half_wall_start = None
235+
half_infr_count = 0
234236
if args.count > 1:
235237
with concurrent.futures.ThreadPoolExecutor(max_workers=thread_cnt) as executor:
236238
start = time.perf_counter()
237239
for chunk_i in range(0, args.count-1, thread_cnt*8):
238240
fs = [executor.submit(_tpu_runner.process_image, options, copy.copy(image), args.threshold)
239241
for i in range(min(thread_cnt*8, args.count-1 - chunk_i))]
240242
for f in concurrent.futures.as_completed(fs):
241-
_, infr_time = f.result()
243+
_, infr_time, _ = f.result()
242244
tot_infr_time += infr_time
245+
246+
# Start a timer for the last ~half of the run for more accurate benchmark
247+
if chunk_i > (args.count-1) / 3.0:
248+
half_infr_count += 1
249+
if half_wall_start is None:
250+
half_wall_start = time.perf_counter()
243251

244252
# Uncomment for testing
245253
# import random
246254
# logging.info("Pause")
247-
# time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*2))
255+
# time.sleep(random.randint(0,INTERPRETER_LIFESPAN_SECONDS*3))
248256
else:
249257
start = time.perf_counter()
250258

@@ -254,20 +262,22 @@ def main():
254262
# print(stat)
255263

256264
start_one = time.perf_counter()
257-
objs, infr_time = _tpu_runner.process_image(options, image, args.threshold)
265+
objs, infr_time, _ = _tpu_runner.process_image(options, copy.copy(image), args.threshold)
258266
tot_infr_time += infr_time
267+
half_infr_count += 1
259268
wall_time = time.perf_counter() - start
260-
print('completed one run every %.2f ms for %d runs; %.2f ms wall time for a single run' %
269+
270+
half_wall_time = 0.0
271+
if half_wall_start is not None:
272+
half_wall_time = time.perf_counter() - half_wall_start
273+
274+
print('completed one run every %.2fms for %d runs; %.2fms wall time for a single run' %
261275
(wall_time * 1000 / args.count, args.count,
262276
(time.perf_counter() - start_one) * 1000))
263277

264-
# Optimizing the number of segments used for a model would result in the
265-
# lowest average time spent adjusted for number of TPUs used. At some point,
266-
# adding additional segments just removes from the pool of TPUs you can use
267-
# for parallelism.
268-
print('%.2f ms avg time blocked across %d threads; %.2f avg TPU * ms / run' %
278+
print('%.2fms avg time blocked across %d threads; %.2fms ea for final %d inferences' %
269279
(tot_infr_time / args.count, thread_cnt,
270-
len(_tpu_runner.pipe.tpu_list) * wall_time * 1000 / args.count))
280+
half_wall_time * 1000 / half_infr_count, half_infr_count))
271281

272282
print('-------RESULTS--------')
273283
if not objs:

src/modules/ObjectDetectionCoral/options.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
print("Unable to import ModuleOptions, running with defaults")
88
class ModuleOptions:
99
module_path = '.'
10-
def getEnvVariable(self, a, b):
10+
def getEnvVariable(a, b):
1111
return b
1212

1313
class Settings:
@@ -163,7 +163,7 @@ def __init__(self):
163163

164164
self.MAX_PIPELINE_QUEUE_LEN = 1000 # Multi-only
165165
self.TILE_OVERLAP = 15 # Multi-only.
166-
self.DOWNSAMPLE_BY = 5.2 # Multi-only. Smaller number results in more tiles generated
166+
self.DOWNSAMPLE_BY = 6.0 # Multi-only. Smaller number results in more tiles generated
167167
self.IOU_THRESHOLD = 0.1 # Multi-only
168168

169169
# ----------------------------------------------------------------------

src/modules/ObjectDetectionCoral/pipelined_model_runner.py

Lines changed: 0 additions & 210 deletions
This file was deleted.

src/modules/ObjectDetectionCoral/segment_and_test.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import subprocess
33
import time
44
import shutil
5+
import re
56

67
#'''
78
fn_list = [
@@ -11,12 +12,12 @@
1112
#'ssd_mobilenet_v1_coco_quant_postprocess',
1213
'tf2_ssd_mobilenet_v1_fpn_640x640_coco17_ptq',
1314
#'efficientdet_lite0_320_ptq',
14-
'efficientdet_lite1_384_ptq',
15+
#'efficientdet_lite1_384_ptq',
1516
'efficientdet_lite2_448_ptq',
1617
'efficientdet_lite3_512_ptq',
1718
'efficientdet_lite3x_640_ptq',
1819
#'yolov5n-int8',
19-
'yolov5s-int8',
20+
#'yolov5s-int8',
2021
'yolov5m-int8',
2122
'yolov5l-int8',
2223
#'yolov8n_416_640px', # lg 1st seg
@@ -192,7 +193,7 @@ def seg_exists(filename, segment_type, segment_count):
192193

193194
MAX_TPU_COUNT = 8
194195

195-
#'''
196+
'''
196197
# Generate segment files
197198
for sn in range(1,MAX_TPU_COUNT+1):
198199
for fn in fn_list:
@@ -313,15 +314,12 @@ def seg_exists(filename, segment_type, segment_count):
313314
seg_list + ["--labels","coral/pycoral/test_data/coco_labels.txt","--input","/home/seth/coral/pycoral/test_data/grace_hopper.bmp",
314315
"--count","1000","--num-tpus",str(num_tpus)]
315316
print(cmd)
316-
start_time = time.perf_counter()
317-
subprocess.run(cmd)
318-
timings.append(((time.perf_counter() - start_time), num_tpus, fn, seg_type, sn))
317+
c = subprocess.run(cmd, capture_output=True)
318+
print(c.stdout)
319+
print(c.stderr)
320+
ms_time = float(re.compile(r'threads; ([\d\.]+)ms ea').findall(c.stdout)[0])
321+
timings.append((ms_time, num_tpus, fn, seg_type, sn))
319322

320-
# Find segment pipeline efficiencies
321-
#cmd = ["/home/seth/libcoral/out/k8/tools/model_pipelining_performance_analysis","--data_dir",seg_dir+seg_type,
322-
# "--model_list",fn,"--num_segments_list",','.join([str(i) for i in range(1,max_seg+1)])]
323-
#print(cmd)
324-
#subprocess.run(cmd)
325323
timings = sorted(timings, key=lambda t: t[0])
326324

327325
# Print the top three

0 commit comments

Comments
 (0)