
Commit 4014a82

feat: cache capacity change option
1 parent ebb2b89 · commit 4014a82

6 files changed (+26, -4 lines)

.editorconfig

Lines changed: 3 additions & 0 deletions
@@ -8,6 +8,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true
 
+[*.py]
+indent_size = 4
+
 [*.{ts,js}]
 quote_type = single
 

docs/docs/install/environment-variables.md

Lines changed: 1 addition & 0 deletions
@@ -172,6 +172,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_RKNN`                    | Enable RKNN hardware acceleration if supported                                | `True` | machine learning |
 | `MACHINE_LEARNING_RKNN_THREADS`            | How many threads of RKNN runtime should be spinned up while inferencing.      | `1`    | machine learning |
 | `MACHINE_LEARNING_MODEL_ARENA`             | Pre-allocates CPU memory to avoid memory fragmentation                        | true   | machine learning |
+| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The max number of image dimensions for which models have cached optimizations | `20`   | machine learning |
 
 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
 

machine-learning/immich_ml/config.py

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ class Settings(BaseSettings):
     rknn_threads: int = 1
     preload: PreloadModelData | None = None
     max_batch_size: MaxBatchSize | None = None
+    openvino_cache_capacity: int = 20
 
     @property
     def device_id(self) -> str:
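
Since `Settings` is a pydantic `BaseSettings` class, the new field should be overridable via the `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` variable documented above. A minimal sketch of that mapping, assuming a `MACHINE_LEARNING_` env prefix (the prefix configuration itself is not shown in this diff):

# Sketch only: how a pydantic BaseSettings field like openvino_cache_capacity
# can be overridden from the environment. The MACHINE_LEARNING_ prefix is an
# assumption taken from the variable name in environment-variables.md.
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class DemoSettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_")

    openvino_cache_capacity: int = 20  # default when the variable is unset


os.environ["MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY"] = "40"
print(DemoSettings().openvino_cache_capacity)  # -> 40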

machine-learning/immich_ml/sessions/ort.py

Lines changed: 11 additions & 1 deletion
@@ -5,6 +5,7 @@
 
 import numpy as np
 import onnxruntime as ort
+import orjson
 from numpy.typing import NDArray
 
 from immich_ml.models.constants import SUPPORTED_PROVIDERS
@@ -93,10 +94,19 @@ def _provider_options_default(self) -> list[dict[str, Any]]:
             case "CUDAExecutionProvider" | "ROCMExecutionProvider":
                 options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
             case "OpenVINOExecutionProvider":
+                openvino_dir = self.model_path.parent / "openvino"
+                openvino_dir.mkdir(parents=True, exist_ok=True)
                 options = {
                     "device_type": f"GPU.{settings.device_id}",
                     "precision": "FP32",
-                    "cache_dir": (self.model_path.parent / "openvino").as_posix(),
+                    "cache_dir": openvino_dir.as_posix(),
+                    "load_config": orjson.dumps(
+                        {
+                            f"GPU.{settings.device_id}": {
+                                "CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)
+                            },
+                        }
+                    ).decode(),
                 }
             case "CoreMLExecutionProvider":
                 options = {
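
For context, `_provider_options_default` builds the per-provider options that an `OrtSession` passes to onnxruntime. A rough standalone sketch of an equivalent call, assuming a hypothetical model path, device id `0`, and an onnxruntime build that ships the OpenVINO execution provider:

# Sketch only: provider options of the shape built above, passed explicitly
# to onnxruntime. The model path is hypothetical, and support for the
# OpenVINO "load_config" option depends on the onnxruntime/OpenVINO build.
import onnxruntime as ort
import orjson

device_id = "0"
cache_capacity = 20  # MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY default

options = {
    "device_type": f"GPU.{device_id}",
    "precision": "FP32",
    "cache_dir": "/cache/ViT-B-32__openai/visual/openvino",
    "load_config": orjson.dumps(
        {f"GPU.{device_id}": {"CPU_RUNTIME_CACHE_CAPACITY": str(cache_capacity)}}
    ).decode(),
}

session = ort.InferenceSession(
    "/cache/ViT-B-32__openai/visual/model.onnx",  # hypothetical path
    providers=["OpenVINOExecutionProvider"],
    provider_options=[options],
)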

machine-learning/test_main.py

Lines changed: 9 additions & 2 deletions
@@ -244,13 +244,19 @@ def test_sets_default_provider_options(self, ov_device_ids: list[str]) -> None:
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"])
 
         assert session.provider_options == [
-            {"device_type": "GPU.0", "precision": "FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
+            {
+                "device_type": "GPU.0",
+                "precision": "FP32",
+                "cache_dir": "/cache/ViT-B-32__openai/openvino",
+                "load_config": "{\"GPU.0\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
+            },
             {"arena_extend_strategy": "kSameAsRequested"},
         ]
 
-    def test_sets_provider_options_for_openvino(self) -> None:
+    def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
         model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
         os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
+        mocker.patch.object(settings, "openvino_cache_capacity", 10)
 
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
 
@@ -259,6 +265,7 @@ def test_sets_provider_options_for_openvino(self) -> None:
                 "device_type": "GPU.1",
                 "precision": "FP32",
                 "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "{\"GPU.1\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
             }
         ]
 
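
The expected `load_config` strings here are simply the compact, whitespace-free JSON that `orjson.dumps` emits for the dict built in `ort.py`; a quick check of that assumption:

# orjson serializes without spaces, which is why the tests compare against
# the compact string '{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}'.
import orjson

payload = {"GPU.0": {"CPU_RUNTIME_CACHE_CAPACITY": "20"}}
assert orjson.dumps(payload).decode() == '{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}'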

machine-learning/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
