Commit fc82842

feat: cache capacity change option
1 parent ebb2b89 commit fc82842

6 files changed (+35 -5 lines)

.editorconfig

Lines changed: 3 additions & 0 deletions

@@ -8,6 +8,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true
 
+[*.py]
+indent_size = 4
+
 [*.{ts,js}]
 quote_type = single
 

docs/docs/install/environment-variables.md

Lines changed: 1 addition & 0 deletions

@@ -172,6 +172,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_RKNN` | Enable RKNN hardware acceleration if supported | `True` | machine learning |
 | `MACHINE_LEARNING_RKNN_THREADS` | How many threads of RKNN runtime should be spinned up while inferencing. | `1` | machine learning |
 | `MACHINE_LEARNING_MODEL_ARENA` | Pre-allocates CPU memory to avoid memory fragmentation | true | machine learning |
+| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The max number of image dimensions for which models have cached optimizations | `20` | machine learning |
 
 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
 

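For orientation, the new variable behaves like the other `MACHINE_LEARNING_` entries in this table: when it is unset, the service falls back to the documented default of `20`. Below is a minimal, purely illustrative Python check of that fallback; the variable name and default come from the row above, while the service itself resolves the value through its settings object rather than a direct lookup like this.

import os

# Hypothetical stand-alone check; the ML service reads this through its Settings class.
capacity = int(os.environ.get("MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY", "20"))
print(capacity)  # 20 unless the variable is exported with another value
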
machine-learning/immich_ml/config.py

Lines changed: 1 addition & 0 deletions

@@ -70,6 +70,7 @@ class Settings(BaseSettings):
     rknn_threads: int = 1
     preload: PreloadModelData | None = None
     max_batch_size: MaxBatchSize | None = None
+    openvino_cache_capacity: int = 20
 
     @property
     def device_id(self) -> str:

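The new `openvino_cache_capacity` field is what the documented `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` variable feeds into. The following is a rough sketch of that mapping, assuming `Settings` follows pydantic-settings semantics with a `MACHINE_LEARNING_` env prefix; that prefix is inferred from the variable names in the docs and is not shown in this hunk.

from pydantic_settings import BaseSettings, SettingsConfigDict

class DemoSettings(BaseSettings):
    # Mirrors the field added above; 20 applies when the variable is absent.
    openvino_cache_capacity: int = 20

    # Assumed prefix, matching the documented MACHINE_LEARNING_* variable names.
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_", case_sensitive=False)

# With MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY=40 exported, this prints 40; otherwise 20.
print(DemoSettings().openvino_cache_capacity)
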
machine-learning/immich_ml/sessions/ort.py

Lines changed: 10 additions & 1 deletion

@@ -5,6 +5,7 @@
 
 import numpy as np
 import onnxruntime as ort
+import orjson
 from numpy.typing import NDArray
 
 from immich_ml.models.constants import SUPPORTED_PROVIDERS
@@ -93,10 +94,18 @@ def _provider_options_default(self) -> list[dict[str, Any]]:
             case "CUDAExecutionProvider" | "ROCMExecutionProvider":
                 options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
             case "OpenVINOExecutionProvider":
+                openvino_dir = self.model_path.parent / "openvino"
+                openvino_dir.mkdir(parents=True, exist_ok=True)
+                device = f"GPU.{settings.device_id}"
                 options = {
                     "device_type": f"GPU.{settings.device_id}",
                     "precision": "FP32",
-                    "cache_dir": (self.model_path.parent / "openvino").as_posix(),
+                    "cache_dir": openvino_dir.as_posix(),
+                    "load_config": {
+                        device: {
+                            "CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)
+                        },
+                    },
                 }
             case "CoreMLExecutionProvider":
                 options = {

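To make the OpenVINO branch above concrete, here is a self-contained sketch of the per-provider options dictionary it builds, with illustrative values standing in for `settings.device_id`, `settings.openvino_cache_capacity`, and `self.model_path`. In `onnxruntime`, a list of such dictionaries is normally passed via the `provider_options` argument of `ort.InferenceSession` alongside `providers`.

from pathlib import Path

# Illustrative stand-ins for values the real code takes from settings and self.model_path.
model_path = Path("/cache/ViT-B-32__openai/model.onnx")
device_id = "0"       # settings.device_id
cache_capacity = 20   # settings.openvino_cache_capacity

# The real code also creates this directory: openvino_dir.mkdir(parents=True, exist_ok=True).
openvino_dir = model_path.parent / "openvino"
device = f"GPU.{device_id}"

options = {
    "device_type": device,
    "precision": "FP32",
    "cache_dir": openvino_dir.as_posix(),
    # Per the docs entry added in this commit, the capacity caps how many image
    # dimensions get cached model optimizations; it is forwarded as a string.
    "load_config": {device: {"CPU_RUNTIME_CACHE_CAPACITY": str(cache_capacity)}},
}
print(options)
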
machine-learning/test_main.py

Lines changed: 19 additions & 3 deletions

@@ -239,18 +239,31 @@ def test_sets_provider_kwarg(self) -> None:
         assert session.providers == providers
 
     @pytest.mark.ov_device_ids(["GPU.0", "CPU"])
-    def test_sets_default_provider_options(self, ov_device_ids: list[str]) -> None:
+    def test_sets_default_provider_options(self, ov_device_ids: list[str], mocker: MockerFixture) -> None:
         model_path = "/cache/ViT-B-32__openai/model.onnx"
+        mock_mkdir = mocker.patch.object(Path, "mkdir")
+        mock_write_bytes = mocker.patch.object(Path, "write_bytes")
+
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"])
 
         assert session.provider_options == [
-            {"device_type": "GPU.0", "precision": "FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
+            {
+                "device_type": "GPU.0",
+                "precision": "FP32",
+                "cache_dir": "/cache/ViT-B-32__openai/openvino",
+                "load_config": "/cache/ViT-B-32__openai/textual/openvino/config.json",
+            },
             {"arena_extend_strategy": "kSameAsRequested"},
         ]
+        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
+        mock_write_bytes.assert_called_once_with("""{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}""".encode())
 
-    def test_sets_provider_options_for_openvino(self) -> None:
+    def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
         model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
         os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
+        mock_mkdir = mocker.patch.object(Path, "mkdir")
+        mock_write_bytes = mocker.patch.object(Path, "write_bytes")
+        mocker.patch.object(settings, "openvino_cache_capacity", 10)
 
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
 
@@ -259,8 +272,11 @@ def test_sets_provider_options_for_openvino(self) -> None:
                 "device_type": "GPU.1",
                 "precision": "FP32",
                 "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "/cache/ViT-B-32__openai/textual/openvino/config.json",
             }
         ]
+        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
+        mock_write_bytes.assert_called_once_with("""{"GPU.1":{"CPU_RUNTIME_CACHE_CAPACITY":"10"}}""".encode())
 
     def test_sets_provider_options_for_cuda(self) -> None:
         os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"

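The assertions above expect `mkdir` to be called for the cache directory and `write_bytes` to receive a compact JSON mapping of device to cache capacity, with `load_config` surfacing as a path to a `config.json` file. The visible `ort.py` hunk only shows `import orjson`, not the write itself, so the following is merely a sketch of how the asserted byte payload could be produced with `orjson`, under that assumption.

import orjson

payload = {"GPU.0": {"CPU_RUNTIME_CACHE_CAPACITY": "20"}}

# orjson.dumps returns compact bytes with no whitespace, which matches the
# b'{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}' the first test asserts on.
assert orjson.dumps(payload) == """{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}""".encode()
print(orjson.dumps(payload))
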
machine-learning/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
