
Commit 4014a82

feat: cache capacity change option
1 parent ebb2b89 · commit 4014a82

6 files changed (+26, -4 lines)

.editorconfig

Lines changed: 3 additions & 0 deletions
@@ -8,6 +8,9 @@ indent_size = 2
 insert_final_newline = true
 trim_trailing_whitespace = true
 
+[*.py]
+indent_size = 4
+
 [*.{ts,js}]
 quote_type = single
 

docs/docs/install/environment-variables.md

Lines changed: 1 addition & 0 deletions
@@ -172,6 +172,7 @@ Redis (Sentinel) URL example JSON before encoding:
 | `MACHINE_LEARNING_RKNN`                    | Enable RKNN hardware acceleration if supported                                | `True` | machine learning |
 | `MACHINE_LEARNING_RKNN_THREADS`            | How many threads of RKNN runtime should be spinned up while inferencing.      | `1`    | machine learning |
 | `MACHINE_LEARNING_MODEL_ARENA`             | Pre-allocates CPU memory to avoid memory fragmentation                        | true   | machine learning |
+| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The max number of image dimensions for which models have cached optimizations | `20`   | machine learning |
 
 \*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.
 

machine-learning/immich_ml/config.py

Lines changed: 1 addition & 0 deletions
@@ -70,6 +70,7 @@ class Settings(BaseSettings):
     rknn_threads: int = 1
     preload: PreloadModelData | None = None
     max_batch_size: MaxBatchSize | None = None
+    openvino_cache_capacity: int = 20
 
     @property
     def device_id(self) -> str:
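
Since `Settings` is a pydantic `BaseSettings` class, the new field should be overridable via the `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` variable documented above. A minimal sketch of that mapping, assuming a `MACHINE_LEARNING_` env prefix (the prefix configuration itself is not shown in this diff):

# Sketch only: how a pydantic BaseSettings field like openvino_cache_capacity
# can be overridden from the environment. The MACHINE_LEARNING_ prefix is an
# assumption taken from the variable name in environment-variables.md.
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class DemoSettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="MACHINE_LEARNING_")

    openvino_cache_capacity: int = 20  # default when the variable is unset


os.environ["MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY"] = "40"
print(DemoSettings().openvino_cache_capacity)  # -> 40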

machine-learning/immich_ml/sessions/ort.py

Lines changed: 11 additions & 1 deletion
@@ -5,6 +5,7 @@
 
 import numpy as np
 import onnxruntime as ort
+import orjson
 from numpy.typing import NDArray
 
 from immich_ml.models.constants import SUPPORTED_PROVIDERS
@@ -93,10 +94,19 @@ def _provider_options_default(self) -> list[dict[str, Any]]:
             case "CUDAExecutionProvider" | "ROCMExecutionProvider":
                 options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
             case "OpenVINOExecutionProvider":
+                openvino_dir = self.model_path.parent / "openvino"
+                openvino_dir.mkdir(parents=True, exist_ok=True)
                 options = {
                     "device_type": f"GPU.{settings.device_id}",
                     "precision": "FP32",
-                    "cache_dir": (self.model_path.parent / "openvino").as_posix(),
+                    "cache_dir": openvino_dir.as_posix(),
+                    "load_config": orjson.dumps(
+                        {
+                            f"GPU.{settings.device_id}": {
+                                "CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)
+                            },
+                        }
+                    ).decode(),
                 }
             case "CoreMLExecutionProvider":
                 options = {
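
For context, `_provider_options_default` builds the per-provider options that an `OrtSession` passes to onnxruntime. A rough standalone sketch of an equivalent call, assuming a hypothetical model path, device id `0`, and an onnxruntime build that ships the OpenVINO execution provider:

# Sketch only: provider options of the shape built above, passed explicitly
# to onnxruntime. The model path is hypothetical, and support for the
# OpenVINO "load_config" option depends on the onnxruntime/OpenVINO build.
import onnxruntime as ort
import orjson

device_id = "0"
cache_capacity = 20  # MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY default

options = {
    "device_type": f"GPU.{device_id}",
    "precision": "FP32",
    "cache_dir": "/cache/ViT-B-32__openai/visual/openvino",
    "load_config": orjson.dumps(
        {f"GPU.{device_id}": {"CPU_RUNTIME_CACHE_CAPACITY": str(cache_capacity)}}
    ).decode(),
}

session = ort.InferenceSession(
    "/cache/ViT-B-32__openai/visual/model.onnx",  # hypothetical path
    providers=["OpenVINOExecutionProvider"],
    provider_options=[options],
)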

machine-learning/test_main.py

Lines changed: 9 additions & 2 deletions
@@ -244,13 +244,19 @@ def test_sets_default_provider_options(self, ov_device_ids: list[str]) -> None:
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"])
 
         assert session.provider_options == [
-            {"device_type": "GPU.0", "precision": "FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
+            {
+                "device_type": "GPU.0",
+                "precision": "FP32",
+                "cache_dir": "/cache/ViT-B-32__openai/openvino",
+                "load_config": "{\"GPU.0\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
+            },
             {"arena_extend_strategy": "kSameAsRequested"},
         ]
 
-    def test_sets_provider_options_for_openvino(self) -> None:
+    def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
         model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
         os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
+        mocker.patch.object(settings, "openvino_cache_capacity", 10)
 
         session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
 
@@ -259,6 +265,7 @@ def test_sets_provider_options_for_openvino(self) -> None:
                 "device_type": "GPU.1",
                 "precision": "FP32",
                 "cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
+                "load_config": "{\"GPU.1\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
             }
         ]
 
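
The expected `load_config` strings here are simply the compact, whitespace-free JSON that `orjson.dumps` emits for the dict built in `ort.py`; a quick check of that assumption:

# orjson serializes without spaces, which is why the tests compare against
# the compact string '{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}'.
import orjson

payload = {"GPU.0": {"CPU_RUNTIME_CACHE_CAPACITY": "20"}}
assert orjson.dumps(payload).decode() == '{"GPU.0":{"CPU_RUNTIME_CACHE_CAPACITY":"20"}}'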

machine-learning/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default.
