Refactor to inference device endpoint (#5093)

A-Artemis · web-flow · commit e22dc723dde3 · 2025-12-29T09:10:54.000Z
diff --git a/application/backend/app/api/routers/system.py b/application/backend/app/api/routers/system.py
@@ -14,12 +14,12 @@
 router = APIRouter(prefix="/api")
 
 
-@router.get("/system/devices")
-async def get_devices(
+@router.get("/system/devices/inference")
+async def get_inference_devices(
     system_service: Annotated[SystemService, Depends(get_system_service)],
 ) -> list[DeviceInfo]:
-    """Returns the list of available compute devices (CPU, Intel XPU, NVIDIA CUDA)."""
-    return system_service.get_devices()
+    """Returns the list of compute devices available for inference (e.g. CPU, Intel XPU)."""
+    return system_service.get_inference_devices()
 
 
 @router.get("/system/metrics/memory")
diff --git a/application/backend/app/services/system_service.py b/application/backend/app/services/system_service.py
@@ -75,6 +75,15 @@ def get_devices() -> list[DeviceInfo]:
 
         return devices
 
+    def get_inference_devices(self) -> list[DeviceInfo]:
+        """
+        Get the compute devices usable for inference: all detected devices except CUDA ones.
+
+        Returns:
+            list[DeviceInfo]: Devices from get_devices() whose type is not DeviceType.CUDA
+        """
+        return [device for device in self.get_devices() if device.type != DeviceType.CUDA]
+
     def validate_device(self, device_str: str) -> bool:
         """
         Validate if a device string is available on the system.
diff --git a/application/backend/tests/unit/routers/test_system.py b/application/backend/tests/unit/routers/test_system.py
@@ -26,13 +26,13 @@ def fxt_system_service() -> Mock:
 
 
 class TestSystemEndpoints:
-    def test_get_devices_cpu_only(self, fxt_system_service: Mock, fxt_client: TestClient):
-        """Test GET /api/system/devices with CPU only"""
-        fxt_system_service.get_devices.return_value = [
+    def test_get_inference_devices_cpu_only(self, fxt_system_service: Mock, fxt_client: TestClient):
+        """Test GET /api/system/devices/inference with CPU only"""
+        fxt_system_service.get_inference_devices.return_value = [
             DeviceInfo(type=DeviceType.CPU, name="CPU", memory=None, index=None),
         ]
 
-        response = fxt_client.get("/api/system/devices")
+        response = fxt_client.get("/api/system/devices/inference")
 
         assert response.status_code == status.HTTP_200_OK
         devices = response.json()
@@ -42,14 +42,14 @@ def test_get_devices_cpu_only(self, fxt_system_service: Mock, fxt_client: TestCl
         assert devices[0]["memory"] is None
         assert devices[0]["index"] is None
 
-    def test_get_devices_with_xpu(self, fxt_system_service: Mock, fxt_client: TestClient):
-        """Test GET /api/system/devices with Intel XPU"""
-        fxt_system_service.get_devices.return_value = [
+    def test_get_inference_devices_with_xpu(self, fxt_system_service: Mock, fxt_client: TestClient):
+        """Test GET /api/system/devices/inference with Intel XPU"""
+        fxt_system_service.get_inference_devices.return_value = [
             DeviceInfo(type=DeviceType.CPU, name="CPU", memory=None, index=None),
             DeviceInfo(type=DeviceType.XPU, name="Intel(R) Graphics [0x7d41]", memory=36022263808, index=0),
         ]
 
-        response = fxt_client.get("/api/system/devices")
+        response = fxt_client.get("/api/system/devices/inference")
 
         assert response.status_code == status.HTTP_200_OK
         devices = response.json()
@@ -60,41 +60,6 @@ def test_get_devices_with_xpu(self, fxt_system_service: Mock, fxt_client: TestCl
         assert devices[1]["memory"] == 36022263808
         assert devices[1]["index"] == 0
 
-    def test_get_devices_with_cuda(self, fxt_system_service: Mock, fxt_client: TestClient):
-        """Test GET /api/system/devices with NVIDIA CUDA"""
-        fxt_system_service.get_devices.return_value = [
-            DeviceInfo(type=DeviceType.CPU, name="CPU", memory=None, index=None),
-            DeviceInfo(type=DeviceType.CUDA, name="NVIDIA GeForce RTX 4090", memory=25769803776, index=0),
-        ]
-
-        response = fxt_client.get("/api/system/devices")
-
-        assert response.status_code == status.HTTP_200_OK
-        devices = response.json()
-        assert len(devices) == 2
-        assert devices[0]["type"] == "cpu"
-        assert devices[1]["type"] == "cuda"
-        assert devices[1]["name"] == "NVIDIA GeForce RTX 4090"
-        assert devices[1]["memory"] == 25769803776
-        assert devices[1]["index"] == 0
-
-    def test_get_devices_with_all_devices(self, fxt_system_service: Mock, fxt_client: TestClient):
-        """Test GET /api/system/devices with all device types"""
-        fxt_system_service.get_devices.return_value = [
-            DeviceInfo(type=DeviceType.CPU, name="CPU", memory=None, index=None),
-            DeviceInfo(type=DeviceType.XPU, name="Intel(R) Graphics [0x7d41]", memory=36022263808, index=0),
-            DeviceInfo(type=DeviceType.CUDA, name="NVIDIA GeForce RTX 4090", memory=25769803776, index=0),
-        ]
-
-        response = fxt_client.get("/api/system/devices")
-
-        assert response.status_code == status.HTTP_200_OK
-        devices = response.json()
-        assert len(devices) == 3
-        assert devices[0]["type"] == "cpu"
-        assert devices[1]["type"] == "xpu"
-        assert devices[2]["type"] == "cuda"
-
     def test_get_memory(self, fxt_system_service: Mock, fxt_client: TestClient):
         """Test GET /api/system/metrics/memory"""
         fxt_system_service.get_memory_usage.return_value = (1024.5, 8192.0)
diff --git a/application/backend/tests/unit/services/test_system_service.py b/application/backend/tests/unit/services/test_system_service.py
@@ -169,6 +169,32 @@ def test_validate_device_cuda_not_available(self, fxt_system_service: SystemServ
             assert fxt_system_service.validate_device("cuda") is False
             assert fxt_system_service.validate_device("cuda-0") is False
 
+    def test_get_inference_devices_with_multiple_devices(self, fxt_system_service: SystemService):
+        """Test that inference devices exclude CUDA when both XPU and CUDA GPUs are available"""
+        with patch("app.services.system_service.torch") as mock_torch:
+            # Mock XPU device
+            mock_xpu_dp = MagicMock()
+            mock_xpu_dp.name = "Intel(R) Graphics [0x7d41]"
+            mock_xpu_dp.total_memory = 36022263808
+
+            mock_torch.xpu.is_available.return_value = True
+            mock_torch.xpu.device_count.return_value = 1
+            mock_torch.xpu.get_device_properties.return_value = mock_xpu_dp
+
+            # Mock CUDA device (expected to be filtered out of the result)
+            mock_cuda_dp = MagicMock()
+            mock_cuda_dp.name = "NVIDIA GeForce RTX 4090"
+            mock_cuda_dp.total_memory = 25769803776
+
+            mock_torch.cuda.is_available.return_value = True
+            mock_torch.cuda.device_count.return_value = 1
+            mock_torch.cuda.get_device_properties.return_value = mock_cuda_dp
+
+            inference_devices = fxt_system_service.get_inference_devices()
+
+            assert len(inference_devices) == 2
+            assert not any(device.type == "cuda" for device in inference_devices)
+
     def test_validate_device_invalid_type(self, fxt_system_service: SystemService):
         """Test validating invalid device types"""
         with patch("app.services.system_service.torch") as mock_torch, pytest.raises(ValueError):