From a4b845972c0d5ebd35398c6d413158a7ee3dba50 Mon Sep 17 00:00:00 2001
From: BanzaiTokyo <banzaitokyo@gmail.com>
Date: Thu, 27 Mar 2025 09:01:12 +0000
Subject: [PATCH 1/2] Add available_device fixture to test_confusion_matrix.py tests #3335

---
 tests/ignite/metrics/test_confusion_matrix.py | 82 ++++++++++++-------
 1 file changed, 51 insertions(+), 31 deletions(-)

diff --git a/tests/ignite/metrics/test_confusion_matrix.py b/tests/ignite/metrics/test_confusion_matrix.py
index 8d5ed61817da..789df8af265f 100644
--- a/tests/ignite/metrics/test_confusion_matrix.py
+++ b/tests/ignite/metrics/test_confusion_matrix.py
@@ -73,9 +73,10 @@ def test_data(request):
 
 
 @pytest.mark.parametrize("n_times", range(5))
-def test_multiclass_input(n_times, test_data):
+def test_multiclass_input(n_times, test_data, available_device):
     y_pred, y, num_classes, batch_size = test_data
-    cm = ConfusionMatrix(num_classes=num_classes)
+    cm = ConfusionMatrix(num_classes=num_classes, device=available_device)
+    assert cm._device == torch.device(available_device)
     cm.reset()
     if batch_size > 1:
         n_iters = y.shape[0] // batch_size + 1
@@ -90,9 +91,10 @@ def test_multiclass_input(n_times, test_data):
     assert np.all(confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) == cm.compute().numpy())
 
 
-def test_ignored_out_of_num_classes_indices():
+def test_ignored_out_of_num_classes_indices(available_device):
     num_classes = 21
-    cm = ConfusionMatrix(num_classes=num_classes)
+    cm = ConfusionMatrix(num_classes=num_classes, device=available_device)
+    assert cm._device == torch.device(available_device)
 
     y_pred = torch.rand(4, num_classes, 12, 10)
     y = torch.randint(0, 255, size=(4, 12, 10)).long()
@@ -127,9 +129,10 @@ def compute_th_y_true_y_logits(y_true, y_pred):
     return th_y_true, th_y_logits
 
 
-def test_multiclass_images():
+def test_multiclass_images(available_device):
     num_classes = 3
-    cm = ConfusionMatrix(num_classes=num_classes)
+    cm = ConfusionMatrix(num_classes=num_classes, device=available_device)
+    assert cm._device == torch.device(available_device)
 
     y_true, y_pred = get_y_true_y_pred()
 
@@ -148,7 +151,8 @@ def test_multiclass_images():
 
     # Another test on batch of 2 images
     num_classes = 3
-    cm = ConfusionMatrix(num_classes=num_classes)
+    cm = ConfusionMatrix(num_classes=num_classes, device=available_device)
+    assert cm._device == torch.device(available_device)
 
     # Create a batch of two images:
     th_y_true1 = torch.from_numpy(y_true).reshape(1, 30, 30)
@@ -200,7 +204,7 @@ def test_iou_wrong_input():
 
 
 @pytest.mark.parametrize("average", [None, "samples"])
-def test_iou(average):
+def test_iou(average, available_device):
     y_true, y_pred = get_y_true_y_pred()
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
@@ -212,7 +216,8 @@ def test_iou(average):
         union = bin_y_true | bin_y_pred
         true_res[index] = intersection.sum() / union.sum()
 
-    cm = ConfusionMatrix(num_classes=3, average=average)
+    cm = ConfusionMatrix(num_classes=3, average=average, device=available_device)
+    assert cm._device == torch.device(available_device)
     iou_metric = IoU(cm)
 
     # Update metric
@@ -224,7 +229,8 @@ def test_iou(average):
     assert np.all(res == true_res)
 
     for ignore_index in range(3):
-        cm = ConfusionMatrix(num_classes=3)
+        cm = ConfusionMatrix(num_classes=3, device=available_device)
+        assert cm._device == torch.device(available_device)
         iou_metric = IoU(cm, ignore_index=ignore_index)
         # Update metric
         output = (th_y_logits, th_y_true)
@@ -238,7 +244,7 @@ def test_iou(average):
         IoU(cm)
 
 
-def test_miou():
+def test_miou(available_device):
     y_true, y_pred = get_y_true_y_pred()
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
@@ -252,7 +258,8 @@ def test_miou():
 
     true_res_ = np.mean(true_res)
 
-    cm = ConfusionMatrix(num_classes=3)
+    cm = ConfusionMatrix(num_classes=3, device=available_device)
+    assert cm._device == torch.device(available_device)
     iou_metric = mIoU(cm)
 
     # Update metric
@@ -264,7 +271,8 @@ def test_miou():
     assert pytest.approx(res) == true_res_
 
     for ignore_index in range(3):
-        cm = ConfusionMatrix(num_classes=3)
+        cm = ConfusionMatrix(num_classes=3, device=available_device)
+        assert cm._device == torch.device(available_device)
         iou_metric = mIoU(cm, ignore_index=ignore_index)
         # Update metric
         output = (th_y_logits, th_y_true)
@@ -274,13 +282,14 @@ def test_miou():
         assert pytest.approx(res) == true_res_, f"{ignore_index}: {res} vs {true_res_}"
 
 
-def test_cm_accuracy():
+def test_cm_accuracy(available_device):
     y_true, y_pred = get_y_true_y_pred()
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
     true_acc = accuracy_score(y_true.reshape(-1), y_pred.reshape(-1))
 
-    cm = ConfusionMatrix(num_classes=3)
+    cm = ConfusionMatrix(num_classes=3, device=available_device)
+    assert cm._device == torch.device(available_device)
     acc_metric = cmAccuracy(cm)
 
     # Update metric
@@ -292,13 +301,14 @@ def test_cm_accuracy():
     assert pytest.approx(res) == true_acc
 
 
-def test_cm_precision():
+def test_cm_precision(available_device):
     y_true, y_pred = np.random.randint(0, 10, size=(1000,)), np.random.randint(0, 10, size=(1000,))
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
     true_pr = precision_score(y_true.reshape(-1), y_pred.reshape(-1), average="macro")
 
-    cm = ConfusionMatrix(num_classes=10)
+    cm = ConfusionMatrix(num_classes=10, device=available_device)
+    assert cm._device == torch.device(available_device)
     pr_metric = cmPrecision(cm, average=True)
 
     # Update metric
@@ -310,7 +320,8 @@ def test_cm_precision():
     assert pytest.approx(res) == true_pr
 
     true_pr = precision_score(y_true.reshape(-1), y_pred.reshape(-1), average=None)
-    cm = ConfusionMatrix(num_classes=10)
+    cm = ConfusionMatrix(num_classes=10, device=available_device)
+    assert cm._device == torch.device(available_device)
     pr_metric = cmPrecision(cm, average=False)
 
     # Update metric
@@ -322,13 +333,14 @@ def test_cm_precision():
     assert np.all(res == true_pr)
 
 
-def test_cm_recall():
+def test_cm_recall(available_device):
     y_true, y_pred = np.random.randint(0, 10, size=(1000,)), np.random.randint(0, 10, size=(1000,))
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
     true_re = recall_score(y_true.reshape(-1), y_pred.reshape(-1), average="macro")
 
-    cm = ConfusionMatrix(num_classes=10)
+    cm = ConfusionMatrix(num_classes=10, device=available_device)
+    assert cm._device == torch.device(available_device)
     re_metric = cmRecall(cm, average=True)
 
     # Update metric
@@ -340,7 +352,8 @@ def test_cm_recall():
     assert pytest.approx(res) == true_re
 
     true_re = recall_score(y_true.reshape(-1), y_pred.reshape(-1), average=None)
-    cm = ConfusionMatrix(num_classes=10)
+    cm = ConfusionMatrix(num_classes=10, device=available_device)
+    assert cm._device == torch.device(available_device)
     re_metric = cmRecall(cm, average=False)
 
     # Update metric
@@ -352,20 +365,22 @@ def test_cm_recall():
     assert np.all(res == true_re)
 
 
-def test_cm_with_average():
+def test_cm_with_average(available_device):
     num_classes = 5
     y_pred = torch.rand(40, num_classes)
     y = torch.randint(0, num_classes, size=(40,)).long()
     np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
     np_y = y.numpy().ravel()
 
-    cm = ConfusionMatrix(num_classes=num_classes, average="samples")
+    cm = ConfusionMatrix(num_classes=num_classes, average="samples", device=available_device)
+    assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_res = confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) * 1.0 / len(np_y)
     res = cm.compute().numpy()
     np.testing.assert_almost_equal(true_res, res)
 
-    cm = ConfusionMatrix(num_classes=num_classes, average="recall")
+    cm = ConfusionMatrix(num_classes=num_classes, average="recall", device=available_device)
+    assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_re = recall_score(np_y, np_y_pred, average=None, labels=list(range(num_classes)))
     res = cm.compute().numpy().diagonal()
@@ -375,7 +390,8 @@ def test_cm_with_average():
     true_res = confusion_matrix(np_y, np_y_pred, normalize="true")
     np.testing.assert_almost_equal(true_res, res)
 
-    cm = ConfusionMatrix(num_classes=num_classes, average="precision")
+    cm = ConfusionMatrix(num_classes=num_classes, average="precision", device=available_device)
+    assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_pr = precision_score(np_y, np_y_pred, average=None, labels=list(range(num_classes)))
     res = cm.compute().numpy().diagonal()
@@ -404,7 +420,7 @@ def test_dice_coefficient_wrong_input():
         DiceCoefficient(cm, ignore_index=11)
 
 
-def test_dice_coefficient():
+def test_dice_coefficient(available_device):
     y_true, y_pred = get_y_true_y_pred()
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
@@ -418,7 +434,8 @@ def test_dice_coefficient():
         union = bin_y_true | bin_y_pred
         true_res[index] = 2.0 * intersection.sum() / (union.sum() + intersection.sum())
 
-    cm = ConfusionMatrix(num_classes=3)
+    cm = ConfusionMatrix(num_classes=3, device=available_device)
+    assert cm._device == torch.device(available_device)
     dice_metric = DiceCoefficient(cm)
 
     # Update metric
@@ -429,7 +446,8 @@ def test_dice_coefficient():
     np.testing.assert_allclose(res, true_res)
 
     for ignore_index in range(3):
-        cm = ConfusionMatrix(num_classes=3)
+        cm = ConfusionMatrix(num_classes=3, device=available_device)
+        assert cm._device == torch.device(available_device)
         dice_metric = DiceCoefficient(cm, ignore_index=ignore_index)
         # Update metric
         output = (th_y_logits, th_y_true)
@@ -529,7 +547,7 @@ def _test_distrib_accumulator_device(device):
 
 
 @pytest.mark.parametrize("average", [None, "samples"])
-def test_jaccard_index(average):
+def test_jaccard_index(average, available_device):
     y_true, y_pred = get_y_true_y_pred()
     th_y_true, th_y_logits = compute_th_y_true_y_logits(y_true, y_pred)
 
@@ -541,7 +559,8 @@ def test_jaccard_index(average):
         union = bin_y_true | bin_y_pred
         true_res[index] = intersection.sum() / union.sum()
 
-    cm = ConfusionMatrix(num_classes=3, average=average)
+    cm = ConfusionMatrix(num_classes=3, average=average, device=available_device)
+    assert cm._device == torch.device(available_device)
     jaccard_index = JaccardIndex(cm)
 
     # Update metric
@@ -553,7 +572,8 @@ def test_jaccard_index(average):
     assert np.all(res == true_res)
 
     for ignore_index in range(3):
-        cm = ConfusionMatrix(num_classes=3)
+        cm = ConfusionMatrix(num_classes=3, device=available_device)
+        assert cm._device == torch.device(available_device)
         jaccard_index_metric = JaccardIndex(cm, ignore_index=ignore_index)
         # Update metric
         output = (th_y_logits, th_y_true)

From fbf7e6f743e8f6f42e18be0b267dca30be419314 Mon Sep 17 00:00:00 2001
From: BanzaiTokyo <banzaitokyo@gmail.com>
Date: Thu, 27 Mar 2025 10:41:39 +0000
Subject: [PATCH 2/2] Move tensors to CPU before calling .numpy() in confusion matrix tests

---
 tests/ignite/metrics/test_confusion_matrix.py | 60 ++++++++++---------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/tests/ignite/metrics/test_confusion_matrix.py b/tests/ignite/metrics/test_confusion_matrix.py
index 789df8af265f..7973ee110c59 100644
--- a/tests/ignite/metrics/test_confusion_matrix.py
+++ b/tests/ignite/metrics/test_confusion_matrix.py
@@ -86,9 +86,9 @@ def test_multiclass_input(n_times, test_data, available_device):
     else:
         cm.update((y_pred, y))
 
-    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
-    np_y = y.numpy().ravel()
-    assert np.all(confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) == cm.compute().numpy())
+    np_y_pred = y_pred.cpu().numpy().argmax(axis=1).ravel()
+    np_y = y.cpu().numpy().ravel()
+    assert np.all(confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) == cm.compute().cpu().numpy())
 
 
 def test_ignored_out_of_num_classes_indices(available_device):
@@ -99,9 +99,9 @@ def test_ignored_out_of_num_classes_indices(available_device):
     y_pred = torch.rand(4, num_classes, 12, 10)
     y = torch.randint(0, 255, size=(4, 12, 10)).long()
     cm.update((y_pred, y))
-    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
-    np_y = y.numpy().ravel()
-    assert np.all(confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) == cm.compute().numpy())
+    np_y_pred = y_pred.cpu().numpy().argmax(axis=1).ravel()
+    np_y = y.cpu().numpy().ravel()
+    assert np.all(confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) == cm.compute().cpu().numpy())
 
 
 def get_y_true_y_pred():
@@ -145,7 +145,7 @@ def test_multiclass_images(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = cm.compute().numpy()
+    res = cm.compute().cpu().numpy()
 
     assert np.all(true_res == res)
 
@@ -177,10 +177,12 @@ def test_multiclass_images(available_device):
     # Update metric & compute
     output = (th_y_logits, th_y_true)
     cm.update(output)
-    res = cm.compute().numpy()
+    res = cm.compute().cpu().numpy()
 
     # Compute confusion matrix with sklearn
-    true_res = confusion_matrix(th_y_true.numpy().reshape(-1), np.argmax(th_y_logits.numpy(), axis=1).reshape(-1))
+    true_res = confusion_matrix(
+        th_y_true.cpu().numpy().reshape(-1), np.argmax(th_y_logits.cpu().numpy(), axis=1).reshape(-1)
+    )
 
     assert np.all(true_res == res)
 
@@ -224,7 +226,7 @@ def test_iou(average, available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = iou_metric.compute().numpy()
+    res = iou_metric.compute().cpu().numpy()
 
     assert np.all(res == true_res)
 
@@ -235,7 +237,7 @@ def test_iou(average, available_device):
         # Update metric
         output = (th_y_logits, th_y_true)
         cm.update(output)
-        res = iou_metric.compute().numpy()
+        res = iou_metric.compute().cpu().numpy()
         true_res_ = true_res[:ignore_index] + true_res[ignore_index + 1 :]
         assert np.all(res == true_res_), f"{ignore_index}: {res} vs {true_res_}"
 
@@ -266,7 +268,7 @@ def test_miou(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = iou_metric.compute().numpy()
+    res = iou_metric.compute().cpu().numpy()
 
     assert pytest.approx(res) == true_res_
 
@@ -277,7 +279,7 @@ def test_miou(available_device):
         # Update metric
         output = (th_y_logits, th_y_true)
         cm.update(output)
-        res = iou_metric.compute().numpy()
+        res = iou_metric.compute().cpu().numpy()
         true_res_ = np.mean(true_res[:ignore_index] + true_res[ignore_index + 1 :])
         assert pytest.approx(res) == true_res_, f"{ignore_index}: {res} vs {true_res_}"
 
@@ -296,7 +298,7 @@ def test_cm_accuracy(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = acc_metric.compute().numpy()
+    res = acc_metric.compute().cpu().numpy()
 
     assert pytest.approx(res) == true_acc
 
@@ -315,7 +317,7 @@ def test_cm_precision(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = pr_metric.compute().numpy()
+    res = pr_metric.compute().cpu().numpy()
 
     assert pytest.approx(res) == true_pr
 
@@ -328,7 +330,7 @@ def test_cm_precision(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = pr_metric.compute().numpy()
+    res = pr_metric.compute().cpu().numpy()
 
     assert np.all(res == true_pr)
 
@@ -347,7 +349,7 @@ def test_cm_recall(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = re_metric.compute().numpy()
+    res = re_metric.compute().cpu().numpy()
 
     assert pytest.approx(res) == true_re
 
@@ -360,7 +362,7 @@ def test_cm_recall(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = re_metric.compute().numpy()
+    res = re_metric.compute().cpu().numpy()
 
     assert np.all(res == true_re)
 
@@ -369,24 +371,24 @@ def test_cm_with_average(available_device):
     num_classes = 5
     y_pred = torch.rand(40, num_classes)
     y = torch.randint(0, num_classes, size=(40,)).long()
-    np_y_pred = y_pred.numpy().argmax(axis=1).ravel()
-    np_y = y.numpy().ravel()
+    np_y_pred = y_pred.cpu().numpy().argmax(axis=1).ravel()
+    np_y = y.cpu().numpy().ravel()
 
     cm = ConfusionMatrix(num_classes=num_classes, average="samples", device=available_device)
     assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_res = confusion_matrix(np_y, np_y_pred, labels=list(range(num_classes))) * 1.0 / len(np_y)
-    res = cm.compute().numpy()
+    res = cm.compute().cpu().numpy()
     np.testing.assert_almost_equal(true_res, res)
 
     cm = ConfusionMatrix(num_classes=num_classes, average="recall", device=available_device)
     assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_re = recall_score(np_y, np_y_pred, average=None, labels=list(range(num_classes)))
-    res = cm.compute().numpy().diagonal()
+    res = cm.compute().cpu().numpy().diagonal()
     np.testing.assert_almost_equal(true_re, res)
 
-    res = cm.compute().numpy()
+    res = cm.compute().cpu().numpy()
     true_res = confusion_matrix(np_y, np_y_pred, normalize="true")
     np.testing.assert_almost_equal(true_res, res)
 
@@ -394,10 +396,10 @@ def test_cm_with_average(available_device):
     assert cm._device == torch.device(available_device)
     cm.update((y_pred, y))
     true_pr = precision_score(np_y, np_y_pred, average=None, labels=list(range(num_classes)))
-    res = cm.compute().numpy().diagonal()
+    res = cm.compute().cpu().numpy().diagonal()
     np.testing.assert_almost_equal(true_pr, res)
 
-    res = cm.compute().numpy()
+    res = cm.compute().cpu().numpy()
     true_res = confusion_matrix(np_y, np_y_pred, normalize="pred")
     np.testing.assert_almost_equal(true_res, res)
 
@@ -442,7 +444,7 @@ def test_dice_coefficient(available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = dice_metric.compute().numpy()
+    res = dice_metric.compute().cpu().numpy()
     np.testing.assert_allclose(res, true_res)
 
     for ignore_index in range(3):
@@ -452,7 +454,7 @@ def test_dice_coefficient(available_device):
         # Update metric
         output = (th_y_logits, th_y_true)
         cm.update(output)
-        res = dice_metric.compute().numpy()
+        res = dice_metric.compute().cpu().numpy()
         true_res_ = true_res[:ignore_index] + true_res[ignore_index + 1 :]
         assert np.all(res == true_res_), f"{ignore_index}: {res} vs {true_res_}"
 
@@ -567,7 +569,7 @@ def test_jaccard_index(average, available_device):
     output = (th_y_logits, th_y_true)
     cm.update(output)
 
-    res = jaccard_index.compute().numpy()
+    res = jaccard_index.compute().cpu().numpy()
 
     assert np.all(res == true_res)
 
@@ -578,7 +580,7 @@ def test_jaccard_index(average, available_device):
         # Update metric
         output = (th_y_logits, th_y_true)
         cm.update(output)
-        res = jaccard_index_metric.compute().numpy()
+        res = jaccard_index_metric.compute().cpu().numpy()
         true_res_ = true_res[:ignore_index] + true_res[ignore_index + 1 :]
         assert np.all(res == true_res_), f"{ignore_index}: {res} vs {true_res_}"