Skip to content

Commit 8bc12c8

Browse files
NarineK authored and facebook-github-bot committed
Fix failing GPU errors for influential examples (#1081)
Summary: Due to floating-point arithmetic inaccuracies and limitations, switching to double for test cases and passing dtype to the projection matrix so that the arithmetic error is within the accepted range. Pull Request resolved: #1081 Reviewed By: 99warriors Differential Revision: D41919707 Pulled By: NarineK fbshipit-source-id: f8ef65e751ada7c3d3baddb1231b547c3be1823c
1 parent dcb87d3 commit 8bc12c8

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

captum/influence/_core/tracincp_fast_rand_proj.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1380,6 +1380,7 @@ def _set_projections_tracincp_fast_rand_proj(
13801380
1
13811381
] # this is the dimension of the input of the last fully-connected layer
13821382
device = batch_jacobians.device
1383+
dtype = batch_jacobians.dtype
13831384

13841385
# choose projection if needed
13851386
# without projection, the dimension of the intermediate quantities returned
@@ -1409,8 +1410,8 @@ def _set_projections_tracincp_fast_rand_proj(
14091410
)
14101411

14111412
projection_quantities = jacobian_projection.to(
1412-
device
1413-
), layer_input_projection.to(device)
1413+
device=device, dtype=dtype
1414+
), layer_input_projection.to(device=device, dtype=dtype)
14141415

14151416
return projection_quantities
14161417

tests/influence/_utils/common.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,35 +190,42 @@ def get_random_model_and_data(
190190
BasicLinearNet(in_features, hidden_nodes, out_features)
191191
if not unpack_inputs
192192
else MultLinearNet(in_features, hidden_nodes, out_features, num_inputs)
193-
)
193+
).double()
194194

195195
num_checkpoints = 5
196196

197197
for i in range(num_checkpoints):
198-
net.linear1.weight.data = torch.normal(3, 4, (hidden_nodes, in_features))
199-
net.linear2.weight.data = torch.normal(5, 6, (out_features, hidden_nodes))
198+
net.linear1.weight.data = torch.normal(
199+
3, 4, (hidden_nodes, in_features)
200+
).double()
201+
net.linear2.weight.data = torch.normal(
202+
5, 6, (out_features, hidden_nodes)
203+
).double()
200204
if unpack_inputs:
201205
net.pre.weight.data = torch.normal(
202206
3, 4, (in_features, in_features * num_inputs)
203207
)
208+
if hasattr(net, "pre"):
209+
net.pre.weight.data = net.pre.weight.data.double()
204210
checkpoint_name = "-".join(["checkpoint-reg", str(i + 1) + ".pt"])
205211
net_adjusted = _wrap_model_in_dataparallel(net) if use_gpu else net
206212
torch.save(net_adjusted.state_dict(), os.path.join(tmpdir, checkpoint_name))
207213

208214
num_samples = 50
209215
num_train = 32
210-
all_labels = torch.normal(1, 2, (num_samples, out_features))
216+
all_labels = torch.normal(1, 2, (num_samples, out_features)).double()
211217
train_labels = all_labels[:num_train]
212218
test_labels = all_labels[num_train:]
213219

214220
if unpack_inputs:
215221
all_samples = [
216-
torch.normal(0, 1, (num_samples, in_features)) for _ in range(num_inputs)
222+
torch.normal(0, 1, (num_samples, in_features)).double()
223+
for _ in range(num_inputs)
217224
]
218225
train_samples = [ts[:num_train] for ts in all_samples]
219226
test_samples = [ts[num_train:] for ts in all_samples]
220227
else:
221-
all_samples = torch.normal(0, 1, (num_samples, in_features))
228+
all_samples = torch.normal(0, 1, (num_samples, in_features)).double()
222229
train_samples = all_samples[:num_train]
223230
test_samples = all_samples[num_train:]
224231

0 commit comments

Comments
 (0)