From 75c701cf234cb640a72766ce73426dc133501328 Mon Sep 17 00:00:00 2001 From: "Rose K. Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:20:56 -0500 Subject: [PATCH 1/2] reducing dataset in pcov cur sample selection --- tests/test_sample_pcov_cur.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_sample_pcov_cur.py b/tests/test_sample_pcov_cur.py index cb0f3d5d0..c14c50c48 100644 --- a/tests/test_sample_pcov_cur.py +++ b/tests/test_sample_pcov_cur.py @@ -12,7 +12,8 @@ class TestPCovCUR(unittest.TestCase): def setUp(self): self.X, self.y = get_dataset(return_X_y=True) - self.idx = [256, 304, 58, 10, 23, 278, 230, 285, 291, 357] + self.X = self.X[:, :4] + self.idx = [256, 304, 41, 408, 311, 364, 152, 78, 359, 102] def test_known(self): """ @@ -51,7 +52,7 @@ def test_non_it(self): """ This test checks that the model can be run non-iteratively """ - self.idx = [256, 32, 138, 290, 362, 141, 359, 254, 428, 9] + self.idx = [256, 32, 138, 290, 362, 141, 359, 428, 254, 9] selector = PCovCUR(n_to_select=10, recompute_every=0) selector.fit(self.X, self.y) @@ -62,7 +63,7 @@ def test_multiple_k(self): This test checks that the model can be run with multiple k's """ - for k in np.logspace(0, np.log10(self.X.shape[0]), 4, dtype=int): + for k in list(set(np.logspace(0, np.log10(min(self.X.shape)), 4, dtype=int))): selector = PCovCUR(n_to_select=10, k=k) selector.fit(self.X, self.y) From dbab1f249687615b0f9c0e53e3528828ef8bdb91 Mon Sep 17 00:00:00 2001 From: "Rose K. 
Cersonsky" <47536110+rosecers@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:26:05 -0500 Subject: [PATCH 2/2] Turns out we already made this one faster, but that patch didn't get updated when we moved to the diabetes dataset --- tests/test_kernel_pcovr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_kernel_pcovr.py b/tests/test_kernel_pcovr.py index ded3532a9..18a2ec844 100644 --- a/tests/test_kernel_pcovr.py +++ b/tests/test_kernel_pcovr.py @@ -21,7 +21,7 @@ def __init__(self, *args, **kwargs): self.X, self.Y = get_dataset(return_X_y=True) # for the sake of expedience, only use a subset of the dataset - idx = self.random_state.choice(len(self.X), 1000) + idx = self.random_state.choice(len(self.X), 100) self.X = self.X[idx] self.Y = self.Y[idx]