@@ -148,34 +148,67 @@ TEST_F(ReaderTest, projectColumnsMutation) {
148148 makeFlatVector<int64_t >({0 , 1 , 3 , 4 , 5 , 6 , 7 , 8 , 9 }),
149149 });
150150 test::assertEqualVectors (expected, actual);
151- random::setSeed (42 );
152- random::RandomSkipTracker randomSkip (0.5 );
153- mutation.randomSkip = &randomSkip;
154- actual = RowReader::projectColumns (input, spec, &mutation);
155- if constexpr (std::is_same_v<folly::detail::DefaultGenerator, std::mt19937>) {
156- #if __APPLE__
157- expected = makeRowVector ({
158- makeFlatVector<int64_t >({1 , 5 , 6 , 7 , 8 , 9 }),
159- });
160- #else
161- expected = makeRowVector ({
162- makeFlatVector<int64_t >({3 , 4 , 7 , 9 }),
163- });
164- #endif
165- #if FOLLY_HAVE_EXTRANDOM_SFMT19937
166- } else if constexpr (std::is_same_v<
167- folly::detail::DefaultGenerator,
168- __gnu_cxx::sfmt19937>) {
169- expected = makeRowVector ({
170- makeFlatVector<int64_t >({0 , 1 , 3 , 5 , 6 , 8 }),
171- });
172- #endif
173- } else {
174- expected = makeRowVector ({
175- makeFlatVector<int64_t >({1 , 3 , 5 , 7 }),
176- });
151+
152+ constexpr auto kNumRounds = 1U << 6 ;
153+
154+ size_t numNonZero = 0 ;
155+ size_t numNonMax = 0 ;
156+
157+ // Test with random skip - use property-based testing instead of hardcoded
158+ // outputs to avoid brittleness when folly::Random implementation changes.
159+ std::mt19937 seeds;
160+ for (size_t round = 0 ; round < kNumRounds ; ++round) {
161+ const auto seed = seeds ();
162+
163+ random::setSeed (folly::to_narrow (seed));
164+ random::RandomSkipTracker randomSkip (0.5 );
165+ mutation.randomSkip = &randomSkip;
166+ actual = RowReader::projectColumns (input, spec, &mutation);
167+
168+ // Property 1: Result size should be less than input size (some rows
169+ // skipped). With 0.5 sample rate and 9 eligible rows (excluding deleted row
170+ // 2), we expect roughly 4-5 rows, but allow wider range for RNG variance.
171+ EXPECT_GE (actual->size (), 0 );
172+ EXPECT_LE (actual->size (), kSize - 1 );
173+
174+ numNonZero += actual->size () > 0 ;
175+ numNonMax += actual->size () < kSize - 1 ;
176+
177+ // The result is a RowVector with one child column. Assume it.
178+ auto res = actual->as <RowVector>()->childAt (0 )->as <SimpleVector<int64_t >>();
179+ std::vector<int64_t > vec;
180+ vec.reserve (actual->size ());
181+ for (vector_size_t i = 0 ; i < actual->size (); ++i) {
182+ vec.push_back (res->valueAt (i));
183+ }
184+
185+ // Property 2: All values in result must be from original input.
186+ for (auto val : vec) {
187+ // Each value must be in valid range
188+ EXPECT_GE (val, 0 );
189+ EXPECT_LT (val, kSize );
190+ // Deleted row should never appear
191+ EXPECT_NE (val, 2 );
192+ }
193+
194+ // Property 3: Values should be in ascending order (projectColumns preserves
195+ // order).
196+ EXPECT_TRUE (std::is_sorted (vec.begin (), vec.end ()));
197+
198+ // Property 4: No duplicate values (each input row appears at most once).
199+ EXPECT_TRUE (std::adjacent_find (vec.begin (), vec.end ()) == vec.end ());
200+
201+ // Property 5: With a fixed seed, the result should be deterministic
202+ // (same seed = same output, even if we don't know what that output is)
203+ random::setSeed (folly::to_narrow (seed));
204+ random::RandomSkipTracker randomSkip2 (0.5 );
205+ mutation.randomSkip = &randomSkip2;
206+ auto actual2 = RowReader::projectColumns (input, spec, &mutation);
207+ test::assertEqualVectors (actual, actual2);
177208 }
178- test::assertEqualVectors (expected, actual);
209+
210+ EXPECT_NE (0 , numNonZero);
211+ EXPECT_NE (0 , numNonMax);
179212}
180213
181214} // namespace
0 commit comments