|
26 | 26 | #include "velox/exec/tests/utils/AssertQueryBuilder.h" |
27 | 27 | #include "velox/exec/tests/utils/HiveConnectorTestBase.h" |
28 | 28 | #include "velox/exec/tests/utils/PlanBuilder.h" |
| 29 | +#include "velox/exec/TaskStats.h" |
29 | 30 |
|
30 | 31 | #ifdef VELOX_ENABLE_PARQUET |
31 | 32 | #include "velox/dwio/parquet/RegisterParquetReader.h" |
@@ -181,7 +182,7 @@ class HiveIcebergTest : public HiveConnectorTestBase { |
181 | 182 | /// positions for data_file_1 and data_file_2. There are 3 RowGroups in this
182 | 183 | /// delete file, the first two contain positions for data_file_1, and the last
183 | 184 | /// contains positions for data_file_2.
184 | | - void assertPositionalDeletes( |
| 185 | + std::shared_ptr<exec::Task> assertPositionalDeletes( |
185 | 186 | const std::map<std::string, std::vector<int64_t>>& rowGroupSizesForFiles, |
186 | 187 | const std::unordered_map< |
187 | 188 | std::string, |
@@ -242,8 +243,11 @@ class HiveIcebergTest : public HiveConnectorTestBase { |
242 | 243 | auto planStats = toPlanStats(task->taskStats()); |
243 | 244 |
|
244 | 245 | auto it = planStats.find(plan->id()); |
245 | | - ASSERT_TRUE(it != planStats.end()); |
246 | | - ASSERT_TRUE(it->second.peakMemoryBytes > 0); |
| 246 | + EXPECT_TRUE(it != planStats.end()); |
| 247 | + if (it != planStats.end()) { |
| 248 | + EXPECT_TRUE(it->second.peakMemoryBytes > 0); |
| 249 | + } |
| 250 | + return task; |
247 | 251 | } |
248 | 252 |
|
249 | 253 | const static int rowCount = 20000; |
@@ -947,4 +951,48 @@ TEST_F(HiveIcebergTest, positionalDeleteFileWithRowGroupFilter) { |
947 | 951 | 0); |
948 | 952 | } |
949 | 953 | #endif |
| 954 | + |
| 955 | +TEST_F(HiveIcebergTest, icebergMetrics) { // verifies iceberg.numSplits / iceberg.numDeletes runtime metrics
| 956 | +  folly::SingletonVault::singleton()->registrationComplete(); // finalize folly singleton registration before tasks run — TODO confirm this is needed here
| 957 | +
| 958 | +  // Helper function to aggregate a runtime metric across all pipelines and operators
| 959 | +  auto getAggregatedRuntimeMetric = [](const exec::TaskStats& taskStats, const std::string& metricName) -> int64_t { // sums RuntimeMetric::sum over every operator
| 960 | +    int64_t total = 0;
| 961 | +    for (const auto& pipelineStats : taskStats.pipelineStats) {
| 962 | +      for (const auto& operatorStats : pipelineStats.operatorStats) {
| 963 | +        auto it = operatorStats.runtimeStats.find(metricName);
| 964 | +        if (it != operatorStats.runtimeStats.end()) { // metric may be absent on operators that don't report it
| 965 | +          total += it->second.sum;
| 966 | +        }
| 967 | +      }
| 968 | +    }
| 969 | +    return total;
| 970 | +  };
| 971 | +
| 972 | +  std::map<std::string, std::vector<int64_t>> rowGroupSizesForFiles = { // case 1: one base data file with two row groups
| 973 | +    {"data_file_1", {100, 85}}};
| 974 | +  std::unordered_map<
| 975 | +      std::string,
| 976 | +      std::multimap<std::string, std::vector<int64_t>>>
| 977 | +      deleteFilesForBaseDatafiles;
| 978 | +  deleteFilesForBaseDatafiles["delete_file_1"] = {{"data_file_1", {0, 1, 99}}}; // 3 deleted positions in data_file_1
| 979 | +  auto task =
| 980 | +      assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles); // returns the Task so its stats can be inspected
| 981 | +  const auto& taskStats = task->taskStats();
| 982 | +
| 983 | +  ASSERT_EQ(getAggregatedRuntimeMetric(taskStats, "iceberg.numSplits"), 1); // one base file => one split
| 984 | +  ASSERT_EQ(getAggregatedRuntimeMetric(taskStats, "iceberg.numDeletes"), 3); // matches the 3 positions above
| 985 | +
| 986 | +  rowGroupSizesForFiles = { // case 2: two base data files sharing one delete file
| 987 | +    {"data_file_1", {100, 85}}, {"data_file_2", {99, 1}}};
| 988 | +  deleteFilesForBaseDatafiles.clear();
| 989 | +  deleteFilesForBaseDatafiles["delete_file_1"] = {
| 990 | +    {"data_file_1", {0, 100, 102, 184}}, {"data_file_2", {1, 98, 99}}}; // 4 + 3 = 7 deleted positions total
| 991 | +  task =
| 992 | +      assertPositionalDeletes(rowGroupSizesForFiles, deleteFilesForBaseDatafiles);
| 993 | +  const auto& taskStats2 = task->taskStats();
| 994 | +
| 995 | +  ASSERT_EQ(getAggregatedRuntimeMetric(taskStats2, "iceberg.numSplits"), 2); // one split per base data file
| 996 | +  ASSERT_EQ(getAggregatedRuntimeMetric(taskStats2, "iceberg.numDeletes"), 7); // matches the 7 positions above
| 997 | +}
950 | 998 | } // namespace facebook::velox::connector::hive::iceberg |
0 commit comments