Skip to content

Commit 749fe54

Browse files
authored
Add metrics to track requests throttled in QueryThrottler (#18740)
Signed-off-by: Stuti Biyani <[email protected]>
1 parent 2d926e6 commit 749fe54

File tree

5 files changed

+102
-21
lines changed

5 files changed

+102
-21
lines changed

go/vt/vttablet/tabletserver/querythrottler/query_throttler.go

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ import (
2222
"sync"
2323
"time"
2424

25+
"vitess.io/vitess/go/stats"
26+
"vitess.io/vitess/go/vt/servenv"
2527
"vitess.io/vitess/go/vt/sqlparser"
2628

2729
"vitess.io/vitess/go/vt/log"
@@ -37,10 +39,18 @@ import (
3739
)
3840

3941
const (
42+
queryThrottlerAppName = "QueryThrottler"
4043
// defaultPriority is the default priority value when none is specified
4144
defaultPriority = 100 // sqlparser.MaxPriorityValue
4245
)
4346

47+
type Stats struct {
48+
requestsTotal *stats.CountersWithMultiLabels
49+
requestsThrottled *stats.CountersWithMultiLabels
50+
totalLatency *servenv.MultiTimingsWrapper
51+
evaluateLatency *servenv.MultiTimingsWrapper
52+
}
53+
4454
type QueryThrottler struct {
4555
ctx context.Context
4656
throttleClient *throttle.Client
@@ -52,6 +62,8 @@ type QueryThrottler struct {
5262
cfgLoader ConfigLoader
5363
// strategy is the current throttling strategy handler.
5464
strategy registry.ThrottlingStrategyHandler
65+
env tabletenv.Env
66+
stats Stats
5567
}
5668

5769
// NewQueryThrottler creates a new query throttler.
@@ -65,6 +77,13 @@ func NewQueryThrottler(ctx context.Context, throttler *throttle.Throttler, cfgLo
6577
cfg: Config{},
6678
cfgLoader: cfgLoader,
6779
strategy: &registry.NoOpStrategy{}, // default strategy until config is loaded
80+
env: env,
81+
stats: Stats{
82+
requestsTotal: env.Exporter().NewCountersWithMultiLabels(queryThrottlerAppName+"Requests", "query throttler requests", []string{"Strategy", "Workload", "Priority"}),
83+
requestsThrottled: env.Exporter().NewCountersWithMultiLabels(queryThrottlerAppName+"Throttled", "query throttler requests throttled", []string{"Strategy", "Workload", "Priority", "MetricName", "MetricValue", "DryRun"}),
84+
totalLatency: env.Exporter().NewMultiTimings(queryThrottlerAppName+"TotalLatencyNs", "Total time each request takes in query throttling including evaluation, metric checks, and other overhead (nanoseconds)", []string{"Strategy", "Workload", "Priority"}),
85+
evaluateLatency: env.Exporter().NewMultiTimings(queryThrottlerAppName+"EvaluateLatencyNs", "Time each request takes to make the throttling decision (nanoseconds)", []string{"Strategy", "Workload", "Priority"}),
86+
},
6887
}
6988

7089
// Start the initial strategy
@@ -97,27 +116,49 @@ func (qt *QueryThrottler) Shutdown() {
97116
func (qt *QueryThrottler) Throttle(ctx context.Context, tabletType topodatapb.TabletType, parsedQuery *sqlparser.ParsedQuery, transactionID int64, options *querypb.ExecuteOptions) error {
98117
// Lock-free read: for maximum performance in the hot path as cfg and strategy are updated rarely (default once per minute).
99118
// They are word-sized and safe for atomic reads; stale data for one query is acceptable and avoids mutex contention in the hot path.
100-
if !qt.cfg.Enabled {
119+
tCfg := qt.cfg
120+
tStrategy := qt.strategy
121+
122+
if !tCfg.Enabled {
101123
return nil
102124
}
103125

126+
// Capture start time for latency measurements only when throttling is enabled
127+
startTime := time.Now()
128+
104129
// Extract query attributes once to avoid re computation in strategies
105130
attrs := registry.QueryAttributes{
106131
WorkloadName: extractWorkloadName(options),
107132
Priority: extractPriority(options),
108133
}
134+
strategyName := tStrategy.GetStrategyName()
135+
workload := attrs.WorkloadName
136+
priorityStr := strconv.Itoa(attrs.Priority)
137+
138+
// Defer total latency recording to ensure it's always emitted regardless of return path.
139+
defer func() {
140+
qt.stats.totalLatency.Record([]string{strategyName, workload, priorityStr}, startTime)
141+
}()
109142

110143
// Evaluate the throttling decision
111144
decision := qt.strategy.Evaluate(ctx, tabletType, parsedQuery, transactionID, attrs)
112145

146+
// Record evaluate-window latency immediately after Evaluate returns
147+
qt.stats.evaluateLatency.Record([]string{strategyName, workload, priorityStr}, startTime)
148+
149+
qt.stats.requestsTotal.Add([]string{strategyName, workload, priorityStr}, 1)
150+
113151
// If no throttling is needed, allow the query
114152
if !decision.Throttle {
115153
return nil
116154
}
117155

156+
// Emit metric of query being throttled.
157+
qt.stats.requestsThrottled.Add([]string{strategyName, workload, priorityStr, decision.MetricName, strconv.FormatFloat(decision.MetricValue, 'f', -1, 64), strconv.FormatBool(tCfg.DryRun)}, 1)
158+
118159
// If dry-run mode is enabled, log the decision but don't throttle
119-
if qt.cfg.DryRun {
120-
log.Warningf("[DRY-RUN] %s", decision.Message)
160+
if tCfg.DryRun {
161+
log.Warningf("[DRY-RUN] %s, metric name: %s, metric value: %f", decision.Message, decision.MetricName, decision.MetricValue)
121162
return nil
122163
}
123164

go/vt/vttablet/tabletserver/querythrottler/query_throttler_test.go

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"testing"
2323
"time"
2424

25+
"vitess.io/vitess/go/stats"
2526
"vitess.io/vitess/go/vt/log"
2627
querypb "vitess.io/vitess/go/vt/proto/query"
2728
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
@@ -157,16 +158,18 @@ func TestQueryThrottler_Shutdown(t *testing.T) {
157158
require.NotNil(t, strategy)
158159
}
159160

160-
// TestIncomingQueryThrottler_DryRunMode tests that dry-run mode logs decisions but doesn't throttle queries.
161-
func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
161+
// TestQueryThrottler_DryRunMode tests that dry-run mode logs decisions but doesn't throttle queries.
162+
func TestQueryThrottler_DryRunMode(t *testing.T) {
162163
tests := []struct {
163-
name string
164-
enabled bool
165-
dryRun bool
166-
throttleDecision registry.ThrottleDecision
167-
expectError bool
168-
expectDryRunLog bool
169-
expectedLogMsg string
164+
name string
165+
enabled bool
166+
dryRun bool
167+
throttleDecision registry.ThrottleDecision
168+
expectError bool
169+
expectDryRunLog bool
170+
expectedLogMsg string
171+
expectedTotalRequests int64
172+
expectedThrottledRequests int64
170173
}{
171174
{
172175
name: "Disabled throttler - no checks performed",
@@ -198,8 +201,9 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
198201
Throttle: false,
199202
Message: "Query allowed",
200203
},
201-
expectError: false,
202-
expectDryRunLog: false,
204+
expectError: false,
205+
expectDryRunLog: false,
206+
expectedTotalRequests: 1,
203207
},
204208
{
205209
name: "Normal mode - query throttled",
@@ -213,8 +217,10 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
213217
Threshold: 80.0,
214218
ThrottlePercentage: 1.0,
215219
},
216-
expectError: true,
217-
expectDryRunLog: false,
220+
expectError: true,
221+
expectDryRunLog: false,
222+
expectedTotalRequests: 1,
223+
expectedThrottledRequests: 1,
218224
},
219225
{
220226
name: "Dry-run mode - query would be throttled but allowed",
@@ -228,9 +234,11 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
228234
Threshold: 80.0,
229235
ThrottlePercentage: 1.0,
230236
},
231-
expectError: false,
232-
expectDryRunLog: true,
233-
expectedLogMsg: "[DRY-RUN] Query throttled: metric=cpu value=95.0 threshold=80.0",
237+
expectError: false,
238+
expectDryRunLog: true,
239+
expectedLogMsg: "[DRY-RUN] Query throttled: metric=cpu value=95.0 threshold=80.0, metric name: cpu, metric value: 95.000000",
240+
expectedTotalRequests: 1,
241+
expectedThrottledRequests: 1,
234242
},
235243
{
236244
name: "Dry-run mode - query allowed normally",
@@ -240,8 +248,9 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
240248
Throttle: false,
241249
Message: "Query allowed",
242250
},
243-
expectError: false,
244-
expectDryRunLog: false,
251+
expectError: false,
252+
expectDryRunLog: false,
253+
expectedTotalRequests: 1,
245254
},
246255
}
247256

@@ -252,6 +261,8 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
252261
decision: tt.throttleDecision,
253262
}
254263

264+
env := tabletenv.NewEnv(vtenv.NewTestEnv(), &tabletenv.TabletConfig{}, "TestThrottler")
265+
255266
// Create throttler with controlled config
256267
iqt := &QueryThrottler{
257268
ctx: context.Background(),
@@ -260,8 +271,18 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
260271
DryRun: tt.dryRun,
261272
},
262273
strategy: mockStrategy,
274+
env: env,
275+
stats: Stats{
276+
requestsTotal: env.Exporter().NewCountersWithMultiLabels(queryThrottlerAppName+"Requests", "TestThrottler requests", []string{"Strategy", "Workload", "Priority"}),
277+
requestsThrottled: env.Exporter().NewCountersWithMultiLabels(queryThrottlerAppName+"Throttled", "TestThrottler throttled", []string{"Strategy", "Workload", "Priority", "MetricName", "MetricValue", "DryRun"}),
278+
totalLatency: env.Exporter().NewMultiTimings(queryThrottlerAppName+"TotalLatencyMs", "Total latency of QueryThrottler.Throttle in milliseconds", []string{"Strategy", "Workload", "Priority"}),
279+
evaluateLatency: env.Exporter().NewMultiTimings(queryThrottlerAppName+"EvaluateLatencyMs", "Latency from Throttle entry to completion of Evaluate in milliseconds", []string{"Strategy", "Workload", "Priority"}),
280+
},
263281
}
264282

283+
iqt.stats.requestsTotal.ResetAll()
284+
iqt.stats.requestsThrottled.ResetAll()
285+
265286
// Capture log output
266287
logCapture := &testLogCapture{}
267288
originalLogWarningf := log.Warningf
@@ -299,6 +320,12 @@ func TestIncomingQueryThrottler_DryRunMode(t *testing.T) {
299320
} else {
300321
require.Empty(t, logCapture.logs, "Expected no log messages")
301322
}
323+
324+
// Verify stats expectation
325+
totalRequests := stats.CounterForDimension(iqt.stats.requestsTotal, "Strategy")
326+
throttledRequests := stats.CounterForDimension(iqt.stats.requestsThrottled, "Strategy")
327+
require.Equal(t, tt.expectedTotalRequests, totalRequests.Counts()["MockStrategy"], "Total requests should match expected")
328+
require.Equal(t, tt.expectedThrottledRequests, throttledRequests.Counts()["MockStrategy"], "Throttled requests should match expected")
302329
})
303330
}
304331
}

go/vt/vttablet/tabletserver/querythrottler/registry/noop_strategy.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,8 @@ func (s *NoOpStrategy) Start() {
5151
func (s *NoOpStrategy) Stop() {
5252
// No-op: NoOpStrategy has no resources to clean up
5353
}
54+
55+
// GetStrategyName returns the name of the strategy.
56+
func (s *NoOpStrategy) GetStrategyName() string {
57+
return string(ThrottlingStrategyUnknown)
58+
}

go/vt/vttablet/tabletserver/querythrottler/registry/noop_strategy_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,3 +91,8 @@ func TestNoOpStrategy_Evaluate(t *testing.T) {
9191
})
9292
}
9393
}
94+
95+
func TestNoOpStrategy_GetStrategyName(t *testing.T) {
96+
strategy := &NoOpStrategy{}
97+
require.Equal(t, string(ThrottlingStrategyUnknown), strategy.GetStrategyName())
98+
}

go/vt/vttablet/tabletserver/querythrottler/registry/throttling_handler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,7 @@ type ThrottlingStrategyHandler interface {
5858
// This method should be called when the strategy is no longer needed.
5959
// Implementations should clean up background processes, caches, or other resources.
6060
Stop()
61+
62+
// GetStrategyName returns the name of the strategy.
63+
GetStrategyName() string
6164
}

0 commit comments

Comments
 (0)