Skip to content

Commit 5524bf4

Browse files
DifferentialOrangeTotktonada
authored and committed
stats: make quantile age params configurable
The main motivation for making these parameters configurable is to be able to write time-adequate test cases for issue #286, but they may also be useful for getting rid of the #286 effect altogether or for calibrating the quantile time window. After this patch, the statistics summary quantile aging params `age_buckets_count` and `max_age_time` [1] can be configured: crud.cfg{ stats_quantile_age_buckets_count = 3, stats_quantile_max_age_time = 30, } Only type validation is conducted in crud.cfg; every other validation is performed by metrics itself. 1. https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary Part of #286
1 parent 2f39734 commit 5524bf4

File tree

9 files changed

+293
-13
lines changed

9 files changed

+293
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
88
## [Unreleased]
99

1010
### Added
11+
* Make metrics quantile collector age params configurable (#286).
1112

1213
### Changed
1314

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,17 @@ crud.cfg{stats_quantile_tolerated_error = 1e-4}
797797
```
798798
See [tarantool/metrics#189](https://github.com/tarantool/metrics/issues/189) for
799799
details about the issue.
800-
800+
You can also configure quantile `age_buckets_count` (default: 2) and
801+
`max_age_time` (in seconds, default: 60):
802+
```lua
803+
crud.cfg{
804+
stats_quantile_age_buckets_count = 3,
805+
stats_quantile_max_age_time = 30,
806+
}
807+
```
808+
See [`metrics` summary API](https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary)
809+
for details. These parameters can be used to smooth time window move
810+
or reduce the amount of `-nan` gaps for low request frequency applications.
801811

802812
`select` section additionally contains `details` collectors.
803813
```lua

crud/cfg.lua

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ local function set_defaults_if_empty(cfg)
2929
cfg.stats_quantile_tolerated_error = stats.DEFAULT_QUANTILE_TOLERATED_ERROR
3030
end
3131

32+
if cfg.stats_quantile_age_buckets_count == nil then
33+
cfg.stats_quantile_age_buckets_count = stats.DEFAULT_QUANTILE_AGE_BUCKET_COUNT
34+
end
35+
36+
if cfg.stats_quantile_max_age_time == nil then
37+
cfg.stats_quantile_max_age_time = stats.DEFAULT_QUANTILE_MAX_AGE_TIME
38+
end
39+
3240
return cfg
3341
end
3442

@@ -38,7 +46,9 @@ local function configure_stats(cfg, opts)
3846
if (opts.stats == nil)
3947
and (opts.stats_driver == nil)
4048
and (opts.stats_quantiles == nil)
41-
and (opts.stats_quantile_tolerated_error == nil) then
49+
and (opts.stats_quantile_tolerated_error == nil)
50+
and (opts.stats_quantile_age_buckets_count == nil)
51+
and (opts.stats_quantile_max_age_time == nil) then
4252
return
4353
end
4454

@@ -58,11 +68,21 @@ local function configure_stats(cfg, opts)
5868
opts.stats_quantile_tolerated_error = cfg.stats_quantile_tolerated_error
5969
end
6070

71+
if opts.stats_quantile_age_buckets_count == nil then
72+
opts.stats_quantile_age_buckets_count = cfg.stats_quantile_age_buckets_count
73+
end
74+
75+
if opts.stats_quantile_max_age_time == nil then
76+
opts.stats_quantile_max_age_time = cfg.stats_quantile_max_age_time
77+
end
78+
6179
if opts.stats == true then
6280
stats.enable{
6381
driver = opts.stats_driver,
6482
quantiles = opts.stats_quantiles,
6583
quantile_tolerated_error = opts.stats_quantile_tolerated_error,
84+
quantile_age_buckets_count = opts.stats_quantile_age_buckets_count,
85+
quantile_max_age_time = opts.stats_quantile_max_age_time,
6686
}
6787
else
6888
stats.disable()
@@ -72,6 +92,8 @@ local function configure_stats(cfg, opts)
7292
rawset(cfg, 'stats_driver', opts.stats_driver)
7393
rawset(cfg, 'stats_quantiles', opts.stats_quantiles)
7494
rawset(cfg, 'stats_quantile_tolerated_error', opts.stats_quantile_tolerated_error)
95+
rawset(cfg, 'stats_quantile_age_buckets_count', opts.stats_quantile_age_buckets_count)
96+
rawset(cfg, 'stats_quantile_max_age_time', opts.stats_quantile_max_age_time)
7597
end
7698

7799
--- Configure CRUD module.
@@ -107,6 +129,21 @@ end
107129
-- See https://github.com/tarantool/metrics/issues/189 for issue details.
108130
-- Decreasing the value increases computational load.
109131
--
132+
-- @number[opt=2] opts.stats_quantile_age_buckets_count
133+
-- Count of summary quantile buckets.
134+
-- See tarantool/metrics summary API for details:
135+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
136+
-- Increasing the value smooths time window move,
137+
-- but consumes additional memory and CPU.
138+
--
139+
-- @number[opt=60] opts.stats_quantile_max_age_time
140+
-- Duration of each bucket’s lifetime in seconds.
141+
-- See tarantool/metrics summary API for details:
142+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
143+
-- Smaller bucket lifetime results in smaller time window for quantiles,
144+
-- but more CPU is spent on bucket rotation. If your application has low request
145+
-- frequency, increase the value to reduce the amount of `-nan` gaps in quantile values.
146+
--
110147
-- @return Configuration table.
111148
--
112149
local function __call(self, opts)
@@ -115,6 +152,8 @@ local function __call(self, opts)
115152
stats_driver = '?string',
116153
stats_quantiles = '?boolean',
117154
stats_quantile_tolerated_error = '?number',
155+
stats_quantile_age_buckets_count = '?number',
156+
stats_quantile_max_age_time = '?number',
118157
})
119158

120159
opts = table.deepcopy(opts) or {}

crud/stats/init.lua

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,30 @@ end
9090
-- If quantile value is -Inf, try to decrease quantile tolerated error.
9191
-- See https://github.com/tarantool/metrics/issues/189 for issue details.
9292
--
93+
-- @number[opt=2] opts.quantile_age_buckets_count
94+
-- Count of summary quantile buckets.
95+
-- See tarantool/metrics summary API for details:
96+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
97+
-- Increasing the value smooths time window move,
98+
-- but consumes additional memory and CPU.
99+
--
100+
-- @number[opt=60] opts.quantile_max_age_time
101+
-- Duration of each bucket’s lifetime in seconds.
102+
-- See tarantool/metrics summary API for details:
103+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
104+
-- Smaller bucket lifetime results in smaller time window for quantiles,
105+
-- but more CPU is spent on bucket rotation. If your application has low request
106+
-- frequency, increase the value to reduce the amount of `-nan` gaps in quantile values.
107+
--
93108
-- @treturn boolean Returns `true`.
94109
--
95110
function stats.enable(opts)
96111
checks({
97112
driver = '?string',
98113
quantiles = '?boolean',
99114
quantile_tolerated_error = '?number',
115+
quantile_age_buckets_count = '?number',
116+
quantile_max_age_time = '?number',
100117
})
101118

102119
StatsError:assert(
@@ -122,10 +139,20 @@ function stats.enable(opts)
122139
opts.quantile_tolerated_error = stats.DEFAULT_QUANTILE_TOLERATED_ERROR
123140
end
124141

142+
if opts.quantile_age_buckets_count == nil then
143+
opts.quantile_age_buckets_count = stats.DEFAULT_QUANTILE_AGE_BUCKET_COUNT
144+
end
145+
146+
if opts.quantile_max_age_time == nil then
147+
opts.quantile_max_age_time = stats.DEFAULT_QUANTILE_MAX_AGE_TIME
148+
end
149+
125150
-- Do not reinit if called with same options.
126151
if internal.driver == opts.driver
127152
and internal.quantiles == opts.quantiles
128-
and internal.quantile_tolerated_error == opts.quantile_tolerated_error then
153+
and internal.quantile_tolerated_error == opts.quantile_tolerated_error
154+
and internal.quantile_age_buckets_count == opts.quantile_age_buckets_count
155+
and internal.quantile_max_age_time == opts.quantile_max_age_time then
129156
return true
130157
end
131158

@@ -136,11 +163,15 @@ function stats.enable(opts)
136163

137164
internal:get_registry().init{
138165
quantiles = opts.quantiles,
139-
quantile_tolerated_error = opts.quantile_tolerated_error
166+
quantile_tolerated_error = opts.quantile_tolerated_error,
167+
quantile_age_buckets_count = opts.quantile_age_buckets_count,
168+
quantile_max_age_time = opts.quantile_max_age_time,
140169
}
141170

142171
internal.quantiles = opts.quantiles
143172
internal.quantile_tolerated_error = opts.quantile_tolerated_error
173+
internal.quantile_age_buckets_count = opts.quantile_age_buckets_count
174+
internal.quantile_max_age_time = opts.quantile_max_age_time
144175

145176
return true
146177
end
@@ -162,7 +193,9 @@ function stats.reset()
162193
internal:get_registry().destroy()
163194
internal:get_registry().init{
164195
quantiles = internal.quantiles,
165-
quantile_tolerated_error = internal.quantile_tolerated_error
196+
quantile_tolerated_error = internal.quantile_tolerated_error,
197+
quantile_age_buckets_count = internal.quantile_age_buckets_count,
198+
quantile_max_age_time = internal.quantile_max_age_time,
166199
}
167200

168201
return true
@@ -184,6 +217,9 @@ function stats.disable()
184217
internal:get_registry().destroy()
185218
internal.driver = nil
186219
internal.quantiles = nil
220+
internal.quantile_tolerated_error = nil
221+
internal.quantile_age_buckets_count = nil
222+
internal.quantile_max_age_time = nil
187223

188224
return true
189225
end
@@ -495,4 +531,10 @@ stats.internal = internal
495531
--- Default metrics quantile precision.
496532
stats.DEFAULT_QUANTILE_TOLERATED_ERROR = 1e-3
497533

534+
--- Default metrics quantile bucket count.
535+
stats.DEFAULT_QUANTILE_AGE_BUCKET_COUNT = 2
536+
537+
--- Default metrics quantile bucket lifetime.
538+
stats.DEFAULT_QUANTILE_MAX_AGE_TIME = 60
539+
498540
return stats

crud/stats/local_registry.lua

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,20 @@ local StatsLocalError = errors.new_class('StatsLocalError', {capture_stack = fal
2828
-- @number opts.quantile_tolerated_error
2929
-- Quantiles is not supported for local, so the value is ignored.
3030
--
31+
-- @number opts.quantile_age_buckets_count
32+
-- Quantiles is not supported for local, so the value is ignored.
33+
--
34+
-- @number opts.quantile_max_age_time
35+
-- Quantiles is not supported for local, so the value is ignored.
36+
--
3137
-- @treturn boolean Returns `true`.
3238
--
3339
function registry.init(opts)
3440
dev_checks({
3541
quantiles = 'boolean',
3642
quantile_tolerated_error = 'number',
43+
quantile_age_buckets_count = 'number',
44+
quantile_max_age_time = 'number',
3745
})
3846

3947
StatsLocalError:assert(opts.quantiles == false,

crud/stats/metrics_registry.lua

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,6 @@ local metric_name = {
3131

3232
local LATENCY_QUANTILE = 0.99
3333

34-
local DEFAULT_AGE_PARAMS = {
35-
age_buckets_count = 2,
36-
max_age_time = 60,
37-
}
38-
3934
--- Check if application supports metrics rock for registry
4035
--
4136
-- `metrics >= 0.10.0` is required.
@@ -87,19 +82,39 @@ end
8782
-- If quantile value is -Inf, try to decrease quantile tolerated error.
8883
-- See https://github.com/tarantool/metrics/issues/189 for issue details.
8984
--
85+
-- @number[opt=2] opts.quantile_age_buckets_count
86+
-- Count of summary quantile buckets.
87+
-- See tarantool/metrics summary API for details:
88+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
89+
-- Increasing the value smooths time window move,
90+
-- but consumes additional memory and CPU.
91+
--
92+
-- @number[opt=60] opts.quantile_max_age_time
93+
-- Duration of each bucket’s lifetime in seconds.
94+
-- See tarantool/metrics summary API for details:
95+
-- https://www.tarantool.io/ru/doc/latest/book/monitoring/api_reference/#summary
96+
-- Smaller bucket lifetime results in smaller time window for quantiles,
97+
-- but more CPU is spent on bucket rotation. If your application has low request
98+
-- frequency, increase the value to reduce the amount of `-nan` gaps in quantile values.
99+
--
90100
-- @treturn boolean Returns `true`.
91101
--
92102
function registry.init(opts)
93103
dev_checks({
94104
quantiles = 'boolean',
95105
quantile_tolerated_error = 'number',
106+
quantile_age_buckets_count = 'number',
107+
quantile_max_age_time = 'number',
96108
})
97109

98110
local quantile_params = nil
99111
local age_params = nil
100112
if opts.quantiles == true then
101113
quantile_params = {[LATENCY_QUANTILE] = opts.quantile_tolerated_error}
102-
age_params = DEFAULT_AGE_PARAMS
114+
age_params = {
115+
age_buckets_count = opts.quantile_age_buckets_count,
116+
max_age_time = opts.quantile_max_age_time,
117+
}
103118
end
104119

105120
internal.registry = {}

test/integration/cfg_test.lua

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ group.test_defaults = function(g)
2727
stats_driver = stats.get_default_driver(),
2828
stats_quantiles = false,
2929
stats_quantile_tolerated_error = 1e-3,
30+
stats_quantile_age_buckets_count = 2,
31+
stats_quantile_max_age_time = 60,
3032
})
3133
end
3234

@@ -100,3 +102,47 @@ group.test_gh_284_preset_stats_quantile_tolerated_error_is_preserved = function(
100102
t.assert_equals(cfg.stats_quantile_tolerated_error, 1e-4,
101103
'Preset stats_quantile_tolerated_error presents')
102104
end
105+
106+
group.test_gh_284_preset_stats_quantile_age_buckets_count_is_preserved = function(g)
107+
-- Arrange some cfg values so test case will not depend on defaults.
108+
local cfg = g.cluster:server('router'):eval(
109+
"return require('crud').cfg(...)",
110+
{{ stats = false }})
111+
t.assert_equals(cfg.stats, false)
112+
113+
-- Set stats_quantile_age_buckets_count.
114+
local cfg = g.cluster:server('router'):eval(
115+
"return require('crud').cfg(...)",
116+
{{ stats_quantile_age_buckets_count = 3 }})
117+
t.assert_equals(cfg.stats_quantile_age_buckets_count, 3)
118+
119+
-- Set another cfg parameter, assert preset stats_quantile_age_buckets_count presents.
120+
local cfg = g.cluster:server('router'):eval(
121+
"return require('crud').cfg(...)",
122+
{{ stats = true }})
123+
t.assert_equals(cfg.stats, true)
124+
t.assert_equals(cfg.stats_quantile_age_buckets_count, 3,
125+
'Preset stats_quantile_age_buckets_count presents')
126+
end
127+
128+
group.test_gh_284_preset_stats_quantile_max_age_time_is_preserved = function(g)
129+
-- Arrange some cfg values so test case will not depend on defaults.
130+
local cfg = g.cluster:server('router'):eval(
131+
"return require('crud').cfg(...)",
132+
{{ stats = false }})
133+
t.assert_equals(cfg.stats, false)
134+
135+
-- Set stats_quantile_max_age_time.
136+
local cfg = g.cluster:server('router'):eval(
137+
"return require('crud').cfg(...)",
138+
{{ stats_quantile_max_age_time = 30 }})
139+
t.assert_equals(cfg.stats_quantile_max_age_time, 30)
140+
141+
-- Set another cfg parameter, assert preset stats_quantile_max_age_time presents.
142+
local cfg = g.cluster:server('router'):eval(
143+
"return require('crud').cfg(...)",
144+
{{ stats = true }})
145+
t.assert_equals(cfg.stats, true)
146+
t.assert_equals(cfg.stats_quantile_max_age_time, 30,
147+
'Preset stats_quantile_max_age_time presents')
148+
end

test/integration/stats_test.lua

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ local function enable_stats(g, params)
5555
require('crud').cfg{
5656
stats = true,
5757
stats_driver = params.driver,
58-
stats_quantiles = params.quantiles
58+
stats_quantiles = params.quantiles,
59+
stats_quantile_tolerated_error = 1e-3,
60+
stats_quantile_age_buckets_count = 3,
61+
stats_quantile_max_age_time = 60,
5962
}
6063
]], { params })
6164
end

0 commit comments

Comments
 (0)