|
1 | | -{ |
2 | | - prometheusAlerts+:: { |
3 | | - groups+: [{ |
4 | | - name: 'cert-manager', |
5 | | - rules: [ |
6 | | - { |
7 | | - alert: 'CertManagerAbsent', |
8 | | - expr: 'absent(up{job="%(certManagerJobLabel)s"})' % $._config, |
9 | | - 'for': '10m', |
10 | | - labels: { |
11 | | - severity: 'critical', |
12 | | - }, |
13 | | - annotations: { |
14 | | - summary: 'Cert Manager has disappeared from Prometheus service discovery.', |
15 | | - description: "New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.", |
16 | | - runbook_url: 'https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent', |
17 | | - }, |
18 | | - }, |
19 | | - { |
20 | | - alert: 'CertManagerCertExpirySoon', |
21 | | - expr: ||| |
22 | | - avg by (exported_namespace, namespace, name) ( |
23 | | - certmanager_certificate_expiration_timestamp_seconds - time() |
24 | | - ) < (%s * 24 * 3600) # configured expiry threshold (days) in seconds |
25 | | - ||| % $._config.certManagerCertExpiryDays, |
26 | | - 'for': '1h', |
27 | | - labels: { |
28 | | - severity: 'warning', |
29 | | - }, |
30 | | - annotations: { |
31 | | - summary: 'The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago.', |
32 | | - description: 'The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}.', |
33 | | - dashboard_url: $._config.grafanaExternalUrl + '/d/TvuRo2iMk/cert-manager', |
34 | | - runbook_url: 'https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#CertManagerCertExpirySoon', |
35 | | - }, |
36 | | - }, |
37 | | - { |
38 | | - alert: 'CertManagerCertNotReady', |
39 | | - expr: ||| |
40 | | - max by (name, exported_namespace, namespace, condition) ( |
41 | | - certmanager_certificate_ready_status{condition!="True"} == 1 |
42 | | - ) |
43 | | - |||, |
44 | | - 'for': '10m', |
45 | | - labels: { |
46 | | - severity: 'critical', |
47 | | - }, |
48 | | - annotations: { |
49 | | - summary: 'The cert `{{ $labels.name }}` is not ready to serve traffic.', |
50 | | - description: 'This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead.', |
51 | | - dashboard_url: $._config.grafanaExternalUrl + '/d/TvuRo2iMk/cert-manager', |
52 | | - runbook_url: 'https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#CertManagerCertNotReady', |
53 | | - }, |
54 | | - }, |
55 | | - { |
56 | | - alert: 'CertManagerCertExpiryMetricMissing', |
57 | | - expr: 'absent(certmanager_certificate_expiration_timestamp_seconds)', |
58 | | - 'for': '10m', |
59 | | - labels: { |
60 | | - severity: 'info', |
61 | | - }, |
62 | | - annotations: { |
63 | | - summary: 'The metric used to observe cert-manager cert expiry is missing.', |
64 | | - description: 'We are blind as to whether or not we can alert on certificates expiring. It could also be the case that there have not been any Certificate CRDs created.', |
65 | | - dashboard_url: $._config.grafanaExternalUrl + '/d/TvuRo2iMk/cert-manager', |
66 | | - runbook_url: 'https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#CertManagerCertExpiryMetricMissing', |
67 | | - }, |
68 | | - }, |
69 | | - { |
70 | | - alert: 'CertManagerHittingRateLimits', |
71 | | - expr: ||| |
72 | | - sum by (host) ( |
73 | | - rate(certmanager_http_acme_client_request_count{status="429"}[5m]) |
74 | | - ) > 0 |
75 | | - |||, |
76 | | - 'for': '5m', |
77 | | - labels: { |
78 | | - severity: 'critical', |
79 | | - }, |
80 | | - annotations: { |
81 | | - summary: 'Cert manager hitting LetsEncrypt rate limits.', |
82 | | - description: 'Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.', |
83 | | - dashboard_url: $._config.grafanaExternalUrl + '/d/TvuRo2iMk/cert-manager', |
84 | | - runbook_url: 'https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#CertManagerHittingRateLimits', |
85 | | - }, |
86 | | - }, |
87 | | - ], |
88 | | - }], |
89 | | - }, |
90 | | -} |
| 1 | +(import 'absent.libsonnet') + |
| 2 | +(import 'certificates.libsonnet') + |
| 3 | +(import 'add-runbook-links.libsonnet') |
0 commit comments