Skip to content

wip: local k3d dev env (cAdvisor+kubelet) #1080

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions scripts/lgtm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Bring up a local, single-node k3d cluster for kubernetes-mixin development:
#   1. create the k3d cluster, mounting the Grafana provisioning config and the
#      rendered dashboards into the node,
#   2. deploy the grafana/otel-lgtm stack from lgtm.yaml,
#   3. install an OpenTelemetry Collector that scrapes cAdvisor and kubelet
#      and remote-writes the metrics to Prometheus inside the LGTM pod.
#
# Requires k3d, kubectl and helm on PATH. Safe to re-run: an existing cluster
# is reused, and the kubectl/helm steps are idempotent.

# -e: stop on first error, -u: unset variables are errors,
# -x: trace commands, pipefail: a failing pipeline stage fails the pipeline.
set -euxo pipefail

# Resolve every path from the script's own location so the script works no
# matter which directory it is invoked from.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "${script_dir}/.." && pwd)"

cluster_name="kubernetes-mixin"
# k3d registers the cluster in kubeconfig under the context "k3d-<cluster name>".
# Targeting it explicitly prevents applying manifests to an unrelated cluster
# that happens to be the current context.
kube_context="k3d-${cluster_name}"

# export time in milliseconds
# export OTEL_METRIC_EXPORT_INTERVAL=500

# use http instead of https (needed because of https://github.com/open-telemetry/opentelemetry-go/issues/4834)
# export OTEL_EXPORTER_OTLP_INSECURE="true"

# https://github.com/grafana/docker-otel-lgtm/tree/main/examples

# docker run -p 3001:3000 -p 4317:4317 -p 4318:4318 \
#   -v ./provisioning/dashboards:/otel-lgtm/grafana/conf/provisioning/dashboards \
#   -v ../dashboards_out:/kubernetes-mixin/dashboards_out \
#   --rm -ti grafana/otel-lgtm

# set up 1-node k3d cluster
# `k3d cluster list NAME` exits non-zero when the cluster does not exist, so
# the cluster is only created on the first run.
if k3d cluster list "${cluster_name}" >/dev/null 2>&1; then
  echo "k3d cluster '${cluster_name}' already exists; reusing it" >&2
else
  # The node-side paths below are consumed as hostPath volumes by lgtm.yaml.
  k3d cluster create "${cluster_name}" \
    --volume "${script_dir}/provisioning:/kubernetes-mixin/provisioning" \
    --volume "${repo_root}/dashboards_out:/kubernetes-mixin/dashboards_out"
fi

# run grafana, prometheus
# install dashboards in grafana
# wget https://raw.githubusercontent.com/grafana/docker-otel-lgtm/refs/heads/main/k8s/lgtm.yaml
kubectl --context "${kube_context}" apply -f "${script_dir}/lgtm.yaml"
# kubectl port-forward service/lgtm 3001:3000 4317:4317 4318:4318

# scrape kube-state-metrics, node_exporter, cAdvisor, kubelet, kube-proxy, kube-apiserver, kube-controller-manager, kube-scheduler... write to prometheus
helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
helm repo update
helm upgrade --install otel-collector-deployment open-telemetry/opentelemetry-collector \
  --kube-context "${kube_context}" \
  -n default \
  -f "${script_dir}/otel-collector-deployment.values.yaml"

# TODO install kube-state-metrics, node_exporter

# TODO OATs:
# https://github.com/grafana/oats
# test metrics in prometheus
# test recording rules in prometheus
# test alerting rules in prometheus

# TODO: e2e test dashboards?
94 changes: 94 additions & 0 deletions scripts/lgtm.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# this is intended for demo / testing purposes only, not for production usage
#
# Service exposing the single-pod LGTM stack inside the cluster. Each port
# forwards to the container port of the same number on pods labelled app=lgtm.
apiVersion: v1
kind: Service
metadata:
  name: lgtm
spec:
  selector:
    app: lgtm
  ports:
    - name: grafana
      protocol: TCP
      port: 3000
      targetPort: 3000
    - name: otel-grpc
      protocol: TCP
      port: 4317
      targetPort: 4317
    - name: otel-http
      protocol: TCP
      port: 4318
      targetPort: 4318
    # Target of the collector's prometheusremotewrite exporter
    # (http://lgtm:9090/api/v1/write).
    - name: prometheus
      protocol: TCP
      port: 9090
      targetPort: 9090
---
# Single-replica Deployment running the grafana/otel-lgtm all-in-one image.
# Component data directories are backed by emptyDir volumes, so all stored
# data is discarded when the pod is removed.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: lgtm
spec:
  replicas: 1
  selector:
    matchLabels:
      app: lgtm
  template:
    metadata:
      labels:
        app: lgtm
    spec:
      containers:
        - name: lgtm
          image: grafana/otel-lgtm:latest
          ports:
            - containerPort: 3000
            - containerPort: 4317
            - containerPort: 4318
            - containerPort: 9090
          # The pod is reported ready once /tmp/ready exists in the container.
          readinessProbe:
            exec:
              command:
                - cat
                - /tmp/ready
          # NOTE: By default OpenShift does not allow writing to the root
          # directory. That's why the data dirs for grafana, prometheus and
          # loki cannot be created there and the pod never becomes ready,
          # unless they are provided as volumes.
          # See: https://github.com/grafana/docker-otel-lgtm/issues/132
          volumeMounts:
            - name: tempo-data
              mountPath: /data/tempo
            - name: grafana-data
              mountPath: /data/grafana
            - name: loki-data
              mountPath: /data/loki
            - name: loki-storage
              mountPath: /loki
            - name: p8s-storage
              mountPath: /data/prometheus
            - name: pyroscope-storage
              mountPath: /data/pyroscope

            # Grafana dashboard provider config and the dashboard files that
            # the provider points at.
            - name: dashboards
              mountPath: /otel-lgtm/grafana/conf/provisioning/dashboards
            - name: dashboards-out
              mountPath: /kubernetes-mixin/dashboards_out
      volumes:
        - name: tempo-data
          emptyDir: {}
        - name: loki-data
          emptyDir: {}
        - name: grafana-data
          emptyDir: {}
        - name: loki-storage
          emptyDir: {}
        - name: p8s-storage
          emptyDir: {}
        - name: pyroscope-storage
          emptyDir: {}
        # hostPath volumes refer to paths on the cluster node. scripts/lgtm.sh
        # mounts the local provisioning/ and dashboards_out/ directories into
        # the k3d node at these locations when creating the cluster.
        - name: dashboards
          hostPath:
            path: /kubernetes-mixin/provisioning/dashboards
        - name: dashboards-out
          hostPath:
            path: /kubernetes-mixin/dashboards_out
141 changes: 141 additions & 0 deletions scripts/otel-collector-deployment.values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Helm values for the open-telemetry/opentelemetry-collector chart.
# Runs a collector that scrapes cAdvisor and kubelet metrics for every node
# through the Kubernetes API server proxy and forwards them to Prometheus in
# the LGTM pod.
#
# Based on the following guide:
# https://grafana.com/docs/grafana-cloud/monitor-infrastructure/kubernetes-monitoring/configuration/config-other-methods/otel-collector/
mode: deployment

image:
  repository: otel/opentelemetry-collector-contrib

# Read-only access needed for node discovery and for scraping node endpoints
# via the API server (nodes/proxy).
clusterRole:
  create: true
  rules:
    - apiGroups:
        - ''
      resources:
        - nodes
        - nodes/proxy
        - services
        - endpoints
        - pods
        - events
        - namespaces
        - namespaces/status
        - pods/status
        - replicationcontrollers
        - replicationcontrollers/status
        - resourcequotas
      verbs:
        - get
        - list
        - watch
    - nonResourceURLs:
        - /metrics
      verbs:
        - get
    - apiGroups:
        - apps
      resources:
        - daemonsets
        - deployments
        - replicasets
        - statefulsets
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - extensions
      resources:
        - daemonsets
        - deployments
        - replicasets
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - batch
      resources:
        - jobs
        - cronjobs
      verbs:
        - get
        - list
        - watch
    - apiGroups:
        - autoscaling
      resources:
        - horizontalpodautoscalers
      verbs:
        - get
        - list
        - watch

config:
  receivers:
    prometheus:
      config:
        scrape_configs:
          # Both jobs discover nodes, then rewrite the target so the scrape
          # goes to the API server, which proxies the request to the node.
          # `$$` is the collector's escape for a literal `$`, so Prometheus
          # receives `${1}` (the first regex capture group: the node name).
          - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            job_name: cadvisor
            kubernetes_sd_configs:
              - role: node
            relabel_configs:
              - replacement: kubernetes.default.svc.cluster.local:443
                target_label: __address__
              - regex: (.+)
                replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor
                source_labels:
                  - __meta_kubernetes_node_name
                target_label: __metrics_path__
            scheme: https
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
              insecure_skip_verify: false
              server_name: kubernetes

          - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            job_name: kubelet
            kubernetes_sd_configs:
              - role: node
            relabel_configs:
              - replacement: kubernetes.default.svc.cluster.local:443
                target_label: __address__
              - regex: (.+)
                replacement: /api/v1/nodes/$${1}/proxy/metrics
                source_labels:
                  - __meta_kubernetes_node_name
                target_label: __metrics_path__
            scheme: https
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
              insecure_skip_verify: false
              server_name: kubernetes

  processors:
    batch: {}

  exporters:
    # Push scraped metrics to Prometheus behind the `lgtm` Service.
    prometheusremotewrite:
      external_labels:
        cluster: 'kubernetes-mixin'
      endpoint: 'http://lgtm:9090/api/v1/write'

    # Also expose the same metrics on the collector itself for scraping
    # or debugging (see the `ports.prometheus` entry below).
    prometheus:
      endpoint: "0.0.0.0:8889"
      resource_to_telemetry_conversion:
        enabled: true

  service:
    extensions: [health_check]
    pipelines:
      metrics:
        receivers: [prometheus]
        processors: [batch]
        exporters: [prometheus, prometheusremotewrite]

# Publish the prometheus exporter's listen port on the pod and the Service.
ports:
  prometheus:
    enabled: true
    containerPort: 8889
    servicePort: 8889
    protocol: TCP
9 changes: 9 additions & 0 deletions scripts/provisioning/dashboards/dashboards.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Grafana dashboard provisioning: load dashboards from files on disk.
apiVersion: 1

providers:
  - name: dashboards
    type: file
    # How often, in seconds, Grafana re-scans the path for changed files.
    updateIntervalSeconds: 10
    options:
      # Directory where the rendered kubernetes-mixin dashboards are mounted.
      path: /kubernetes-mixin/dashboards_out
      # Mirror the directory layout under `path` as Grafana folders.
      foldersFromFilesStructure: true