Skip to content

[8.19] (backport #7754) Set fullnameOverride and validate Otel kube-stack deployment datastreams are being written (metrics, traces) #8002

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: feature

# Change summary; an ~80-character description of the change.
summary: set collectors fullnameOverride for edot kube-stack values

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling by finding the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/7754

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
issue: https://github.com/elastic/elastic-agent/issues/7381
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ collectors:
# Cluster is a K8s deployment EDOT collector focused on gathering telemetry
# at the cluster level (Kubernetes Events and cluster metrics).
cluster:
fullnameOverride: "opentelemetry-kube-stack-cluster-stats"
env:
- name: ELASTIC_AGENT_OTEL
value: '"true"'
Expand Down Expand Up @@ -184,6 +185,7 @@ collectors:
# node level and exposing an OTLP endpoint for data ingestion.
# Auto-instrumentation SDKs will use this endpoint.
daemon:
fullnameOverride: "opentelemetry-kube-stack-daemon"
env:
# Work around for open /mounts error: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/35990
- name: HOST_PROC_MOUNTINFO
Expand Down Expand Up @@ -525,6 +527,7 @@ collectors:
# Gateway is a K8s deployment EDOT collector focused on processing and
# forwarding telemetry to an Elasticsearch endpoint.
gateway:
fullnameOverride: "opentelemetry-kube-stack-gateway"
suffix: gateway
replicas: 2
autoscaler:
Expand Down
3 changes: 3 additions & 0 deletions deploy/helm/edot-collector/kube-stack/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ collectors:
# Cluster is a K8s deployment EDOT collector focused on gathering telemetry
# at the cluster level (Kubernetes Events and cluster metrics).
cluster:
fullnameOverride: "opentelemetry-kube-stack-cluster-stats"
env:
- name: ELASTIC_AGENT_OTEL
value: '"true"'
Expand Down Expand Up @@ -184,6 +185,7 @@ collectors:
# node level and exposing an OTLP endpoint for data ingestion.
# Auto-instrumentation SDKs will use this endpoint.
daemon:
fullnameOverride: "opentelemetry-kube-stack-daemon"
env:
# Work around for open /mounts error: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/35990
- name: HOST_PROC_MOUNTINFO
Expand Down Expand Up @@ -525,6 +527,7 @@ collectors:
# Gateway is a K8s deployment EDOT collector focused on processing and
# forwarding telemetry to an Elasticsearch endpoint.
gateway:
fullnameOverride: "opentelemetry-kube-stack-gateway"
resources:
limits:
cpu: 1500m
Expand Down
62 changes: 20 additions & 42 deletions testing/integration/kubernetes_agent_standalone_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent-libs/kibana"
"github.com/elastic/elastic-agent-libs/testing/estools"
"github.com/elastic/go-elasticsearch/v8"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -861,7 +862,8 @@ func k8sCheckAgentStatus(ctx context.Context, client klient.Client, stdout *byte

// k8sGetAgentID returns the agent ID for the given agent pod
func k8sGetAgentID(ctx context.Context, client klient.Client, stdout *bytes.Buffer, stderr *bytes.Buffer,
namespace string, agentPodName string, containerName string) (string, error) {
namespace string, agentPodName string, containerName string,
) (string, error) {
command := []string{"elastic-agent", "status", "--output=json"}

status := atesting.AgentStatusOutput{} // clear status output
Expand Down Expand Up @@ -994,7 +996,7 @@ func k8sDumpPods(t *testing.T, ctx context.Context, client klient.Client, testNa
header := &tar.Header{
Name: logFileName,
Size: int64(len(b)),
Mode: 0600,
Mode: 0o600,
ModTime: time.Now(),
AccessTime: time.Now(),
ChangeTime: time.Now(),
Expand All @@ -1021,7 +1023,7 @@ func k8sDumpPods(t *testing.T, ctx context.Context, client klient.Client, testNa
header := &tar.Header{
Name: statesDumpFile,
Size: int64(len(b)),
Mode: 0600,
Mode: 0o600,
ModTime: time.Now(),
AccessTime: time.Now(),
ChangeTime: time.Now(),
Expand Down Expand Up @@ -1099,35 +1101,8 @@ func k8sRenderKustomize(kustomizePath string) ([]byte, error) {
}

// generateESAPIKey generates an API key for the given Elasticsearch.
func generateESAPIKey(esClient *elasticsearch.Client, keyName string) (string, error) {
apiKeyReqBody := fmt.Sprintf(`{
"name": "%s",
"expiration": "1d"
}`, keyName)

resp, err := esClient.Security.CreateAPIKey(strings.NewReader(apiKeyReqBody))
if err != nil {
return "", err
}
defer resp.Body.Close()

response := make(map[string]interface{})
err = json.NewDecoder(resp.Body).Decode(&response)
if err != nil {
return "", err
}

keyToken := response["api_key"].(string)
if keyToken == "" {
return "", fmt.Errorf("key token is empty")
}

keyID := response["id"].(string)
if keyID == "" {
return "", fmt.Errorf("key ID is empty")
}

return fmt.Sprintf("%s:%s", keyID, keyToken), nil
func generateESAPIKey(esClient *elasticsearch.Client, keyName string) (estools.APIKeyResponse, error) {
	// Keys are short-lived on purpose: tests only need them for one run.
	request := estools.APIKeyRequest{
		Name:       keyName,
		Expiration: "1d",
	}
	return estools.CreateAPIKey(context.Background(), esClient, request)
}

// k8sDeleteOpts contains options for deleting k8s objects
Expand Down Expand Up @@ -1327,6 +1302,8 @@ type k8sContext struct {
esHost string
// esAPIKey is the API key of the elasticsearch to use in the test
esAPIKey string
// esEncodedAPIKey is the encoded API key of the elasticsearch to use in the test
esEncodedAPIKey string
// enrollParams contains the information needed to enroll an agent with Fleet in the test
enrollParams *fleettools.EnrollParams
// createdAt is the time when the k8sContext was created
Expand Down Expand Up @@ -1412,16 +1389,17 @@ func k8sGetContext(t *testing.T, info *define.Info) k8sContext {
require.NoError(t, err, "failed to create fleet enroll params")

return k8sContext{
client: client,
clientSet: clientSet,
agentImage: agentImage,
agentImageRepo: agentImageRepo,
agentImageTag: agentImageTag,
logsBasePath: testLogsBasePath,
esHost: esHost,
esAPIKey: esAPIKey,
enrollParams: enrollParams,
createdAt: time.Now(),
client: client,
clientSet: clientSet,
agentImage: agentImage,
agentImageRepo: agentImageRepo,
agentImageTag: agentImageTag,
logsBasePath: testLogsBasePath,
esHost: esHost,
esAPIKey: esAPIKey.APIKey,
esEncodedAPIKey: esAPIKey.Encoded,
enrollParams: enrollParams,
createdAt: time.Now(),
}
}

Expand Down
87 changes: 84 additions & 3 deletions testing/integration/otel_helm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@
package integration

import (
"bufio"
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"testing"
"time"

Expand All @@ -20,7 +24,9 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/elastic/elastic-agent-libs/testing/estools"
"github.com/elastic/elastic-agent/pkg/testing/define"
testK8s "github.com/elastic/elastic-agent/pkg/testing/kubernetes"
)

var (
Expand All @@ -37,6 +43,9 @@ func TestOtelKubeStackHelm(t *testing.T) {
// only test the basic and the wolfi container with otel
{Type: define.Kubernetes, DockerVariant: "basic"},
{Type: define.Kubernetes, DockerVariant: "wolfi"},
// elastic otel collector image
{Type: define.Kubernetes, DockerVariant: "elastic-otel-collector"},
{Type: define.Kubernetes, DockerVariant: "elastic-otel-collector-wolfi"},
},
Group: define.Kubernetes,
})
Expand Down Expand Up @@ -64,12 +73,17 @@ func TestOtelKubeStackHelm(t *testing.T) {
k8sStepHelmDeployWithValueOptions(chartLocation, "kube-stack-otel",
values.Options{
ValueFiles: []string{"../../deploy/helm/edot-collector/kube-stack/values.yaml"},
Values: []string{fmt.Sprintf("defaultCRConfig.image.repository=%s", kCtx.agentImageRepo), fmt.Sprintf("defaultCRConfig.image.tag=%s", kCtx.agentImageTag)},

Values: []string{
fmt.Sprintf("defaultCRConfig.image.repository=%s", kCtx.agentImageRepo),
fmt.Sprintf("defaultCRConfig.image.tag=%s", kCtx.agentImageTag),
// override cluster wide
// endpoint for tests
"instrumentation.exporter.endpoint=http://opentelemetry-kube-stack-daemon-collector:4318",
},
// override secrets reference with env variables
JSONValues: []string{
fmt.Sprintf(`collectors.gateway.env[1]={"name":"ELASTIC_ENDPOINT","value":"%s"}`, kCtx.esHost),
fmt.Sprintf(`collectors.gateway.env[2]={"name":"ELASTIC_API_KEY","value":"%s"}`, kCtx.esAPIKey),
fmt.Sprintf(`collectors.gateway.env[2]={"name":"ELASTIC_API_KEY","value":"%s"}`, kCtx.esEncodedAPIKey),
},
},
),
Expand All @@ -83,6 +97,15 @@ func TestOtelKubeStackHelm(t *testing.T) {
// - Two Gateway pods to collect, aggregate and forward
// telemetry.
k8sStepCheckRunningPods("app.kubernetes.io/managed-by=opentelemetry-operator", 4, "otc-container"),
// validate kubeletstats metrics are being
// pushed
k8sStepCheckDatastreamsHits(info, "metrics", "kubeletstatsreceiver.otel", "default"),
// validates auto-instrumentation and traces
// datastream generation
func(t *testing.T, ctx context.Context, kCtx k8sContext, namespace string) {
k8sStepDeployJavaApp()(t, ctx, kCtx, namespace)
k8sStepCheckDatastreamsHits(info, "traces", "generic.otel", "default")(t, ctx, kCtx, namespace)
},
},
},
{
Expand Down Expand Up @@ -175,3 +198,61 @@ func k8sStepCheckRunningPods(podLabelSelector string, expectedPodNumber int, con
}, 5*time.Minute, 10*time.Second, fmt.Sprintf("at least %d agent containers should be checked", expectedPodNumber))
}
}

// k8sStepDeployJavaApp deploys the demo Java application from
// testdata/java_app.yaml into the test namespace. The manifest carries the
// instrumentation.opentelemetry.io/inject-java annotation, so the OTel
// operator auto-instruments it and the test can validate traces ingestion.
func k8sStepDeployJavaApp() k8sTestStep {
	return func(t *testing.T, ctx context.Context, kCtx k8sContext, namespace string) {
		javaApp, err := os.ReadFile(filepath.Join("testdata", "java_app.yaml"))
		require.NoError(t, err, "failed to read java app manifest")

		objects, err := testK8s.LoadFromYAML(bufio.NewReader(bytes.NewReader(javaApp)))
		// NOTE: previous message said "rendered kustomize", which was a
		// copy-paste leftover — this parses the java app manifest.
		require.NoError(t, err, "failed to parse java app manifest")

		err = k8sCreateObjects(ctx, kCtx.client, k8sCreateOpts{wait: true, namespace: namespace}, objects...)
		require.NoError(t, err, "failed to create java app objects")
	}
}

// k8sStepCheckDatastreamsHits checks that the corresponding Elasticsearch
// datastream (dsType-dataset-datastreamNamespace) exists and that documents
// originating from the test's k8s namespace are being written to it.
func k8sStepCheckDatastreamsHits(info *define.Info, dsType, dataset, datastreamNamespace string) k8sTestStep {
	return func(t *testing.T, ctx context.Context, kCtx k8sContext, namespace string) {
		dsName := fmt.Sprintf("%s-%s-%s", dsType, dataset, datastreamNamespace)
		require.Eventually(t, func() bool {
			query := queryK8sNamespaceDataStream(dsType, dataset, datastreamNamespace, namespace)
			docs, err := estools.PerformQueryForRawQuery(ctx, query, fmt.Sprintf(".ds-%s*", dsType), info.ESClient)
			if err != nil {
				// Do not use require inside the Eventually condition: FailNow
				// from the polling goroutine aborts the poll instead of
				// letting a transient query error be retried.
				t.Logf("failed to get %s datastream documents: %v", dsName, err)
				return false
			}
			return docs.Hits.Total.Value > 0
		}, 5*time.Minute, 10*time.Second, fmt.Sprintf("at least one document should be available for %s datastream", dsName))
	}
}

// queryK8sNamespaceDataStream builds an Elasticsearch bool query that matches
// documents of the given datastream (type/dataset/namespace) which originate
// from the given Kubernetes namespace. Only the "message" field is returned.
func queryK8sNamespaceDataStream(dsType, dataset, datastreamNamespace, k8snamespace string) map[string]any {
	// Field/value pairs for the term filters, in a fixed order.
	terms := []struct {
		field string
		value string
	}{
		{"data_stream.dataset", dataset},
		{"data_stream.namespace", datastreamNamespace},
		{"data_stream.type", dsType},
		{"resource.attributes.k8s.namespace.name", k8snamespace},
	}

	filters := make([]any, 0, len(terms))
	for _, tf := range terms {
		filters = append(filters, map[string]any{
			"term": map[string]any{tf.field: tf.value},
		})
	}

	return map[string]any{
		"_source": []string{"message"},
		"query": map[string]any{
			"bool": map[string]any{
				"filter": filters,
			},
		},
	}
}
24 changes: 24 additions & 0 deletions testing/integration/testdata/java_app.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Minimal Java demo application used by the OTel kube-stack integration tests
# to validate trace auto-instrumentation and traces datastream creation.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: java-app
  name: java-app
spec:
  replicas: 1
  selector:
    matchLabels:
      app: java-app
  template:
    metadata:
      labels:
        app: java-app
      annotations:
        # Ask the OpenTelemetry operator to inject the Java
        # auto-instrumentation agent into this pod.
        instrumentation.opentelemetry.io/inject-java: "true"
    spec:
      containers:
        - name: java-app
          image: docker.elastic.co/demos/apm/k8s-webhook-test
          env:
            # Limit method-level instrumentation to the listed method so the
            # test produces a small, predictable set of spans.
            - name: OTEL_INSTRUMENTATION_METHODS_INCLUDE
              value: "test.Testing[methodB]"