Skip to content

feat: add config option to set service explorer attributes #202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions integration/scripts/test_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@
import utils as u


@u.print_test_decorator
def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:
init_command = r'Set-Location "C:\Program Files\Observe\observe-agent"; ./observe-agent init-config --token {} --observe_url {} --cloud_resource_detectors ec2'.format(
def init_config_command(env_vars: dict) -> str:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change sets a resource attribute in the config for all tests. I had some yaml spacing issues in my diff originally, so I added extra logging; that is why the test files have other changes.

return "init-config --token {} --observe_url {} --cloud_resource_detectors ec2 --resource_attributes deployment.environment=test".format(
env_vars["observe_token"], env_vars["observe_url"]
)


def run_init_config_common(remote_host: u.Host, init_command: str) -> None:
# Print the config to be used first
result = remote_host.run_command(init_command + " --print")
print("Setting agent config:\n{}\n".format("=" * 21))
u.print_remote_result(result)
if result.exited != 0 or result.stderr:
raise ValueError("❌ Error in init-config print")

# Set up correct config with observe url and token
result = remote_host.run_command(init_command)
if result.exited != 0 or result.stderr:
Expand All @@ -16,30 +24,25 @@ def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:


@u.print_test_decorator
def run_test_docker(remote_host: u.Host, env_vars: dict) -> None:
docker_prefix = u.get_docker_prefix(remote_host, False)
init_command = "{} init-config --token {} --observe_url {} --cloud_resource_detectors ec2".format(
docker_prefix, env_vars["observe_token"], env_vars["observe_url"]
def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:
init_command = (
r'Set-Location "C:\Program Files\Observe\observe-agent"; ./observe-agent '
+ init_config_command(env_vars)
)
run_init_config_common(remote_host, init_command)

# Set up correct config with observe url and token
result = remote_host.run_command(init_command)
if result.exited != 0 or result.stderr:
u.print_remote_result(result)
raise ValueError("❌ Error in init-config")

@u.print_test_decorator
def run_test_docker(remote_host: u.Host, env_vars: dict) -> None:
    """Run the shared init-config flow through the docker command prefix.

    Args:
        remote_host (Host): host the command is run on
        env_vars (dict): passed to init_config_command to build the
            init-config arguments
    """
    prefix = u.get_docker_prefix(remote_host, False)
    # Prefix and init-config arguments are joined with a single space.
    run_init_config_common(
        remote_host, prefix + " " + init_config_command(env_vars)
    )


@u.print_test_decorator
def run_test_linux(remote_host: u.Host, env_vars: dict) -> None:
init_command = "sudo observe-agent init-config --token {} --observe_url {} --cloud_resource_detectors ec2".format(
env_vars["observe_token"], env_vars["observe_url"]
)

# Set up correct config with observe url and token
result = remote_host.run_command(init_command)
if result.exited != 0 or result.stderr:
u.print_remote_result(result)
raise ValueError("❌ Error in init-config")
init_command = "sudo observe-agent " + init_config_command(env_vars)
run_init_config_common(remote_host, init_command)


if __name__ == "__main__":
Expand Down
34 changes: 19 additions & 15 deletions integration/scripts/test_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,28 @@


def _check_status_loop(
remote_host: u.Host, start_timeout: int, status_command: str
remote_host: u.Host, status_command: str, num_retries: int = 10, sleep_seconds: float = 1.5
) -> bool:
"""Run Check Status Command in a loop to wait for observe-agent to start

Args:
remote_host (Host): instance to ssh into
start_timeout (int): timeout in seconds to wait for agent to start
status_command (str): windows/linux status command to run
num_retries (int): number of times to check for the running agent before giving up
sleep_seconds (float): number of seconds to sleep between each retry

Returns:
bool: agent_status
"""

agent_status = False
for _ in range(start_timeout):
time.sleep(sleep_seconds)
for _ in range(num_retries):
metrics_dict = defaultdict(list)
try:
result = remote_host.run_command(status_command)
except Exception as e:
print("Ignoring exception: ", e)
time.sleep(1)
time.sleep(sleep_seconds)
continue
for line in result.stdout.splitlines():
if ":" in line:
Expand All @@ -44,10 +45,10 @@ def _check_status_loop(
break
print(
"❌ Observe Agent is not running. Retry Count is {}/{}...".format(
_ + 1, start_timeout
_ + 1, num_retries
)
)
time.sleep(1)
time.sleep(sleep_seconds)
return agent_status


Expand All @@ -65,7 +66,6 @@ def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:
# status
start_command = r".\start_agent_windows.ps1"
status_command = r'Get-Service ObserveAgent;Set-Location "${Env:Programfiles}\Observe\observe-agent"; ./observe-agent status'
start_timeout = 30 # how long to wait for observe-agent to start

# Get windows home dir paths for consistency
home_dir = r"/C:/Users/{}".format(env_vars["user"]) # for user in sftp
Expand All @@ -82,15 +82,15 @@ def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:
) # Eg: sftp to /C:/Users/Adminstrator/install_windows.ps1
# Run start_agent script
result = remote_host.run_command(start_command)
print(result)
u.print_remote_result(result)

if (
result.stderr
): # Powershell script failure does not cause command failure as the installation command succeeds so we need to check the stderr
raise RuntimeError("❌ Error in start_agent_windows.ps1 powershell script")

# Check Agent Status
agent_status = _check_status_loop(remote_host, start_timeout, status_command)
agent_status = _check_status_loop(remote_host, status_command)
if not agent_status:
u.die("❌ Error in Observe Agent Status Test ")

Expand All @@ -99,11 +99,11 @@ def run_test_windows(remote_host: u.Host, env_vars: dict) -> None:
def run_test_docker(remote_host: u.Host, env_vars: dict) -> None:
docker_prefix = u.get_docker_prefix(remote_host, True)
start_command = "start"
start_timeout = 30 # how long to wait for observe-agent to start

# Start Observe Agent
result = remote_host.run_command(docker_prefix + " " + start_command)
if result.stderr:
u.print_remote_result(result)
u.die("❌ Error starting observe-agent container")
else:
print("✅ Observe Agent started successfully: " + result.stdout)
Expand All @@ -113,7 +113,7 @@ def run_test_docker(remote_host: u.Host, env_vars: dict) -> None:
status_command = f"sudo docker exec {container_id} ./observe-agent status"

# Check Agent Status
agent_status = _check_status_loop(remote_host, start_timeout, status_command)
agent_status = _check_status_loop(remote_host, status_command)
if not agent_status:
u.die("❌ Error in Observe Agent Status Test ")

Expand All @@ -131,14 +131,18 @@ def run_test_linux(remote_host: u.Host, env_vars: dict) -> None:

start_command = "sudo systemctl enable --now observe-agent"
status_command = "observe-agent status"
start_timeout = 30 # how long to wait for observe-agent to start

# Start Observe Agent
remote_host.run_command(start_command)
result = remote_host.run_command(start_command)
u.print_remote_result(result)

# Check Agent Status
agent_status = _check_status_loop(remote_host, start_timeout, status_command)
agent_status = _check_status_loop(remote_host, status_command)
if not agent_status:
# If the agent never started up, try running start to see what the error is. Use unsafe because we expect a non-zero exit code.
u.print_remote_result(
remote_host.run_command_unsafe("timeout 10s sudo observe-agent start")
)
u.die("❌ Error in Observe Agent Status Test ")


Expand Down
17 changes: 11 additions & 6 deletions integration/scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,12 +168,7 @@ def _get_connection(self) -> Connection:
)

def run_command(self, command) -> Result:
try:
with self._get_connection() as connection:
print("Running `{0}` on {1}".format(command, self.host_ip))
result = connection.run(command, warn=True, hide=True)
except (socket_error, AuthenticationException) as exc:
self._raise_authentication_err(exc)
result = self.run_command_unsafe(command)

if result.failed:
raise ExampleException(
Expand All @@ -188,6 +183,16 @@ def run_command(self, command) -> Result:

return result

def run_command_unsafe(self, command) -> Result:
    """Run ``command`` over a fresh connection and return the raw result.

    The result is returned as-is; this method does not check whether the
    command failed. Socket and authentication errors raised while
    connecting or running are passed to ``_raise_authentication_err``.

    Args:
        command: command line to execute on the remote host

    Returns:
        Result: whatever the connection's ``run`` call returned
    """
    try:
        with self._get_connection() as conn:
            print(f"Running `{command}` on {self.host_ip}")
            # NOTE(review): warn=True presumably stops a non-zero exit from
            # raising here -- confirm against the connection library.
            outcome = conn.run(command, warn=True, hide=True)
    except (socket_error, AuthenticationException) as err:
        self._raise_authentication_err(err)

    return outcome

def put_file(self, local_path, remote_path) -> None:
try:
with self._get_connection() as connection:
Expand Down
4 changes: 4 additions & 0 deletions internal/commands/initconfig/initconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ var (
token string
observe_url string
cloud_resource_detectors []string
resource_attributes map[string]string
self_monitoring_enabled bool
host_monitoring_enabled bool
host_monitoring_logs_enabled bool
Expand Down Expand Up @@ -83,6 +84,9 @@ func RegisterConfigFlags(cmd *cobra.Command, v *viper.Viper) {
cmd.PersistentFlags().StringSliceVar(&cloud_resource_detectors, "cloud_resource_detectors", []string{}, "The cloud environments from which to detect resources")
v.BindPFlag("cloud_resource_detectors", cmd.PersistentFlags().Lookup("cloud_resource_detectors"))

// resource_attributes takes key=value pairs, e.g.
// --resource_attributes deployment.environment=test
// The help text previously duplicated the cloud_resource_detectors description.
cmd.PersistentFlags().StringToStringVar(&resource_attributes, "resource_attributes", map[string]string{}, "Resource attributes to add to the agent's telemetry, as key=value pairs (e.g. deployment.environment=prod)")
v.BindPFlag("resource_attributes", cmd.PersistentFlags().Lookup("resource_attributes"))

cmd.PersistentFlags().BoolVar(&self_monitoring_enabled, "self_monitoring::enabled", true, "Enable self monitoring")
v.BindPFlag("self_monitoring::enabled", cmd.PersistentFlags().Lookup("self_monitoring::enabled"))
v.SetDefault("self_monitoring::enabled", true)
Expand Down
10 changes: 10 additions & 0 deletions internal/config/configschema.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ type AgentConfig struct {
ObserveURL string `yaml:"observe_url" mapstructure:"observe_url"`
CloudResourceDetectors []string `yaml:"cloud_resource_detectors,omitempty" mapstructure:"cloud_resource_detectors"`
Debug bool `yaml:"debug,omitempty" mapstructure:"debug"`
Attributes map[string]string `yaml:"attributes,omitempty" mapstructure:"attributes"`
ResourceAttributes map[string]string `yaml:"resource_attributes,omitempty" mapstructure:"resource_attributes"`
HealthCheck HealthCheckConfig `yaml:"health_check" mapstructure:"health_check"`
Forwarding ForwardingConfig `yaml:"forwarding" mapstructure:"forwarding"`
InternalTelemetry InternalTelemetryConfig `yaml:"internal_telemetry" mapstructure:"internal_telemetry"`
Expand All @@ -81,6 +83,14 @@ type AgentConfig struct {
OtelConfigOverrides map[string]any `yaml:"otel_config_overrides,omitempty" mapstructure:"otel_config_overrides"`
}

// HasAttributes reports whether the Attributes map contains at least one entry.
func (config *AgentConfig) HasAttributes() bool {
	return len(config.Attributes) != 0
}

// HasResourceAttributes reports whether the ResourceAttributes map contains
// at least one entry.
func (config *AgentConfig) HasResourceAttributes() bool {
	return len(config.ResourceAttributes) != 0
}

func SetViperDefaults(v *viper.Viper, separator string) {
var config AgentConfig
defaults.SetDefaults(&config)
Expand Down
6 changes: 6 additions & 0 deletions internal/connections/allconnectiontypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ var CommonConnectionType = MakeConnectionType(
},
colConfigFilePath: "base.yaml.tmpl",
},
{
enabledCheck: func(agentConfig *config.AgentConfig) bool {
return agentConfig.HasAttributes() || agentConfig.HasResourceAttributes()
},
colConfigFilePath: "attributes.yaml.tmpl",
},
{
enabledCheck: func(agentConfig *config.AgentConfig) bool {
return agentConfig.Forwarding.Enabled
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
processors:
{{- if .HasAttributes }}
attributes/observe_global_attributes:
actions:
{{- range $key, $value := .Attributes }}
- key: {{ $key }}
value: {{ $value }}
action: insert
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this action might need to be overridable as well; `upsert` is potentially a desired value.

{{- end }}
{{- end }}
{{- if .HasResourceAttributes }}
resource/observe_global_resource_attributes:
attributes:
{{- range $key, $value := .ResourceAttributes }}
- key: {{ $key }}
value: {{ $value }}
action: insert
{{- end }}
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ processors:
host.id:
enabled: false
os.type:
enabled: true
enabled: true
host.arch:
enabled: true
host.name:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,41 @@ service:
pipelines:
metrics/forward:
receivers: [otlp]
processors: [resourcedetection, resourcedetection/cloud, deltatocumulative, batch]
processors:
- resourcedetection
- resourcedetection/cloud
- deltatocumulative
{{- if .HasAttributes }}
- attributes/observe_global_attributes
{{- end }}
{{- if .HasResourceAttributes }}
- resource/observe_global_resource_attributes
{{- end }}
- batch
exporters: [prometheusremotewrite/observe]

logs/forward:
receivers: [otlp]
processors: [resourcedetection, resourcedetection/cloud]
processors:
- resourcedetection
- resourcedetection/cloud
{{- if .HasAttributes }}
- attributes/observe_global_attributes
{{- end }}
{{- if .HasResourceAttributes }}
- resource/observe_global_resource_attributes
{{- end }}
exporters: [otlphttp/observe, count]

traces/forward:
receivers: [otlp]
processors: [resourcedetection, resourcedetection/cloud]
processors:
- resourcedetection
- resourcedetection/cloud
{{- if .HasAttributes }}
- attributes/observe_global_attributes
{{- end }}
{{- if .HasResourceAttributes }}
- resource/observe_global_resource_attributes
{{- end }}
exporters: [otlphttp/observetracing]
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ receivers:
service:
pipelines:
metrics/agent-filestats:
receivers: [filestats/agent]
processors: [resourcedetection, resourcedetection/cloud]
exporters: [prometheusremotewrite/observe]
receivers: [filestats/agent]
processors:
- memory_limiter
- resourcedetection
- resourcedetection/cloud
{{- if .HasAttributes }}
- attributes/observe_global_attributes
{{- end }}
{{- if .HasResourceAttributes }}
- resource/observe_global_resource_attributes
{{- end }}
- batch
exporters: [prometheusremotewrite/observe]
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,15 @@ service:
pipelines:
metrics/host_monitoring_host:
receivers: [hostmetrics/host-monitoring-host]
processors: [memory_limiter, resourcedetection, resourcedetection/cloud, batch]
processors:
- memory_limiter
- resourcedetection
- resourcedetection/cloud
{{- if .HasAttributes }}
- attributes/observe_global_attributes
{{- end }}
{{- if .HasResourceAttributes }}
- resource/observe_global_resource_attributes
{{- end }}
- batch
exporters: [prometheusremotewrite/observe]
Loading
Loading