Overview of the Issue
Consul server crashes with a runtime panic while reconciling API Gateway routes.
The crash signature is:
fatal error: concurrent map iteration and map write
The panic stack consistently points to go-memdb WatchSet iteration, reached from Consul controller trigger execution:
github.com/hashicorp/go-memdb WatchSet watchMany and WatchCtx
github.com/hashicorp/consul agent consul controller AddTrigger func1 at controller.go:328
In many cases, immediately before the panic, Consul logs:
- module: agent.server.api_gateway_controller
- message: error while running trigger, adding re-reconcilation anyway
- error: context canceled
This suggests the crash is triggered when a trigger's context is canceled during heavy reconciliation churn. Such churn should be survivable, but it currently results in process-level crash loops.
Impact:
- server process exits unexpectedly
Reproduction Steps
- Start a Consul server with API Gateway controller enabled.
- Create or update API Gateway routes that reference one or more upstream services.
- Repeatedly update route and/or discovery chain related config entries to force reconciliations.
- Watch server logs.
- Server eventually panics with concurrent map iteration and map write.
Consul info for both Client and Server
Client info
agent:
check_monitors = 1
check_ttls = 1
checks = 114
services = 78
build:
prerelease =
revision = 97566680
version = 1.22.6
version_metadata =
consul:
acl = enabled
known_servers = 3
server = false
runtime:
arch = amd64
cpu_count = 16
goroutines = 2020
max_procs = 16
os = linux
version = go1.25.8 X:boringcrypto
serf_lan:
coordinate_resets = 0
encrypted = true
event_queue = 0
event_time = 848
failed = 2
health_score = 0
intent_queue = 0
left = 0
member_time = 60996
members = 166
query_queue = 0
query_time = 5
acl = {
enabled = true
enable_token_persistence = false
down_policy = "extend-cache"
default_policy = "deny"
token_ttl = "60s"
}
# Encryption
encrypt = "<REDACTED_GOSSIP_KEY>"
encrypt_verify_incoming = true
encrypt_verify_outgoing = true
enable_agent_tls_for_checks = true
# Node Settings
node_name = "<REDACTED_NODE_NAME>"
data_dir = "<REDACTED_PATH>"
client_addr = "0.0.0.0"
datacenter = "<REDACTED_DATACENTER>"
primary_datacenter = "<REDACTED_PRIMARY_DATACENTER>"
bind_addr = "<REDACTED_BIND_IP>"
check_update_interval = "1584h"
discard_check_output = true
ui_config = {
enabled = false
}
server = false
log_file = "<REDACTED_PATH>"
log_rotate_max_files = 5
log_rotate_bytes = 10000000
log_json = true
enable_local_script_checks = true
enable_script_checks = false
disable_update_check = true
dns_config = {
udp_answer_limit = 99
service_ttl = {
"*" = "1s"
}
}
recursors = ["<REDACTED_DNS_IP_1>", "<REDACTED_DNS_IP_2>"]
enable_central_service_config = true
# Consul Connect Specific settings
connect = {
enabled = true
ca_provider = "vault"
ca_config {
address = "<REDACTED_VAULT_ADDR>"
token = "<REDACTED_VAULT_TOKEN>"
root_pki_path = "pki"
intermediate_pki_path = "pki_consul_connect_int"
ca_csr_max_concurrent = 0
ca_csr_max_per_second = 200
leaf_cert_ttl = "2880h"
}
}
ports = {
grpc_tls = 8502
http = 8500
https = 8501
}
raft_protocol = 3
limits = {
http_max_conns_per_client = 1000
rpc_max_conns_per_client = 500
https_handshake_timeout = "5s"
rpc_handshake_timeout = "5s"
}
skip_leave_on_interrupt = true
config_entries {
bootstrap {
Kind = "mesh"
TLS {
Incoming {
TLSMinVersion = "TLSv1_2"
CipherSuites = [
"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
]
}
}
HTTP {
SanitizeXForwardedClientCert = true
}
}
}
http_config {
response_headers {
Strict-Transport-Security = "max-age=31536000; includeSubDomains"
X-Frame-Options = "DENY"
X-Content-Type-Options = "nosniff"
X-Xss-Protection = "1; mode=block"
Content-Security-Policy = "frame-ancestors 'none'"
Referrer-Policy = "no-referrer"
}
}
tls {
defaults {
verify_outgoing = true
verify_incoming = true
ca_file = "<REDACTED_PATH>"
cert_file = "<REDACTED_PATH>"
key_file = "<REDACTED_PATH>"
tls_cipher_suites = "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"
}
internal_rpc {
verify_server_hostname = true
}
https {
# Set verify_incoming to 'false' so that clients making HTTP calls to Consul don't need to provide a client certificate.
verify_incoming = false
}
grpc {
# Set verify_incoming to 'false' here; setting it to 'true' breaks gRPC connections. See https://github.com/hashicorp/consul/issues/13124
verify_incoming = false
}
}
tls_prefer_server_cipher_suites = true
Server info
agent:
check_monitors = 1
check_ttls = 1
checks = 8
services = 10
build:
prerelease =
revision = 97566680
version = 1.22.6
version_metadata =
consul:
acl = enabled
bootstrap = false
known_datacenters = 1
leader = true
leader_addr = 10.14.88.44:8300
server = true
raft:
applied_index = 296116118
commit_index = 296116118
fsm_pending = 0
last_contact = 0
last_log_index = 296116118
last_log_term = 854
last_snapshot_index = 296111491
last_snapshot_term = 854
latest_configuration = [{Suffrage:Voter ID:b0d0bd8b-bb20-b5a5-6fd3-a14c3429b2f3 Address:10.14.88.44:8300} {Suffrage:Voter ID:72a0085d-dc8e-1903-9e0d-a3057b998285 Address:10.14.88.56:8300} {Suffrage:Voter ID:b3250461-3187-7d47-c09d-50b9cdbc3689 Address:10.14.88.80:8300}]
latest_configuration_index = 0
num_peers = 2
protocol_version = 3
protocol_version_max = 3
protocol_version_min = 0
snapshot_version_max = 1
snapshot_version_min = 0
state = Leader
term = 854
runtime:
arch = amd64
cpu_count = 16
goroutines = 69077
max_procs = 16
os = linux
version = go1.25.8 X:boringcrypto
serf_lan:
coordinate_resets = 0
encrypted = true
event_queue = 0
event_time = 848
failed = 0
health_score = 0
intent_queue = 0
left = 0
member_time = 60996
members = 164
query_queue = 0
query_time = 5
serf_wan:
coordinate_resets = 0
encrypted = true
event_queue = 0
event_time = 1
failed = 0
health_score = 0
intent_queue = 0
left = 0
member_time = 1259
members = 3
query_queue = 0
query_time = 1
acl = {
enabled = true
enable_token_persistence = false
down_policy = "extend-cache"
default_policy = "deny"
token_ttl = "60s"
}
# Encryption
encrypt = "<REDACTED_GOSSIP_KEY>"
encrypt_verify_incoming = true
encrypt_verify_outgoing = true
enable_agent_tls_for_checks = true
# Node Settings
node_name = "<REDACTED_NODE_NAME>"
data_dir = "<REDACTED_PATH>"
client_addr = "0.0.0.0"
datacenter = "<REDACTED_DATACENTER>"
primary_datacenter = "<REDACTED_PRIMARY_DATACENTER>"
bind_addr = "<REDACTED_BIND_IP>"
check_update_interval = "1584h"
discard_check_output = true
bootstrap_expect = 3
performance = {
raft_multiplier = 1
}
peering {
enabled = false
}
ui_config = {
enabled = true
}
server = true
log_file = "<REDACTED_PATH>"
log_rotate_max_files = 5
log_rotate_bytes = 10000000
log_json = true
enable_local_script_checks = true
enable_script_checks = false
disable_update_check = true
dns_config = {
udp_answer_limit = 99
service_ttl = {
"*" = "1s"
}
}
recursors = ["<REDACTED_DNS_IP_1>", "<REDACTED_DNS_IP_2>"]
enable_central_service_config = true
# Consul Connect Specific settings
connect = {
enabled = true
ca_provider = "vault"
ca_config {
address = "<REDACTED_VAULT_ADDR>"
token = "<REDACTED_VAULT_TOKEN>"
root_pki_path = "pki"
intermediate_pki_path = "pki_consul_connect_int"
ca_csr_max_concurrent = 0
ca_csr_max_per_second = 200
leaf_cert_ttl = "2880h"
}
}
ports = {
grpc_tls = 8502
http = 8500
https = 8501
}
raft_protocol = 3
limits = {
http_max_conns_per_client = 1000
rpc_max_conns_per_client = 500
https_handshake_timeout = "5s"
rpc_handshake_timeout = "5s"
}
skip_leave_on_interrupt = true
config_entries {
bootstrap {
Kind = "mesh"
TLS {
Incoming {
TLSMinVersion = "TLSv1_2"
CipherSuites = [
"TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
"TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
"TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
]
}
}
HTTP {
SanitizeXForwardedClientCert = true
}
}
}
http_config {
response_headers {
Strict-Transport-Security = "max-age=31536000; includeSubDomains"
X-Frame-Options = "DENY"
X-Content-Type-Options = "nosniff"
X-Xss-Protection = "1; mode=block"
Content-Security-Policy = "frame-ancestors 'none'"
Referrer-Policy = "no-referrer"
}
}
tls {
defaults {
verify_outgoing = true
verify_incoming = true
ca_file = "<REDACTED_PATH>"
cert_file = "<REDACTED_PATH>"
key_file = "<REDACTED_PATH>"
tls_cipher_suites = "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256"
}
internal_rpc {
verify_incoming = true
verify_server_hostname = true
}
https {
# Set verify_incoming to 'false' so that clients making HTTP calls to Consul don't need to provide a client certificate.
verify_incoming = false
}
grpc {
# Set verify_incoming to 'false' here; setting it to 'true' breaks gRPC connections. See https://github.com/hashicorp/consul/issues/13124
verify_incoming = false
}
}
tls_prefer_server_cipher_suites = true
Operating system and Environment details
OS: AlmaLinux
Architecture: amd64
Consul version: 1.22.6
Deployment mode: multi-server cluster
Log Fragments
{"@level":"error","@message":"error while running trigger, adding re-reconcilation anyway","@module":"agent.server.api_gateway_controller","@timestamp":"2026-04-23T17:13:21.011917Z","controller":"apiGatewayController","error":"context canceled"}
fatal error: concurrent map iteration and map write
goroutine 1009884 [running]:
internal/runtime/maps.fatal({0x45e94d6?, 0x5d92a5?})
/usr/local/go/src/runtime/panic.go:1046 +0x18
internal/runtime/maps.(*Iter).Next(0xc048c6da00?)
/usr/local/go/src/internal/runtime/maps/table.go:792 +0x86
github.com/hashicorp/go-memdb.WatchSet.watchMany(0xc08c590540, {0x5227088, 0xc0220ef450})
/root/go/pkg/mod/github.com/hashicorp/go-memdb@v1.3.4/watch.go:113 +0x14b
github.com/hashicorp/go-memdb.WatchSet.WatchCtx(0xc0357a6f08?, {0x5227088?, 0xc0220ef450?})
/root/go/pkg/mod/github.com/hashicorp/go-memdb@v1.3.4/watch.go:89 +0xb0
github.com/hashicorp/consul/agent/consul/controller.(*controller).AddTrigger.func1()
/source/consul/agent/consul/controller/controller.go:328 +0x3e
golang.org/x/sync/errgroup.(*Group).Go.func1()
/root/go/pkg/mod/golang.org/x/sync@v0.19.0/errgroup/errgroup.go:93 +0x50
created by golang.org/x/sync/errgroup.(*Group).Go in goroutine 662403
/root/go/pkg/mod/golang.org/x/sync@v0.19.0/errgroup/errgroup.go:78 +0x95
goroutine 1 [select, 1 minutes]:
github.com/hashicorp/consul/command/agent.(*cmd).run(0xc000a7b008, {0xc0000741d0?, 0x1?, 0x3f89700?})
/source/consul/command/agent/agent.go:250 +0x16c7
github.com/hashicorp/consul/command/agent.(*cmd).Run(0xc000a7b008, {0xc0000741d0?, 0x0?, 0x0?})
/source/consul/command/agent/agent.go:72 +0x25
github.com/mitchellh/cli.(*CLI).Run(0xc0009fbb80)
/root/go/pkg/mod/github.com/mitchellh/cli@v1.1.5/cli.go:262 +0x4ed
main.realMain()
/source/consul/main.go:51 +0x465
main.main()
/source/consul/main.go:21 +0x13
Expected behavior:
Trigger cancellation or re-reconciliation under churn should not panic the Consul server process.
Actual behavior:
Server panics with concurrent map iteration and map write and exits.
Suspected cause:
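The same WatchSet appears to be iterated and written to concurrently during API Gateway route reconciliation: the trigger goroutine iterates it in WatchSet.watchMany (via WatchCtx, called from controller.go:328) while another goroutine appears to still be writing to the same map.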
Overview of the Issue
Consul server crashes with a runtime panic while reconciling API Gateway routes.
The crash signature is:
fatal error: concurrent map iteration and map write
The panic stack consistently points to go-memdb WatchSet iteration, reached from Consul controller trigger execution:
github.com/hashicorp/go-memdb WatchSet watchMany and WatchCtx
github.com/hashicorp/consul agent consul controller AddTrigger func1 at controller.go:328
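In many cases, immediately before the panic, Consul logs:
- module: agent.server.api_gateway_controller
- message: error while running trigger, adding re-reconcilation anyway
- error: context canceled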
This indicates high churn should be survivable, but currently results in process-level crash loops.
Impact:
- server process exits unexpectedly
Reproduction Steps
Consul info for both Client and Server
Client info
Server info
Operating system and Environment details
OS: AlmaLinux
Architecture: amd64
Consul version: 1.22.6
Deployment mode: multi-server cluster
Log Fragments
Expected behavior:
Trigger cancellation or re-reconciliation under churn should not panic the Consul server process.
Actual behavior:
Server panics with concurrent map iteration and map write and exits.
Suspected cause:
The same WatchSet appears to be watched and mutated concurrently during API gateway route reconciliation.