-
Notifications
You must be signed in to change notification settings - Fork 4.6k
transport: Add values to the grpc.disconnect_error label for grpc.subchannel.disconnections metric (A94) #8973
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
de97023
4bdc057
dcaf507
5584fb4
8eab968
88cd619
2dacd03
8907a8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,12 +24,16 @@ import ( | |
| "fmt" | ||
| "math" | ||
| "net/url" | ||
| "os" | ||
| "slices" | ||
| "strings" | ||
| "sync" | ||
| "sync/atomic" | ||
| "syscall" | ||
| "time" | ||
|
|
||
| "golang.org/x/net/http2" | ||
|
|
||
| "google.golang.org/grpc/balancer" | ||
| "google.golang.org/grpc/balancer/base" | ||
| "google.golang.org/grpc/balancer/pickfirst" | ||
|
|
@@ -1270,6 +1274,7 @@ type addrConn struct { | |
|
|
||
| localityLabel string | ||
| backendServiceLabel string | ||
| disconnectError string | ||
| } | ||
|
|
||
| // Note: this requires a lock on ac.mu. | ||
|
|
@@ -1286,9 +1291,14 @@ func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) | |
| // TODO: https://github.com/grpc/grpc-go/issues/7862 - Remove the second | ||
| // part of the if condition below once the issue is fixed. | ||
| if ac.state == connectivity.Ready || (ac.state == connectivity.Connecting && s == connectivity.Idle) { | ||
| disconnectionsMetric.Record(ac.cc.metricsRecorderList, 1, ac.cc.target, ac.backendServiceLabel, ac.localityLabel, "unknown") | ||
| disconnectError := ac.disconnectError | ||
| if disconnectError == "" { | ||
| disconnectError = "unknown" | ||
| } | ||
| disconnectionsMetric.Record(ac.cc.metricsRecorderList, 1, ac.cc.target, ac.backendServiceLabel, ac.localityLabel, disconnectError) | ||
| openConnectionsMetric.Record(ac.cc.metricsRecorderList, -1, ac.cc.target, ac.backendServiceLabel, ac.securityLevelLocked(), ac.localityLabel) | ||
| } | ||
| ac.disconnectError = "" // Reset for next time | ||
| ac.state = s | ||
| ac.channelz.ChannelMetrics.State.Store(&s) | ||
| if lastErr == nil { | ||
|
|
@@ -1483,7 +1493,7 @@ func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, | |
| addr.ServerName = ac.cc.getServerName(addr) | ||
| hctx, hcancel := context.WithCancel(ctx) | ||
|
|
||
| onClose := func(r transport.GoAwayReason) { | ||
| onClose := func(r transport.GoAwayReason, goAwayCode http2.ErrCode, err error) { | ||
| ac.mu.Lock() | ||
| defer ac.mu.Unlock() | ||
| // adjust params based on GoAwayReason | ||
|
|
@@ -1504,6 +1514,7 @@ func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, | |
| return | ||
| } | ||
| ac.transport = nil | ||
| ac.disconnectError = disconnectErrorString(r, goAwayCode, err) | ||
| // Refresh the name resolver on any connection loss. | ||
| ac.cc.resolveNow(resolver.ResolveNowOptions{}) | ||
| // Always go idle and wait for the LB policy to initiate a new | ||
|
|
@@ -1560,6 +1571,31 @@ func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, | |
| return nil | ||
| } | ||
|
|
||
| func disconnectErrorString(r transport.GoAwayReason, goAwayCode http2.ErrCode, err error) string { | ||
| if r != transport.GoAwayInvalid { | ||
|
||
| return fmt.Sprintf("GOAWAY %s", goAwayCode.String()) | ||
| } | ||
| if err == nil { | ||
| return "unknown" | ||
| } | ||
| if errors.Is(err, context.Canceled) { | ||
| return "subchannel shutdown" | ||
| } | ||
| if errors.Is(err, syscall.ECONNRESET) { | ||
| return "connection reset" | ||
| } | ||
| if errors.Is(err, syscall.ETIMEDOUT) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, os.ErrDeadlineExceeded) { | ||
| return "connection timed out" | ||
| } | ||
| if errors.Is(err, syscall.ECONNABORTED) { | ||
| return "connection aborted" | ||
| } | ||
| if errors.Is(err, syscall.ECONNREFUSED) { | ||
| return "socket error" | ||
| } | ||
| return "unknown" | ||
| } | ||
|
|
||
| // startHealthCheck starts the health checking stream (RPC) to watch the health | ||
| // stats of this connection if health checking is requested and configured. | ||
| // | ||
|
|
@@ -1665,6 +1701,9 @@ func (ac *addrConn) tearDown(err error) { | |
| ac.transport = nil | ||
| // We have to set the state to Shutdown before anything else to prevent races | ||
| // between setting the state and logic that waits on context cancellation / etc. | ||
| if ac.disconnectError == "" { | ||
| ac.disconnectError = "subchannel shutdown" | ||
| } | ||
| ac.updateConnectivityState(connectivity.Shutdown, nil) | ||
| ac.cancel() | ||
| ac.curAddr = resolver.Address{} | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -134,6 +134,8 @@ type http2Client struct { | |
| // goAwayDebugMessage contains a detailed human readable string about a | ||
| // GoAway frame, useful for error messages. | ||
| goAwayDebugMessage string | ||
| // goAwayCode records the http2.ErrCode received with the GoAway frame. | ||
| goAwayCode http2.ErrCode | ||
| // A condition variable used to signal when the keepalive goroutine should | ||
| // go dormant. The condition for dormancy is based on the number of active | ||
| // streams and the `PermitWithoutStream` keepalive client parameter. And | ||
|
|
@@ -147,7 +149,7 @@ type http2Client struct { | |
|
|
||
| channelz *channelz.Socket | ||
|
|
||
| onClose func(GoAwayReason) | ||
| onClose func(GoAwayReason, http2.ErrCode, error) | ||
|
|
||
| bufferPool mem.BufferPool | ||
|
|
||
|
|
@@ -204,7 +206,7 @@ func isTemporary(err error) bool { | |
| // NewHTTP2Client constructs a connected ClientTransport to addr based on HTTP2 | ||
| // and starts to receive messages on it. Non-nil error returns if construction | ||
| // fails. | ||
| func NewHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts ConnectOptions, onClose func(GoAwayReason)) (_ ClientTransport, err error) { | ||
| func NewHTTP2Client(connectCtx, ctx context.Context, addr resolver.Address, opts ConnectOptions, onClose func(GoAwayReason, http2.ErrCode, error)) (_ ClientTransport, err error) { | ||
| scheme := "http" | ||
| ctx, cancel := context.WithCancel(ctx) | ||
| defer func() { | ||
|
|
@@ -1015,7 +1017,7 @@ func (t *http2Client) Close(err error) { | |
| // Call t.onClose ASAP to prevent the client from attempting to create new | ||
| // streams. | ||
| if t.state != draining { | ||
| t.onClose(GoAwayInvalid) | ||
| t.onClose(GoAwayInvalid, http2.ErrCodeNo, err) | ||
| } | ||
| t.state = closing | ||
| streams := t.activeStreams | ||
|
|
@@ -1086,7 +1088,7 @@ func (t *http2Client) GracefulClose() { | |
| if t.logger.V(logLevel) { | ||
| t.logger.Infof("GracefulClose called") | ||
| } | ||
| t.onClose(GoAwayInvalid) | ||
| t.onClose(GoAwayInvalid, http2.ErrCodeNo, nil) | ||
| t.state = draining | ||
| active := len(t.activeStreams) | ||
| t.mu.Unlock() | ||
|
|
@@ -1372,7 +1374,7 @@ func (t *http2Client) handleGoAway(f *http2.GoAwayFrame) error { | |
| // draining, to allow the client to stop attempting to create streams | ||
| // before disallowing new streams on this connection. | ||
| if t.state != draining { | ||
| t.onClose(t.goAwayReason) | ||
| t.onClose(t.goAwayReason, t.goAwayCode, nil) | ||
| t.state = draining | ||
| } | ||
| } | ||
|
|
@@ -1417,11 +1419,11 @@ func (t *http2Client) setGoAwayReason(f *http2.GoAwayFrame) { | |
| t.goAwayReason = GoAwayTooManyPings | ||
| } | ||
| } | ||
| if len(f.DebugData()) == 0 { | ||
| t.goAwayDebugMessage = fmt.Sprintf("code: %s", f.ErrCode) | ||
| } else { | ||
| t.goAwayDebugMessage = fmt.Sprintf("code: %s, debug data: %q", f.ErrCode, string(f.DebugData())) | ||
| t.goAwayDebugMessage = fmt.Sprintf("code: %s", f.ErrCode) | ||
| if len(f.DebugData()) > 0 { | ||
| t.goAwayDebugMessage += fmt.Sprintf(", debug data: %q", string(f.DebugData())) | ||
|
||
| } | ||
| t.goAwayCode = f.ErrCode | ||
| } | ||
|
|
||
| func (t *http2Client) GetGoAwayReason() (GoAwayReason, string) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Can we add a `Label` suffix to stay consistent with existing field names?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.