diff --git a/go/flags/endtoend/vtcombo.txt b/go/flags/endtoend/vtcombo.txt
index 860ff5c62fa..2d06cc309b2 100644
--- a/go/flags/endtoend/vtcombo.txt
+++ b/go/flags/endtoend/vtcombo.txt
@@ -170,7 +170,7 @@ Flags:
       --grpc-use-effective-groups                                        If set, and SSL is not used, will set the immediate caller's security groups from the effective caller id's groups.
       --grpc-use-static-authentication-callerid                          If set, will set the immediate caller id to the username authenticated by the static auth plugin.
       --health-check-interval duration                                   Interval between health checks (default 20s)
-      --healthcheck-retry-delay duration                                 health check retry delay (default 2ms)
+      --healthcheck-retry-delay duration                                 health check retry delay (default 5s)
       --healthcheck-timeout duration                                     the health check timeout period (default 1m0s)
       --heartbeat-enable                                                 If true, vttablet records (if master) or checks (if replica) the current time of a replication heartbeat in the sidecar database's heartbeat table. The result is used to inform the serving state of the vttablet via healthchecks.
       --heartbeat-interval duration                                      How frequently to read and write replication heartbeat. (default 1s)
diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt
index ba928b978d6..c0bf837d8da 100644
--- a/go/flags/endtoend/vtgate.txt
+++ b/go/flags/endtoend/vtgate.txt
@@ -100,7 +100,7 @@ Flags:
       --grpc-use-effective-callerid                                      If set, and SSL is not used, will set the immediate caller id from the effective caller id's principal.
       --grpc-use-effective-groups                                        If set, and SSL is not used, will set the immediate caller's security groups from the effective caller id's groups.
       --grpc-use-static-authentication-callerid                          If set, will set the immediate caller id to the username authenticated by the static auth plugin.
-      --healthcheck-retry-delay duration                                 health check retry delay (default 2ms)
+      --healthcheck-retry-delay duration                                 health check retry delay (default 5s)
       --healthcheck-timeout duration                                     the health check timeout period (default 1m0s)
   -h, --help                                                             help for vtgate
       --keep-logs duration                                               keep logs for this long (using ctime) (zero to keep forever)
diff --git a/go/test/endtoend/reparent/newfeaturetest/reparent_with_open_tx_test.go b/go/test/endtoend/reparent/newfeaturetest/reparent_with_open_tx_test.go
index b217d99a1ae..7fc58b54d21 100644
--- a/go/test/endtoend/reparent/newfeaturetest/reparent_with_open_tx_test.go
+++ b/go/test/endtoend/reparent/newfeaturetest/reparent_with_open_tx_test.go
@@ -17,6 +17,7 @@ limitations under the License.
 package newfeaturetest
 
 import (
+	"strings"
 	"testing"
 	"time"
 
@@ -113,7 +114,15 @@ func testExecuteErrorWhileTabletIsNotServing(t *testing.T, conn *mysql.Conn, clu
 		if !assert.ErrorContains(t, err, "VT15001") {
 			return
 		}
-		if !assert.ErrorContains(t, err, vterrors.WrongTablet) {
+		// Depending on whether vtgate has re-established its health stream to the
+		// restarted tablet by the time the query runs, the underlying reason is
+		// either the tablet rejecting the query as the wrong type, or vtgate not
+		// yet having a live connection to it. Both are valid VT15001 transaction
+		// errors against a no-longer-serving pinned tablet.
+		if !assert.True(t,
+			strings.Contains(err.Error(), vterrors.WrongTablet) ||
+				strings.Contains(err.Error(), "is either down or nonexistent"),
+			"unexpected VT15001 reason: %v", err) {
 			return
 		}
 
diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go
index 7ae4cbb2e57..9ed9193c74e 100644
--- a/go/vt/discovery/healthcheck_test.go
+++ b/go/vt/discovery/healthcheck_test.go
@@ -343,6 +343,76 @@ func TestHealthCheckStreamError(t *testing.T) {
 	assert.Empty(t, a, "wrong result, expected empty list")
 }
 
+// TestHealthCheckRetryDelayIsBounded verifies that repeated stream failures do
+// not cause the retry delay to grow. After multiple consecutive errors the
+// tablet should still be rediscovered within a short, bounded window once it
+// recovers, rather than the ever-growing window the old exponential backoff
+// produced. Regression test for https://github.com/vitessio/vitess/issues/19894.
+func TestHealthCheckRetryDelayIsBounded(t *testing.T) {
+	ctx := utils.LeakCheckContext(t)
+
+	ts := memorytopo.NewServer(ctx, "cell")
+	defer ts.Close()
+	hc := createTestHc(ctx, ts)
+	// Use a short but measurable retry delay so we can assert timing. With
+	// jitter the actual interval stays within [7.5ms, 12.5ms].
+	hc.retryDelay = 10 * time.Millisecond
+	defer hc.Close()
+
+	tablet := createTestTablet(0, "cell", "a")
+	// input is buffered so the recovery send below does not block until the
+	// healthcheck goroutine has already slept and reconnected. See the timing
+	// comment where the healthy response is sent.
+	input := make(chan *querypb.StreamHealthResponse, 1)
+	resultChan := hc.Subscribe("TestHealthCheckRetryDelayIsBounded")
+	fc := createFakeConn(tablet, input)
+	fc.errCh = make(chan error)
+	hc.AddTablet(tablet)
+
+	// Drain the initial not-serving notification from AddTablet.
+	<-resultChan
+
+	// Send multiple consecutive stream errors to simulate a prolonged outage
+	// where the tablet is unreachable for many retry cycles.
+	const numErrors = 8 // with exponential backoff the post-recovery sleep would reach ~1.28s (10ms*2^7)
+	for range numErrors {
+		fc.errCh <- errors.New("connection refused")
+		<-resultChan // drain the not-serving update
+	}
+
+	// The tablet recovers. Build the healthy response up front so the timing
+	// window below covers only the reconnect delay, not test bookkeeping.
+	shr := &querypb.StreamHealthResponse{
+		TabletAlias:               tablet.Alias,
+		Target:                    &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA},
+		Serving:                   true,
+		PrimaryTermStartTimestamp: 0,
+		RealtimeStats:             &querypb.RealtimeStats{ReplicationLagSeconds: 1, CpuUsage: 0.2},
+	}
+
+	// Because input is buffered, this send returns immediately instead of
+	// blocking until the healthcheck goroutine has finished its retry sleep and
+	// reconnected. That is what lets us start timing before the sleep and
+	// actually measure it. With an unbuffered channel the send would absorb the
+	// sleep, and the test would pass even if exponential backoff were restored.
+	start := time.Now()
+	input <- shr
+
+	// The fixed retry delay (~10ms +/- jitter) rediscovers the tablet within a
+	// single cycle. The old exponential backoff would sleep ~1.28s after
+	// numErrors failures (10ms * 2^7), so the 400ms bound sits far above the fixed
+	// interval (no CI-timing flakiness) yet well below the backoff value, failing
+	// clearly if the delay regrows. The 5s arm only trips on a true hang.
+	select {
+	case result := <-resultChan:
+		assert.True(t, result.Serving, "tablet should be serving after recovery")
+		assert.Less(t, time.Since(start), 400*time.Millisecond,
+			"rediscovery took too long after recovery; retry delay may be growing exponentially")
+	case <-time.After(5 * time.Second):
+		require.Fail(t, "tablet was not rediscovered after recovery")
+	}
+}
+
 // TestHealthCheckErrorOnPrimary is the same as TestHealthCheckStreamError except for tablet type
 func TestHealthCheckErrorOnPrimary(t *testing.T) {
 	ctx := utils.LeakCheckContext(t)
@@ -668,7 +738,7 @@ func TestHealthCheckTimeout(t *testing.T) {
 	fc.resetCanceledFlag()
 	input <- shr
 
-	// wait for the exponential backoff to wear off and health monitoring to resume.
+	// wait for the retry delay to pass and health monitoring to resume.
 	result = <-resultChan
 	mustMatch(t, want, result, "Wrong TabletHealth data")
 }
diff --git a/go/vt/discovery/tablet_health_check.go b/go/vt/discovery/tablet_health_check.go
index 4c6e569cfc5..b12340a7145 100644
--- a/go/vt/discovery/tablet_health_check.go
+++ b/go/vt/discovery/tablet_health_check.go
@@ -19,6 +19,7 @@ package discovery
 import (
 	"context"
 	"fmt"
+	"math/rand/v2"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -243,7 +244,6 @@ func (thc *tabletHealthCheck) checkConn(hc *HealthCheckImpl) {
 	// Initialize error counter
 	hcErrorCounters.Add([]string{thc.Target.Keyspace, thc.Target.Shard, topoproto.TabletTypeLString(thc.Target.TabletType)}, 0)
 
-	retryDelay := hc.retryDelay
 	for {
 		streamCtx, streamCancel := context.WithCancel(thc.ctx)
 
@@ -275,8 +275,6 @@ func (thc *tabletHealthCheck) checkConn(hc *HealthCheckImpl) {
 
 		// Read stream health responses.
 		err := thc.stream(streamCtx, func(shr *query.StreamHealthResponse) error {
-			// We received a message. Reset the back-off.
-			retryDelay = hc.retryDelay
 			// Don't block on send to avoid deadlocks.
 			select {
 			case servingStatus <- shr.Serving:
@@ -318,20 +316,34 @@ func (thc *tabletHealthCheck) checkConn(hc *HealthCheckImpl) {
 
 		// Streaming RPC failed e.g. because vttablet was restarted or took too long.
 		// Sleep until the next retry is up or the context is done/canceled.
+		// We use a fixed retry interval with jitter instead of exponential backoff
+		// so that vtgate rediscovers recovered tablets promptly without the fleet
+		// stampeding the tablet in lockstep after a shared outage. See #19894.
 		select {
 		case <-thc.ctx.Done():
 			return
-		case <-time.After(retryDelay):
-			// Exponentially back-off to prevent tight-loop.
-			retryDelay *= 2
-			// Limit the retry delay backoff to the health check timeout
-			if retryDelay > hc.healthCheckTimeout {
-				retryDelay = hc.healthCheckTimeout
-			}
+		case <-time.After(retryInterval(hc.retryDelay)):
 		}
 	}
 }
 
+// retryInterval returns the configured retry delay with +/-25% jitter applied.
+// The jitter de-synchronizes reconnection attempts across vtgate instances so a
+// fleet recovering from a shared tablet outage does not stampede the tablet at
+// the same instant, while never growing the interval the way exponential
+// backoff did. See #19894.
+func retryInterval(base time.Duration) time.Duration {
+	// half is the jitter span passed to rand.Int64N, which panics on n <= 0. For
+	// a non-positive base, or one so small that base/2 truncates to zero (sub-2ns),
+	// there is no room to jitter, so return the base unchanged.
+	half := base / 2
+	if half <= 0 {
+		return base
+	}
+	// Spread uniformly within [base-base/4, base+base/4).
+	return base - base/4 + time.Duration(rand.Int64N(int64(half)))
+}
+
 func (thc *tabletHealthCheck) closeConnection(ctx context.Context, err error) {
 	thc.logger.Warningf("tablet %v healthcheck stream error: %v", thc.Tablet, err)
 	thc.setServingState(false, err.Error())
diff --git a/go/vt/discovery/tablet_health_check_test.go b/go/vt/discovery/tablet_health_check_test.go
new file mode 100644
index 00000000000..63ce59283b7
--- /dev/null
+++ b/go/vt/discovery/tablet_health_check_test.go
@@ -0,0 +1,62 @@
+/*
+Copyright 2026 The Vitess Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package discovery
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestRetryInterval verifies that the retry interval stays within a bounded
+// jitter window around the configured base delay. The jitter de-synchronizes
+// reconnection attempts across vtgate instances so that a fleet recovering from
+// a shared outage does not stampede a tablet at the same instant (#19894),
+// while never growing the interval the way exponential backoff did.
+func TestRetryInterval(t *testing.T) {
+	const base = 5 * time.Second
+	lower := time.Duration(float64(base) * 0.75)
+	upper := time.Duration(float64(base) * 1.25)
+
+	seen := make(map[time.Duration]struct{})
+	for range 1000 {
+		got := retryInterval(base)
+		assert.GreaterOrEqual(t, got, lower, "interval must not drop below 75% of base")
+		assert.LessOrEqual(t, got, upper, "interval must not exceed 125% of base")
+		seen[got] = struct{}{}
+	}
+
+	// Jitter must actually vary; a single repeated value would re-synchronize
+	// the fleet and defeat the purpose.
+	assert.Greater(t, len(seen), 1, "retry interval should vary across calls")
+}
+
+// TestRetryIntervalNonPositive verifies that a zero or negative base delay is
+// returned unchanged, so the jitter math never produces a panic or a negative
+// sleep duration.
+func TestRetryIntervalNonPositive(t *testing.T) {
+	assert.Equal(t, time.Duration(0), retryInterval(0))
+	assert.Equal(t, -time.Second, retryInterval(-time.Second))
+}
+
+// TestRetryIntervalTinyBase verifies that a positive base too small to jitter
+// (base/2 truncates to zero) is returned unchanged rather than panicking inside
+// rand.Int64N, which rejects a non-positive argument.
+func TestRetryIntervalTinyBase(t *testing.T) {
+	assert.Equal(t, time.Nanosecond, retryInterval(time.Nanosecond))
+}
diff --git a/go/vt/vtgate/vtgate.go b/go/vt/vtgate/vtgate.go
index b2ee573e3b4..8ca37b5074b 100644
--- a/go/vt/vtgate/vtgate.go
+++ b/go/vt/vtgate/vtgate.go
@@ -87,8 +87,10 @@ var (
 	preventCrossKeyspaceReads bool
 	enableShardRouting        bool
 
-	// healthCheckRetryDelay is the time to wait before retrying healthcheck
-	healthCheckRetryDelay = 2 * time.Millisecond
+	// healthCheckRetryDelay is the fixed interval between healthcheck reconnection
+	// attempts. Since it is now a steady-state interval rather than a backoff seed,
+	// it defaults to discovery's 5s instead of a few milliseconds.
+	healthCheckRetryDelay = discovery.DefaultHealthCheckRetryDelay
 	// healthCheckTimeout is the timeout on the RPC call to tablets
 	healthCheckTimeout = time.Minute