From 7e323a86c1d5c6bf34de792d5add7b23ddbba986 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Wed, 27 May 2026 20:13:56 +0200 Subject: [PATCH 1/3] fix quality/correctness bugs Signed-off-by: Tim Vaillancourt --- .../reparentutil/emergency_reparenter.go | 20 ++++++++--- .../reparentutil/emergency_reparenter_test.go | 2 +- .../vtctl/reparentutil/planned_reparenter.go | 16 +++++++-- go/vt/vtctl/reparentutil/replication.go | 2 +- go/vt/vtctl/reparentutil/util.go | 35 ++++++++++++------- 5 files changed, 54 insertions(+), 21 deletions(-) diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go index cff8f1dc49c..f246a174dd8 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go @@ -652,6 +652,13 @@ func (erp *EmergencyReparenter) reparentReplicas( handleReplica := func(alias string, ti *topo.TabletInfo) { defer replWg.Done() + defer func() { + if r := recover(); r != nil { + err := vterrors.Errorf(vtrpc.Code_INTERNAL, "panic in replica handler for %v: %v", alias, r) + erp.logger.Errorf("%v", err) + rec.RecordError(err) + } + }() erp.logger.Infof("setting new primary on replica %v", alias) forceStart := false @@ -717,9 +724,14 @@ func (erp *EmergencyReparenter) reparentReplicas( // On primary failure, replCancel() is called immediately below, // which is safe because cancel functions are idempotent. go func() { + defer allReplicasDoneCancel() + defer replCancel() + defer func() { + if r := recover(); r != nil { + erp.logger.Errorf("panic while waiting for replicas to finish: %v", r) + } + }() replWg.Wait() - allReplicasDoneCancel() - replCancel() }() primaryErr := handlePrimary(topoproto.TabletAliasString(newPrimaryTablet.Alias), newPrimaryTablet) @@ -833,8 +845,8 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate( // constraint failures and can make forward progress on being promoted. It will filter out candidates taking backups // if possible. func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackupState map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) { - var restrictedValidTablets []*topodatapb.Tablet - var notPreferredValidTablets []*topodatapb.Tablet + restrictedValidTablets := make([]*topodatapb.Tablet, 0, len(validTablets)) + notPreferredValidTablets := make([]*topodatapb.Tablet, 0, len(validTablets)) for _, tablet := range validTablets { tabletAliasStr := topoproto.TabletAliasString(tablet.Alias) // Remove tablets which have MustNot promote rule since they must never be promoted diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go index 19abf15b513..9b627bf8f3a 100644 --- a/go/vt/vtctl/reparentutil/emergency_reparenter_test.go +++ b/go/vt/vtctl/reparentutil/emergency_reparenter_test.go @@ -4939,7 +4939,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) { validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet}, tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet}, tabletsTakingBackup: noTabletsTakingBackup, - filteredTablets: nil, + filteredTablets: []*topodatapb.Tablet{}, }, { name: "filter mixed", durability: policy.DurabilityCrossCell, diff --git a/go/vt/vtctl/reparentutil/planned_reparenter.go b/go/vt/vtctl/reparentutil/planned_reparenter.go index 2193af24dd2..805fa7fd422 100644 --- a/go/vt/vtctl/reparentutil/planned_reparenter.go +++ b/go/vt/vtctl/reparentutil/planned_reparenter.go @@ -771,9 +771,19 @@ func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabl if err != nil { return err } - // We are ignoring the error in conversion because some MySQL variants might not have this - // status variable like MariaDB. - val, _ := strconv.Atoi(statusValues[InnodbBufferPoolsDataVar]) + // Some MySQL variants like MariaDB don't expose this status variable. Omit those + // tablets from the map so a missing or unparseable value can't be confused with a + // legitimate zero when tiebreaking primary election by buffer-pool warmth. + rawVal, ok := statusValues[InnodbBufferPoolsDataVar] + if !ok { + return nil + } + val, err := strconv.Atoi(rawVal) + if err != nil { + pr.logger.Warningf("could not parse %s=%q for tablet %v as int: %v", + InnodbBufferPoolsDataVar, rawVal, tblStr, err) + return nil + } mu.Lock() defer mu.Unlock() innodbBufferPoolsData[tblStr] = val diff --git a/go/vt/vtctl/reparentutil/replication.go b/go/vt/vtctl/reparentutil/replication.go index 1cedafcd5d7..51a474e37f9 100644 --- a/go/vt/vtctl/reparentutil/replication.go +++ b/go/vt/vtctl/reparentutil/replication.go @@ -291,7 +291,7 @@ func stopReplicationAndBuildStatusMaps( res = &replicationSnapshot{ statusMap: map[string]*replicationdatapb.StopReplicationStatus{}, primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{}, - reachableTablets: []*topodatapb.Tablet{}, + reachableTablets: make([]*topodatapb.Tablet, 0, len(tabletMap)), tabletsBackupState: map[string]bool{}, } ) diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go index dd52a0ec19c..80681e6c963 100644 --- a/go/vt/vtctl/reparentutil/util.go +++ b/go/vt/vtctl/reparentutil/util.go @@ -81,12 +81,7 @@ func ElectNewPrimary( } var ( - // mutex to secure the next two fields from concurrent access - mu sync.Mutex - // tablets that are possible candidates to be the new primary and their positions - validTablets []*topodatapb.Tablet - tabletPositions []*RelayLogPositions - innodbBufferPool []int + mu sync.Mutex errorGroup, groupCtx = errgroup.WithContext(ctx) ) @@ -123,6 +118,9 @@ func ElectNewPrimary( return candidates[0].Alias, nil } + validTablets := make([]*topodatapb.Tablet, 0, len(candidates)) + tabletPositions := make([]*RelayLogPositions, 0, len(candidates)) + for _, tablet := range candidates { tb := tablet errorGroup.Go(func() error { @@ -138,7 +136,6 @@ func ElectNewPrimary( } else { validTablets = append(validTablets, tb) tabletPositions = append(tabletPositions, pos) - innodbBufferPool = append(innodbBufferPool, innodbBufferPoolData[topoproto.TabletAliasString(tb.Alias)]) } } else { fmt.Fprintf(&reasonsToInvalidate, "\n%v has %v replication lag which is more than the tolerable amount", topoproto.TabletAliasString(tablet.Alias), replLag) @@ -157,6 +154,24 @@ func ElectNewPrimary( return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "cannot find a tablet to reparent to%v", reasonsToInvalidate.String()) } + // Use buffer-pool data for tiebreaking only if every valid tablet has it. A missing + // entry (e.g. MariaDB, which doesn't expose Innodb_buffer_pool_pages_data) appended as + // a zero would unfairly outrank a legitimately low value. We gate on validTablets + // rather than candidates so that an ineligible candidate (taking backup, excess lag, + // etc.) doesn't disable tiebreaking for the rest. + var innodbBufferPool []int + if len(innodbBufferPoolData) > 0 { + innodbBufferPool = make([]int, 0, len(validTablets)) + for _, t := range validTablets { + v, ok := innodbBufferPoolData[topoproto.TabletAliasString(t.Alias)] + if !ok { + innodbBufferPool = nil + break + } + innodbBufferPool = append(innodbBufferPool, v) + } + } + // sort preferred tablets for finding the best primary err = sortTabletsForReparent(validTablets, tabletPositions, innodbBufferPool, opts.durability) if err != nil { @@ -379,11 +394,7 @@ func waitForCatchUp( // Wait until the new primary has caught upto that position waitForPosCtx, cancelFunc := context.WithTimeout(ctx, waitTime) defer cancelFunc() - err = tmc.WaitForPosition(waitForPosCtx, newPrimary, pos) - if err != nil { - return err - } - return nil + return tmc.WaitForPosition(waitForPosCtx, newPrimary, pos) } // GetBackupCandidates is used to get a list of healthy tablets for backup From 4be0b0fed9bae3caa2f31c064cdf7226b1130b68 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Thu, 28 May 2026 14:18:36 +0200 Subject: [PATCH 2/3] verifyAllTabletsReachable: treat empty status value same as missing key Signed-off-by: Tim Vaillancourt --- go/vt/vtctl/reparentutil/planned_reparenter.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/go/vt/vtctl/reparentutil/planned_reparenter.go b/go/vt/vtctl/reparentutil/planned_reparenter.go index 805fa7fd422..e9621fcf632 100644 --- a/go/vt/vtctl/reparentutil/planned_reparenter.go +++ b/go/vt/vtctl/reparentutil/planned_reparenter.go @@ -773,9 +773,11 @@ func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabl } // Some MySQL variants like MariaDB don't expose this status variable. Omit those // tablets from the map so a missing or unparseable value can't be confused with a - // legitimate zero when tiebreaking primary election by buffer-pool warmth. + // legitimate zero when tiebreaking primary election by buffer-pool warmth. We treat + // an empty string the same as a missing key to avoid log spam on variants that + // return the row but with no value. rawVal, ok := statusValues[InnodbBufferPoolsDataVar] - if !ok { + if !ok || rawVal == "" { return nil } val, err := strconv.Atoi(rawVal) From 75ead54b2529ecf8a9678f76cd64761f8767fe95 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Wed, 3 Jun 2026 16:36:43 +0200 Subject: [PATCH 3/3] tests: cover buffer-pool fallback paths (mattlord review) - verifyAllTabletsReachable: assert missing, empty, and non-numeric Innodb_buffer_pool_pages_data values are all omitted from the returned map - ElectNewPrimary: partial buffer-pool data skips tiebreaking and falls through to alias; an ineligible candidate without data does NOT disable tiebreaking for the rest (gate is on validTablets, not candidates) Signed-off-by: Tim Vaillancourt --- .../planned_reparenter_flaky_test.go | 89 ++++++++++- go/vt/vtctl/reparentutil/util_test.go | 150 ++++++++++++++++++ 2 files changed, 237 insertions(+), 2 deletions(-) diff --git a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go index 60da442858a..dc0ad46797e 100644 --- a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go +++ b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go @@ -4334,6 +4334,90 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) { }, }, wantErr: "context deadline exceeded", + }, { + // Mixed status-variable shapes that the buffer-pool tiebreaking + // fallback must omit from the returned map: + // - missing key (e.g. MariaDB doesn't expose this status variable) + // - present but empty string (variant returns the row with no value) + // - present but non-numeric (logged as a warning, then omitted) + // - present and numeric (kept) + // Each omitted tablet still participates in PRS — only its + // buffer-pool warmth score is dropped so the sorter's all-or-nothing + // gate falls back to durability ordering instead of treating the + // missing tablet as a legitimate zero. + name: "Missing, empty, and non-numeric values are omitted", + tmc: &testutil.TabletManagerClient{ + GetGlobalStatusVarsResults: map[string]struct { + Statuses map[string]string + Error error + }{ + "zone1-0000000100": { + Statuses: map[string]string{ + InnodbBufferPoolsDataVar: "1231", + }, + }, + "zone1-0000000200": { + // Missing key: variant doesn't expose the status variable. + Statuses: map[string]string{}, + }, + "zone1-0000000201": { + // Present but empty. + Statuses: map[string]string{ + InnodbBufferPoolsDataVar: "", + }, + }, + "zone1-0000000202": { + // Present but non-numeric. + Statuses: map[string]string{ + InnodbBufferPoolsDataVar: "not-a-number", + }, + }, + }, + }, + tabletMap: map[string]*topo.TabletInfo{ + "zone1-0000000100": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "zone1-0000000200": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 200, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "zone1-0000000201": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 201, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "zone1-0000000202": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 202, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + // Only the numeric value survives. The other three are reachable + // (no RPC error) so the function still returns nil — verifyAllTabletsReachable + // is about reachability, not about populating every tablet's score. + wantBufferPoolsData: map[string]int{ + "zone1-0000000100": 1231, + }, }, { name: "Restore tablet skipped before RPC", tmc: &testutil.TabletManagerClient{ @@ -4402,8 +4486,9 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) { defer ts.Close() pr := &PlannedReparenter{ - ts: ts, - tmc: tt.tmc, + ts: ts, + tmc: tt.tmc, + logger: logutil.NewMemoryLogger(), } if tt.remoteOpTime != 0 { oldTime := topo.RemoteOperationTimeout diff --git a/go/vt/vtctl/reparentutil/util_test.go b/go/vt/vtctl/reparentutil/util_test.go index 0460f271b98..01f8e97fc36 100644 --- a/go/vt/vtctl/reparentutil/util_test.go +++ b/go/vt/vtctl/reparentutil/util_test.go @@ -813,6 +813,156 @@ func TestElectNewPrimary(t *testing.T) { }, errContains: nil, }, + { + // Buffer-pool tiebreaking must be skipped unless every VALID tablet has + // a value. A missing entry (e.g. a MariaDB replica that doesn't expose + // Innodb_buffer_pool_pages_data) used to be treated as a legitimate 0, + // which could unfairly outrank a tablet with a legitimately low buffer + // pool. When one valid tablet is missing data we fall back to the + // next tiebreaker (alias). + // + // Setup: 101 and 102 tied on position; 102 has buffer data, 101 doesn't. + // If tiebreaking applied: 102 would win (higher buffer). The gate + // rejects partial data, falls through to alias, and 101 wins. + name: "buffer pool tiebreaking skipped when a valid tablet lacks data", + tmc: &chooseNewPrimaryTestTMClient{ + replicationStatuses: map[string]*replicationdatapb.Status{ + "zone1-0000000101": { + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + }, + "zone1-0000000102": { + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + }, + }, + }, + innodbBufferPoolData: map[string]int{ + "zone1-0000000102": 50, + }, + shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{ + PrimaryAlias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + }, nil), + tabletMap: map[string]*topo.TabletInfo{ + "primary": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "replica1": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 101, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "replica2": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 102, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + avoidPrimaryAlias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 0, + }, + expected: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 101, + }, + errContains: nil, + }, + { + // The buffer-pool gate is on validTablets (post-RPC filtering), not on + // all candidates. An ineligible candidate (taking a backup, lagging + // beyond the tolerable threshold, etc.) that lacks buffer-pool data + // must NOT disable tiebreaking for the rest. Here 103 is taking a + // backup and has no buffer-pool entry; tiebreaking still applies for + // 101 vs 102 and 102 wins on higher buffer pool. + name: "buffer pool tiebreaking applies when only an ineligible candidate lacks data", + tmc: &chooseNewPrimaryTestTMClient{ + replicationStatuses: map[string]*replicationdatapb.Status{ + "zone1-0000000101": { + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + }, + "zone1-0000000102": { + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + }, + "zone1-0000000103": { + Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-5", + BackupRunning: true, + }, + }, + }, + innodbBufferPoolData: map[string]int{ + "zone1-0000000101": 50, + "zone1-0000000102": 100, + }, + shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{ + PrimaryAlias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + }, nil), + tabletMap: map[string]*topo.TabletInfo{ + "primary": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 100, + }, + Type: topodatapb.TabletType_PRIMARY, + }, + }, + "replica1": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 101, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "replica2": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 102, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + "replica3": { + Tablet: &topodatapb.Tablet{ + Alias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 103, + }, + Type: topodatapb.TabletType_REPLICA, + }, + }, + }, + avoidPrimaryAlias: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 0, + }, + expected: &topodatapb.TabletAlias{ + Cell: "zone1", + Uid: 102, + }, + errContains: nil, + }, { name: "no active primary in shard", tmc: &chooseNewPrimaryTestTMClient{