Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,13 @@ func (erp *EmergencyReparenter) reparentReplicas(

handleReplica := func(alias string, ti *topo.TabletInfo) {
defer replWg.Done()
defer func() {
if r := recover(); r != nil {
err := vterrors.Errorf(vtrpc.Code_INTERNAL, "panic in replica handler for %v: %v", alias, r)
erp.logger.Errorf("%v", err)
rec.RecordError(err)
}
}()
erp.logger.Infof("setting new primary on replica %v", alias)

forceStart := false
Expand Down Expand Up @@ -719,9 +726,14 @@ func (erp *EmergencyReparenter) reparentReplicas(
// On primary failure, replCancel() is called immediately below,
// which is safe because cancel functions are idempotent.
go func() {
defer allReplicasDoneCancel()
defer replCancel()
defer func() {
if r := recover(); r != nil {
erp.logger.Errorf("panic while waiting for replicas to finish: %v", r)
}
}()
replWg.Wait()
allReplicasDoneCancel()
replCancel()
}()

primaryErr := handlePrimary(topoproto.TabletAliasString(newPrimaryTablet.Alias), newPrimaryTablet)
Expand Down Expand Up @@ -835,8 +847,8 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
// constraint failures and can make forward progress on being promoted. It will filter out candidates taking backups
// if possible.
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackupState map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
var notPreferredValidTablets []*topodatapb.Tablet
restrictedValidTablets := make([]*topodatapb.Tablet, 0, len(validTablets))
notPreferredValidTablets := make([]*topodatapb.Tablet, 0, len(validTablets))
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
// Remove tablets which have MustNot promote rule since they must never be promoted
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4939,7 +4939,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
validTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet},
tabletsReachable: []*topodatapb.Tablet{primaryTablet, replicaTablet, rdonlyTablet, rdonlyCrossCellTablet},
tabletsTakingBackup: noTabletsTakingBackup,
filteredTablets: nil,
filteredTablets: []*topodatapb.Tablet{},
}, {
name: "filter mixed",
durability: policy.DurabilityCrossCell,
Expand Down
18 changes: 15 additions & 3 deletions go/vt/vtctl/reparentutil/planned_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,9 +771,21 @@ func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabl
if err != nil {
return err
}
// We are ignoring the error in conversion because some MySQL variants might not have this
// status variable like MariaDB.
val, _ := strconv.Atoi(statusValues[InnodbBufferPoolsDataVar])
// Some MySQL variants like MariaDB don't expose this status variable. Omit those
// tablets from the map so a missing or unparseable value can't be confused with a
// legitimate zero when tiebreaking primary election by buffer-pool warmth. We treat
// an empty string the same as a missing key to avoid log spam on variants that
// return the row but with no value.
Comment thread
timvaillancourt marked this conversation as resolved.
rawVal, ok := statusValues[InnodbBufferPoolsDataVar]
if !ok || rawVal == "" {
return nil
}
val, err := strconv.Atoi(rawVal)
if err != nil {
pr.logger.Warningf("could not parse %s=%q for tablet %v as int: %v",
InnodbBufferPoolsDataVar, rawVal, tblStr, err)
return nil
}
mu.Lock()
defer mu.Unlock()
innodbBufferPoolsData[tblStr] = val
Expand Down
89 changes: 87 additions & 2 deletions go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4334,6 +4334,90 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
},
},
wantErr: "context deadline exceeded",
}, {
// Mixed status-variable shapes that the buffer-pool tiebreaking
// fallback must omit from the returned map:
// - missing key (e.g. MariaDB doesn't expose this status variable)
// - present but empty string (variant returns the row with no value)
// - present but non-numeric (logged as a warning, then omitted)
// - present and numeric (kept)
// Each omitted tablet still participates in PRS — only its
// buffer-pool warmth score is dropped so the sorter's all-or-nothing
// gate falls back to durability ordering instead of treating the
// missing tablet as a legitimate zero.
name: "Missing, empty, and non-numeric values are omitted",
tmc: &testutil.TabletManagerClient{
GetGlobalStatusVarsResults: map[string]struct {
Statuses map[string]string
Error error
}{
"zone1-0000000100": {
Statuses: map[string]string{
InnodbBufferPoolsDataVar: "1231",
},
},
"zone1-0000000200": {
// Missing key: variant doesn't expose the status variable.
Statuses: map[string]string{},
},
"zone1-0000000201": {
// Present but empty.
Statuses: map[string]string{
InnodbBufferPoolsDataVar: "",
},
},
"zone1-0000000202": {
// Present but non-numeric.
Statuses: map[string]string{
InnodbBufferPoolsDataVar: "not-a-number",
},
},
},
},
tabletMap: map[string]*topo.TabletInfo{
"zone1-0000000100": {
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 100,
},
Type: topodatapb.TabletType_PRIMARY,
},
},
"zone1-0000000200": {
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 200,
},
Type: topodatapb.TabletType_REPLICA,
},
},
"zone1-0000000201": {
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 201,
},
Type: topodatapb.TabletType_REPLICA,
},
},
"zone1-0000000202": {
Tablet: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Uid: 202,
},
Type: topodatapb.TabletType_REPLICA,
},
},
},
// Only the numeric value survives. The other three are reachable
// (no RPC error) so the function still returns nil — verifyAllTabletsReachable
// is about reachability, not about populating every tablet's score.
wantBufferPoolsData: map[string]int{
"zone1-0000000100": 1231,
},
}, {
name: "Restore tablet skipped before RPC",
tmc: &testutil.TabletManagerClient{
Expand Down Expand Up @@ -4402,8 +4486,9 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
defer ts.Close()

pr := &PlannedReparenter{
ts: ts,
tmc: tt.tmc,
ts: ts,
tmc: tt.tmc,
logger: logutil.NewMemoryLogger(),
}
if tt.remoteOpTime != 0 {
oldTime := topo.RemoteOperationTimeout
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtctl/reparentutil/replication.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ func stopReplicationAndBuildStatusMaps(
res = &replicationSnapshot{
statusMap: map[string]*replicationdatapb.StopReplicationStatus{},
primaryStatusMap: map[string]*replicationdatapb.PrimaryStatus{},
reachableTablets: []*topodatapb.Tablet{},
reachableTablets: make([]*topodatapb.Tablet, 0, len(tabletMap)),
tabletsBackupState: map[string]bool{},
}
)
Expand Down
35 changes: 23 additions & 12 deletions go/vt/vtctl/reparentutil/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,7 @@ func ElectNewPrimary(
}

var (
// mutex to secure the next two fields from concurrent access
mu sync.Mutex
// tablets that are possible candidates to be the new primary and their positions
validTablets []*topodatapb.Tablet
tabletPositions []*RelayLogPositions
innodbBufferPool []int
mu sync.Mutex
errorGroup, groupCtx = errgroup.WithContext(ctx)
)

Expand Down Expand Up @@ -123,6 +118,9 @@ func ElectNewPrimary(
return candidates[0].Alias, nil
}

validTablets := make([]*topodatapb.Tablet, 0, len(candidates))
tabletPositions := make([]*RelayLogPositions, 0, len(candidates))

for _, tablet := range candidates {
tb := tablet
errorGroup.Go(func() error {
Expand All @@ -138,7 +136,6 @@ func ElectNewPrimary(
} else {
validTablets = append(validTablets, tb)
tabletPositions = append(tabletPositions, pos)
innodbBufferPool = append(innodbBufferPool, innodbBufferPoolData[topoproto.TabletAliasString(tb.Alias)])
}
} else {
fmt.Fprintf(&reasonsToInvalidate, "\n%v has %v replication lag which is more than the tolerable amount", topoproto.TabletAliasString(tablet.Alias), replLag)
Comment on lines 136 to 141
Expand All @@ -157,6 +154,24 @@ func ElectNewPrimary(
return nil, vterrors.Errorf(vtrpc.Code_INTERNAL, "cannot find a tablet to reparent to%v", reasonsToInvalidate.String())
}

// Use buffer-pool data for tiebreaking only if every valid tablet has it. A missing
// entry (e.g. MariaDB, which doesn't expose Innodb_buffer_pool_pages_data) appended as
// a zero would unfairly outrank a legitimately low value. We gate on validTablets
// rather than candidates so that an ineligible candidate (taking backup, excess lag,
// etc.) doesn't disable tiebreaking for the rest.
var innodbBufferPool []int
if len(innodbBufferPoolData) > 0 {
innodbBufferPool = make([]int, 0, len(validTablets))
for _, t := range validTablets {
v, ok := innodbBufferPoolData[topoproto.TabletAliasString(t.Alias)]
if !ok {
innodbBufferPool = nil
break
}
innodbBufferPool = append(innodbBufferPool, v)
}
}

// sort preferred tablets for finding the best primary
err = sortTabletsForReparent(validTablets, tabletPositions, innodbBufferPool, opts.durability)
if err != nil {
Expand Down Expand Up @@ -379,11 +394,7 @@ func waitForCatchUp(
// Wait until the new primary has caught upto that position
waitForPosCtx, cancelFunc := context.WithTimeout(ctx, waitTime)
defer cancelFunc()
err = tmc.WaitForPosition(waitForPosCtx, newPrimary, pos)
if err != nil {
return err
}
return nil
return tmc.WaitForPosition(waitForPosCtx, newPrimary, pos)
}

// GetBackupCandidates is used to get a list of healthy tablets for backup
Expand Down
Loading
Loading