Skip to content

Commit df7ee68

Browse files
Cherry-pick c3c9914 with conflicts
1 parent bd45bd6 commit df7ee68

6 files changed

Lines changed: 319 additions & 0 deletions

File tree

go/test/endtoend/vtorc/general/vtorc_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ import (
3131
"vitess.io/vitess/go/test/endtoend/vtorc/utils"
3232
"vitess.io/vitess/go/vt/log"
3333
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
34+
<<<<<<< HEAD
35+
=======
36+
vtutils "vitess.io/vitess/go/vt/utils"
37+
"vitess.io/vitess/go/vt/vtctl/reparentutil/policy"
38+
>>>>>>> c3c9914c38 (vtorc: add `StaleTopoPrimary` analysis and recovery (#19173))
3439
"vitess.io/vitess/go/vt/vtorc/inst"
3540
"vitess.io/vitess/go/vt/vtorc/logic"
3641
)
@@ -401,6 +406,76 @@ func TestRepairAfterTER(t *testing.T) {
401406
utils.CheckReplication(t, clusterInfo, newPrimary, []*cluster.Vttablet{curPrimary}, 15*time.Second)
402407
}
403408

409+
// TestStalePrimary tests that an old primary that remains writable and of tablet type PRIMARY in the topo
410+
// is properly demoted to a read-only replica by VTOrc.
411+
func TestStalePrimary(t *testing.T) {
412+
ctx := t.Context()
413+
414+
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
415+
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 4, 0, []string{"--topo-information-refresh-duration", "1s"}, cluster.VTOrcConfiguration{
416+
PreventCrossCellFailover: true,
417+
}, cluster.DefaultVtorcsByCell, policy.DurabilitySemiSync)
418+
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
419+
shard0 := &keyspace.Shards[0]
420+
421+
curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0)
422+
assert.NotNil(t, curPrimary, "should have elected a primary")
423+
utils.CheckPrimaryTablet(t, clusterInfo, curPrimary, true)
424+
425+
var badPrimary, healthyReplica *cluster.Vttablet
426+
for _, tablet := range shard0.Vttablets {
427+
if tablet.Alias == curPrimary.Alias {
428+
continue
429+
}
430+
431+
if badPrimary == nil {
432+
badPrimary = tablet
433+
continue
434+
}
435+
436+
healthyReplica = tablet
437+
}
438+
439+
utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{badPrimary, healthyReplica}, 15*time.Second)
440+
441+
curPrimaryTopo, err := clusterInfo.Ts.GetTablet(ctx, curPrimary.GetAlias())
442+
require.NoError(t, err, "expected to read current primary topo record")
443+
444+
curPrimaryTermStart := protoutil.TimeFromProto(curPrimaryTopo.PrimaryTermStartTime)
445+
require.False(t, curPrimaryTermStart.IsZero(), "expected current primary term start time to be set")
446+
447+
err = utils.RunSQLs(t, []string{"SET GLOBAL read_only = OFF"}, badPrimary, "")
448+
require.NoError(t, err)
449+
require.True(t, utils.WaitForReadOnlyValue(t, badPrimary, 0))
450+
451+
// We set the tablet's type in the topology to PRIMARY. This mimics the situation where during a demotion
452+
// in a hypothetical ERS, the old primary starts running as a replica, but fails before updating the topology
453+
// accordingly.
454+
_, err = clusterInfo.Ts.UpdateTabletFields(ctx, badPrimary.GetAlias(), func(tablet *topodatapb.Tablet) error {
455+
tablet.Type = topodatapb.TabletType_PRIMARY
456+
tablet.PrimaryTermStartTime = protoutil.TimeToProto(curPrimaryTermStart.Add(-1 * time.Minute))
457+
return nil
458+
})
459+
require.NoError(t, err)
460+
461+
// Expect VTOrc to demote the stale primary to a read-only replica.
462+
require.Eventuallyf(t, func() bool {
463+
topoTablet, topoErr := clusterInfo.Ts.GetTablet(ctx, badPrimary.GetAlias())
464+
if topoErr != nil {
465+
t.Logf("stale primary probe: topo error=%v", topoErr)
466+
return false
467+
}
468+
469+
readOnly, readErr := badPrimary.VttabletProcess.GetDBVar("read_only", "")
470+
if readErr != nil {
471+
t.Logf("stale primary probe: alias=%s topo=%v read_only error=%v", badPrimary.Alias, topoTablet.Type, readErr)
472+
return false
473+
}
474+
475+
return topoTablet.Type == topodatapb.TabletType_REPLICA && readOnly == "ON"
476+
}, 30*time.Second, time.Second, "expected demotion to REPLICA with read_only=ON")
477+
}
478+
404479
// TestSemiSync tests that semi-sync is setup correctly by vtorc if it is incorrectly set
405480
func TestSemiSync(t *testing.T) {
406481
// stop any vtorc instance running due to a previous test.

go/vt/external/golib/sqlutils/sqlutils.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ func (rm *RowMap) GetTime(key string) time.Time {
135135
if t, err := time.Parse(DateTimeFormat, rm.GetString(key)); err == nil {
136136
return t
137137
}
138+
139+
if t, err := time.Parse(time.RFC3339Nano, rm.GetString(key)); err == nil {
140+
return t
141+
}
142+
138143
return time.Time{}
139144
}
140145

go/vt/vtorc/inst/analysis.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ const (
5959
PrimarySemiSyncBlocked AnalysisCode = "PrimarySemiSyncBlocked"
6060
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
6161
PrimaryDiskStalled AnalysisCode = "PrimaryDiskStalled"
62+
63+
// StaleTopoPrimary describes when a tablet still has the type PRIMARY in the topology when a newer primary
64+
// has been elected. VTOrc should demote this primary to a replica.
65+
StaleTopoPrimary AnalysisCode = "StaleTopoPrimary"
6266
)
6367

6468
type StructureAnalysisCode string
@@ -84,6 +88,7 @@ type ReplicationAnalysisHints struct {
8488
AuditAnalysis bool
8589
}
8690

91+
<<<<<<< HEAD
8792
// ReplicationAnalysis notes analysis on replication chain status, per instance
8893
type ReplicationAnalysis struct {
8994
AnalyzedInstanceAlias string
@@ -94,6 +99,24 @@ type ReplicationAnalysis struct {
9499
ClusterDetails ClusterInfo
95100
AnalyzedKeyspace string
96101
AnalyzedShard string
102+
=======
103+
// DetectionAnalysis represents an analysis of a detected problem.
104+
type DetectionAnalysis struct {
105+
AnalyzedInstanceAlias string
106+
AnalyzedInstancePrimaryAlias string
107+
108+
// TabletType is the tablet's type as seen in the topology.
109+
TabletType topodatapb.TabletType
110+
111+
// CurrentTabletType is the type this tablet is currently running as.
112+
CurrentTabletType topodatapb.TabletType
113+
114+
PrimaryTimeStamp time.Time
115+
AnalyzedKeyspace string
116+
AnalyzedShard string
117+
AnalyzedKeyspaceEmergencyReparentDisabled bool
118+
AnalyzedShardEmergencyReparentDisabled bool
119+
>>>>>>> c3c9914c38 (vtorc: add `StaleTopoPrimary` analysis and recovery (#19173))
97120
// ShardPrimaryTermTimestamp is the primary term start time stored in the shard record.
98121
ShardPrimaryTermTimestamp string
99122
AnalyzedInstanceBinlogCoordinates BinlogCoordinates

go/vt/vtorc/inst/analysis_dao.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,22 @@ func initializeAnalysisDaoPostConfiguration() {
5151
}
5252

5353
type clusterAnalysis struct {
54+
<<<<<<< HEAD
5455
hasClusterwideAction bool
5556
totalTablets int
5657
primaryAlias string
5758
durability policy.Durabler
59+
=======
60+
hasShardWideAction bool
61+
totalTablets int
62+
primaryAlias string
63+
64+
// primaryTimestamp is the most recent primary term start time observed for the shard.
65+
primaryTimestamp time.Time
66+
67+
// durability is the shard's current durability policy.
68+
durability policy.Durabler
69+
>>>>>>> c3c9914c38 (vtorc: add `StaleTopoPrimary` analysis and recovery (#19173))
5870
}
5971

6072
// GetReplicationAnalysis will check for replication problems (dead primary; unreachable primary; etc)
@@ -379,6 +391,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
379391
if a.TabletType == topodatapb.TabletType_PRIMARY {
380392
a.IsClusterPrimary = true
381393
clusters[keyspaceShard].primaryAlias = a.AnalyzedInstanceAlias
394+
clusters[keyspaceShard].primaryTimestamp = a.PrimaryTimeStamp
382395
}
383396
durabilityPolicy := m.GetString("durability_policy")
384397
if durabilityPolicy == "" {
@@ -455,7 +468,14 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
455468
} else if a.IsClusterPrimary && a.CurrentTabletType != topodatapb.TabletType_UNKNOWN && a.CurrentTabletType != topodatapb.TabletType_PRIMARY {
456469
a.Analysis = PrimaryCurrentTypeMismatch
457470
a.Description = "Primary tablet's current type is not PRIMARY"
471+
<<<<<<< HEAD
458472
} else if topo.IsReplicaType(a.TabletType) && a.ErrantGTID != "" {
473+
=======
474+
case isStaleTopoPrimary(a, ca):
475+
a.Analysis = StaleTopoPrimary
476+
a.Description = "Primary tablet is stale, older than current primary"
477+
case topo.IsReplicaType(a.TabletType) && a.ErrantGTID != "":
478+
>>>>>>> c3c9914c38 (vtorc: add `StaleTopoPrimary` analysis and recovery (#19173))
459479
a.Analysis = ErrantGTIDDetected
460480
a.Description = "Tablet has errant GTIDs"
461481
} else if topo.IsReplicaType(a.TabletType) && ca.primaryAlias == "" && a.ShardPrimaryTermTimestamp == "" {
@@ -603,6 +623,16 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
603623
return result, err
604624
}
605625

626+
// isStaleTopoPrimary returns true when a tablet has type PRIMARY in the topology and has an older primary term
627+
// start time than the shard's current primary.
628+
func isStaleTopoPrimary(tablet *DetectionAnalysis, cluster *clusterAnalysis) bool {
629+
if tablet.TabletType != topodatapb.TabletType_PRIMARY {
630+
return false
631+
}
632+
633+
return tablet.PrimaryTimeStamp.Before(cluster.primaryTimestamp)
634+
}
635+
606636
// postProcessAnalyses is used to update different analyses based on the information gleaned from looking at all the analyses together instead of individual data.
607637
func postProcessAnalyses(result []*ReplicationAnalysis, clusters map[string]*clusterAnalysis) []*ReplicationAnalysis {
608638
for {

go/vt/vtorc/inst/analysis_dao_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,8 +993,113 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
993993
}
994994
}
995995

996+
<<<<<<< HEAD
996997
// TestGetReplicationAnalysis tests the entire GetReplicationAnalysis. It inserts data into the database and runs the function.
997998
// The database is not faked. This is intended to give more test coverage. This test is more comprehensive but more expensive than TestGetReplicationAnalysisDecision.
999+
=======
1000+
// TestStalePrimary tests that an old primary that remains writable and is of tablet type PRIMARY
1001+
// in the topo is demoted to a read-only replica by VTOrc.
1002+
func TestStalePrimary(t *testing.T) {
1003+
oldDB := db.Db
1004+
defer func() {
1005+
db.Db = oldDB
1006+
}()
1007+
1008+
currentPrimaryTimestamp := time.Now().UTC().Truncate(time.Microsecond)
1009+
stalePrimaryTimestamp := currentPrimaryTimestamp.Add(-1 * time.Minute)
1010+
shardPrimaryTermTimestamp := currentPrimaryTimestamp.Format(sqlutils.DateTimeFormat)
1011+
1012+
// We set up a real primary and replica, and then a stale primary running as REPLICA but with
1013+
// tablet type PRIMARY in the topology.
1014+
info := []*test.InfoForRecoveryAnalysis{
1015+
{
1016+
TabletInfo: &topodatapb.Tablet{
1017+
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101},
1018+
Hostname: "localhost",
1019+
Keyspace: "ks",
1020+
Shard: "0",
1021+
Type: topodatapb.TabletType_PRIMARY,
1022+
MysqlHostname: "localhost",
1023+
MysqlPort: 6708,
1024+
},
1025+
DurabilityPolicy: policy.DurabilitySemiSync,
1026+
LastCheckValid: 1,
1027+
CountReplicas: 1,
1028+
CountValidReplicas: 1,
1029+
CountValidReplicatingReplicas: 1,
1030+
IsPrimary: 1,
1031+
SemiSyncPrimaryEnabled: 1,
1032+
SemiSyncPrimaryStatus: 1,
1033+
SemiSyncPrimaryWaitForReplicaCount: 1,
1034+
SemiSyncPrimaryClients: 1,
1035+
CurrentTabletType: int(topodatapb.TabletType_PRIMARY),
1036+
PrimaryTimestamp: &currentPrimaryTimestamp,
1037+
ShardPrimaryTermTimestamp: shardPrimaryTermTimestamp,
1038+
},
1039+
{
1040+
TabletInfo: &topodatapb.Tablet{
1041+
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
1042+
Hostname: "localhost",
1043+
Keyspace: "ks",
1044+
Shard: "0",
1045+
Type: topodatapb.TabletType_REPLICA,
1046+
MysqlHostname: "localhost",
1047+
MysqlPort: 6709,
1048+
},
1049+
DurabilityPolicy: policy.DurabilitySemiSync,
1050+
PrimaryTabletInfo: &topodatapb.Tablet{
1051+
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 101},
1052+
},
1053+
LastCheckValid: 1,
1054+
ReadOnly: 1,
1055+
SemiSyncReplicaEnabled: 1,
1056+
ShardPrimaryTermTimestamp: shardPrimaryTermTimestamp,
1057+
},
1058+
{
1059+
TabletInfo: &topodatapb.Tablet{
1060+
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 102},
1061+
Hostname: "localhost",
1062+
Keyspace: "ks",
1063+
Shard: "0",
1064+
Type: topodatapb.TabletType_PRIMARY,
1065+
MysqlHostname: "localhost",
1066+
MysqlPort: 6710,
1067+
},
1068+
DurabilityPolicy: policy.DurabilitySemiSync,
1069+
LastCheckValid: 1,
1070+
IsPrimary: 1,
1071+
ReadOnly: 0,
1072+
SemiSyncPrimaryEnabled: 1,
1073+
SemiSyncPrimaryStatus: 1,
1074+
SemiSyncPrimaryWaitForReplicaCount: 2,
1075+
SemiSyncPrimaryClients: 1,
1076+
CurrentTabletType: int(topodatapb.TabletType_REPLICA),
1077+
PrimaryTimestamp: &stalePrimaryTimestamp,
1078+
},
1079+
}
1080+
1081+
var rowMaps []sqlutils.RowMap
1082+
for _, analysis := range info {
1083+
analysis.SetValuesFromTabletInfo()
1084+
rowMaps = append(rowMaps, analysis.ConvertToRowMap())
1085+
}
1086+
db.Db = test.NewTestDB([][]sqlutils.RowMap{rowMaps, rowMaps})
1087+
1088+
// Each sampling should yield the placeholder analysis that represents the future recovery behavior once
1089+
// the demotion logic is implemented, which makes this test fail until the actual fix is in place.
1090+
for range 2 {
1091+
got, err := GetDetectionAnalysis("", "", &DetectionAnalysisHints{})
1092+
require.NoError(t, err, "expected detection analysis to run without error")
1093+
require.Len(t, got, 1, "expected exactly one analysis entry for the shard")
1094+
require.Equal(t, AnalysisCode("StaleTopoPrimary"), got[0].Analysis, "expected stale primary analysis")
1095+
require.Equal(t, "ks", got[0].AnalyzedKeyspace, "expected analysis to target keyspace ks")
1096+
require.Equal(t, "0", got[0].AnalyzedShard, "expected analysis to target shard 0")
1097+
}
1098+
}
1099+
1100+
// TestGetDetectionAnalysis tests the entire GetDetectionAnalysis. It inserts data into the database and runs the function.
1101+
// The database is not faked. This is intended to give more test coverage. This test is more comprehensive but more expensive than TestGetDetectionAnalysisDecision.
1102+
>>>>>>> c3c9914c38 (vtorc: add `StaleTopoPrimary` analysis and recovery (#19173))
9981103
// This test is somewhere between a unit test, and an end-to-end test. It is specifically useful for testing situations which are hard to come by in end-to-end test, but require
9991104
// real-world data to test specifically.
10001105
func TestGetReplicationAnalysis(t *testing.T) {

0 commit comments

Comments
 (0)