Skip to content

Commit 3e8df40

Browse files
vtorc: use *topoprotopb.TabletAlias for tablet alias (vitessio#18389)
Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>
1 parent fda348c commit 3e8df40

31 files changed

Lines changed: 594 additions & 434 deletions

go/test/endtoend/vtorc/api/api_test.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func TestAPIEndpoints(t *testing.T) {
8585
})
8686

8787
// Before we disable recoveries, let us wait until VTOrc has fixed all the issues (if any).
88-
_, _ = utils.MakeAPICallRetry(t, vtorc, "/api/replication-analysis", func(_ int, response string) bool {
88+
_, _ = utils.MakeAPICallRetry(t, vtorc, "/api/detection-analysis", func(i int, response string) bool {
8989
return response != "null"
9090
})
9191

@@ -164,41 +164,41 @@ func TestAPIEndpoints(t *testing.T) {
164164
assert.Equal(t, "Global recoveries disabled\n", resp)
165165
})
166166

167-
t.Run("Replication Analysis API", func(t *testing.T) {
167+
t.Run("Detection Analysis API", func(t *testing.T) {
168168
// use vtctldclient to stop replication
169169
_, err := clusterInfo.ClusterInstance.VtctldClientProcess.ExecuteCommandWithOutput("StopReplication", replica.Alias)
170170
require.NoError(t, err)
171171

172172
// We know VTOrc won't fix this since we disabled global recoveries!
173173
// Wait until VTOrc picks up on this issue and verify
174-
// that we see a not null result on the api/replication-analysis page
175-
status, resp := utils.MakeAPICallRetry(t, vtorc, "/api/replication-analysis", func(_ int, response string) bool {
174+
// that we see a not null result on the api/detection-analysis page
175+
status, resp := utils.MakeAPICallRetry(t, vtorc, "/api/detection-analysis", func(_ int, response string) bool {
176176
return response == "null"
177177
})
178178
assert.Equal(t, 200, status, resp)
179179
assert.Contains(t, resp, fmt.Sprintf(`"AnalyzedInstanceAlias": "%s"`, replica.Alias))
180180
assert.Contains(t, resp, `"Analysis": "ReplicationStopped"`)
181181

182182
// Verify that filtering also works in the API as intended
183-
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/replication-analysis?keyspace=ks&shard=0")
183+
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/detection-analysis?keyspace=ks&shard=0")
184184
require.NoError(t, err)
185185
assert.Equal(t, 200, status, resp)
186186
assert.Contains(t, resp, fmt.Sprintf(`"AnalyzedInstanceAlias": "%s"`, replica.Alias))
187187

188188
// Verify that filtering by keyspace also works in the API as intended
189-
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/replication-analysis?keyspace=ks")
189+
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/detection-analysis?keyspace=ks")
190190
require.NoError(t, err)
191191
assert.Equal(t, 200, status, resp)
192192
assert.Contains(t, resp, fmt.Sprintf(`"AnalyzedInstanceAlias": "%s"`, replica.Alias))
193193

194194
// Check that filtering using keyspace and shard works
195-
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/replication-analysis?keyspace=ks&shard=80-")
195+
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/detection-analysis?keyspace=ks&shard=80-")
196196
require.NoError(t, err)
197197
assert.Equal(t, 200, status, resp)
198198
assert.Equal(t, "null", resp)
199199

200200
// Check that filtering using just the shard fails
201-
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/replication-analysis?shard=0")
201+
status, resp, err = utils.MakeAPICall(t, vtorc, "/api/detection-analysis?shard=0")
202202
require.NoError(t, err)
203203
assert.Equal(t, 400, status, resp)
204204
assert.Equal(t, "Filtering by shard without keyspace isn't supported\n", resp)

go/test/endtoend/vtorc/general/vtorc_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ func TestFullStatusConnectionPooling(t *testing.T) {
848848

849849
// Wait until VTOrc notices the failure.
850850
require.Eventually(t, func() bool {
851-
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/replication-analysis")
851+
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/detection-analysis")
852852
return err == nil && status == 200 && strings.Contains(resp, "UnreachablePrimary")
853853
}, 90*time.Second, time.Second, "timed out waiting for UnreachablePrimary analysis")
854854

@@ -860,7 +860,7 @@ func TestFullStatusConnectionPooling(t *testing.T) {
860860

861861
// See that VTOrc eventually reports no errors.
862862
require.Eventually(t, func() bool {
863-
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/replication-analysis")
863+
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/detection-analysis")
864864
return err == nil && status == 200 && strings.TrimSpace(resp) == "null"
865865
}, 90*time.Second, time.Second, "timed out waiting for replication analysis to clear")
866866

@@ -870,7 +870,7 @@ func TestFullStatusConnectionPooling(t *testing.T) {
870870

871871
// Wait until VTOrc notices the failure.
872872
require.Eventually(t, func() bool {
873-
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/replication-analysis")
873+
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/detection-analysis")
874874
return err == nil && status == 200 && strings.Contains(resp, "UnreachablePrimary")
875875
}, 90*time.Second, time.Second, "timed out waiting for UnreachablePrimary analysis")
876876

@@ -882,7 +882,7 @@ func TestFullStatusConnectionPooling(t *testing.T) {
882882

883883
// See that VTOrc eventually reports no errors.
884884
require.Eventually(t, func() bool {
885-
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/replication-analysis")
885+
status, resp, err := utils.MakeAPICall(t, vtorc, "/api/detection-analysis")
886886
return err == nil && status == 200 && strings.TrimSpace(resp) == "null"
887887
}, 90*time.Second, time.Second, "timed out waiting for replication analysis to clear")
888888
}

go/test/endtoend/vtorc/readtopologyinstance/main_test.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import (
2626
"vitess.io/vitess/go/test/endtoend/vtorc/utils"
2727
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
2828
"vitess.io/vitess/go/vt/servenv"
29-
29+
"vitess.io/vitess/go/vt/topo/topoproto"
3030
"vitess.io/vitess/go/vt/vtorc/config"
3131
"vitess.io/vitess/go/vt/vtorc/inst"
3232
"vitess.io/vitess/go/vt/vtorc/logic"
@@ -78,13 +78,19 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
7878
}
7979
}
8080

81-
primaryInstance, err := inst.ReadTopologyInstanceBufferable(primary.Alias, nil)
81+
primaryAlias, err := topoproto.ParseTabletAlias(primary.Alias)
82+
require.NoError(t, err)
83+
84+
replicaAlias, err := topoproto.ParseTabletAlias(replica.Alias)
85+
require.NoError(t, err)
86+
87+
primaryInstance, err := inst.ReadTopologyInstanceBufferable(primaryAlias, nil)
8288
require.NoError(t, err)
8389
require.NotNil(t, primaryInstance)
8490
assert.Equal(t, utils.Hostname, primaryInstance.Hostname)
8591
assert.Equal(t, primary.MySQLPort, primaryInstance.Port)
8692
assert.Equal(t, topodatapb.TabletType_PRIMARY, primaryInstance.TabletType)
87-
assert.Contains(t, primaryInstance.InstanceAlias, "zone1")
93+
assert.Contains(t, primaryInstance.InstanceAlias.String(), "zone1")
8894
assert.NotEqual(t, 0, primaryInstance.ServerID)
8995
assert.Greater(t, len(primaryInstance.ServerUUID), 10)
9096
assert.Regexp(t, "[58].[704].*", primaryInstance.Version)
@@ -131,13 +137,13 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
131137
err = logic.EnableRecovery()
132138
require.NoError(t, err)
133139

134-
replicaInstance, err := inst.ReadTopologyInstanceBufferable(replica.Alias, nil)
140+
replicaInstance, err := inst.ReadTopologyInstanceBufferable(replicaAlias, nil)
135141
require.NoError(t, err)
136142
require.NotNil(t, replicaInstance)
137143
assert.Equal(t, utils.Hostname, replicaInstance.Hostname)
138144
assert.Equal(t, replica.MySQLPort, replicaInstance.Port)
139145
assert.Equal(t, topodatapb.TabletType_REPLICA, replicaInstance.TabletType)
140-
assert.Contains(t, replicaInstance.InstanceAlias, "zone1")
146+
assert.Contains(t, replicaInstance.InstanceAlias.String(), "zone1")
141147
assert.NotEqual(t, 0, replicaInstance.ServerID)
142148
assert.Greater(t, len(replicaInstance.ServerUUID), 10)
143149
assert.Regexp(t, "[58].[704].*", replicaInstance.Version)

go/vt/vtorc/inst/analysis.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ type DetectionAnalysisHints struct {
9393

9494
// DetectionAnalysis represents an analysis of a detected problem.
9595
type DetectionAnalysis struct {
96-
AnalyzedInstanceAlias string
97-
AnalyzedInstancePrimaryAlias string
96+
AnalyzedInstanceAlias *topodatapb.TabletAlias
97+
AnalyzedInstancePrimaryAlias *topodatapb.TabletAlias
9898

9999
// TabletType is the tablet's type as seen in the topology.
100100
TabletType topodatapb.TabletType

go/vt/vtorc/inst/analysis_dao.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func initializeAnalysisDaoPostConfiguration() {
5252
type clusterAnalysis struct {
5353
hasShardWideAction bool
5454
totalTablets int
55-
primaryAlias string
55+
primaryAlias *topodatapb.TabletAlias
5656

5757
// primaryTimestamp is the most recent primary term start time observed for the shard.
5858
primaryTimestamp time.Time
@@ -332,8 +332,8 @@ func GetDetectionAnalysis(keyspace string, shard string, hints *DetectionAnalysi
332332

333333
a.ShardPrimaryTermTimestamp = m.GetTime("shard_primary_term_timestamp")
334334
a.IsPrimary = m.GetBool("is_primary")
335-
a.AnalyzedInstanceAlias = topoproto.TabletAliasString(tablet.Alias)
336-
a.AnalyzedInstancePrimaryAlias = topoproto.TabletAliasString(primaryTablet.Alias)
335+
a.AnalyzedInstanceAlias = tablet.Alias
336+
a.AnalyzedInstancePrimaryAlias = primaryTablet.Alias
337337
a.AnalyzedInstanceBinlogCoordinates = BinlogCoordinates{
338338
LogFile: m.GetString("binary_log_file"),
339339
LogPos: m.GetUint64("binary_log_pos"),
@@ -569,12 +569,13 @@ func postProcessAnalyses(result []*DetectionAnalysis, clusters map[string]*clust
569569
// auditInstanceAnalysisInChangelog will write down an instance's analysis in the database_instance_analysis_changelog table.
570570
// To not repeat recurring analysis code, the database_instance_last_analysis table is used, so that only changes to
571571
// analysis codes are written.
572-
func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisCode) error {
573-
if lastWrittenAnalysis, found := recentInstantAnalysis.Get(tabletAlias); found {
572+
func auditInstanceAnalysisInChangelog(tabletAlias *topodatapb.TabletAlias, analysisCode AnalysisCode) error {
573+
tabletAliasString := topoproto.TabletAliasString(tabletAlias)
574+
if lastWrittenAnalysis, found := recentInstantAnalysis.Get(tabletAliasString); found {
574575
if lastWrittenAnalysis == analysisCode {
575576
// Surely nothing new.
576577
// And let's expand the timeout
577-
recentInstantAnalysis.Set(tabletAlias, analysisCode, cache.DefaultExpiration)
578+
recentInstantAnalysis.Set(tabletAliasString, analysisCode, cache.DefaultExpiration)
578579
return nil
579580
}
580581
}
@@ -590,7 +591,9 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
590591
alias = ?
591592
AND analysis != ?
592593
`,
593-
string(analysisCode), tabletAlias, string(analysisCode),
594+
string(analysisCode),
595+
tabletAliasString,
596+
string(analysisCode),
594597
)
595598
if err != nil {
596599
log.Error(err.Error())
@@ -619,7 +622,8 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
619622
DATETIME('now'),
620623
?
621624
)`,
622-
tabletAlias, string(analysisCode),
625+
tabletAliasString,
626+
string(analysisCode),
623627
)
624628
if err != nil {
625629
log.Error(err.Error())
@@ -632,7 +636,7 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
632636
}
633637
firstInsertion = rows > 0
634638
}
635-
recentInstantAnalysis.Set(tabletAlias, analysisCode, cache.DefaultExpiration)
639+
recentInstantAnalysis.Set(tabletAliasString, analysisCode, cache.DefaultExpiration)
636640
// If the analysis has changed or if it is the first insertion, we need to make sure we write this change to the database.
637641
if !lastAnalysisChanged && !firstInsertion {
638642
return nil
@@ -648,7 +652,8 @@ func auditInstanceAnalysisInChangelog(tabletAlias string, analysisCode AnalysisC
648652
DATETIME('now'),
649653
?
650654
)`,
651-
tabletAlias, string(analysisCode),
655+
tabletAliasString,
656+
string(analysisCode),
652657
)
653658
if err == nil {
654659
analysisChangeWriteCounter.Add(1)

0 commit comments

Comments
 (0)