Skip to content

Commit 4b74648

Browse files
debug logs
Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>
1 parent 25d6515 commit 4b74648

1 file changed

Lines changed: 52 additions & 11 deletions

File tree

go/test/endtoend/vtorc/replicationstalleddiskfull/replication_stalled_test.go

Lines changed: 52 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,32 +69,59 @@ func TestReplicationStalledDiskFull(t *testing.T) {
6969
insertCtx, stopInserts := context.WithCancel(context.Background())
7070
defer stopInserts()
7171

72-
var inserts atomic.Int64
72+
var (
73+
inserts atomic.Int64
74+
insertErrors atomic.Int64
75+
lastErr atomic.Pointer[string]
76+
)
7377
go func() {
7478
for insertCtx.Err() == nil {
75-
_ = runSQLNoFail(insertCtx, "INSERT INTO filler (blob_col) VALUES (REPEAT('x', 1048576))",
79+
err := runSQLNoFail(insertCtx,
80+
"INSERT INTO filler (blob_col) VALUES (REPEAT('x', 1048576))",
7681
primaryTablet, "vt_"+keyspaceName)
82+
if err != nil {
83+
if insertCtx.Err() != nil {
84+
return // shutting down
85+
}
86+
insertErrors.Add(1)
87+
s := err.Error()
88+
lastErr.Store(&s)
89+
time.Sleep(50 * time.Millisecond)
90+
continue
91+
}
7792
inserts.Add(1)
7893
}
7994
}()
8095

8196
// Poll error_log for the InnoDB disk-full codes. Up to 120s — InnoDB's
8297
// dirty-page flush cadence + buffer pool size mean we may need a fair
8398
// bit of cumulative writes before ibdata1 actually fails to extend.
84-
require.Eventually(t, func() bool {
99+
deadline := time.Now().Add(120 * time.Second)
100+
gotDiskFullEntry := false
101+
for time.Now().Before(deadline) {
85102
errLog, err := utils.RunSQL(t, `SELECT COUNT(*) AS c FROM performance_schema.error_log
86103
WHERE prio = 'Error' AND subsystem = 'InnoDB'
87104
AND error_code IN ('MY-012814', 'MY-012820')`,
88105
replicaTablet, "")
89-
if err != nil || len(errLog.Rows) == 0 {
90-
return false
106+
if err == nil && len(errLog.Rows) > 0 {
107+
if count, _ := errLog.Named().Row().ToInt64("c"); count > 0 {
108+
gotDiskFullEntry = true
109+
break
110+
}
91111
}
92-
count, _ := errLog.Named().Row().ToInt64("c")
93-
return count > 0
94-
}, 120*time.Second, 1*time.Second,
95-
"replica's performance_schema.error_log never recorded MY-012814/MY-012820 (inserts so far: %d)", inserts.Load())
96-
97-
t.Logf("InnoDB ENOSPC observed after %d primary inserts", inserts.Load())
112+
time.Sleep(2 * time.Second)
113+
t.Logf("waiting for InnoDB ENOSPC: inserts=%d, insertErrors=%d, free=%s",
114+
inserts.Load(), insertErrors.Load(), formatBytes(diskMount))
115+
}
116+
if !gotDiskFullEntry {
117+
var lastErrStr string
118+
if p := lastErr.Load(); p != nil {
119+
lastErrStr = *p
120+
}
121+
t.Fatalf("performance_schema.error_log never recorded MY-012814/MY-012820: inserts=%d insertErrors=%d lastErr=%q",
122+
inserts.Load(), insertErrors.Load(), lastErrStr)
123+
}
124+
t.Logf("InnoDB ENOSPC observed after %d primary inserts (%d errors)", inserts.Load(), insertErrors.Load())
98125

99126
// Confirm we hit case 2 (silent InnoDB retry, IO thread unaffected) and
100127
// not case 1 (relay log filesystem fails, IO thread stops). With relay
@@ -127,6 +154,20 @@ func TestReplicationStalledDiskFull(t *testing.T) {
127154
})
128155
}
129156

157+
// formatBytes returns a short human-readable free-space figure for the
158+
// loopback mount, used in periodic diagnostic logs while we wait for the
159+
// InnoDB ENOSPC entry.
160+
func formatBytes(m *mount) string {
161+
if m == nil {
162+
return "?"
163+
}
164+
free, err := m.freeBytes()
165+
if err != nil {
166+
return "?"
167+
}
168+
return fmt.Sprintf("%d KiB", free/1024)
169+
}
170+
130171
// runSQLNoFail executes a query against a tablet's mysqld without using
131172
// require.Nil; safe to call from a goroutine. Errors are returned for the
132173
// caller to inspect; the insert loop above swallows them because we only

0 commit comments

Comments
 (0)