@@ -69,32 +69,59 @@ func TestReplicationStalledDiskFull(t *testing.T) {
6969 insertCtx , stopInserts := context .WithCancel (context .Background ())
7070 defer stopInserts ()
7171
72- var inserts atomic.Int64
72+ var (
73+ inserts atomic.Int64
74+ insertErrors atomic.Int64
75+ lastErr atomic.Pointer [string ]
76+ )
7377 go func () {
7478 for insertCtx .Err () == nil {
75- _ = runSQLNoFail (insertCtx , "INSERT INTO filler (blob_col) VALUES (REPEAT('x', 1048576))" ,
79+ err := runSQLNoFail (insertCtx ,
80+ "INSERT INTO filler (blob_col) VALUES (REPEAT('x', 1048576))" ,
7681 primaryTablet , "vt_" + keyspaceName )
82+ if err != nil {
83+ if insertCtx .Err () != nil {
84+ return // shutting down
85+ }
86+ insertErrors .Add (1 )
87+ s := err .Error ()
88+ lastErr .Store (& s )
89+ time .Sleep (50 * time .Millisecond )
90+ continue
91+ }
7792 inserts .Add (1 )
7893 }
7994 }()
8095
8196 // Poll error_log for the InnoDB disk-full codes. Up to 120s — InnoDB's
8297 // dirty-page flush cadence + buffer pool size mean we may need a fair
8398 // bit of cumulative writes before ibdata1 actually fails to extend.
84- require .Eventually (t , func () bool {
99+ deadline := time .Now ().Add (120 * time .Second )
100+ gotDiskFullEntry := false
101+ for time .Now ().Before (deadline ) {
85102 errLog , err := utils .RunSQL (t , `SELECT COUNT(*) AS c FROM performance_schema.error_log
86103 WHERE prio = 'Error' AND subsystem = 'InnoDB'
87104 AND error_code IN ('MY-012814', 'MY-012820')` ,
88105 replicaTablet , "" )
89- if err != nil || len (errLog .Rows ) == 0 {
90- return false
106+ if err == nil && len (errLog .Rows ) > 0 {
107+ if count , _ := errLog .Named ().Row ().ToInt64 ("c" ); count > 0 {
108+ gotDiskFullEntry = true
109+ break
110+ }
91111 }
92- count , _ := errLog .Named ().Row ().ToInt64 ("c" )
93- return count > 0
94- }, 120 * time .Second , 1 * time .Second ,
95- "replica's performance_schema.error_log never recorded MY-012814/MY-012820 (inserts so far: %d)" , inserts .Load ())
96-
97- t .Logf ("InnoDB ENOSPC observed after %d primary inserts" , inserts .Load ())
112+ time .Sleep (2 * time .Second )
113+ t .Logf ("waiting for InnoDB ENOSPC: inserts=%d, insertErrors=%d, free=%s" ,
114+ inserts .Load (), insertErrors .Load (), formatBytes (diskMount ))
115+ }
116+ if ! gotDiskFullEntry {
117+ var lastErrStr string
118+ if p := lastErr .Load (); p != nil {
119+ lastErrStr = * p
120+ }
121+ t .Fatalf ("performance_schema.error_log never recorded MY-012814/MY-012820: inserts=%d insertErrors=%d lastErr=%q" ,
122+ inserts .Load (), insertErrors .Load (), lastErrStr )
123+ }
124+ t .Logf ("InnoDB ENOSPC observed after %d primary inserts (%d errors)" , inserts .Load (), insertErrors .Load ())
98125
99126 // Confirm we hit case 2 (silent InnoDB retry, IO thread unaffected) and
100127 // not case 1 (relay log filesystem fails, IO thread stops). With relay
@@ -127,6 +154,20 @@ func TestReplicationStalledDiskFull(t *testing.T) {
127154 })
128155}
129156
157+ // formatBytes returns a short human-readable free-space figure for the
158+ // loopback mount, used in periodic diagnostic logs while we wait for the
159+ // InnoDB ENOSPC entry.
160+ func formatBytes (m * mount ) string {
161+ if m == nil {
162+ return "?"
163+ }
164+ free , err := m .freeBytes ()
165+ if err != nil {
166+ return "?"
167+ }
168+ return fmt .Sprintf ("%d KiB" , free / 1024 )
169+ }
170+
130171// runSQLNoFail executes a query against a tablet's mysqld without using
131172// require.Nil; safe to call from a goroutine. Errors are returned for the
132173// caller to inspect; the insert loop above swallows them because we only
0 commit comments