Skip to content

Commit 12818d2

Browse files
Brian Fosterdchinner
authored andcommitted
xfs: rework log recovery to submit buffers on LSN boundaries
The fix to log recovery to update the metadata LSN in recovered buffers introduces the requirement that a buffer is submitted only once per current LSN. Log recovery currently submits buffers on transaction boundaries. This is not sufficient as the abstraction between log records and transactions allows for various scenarios where multiple transactions can share the same current LSN. If independent transactions share an LSN and both modify the same buffer, log recovery can incorrectly skip updates and leave the filesystem in an inconsisent state. In preparation for proper metadata LSN updates during log recovery, update log recovery to submit buffers for write on LSN change boundaries rather than transaction boundaries. Explicitly track the current LSN in a new struct xlog field to handle the various corner cases of when the current LSN may or may not change. Signed-off-by: Brian Foster <[email protected]> Reviewed-by: Dave Chinner <[email protected]> Signed-off-by: Dave Chinner <[email protected]>
1 parent ddeb14f commit 12818d2

File tree

2 files changed

+66
-19
lines changed

2 files changed

+66
-19
lines changed

fs/xfs/xfs_log_priv.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,8 @@ struct xlog {
413413
/* log record crc error injection factor */
414414
uint32_t l_badcrc_factor;
415415
#endif
416-
416+
/* log recovery lsn tracking (for buffer submission */
417+
xfs_lsn_t l_recovery_lsn;
417418
};
418419

419420
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \

fs/xfs/xfs_log_recover.c

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3846,14 +3846,13 @@ STATIC int
38463846
xlog_recover_commit_trans(
38473847
struct xlog *log,
38483848
struct xlog_recover *trans,
3849-
int pass)
3849+
int pass,
3850+
struct list_head *buffer_list)
38503851
{
38513852
int error = 0;
3852-
int error2;
38533853
int items_queued = 0;
38543854
struct xlog_recover_item *item;
38553855
struct xlog_recover_item *next;
3856-
LIST_HEAD (buffer_list);
38573856
LIST_HEAD (ra_list);
38583857
LIST_HEAD (done_list);
38593858

@@ -3876,7 +3875,7 @@ xlog_recover_commit_trans(
38763875
items_queued++;
38773876
if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
38783877
error = xlog_recover_items_pass2(log, trans,
3879-
&buffer_list, &ra_list);
3878+
buffer_list, &ra_list);
38803879
list_splice_tail_init(&ra_list, &done_list);
38813880
items_queued = 0;
38823881
}
@@ -3894,15 +3893,14 @@ xlog_recover_commit_trans(
38943893
if (!list_empty(&ra_list)) {
38953894
if (!error)
38963895
error = xlog_recover_items_pass2(log, trans,
3897-
&buffer_list, &ra_list);
3896+
buffer_list, &ra_list);
38983897
list_splice_tail_init(&ra_list, &done_list);
38993898
}
39003899

39013900
if (!list_empty(&done_list))
39023901
list_splice_init(&done_list, &trans->r_itemq);
39033902

3904-
error2 = xfs_buf_delwri_submit(&buffer_list);
3905-
return error ? error : error2;
3903+
return error;
39063904
}
39073905

39083906
STATIC void
@@ -4085,7 +4083,8 @@ xlog_recovery_process_trans(
40854083
char *dp,
40864084
unsigned int len,
40874085
unsigned int flags,
4088-
int pass)
4086+
int pass,
4087+
struct list_head *buffer_list)
40894088
{
40904089
int error = 0;
40914090
bool freeit = false;
@@ -4109,7 +4108,8 @@ xlog_recovery_process_trans(
41094108
error = xlog_recover_add_to_cont_trans(log, trans, dp, len);
41104109
break;
41114110
case XLOG_COMMIT_TRANS:
4112-
error = xlog_recover_commit_trans(log, trans, pass);
4111+
error = xlog_recover_commit_trans(log, trans, pass,
4112+
buffer_list);
41134113
/* success or fail, we are now done with this transaction. */
41144114
freeit = true;
41154115
break;
@@ -4191,10 +4191,12 @@ xlog_recover_process_ophdr(
41914191
struct xlog_op_header *ohead,
41924192
char *dp,
41934193
char *end,
4194-
int pass)
4194+
int pass,
4195+
struct list_head *buffer_list)
41954196
{
41964197
struct xlog_recover *trans;
41974198
unsigned int len;
4199+
int error;
41984200

41994201
/* Do we understand who wrote this op? */
42004202
if (ohead->oh_clientid != XFS_TRANSACTION &&
@@ -4221,8 +4223,39 @@ xlog_recover_process_ophdr(
42214223
return 0;
42224224
}
42234225

4226+
/*
4227+
* The recovered buffer queue is drained only once we know that all
4228+
* recovery items for the current LSN have been processed. This is
4229+
* required because:
4230+
*
4231+
* - Buffer write submission updates the metadata LSN of the buffer.
4232+
* - Log recovery skips items with a metadata LSN >= the current LSN of
4233+
* the recovery item.
4234+
* - Separate recovery items against the same metadata buffer can share
4235+
* a current LSN. I.e., consider that the LSN of a recovery item is
4236+
* defined as the starting LSN of the first record in which its
4237+
* transaction appears, that a record can hold multiple transactions,
4238+
* and/or that a transaction can span multiple records.
4239+
*
4240+
* In other words, we are allowed to submit a buffer from log recovery
4241+
* once per current LSN. Otherwise, we may incorrectly skip recovery
4242+
* items and cause corruption.
4243+
*
4244+
* We don't know up front whether buffers are updated multiple times per
4245+
* LSN. Therefore, track the current LSN of each commit log record as it
4246+
* is processed and drain the queue when it changes. Use commit records
4247+
* because they are ordered correctly by the logging code.
4248+
*/
4249+
if (log->l_recovery_lsn != trans->r_lsn &&
4250+
ohead->oh_flags & XLOG_COMMIT_TRANS) {
4251+
error = xfs_buf_delwri_submit(buffer_list);
4252+
if (error)
4253+
return error;
4254+
log->l_recovery_lsn = trans->r_lsn;
4255+
}
4256+
42244257
return xlog_recovery_process_trans(log, trans, dp, len,
4225-
ohead->oh_flags, pass);
4258+
ohead->oh_flags, pass, buffer_list);
42264259
}
42274260

42284261
/*
@@ -4240,7 +4273,8 @@ xlog_recover_process_data(
42404273
struct hlist_head rhash[],
42414274
struct xlog_rec_header *rhead,
42424275
char *dp,
4243-
int pass)
4276+
int pass,
4277+
struct list_head *buffer_list)
42444278
{
42454279
struct xlog_op_header *ohead;
42464280
char *end;
@@ -4262,7 +4296,7 @@ xlog_recover_process_data(
42624296

42634297
/* errors will abort recovery */
42644298
error = xlog_recover_process_ophdr(log, rhash, rhead, ohead,
4265-
dp, end, pass);
4299+
dp, end, pass, buffer_list);
42664300
if (error)
42674301
return error;
42684302

@@ -4685,7 +4719,8 @@ xlog_recover_process(
46854719
struct hlist_head rhash[],
46864720
struct xlog_rec_header *rhead,
46874721
char *dp,
4688-
int pass)
4722+
int pass,
4723+
struct list_head *buffer_list)
46894724
{
46904725
int error;
46914726
__le32 crc;
@@ -4732,7 +4767,8 @@ xlog_recover_process(
47324767
if (error)
47334768
return error;
47344769

4735-
return xlog_recover_process_data(log, rhash, rhead, dp, pass);
4770+
return xlog_recover_process_data(log, rhash, rhead, dp, pass,
4771+
buffer_list);
47364772
}
47374773

47384774
STATIC int
@@ -4793,9 +4829,11 @@ xlog_do_recovery_pass(
47934829
char *offset;
47944830
xfs_buf_t *hbp, *dbp;
47954831
int error = 0, h_size, h_len;
4832+
int error2 = 0;
47964833
int bblks, split_bblks;
47974834
int hblks, split_hblks, wrapped_hblks;
47984835
struct hlist_head rhash[XLOG_RHASH_SIZE];
4836+
LIST_HEAD (buffer_list);
47994837

48004838
ASSERT(head_blk != tail_blk);
48014839
rhead_blk = 0;
@@ -4981,7 +5019,7 @@ xlog_do_recovery_pass(
49815019
}
49825020

49835021
error = xlog_recover_process(log, rhash, rhead, offset,
4984-
pass);
5022+
pass, &buffer_list);
49855023
if (error)
49865024
goto bread_err2;
49875025

@@ -5012,7 +5050,8 @@ xlog_do_recovery_pass(
50125050
if (error)
50135051
goto bread_err2;
50145052

5015-
error = xlog_recover_process(log, rhash, rhead, offset, pass);
5053+
error = xlog_recover_process(log, rhash, rhead, offset, pass,
5054+
&buffer_list);
50165055
if (error)
50175056
goto bread_err2;
50185057

@@ -5025,10 +5064,17 @@ xlog_do_recovery_pass(
50255064
bread_err1:
50265065
xlog_put_bp(hbp);
50275066

5067+
/*
5068+
* Submit buffers that have been added from the last record processed,
5069+
* regardless of error status.
5070+
*/
5071+
if (!list_empty(&buffer_list))
5072+
error2 = xfs_buf_delwri_submit(&buffer_list);
5073+
50285074
if (error && first_bad)
50295075
*first_bad = rhead_blk;
50305076

5031-
return error;
5077+
return error ? error : error2;
50325078
}
50335079

50345080
/*

0 commit comments

Comments
 (0)