Skip to content

Commit 46f6301

Browse files
Junxiao Bigregkh
authored andcommitted
Revert "ocfs2: mount shared volume without ha stack"
commit c80af0c upstream. This reverts commit 912f655. This commit introduced a regression that can cause mount hung. The changes in __ocfs2_find_empty_slot causes that any node with none-zero node number can grab the slot that was already taken by node 0, so node 1 will access the same journal with node 0, when it try to grab journal cluster lock, it will hung because it was already acquired by node 0. It's very easy to reproduce this, in one cluster, mount node 0 first, then node 1, you will see the following call trace from node 1. [13148.735424] INFO: task mount.ocfs2:53045 blocked for more than 122 seconds. [13148.739691] Not tainted 5.15.0-2148.0.4.el8uek.mountracev2.x86_64 #2 [13148.742560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [13148.745846] task:mount.ocfs2 state:D stack: 0 pid:53045 ppid: 53044 flags:0x00004000 [13148.749354] Call Trace: [13148.750718] <TASK> [13148.752019] ? usleep_range+0x90/0x89 [13148.753882] __schedule+0x210/0x567 [13148.755684] schedule+0x44/0xa8 [13148.757270] schedule_timeout+0x106/0x13c [13148.759273] ? __prepare_to_swait+0x53/0x78 [13148.761218] __wait_for_common+0xae/0x163 [13148.763144] __ocfs2_cluster_lock.constprop.0+0x1d6/0x870 [ocfs2] [13148.765780] ? ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] [13148.768312] ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] [13148.770968] ocfs2_journal_init+0x91/0x340 [ocfs2] [13148.773202] ocfs2_check_volume+0x39/0x461 [ocfs2] [13148.775401] ? iput+0x69/0xba [13148.777047] ocfs2_mount_volume.isra.0.cold+0x40/0x1f5 [ocfs2] [13148.779646] ocfs2_fill_super+0x54b/0x853 [ocfs2] [13148.781756] mount_bdev+0x190/0x1b7 [13148.783443] ? ocfs2_remount+0x440/0x440 [ocfs2] [13148.785634] legacy_get_tree+0x27/0x48 [13148.787466] vfs_get_tree+0x25/0xd0 [13148.789270] do_new_mount+0x18c/0x2d9 [13148.791046] __x64_sys_mount+0x10e/0x142 [13148.792911] do_syscall_64+0x3b/0x89 [13148.794667] entry_SYSCALL_64_after_hwframe+0x170/0x0 [13148.797051] RIP: 0033:0x7f2309f6e26e [13148.798784] RSP: 002b:00007ffdcee7d408 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 [13148.801974] RAX: ffffffffffffffda RBX: 00007ffdcee7d4a0 RCX: 00007f2309f6e26e [13148.804815] RDX: 0000559aa762a8ae RSI: 0000559aa939d340 RDI: 0000559aa93a22b0 [13148.807719] RBP: 00007ffdcee7d5b0 R08: 0000559aa93a2290 R09: 00007f230a0b4820 [13148.810659] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcee7d420 [13148.813609] R13: 0000000000000000 R14: 0000559aa939f000 R15: 0000000000000000 [13148.816564] </TASK> To fix it, we can just fix __ocfs2_find_empty_slot. But original commit introduced the feature to mount ocfs2 locally even it is cluster based, that is a very dangerous, it can easily cause serious data corruption, there is no way to stop other nodes mounting the fs and corrupting it. Setup ha or other cluster-aware stack is just the cost that we have to take for avoiding corruption, otherwise we have to do it in kernel. Link: https://lkml.kernel.org/r/[email protected] Fixes: 912f655("ocfs2: mount shared volume without ha stack") Signed-off-by: Junxiao Bi <[email protected]> Acked-by: Joseph Qi <[email protected]> Cc: Mark Fasheh <[email protected]> Cc: Joel Becker <[email protected]> Cc: Changwei Ge <[email protected]> Cc: Gang He <[email protected]> Cc: Jun Piao <[email protected]> Cc: <[email protected]> Cc: <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent f32d561 commit 46f6301

File tree

3 files changed

+20
-51
lines changed

3 files changed

+20
-51
lines changed

fs/ocfs2/ocfs2.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,6 @@ enum ocfs2_mount_options
277277
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
278278
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
279279
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
280-
OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
281280
};
282281

283282
#define OCFS2_OSB_SOFT_RO 0x0001
@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
673672

674673
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
675674
{
676-
return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
677-
|| (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
675+
return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
678676
}
679677

680678
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)

fs/ocfs2/slot_map.c

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
252252
int i, ret = -ENOSPC;
253253

254254
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
255-
if (!si->si_slots[preferred].sl_valid ||
256-
!si->si_slots[preferred].sl_node_num) {
255+
if (!si->si_slots[preferred].sl_valid) {
257256
ret = preferred;
258257
goto out;
259258
}
260259
}
261260

262261
for(i = 0; i < si->si_num_slots; i++) {
263-
if (!si->si_slots[i].sl_valid ||
264-
!si->si_slots[i].sl_node_num) {
262+
if (!si->si_slots[i].sl_valid) {
265263
ret = i;
266264
break;
267265
}
@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
456454
spin_lock(&osb->osb_lock);
457455
ocfs2_update_slot_info(si);
458456

459-
if (ocfs2_mount_local(osb))
460-
/* use slot 0 directly in local mode */
461-
slot = 0;
462-
else {
463-
/* search for ourselves first and take the slot if it already
464-
* exists. Perhaps we need to mark this in a variable for our
465-
* own journal recovery? Possibly not, though we certainly
466-
* need to warn to the user */
467-
slot = __ocfs2_node_num_to_slot(si, osb->node_num);
457+
/* search for ourselves first and take the slot if it already
458+
* exists. Perhaps we need to mark this in a variable for our
459+
* own journal recovery? Possibly not, though we certainly
460+
* need to warn to the user */
461+
slot = __ocfs2_node_num_to_slot(si, osb->node_num);
462+
if (slot < 0) {
463+
/* if no slot yet, then just take 1st available
464+
* one. */
465+
slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
468466
if (slot < 0) {
469-
/* if no slot yet, then just take 1st available
470-
* one. */
471-
slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
472-
if (slot < 0) {
473-
spin_unlock(&osb->osb_lock);
474-
mlog(ML_ERROR, "no free slots available!\n");
475-
status = -EINVAL;
476-
goto bail;
477-
}
478-
} else
479-
printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
480-
"already allocated to this node!\n",
481-
slot, osb->dev_str);
482-
}
467+
spin_unlock(&osb->osb_lock);
468+
mlog(ML_ERROR, "no free slots available!\n");
469+
status = -EINVAL;
470+
goto bail;
471+
}
472+
} else
473+
printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
474+
"allocated to this node!\n", slot, osb->dev_str);
483475

484476
ocfs2_set_slot(si, slot, osb->node_num);
485477
osb->slot_num = slot;

fs/ocfs2/super.c

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,6 @@ enum {
173173
Opt_dir_resv_level,
174174
Opt_journal_async_commit,
175175
Opt_err_cont,
176-
Opt_nocluster,
177176
Opt_err,
178177
};
179178

@@ -207,7 +206,6 @@ static const match_table_t tokens = {
207206
{Opt_dir_resv_level, "dir_resv_level=%u"},
208207
{Opt_journal_async_commit, "journal_async_commit"},
209208
{Opt_err_cont, "errors=continue"},
210-
{Opt_nocluster, "nocluster"},
211209
{Opt_err, NULL}
212210
};
213211

@@ -619,13 +617,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
619617
goto out;
620618
}
621619

622-
tmp = OCFS2_MOUNT_NOCLUSTER;
623-
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
624-
ret = -EINVAL;
625-
mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
626-
goto out;
627-
}
628-
629620
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
630621
OCFS2_MOUNT_HB_NONE;
631622
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
@@ -866,7 +857,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
866857
}
867858

868859
if (ocfs2_userspace_stack(osb) &&
869-
!(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
870860
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
871861
OCFS2_STACK_LABEL_LEN)) {
872862
mlog(ML_ERROR,
@@ -1145,11 +1135,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
11451135
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
11461136
"ordered");
11471137

1148-
if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
1149-
!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
1150-
printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
1151-
"without cluster aware mode.\n", osb->dev_str);
1152-
11531138
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
11541139
wake_up(&osb->osb_mount_event);
11551140

@@ -1456,9 +1441,6 @@ static int ocfs2_parse_options(struct super_block *sb,
14561441
case Opt_journal_async_commit:
14571442
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
14581443
break;
1459-
case Opt_nocluster:
1460-
mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
1461-
break;
14621444
default:
14631445
mlog(ML_ERROR,
14641446
"Unrecognized mount option \"%s\" "
@@ -1570,9 +1552,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
15701552
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
15711553
seq_printf(s, ",journal_async_commit");
15721554

1573-
if (opts & OCFS2_MOUNT_NOCLUSTER)
1574-
seq_printf(s, ",nocluster");
1575-
15761555
return 0;
15771556
}
15781557

0 commit comments

Comments
 (0)