
Commit b4bfa3f

KAGA-KOKO authored and Ingo Molnar committed
sched/core: Rework the __schedule() preempt argument
PREEMPT_RT needs to hand a special state into __schedule() when a task
blocks on a 'sleeping' spin/rwlock. This is required to handle
rcu_note_context_switch() correctly without having special casing in the
RCU code. From an RCU point of view the blocking on the sleeping spinlock
is equivalent to preemption, because the task might be in a read side
critical section.

schedule_debug() also has a check which would trigger with the !preempt
case, but that could be handled differently.

To avoid adding another argument and extra checks which cannot be
optimized out by the compiler, the following solution has been chosen:

 - Replace the boolean 'preempt' argument with an unsigned integer
   'sched_mode' argument and define constants to hand in:
   (0 == no preemption, 1 = preemption).

 - Add two masks to apply on that mode: one for the debug/rcu invocations,
   and one for the actual scheduling decision.

   For a non RT kernel these masks are UINT_MAX, i.e. all bits are set,
   which allows the compiler to optimize the AND operation out, because it
   is not masking out anything. IOW, it's not different from the boolean.

   RT enabled kernels will define these masks separately.

No functional change.

Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 5f220be commit b4bfa3f
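
Why the mask is free on !RT kernels: with SM_MASK_PREEMPT defined as ~0U,
"sched_mode & SM_MASK_PREEMPT" is simply "sched_mode", so the rewritten test
compiles down to the old boolean check. Below is a minimal standalone sketch of
that equivalence; should_deactivate() is a made-up stand-in for the prev_state
check in __schedule(), not a kernel function, and the constants mirror the ones
added in the diff further down.

#include <stdio.h>

/* The constants this commit adds to kernel/sched/core.c. */
#define SM_NONE		0x0
#define SM_PREEMPT	0x1
/* !RT: all bits set, so the AND below masks nothing out. */
#define SM_MASK_PREEMPT	(~0U)

/*
 * Made-up stand-in for the prev_state check in __schedule(): with
 * SM_MASK_PREEMPT == ~0U the compiler can drop the AND entirely and
 * the condition is the same as the old "if (!preempt && prev_state)".
 */
static int should_deactivate(unsigned int sched_mode, unsigned int prev_state)
{
	return !(sched_mode & SM_MASK_PREEMPT) && prev_state;
}

int main(void)
{
	/* Voluntary schedule of a task that set a sleep state: deactivate. */
	printf("%d\n", should_deactivate(SM_NONE, 0x1));	/* prints 1 */
	/* Preemption of the same task: it stays on the runqueue. */
	printf("%d\n", should_deactivate(SM_PREEMPT, 0x1));	/* prints 0 */
	return 0;
}

On !RT both forms boil down to comparing the argument against zero, which is
what the changelog means by "it's not different from the boolean".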

File tree

1 file changed: +23, -11 lines


kernel/sched/core.c

Lines changed: 23 additions & 11 deletions
@@ -5819,6 +5819,18 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 #endif /* CONFIG_SCHED_CORE */
 
+/*
+ * Constants for the sched_mode argument of __schedule().
+ *
+ * The mode argument allows RT enabled kernels to differentiate a
+ * preemption from blocking on an 'sleeping' spin/rwlock. Note that
+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to
+ * optimize the AND operation out and just check for zero.
+ */
+#define SM_NONE			0x0
+#define SM_PREEMPT		0x1
+#define SM_MASK_PREEMPT		(~0U)
+
 /*
  * __schedule() is the main scheduler function.
  *
@@ -5858,7 +5870,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  *
  * WARNING: must be called with preemption disabled!
  */
-static void __sched notrace __schedule(bool preempt)
+static void __sched notrace __schedule(unsigned int sched_mode)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
@@ -5871,13 +5883,13 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	schedule_debug(prev, preempt);
+	schedule_debug(prev, !!sched_mode);
 
 	if (sched_feat(HRTICK) || sched_feat(HRTICK_DL))
 		hrtick_clear(rq);
 
 	local_irq_disable();
-	rcu_note_context_switch(preempt);
+	rcu_note_context_switch(!!sched_mode);
 
 	/*
 	 * Make sure that signal_pending_state()->signal_pending() below
@@ -5911,7 +5923,7 @@ static void __sched notrace __schedule(bool preempt)
 	 *  - ptrace_{,un}freeze_traced() can change ->state underneath us.
 	 */
 	prev_state = READ_ONCE(prev->__state);
-	if (!preempt && prev_state) {
+	if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) {
 		if (signal_pending_state(prev_state, prev)) {
 			WRITE_ONCE(prev->__state, TASK_RUNNING);
 		} else {
@@ -5977,7 +5989,7 @@ static void __sched notrace __schedule(bool preempt)
 		migrate_disable_switch(rq, prev);
 		psi_sched_switch(prev, next, !task_on_rq_queued(prev));
 
-		trace_sched_switch(preempt, prev, next);
+		trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next);
 
 		/* Also unlocks the rq: */
 		rq = context_switch(rq, prev, next, &rf);
@@ -5998,7 +6010,7 @@ void __noreturn do_task_dead(void)
 	/* Tell freezer to ignore us: */
 	current->flags |= PF_NOFREEZE;
 
-	__schedule(false);
+	__schedule(SM_NONE);
 	BUG();
 
 	/* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */
@@ -6059,7 +6071,7 @@ asmlinkage __visible void __sched schedule(void)
 	sched_submit_work(tsk);
 	do {
 		preempt_disable();
-		__schedule(false);
+		__schedule(SM_NONE);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
 	sched_update_worker(tsk);
@@ -6087,7 +6099,7 @@ void __sched schedule_idle(void)
 	 */
 	WARN_ON_ONCE(current->__state);
 	do {
-		__schedule(false);
+		__schedule(SM_NONE);
 	} while (need_resched());
 }
 
@@ -6140,7 +6152,7 @@ static void __sched notrace preempt_schedule_common(void)
 	 */
 	preempt_disable_notrace();
 	preempt_latency_start(1);
-	__schedule(true);
+	__schedule(SM_PREEMPT);
 	preempt_latency_stop(1);
 	preempt_enable_no_resched_notrace();
 
@@ -6219,7 +6231,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 	 * an infinite recursion.
 	 */
 	prev_ctx = exception_enter();
-	__schedule(true);
+	__schedule(SM_PREEMPT);
 	exception_exit(prev_ctx);
 
 	preempt_latency_stop(1);
@@ -6368,7 +6380,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 	do {
 		preempt_disable();
 		local_irq_enable();
-		__schedule(true);
+		__schedule(SM_PREEMPT);
 		local_irq_disable();
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
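
The diff above deliberately leaves SM_MASK_PREEMPT as ~0U; the changelog only
promises that RT enabled kernels will define the masks separately, and that
part is not in this commit. A hedged sketch of what that separation could look
like follows; SM_RTLOCK_WAIT and the CONFIG_PREEMPT_RT split are hypothetical
here, purely to illustrate the changelog's argument.

/*
 * Hypothetical follow-up, not part of this commit: a third mode for a
 * task blocking on a 'sleeping' spin/rwlock under PREEMPT_RT.
 */
#define SM_NONE			0x0
#define SM_PREEMPT		0x1
#define SM_RTLOCK_WAIT		0x2	/* hypothetical rtlock-wait mode */

#ifndef CONFIG_PREEMPT_RT
# define SM_MASK_PREEMPT	(~0U)			/* AND optimizes away */
#else
# define SM_MASK_PREEMPT	(~SM_RTLOCK_WAIT)	/* mask the rtlock bit out */
#endif

/*
 * With the RT mask, "sched_mode & SM_MASK_PREEMPT" is 0 for
 * SM_RTLOCK_WAIT, so the prev_state check in __schedule() would
 * deactivate the blocking task like a normal schedule(), while
 * "!!sched_mode" still reports a preemption to schedule_debug() and
 * rcu_note_context_switch(), matching the RCU reasoning in the
 * changelog.
 */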
