From 3691a6aac0ee8777a3ebd30b1dcfbfc3b254af9b Mon Sep 17 00:00:00 2001
Message-Id: <3691a6aac0ee8777a3ebd30b1dcfbfc3b254af9b.1592846147.git.zanussi@kernel.org>
In-Reply-To: <07cd0dbc80b976663c80755496a03f288decfe5a.1592846146.git.zanussi@kernel.org>
References: <07cd0dbc80b976663c80755496a03f288decfe5a.1592846146.git.zanussi@kernel.org>
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 6 Jun 2011 12:20:33 +0200
Subject: [PATCH 107/330] sched: Move mmdrop to RCU on RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.127-rt55.tar.xz

mmdrop() takes sleeping locks and calls into the memory allocator, so it
is nothing we want to do in task switch and other atomic contexts.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/mm_types.h |  4 ++++
 include/linux/sched/mm.h | 11 +++++++++++
 kernel/fork.c            | 13 +++++++++++++
 kernel/sched/core.c      | 18 ++++++++++++++++--
 4 files changed, 44 insertions(+), 2 deletions(-)

--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/uprobes.h>
+#include <linux/rcupdate.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
 
@@ -489,6 +490,9 @@
 	bool tlb_flush_batched;
 #endif
 	struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT_BASE
+	struct rcu_head delayed_drop;
+#endif
 #ifdef CONFIG_HUGETLB_PAGE
 	atomic_long_t hugetlb_usage;
 #endif
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -51,6 +51,17 @@
 void mmdrop(struct mm_struct *mm);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+extern void __mmdrop_delayed(struct rcu_head *rhp);
+static inline void mmdrop_delayed(struct mm_struct *mm)
+{
+	if (atomic_dec_and_test(&mm->mm_count))
+		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+# define mmdrop_delayed(mm)	mmdrop(mm)
+#endif
+
 /*
  * This has to be called after a get_task_mm()/mmget_not_zero()
  * followed by taking the mmap_sem for writing before modifying the
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -647,6 +647,19 @@
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+/*
+ * RCU callback for delayed mm drop. Not strictly rcu, but we don't
+ * want another facility to make this work.
+ */
+void __mmdrop_delayed(struct rcu_head *rhp)
+{
+	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+	__mmdrop(mm);
+}
+#endif
+
 static void mmdrop_async_fn(struct work_struct *work)
 {
 	struct mm_struct *mm;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2729,9 +2729,13 @@
 	 *   provided by mmdrop(),
 	 * - a sync_core for SYNC_CORE.
 	 */
+	/*
+	 * We use mmdrop_delayed() here so we don't have to do the
+	 * full __mmdrop() when we are the last user.
+	 */
 	if (mm) {
 		membarrier_mm_sync_core_before_usermode(mm);
-		mmdrop(mm);
+		mmdrop_delayed(mm);
 	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
@@ -5602,6 +5606,8 @@
 #endif /* CONFIG_NUMA_BALANCING */
 
 #ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm);
+
 /*
  * Ensure that the idle task is using init_mm right before its CPU goes
  * offline.
@@ -5617,6 +5623,11 @@
 		switch_mm(mm, &init_mm, current);
 		finish_arch_post_lock_switch();
 	}
+	/*
+	 * Defer the cleanup to an alive cpu. On RT we can neither
+	 * call mmdrop() nor mmdrop_delayed() from here.
+	 */
+	per_cpu(idle_last_mm, smp_processor_id()) = mm;
 
 	/* finish_cpu(), as ran on the BP, will clean up the active_mm state */
 }
@@ -5930,6 +5941,10 @@
 	update_max_interval();
 	nohz_balance_exit_idle(rq);
 	hrtick_clear(rq);
+	if (per_cpu(idle_last_mm, cpu)) {
+		mmdrop_delayed(per_cpu(idle_last_mm, cpu));
+		per_cpu(idle_last_mm, cpu) = NULL;
+	}
 	return 0;
 }
 #endif
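
For reference, the change boils down to the usual refcount-plus-RCU deferral
pattern: when the final reference may be dropped from a context that must not
block (here finish_task_switch() and the CPU hotplug path), the last put hands
the object to call_rcu() and the heavy teardown runs later from the RCU
callback instead of inline. The sketch below shows that pattern in isolation;
it is not part of the patch, "struct foo" and the foo_*() helpers are made-up
names, and only rcu_head, call_rcu(), container_of() and the atomic_*
refcount helpers are the real kernel APIs.

/*
 * Illustrative sketch only -- not part of the patch. struct foo, foo_get(),
 * foo_put_atomic() and foo_free_rcu() are hypothetical names.
 */
#include <linux/atomic.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	atomic_t refcount;		/* analogous to mm_struct::mm_count */
	struct rcu_head rcu;		/* analogous to mm_struct::delayed_drop */
	/* ... payload whose teardown is too heavy for atomic context ... */
};

/* RCU callback: runs later, outside the context that dropped the last ref. */
static void foo_free_rcu(struct rcu_head *rhp)
{
	struct foo *f = container_of(rhp, struct foo, rcu);

	/* Heavy teardown goes here; in the patch this is __mmdrop(). */
	kfree(f);
}

static inline void foo_get(struct foo *f)
{
	atomic_inc(&f->refcount);
}

/*
 * Safe to call from a context that must not block: the final free is
 * deferred to the RCU callback instead of being done inline.
 */
static inline void foo_put_atomic(struct foo *f)
{
	if (atomic_dec_and_test(&f->refcount))
		call_rcu(&f->rcu, foo_free_rcu);
}

On PREEMPT_RT the softirq work that runs RCU callbacks is processed from
thread context, which is why it is acceptable for the deferred __mmdrop() to
take the RT kernel's sleeping locks there; on !RT builds the #else branch in
the patch simply maps mmdrop_delayed() back to mmdrop().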