[rt] Update to 4.1.3-rt3 and re-enable
svn path=/dists/trunk/linux/; revision=22861
This commit is contained in:
parent 42b6cc2f10
commit 88728c5842
debian/changelog
@@ -7,6 +7,7 @@ linux (4.1.3-1~exp1) UNRELEASED; urgency=medium
   * [armel/kirkwood] ubi: Change UBIFS_FS and MTD_UBI from built-in to
     modules, to reduce kernel image size (fixes FTBFS)
   * debian/rules.real: Use dpkg-parsechangelog -S
+  * [rt] Update to 4.1.3-rt3 and re-enable

   [ Ian Campbell ]
   * [armhf] Set CONFIG_ARM_TEGRA_CPUFREQ as builtin.

debian/config/defines
@@ -33,7 +33,7 @@ featuresets:
  rt

 [featureset-rt_base]
-enabled: false
+enabled: true

 [description]
 part-long-up: This kernel is not suitable for SMP (multi-processor,

debian/patches/features/all/rt/0001-arm64-Mark-PMU-interrupt-IRQF_NO_THREAD.patch (new file, 27 lines)
@@ -0,0 +1,27 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 27 Apr 2015 22:53:08 +0200
Subject: arm64: Mark PMU interrupt IRQF_NO_THREAD
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Mark the PMU interrupts as non-threadable, as is the case with
arch/arm: d9c3365 ARM: 7813/1: Mark pmu interupt IRQF_NO_THREAD

[ upstream commit: 96045ed486b0 ]

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 arch/arm64/kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -488,7 +488,7 @@ armpmu_reserve_hardware(struct arm_pmu *
 	}

 	err = request_irq(irq, armpmu->handle_irq,
-			  IRQF_NOBALANCING,
+			  IRQF_NOBALANCING | IRQF_NO_THREAD,
 			  "arm-pmu", armpmu);
 	if (err) {
 		pr_err("unable to request IRQ%d for ARM PMU counters\n",
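For context, a minimal sketch of the flag usage this patch relies on (hypothetical driver names, not code from the patch): IRQF_NO_THREAD exempts a handler from forced interrupt threading, so it keeps running in hard-IRQ context even on a threaded or -rt kernel.

#include <linux/interrupt.h>

/* Hypothetical low-level handler that must run in hard-IRQ context. */
static irqreturn_t my_pmu_irq(int irq, void *dev)
{
	/* Reads per-CPU counter state; deferring it to a thread would break it. */
	return IRQ_HANDLED;
}

static int my_pmu_setup(int irq, void *dev)
{
	/* Mirrors the hunk above: never thread this IRQ, even with "threadirqs". */
	return request_irq(irq, my_pmu_irq,
			   IRQF_NOBALANCING | IRQF_NO_THREAD,
			   "my-pmu", dev);
}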
debian/patches/features/all/rt/0001-sched-Implement-lockless-wake-queues.patch (new file, 167 lines)
@@ -0,0 +1,167 @@
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 1 May 2015 08:27:50 -0700
Subject: sched: Implement lockless wake-queues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This is useful for locking primitives that can effect multiple
wakeups per operation and want to avoid lock internal lock contention
by delaying the wakeups until we've released the lock internal locks.

Alternatively it can be used to avoid issuing multiple wakeups, and
thus save a few cycles, in packet processing. Queue all target tasks
and wakeup once you've processed all packets. That way you avoid
waking the target task multiple times if there were multiple packets
for the same task.

Properties of a wake_q are:
- Lockless, as queue head must reside on the stack.
- Being a queue, maintains wakeup order passed by the callers. This can
  be important for otherwise, in scenarios where highly contended locks
  could affect any reliance on lock fairness.
- A queued task cannot be added again until it is woken up.

This patch adds the needed infrastructure into the scheduler code
and uses the new wake_list to delay the futex wakeups until
after we've released the hash bucket locks.

[upstream commit 7675104990ed255b9315a82ae827ff312a2a88a2]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[tweaks, adjustments, comments, etc.]
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-2-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/sched.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -900,6 +900,50 @@ enum cpu_idle_type {
 #define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)

 /*
+ * Wake-queues are lists of tasks with a pending wakeup, whose
+ * callers have already marked the task as woken internally,
+ * and can thus carry on. A common use case is being able to
+ * do the wakeups once the corresponding user lock as been
+ * released.
+ *
+ * We hold reference to each task in the list across the wakeup,
+ * thus guaranteeing that the memory is still valid by the time
+ * the actual wakeups are performed in wake_up_q().
+ *
+ * One per task suffices, because there's never a need for a task to be
+ * in two wake queues simultaneously; it is forbidden to abandon a task
+ * in a wake queue (a call to wake_up_q() _must_ follow), so if a task is
+ * already in a wake queue, the wakeup will happen soon and the second
+ * waker can just skip it.
+ *
+ * The WAKE_Q macro declares and initializes the list head.
+ * wake_up_q() does NOT reinitialize the list; it's expected to be
+ * called near the end of a function, where the fact that the queue is
+ * not used again will be easy to see by inspection.
+ *
+ * Note that this can cause spurious wakeups. schedule() callers
+ * must ensure the call is done inside a loop, confirming that the
+ * wakeup condition has in fact occurred.
+ */
+struct wake_q_node {
+	struct wake_q_node *next;
+};
+
+struct wake_q_head {
+	struct wake_q_node *first;
+	struct wake_q_node **lastp;
+};
+
+#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
+
+#define WAKE_Q(name)					\
+	struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
+
+extern void wake_q_add(struct wake_q_head *head,
+		       struct task_struct *task);
+extern void wake_up_q(struct wake_q_head *head);
+
+/*
  * sched-domains (multiprocessor balancing) declarations:
  */
 #ifdef CONFIG_SMP
@@ -1511,6 +1555,8 @@ struct task_struct {
 	/* Protection of the PI data structures: */
 	raw_spinlock_t pi_lock;

+	struct wake_q_node wake_q;
+
 #ifdef CONFIG_RT_MUTEXES
 	/* PI waiters blocked on a rt_mutex held by this task */
 	struct rb_root pi_waiters;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -541,6 +541,52 @@ static bool set_nr_if_polling(struct tas
 #endif
 #endif

+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+	struct wake_q_node *node = &task->wake_q;
+
+	/*
+	 * Atomically grab the task, if ->wake_q is !nil already it means
+	 * its already queued (either by us or someone else) and will get the
+	 * wakeup due to that.
+	 *
+	 * This cmpxchg() implies a full barrier, which pairs with the write
+	 * barrier implied by the wakeup in wake_up_list().
+	 */
+	if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+		return;
+
+	get_task_struct(task);
+
+	/*
+	 * The head is context local, there can be no concurrency.
+	 */
+	*head->lastp = node;
+	head->lastp = &node->next;
+}
+
+void wake_up_q(struct wake_q_head *head)
+{
+	struct wake_q_node *node = head->first;
+
+	while (node != WAKE_Q_TAIL) {
+		struct task_struct *task;
+
+		task = container_of(node, struct task_struct, wake_q);
+		BUG_ON(!task);
+		/* task can safely be re-inserted now */
+		node = node->next;
+		task->wake_q.next = NULL;
+
+		/*
+		 * wake_up_process() implies a wmb() to pair with the queueing
+		 * in wake_q_add() so as not to miss wakeups.
+		 */
+		wake_up_process(task);
+		put_task_struct(task);
+	}
+}
+
 /*
  * resched_curr - mark rq's current task 'to be rescheduled now'.
  *
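As a rough illustration of the caller pattern this new API enables (hypothetical lock and waiter bookkeeping, not code from the patch): wakeups are only recorded while the lock is held and are issued after it is dropped.

#include <linux/sched.h>
#include <linux/spinlock.h>

static void release_and_wake(spinlock_t *lock, struct task_struct **waiters,
			     int nr)
{
	WAKE_Q(wake_q);		/* on-stack head: { WAKE_Q_TAIL, &wake_q.first } */
	int i;

	spin_lock(lock);
	for (i = 0; i < nr; i++)
		wake_q_add(&wake_q, waiters[i]);	/* takes a task reference */
	spin_unlock(lock);

	wake_up_q(&wake_q);	/* wakeups happen here, lock already released */
}

A task that is already queued elsewhere is skipped by the cmpxchg() in wake_q_add(), which is safe because the earlier waker is obliged to call wake_up_q().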
debian/patches/features/all/rt/0001-uaccess-count-pagefault_disable-levels-in-pagefault_.patch (new file, 120 lines)
@@ -0,0 +1,120 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:06 +0200
Subject: sched/preempt, mm/fault: Count pagefault_disable() levels in pagefault_disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Until now, pagefault_disable()/pagefault_enable() used the preempt
count to track whether in an environment with pagefaults disabled (can
be queried via in_atomic()).

This patch introduces a separate counter in task_struct to count the
level of pagefault_disable() calls. We'll keep manipulating the preempt
count to retain compatibility with existing pagefault handlers.

It is now possible to verify whether we are in a pagefault_disable()
environment by calling pagefault_disabled(). In contrast to in_atomic()
it will not be influenced by preempt_enable()/preempt_disable().

This patch is based on a patch from Ingo Molnar.

[upstream commit 8bcbde5480f9777f8b74d71493722c663e22c21b]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 include/linux/sched.h   |  1 +
 include/linux/uaccess.h | 36 +++++++++++++++++++++++++++++-------
 kernel/fork.c           |  3 +++
 3 files changed, 33 insertions(+), 7 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1724,6 +1724,7 @@ struct task_struct {
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long task_state_change;
 #endif
+	int pagefault_disabled;
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,20 +2,36 @@
 #define __LINUX_UACCESS_H__

 #include <linux/preempt.h>
+#include <linux/sched.h>
 #include <asm/uaccess.h>

+static __always_inline void pagefault_disabled_inc(void)
+{
+	current->pagefault_disabled++;
+}
+
+static __always_inline void pagefault_disabled_dec(void)
+{
+	current->pagefault_disabled--;
+	WARN_ON(current->pagefault_disabled < 0);
+}
+
 /*
- * These routines enable/disable the pagefault handler in that
- * it will not take any locks and go straight to the fixup table.
+ * These routines enable/disable the pagefault handler. If disabled, it will
+ * not take any locks and go straight to the fixup table.
+ *
+ * We increase the preempt and the pagefault count, to be able to distinguish
+ * whether we run in simple atomic context or in a real pagefault_disable()
+ * context.
+ *
+ * For now, after pagefault_disabled() has been called, we run in atomic
+ * context. User access methods will not sleep.
  *
- * They have great resemblance to the preempt_disable/enable calls
- * and in fact they are identical; this is because currently there is
- * no other way to make the pagefault handlers do this. So we do
- * disable preemption but we don't necessarily care about that.
  */
 static inline void pagefault_disable(void)
 {
 	preempt_count_inc();
+	pagefault_disabled_inc();
 	/*
 	 * make sure to have issued the store before a pagefault
 	 * can hit.
@@ -25,18 +41,24 @@ static inline void pagefault_disable(voi

 static inline void pagefault_enable(void)
 {
-#ifndef CONFIG_PREEMPT
 	/*
 	 * make sure to issue those last loads/stores before enabling
 	 * the pagefault handler again.
 	 */
 	barrier();
+	pagefault_disabled_dec();
+#ifndef CONFIG_PREEMPT
 	preempt_count_dec();
 #else
 	preempt_enable();
 #endif
 }

+/*
+ * Is the pagefault handler disabled? If so, user access methods will not sleep.
+ */
+#define pagefault_disabled() (current->pagefault_disabled != 0)
+
 #ifndef ARCH_HAS_NOCACHE_UACCESS

 static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1396,6 +1396,9 @@ static struct task_struct *copy_process(
 	p->hardirq_context = 0;
 	p->softirq_context = 0;
 #endif
+
+	p->pagefault_disabled = 0;
+
 #ifdef CONFIG_LOCKDEP
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
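A small sketch of what the new counter buys (hypothetical helper; names outside the patch are invented): code can now ask pagefault_disabled() directly, instead of inferring the state from in_atomic(), which preemption changes also perturb.

#include <linux/uaccess.h>

/* Non-sleeping peek at user memory; returns -EFAULT instead of faulting in. */
static int peek_user(u32 *dst, const u32 __user *src)
{
	unsigned long left;

	pagefault_disable();	/* now also bumps current->pagefault_disabled */
	left = __copy_from_user_inatomic(dst, src, sizeof(*dst));
	pagefault_enable();

	return left ? -EFAULT : 0;
}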
@@ -0,0 +1,27 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 27 Apr 2015 22:53:09 +0200
Subject: arm64: Allow forced irq threading
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Now it's safe to allow forced interrupt threading for arm64,
all timer interrupts and the perf interrupt are marked NO_THREAD, as is
the case with arch/arm: da0ec6f ARM: 7814/2: Allow forced irq threading

[ upstream commit: e8557d1f0c4d ]

Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -71,6 +71,7 @@ config ARM64
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
+	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_RELA
 	select NO_BOOTMEM
 	select OF
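To make the effect concrete, a hedged sketch (hypothetical driver, not part of the patch): with IRQ_FORCED_THREADING selected, booting with "threadirqs" (or running an -rt kernel) moves ordinary handlers like this one into an irq kthread; only handlers marked IRQF_NO_THREAD, such as the timer and PMU interrupts mentioned above, stay in hard-IRQ context.

#include <linux/interrupt.h>

static irqreturn_t my_dev_irq(int irq, void *dev)
{
	/* Under forced threading this body runs in task context. */
	return IRQ_HANDLED;
}

static int my_dev_setup(int irq, void *dev)
{
	return request_irq(irq, my_dev_irq, 0, "my-dev", dev);
}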
@@ -0,0 +1,182 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 1 May 2015 08:27:51 -0700
Subject: futex: Implement lockless wakeups
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Given the overall futex architecture, any chance of reducing
hb->lock contention is welcome. In this particular case, using
wake-queues to enable lockless wakeups addresses very much real
world performance concerns, even cases of soft-lockups in cases
of large amounts of blocked tasks (which is not hard to find in
large boxes, using but just a handful of futexes).

At the lowest level, this patch can reduce latency of a single thread
attempting to acquire hb->lock in highly contended scenarios by
up to 2x. At lower counts of nr_wake there are no regressions,
confirming, of course, that the wake_q handling overhead is practically
non existent. For instance, while a fair amount of variation,
the extended perf-bench wakeup benchmark shows for a 20 core machine
the following avg per-thread time to wakeup its share of tasks:

	nr_thr	ms-before	ms-after
	16	0.0590		0.0215
	32	0.0396		0.0220
	48	0.0417		0.0182
	64	0.0536		0.0236
	80	0.0414		0.0097
	96	0.0672		0.0152

Naturally, this can cause spurious wakeups. However there is no core code
that cannot handle them afaict, and furthermore tglx does have the point
that other events can already trigger them anyway.

[upstream commit 1d0dcb3ad9d336e6d6ee020a750a7f8d907e28de]

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: George Spelvin <linux@horizon.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1430494072-30283-3-git-send-email-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/futex.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1090,9 +1090,11 @@ static void __unqueue_futex(struct futex

 /*
  * The hash bucket lock must be held when this is called.
- * Afterwards, the futex_q must not be accessed.
+ * Afterwards, the futex_q must not be accessed. Callers
+ * must ensure to later call wake_up_q() for the actual
+ * wakeups to occur.
  */
-static void wake_futex(struct futex_q *q)
+static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 {
 	struct task_struct *p = q->task;

@@ -1100,14 +1102,10 @@ static void wake_futex(struct futex_q *q
 		return;

 	/*
-	 * We set q->lock_ptr = NULL _before_ we wake up the task. If
-	 * a non-futex wake up happens on another CPU then the task
-	 * might exit and p would dereference a non-existing task
-	 * struct. Prevent this by holding a reference on p across the
-	 * wake up.
+	 * Queue the task for later wakeup for after we've released
+	 * the hb->lock. wake_q_add() grabs reference to p.
 	 */
-	get_task_struct(p);
-
+	wake_q_add(wake_q, p);
 	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as
@@ -1117,9 +1115,6 @@ static void wake_futex(struct futex_q *q
 	 */
 	smp_wmb();
 	q->lock_ptr = NULL;
-
-	wake_up_state(p, TASK_NORMAL);
-	put_task_struct(p);
 }

 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -1217,6 +1212,7 @@ futex_wake(u32 __user *uaddr, unsigned i
 	struct futex_q *this, *next;
 	union futex_key key = FUTEX_KEY_INIT;
 	int ret;
+	WAKE_Q(wake_q);

 	if (!bitset)
 		return -EINVAL;
@@ -1244,13 +1240,14 @@ futex_wake(u32 __user *uaddr, unsigned i
 			if (!(this->bitset & bitset))
 				continue;

-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++ret >= nr_wake)
 				break;
 		}
 	}

 	spin_unlock(&hb->lock);
+	wake_up_q(&wake_q);
 out_put_key:
 	put_futex_key(&key);
 out:
@@ -1269,6 +1266,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
 	int ret, op_ret;
+	WAKE_Q(wake_q);

 retry:
 	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
@@ -1320,7 +1318,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 				ret = -EINVAL;
 				goto out_unlock;
 			}
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++ret >= nr_wake)
 				break;
 		}
@@ -1334,7 +1332,7 @@ futex_wake_op(u32 __user *uaddr1, unsign
 				ret = -EINVAL;
 				goto out_unlock;
 			}
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			if (++op_ret >= nr_wake2)
 				break;
 		}
@@ -1344,6 +1342,7 @@ futex_wake_op(u32 __user *uaddr1, unsign

 out_unlock:
 	double_unlock_hb(hb1, hb2);
+	wake_up_q(&wake_q);
 out_put_keys:
 	put_futex_key(&key2);
 out_put_key1:
@@ -1503,6 +1502,7 @@ static int futex_requeue(u32 __user *uad
 	struct futex_pi_state *pi_state = NULL;
 	struct futex_hash_bucket *hb1, *hb2;
 	struct futex_q *this, *next;
+	WAKE_Q(wake_q);

 	if (requeue_pi) {
 		/*
@@ -1679,7 +1679,7 @@ static int futex_requeue(u32 __user *uad
 		 * woken by futex_unlock_pi().
 		 */
 		if (++task_count <= nr_wake && !requeue_pi) {
-			wake_futex(this);
+			mark_wake_futex(&wake_q, this);
 			continue;
 		}

@@ -1719,6 +1719,7 @@ static int futex_requeue(u32 __user *uad
 out_unlock:
 	free_pi_state(pi_state);
 	double_unlock_hb(hb1, hb2);
+	wake_up_q(&wake_q);
 	hb_waiters_dec(hb2);

 	/*
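All of the hunks above follow one transformation; a condensed sketch (hypothetical names, with hb_lock standing in for the futex hash-bucket lock):

#include <linux/sched.h>
#include <linux/spinlock.h>

static void wake_one_waiter(spinlock_t *hb_lock, struct task_struct *p)
{
	WAKE_Q(wake_q);

	spin_lock(hb_lock);
	/*
	 * Previously wake_up_state(p, TASK_NORMAL) ran here, under the
	 * lock, so the woken task could immediately contend for it again.
	 */
	wake_q_add(&wake_q, p);		/* only queues p; takes a reference */
	spin_unlock(hb_lock);

	wake_up_q(&wake_q);		/* actual wakeup, lock already dropped */
}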
debian/patches/features/all/rt/0002-mm-uaccess-trigger-might_sleep-in-might_fault-with-d.patch (new file, 101 lines)
@@ -0,0 +1,101 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:07 +0200
Subject: mm, uaccess: trigger might_sleep() in might_fault() with disabled pagefaults
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Commit 662bbcb2747c ("mm, sched: Allow uaccess in atomic with
pagefault_disable()") removed might_sleep() checks for all user access
code (that uses might_fault()).

The reason was to disable wrong "sleep in atomic" warnings in the
following scenario:
	pagefault_disable()
	rc = copy_to_user(...)
	pagefault_enable()

Which is valid, as pagefault_disable() increments the preempt counter
and therefore disables the pagefault handler. copy_to_user() will not
sleep and return an error code if a page is not available.

However, as all might_sleep() checks are removed,
CONFIG_DEBUG_ATOMIC_SLEEP would no longer detect the following scenario:
	spin_lock(&lock);
	rc = copy_to_user(...)
	spin_unlock(&lock)

If the kernel is compiled with preemption turned on, preempt_disable()
will make in_atomic() detect disabled preemption. The fault handler would
correctly never sleep on user access.
However, with preemption turned off, preempt_disable() is usually a NOP
(with !CONFIG_PREEMPT_COUNT), therefore in_atomic() will not be able to
detect disabled preemption nor disabled pagefaults. The fault handler
could sleep.
We really want to enable CONFIG_DEBUG_ATOMIC_SLEEP checks for user access
functions again, otherwise we can end up with horrible deadlocks.

Root of all evil is that pagefault_disable() acts almost as
preempt_disable(), depending on preemption being turned on/off.

As we now have pagefault_disabled(), we can use it to distinguish
whether user access functions might sleep.

Convert might_fault() into a macro that calls __might_fault(), to
allow proper file + line messages in case of a might_sleep() warning.

[upstream commit 9ec23531fd48031d1b6ca5366f5f967d17a8bc28]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 include/linux/kernel.h |  3 ++-
 mm/memory.c            | 18 ++++++------------
 2 files changed, 8 insertions(+), 13 deletions(-)

--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -244,7 +244,8 @@ static inline u32 reciprocal_scale(u32 v

 #if defined(CONFIG_MMU) && \
 	(defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP))
-void might_fault(void);
+#define might_fault() __might_fault(__FILE__, __LINE__)
+void __might_fault(const char *file, int line);
 #else
 static inline void might_fault(void) { }
 #endif
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3737,7 +3737,7 @@ void print_vma_addr(char *prefix, unsign
 }

 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
-void might_fault(void)
+void __might_fault(const char *file, int line)
 {
 	/*
 	 * Some code (nfs/sunrpc) uses socket ops on kernel memory while
@@ -3747,21 +3747,15 @@ void might_fault(void)
 	 */
 	if (segment_eq(get_fs(), KERNEL_DS))
 		return;
-
-	/*
-	 * it would be nicer only to annotate paths which are not under
-	 * pagefault_disable, however that requires a larger audit and
-	 * providing helpers like get_user_atomic.
-	 */
-	if (in_atomic())
+	if (pagefault_disabled())
 		return;
-
-	__might_sleep(__FILE__, __LINE__, 0);
-
+	__might_sleep(file, line, 0);
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 	if (current->mm)
 		might_lock_read(&current->mm->mmap_sem);
+#endif
 }
-EXPORT_SYMBOL(might_fault);
+EXPORT_SYMBOL(__might_fault);
 #endif

 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
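The class of bug the restored check catches, as a sketch (hypothetical function; requires CONFIG_DEBUG_ATOMIC_SLEEP, which selects PREEMPT_COUNT): before this patch might_fault() bailed out whenever in_atomic() was true, so a copy under a spinlock went unreported; now only a genuine pagefault_disable() region suppresses the warning.

#include <linux/spinlock.h>
#include <linux/uaccess.h>

static DEFINE_SPINLOCK(my_lock);

static int broken_copy(void __user *dst, const void *src, size_t len)
{
	unsigned long left;

	spin_lock(&my_lock);
	left = copy_to_user(dst, src, len);	/* might_fault() now warns here */
	spin_unlock(&my_lock);

	return left ? -EFAULT : 0;
}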
debian/patches/features/all/rt/0003-uaccess-clarify-that-uaccess-may-only-sleep-if-pagef.patch (new file, 642 lines)
@@ -0,0 +1,642 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:08 +0200
Subject: [PATCH] sched/preempt, futex: Update comments to clarify that preemption doesn't have to be disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

In general, non-atomic variants of user access functions must not sleep
if pagefaults are disabled.

Let's update all relevant comments in uaccess code. This also reflects
the might_sleep() checks in might_fault().

[upstream commit 2f09b227eeed4b3a072fe818c82a4c773b778cde]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
 arch/avr32/include/asm/uaccess.h      | 12 ++++++---
 arch/hexagon/include/asm/uaccess.h    |  3 +-
 arch/m32r/include/asm/uaccess.h       | 30 +++++++++++++++-------
 arch/microblaze/include/asm/uaccess.h |  6 +++-
 arch/mips/include/asm/uaccess.h       | 45 ++++++++++++++++++++++------------
 arch/s390/include/asm/uaccess.h       | 15 +++++++----
 arch/score/include/asm/uaccess.h      | 15 +++++++----
 arch/tile/include/asm/uaccess.h       | 18 +++++++++----
 arch/x86/include/asm/uaccess.h        | 15 +++++++----
 arch/x86/include/asm/uaccess_32.h     |  6 +++-
 arch/x86/lib/usercopy_32.c            |  6 +++-
 lib/strnlen_user.c                    |  6 +++-
 12 files changed, 118 insertions(+), 59 deletions(-)

--- a/arch/avr32/include/asm/uaccess.h
+++ b/arch/avr32/include/asm/uaccess.h
@@ -97,7 +97,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -116,7 +117,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -136,7 +138,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -158,7 +161,8 @@ static inline __kernel_size_t __copy_fro
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_reg
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -274,7 +278,8 @@ do { \
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space. Caller must check
  * the specified block with access_ok() before calling this function.
@@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space. Caller must check
  * the specified block with access_ok() before calling this function.
@@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(v
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -677,7 +686,8 @@ unsigned long clear_user(void __user *me
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -178,7 +178,8 @@ extern long __user_bad(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -290,7 +291,8 @@ extern long __user_bad(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -103,7 +103,8 @@ extern u64 __ua_limit;
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -138,7 +139,8 @@ extern u64 __ua_limit;
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -157,7 +159,8 @@ extern u64 __ua_limit;
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -177,7 +180,8 @@ extern u64 __ua_limit;
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -199,7 +203,8 @@ extern u64 __ua_limit;
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -498,7 +503,8 @@ extern void __put_user_unknown(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -517,7 +523,8 @@ extern void __put_user_unknown(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -537,7 +544,8 @@ extern void __put_user_unknown(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -559,7 +567,8 @@ extern void __put_user_unknown(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -815,7 +824,8 @@ extern size_t __copy_user(void *__to, co
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -888,7 +898,8 @@ extern size_t __copy_user_inatomic(void
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -1075,7 +1086,8 @@ extern size_t __copy_in_user_eva(void *_
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -1107,7 +1119,8 @@ extern size_t __copy_in_user_eva(void *_
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -1329,7 +1342,8 @@ strncpy_from_user(char *__to, const char
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
@@ -1398,7 +1412,8 @@ static inline long __strnlen_user(const
  * strnlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -98,7 +98,8 @@ static inline unsigned long extable_fixu
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_u
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -290,7 +293,8 @@ void copy_from_user_overflow(void)
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space.
  *
@@ -348,7 +352,8 @@ static inline unsigned long strnlen_user
  * strlen_user: - Get the size of a string in user space.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
--- a/arch/score/include/asm/uaccess.h
+++ b/arch/score/include/asm/uaccess.h
@@ -36,7 +36,8 @@
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -61,7 +62,8 @@
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -79,7 +81,8 @@
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -98,7 +101,8 @@
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -119,7 +123,8 @@
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsig
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -192,7 +193,8 @@ extern int __get_user_bad(void)
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -274,7 +276,8 @@ extern int __put_user_bad(void)
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -330,7 +333,8 @@ extern int __put_user_bad(void)
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -437,7 +442,8 @@ static inline unsigned long __must_check
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to user space. Caller must check
 * the specified blocks with access_ok() before calling this function.
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(un
  * @addr: User space pointer to start of block to check
  * @size: Size of block to check
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Checks if a pointer to a block of memory in user space is valid.
  *
@@ -145,7 +146,8 @@ extern int __get_user_bad(void);
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -240,7 +242,8 @@ extern void __put_user_8(void);
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
@@ -455,7 +458,8 @@ struct __large_struct { unsigned long bu
  * @x: Variable to store result.
  * @ptr: Source address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple variable from user space to kernel
  * space. It supports simple types like char and int, but not larger
@@ -479,7 +483,8 @@ struct __large_struct { unsigned long bu
  * @x: Value to copy to user space.
  * @ptr: Destination address, in user space.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * This macro copies a single simple value from kernel space to user
  * space. It supports simple types like char and int, but not larger
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -70,7 +70,8 @@ static __always_inline unsigned long __m
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space. Caller must check
 * the specified block with access_ok() before calling this function.
@@ -117,7 +118,8 @@ static __always_inline unsigned long
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space. Caller must check
 * the specified block with access_ok() before calling this function.
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocach
  * @from: Source address, in kernel space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from kernel space to user space.
  *
@@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user);
  * @from: Source address, in user space.
  * @n: Number of bytes to copy.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Copy data from user space to kernel space.
  *
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -85,7 +85,8 @@ static inline long do_strnlen_user(const
  * @str: The string to measure.
  * @count: Maximum count (including NUL character)
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
@@ -121,7 +122,8 @@ EXPORT_SYMBOL(strnlen_user);
  * strlen_user: - Get the size of a user string INCLUDING final NUL.
  * @str: The string to measure.
  *
- * Context: User context only. This function may sleep.
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
  *
  * Get the size of a NUL-terminated string in user space.
  *
debian/patches/features/all/rt/0004-ipc-mqueue-Implement-lockless-pipelined-wakeups.patch (new file, 184 lines)
@@ -0,0 +1,184 @@
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Mon, 4 May 2015 07:02:46 -0700
Subject: ipc/mqueue: Implement lockless pipelined wakeups
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This patch moves the wakeup_process() invocation so it is not done under
the info->lock by making use of a lockless wake_q. With this change, the
waiter is woken up once it is STATE_READY and it does not need to loop
on SMP if it is still in STATE_PENDING. In the timeout case we still need
to grab the info->lock to verify the state.

This change should also avoid the introduction of preempt_disable() in -rt
which avoids a busy-loop which polls for the STATE_PENDING -> STATE_READY
change if the waiter has a higher priority compared to the waker.

Additionally, this patch micro-optimizes wq_sleep by using the cheaper
cousin of set_current_state(TASK_INTERRUPTIBLE) as we will block no
matter what, thus get rid of the implied barrier.

[upstream commit fa6004ad4528153b699a4d5ce5ea6b33acce74cc]

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: George Spelvin <linux@horizon.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Mason <clm@fb.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: dave@stgolabs.net
Link: http://lkml.kernel.org/r/1430748166.1940.17.camel@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 ipc/mqueue.c | 54 +++++++++++++++++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 21 deletions(-)

--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -47,8 +47,7 @@
 #define RECV		1

 #define STATE_NONE	0
-#define STATE_PENDING	1
-#define STATE_READY	2
+#define STATE_READY	1

 struct posix_msg_tree_node {
 	struct rb_node		rb_node;
@@ -571,15 +570,12 @@ static int wq_sleep(struct mqueue_inode_
 	wq_add(info, sr, ewp);

 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
+		__set_current_state(TASK_INTERRUPTIBLE);

 		spin_unlock(&info->lock);
 		time = schedule_hrtimeout_range_clock(timeout, 0,
			HRTIMER_MODE_ABS, CLOCK_REALTIME);

-		while (ewp->state == STATE_PENDING)
-			cpu_relax();
-
 		if (ewp->state == STATE_READY) {
 			retval = 0;
 			goto out;
@@ -907,11 +903,15 @@ SYSCALL_DEFINE1(mq_unlink, const char __
  * list of waiting receivers. A sender checks that list before adding the new
  * message into the message array. If there is a waiting receiver, then it
  * bypasses the message array and directly hands the message over to the
- * receiver.
- * The receiver accepts the message and returns without grabbing the queue
- * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
- * are necessary. The same algorithm is used for sysv semaphores, see
- * ipc/sem.c for more details.
+ * receiver. The receiver accepts the message and returns without grabbing the
+ * queue spinlock:
+ *
+ * - Set pointer to message.
+ * - Queue the receiver task for later wakeup (without the info->lock).
+ * - Update its state to STATE_READY. Now the receiver can continue.
+ * - Wake up the process after the lock is dropped. Should the process wake up
+ *   before this wakeup (due to a timeout or a signal) it will either see
|
||||
+ * STATE_READY and continue or acquire the lock to check the state again.
|
||||
*
|
||||
* The same algorithm is used for senders.
|
||||
*/
|
||||
@@ -919,21 +919,29 @@ SYSCALL_DEFINE1(mq_unlink, const char __
|
||||
/* pipelined_send() - send a message directly to the task waiting in
|
||||
* sys_mq_timedreceive() (without inserting message into a queue).
|
||||
*/
|
||||
-static inline void pipelined_send(struct mqueue_inode_info *info,
|
||||
+static inline void pipelined_send(struct wake_q_head *wake_q,
|
||||
+ struct mqueue_inode_info *info,
|
||||
struct msg_msg *message,
|
||||
struct ext_wait_queue *receiver)
|
||||
{
|
||||
receiver->msg = message;
|
||||
list_del(&receiver->list);
|
||||
- receiver->state = STATE_PENDING;
|
||||
- wake_up_process(receiver->task);
|
||||
- smp_wmb();
|
||||
+ wake_q_add(wake_q, receiver->task);
|
||||
+ /*
|
||||
+ * Rely on the implicit cmpxchg barrier from wake_q_add such
|
||||
+ * that we can ensure that updating receiver->state is the last
|
||||
+ * write operation: As once set, the receiver can continue,
|
||||
+ * and if we don't have the reference count from the wake_q,
|
||||
+ * yet, at that point we can later have a use-after-free
|
||||
+ * condition and bogus wakeup.
|
||||
+ */
|
||||
receiver->state = STATE_READY;
|
||||
}
|
||||
|
||||
/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
|
||||
* gets its message and put to the queue (we have one free place for sure). */
|
||||
-static inline void pipelined_receive(struct mqueue_inode_info *info)
|
||||
+static inline void pipelined_receive(struct wake_q_head *wake_q,
|
||||
+ struct mqueue_inode_info *info)
|
||||
{
|
||||
struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
|
||||
|
||||
@@ -944,10 +952,9 @@ static inline void pipelined_receive(str
|
||||
}
|
||||
if (msg_insert(sender->msg, info))
|
||||
return;
|
||||
+
|
||||
list_del(&sender->list);
|
||||
- sender->state = STATE_PENDING;
|
||||
- wake_up_process(sender->task);
|
||||
- smp_wmb();
|
||||
+ wake_q_add(wake_q, sender->task);
|
||||
sender->state = STATE_READY;
|
||||
}
|
||||
|
||||
@@ -965,6 +972,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
|
||||
struct timespec ts;
|
||||
struct posix_msg_tree_node *new_leaf = NULL;
|
||||
int ret = 0;
|
||||
+ WAKE_Q(wake_q);
|
||||
|
||||
if (u_abs_timeout) {
|
||||
int res = prepare_timeout(u_abs_timeout, &expires, &ts);
|
||||
@@ -1049,7 +1057,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
|
||||
} else {
|
||||
receiver = wq_get_first_waiter(info, RECV);
|
||||
if (receiver) {
|
||||
- pipelined_send(info, msg_ptr, receiver);
|
||||
+ pipelined_send(&wake_q, info, msg_ptr, receiver);
|
||||
} else {
|
||||
/* adds message to the queue */
|
||||
ret = msg_insert(msg_ptr, info);
|
||||
@@ -1062,6 +1070,7 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqd
|
||||
}
|
||||
out_unlock:
|
||||
spin_unlock(&info->lock);
|
||||
+ wake_up_q(&wake_q);
|
||||
out_free:
|
||||
if (ret)
|
||||
free_msg(msg_ptr);
|
||||
@@ -1149,14 +1158,17 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t,
|
||||
msg_ptr = wait.msg;
|
||||
}
|
||||
} else {
|
||||
+ WAKE_Q(wake_q);
|
||||
+
|
||||
msg_ptr = msg_get(info);
|
||||
|
||||
inode->i_atime = inode->i_mtime = inode->i_ctime =
|
||||
CURRENT_TIME;
|
||||
|
||||
/* There is now free space in queue. */
|
||||
- pipelined_receive(info);
|
||||
+ pipelined_receive(&wake_q, info);
|
||||
spin_unlock(&info->lock);
|
||||
+ wake_up_q(&wake_q);
|
||||
ret = 0;
|
||||
}
|
||||
if (ret == 0) {
|
368
debian/patches/features/all/rt/0004-mm-explicitly-disable-enable-preemption-in-kmap_atom.patch
vendored
Normal file
@@ -0,0 +1,368 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:09 +0200
Subject: sched/preempt, mm/kmap: Explicitly disable/enable preemption in kmap_atomic_*
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The existing code relies on pagefault_disable() implicitly disabling
preemption, so that no schedule will happen between kmap_atomic() and
kunmap_atomic().

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

[upstream commit 2cb7c9cb426660b5ed58b643d9e7dd5d50ba901f]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/mm/highmem.c | 3 +++
arch/frv/mm/highmem.c | 2 ++
arch/metag/mm/highmem.c | 4 +++-
arch/microblaze/mm/highmem.c | 4 +++-
arch/mips/mm/highmem.c | 5 ++++-
arch/mn10300/include/asm/highmem.h | 3 +++
arch/parisc/include/asm/cacheflush.h | 2 ++
arch/powerpc/mm/highmem.c | 4 +++-
arch/sparc/mm/highmem.c | 4 +++-
arch/tile/mm/highmem.c | 3 ++-
arch/x86/mm/highmem_32.c | 3 ++-
arch/x86/mm/iomap_32.c | 2 ++
arch/xtensa/mm/highmem.c | 2 ++
include/linux/highmem.h | 2 ++
include/linux/io-mapping.h | 2 ++
15 files changed, 38 insertions(+), 7 deletions(-)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -59,6 +59,7 @@ void *kmap_atomic(struct page *page)
void *kmap;
int type;

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -121,6 +122,7 @@ void __kunmap_atomic(void *kvaddr)
kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
}
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -130,6 +132,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
int idx, type;
struct page *page = pfn_to_page(pfn);

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
--- a/arch/frv/mm/highmem.c
+++ b/arch/frv/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
unsigned long paddr;
int type;

+ preempt_disable();
pagefault_disable();
type = kmap_atomic_idx_push();
paddr = page_to_phys(page);
@@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr)
}
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/metag/mm/highmem.c
+++ b/arch/metag/mm/highmem.c
@@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
--- a/arch/microblaze/mm/highmem.c
+++ b/arch/microblaze/mm/highmem.c
@@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}

@@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/mips/mm/highmem.c
+++ b/arch/mips/mm/highmem.c
@@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}

@@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr)
#endif
kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

@@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn)
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
--- a/arch/mn10300/include/asm/highmem.h
+++ b/arch/mn10300/include/asm/highmem.h
@@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct p
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();
if (page < highmem_start_page)
return page_address(page);
@@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsig

if (vaddr < FIXADDR_START) { /* FIXME */
pagefault_enable();
+ preempt_enable();
return;
}

@@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsig

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
#endif /* __KERNEL__ */

--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -142,6 +142,7 @@ static inline void kunmap(struct page *p

static inline void *kmap_atomic(struct page *page)
{
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -150,6 +151,7 @@ static inline void __kunmap_atomic(void
{
flush_kernel_dcache_page_addr(addr);
pagefault_enable();
+ preempt_enable();
}

#define kmap_atomic_prot(page, prot) kmap_atomic(page)
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
+ preempt_enable();
return;
}

@@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr)

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/sparc/mm/highmem.c
+++ b/arch/sparc/mm/highmem.c
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page)
unsigned long vaddr;
long idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr)

if (vaddr < FIXADDR_START) { // FIXME
pagefault_enable();
+ preempt_enable();
return;
}

@@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr)

kmap_atomic_idx_pop();
pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);
--- a/arch/tile/mm/highmem.c
+++ b/arch/tile/mm/highmem.c
@@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page
int idx, type;
pte_t *pte;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();

/* Avoid icache flushes by disallowing atomic executable mappings. */
@@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -35,7 +35,7 @@ void *kmap_atomic_prot(struct page *page
unsigned long vaddr;
int idx, type;

- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+ preempt_disable();
pagefault_disable();

if (!PageHighMem(page))
@@ -100,6 +100,7 @@ void __kunmap_atomic(void *kvaddr)
#endif

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -59,6 +59,7 @@ void *kmap_atomic_prot_pfn(unsigned long
unsigned long vaddr;
int idx, type;

+ preempt_disable();
pagefault_disable();

type = kmap_atomic_idx_push();
@@ -117,5 +118,6 @@ iounmap_atomic(void __iomem *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL_GPL(iounmap_atomic);
--- a/arch/xtensa/mm/highmem.c
+++ b/arch/xtensa/mm/highmem.c
@@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page)
enum fixed_addresses idx;
unsigned long vaddr;

+ preempt_disable();
pagefault_disable();
if (!PageHighMem(page))
return page_address(page);
@@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr)
}

pagefault_enable();
+ preempt_enable();
}
EXPORT_SYMBOL(__kunmap_atomic);

--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -65,6 +65,7 @@ static inline void kunmap(struct page *p

static inline void *kmap_atomic(struct page *page)
{
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -73,6 +74,7 @@ static inline void *kmap_atomic(struct p
static inline void __kunmap_atomic(void *addr)
{
pagefault_enable();
+ preempt_enable();
}

#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -141,6 +141,7 @@ static inline void __iomem *
io_mapping_map_atomic_wc(struct io_mapping *mapping,
unsigned long offset)
{
+ preempt_disable();
pagefault_disable();
return ((char __force __iomem *) mapping) + offset;
}
@@ -149,6 +150,7 @@ static inline void
io_mapping_unmap_atomic(void __iomem *vaddr)
{
pagefault_enable();
+ preempt_enable();
}

/* Non-atomic map/unmap */
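
The ordering the patch establishes is symmetric: preemption is disabled before pagefaults on the way in, and re-enabled after pagefaults on the way out, so kmap_atomic() keeps its no-migration guarantee once pagefault_disable() stops implying preempt_disable(). A condensed sketch of what callers now get explicitly (the helper function is hypothetical):

	/* Sketch: the nesting kmap_atomic()/kunmap_atomic() now guarantee. */
	static void copy_byte_from_highmem(struct page *page, char *dst)
	{
		char *vaddr = kmap_atomic(page); /* preempt_disable(); pagefault_disable(); */

		*dst = vaddr[0];                 /* no sleeping, no CPU migration here */

		kunmap_atomic(vaddr);            /* pagefault_enable(); preempt_enable(); */
	}
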
43
debian/patches/features/all/rt/0005-futex-Ensure-lock-unlock-symetry-versus-pi_lock-and-.patch
vendored
Normal file
@@ -0,0 +1,43 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 1 Mar 2013 11:17:42 +0100
Subject: futex: Ensure lock/unlock symmetry versus pi_lock and hash bucket lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

In exit_pi_state_list() we have the following locking construct:

spin_lock(&hb->lock);
raw_spin_lock_irq(&curr->pi_lock);

...
spin_unlock(&hb->lock);

In !RT this works, but on RT the migrate_enable() function which is
called from spin_unlock() sees atomic context due to the held pi_lock
and just decrements the migrate_disable_atomic counter of the
task. Now the next call to migrate_disable() sees the counter being
negative and issues a warning. That check should be in
migrate_enable() already.

Fix this by dropping pi_lock before unlocking hb->lock and reacquire
pi_lock after that again. This is safe as the loop code reevaluates
head again under the pi_lock.

Reported-by: Yong Zhang <yong.zhang@windriver.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/futex.c | 2 ++
1 file changed, 2 insertions(+)

--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -738,7 +738,9 @@ void exit_pi_state_list(struct task_stru
* task still owns the PI-state:
*/
if (head->next != next) {
+ raw_spin_unlock_irq(&curr->pi_lock);
spin_unlock(&hb->lock);
+ raw_spin_lock_irq(&curr->pi_lock);
continue;
}
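
The rule the fix enforces is that nested locks are released in the inverse order they were taken, so -rt's migrate_disable()/migrate_enable() accounting stays balanced. Schematically (comments only; not a compilable unit):

	/* Broken on -rt: pi_lock is still held while hb->lock is released,
	 * so migrate_enable() runs in atomic context and miscounts. */
	/*   spin_lock(&hb->lock);
	 *   raw_spin_lock_irq(&curr->pi_lock);
	 *   ...
	 *   spin_unlock(&hb->lock);          <-- inner lock still held   */

	/* Fixed: drop pi_lock first, retake it after hb->lock is released,
	 * then re-evaluate the list head under pi_lock as before. */
	/*   raw_spin_unlock_irq(&curr->pi_lock);
	 *   spin_unlock(&hb->lock);
	 *   raw_spin_lock_irq(&curr->pi_lock);                            */
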
41
debian/patches/features/all/rt/0005-mips-kmap_coherent-relies-on-disabled-preemption.patch
vendored
Normal file
@@ -0,0 +1,41 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:10 +0200
Subject: sched/preempt, mm/kmap, MIPS: Disable preemption in kmap_coherent() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

k(un)map_coherent relies on pagefault_disable() to also disable
preemption.

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

This patch is based on a patch by Yang Shi on the -rt tree:
"k{un}map_coherent are just called when cpu_has_dc_aliases == 1 with VIPT
cache. However, actually, the most modern MIPS processors have PIPT dcache
without dcache alias issue. In such case, k{un}map_atomic will be called
with preempt enabled."

[upstream commit ce01948eb85da733558fa77c2a554144a57ab0fb]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/mips/mm/init.c | 2 ++
1 file changed, 2 insertions(+)

--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *

BUG_ON(Page_dcache_dirty(page));

+ preempt_disable();
pagefault_disable();
idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1);
idx += in_interrupt() ? FIX_N_COLOURS : 0;
@@ -152,6 +153,7 @@ void kunmap_coherent(void)
write_c0_entryhi(old_ctx);
local_irq_restore(flags);
pagefault_enable();
+ preempt_enable();
}

void copy_user_highpage(struct page *to, struct page *from,
647
debian/patches/features/all/rt/0006-mm-use-pagefault_disable-to-check-for-disabled-pagef.patch
vendored
Normal file
@@ -0,0 +1,647 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:11 +0200
Subject: mm/fault, arch: Use pagefault_disable() to check for disabled pagefaults in the handler
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Introduce faulthandler_disabled() and use it to check for irq context and
disabled pagefaults (via pagefault_disable()) in the pagefault handlers.

Please note that we keep the in_atomic() checks in place - to detect
whether in irq context (in which case preemption is always properly
disabled).

In contrast, preempt_disable() should never be used to disable pagefaults.
With !CONFIG_PREEMPT_COUNT, preempt_disable() doesn't modify the preempt
counter, and therefore the result of in_atomic() differs.
We validate that condition by using might_fault() checks when calling
might_sleep().

Therefore, add a comment to faulthandler_disabled(), describing why this
is needed.

faulthandler_disabled() and pagefault_disable() are defined in
linux/uaccess.h, so let's properly add that include to all relevant files.

This patch is based on a patch from Thomas Gleixner.

[upstream commit 70ffdb9393a7264a069265edded729078dcf0425]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/alpha/mm/fault.c | 5 ++---
arch/arc/mm/fault.c | 2 +-
arch/arm/mm/fault.c | 2 +-
arch/arm64/mm/fault.c | 2 +-
arch/avr32/mm/fault.c | 4 ++--
arch/cris/mm/fault.c | 6 +++---
arch/frv/mm/fault.c | 4 ++--
arch/ia64/mm/fault.c | 4 ++--
arch/m32r/mm/fault.c | 8 ++++----
arch/m68k/mm/fault.c | 4 ++--
arch/metag/mm/fault.c | 2 +-
arch/microblaze/mm/fault.c | 8 ++++----
arch/mips/mm/fault.c | 4 ++--
arch/mn10300/mm/fault.c | 4 ++--
arch/nios2/mm/fault.c | 2 +-
arch/parisc/kernel/traps.c | 4 ++--
arch/parisc/mm/fault.c | 4 ++--
arch/powerpc/mm/fault.c | 9 +++++----
arch/s390/mm/fault.c | 2 +-
arch/score/mm/fault.c | 3 ++-
arch/sh/mm/fault.c | 5 +++--
arch/sparc/mm/fault_32.c | 4 ++--
arch/sparc/mm/fault_64.c | 4 ++--
arch/sparc/mm/init_64.c | 2 +-
arch/tile/mm/fault.c | 4 ++--
arch/um/kernel/trap.c | 4 ++--
arch/unicore32/mm/fault.c | 2 +-
arch/x86/mm/fault.c | 5 +++--
arch/xtensa/mm/fault.c | 4 ++--
include/linux/uaccess.h | 12 ++++++++++++
30 files changed, 72 insertions(+), 57 deletions(-)

--- a/arch/alpha/mm/fault.c
+++ b/arch/alpha/mm/fault.c
@@ -23,8 +23,7 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/module.h>
-
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>

extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);

@@ -107,7 +106,7 @@ do_page_fault(unsigned long address, uns

/* If we're in an interrupt context, or have no user context,
we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || faulthandler_disabled())
goto no_context;

#ifdef CONFIG_ALPHA_LARGE_VMALLOC
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -86,7 +86,7 @@ void do_page_fault(unsigned long address
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsign
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -211,7 +211,7 @@ static int __kprobes do_page_fault(unsig
* If we're in an interrupt or have no user context, we must not take
* the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -14,11 +14,11 @@
#include <linux/pagemap.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
+#include <linux/uaccess.h>

#include <asm/mmu_context.h>
#include <asm/sysreg.h>
#include <asm/tlb.h>
-#include <asm/uaccess.h>

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, int trap)
@@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned l
* If we're in an interrupt or have no user context, we must
* not take the fault...
*/
- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+ if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM))
goto no_context;

local_irq_enable();
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -8,7 +8,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/wait.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <arch/system.h>

extern int find_fixup_code(struct pt_regs *);
@@ -109,11 +109,11 @@ do_page_fault(unsigned long address, str
info.si_code = SEGV_MAPERR;

/*
- * If we're in an interrupt or "atomic" operation or have no
+ * If we're in an interrupt, have pagefaults disabled or have no
* user context, we must not take the fault.
*/

- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/frv/mm/fault.c
+++ b/arch/frv/mm/fault.c
@@ -19,9 +19,9 @@
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>

#include <asm/pgtable.h>
-#include <asm/uaccess.h>
#include <asm/gdb-stub.h>

/*****************************************************************************/
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datamm
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(__frame))
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -11,10 +11,10 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/prefetch.h>
+#include <linux/uaccess.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>

extern int die(char *, struct pt_regs *, long);

@@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long addres
/*
* If we're in an interrupt or have no user context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

#ifdef CONFIG_VIRTUAL_MEM_MAP
--- a/arch/m32r/mm/fault.c
+++ b/arch/m32r/mm/fault.c
@@ -24,9 +24,9 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

#include <asm/m32r.h>
-#include <asm/uaccess.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_
mm = tsk->mm;

/*
- * If we're in an interrupt or have no user context or are running in an
- * atomic region then we must not take the fault..
+ * If we're in an interrupt or have no user context or have pagefaults
+ * disabled then we must not take the fault.
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (error_code & ACE_USERMODE)
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -10,10 +10,10 @@
#include <linux/ptrace.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

#include <asm/setup.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>

extern void die_if_kernel(char *, struct pt_regs *, long);
@@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/metag/mm/fault.c
+++ b/arch/metag/mm/fault.c
@@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs,

mm = tsk->mm;

- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs,
if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
is_write = 0;

- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
if (kernel_mode(regs))
goto bad_area_nosemaphore;

- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
- pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n",
- mm);
+ pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n",
+ mm);
pr_emerg("r15 = %lx MSR = %lx\n",
regs->r15, regs->msr);
die("Weird page fault", regs, SIGSEGV);
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -21,10 +21,10 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
+#include <linux/uaccess.h>

#include <asm/branch.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/highmem.h> /* For VMALLOC_END */
#include <linux/kdebug.h>
@@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(st
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/mn10300/mm/fault.c
+++ b/arch/mn10300/mm/fault.c
@@ -23,8 +23,8 @@
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
+#include <linux/uaccess.h>

-#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/hardirq.h>
#include <asm/cpu-regs.h>
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -26,9 +26,9 @@
#include <linux/console.h>
#include <linux/bug.h>
#include <linux/ratelimit.h>
+#include <linux/uaccess.h>

#include <asm/assembly.h>
-#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/traps.h>
@@ -800,7 +800,7 @@ void notrace handle_interruption(int cod
* unless pagefault_disable() was called before.
*/

- if (fault_space == 0 && !in_atomic())
+ if (fault_space == 0 && !faulthandler_disabled())
{
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC);
parisc_terminate("Kernel Fault", regs, code, fault_address);
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -15,8 +15,8 @@
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/module.h>
+#include <linux/uaccess.h>

-#include <asm/uaccess.h>
#include <asm/traps.h>

/* Various important other fields */
@@ -207,7 +207,7 @@ void do_page_fault(struct pt_regs *regs,
int fault;
unsigned int flags;

- if (in_atomic())
+ if (pagefault_disabled())
goto no_context;

tsk = current;
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -33,13 +33,13 @@
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
#include <linux/hugetlb.h>
+#include <linux/uaccess.h>

#include <asm/firmware.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
@@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_re
if (!arch_irq_disabled_regs(regs))
local_irq_enable();

- if (in_atomic() || mm == NULL) {
+ if (faulthandler_disabled() || mm == NULL) {
if (!user_mode(regs)) {
rc = SIGSEGV;
goto bail;
}
- /* in_atomic() in user mode is really bad,
+ /* faulthandler_disabled() in user mode is really bad,
as is current->mm == NULL. */
printk(KERN_EMERG "Page fault in user mode with "
- "in_atomic() = %d mm = %p\n", in_atomic(), mm);
+ "faulthandler_disabled() = %d mm = %p\n",
+ faulthandler_disabled(), mm);
printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
regs->nip, regs->msr);
die("Weird page fault", regs, SIGSEGV);
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -399,7 +399,7 @@ static inline int do_exception(struct pt
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(regs) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm))
goto out;

address = trans_exc_code & __FAIL_ADDR_MASK;
--- a/arch/score/mm/fault.c
+++ b/arch/score/mm/fault.c
@@ -34,6 +34,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/uaccess.h>

/*
* This routine handles page faults. It determines the address,
@@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto bad_area_nosemaphore;

if (user_mode(regs))
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -17,6 +17,7 @@
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(

/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * with pagefaults disabled then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -21,6 +21,7 @@
#include <linux/perf_event.h>
#include <linux/interrupt.h>
#include <linux/kdebug.h>
+#include <linux/uaccess.h>

#include <asm/page.h>
#include <asm/pgtable.h>
@@ -29,7 +30,6 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/traps.h>
-#include <asm/uaccess.h>

#include "mm_32.h"

@@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (pagefault_disabled() || !mm)
goto no_context;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -22,12 +22,12 @@
#include <linux/kdebug.h>
#include <linux/percpu.h>
#include <linux/context_tracking.h>
+#include <linux/uaccess.h>

#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/openprom.h>
#include <asm/oplib.h>
-#include <asm/uaccess.h>
#include <asm/asi.h>
#include <asm/lsu.h>
#include <asm/sections.h>
@@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fau
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto intr_or_no_mm;

perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2738,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct tsb_config *tp;

- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
const struct exception_table_entry *entry;

entry = search_exception_tables(regs->tpc);
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_r

/*
* If we're in an interrupt, have no user context or are running in an
- * atomic region then we must not take the fault.
+ * region with pagefaults disabled then we must not take the fault.
*/
- if (in_atomic() || !mm) {
+ if (pagefault_disabled() || !mm) {
vma = NULL; /* happy compiler */
goto bad_area_nosemaphore;
}
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -35,10 +35,10 @@ int handle_page_fault(unsigned long addr
*code_out = SEGV_MAPERR;

/*
- * If the fault was during atomic operation, don't take the fault, just
+ * If the fault was with pagefaults disabled, don't take the fault, just
* fail.
*/
- if (in_atomic())
+ if (faulthandler_disabled())
goto out_nosemaphore;

if (is_user)
--- a/arch/unicore32/mm/fault.c
+++ b/arch/unicore32/mm/fault.c
@@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, uns
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (faulthandler_disabled() || !mm)
goto no_context;

if (user_mode(regs))
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -13,6 +13,7 @@
#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <linux/prefetch.h> /* prefetchw */
#include <linux/context_tracking.h> /* exception_enter(), ... */
+#include <linux/uaccess.h> /* faulthandler_disabled() */

#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -1126,9 +1127,9 @@ static noinline void

/*
* If we're in an interrupt, have no user context or are running
- * in an atomic region then we must not take the fault:
+ * in a region with pagefaults disabled then we must not take the fault
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(faulthandler_disabled() || !mm)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -15,10 +15,10 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/hardirq.h>
+#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/hardirq.h>
-#include <asm/uaccess.h>
#include <asm/pgalloc.h>

DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST;
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
/* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm) {
+ if (faulthandler_disabled() || !mm) {
bad_page_fault(regs, address, SIGSEGV);
return;
}
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -59,6 +59,18 @@ static inline void pagefault_enable(void
*/
#define pagefault_disabled() (current->pagefault_disabled != 0)

+/*
+ * The pagefault handler is in general disabled by pagefault_disable() or
+ * when in irq context (via in_atomic()).
+ *
+ * This function should only be used by the fault handlers. Other users should
+ * stick to pagefault_disabled().
+ * Please NEVER use preempt_disable() to disable the fault handler. With
+ * !CONFIG_PREEMPT_COUNT, this is like a NOP. So the handler won't be disabled.
+ * in_atomic() will report different values based on !CONFIG_PREEMPT_COUNT.
+ */
+#define faulthandler_disabled() (pagefault_disabled() || in_atomic())
+
#ifndef ARCH_HAS_NOCACHE_UACCESS

static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
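
The conversion follows one template per architecture, so any new fault handler would use the same check. A minimal sketch (hypothetical handler; bad_page_fault() stands in for the arch's fixup path):

	#include <linux/uaccess.h>	/* faulthandler_disabled() */

	void my_do_page_fault(struct pt_regs *regs, unsigned long address)
	{
		struct mm_struct *mm = current->mm;

		/*
		 * In irq context, or with pagefaults disabled via
		 * pagefault_disable(), we must not handle the fault:
		 * fall through to the exception-fixup path instead.
		 */
		if (faulthandler_disabled() || !mm) {
			bad_page_fault(regs, address);	/* hypothetical fixup helper */
			return;
		}
		/* ... normal VMA lookup and fault handling ... */
	}
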
33
debian/patches/features/all/rt/0007-drm-i915-use-pagefault_disabled-to-check-for-disable.patch
vendored
Normal file
@@ -0,0 +1,33 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:12 +0200
Subject: mm/fault, drm/i915: Use pagefault_disabled() to check for disabled pagefaults
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Now that the pagefault disabled counter is in place, we can replace
the in_atomic() check by a pagefault_disabled() check.

[upstream commit 32d8206725bcf6e3ce7832ac39e61a6ecfd558db]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/dma_remapping.h>
+#include <linux/uaccess.h>

#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -465,7 +466,7 @@ i915_gem_execbuffer_relocate_entry(struc
}

/* We can't wait for rendering with pagefaults disabled */
- if (obj->active && in_atomic())
+ if (obj->active && pagefault_disabled())
return -EFAULT;

if (use_cpu_reloc(obj))
46
debian/patches/features/all/rt/0008-futex-UP-futex_atomic_op_inuser-relies-on-disabled-p.patch
vendored
Normal file
@@ -0,0 +1,46 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:13 +0200
Subject: sched/preempt, futex: Disable preemption in UP futex_atomic_op_inuser() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's explicitly disable/enable preemption in the !CONFIG_SMP version
of futex_atomic_op_inuser, to prepare for pagefault_disable() not
touching preemption anymore.

Otherwise we might break mutual exclusion when relying on a get_user()/
put_user() implementation.

[upstream commit f3dae07e442a8131a5485b6a38db2aa22a7a48cf]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/asm-generic/futex.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -8,8 +8,7 @@
#ifndef CONFIG_SMP
/*
* The following implementation only for uniprocessor machines.
- * For UP, it's relies on the fact that pagefault_disable() also disables
- * preemption to ensure mutual exclusion.
+ * It relies on preempt_disable() ensuring mutual exclusion.
*
*/

@@ -38,6 +37,7 @@ futex_atomic_op_inuser(int encoded_op, u
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;

+ preempt_disable();
pagefault_disable();

ret = -EFAULT;
@@ -72,6 +72,7 @@ futex_atomic_op_inuser(int encoded_op, u

out_pagefault_enable:
pagefault_enable();
+ preempt_enable();

if (ret == 0) {
switch (cmp) {
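
On UP the generic futex code has no hardware atomicity to lean on: the read-modify-write of the user word is made atomic purely by forbidding preemption around it. A sketch of the FUTEX_OP_ADD path under that scheme (simplified; the function name is hypothetical):

	/* UP-only sketch: atomicity by exclusion, not by hardware. */
	static int futex_add_up(u32 __user *uaddr, int oparg)
	{
		int oldval, ret;

		preempt_disable();	/* no other task may run the same RMW */
		pagefault_disable();	/* the user access must fail, not sleep */

		ret = get_user(oldval, uaddr);
		if (ret == 0)
			ret = put_user(oldval + oparg, uaddr);

		pagefault_enable();
		preempt_enable();
		return ret ? -EFAULT : oldval;
	}
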
37
debian/patches/features/all/rt/0009-futex-UP-futex_atomic_cmpxchg_inatomic-relies-on-dis.patch
vendored
Normal file
@@ -0,0 +1,37 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:14 +0200
Subject: sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's explicitly disable/enable preemption in the !CONFIG_SMP version
of futex_atomic_cmpxchg_inatomic, to prepare for pagefault_disable() not
touching preemption anymore. This is needed for this function to be
callable from both, atomic and non-atomic context.

Otherwise we might break mutual exclusion when relying on a get_user()/
put_user() implementation.

[upstream commit f3dae07e442a8131a5485b6a38db2aa22a7a48cf]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/asm-generic/futex.h | 2 ++
1 file changed, 2 insertions(+)

--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -107,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
{
u32 val;

+ preempt_disable();
if (unlikely(get_user(val, uaddr) != 0))
return -EFAULT;

@@ -114,6 +115,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
return -EFAULT;

*uval = val;
+ preempt_enable();

return 0;
}
37
debian/patches/features/all/rt/0010-arm-futex-UP-futex_atomic_cmpxchg_inatomic-relies-on.patch
vendored
Normal file
@@ -0,0 +1,37 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:15 +0200
Subject: sched/preempt, arm/futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The !CONFIG_SMP implementation of futex_atomic_cmpxchg_inatomic()
requires preemption to be disabled to guarantee mutual exclusion.
Let's make this explicit.

This patch is based on a patch by Sebastian Andrzej Siewior on the
-rt branch.

[upstream commit 39919b01ae4c1949736b40b79e27178d0c0bc406]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/include/asm/futex.h | 3 +++
1 file changed, 3 insertions(+)

--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

+ preempt_disable();
__asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n"
"1: " TUSER(ldr) " %1, [%4]\n"
" teq %1, %2\n"
@@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
: "cc", "memory");

*uval = val;
+ preempt_enable();
+
return ret;
}

48
debian/patches/features/all/rt/0011-arm-futex-UP-futex_atomic_op_inuser-relies-on-disabl.patch
vendored
Normal file
@@ -0,0 +1,48 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:16 +0200
Subject: sched/preempt, arm/futex: Disable preemption in UP futex_atomic_op_inuser() explicitly
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The !CONFIG_SMP implementation of futex_atomic_op_inuser() seems to rely
on disabled preemption to guarantee mutual exclusion.

From commit e589ed23dd27:
"For UP it's enough to disable preemption to ensure mutual exclusion..."
From the code itself:
"!SMP, we can work around lack of atomic ops by disabling preemption"

Let's make this explicit, to prepare for pagefault_disable() not
touching preemption anymore.

[upstream commit 388b0e0adbc98a1b12a077dc92851a3ce016db42]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arm/include/asm/futex.h | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

--- a/arch/arm/include/asm/futex.h
+++ b/arch/arm/include/asm/futex.h
@@ -127,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+#ifndef CONFIG_SMP
+ preempt_disable();
+#endif
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -149,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op,
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();
+#ifndef CONFIG_SMP
+ preempt_enable();
+#endif

if (!ret) {
switch (cmp) {
86
debian/patches/features/all/rt/0012-futex-clarify-that-preemption-doesn-t-have-to-be-dis.patch
vendored
Normal file
@ -0,0 +1,86 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:17 +0200
Subject: sched/preempt, futex: Update comments to clarify that preemption doesn't have to be disabled
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As arm64 and arc have no special implementations for !CONFIG_SMP, mutual
exclusion doesn't seem to rely on preemption.

Let's make it clear in the comments that preemption doesn't have to be
disabled when accessing user space in the futex code, so we can remove
preempt_disable() from pagefault_disable().

[upstream commit 2f09b227eeed4b3a072fe818c82a4c773b778cde]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/arc/include/asm/futex.h | 10 +++++-----
arch/arm64/include/asm/futex.h | 4 ++--
2 files changed, 7 insertions(+), 7 deletions(-)

--- a/arch/arc/include/asm/futex.h
+++ b/arch/arc/include/asm/futex.h
@@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

if (!ret) {
switch (cmp) {
@@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser
return ret;
}

-/* Compare-xchg with preemption disabled.
+/* Compare-xchg with pagefaults disabled.
 * Notes:
 * -Best-Effort: Exchg happens only if compare succeeds.
 * If compare fails, returns; leaving retry/looping to upper layers
@@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

/* TBD : can use llock/scond */
__asm__ __volatile__(
@@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval,
: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
: "cc", "memory");

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

*uval = val;
return val;
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -58,7 +58,7 @@ futex_atomic_op_inuser (int encoded_op,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;

- pagefault_disable(); /* implies preempt_disable() */
+ pagefault_disable();

switch (op) {
case FUTEX_OP_SET:
@@ -85,7 +85,7 @@ futex_atomic_op_inuser (int encoded_op,
ret = -ENOSYS;
}

- pagefault_enable(); /* subsumes preempt_enable() */
+ pagefault_enable();

if (!ret) {
switch (cmp) {
34
debian/patches/features/all/rt/0013-mips-properly-lock-access-to-the-fpu.patch
vendored
Normal file
@ -0,0 +1,34 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:19 +0200
Subject: sched/preempt, MIPS: Properly lock access to the FPU
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Let's always disable preemption and pagefaults when locking the fpu,
so we can be sure that the owner won't change in between.

This is a preparation for pagefault_disable() not touching preemption
anymore.

[upstream commit 76deabd1867d6d2895152f31fdec819e3505738b]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
arch/mips/kernel/signal-common.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)

--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct
extern int fpcsr_pending(unsigned int __user *fpcsr);

/* Make sure we will not lose FPU ownership */
-#ifdef CONFIG_PREEMPT
-#define lock_fpu_owner() preempt_disable()
-#define unlock_fpu_owner() preempt_enable()
-#else
-#define lock_fpu_owner() pagefault_disable()
-#define unlock_fpu_owner() pagefault_enable()
-#endif
+#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
+#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })

#endif /* __SIGNAL_COMMON_H */
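
The ordering in these macros is the point, not an accident: preemption goes off first so the FPU owner is frozen before pagefaults are disabled, and the releases run strictly in reverse. A hedged restatement of that discipline (illustrative names, not MIPS code):

/* Why the nesting order matters: if pagefault_disable() ran first and
 * preemption stayed enabled for an instant, the task could migrate or
 * be scheduled out, and "current owns the FPU" would no longer hold. */
static inline void fpu_owner_lock_sketch(void)
{
	preempt_disable();	/* 1st: pin the task, freeze FPU ownership */
	pagefault_disable();	/* 2nd: user copies won't sleep in a fault */
}

static inline void fpu_owner_unlock_sketch(void)
{
	pagefault_enable();	/* undo in reverse order */
	preempt_enable();	/* a pending reschedule may fire here */
}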
61
debian/patches/features/all/rt/0014-uaccess-decouple-preemption-from-the-pagefault-logic.patch
vendored
Normal file
@ -0,0 +1,61 @@
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 11 May 2015 17:52:20 +0200
Subject: sched/preempt, mm/fault: Decouple preemption from the page fault logic
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As the fault handlers now all rely on the pagefault_disabled() checks
and implicit preempt_disable() calls by pagefault_disable() have been
made explicit, we can completely rely on the pagefault_disabled counter.

So let's no longer touch the preempt count when disabling/enabling
pagefaults. After a call to pagefault_disable(), pagefault_disabled()
will return true, but in_atomic() won't.

[upstream commit 8222dbe21e79338de92d5e1956cd1e3994cc9f93]
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
---
include/linux/uaccess.h | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)

--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -1,7 +1,6 @@
#ifndef __LINUX_UACCESS_H__
#define __LINUX_UACCESS_H__

-#include <linux/preempt.h>
#include <linux/sched.h>
#include <asm/uaccess.h>

@@ -20,17 +19,11 @@ static __always_inline void pagefault_di
 * These routines enable/disable the pagefault handler. If disabled, it will
 * not take any locks and go straight to the fixup table.
 *
- * We increase the preempt and the pagefault count, to be able to distinguish
- * whether we run in simple atomic context or in a real pagefault_disable()
- * context.
- *
- * For now, after pagefault_disabled() has been called, we run in atomic
- * context. User access methods will not sleep.
- *
+ * User access methods will not sleep when called from a pagefault_disabled()
+ * environment.
 */
static inline void pagefault_disable(void)
{
- preempt_count_inc();
pagefault_disabled_inc();
/*
 * make sure to have issued the store before a pagefault
@@ -47,11 +40,6 @@ static inline void pagefault_enable(void
 */
barrier();
pagefault_disabled_dec();
-#ifndef CONFIG_PREEMPT
- preempt_count_dec();
-#else
- preempt_enable();
-#endif
}

/*
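
After this decoupling, "may this code fault?" and "is this code atomic?" are answered by two independent counters. A small sketch of the distinction, assuming only the pagefault_disabled() helper this series relies on plus the stock in_atomic()/irqs_disabled():

/* In a fault handler: decide between the fixup table and the full
 * (possibly sleeping) path by the pagefault counter alone. */
static inline bool fault_must_use_fixup(void)
{
	return pagefault_disabled();
}

/* Elsewhere: "no sleeping allowed" is still the preempt counter's
 * business, and is no longer raised by pagefault_disable(). */
static inline bool no_sleeping_here(void)
{
	return in_atomic() || irqs_disabled();
}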
40
debian/patches/features/all/rt/ARM-cmpxchg-define-__HAVE_ARCH_CMPXCHG-for-armv6-and.patch
vendored
Normal file
@ -0,0 +1,40 @@
From: Yong Zhang <yong.zhang at windriver.com>
Date: Thu, 29 Jan 2015 12:56:18 -0600
Subject: ARM: cmpxchg: define __HAVE_ARCH_CMPXCHG for armv6 and later
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Both pi_stress and sigwaittest in rt-test show performance gain with
__HAVE_ARCH_CMPXCHG. Testing result on coretile_express_a9x4:

pi_stress -p 99 --duration=300 (on linux-3.4-rc5; bigger is better)
vanilla: Total inversion performed: 5493381
patched: Total inversion performed: 5621746

sigwaittest -p 99 -l 100000 (on linux-3.4-rc5-rt6; less is better)
3.4-rc5-rt6: Min 24, Cur 27, Avg 30, Max 98
patched: Min 19, Cur 21, Avg 23, Max 96

Signed-off-by: Yong Zhang <yong.zhang0 at gmail.com>
Cc: Russell King <rmk+kernel at arm.linux.org.uk>
Cc: Nicolas Pitre <nico at linaro.org>
Cc: Will Deacon <will.deacon at arm.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Thomas Gleixner <tglx at linutronix.de>
Cc: linux-arm-kernel at lists.infradead.org

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/include/asm/cmpxchg.h | 2 ++
1 file changed, 2 insertions(+)

--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -129,6 +129,8 @@ static inline unsigned long __xchg(unsig

#else /* min ARCH >= ARMv6 */

+#define __HAVE_ARCH_CMPXCHG 1
+
extern void __bad_cmpxchg(volatile void *ptr, int size);

/*
86
debian/patches/features/all/rt/ARM-enable-irq-in-translation-section-permission-fau.patch
vendored
Normal file
@ -0,0 +1,86 @@
From: "Yadi.hu" <yadi.hu@windriver.com>
Date: Wed, 10 Dec 2014 10:32:09 +0800
Subject: ARM: enable irq in translation/section permission fault handlers
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Probably happens on all ARM, with
CONFIG_PREEMPT_RT_FULL
CONFIG_DEBUG_ATOMIC_SLEEP

This simple program....

int main() {
*((char*)0xc0001000) = 0;
};

[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658
[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a
[ 512.743217] INFO: lockdep is turned off.
[ 512.743360] irq event stamp: 0
[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null)
[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0
[ 512.744303] softirqs last disabled at (0): [< (null)>] (null)
[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104)
[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24)
[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0)
[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c)
[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0)
[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c)
[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8)
[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94)
[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48)
[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0)
[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8)

0xc0000000 belongs to kernel address space, a user task cannot be
allowed to access it. For the above condition, the correct result is
that the test case receives a “segment fault” and exits, rather than
producing the above splat.

The root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in
prefetch/data abort handlers"): it deletes the irq enable block in the
Data abort assembly code and moves it into the page/breakpoint/alignment
fault handlers instead. But the author did not enable irqs in the
translation/section permission fault handlers. ARM disables irqs when it
enters exception/interrupt mode; if the kernel doesn't re-enable them,
they stay disabled during a translation/section permission fault.

We see the above splat because do_force_sig_info is still called with
IRQs off, and that code eventually does a:

spin_lock_irqsave(&t->sighand->siglock, flags);

As this is architecture independent code, and we've not seen any other
need for other arch to have the siglock converted to raw lock, we can
conclude that we should enable irq for ARM translation/section
permission exception.


Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/mm/fault.c | 6 ++++++
1 file changed, 6 insertions(+)

--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr,
if (addr < TASK_SIZE)
return do_page_fault(addr, fsr, regs);

+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
if (user_mode(regs))
goto bad_area;

@@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr,
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
+ if (interrupts_enabled(regs))
+ local_irq_enable();
+
do_bad_area(addr, fsr, regs);
return 0;
}
27
debian/patches/features/all/rt/ASoC-Intel-sst-use-instead-of-at-the-of-a-C-statemen.patch
vendored
Normal file
@ -0,0 +1,27 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 11 Jun 2015 14:17:06 +0200
Subject: ASoC: Intel: sst: use ; instead of , at the end of a C statement
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This was spotted by Fernando Lopez-Lezcano <nando@ccrma.Stanford.EDU>
while he tried to compile a -RT kernel with this driver enabled.
"make C=2" would also warn about this. This is based on his patch.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
sound/soc/intel/atom/sst/sst.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

--- a/sound/soc/intel/atom/sst/sst.c
+++ b/sound/soc/intel/atom/sst/sst.c
@@ -368,8 +368,8 @@ static inline void sst_restore_shim64(st
 * initialize by FW or driver when firmware is loaded
 */
spin_lock_irqsave(&ctx->ipc_spin_lock, irq_flags);
- sst_shim_write64(shim, SST_IMRX, shim_regs->imrx),
- sst_shim_write64(shim, SST_CSR, shim_regs->csr),
+ sst_shim_write64(shim, SST_IMRX, shim_regs->imrx);
+ sst_shim_write64(shim, SST_CSR, shim_regs->csr);
spin_unlock_irqrestore(&ctx->ipc_spin_lock, irq_flags);
}
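
The bug fixed here is easy to reproduce outside the kernel: a trailing comma fuses two statements into one comma expression, which still compiles (sparse's "make C=2" flags it) but silently changes how surrounding control flow applies. A standalone illustration — hypothetical write_reg() helper, not the driver's API:

#include <stdio.h>

static void write_reg(const char *name, int val)
{
	printf("write %s = %d\n", name, val);
}

int main(void)
{
	int resuming = 0;

	/* With ';' only the first write would be guarded; with ',' the
	 * two calls are ONE statement, so the guard silently covers
	 * both - nothing prints here even though the second write
	 * looks unconditional at a glance. */
	if (resuming)
		write_reg("IMRX", 1),
		write_reg("CSR", 2);

	return 0;
}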
77
debian/patches/features/all/rt/HACK-printk-drop-the-logbuf_lock-more-often.patch
vendored
Normal file
@ -0,0 +1,77 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 21 Mar 2013 19:01:05 +0100
Subject: printk: Drop the logbuf_lock more often
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is held with irqs off. The latency drops 500us+ on my ARM box
with a "full" buffer after executing "dmesg" on the shell.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/printk/printk.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1162,6 +1162,7 @@ static int syslog_print_all(char __user
{
char *text;
int len = 0;
+ int attempts = 0;

text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
if (!text)
@@ -1173,7 +1174,14 @@ static int syslog_print_all(char __user
u64 seq;
u32 idx;
enum log_flags prev;
-
+ int num_msg;
+try_again:
+ attempts++;
+ if (attempts > 10) {
+ len = -EBUSY;
+ goto out;
+ }
+ num_msg = 0;
if (clear_seq < log_first_seq) {
/* messages are gone, move to first available one */
clear_seq = log_first_seq;
@@ -1194,6 +1202,14 @@ static int syslog_print_all(char __user
prev = msg->flags;
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}

/* move first record forward until length fits into the buffer */
@@ -1207,6 +1223,14 @@ static int syslog_print_all(char __user
prev = msg->flags;
idx = log_next(idx);
seq++;
+ num_msg++;
+ if (num_msg > 5) {
+ num_msg = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+ raw_spin_lock_irq(&logbuf_lock);
+ if (clear_seq < log_first_seq)
+ goto try_again;
+ }
}

/* last message fitting into this dump */
@@ -1247,6 +1271,7 @@ static int syslog_print_all(char __user
clear_seq = log_next_seq;
clear_idx = log_next_idx;
}
+out:
raw_spin_unlock_irq(&logbuf_lock);

kfree(text);
101
debian/patches/features/all/rt/KVM-lapic-mark-LAPIC-timer-handler-as-irqsafe.patch
vendored
Normal file
@ -0,0 +1,101 @@
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 8 Apr 2015 20:33:25 -0300
Subject: KVM: lapic: mark LAPIC timer handler as irqsafe
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Since lapic timer handler only wakes up a simple waitqueue,
it can be executed from hardirq context.

Also handle the case where hrtimer_start_expires fails due to -ETIME,
by injecting the interrupt to the guest immediately.

Reduces average cyclictest latency by 3us.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/x86/kvm/lapic.c | 40 +++++++++++++++++++++++++++++++++++++---
1 file changed, 37 insertions(+), 3 deletions(-)

--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1167,8 +1167,36 @@ void wait_lapic_expire(struct kvm_vcpu *
__delay(tsc_deadline - guest_tsc);
}

+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data);
+
+static void __apic_timer_expired(struct hrtimer *data)
+{
+ int ret, i = 0;
+ enum hrtimer_restart r;
+ struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+
+ r = apic_timer_fn(data);
+
+ if (r == HRTIMER_RESTART) {
+ do {
+ ret = hrtimer_start_expires(data, HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ hrtimer_add_expires_ns(&ktimer->timer,
+ ktimer->period);
+ i++;
+ } while (ret == -ETIME && i < 10);
+
+ if (ret == -ETIME) {
+ printk_once(KERN_ERR "%s: failed to reprogram timer\n",
+ __func__);
+ WARN_ON_ONCE(1);
+ }
+ }
+}
+
static void start_apic_timer(struct kvm_lapic *apic)
{
+ int ret;
ktime_t now;

atomic_set(&apic->lapic_timer.pending, 0);
@@ -1199,9 +1227,11 @@ static void start_apic_timer(struct kvm_
}
}

- hrtimer_start(&apic->lapic_timer.timer,
+ ret = hrtimer_start(&apic->lapic_timer.timer,
ktime_add_ns(now, apic->lapic_timer.period),
HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ __apic_timer_expired(&apic->lapic_timer.timer);

apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
PRIx64 ", "
@@ -1233,8 +1263,10 @@ static void start_apic_timer(struct kvm_
do_div(ns, this_tsc_khz);
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
+ ret = hrtimer_start(&apic->lapic_timer.timer,
expire, HRTIMER_MODE_ABS);
+ if (ret == -ETIME)
+ __apic_timer_expired(&apic->lapic_timer.timer);
} else
apic_timer_expired(apic);

@@ -1707,6 +1739,7 @@ int kvm_create_lapic(struct kvm_vcpu *vc
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = apic_timer_fn;
+ apic->lapic_timer.timer.irqsafe = 1;

/*
 * APIC is created enabled. This will prevent kvm_lapic_set_base from
@@ -1834,7 +1867,8 @@ void __kvm_migrate_apic_timer(struct kvm

timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ if (hrtimer_start_expires(timer, HRTIMER_MODE_ABS) == -ETIME)
+ __apic_timer_expired(timer);
}

/*
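
A note on the -ETIME handling above: in the -RT tree hrtimer_start_expires() returns an error when the absolute expiry already lies in the past (mainline's variant returns void), and the helper wraps that in a bounded retry. The same shape in isolation, as a sketch against that -RT return convention:

/* Sketch of the bounded reprogram-retry used above: if (re)arming an
 * absolute timer reports it already expired (-ETIME on -RT), push the
 * expiry forward by one period and try again, giving up after a few
 * rounds instead of looping forever. */
static void rearm_periodic_sketch(struct hrtimer *t, u64 period_ns)
{
	int ret, tries = 0;

	do {
		ret = hrtimer_start_expires(t, HRTIMER_MODE_ABS);
		if (ret == -ETIME)
			hrtimer_add_expires_ns(t, period_ns);
	} while (ret == -ETIME && ++tries < 10);

	WARN_ON_ONCE(ret == -ETIME);	/* fell behind by >10 periods */
}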
@ -0,0 +1,342 @@
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 8 Apr 2015 20:33:24 -0300
Subject: KVM: use simple waitqueue for vcpu->wq
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The problem:

On -RT, an emulated LAPIC timer instance has the following path:

1) hard interrupt
2) ksoftirqd is scheduled
3) ksoftirqd wakes up vcpu thread
4) vcpu thread is scheduled

This extra context switch introduces unnecessary latency in the
LAPIC path for a KVM guest.

The solution:

Allow waking up vcpu thread from hardirq context,
thus avoiding the need for ksoftirqd to be scheduled.

Normal waitqueues make use of spinlocks, which on -RT
are sleepable locks. Therefore, waking up a waitqueue
waiter involves locking a sleeping lock, which
is not allowed from hard interrupt context.

cyclictest command line:
# cyclictest -m -n -q -p99 -l 1000000 -h60 -D 1m

This patch reduces the average latency in my tests from 14us to 11us.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
arch/arm/kvm/arm.c | 4 ++--
arch/arm/kvm/psci.c | 4 ++--
arch/powerpc/include/asm/kvm_host.h | 4 ++--
arch/powerpc/kvm/book3s_hv.c | 23 +++++++++++------------
arch/s390/include/asm/kvm_host.h | 2 +-
arch/s390/kvm/interrupt.c | 8 ++++----
arch/x86/kvm/lapic.c | 6 +++---
include/linux/kvm_host.h | 4 ++--
virt/kvm/async_pf.c | 4 ++--
virt/kvm/kvm_main.c | 16 ++++++++--------
10 files changed, 37 insertions(+), 38 deletions(-)

--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -474,9 +474,9 @@ bool kvm_arch_intc_initialized(struct kv

static void vcpu_pause(struct kvm_vcpu *vcpu)
{
- wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
+ struct swait_head *wq = kvm_arch_vcpu_wq(vcpu);

- wait_event_interruptible(*wq, !vcpu->arch.pause);
+ swait_event_interruptible(*wq, !vcpu->arch.pause);
}

static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -68,7 +68,7 @@ static unsigned long kvm_psci_vcpu_on(st
{
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
unsigned long cpu_id;
unsigned long context_id;
phys_addr_t target_pc;
@@ -117,7 +117,7 @@ static unsigned long kvm_psci_vcpu_on(st
smp_mb(); /* Make sure the above is visible */

wq = kvm_arch_vcpu_wq(vcpu);
- wake_up_interruptible(wq);
+ swait_wake_interruptible(wq);

return PSCI_RET_SUCCESS;
}
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -280,7 +280,7 @@ struct kvmppc_vcore {
u8 in_guest;
struct list_head runnable_threads;
spinlock_t lock;
- wait_queue_head_t wq;
+ struct swait_head wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
@@ -613,7 +613,7 @@ struct kvm_vcpu_arch {
u8 prodded;
u32 last_inst;

- wait_queue_head_t *wqp;
+ struct swait_head *wqp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -115,11 +115,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -686,8 +686,8 @@ int kvmppc_pseries_do_hcall(struct kvm_v
tvcpu->arch.prodded = 1;
smp_mb();
if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq)) {
+ swait_wake_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -1426,7 +1426,7 @@ static struct kvmppc_vcore *kvmppc_vcore
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_waitqueue_head(&vcore->wq);
+ init_swait_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
@@ -2073,10 +2073,9 @@ static void kvmppc_vcore_blocked(struct
{
struct kvm_vcpu *vcpu;
int do_sleep = 1;
+ DEFINE_SWAITER(wait);

- DEFINE_WAIT(wait);
-
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE);

/*
 * Check one last time for pending exceptions and ceded state after
@@ -2090,7 +2089,7 @@ static void kvmppc_vcore_blocked(struct
}

if (!do_sleep) {
- finish_wait(&vc->wq, &wait);
+ swait_finish(&vc->wq, &wait);
return;
}

@@ -2098,7 +2097,7 @@ static void kvmppc_vcore_blocked(struct
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ swait_finish(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -2142,7 +2141,7 @@ static int kvmppc_run_vcpu(struct kvm_ru
kvmppc_start_thread(vcpu);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ swait_wake(&vc->wq);
}

}
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -419,7 +419,7 @@ struct kvm_s390_irq_payload {
struct kvm_s390_local_interrupt {
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
- wait_queue_head_t *wq;
+ struct swait_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -875,13 +875,13 @@ int kvm_s390_handle_wait(struct kvm_vcpu

void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- if (waitqueue_active(&vcpu->wq)) {
+ if (swaitqueue_active(&vcpu->wq)) {
/*
 * The vcpu gave up the cpu voluntarily, mark it as a good
 * yield-candidate.
 */
vcpu->preempted = true;
- wake_up_interruptible(&vcpu->wq);
+ swait_wake_interruptible(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
}
@@ -987,7 +987,7 @@ int kvm_s390_inject_program_int(struct k
spin_lock(&li->lock);
irq.u.pgm.code = code;
__inject_prog(vcpu, &irq);
- BUG_ON(waitqueue_active(li->wq));
+ BUG_ON(swaitqueue_active(li->wq));
spin_unlock(&li->lock);
return 0;
}
@@ -1006,7 +1006,7 @@ int kvm_s390_inject_prog_irq(struct kvm_
spin_lock(&li->lock);
irq.u.pgm = *pgm_info;
rc = __inject_prog(vcpu, &irq);
- BUG_ON(waitqueue_active(li->wq));
+ BUG_ON(swaitqueue_active(li->wq));
spin_unlock(&li->lock);
return rc;
}
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1104,7 +1104,7 @@ static void apic_update_lvtt(struct kvm_
static void apic_timer_expired(struct kvm_lapic *apic)
{
struct kvm_vcpu *vcpu = apic->vcpu;
- wait_queue_head_t *q = &vcpu->wq;
+ struct swait_head *q = &vcpu->wq;
struct kvm_timer *ktimer = &apic->lapic_timer;

if (atomic_read(&apic->lapic_timer.pending))
@@ -1113,8 +1113,8 @@ static void apic_timer_expired(struct kv
atomic_inc(&apic->lapic_timer.pending);
kvm_set_pending_timer(vcpu);

- if (waitqueue_active(q))
- wake_up_interruptible(q);
+ if (swaitqueue_active(q))
+ swait_wake_interruptible(q);

if (apic_lvtt_tscdeadline(apic))
ktimer->expired_tscdeadline = ktimer->tscdeadline;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -230,7 +230,7 @@ struct kvm_vcpu {

int fpu_active;
int guest_fpu_loaded, guest_xcr0_loaded;
- wait_queue_head_t wq;
+ struct swait_head wq;
struct pid *pid;
int sigset_active;
sigset_t sigset;
@@ -690,7 +690,7 @@ static inline bool kvm_arch_has_noncoher
}
#endif

-static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct swait_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
return vcpu->arch.wqp;
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -94,8 +94,8 @@ static void async_pf_execute(struct work

trace_kvm_async_pf_completed(addr, gva);

- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
+ if (swaitqueue_active(&vcpu->wq))
+ swait_wake_interruptible(&vcpu->wq);

mmput(mm);
kvm_put_kvm(vcpu->kvm);
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -218,7 +218,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu,
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_waitqueue_head(&vcpu->wq);
+ init_swait_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);

page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -1779,7 +1779,7 @@ static int kvm_vcpu_check_block(struct k
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
- DEFINE_WAIT(wait);
+ DEFINE_SWAITER(wait);
bool waited = false;

start = cur = ktime_get();
@@ -1800,7 +1800,7 @@ void kvm_vcpu_block(struct kvm_vcp
}

for (;;) {
- prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ swait_prepare(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);

if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -1809,7 +1809,7 @@ void kvm_vcpu_block(struct kvm_vcp
schedule();
}

- finish_wait(&vcpu->wq, &wait);
+ swait_finish(&vcpu->wq, &wait);
cur = ktime_get();

out:
@@ -1825,11 +1825,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu
{
int me;
int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ struct swait_head *wqp;

wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
+ if (swaitqueue_active(wqp)) {
+ swait_wake_interruptible(wqp);
++vcpu->stat.halt_wakeup;
}

@@ -1930,7 +1930,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *m
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+ if (swaitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
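
The conversion is mechanical because the simple-wait API mirrors the classic one call for call; what changes is the implementation: a raw-spinlock-protected list, so a waker may legally run from hard-irq context on -RT. A sketch of a waiter/waker pair in the patch's own API — illustrative only, and using the rt-patch names (swait_head, DEFINE_SWAITER, ...), not the later mainline swait_* spelling:

static struct swait_head done_wq;	/* init_swait_head(&done_wq) at setup */
static int done;

static void waiter_sketch(void)
{
	DEFINE_SWAITER(wait);

	for (;;) {
		swait_prepare(&done_wq, &wait, TASK_INTERRUPTIBLE);
		if (done)
			break;
		schedule();	/* sleep until the waker fires */
	}
	swait_finish(&done_wq, &wait);
}

/* Safe from hard-irq context on -RT: only a raw lock is taken. */
static void waker_sketch(void)
{
	done = 1;
	if (swaitqueue_active(&done_wq))
		swait_wake_interruptible(&done_wq);
}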
174
debian/patches/features/all/rt/acpi-rt-Convert-acpi_gbl_hardware-lock-back-to-a-raw.patch
vendored
Normal file
@ -0,0 +1,174 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 13 Feb 2013 09:26:05 -0500
Subject: acpi/rt: Convert acpi_gbl_hardware lock back to a raw_spinlock_t
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We hit the following bug with 3.6-rt:

[ 5.898990] BUG: scheduling while atomic: swapper/3/0/0x00000002
[ 5.898991] no locks held by swapper/3/0.
[ 5.898993] Modules linked in:
[ 5.898996] Pid: 0, comm: swapper/3 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1
[ 5.898997] Call Trace:
[ 5.899011] [<ffffffff810804e7>] __schedule_bug+0x67/0x90
[ 5.899028] [<ffffffff81577923>] __schedule+0x793/0x7a0
[ 5.899032] [<ffffffff810b4e40>] ? debug_rt_mutex_print_deadlock+0x50/0x200
[ 5.899034] [<ffffffff81577b89>] schedule+0x29/0x70
[ 5.899036] BUG: scheduling while atomic: swapper/7/0/0x00000002
[ 5.899037] no locks held by swapper/7/0.
[ 5.899039] [<ffffffff81578525>] rt_spin_lock_slowlock+0xe5/0x2f0
[ 5.899040] Modules linked in:
[ 5.899041]
[ 5.899045] [<ffffffff81579a58>] ? _raw_spin_unlock_irqrestore+0x38/0x90
[ 5.899046] Pid: 0, comm: swapper/7 Not tainted 3.6.11-rt28.19.el6rt.x86_64.debug #1
[ 5.899047] Call Trace:
[ 5.899049] [<ffffffff81578bc6>] rt_spin_lock+0x16/0x40
[ 5.899052] [<ffffffff810804e7>] __schedule_bug+0x67/0x90
[ 5.899054] [<ffffffff8157d3f0>] ? notifier_call_chain+0x80/0x80
[ 5.899056] [<ffffffff81577923>] __schedule+0x793/0x7a0
[ 5.899059] [<ffffffff812f2034>] acpi_os_acquire_lock+0x1f/0x23
[ 5.899062] [<ffffffff810b4e40>] ? debug_rt_mutex_print_deadlock+0x50/0x200
[ 5.899068] [<ffffffff8130be64>] acpi_write_bit_register+0x33/0xb0
[ 5.899071] [<ffffffff81577b89>] schedule+0x29/0x70
[ 5.899072] [<ffffffff8130be13>] ? acpi_read_bit_register+0x33/0x51
[ 5.899074] [<ffffffff81578525>] rt_spin_lock_slowlock+0xe5/0x2f0
[ 5.899077] [<ffffffff8131d1fc>] acpi_idle_enter_bm+0x8a/0x28e
[ 5.899079] [<ffffffff81579a58>] ? _raw_spin_unlock_irqrestore+0x38/0x90
[ 5.899081] [<ffffffff8107e5da>] ? this_cpu_load+0x1a/0x30
[ 5.899083] [<ffffffff81578bc6>] rt_spin_lock+0x16/0x40
[ 5.899087] [<ffffffff8144c759>] cpuidle_enter+0x19/0x20
[ 5.899088] [<ffffffff8157d3f0>] ? notifier_call_chain+0x80/0x80
[ 5.899090] [<ffffffff8144c777>] cpuidle_enter_state+0x17/0x50
[ 5.899092] [<ffffffff812f2034>] acpi_os_acquire_lock+0x1f/0x23
[ 5.899094] [<ffffffff8144d1a1>] cpuidle899101] [<ffffffff8130be13>] ?

As the acpi code disables interrupts in acpi_idle_enter_bm, and calls
code that grabs the acpi lock, it causes issues as the lock is currently
in RT a sleeping lock.

The lock was converted from a raw to a sleeping lock due to some
previous issues, and tests that showed it didn't seem to matter.
Unfortunately, it did matter for one of our boxes.

This patch converts the lock back to a raw lock. I've run this code on a
few of my own machines, one being my laptop that uses the acpi quite
extensively. I've been able to suspend and resume without issues.

[ tglx: Made the change exclusive for acpi_gbl_hardware_lock ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: John Kacur <jkacur@gmail.com>
Cc: Clark Williams <clark@redhat.com>
Link: http://lkml.kernel.org/r/1360765565.23152.5.camel@gandalf.local.home

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/acpi/acpica/acglobal.h | 2 +-
drivers/acpi/acpica/hwregs.c | 4 ++--
drivers/acpi/acpica/hwxface.c | 4 ++--
drivers/acpi/acpica/utmutex.c | 4 ++--
include/acpi/platform/aclinux.h | 15 +++++++++++++++
5 files changed, 22 insertions(+), 7 deletions(-)

--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -112,7 +112,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pen
 * interrupt level
 */
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */
-ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
+ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */
ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock);

/* Mutex for _OSI support */
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(vo
ACPI_BITMASK_ALL_FIXED_STATUS,
ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address)));

- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);

/* Clear the fixed events in PM1 A/B */

status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS,
ACPI_BITMASK_ALL_FIXED_STATUS);

- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);

if (ACPI_FAILURE(status)) {
goto exit;
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32
return_ACPI_STATUS(AE_BAD_PARAMETER);
}

- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock);
+ raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags);

/*
 * At this point, we know that the parent register is one of the
@@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32

unlock_and_exit:

- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags);
+ raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags);
return_ACPI_STATUS(status);
}

--- a/drivers/acpi/acpica/utmutex.c
+++ b/drivers/acpi/acpica/utmutex.c
@@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(voi
return_ACPI_STATUS (status);
}

- status = acpi_os_create_lock (&acpi_gbl_hardware_lock);
+ status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock);
if (ACPI_FAILURE (status)) {
return_ACPI_STATUS (status);
}
@@ -141,7 +141,7 @@ void acpi_ut_mutex_terminate(void)
/* Delete the spinlocks */

acpi_os_delete_lock(acpi_gbl_gpe_lock);
- acpi_os_delete_lock(acpi_gbl_hardware_lock);
+ acpi_os_delete_raw_lock(acpi_gbl_hardware_lock);
acpi_os_delete_lock(acpi_gbl_reference_count_lock);

/* Delete the reader/writer lock */
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -123,6 +123,7 @@

#define acpi_cache_t struct kmem_cache
#define acpi_spinlock spinlock_t *
+#define acpi_raw_spinlock raw_spinlock_t *
#define acpi_cpu_flags unsigned long

/* Use native linux version of acpi_os_allocate_zeroed */
@@ -141,6 +142,20 @@
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id
#define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock

+#define acpi_os_create_raw_lock(__handle) \
+({ \
+ raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \
+ \
+ if (lock) { \
+ *(__handle) = lock; \
+ raw_spin_lock_init(*(__handle)); \
+ } \
+ lock ? AE_OK : AE_NO_MEMORY; \
+ })
+
+#define acpi_os_delete_raw_lock(__handle) kfree(__handle)
+
+
/*
 * OSL interfaces used by debugger/disassembler
 */
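
The underlying rule this patch applies: on PREEMPT_RT a spinlock_t becomes a sleeping rtmutex, so any lock that can be taken with interrupts hard-disabled (the idle/ACPI C-state path here) must stay a raw_spinlock_t. A sketched contrast, assuming nothing beyond the stock lock API (hw_lock is a made-up name):

static DEFINE_RAW_SPINLOCK(hw_lock);	/* stays a spinning lock on -RT */

static void touch_hw_regs(void)
{
	unsigned long flags;

	/* Legal even with irqs already off: never sleeps, on -RT too.
	 * An ordinary spinlock_t here would trigger exactly the
	 * "scheduling while atomic" splat quoted above. */
	raw_spin_lock_irqsave(&hw_lock, flags);
	/* ... program hardware registers ... */
	raw_spin_unlock_irqrestore(&hw_lock, flags);
}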
@ -0,0 +1,104 @@
From: Anders Roxell <anders.roxell@linaro.org>
Date: Thu, 14 May 2015 17:52:17 +0200
Subject: arch/arm64: Add lazy preempt support
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

arm64 is missing support for PREEMPT_RT. The main feature which is
lacking is support for lazy preemption. The arch-specific entry code,
thread information structure definitions, and associated data tables
have to be extended to provide this support. Then the Kconfig file has
to be extended to indicate the support is available, and also to
indicate that support for full RT preemption is now available.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/asm-offsets.c | 1 +
arch/arm64/kernel/entry.S | 13 ++++++++++---
4 files changed, 15 insertions(+), 3 deletions(-)

--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -69,6 +69,7 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
+ select HAVE_PREEMPT_LAZY
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -47,6 +47,7 @@ struct thread_info {
mm_segment_t addr_limit; /* address limit */
struct task_struct *task; /* main task structure */
int preempt_count; /* 0 => preemptable, <0 => bug */
+ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */
int cpu; /* cpu */
};

@@ -101,6 +102,7 @@ static inline struct thread_info *curren
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_NEED_RESCHED_LAZY 4
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -117,6 +119,7 @@ static inline struct thread_info *curren
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -35,6 +35,7 @@ int main(void)
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
+ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -367,11 +367,16 @@ ENDPROC(el1_sync)
#ifdef CONFIG_PREEMPT
get_thread_info tsk
ldr w24, [tsk, #TI_PREEMPT] // get preempt count
- cbnz w24, 1f // preempt count != 0
+ cbnz w24, 2f // preempt count != 0
ldr x0, [tsk, #TI_FLAGS] // get flags
- tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
- bl el1_preempt
+ tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
+
+ ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count
+ cbnz w24, 2f // preempt lazy count != 0
+ tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling?
1:
+ bl el1_preempt
+2:
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on
@@ -385,6 +390,7 @@ ENDPROC(el1_irq)
1: bl preempt_schedule_irq // irq en/disable is done inside
ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
+ tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling?
ret x24
#endif

@@ -622,6 +628,7 @@ ENDPROC(cpu_switch_to)
str x0, [sp, #S_X0] // returned x0
work_pending:
tbnz x1, #TIF_NEED_RESCHED, work_resched
+ tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched
/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
ldr x2, [sp, #S_PSTATE]
mov x0, sp // 'regs'
57
debian/patches/features/all/rt/arm-at91-pit-remove-irq-handler-when-clock-is-unused.patch
vendored
Normal file
@ -0,0 +1,57 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Sat, 6 Mar 2010 17:47:10 +0100
Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Setup and remove the interrupt handler in clock event mode selection.
This avoids calling the (shared) interrupt handler when the device is
not used.

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: redo the patch with NR_IRQS_LEGACY which is probably required since
commit 8fe82a55 ("ARM: at91: sparse irq support") which is included since v3.6.
Patch based on what Sami Pietikäinen <Sami.Pietikainen@wapice.com> suggested].
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/clocksource/timer-atmel-pit.c | 4 ++++
drivers/clocksource/timer-atmel-st.c | 1 +
2 files changed, 5 insertions(+)

--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -90,6 +90,7 @@ static cycle_t read_pit_clk(struct clock
return elapsed;
}

+static struct irqaction at91sam926x_pit_irq;
/*
 * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
 */
@@ -100,6 +101,8 @@ pit_clkevt_mode(enum clock_event_mode mo

switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ /* Set up irq handler */
+ setup_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
/* update clocksource counter */
data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
pit_write(data->base, AT91_PIT_MR,
@@ -113,6 +116,7 @@ pit_clkevt_mode(enum clock_event_mode mo
/* disable irq, leaving the clocksource active */
pit_write(data->base, AT91_PIT_MR,
(data->cycle - 1) | AT91_PIT_PITEN);
+ remove_irq(at91sam926x_pit_irq.irq, &at91sam926x_pit_irq);
break;
case CLOCK_EVT_MODE_RESUME:
break;
--- a/drivers/clocksource/timer-atmel-st.c
+++ b/drivers/clocksource/timer-atmel-st.c
@@ -131,6 +131,7 @@ clkevt32k_mode(enum clock_event_mode mod
break;
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
+ remove_irq(NR_IRQS_LEGACY + AT91_ID_SYS, &at91rm9200_timer_irq);
case CLOCK_EVT_MODE_RESUME:
irqmask = 0;
break;
33
debian/patches/features/all/rt/arm-at91-tclib-default-to-tclib-timer-for-rt.patch
vendored
Normal file
@ -0,0 +1,33 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 1 May 2010 18:29:35 +0200
Subject: ARM: at91: tclib: Default to tclib timer for RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

RT is not too happy about the shared timer interrupt in AT91
devices. Default to tclib timer for RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
drivers/misc/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -54,6 +54,7 @@ config AD525X_DPOT_SPI
config ATMEL_TCLIB
bool "Atmel AT32/AT91 Timer/Counter Library"
depends on (AVR32 || ARCH_AT91)
+ default y if PREEMPT_RT_FULL
help
Select this if you want a library to allocate the Timer/Counter
blocks found on many Atmel processors. This facilitates using
@@ -86,7 +87,7 @@ config ATMEL_TCB_CLKSRC_BLOCK
config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
bool "TC Block use 32 KiHz clock"
depends on ATMEL_TCB_CLKSRC
- default y
+ default y if !PREEMPT_RT_FULL
help
Select this to use 32 KiHz base clock rate as TC block clock
source for clock events.
@ -0,0 +1,466 @@
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 19 Sep 2011 14:51:14 -0700
Subject: arm: Convert arm boot_lock to raw
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The arm boot_lock is used by the secondary processor startup code. The locking
task is the idle thread, which has idle->sched_class == &idle_sched_class.
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
lock, the attempt to wake it when the lock becomes available will fail:

try_to_wake_up()
   ...
      activate_task()
         enqueue_task()
            p->sched_class->enqueue_task(rq, p, flags)

Fix by converting boot_lock to a raw spin lock.

Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
arch/arm/mach-hisi/platmcpm.c | 26 +++++++++++++-------------
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
arch/arm/mach-prima2/platsmp.c | 10 +++++-----
arch/arm/mach-qcom/platsmp.c | 10 +++++-----
arch/arm/mach-spear/platsmp.c | 10 +++++-----
arch/arm/mach-sti/platsmp.c | 10 +++++-----
arch/arm/mach-ux500/platsmp.c | 10 +++++-----
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
9 files changed, 54 insertions(+), 54 deletions(-)

--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -231,7 +231,7 @@ static void __iomem *scu_base_addr(void)
return (void __iomem *)(S5P_VA_SCU);
}

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void exynos_secondary_init(unsigned int cpu)
{
@@ -244,8 +244,8 @@ static void exynos_secondary_init(unsign
/*
 * Synchronise with the boot thread.
 */
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -259,7 +259,7 @@ static int exynos_boot_secondary(unsigne
 * Set synchronisation state between this boot processor
 * and the secondary one
 */
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
 * The secondary processor is waiting to be released from
@@ -286,7 +286,7 @@ static int exynos_boot_secondary(unsigne

if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return -ETIMEDOUT;
}
}
@@ -342,7 +342,7 @@ static int exynos_boot_secondary(unsigne
 * calibrations, then wait for it to finish
 */
fail:
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? ret : 0;
}
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -57,7 +57,7 @@

static void __iomem *sysctrl, *fabric;
static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static u32 fabric_phys_addr;
/*
 * [0]: bootwrapper physical address
@@ -104,7 +104,7 @@ static int hip04_mcpm_power_up(unsigned
if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
return -EINVAL;

- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);

if (hip04_cpu_table[cluster][cpu])
goto out;
@@ -133,7 +133,7 @@ static int hip04_mcpm_power_up(unsigned
udelay(20);
out:
hip04_cpu_table[cluster][cpu]++;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);

return 0;
}
@@ -149,7 +149,7 @@ static void hip04_mcpm_power_down(void)

__mcpm_cpu_going_down(cpu, cluster);

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
hip04_cpu_table[cluster][cpu]--;
if (hip04_cpu_table[cluster][cpu] == 1) {
@@ -162,7 +162,7 @@ static void hip04_mcpm_power_down(void)

last_man = hip04_cluster_is_down(cluster);
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
/* Since it's Cortex A15, disable L2 prefetching. */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3 \n\t"
@@ -173,7 +173,7 @@ static void hip04_mcpm_power_down(void)
hip04_set_snoop_filter(cluster, 0);
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
} else {
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
v7_exit_coherency_flush(louis);
}

@@ -192,7 +192,7 @@ static int hip04_mcpm_wait_for_powerdown
cpu >= HIP04_MAX_CPUS_PER_CLUSTER);

count = TIMEOUT_MSEC / POLL_MSEC;
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
for (tries = 0; tries < count; tries++) {
if (hip04_cpu_table[cluster][cpu]) {
ret = -EBUSY;
@@ -202,10 +202,10 @@ static int hip04_mcpm_wait_for_powerdown
data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
if (data & CORE_WFI_STATUS(cpu))
break;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
/* Wait for clean L2 when the whole cluster is down. */
msleep(POLL_MSEC);
- spin_lock_irq(&boot_lock);
+ raw_spin_lock_irq(&boot_lock);
}
if (tries >= count)
goto err;
@@ -220,10 +220,10 @@ static int hip04_mcpm_wait_for_powerdown
}
if (tries >= count)
goto err;
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return 0;
err:
- spin_unlock_irq(&boot_lock);
+ raw_spin_unlock_irq(&boot_lock);
return ret;
}

@@ -235,10 +235,10 @@ static void hip04_mcpm_powered_up(void)
cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
if (!hip04_cpu_table[cluster][cpu])
hip04_cpu_table[cluster][cpu] = 1;
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level)
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -43,7 +43,7 @@
/* SCU base address */
static void __iomem *scu_base;

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

void __iomem *omap4_get_scu_base(void)
{
@@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigne
/*
 * Synchronise with the boot thread.
 */
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned
 * Set synchronisation state between this boot processor
 * and the secondary one
 */
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
 * Update the AuxCoreBoot0 with boot state for secondary core.
@@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned
 * Now the secondary core is starting up let it run its
 * calibrations, then wait for it to finish
 */
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return 0;
}
--- a/arch/arm/mach-prima2/platsmp.c
+++ b/arch/arm/mach-prima2/platsmp.c
@@ -22,7 +22,7 @@

static void __iomem *clk_base;

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

static void sirfsoc_secondary_init(unsigned int cpu)
{
@@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsig
/*
 * Synchronise with the boot thread.
 */
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static const struct of_device_id clk_ids[] = {
@@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsign
/* make sure write buffer is drained */
mb();

- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);

/*
 * The secondary processor is waiting to be released from
@@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsign
 * now the secondary core is starting up let it run its
 * calibrations, then wait for it to finish
 */
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);

return pen_release != -1 ? -ENOSYS : 0;
}
--- a/arch/arm/mach-qcom/platsmp.c
+++ b/arch/arm/mach-qcom/platsmp.c
@@ -46,7 +46,7 @@

extern void secondary_startup_arm(void);

-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);

#ifdef CONFIG_HOTPLUG_CPU
static void __ref qcom_cpu_die(unsigned int cpu)
@@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned
/*
 * Synchronise with the boot thread.
 */
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}

static int scss_release_secondary(unsigned int cpu)
@@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned
 * set synchronisation state between this boot processor
 * and the secondary one
 */
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
|
||||
* Send the secondary CPU a soft interrupt, thereby causing
|
||||
@@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
--- a/arch/arm/mach-spear/platsmp.c
|
||||
+++ b/arch/arm/mach-spear/platsmp.c
|
||||
@@ -32,7 +32,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void __iomem *scu_base = IOMEM(VA_SCU_BASE);
|
||||
|
||||
@@ -47,8 +47,8 @@ static void spear13xx_secondary_init(uns
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsi
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsi
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/mach-sti/platsmp.c
|
||||
+++ b/arch/arm/mach-sti/platsmp.c
|
||||
@@ -34,7 +34,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void sti_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -49,8 +49,8 @@ static void sti_secondary_init(unsigned
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -61,7 +61,7 @@ static int sti_boot_secondary(unsigned i
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -92,7 +92,7 @@ static int sti_boot_secondary(unsigned i
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/mach-ux500/platsmp.c
|
||||
+++ b/arch/arm/mach-ux500/platsmp.c
|
||||
@@ -51,7 +51,7 @@ static void __iomem *scu_base_addr(void)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
static void ux500_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -64,8 +64,8 @@ static void ux500_secondary_init(unsigne
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -76,7 +76,7 @@ static int ux500_boot_secondary(unsigned
|
||||
* set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* The secondary processor is waiting to be released from
|
||||
@@ -97,7 +97,7 @@ static int ux500_boot_secondary(unsigned
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
||||
--- a/arch/arm/plat-versatile/platsmp.c
|
||||
+++ b/arch/arm/plat-versatile/platsmp.c
|
||||
@@ -30,7 +30,7 @@ static void write_pen_release(int val)
|
||||
sync_cache_w(&pen_release);
|
||||
}
|
||||
|
||||
-static DEFINE_SPINLOCK(boot_lock);
|
||||
+static DEFINE_RAW_SPINLOCK(boot_lock);
|
||||
|
||||
void versatile_secondary_init(unsigned int cpu)
|
||||
{
|
||||
@@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned i
|
||||
/*
|
||||
* Synchronise with the boot thread.
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
}
|
||||
|
||||
int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle)
|
||||
@@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned in
|
||||
* Set synchronisation state between this boot processor
|
||||
* and the secondary one
|
||||
*/
|
||||
- spin_lock(&boot_lock);
|
||||
+ raw_spin_lock(&boot_lock);
|
||||
|
||||
/*
|
||||
* This is really belt and braces; we hold unintended secondary
|
||||
@@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned in
|
||||
* now the secondary core is starting up let it run its
|
||||
* calibrations, then wait for it to finish
|
||||
*/
|
||||
- spin_unlock(&boot_lock);
|
||||
+ raw_spin_unlock(&boot_lock);
|
||||
|
||||
return pen_release != -1 ? -ENOSYS : 0;
|
||||
}
|
|
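
A note on the pattern above: with CONFIG_PREEMPT_RT_FULL a plain spinlock_t is
converted into a sleeping rtmutex, while raw_spinlock_t keeps real busy-waiting
semantics. The CPU bring-up and power-down paths patched here run with
interrupts off, or before the secondary CPU is able to schedule at all, so
their boot_lock has to be raw. A minimal sketch of the rule, with made-up
example_* names (illustration only, not part of the series):

#include <linux/spinlock.h>

/*
 * raw_spinlock_t always stays a true spinning lock, even on
 * CONFIG_PREEMPT_RT_FULL, whereas spinlock_t becomes a sleeping
 * rtmutex there. Code that runs while the scheduler is unusable
 * (early secondary-CPU bring-up, power-down with IRQs disabled)
 * must therefore use the raw_ variant, as the hunks above do.
 */
static DEFINE_RAW_SPINLOCK(example_boot_lock);

static int example_power_up(unsigned int cpu)
{
        raw_spin_lock_irq(&example_boot_lock);
        /* ... poke the power controller; sleeping is forbidden here ... */
        raw_spin_unlock_irq(&example_boot_lock);
        return 0;
}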
@@ -0,0 +1,149 @@
Subject: arm: Enable highmem for rt
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Feb 2013 11:03:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

fixup highmem for ARM.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/include/asm/switch_to.h |  8 ++++++
 arch/arm/mm/highmem.c            | 46 ++++++++++++++++++++++++++++++++++-----
 include/linux/highmem.h          |  1
 3 files changed, 50 insertions(+), 5 deletions(-)

--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -3,6 +3,13 @@

 #include <linux/thread_info.h>

+#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p);
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
 /*
  * For v7 SMP cores running a preemptible kernel we may be pre-empted
  * during a TLB maintenance operation, so execute an inner-shareable dsb
@@ -22,6 +29,7 @@ extern struct task_struct *__switch_to(s

 #define switch_to(prev,next,last) \
 do { \
+       switch_kmaps(prev, next); \
        last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \
 } while (0)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -54,12 +54,13 @@ EXPORT_SYMBOL(kunmap);

 void *kmap_atomic(struct page *page)
 {
+       pte_t pte = mk_pte(page, kmap_prot);
        unsigned int idx;
        unsigned long vaddr;
        void *kmap;
        int type;

-       preempt_disable();
+       preempt_disable_nort();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -93,7 +94,10 @@ void *kmap_atomic(struct page *page)
         * in place, so the contained TLB flush ensures the TLB is updated
         * with the new mapping.
         */
-       set_fixmap_pte(idx, mk_pte(page, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+       current->kmap_pte[type] = pte;
+#endif
+       set_fixmap_pte(idx, pte);

        return (void *)vaddr;
 }
@@ -110,6 +114,9 @@ void __kunmap_atomic(void *kvaddr)

        if (cache_is_vivt())
                __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+#ifdef CONFIG_PREEMPT_RT_FULL
+       current->kmap_pte[type] = __pte(0);
+#endif
 #ifdef CONFIG_DEBUG_HIGHMEM
        BUG_ON(vaddr != __fix_to_virt(idx));
 #else
@@ -122,17 +129,18 @@ void __kunmap_atomic(void *kvaddr)
                kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
        }
        pagefault_enable();
-       preempt_enable();
+       preempt_enable_nort();
 }
 EXPORT_SYMBOL(__kunmap_atomic);

 void *kmap_atomic_pfn(unsigned long pfn)
 {
+       pte_t pte = pfn_pte(pfn, kmap_prot);
        unsigned long vaddr;
        int idx, type;
        struct page *page = pfn_to_page(pfn);

-       preempt_disable();
+       preempt_disable_nort();
        pagefault_disable();
        if (!PageHighMem(page))
                return page_address(page);
@@ -143,7 +151,10 @@ void *kmap_atomic_pfn(unsigned long pfn)
 #ifdef CONFIG_DEBUG_HIGHMEM
        BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
 #endif
-       set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+       current->kmap_pte[type] = pte;
+#endif
+       set_fixmap_pte(idx, pte);

        return (void *)vaddr;
 }
@@ -157,3 +168,28 @@ struct page *kmap_atomic_to_page(const v

        return pte_page(get_fixmap_pte(vaddr));
 }
+
+#if defined CONFIG_PREEMPT_RT_FULL
+void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+       int i;
+
+       /*
+        * Clear @prev's kmap_atomic mappings
+        */
+       for (i = 0; i < prev_p->kmap_idx; i++) {
+               int idx = i + KM_TYPE_NR * smp_processor_id();
+
+               set_fixmap_pte(idx, __pte(0));
+       }
+       /*
+        * Restore @next_p's kmap_atomic mappings
+        */
+       for (i = 0; i < next_p->kmap_idx; i++) {
+               int idx = i + KM_TYPE_NR * smp_processor_id();
+
+               if (!pte_none(next_p->kmap_pte[i]))
+                       set_fixmap_pte(idx, next_p->kmap_pte[i]);
+       }
+}
+#endif
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -7,6 +7,7 @@
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/sched.h>

 #include <asm/cacheflush.h>
@@ -0,0 +1,28 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 11 Mar 2013 21:37:27 +0100
Subject: arm/highmem: Flush tlb on unmap
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The TLB should be flushed on unmap so that the mapping entry becomes
invalid. Currently this is only done in the debug case, which does not
look right.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/mm/highmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -112,10 +112,10 @@ void __kunmap_atomic(void *kvaddr)
                __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
 #ifdef CONFIG_DEBUG_HIGHMEM
        BUG_ON(vaddr != __fix_to_virt(idx));
-       set_fixmap_pte(idx, __pte(0));
 #else
        (void) idx;  /* to kill a warning */
 #endif
+       set_fixmap_pte(idx, __pte(0));
        kmap_atomic_idx_pop();
        } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
                /* this address was obtained through kmap_high_get() */
@@ -0,0 +1,106 @@
Subject: arm: Add support for lazy preemption
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 31 Oct 2012 12:04:11 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Implement the arm pieces for lazy preempt.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm/Kconfig                   |  1 +
 arch/arm/include/asm/thread_info.h |  3 +++
 arch/arm/kernel/asm-offsets.c      |  1 +
 arch/arm/kernel/entry-armv.S       | 13 +++++++++++--
 arch/arm/kernel/signal.c           |  3 ++-
 5 files changed, 18 insertions(+), 3 deletions(-)

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -66,6 +66,7 @@ config ARM
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_PREEMPT_LAZY
        select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE)
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_SYSCALL_TRACEPOINTS
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -50,6 +50,7 @@ struct cpu_context_save {
 struct thread_info {
        unsigned long flags;            /* low level flags */
        int preempt_count;              /* 0 => preemptable, <0 => bug */
+       int preempt_lazy_count;         /* 0 => preemptable, <0 => bug */
        mm_segment_t addr_limit;        /* address limit */
        struct task_struct *task;       /* main task structure */
        __u32 cpu;                      /* cpu */
@@ -147,6 +148,7 @@ extern int vfp_restore_user_hwstate(stru
 #define TIF_SIGPENDING         0
 #define TIF_NEED_RESCHED       1
 #define TIF_NOTIFY_RESUME      2       /* callback before returning to user */
+#define TIF_NEED_RESCHED_LAZY  3
 #define TIF_UPROBE             7
 #define TIF_SYSCALL_TRACE      8
 #define TIF_SYSCALL_AUDIT      9
@@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(stru
 #define _TIF_SIGPENDING        (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -65,6 +65,7 @@ int main(void)
        BLANK();
        DEFINE(TI_FLAGS,        offsetof(struct thread_info, flags));
        DEFINE(TI_PREEMPT,      offsetof(struct thread_info, preempt_count));
+       DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count));
        DEFINE(TI_ADDR_LIMIT,   offsetof(struct thread_info, addr_limit));
        DEFINE(TI_TASK,         offsetof(struct thread_info, task));
        DEFINE(TI_CPU,          offsetof(struct thread_info, cpu));
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -208,11 +208,18 @@ ENDPROC(__dabt_svc)
 #ifdef CONFIG_PREEMPT
        get_thread_info tsk
        ldr     r8, [tsk, #TI_PREEMPT]          @ get preempt count
-       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
        teq     r8, #0                          @ if preempt count != 0
+       bne     1f                              @ return from exception
+       ldr     r0, [tsk, #TI_FLAGS]            @ get flags
+       tst     r0, #_TIF_NEED_RESCHED          @ if NEED_RESCHED is set
+       blne    svc_preempt                     @ preempt!
+
+       ldr     r8, [tsk, #TI_PREEMPT_LAZY]     @ get preempt lazy count
+       teq     r8, #0                          @ if preempt lazy count != 0
        movne   r0, #0                          @ force flags to 0
-       tst     r0, #_TIF_NEED_RESCHED
+       tst     r0, #_TIF_NEED_RESCHED_LAZY
        blne    svc_preempt
+1:
 #endif

        svc_exit r5, irq = 1                    @ return from exception
@@ -227,6 +234,8 @@ ENDPROC(__irq_svc)
1:     bl      preempt_schedule_irq            @ irq en/disable is done inside
       ldr     r0, [tsk, #TI_FLAGS]            @ get new tasks TI_FLAGS
       tst     r0, #_TIF_NEED_RESCHED
+      bne     1b
+      tst     r0, #_TIF_NEED_RESCHED_LAZY
       reteq   r8                              @ go again
       b       1b
 #endif
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -563,7 +563,8 @@ asmlinkage int
 do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 {
        do {
-               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+               if (likely(thread_flags & (_TIF_NEED_RESCHED |
+                                          _TIF_NEED_RESCHED_LAZY))) {
                        schedule();
                } else {
                        if (unlikely(!user_mode(regs)))
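
The assembly above encodes a two-level check; as a rough C model of it
(an illustrative sketch only: example_svc_preempt_check() is invented, and
svc_preempt is really an assembly stub in entry-armv.S, not a C function):

extern void svc_preempt(void);  /* asm stub; calling it from C is notional */

/*
 * Sketch of the svc-exit decision: a hard request (TIF_NEED_RESCHED)
 * preempts whenever preempt_count is zero; a lazy request
 * (TIF_NEED_RESCHED_LAZY) is additionally gated on preempt_lazy_count,
 * so SCHED_OTHER preemption can be deferred while RT tasks still
 * preempt immediately.
 */
static void example_svc_preempt_check(struct thread_info *ti)
{
        if (ti->preempt_count)
                return;                         /* no preemption at all */

        if (ti->flags & _TIF_NEED_RESCHED) {
                svc_preempt();                  /* hard request: always */
                return;
        }

        if (!ti->preempt_lazy_count &&
            (ti->flags & _TIF_NEED_RESCHED_LAZY))
                svc_preempt();                  /* lazy: only if lazy count is 0 */
}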
@@ -0,0 +1,84 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 20 Sep 2013 14:31:54 +0200
Subject: arm/unwind: use a raw_spin_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Mostly unwind is done with irqs enabled; however, SLUB may call it with
irqs disabled while creating a new SLUB cache.

I had a system freeze while loading a module which called
kmem_cache_create() on init. That means SLUB's __slab_alloc() disabled
interrupts and then

->new_slab_objects()
 ->new_slab()
  ->setup_object()
   ->setup_object_debug()
    ->init_tracking()
     ->set_track()
      ->save_stack_trace()
       ->save_stack_trace_tsk()
        ->walk_stackframe()
         ->unwind_frame()
          ->unwind_find_idx()
           =>spin_lock_irqsave(&unwind_lock);

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/arm/kernel/unwind.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -93,7 +93,7 @@ extern const struct unwind_idx __start_u
 static const struct unwind_idx *__origin_unwind_idx;
 extern const struct unwind_idx __stop_unwind_idx[];

-static DEFINE_SPINLOCK(unwind_lock);
+static DEFINE_RAW_SPINLOCK(unwind_lock);
 static LIST_HEAD(unwind_tables);

 /* Convert a prel31 symbol to an absolute address */
@@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_f
                /* module unwind tables */
                struct unwind_table *table;

-               spin_lock_irqsave(&unwind_lock, flags);
+               raw_spin_lock_irqsave(&unwind_lock, flags);
                list_for_each_entry(table, &unwind_tables, list) {
                        if (addr >= table->begin_addr &&
                            addr < table->end_addr) {
@@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_f
                                break;
                        }
                }
-               spin_unlock_irqrestore(&unwind_lock, flags);
+               raw_spin_unlock_irqrestore(&unwind_lock, flags);
        }

        pr_debug("%s: idx = %p\n", __func__, idx);
@@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(un
        tab->begin_addr = text_addr;
        tab->end_addr = text_addr + text_size;

-       spin_lock_irqsave(&unwind_lock, flags);
+       raw_spin_lock_irqsave(&unwind_lock, flags);
        list_add_tail(&tab->list, &unwind_tables);
-       spin_unlock_irqrestore(&unwind_lock, flags);
+       raw_spin_unlock_irqrestore(&unwind_lock, flags);

        return tab;
 }
@@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_tabl
        if (!tab)
                return;

-       spin_lock_irqsave(&unwind_lock, flags);
+       raw_spin_lock_irqsave(&unwind_lock, flags);
        list_del(&tab->list);
-       spin_unlock_irqrestore(&unwind_lock, flags);
+       raw_spin_unlock_irqrestore(&unwind_lock, flags);

        kfree(tab);
 }
@@ -0,0 +1,65 @@
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 3 Jul 2009 08:44:29 -0500
Subject: ata: Do not disable interrupts in ide code for preempt-rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use the local_irq_*_nort variants.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 drivers/ata/libata-sff.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(str
        unsigned long flags;
        unsigned int consumed;

-       local_irq_save(flags);
+       local_irq_save_nort(flags);
        consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
-       local_irq_restore(flags);
+       local_irq_restore_nort(flags);

        return consumed;
 }
@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_qu
                unsigned long flags;

                /* FIXME: use a bounce buffer */
-               local_irq_save(flags);
+               local_irq_save_nort(flags);
                buf = kmap_atomic(page);

                /* do the actual data transfer */
@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_qu
                               do_write);

                kunmap_atomic(buf);
-               local_irq_restore(flags);
+               local_irq_restore_nort(flags);
        } else {
                buf = page_address(page);
                ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
@@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_
                unsigned long flags;

                /* FIXME: use bounce buffer */
-               local_irq_save(flags);
+               local_irq_save_nort(flags);
                buf = kmap_atomic(page);

                /* do the actual data transfer */
@@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_
                               count, rw);

                kunmap_atomic(buf);
-               local_irq_restore(flags);
+               local_irq_restore_nort(flags);
        } else {
                buf = page_address(page);
                consumed = ap->ops->sff_data_xfer(dev, buf + offset,
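
The local_irq_*_nort() helpers, like the preempt_*_nort() calls in the highmem
patch earlier, come from the RT series itself: on a non-RT kernel they map to
the plain primitives, while on PREEMPT_RT_FULL they largely compile away,
because the section is preemptible and protected by sleeping locks there. A
simplified sketch of their shape, assuming (not quoting verbatim) the rt
tree's definitions:

/* Simplified model of the *_nort helpers shipped by the RT patch set. */
#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_save_nort(flags)     do { local_save_flags(flags); } while (0)
# define local_irq_restore_nort(flags)  do { (void)(flags); } while (0)
# define preempt_disable_nort()         barrier()
# define preempt_enable_nort()          barrier()
#else
# define local_irq_save_nort(flags)     local_irq_save(flags)
# define local_irq_restore_nort(flags)  local_irq_restore(flags)
# define preempt_disable_nort()         preempt_disable()
# define preempt_enable_nort()          preempt_enable()
#endif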
@@ -0,0 +1,84 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 3 May 2014 11:00:29 +0200
Subject: blk-mq: revert raw locks, post pone notifier to POST_DEAD
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The blk_mq_cpu_notify_lock should be raw because some CPU-down levels
are called with interrupts off. The notifier itself currently calls only
one function, blk_mq_hctx_notify(). That function acquires ctx->lock,
which is a sleeping lock, and I would prefer to keep it that way. That
function only moves IO requests from the CPU that is going offline to
another CPU, and it is currently the only user. Therefore I revert the
list lock back to a sleeping spinlock and let the notifier run at
POST_DEAD time.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-mq-cpu.c | 17 ++++++++++-------
 block/blk-mq.c     |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -16,7 +16,7 @@
 #include "blk-mq.h"

 static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);

 static int blk_mq_main_cpu_notify(struct notifier_block *self,
                                  unsigned long action, void *hcpu)
@@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct
        struct blk_mq_cpu_notifier *notify;
        int ret = NOTIFY_OK;

-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       if (action != CPU_POST_DEAD)
+               return NOTIFY_OK;
+
+       spin_lock(&blk_mq_cpu_notify_lock);

        list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
                ret = notify->notify(notify->data, action, cpu);
@@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct
                        break;
        }

-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
        return ret;
 }

@@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct
 {
        BUG_ON(!notifier->notify);

-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       spin_lock(&blk_mq_cpu_notify_lock);
        list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
 }

 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
-       raw_spin_lock(&blk_mq_cpu_notify_lock);
+       spin_lock(&blk_mq_cpu_notify_lock);
        list_del(&notifier->list);
-       raw_spin_unlock(&blk_mq_cpu_notify_lock);
+       spin_unlock(&blk_mq_cpu_notify_lock);
 }

 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1612,7 +1612,7 @@ static int blk_mq_hctx_notify(void *data
 {
        struct blk_mq_hw_ctx *hctx = data;

-       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+       if (action == CPU_POST_DEAD)
                return blk_mq_hctx_cpu_offline(hctx, cpu);

        /*
@@ -0,0 +1,115 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 13 Feb 2015 11:01:26 +0100
Subject: block: blk-mq: Use swait
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

| BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
| in_atomic(): 1, irqs_disabled(): 0, pid: 255, name: kworker/u257:6
| 5 locks held by kworker/u257:6/255:
| #0:  ("events_unbound"){.+.+.+}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #1:  ((&entry->work)){+.+.+.}, at: [<ffffffff8108edf1>] process_one_work+0x171/0x5e0
| #2:  (&shost->scan_mutex){+.+.+.}, at: [<ffffffffa000faa3>] __scsi_add_device+0xa3/0x130 [scsi_mod]
| #3:  (&set->tag_list_lock){+.+...}, at: [<ffffffff812f09fa>] blk_mq_init_queue+0x96a/0xa50
| #4:  (rcu_read_lock_sched){......}, at: [<ffffffff8132887d>] percpu_ref_kill_and_confirm+0x1d/0x120
| Preemption disabled at:[<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|
| CPU: 2 PID: 255 Comm: kworker/u257:6 Not tainted 3.18.7-rt0+ #1
| Workqueue: events_unbound async_run_entry_fn
|  0000000000000003 ffff8800bc29f998 ffffffff815b3a12 0000000000000000
|  0000000000000000 ffff8800bc29f9b8 ffffffff8109aa16 ffff8800bc29fa28
|  ffff8800bc5d1bc8 ffff8800bc29f9e8 ffffffff815b8dd4 ffff880000000000
| Call Trace:
|  [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
|  [<ffffffff8109aa16>] __might_sleep+0x116/0x190
|  [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
|  [<ffffffff810b6089>] __wake_up+0x29/0x60
|  [<ffffffff812ee06e>] blk_mq_usage_counter_release+0x1e/0x20
|  [<ffffffff81328966>] percpu_ref_kill_and_confirm+0x106/0x120
|  [<ffffffff812eff76>] blk_mq_freeze_queue_start+0x56/0x70
|  [<ffffffff812f0000>] blk_mq_update_tag_set_depth+0x40/0xd0
|  [<ffffffff812f0a1c>] blk_mq_init_queue+0x98c/0xa50
|  [<ffffffffa000dcf0>] scsi_mq_alloc_queue+0x20/0x60 [scsi_mod]
|  [<ffffffffa000ea35>] scsi_alloc_sdev+0x2f5/0x370 [scsi_mod]
|  [<ffffffffa000f494>] scsi_probe_and_add_lun+0x9e4/0xdd0 [scsi_mod]
|  [<ffffffffa000fb26>] __scsi_add_device+0x126/0x130 [scsi_mod]
|  [<ffffffffa013033f>] ata_scsi_scan_host+0xaf/0x200 [libata]
|  [<ffffffffa012b5b6>] async_port_probe+0x46/0x60 [libata]
|  [<ffffffff810978fb>] async_run_entry_fn+0x3b/0xf0
|  [<ffffffff8108ee81>] process_one_work+0x201/0x5e0

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-core.c       | 2 +-
 block/blk-mq.c         | 10 +++++-----
 include/linux/blkdev.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -664,7 +664,7 @@ struct request_queue *blk_alloc_queue_no
        q->bypass_depth = 1;
        __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);

-       init_waitqueue_head(&q->mq_freeze_wq);
+       init_swait_head(&q->mq_freeze_wq);

        if (blkcg_init_queue(q))
                goto fail_bdi;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -88,7 +88,7 @@ static int blk_mq_queue_enter(struct req
        if (!(gfp & __GFP_WAIT))
                return -EBUSY;

-       ret = wait_event_interruptible(q->mq_freeze_wq,
+       ret = swait_event_interruptible(q->mq_freeze_wq,
                        !q->mq_freeze_depth || blk_queue_dying(q));
        if (blk_queue_dying(q))
                return -ENODEV;
@@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release
        struct request_queue *q =
                container_of(ref, struct request_queue, mq_usage_counter);

-       wake_up_all(&q->mq_freeze_wq);
+       swait_wake_all(&q->mq_freeze_wq);
 }

 void blk_mq_freeze_queue_start(struct request_queue *q)
@@ -127,7 +127,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_st

 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
-       wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
+       swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }

 /*
@@ -151,7 +151,7 @@ void blk_mq_unfreeze_queue(struct reques
        spin_unlock_irq(q->queue_lock);
        if (wake) {
                percpu_ref_reinit(&q->mq_usage_counter);
-               wake_up_all(&q->mq_freeze_wq);
+               swait_wake_all(&q->mq_freeze_wq);
        }
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
@@ -170,7 +170,7 @@ void blk_mq_wake_waiters(struct request_
         * dying, we need to ensure that processes currently waiting on
         * the queue are notified as well.
         */
-       wake_up_all(&q->mq_freeze_wq);
+       swait_wake_all(&q->mq_freeze_wq);
 }

 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -483,7 +483,7 @@ struct request_queue {
        struct throtl_data *td;
 #endif
        struct rcu_head rcu_head;
-       wait_queue_head_t mq_freeze_wq;
+       struct swait_head mq_freeze_wq;
        struct percpu_ref mq_usage_counter;
        struct list_head all_q_node;
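
init_swait_head(), swait_event() and swait_wake_all() are the RT tree's
simple-wait API: the queue's internal lock is raw and the wake side does
bounded work, which is what makes waking from the atomic percpu_ref release
path legal here. A minimal usage sketch (example_* names are invented and
the header path is an assumption from the rt tree):

#include <linux/wait-simple.h>  /* RT-tree header; path is an assumption */

static struct swait_head example_wq;
static int example_done;

static void example_init(void)
{
        init_swait_head(&example_wq);
}

static void example_waiter(void)
{
        /* Sleeps until the condition becomes true, like wait_event(). */
        swait_event(example_wq, example_done);
}

static void example_waker(void)
{
        example_done = 1;
        /* May be called from atomic context; the head's lock is raw. */
        swait_wake_all(&example_wq);
}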
102
debian/patches/features/all/rt/block-mq-don-t-complete-requests-via-IPI.patch
vendored
Normal file

@@ -0,0 +1,102 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 29 Jan 2015 15:10:08 +0100
Subject: block/mq: don't complete requests via IPI
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The IPI runs in hardirq context and there are sleeping locks. This patch
moves the completion into a workqueue.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-core.c       |  3 +++
 block/blk-mq.c         | 20 ++++++++++++++++++++
 include/linux/blk-mq.h |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 25 insertions(+)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -100,6 +100,9 @@ void blk_rq_init(struct request_queue *q

        INIT_LIST_HEAD(&rq->queuelist);
        INIT_LIST_HEAD(&rq->timeout_list);
+#ifdef CONFIG_PREEMPT_RT_FULL
+       INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
        rq->cpu = -1;
        rq->q = q;
        rq->__sector = (sector_t) -1;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -217,6 +217,9 @@ static void blk_mq_rq_ctx_init(struct re
        rq->resid_len = 0;
        rq->sense = NULL;

+#ifdef CONFIG_PREEMPT_RT_FULL
+       INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work);
+#endif
        INIT_LIST_HEAD(&rq->timeout_list);
        rq->timeout = 0;

@@ -346,6 +349,17 @@ void blk_mq_end_request(struct request *
 }
 EXPORT_SYMBOL(blk_mq_end_request);

+#ifdef CONFIG_PREEMPT_RT_FULL
+
+void __blk_mq_complete_request_remote_work(struct work_struct *work)
+{
+       struct request *rq = container_of(work, struct request, work);
+
+       rq->q->softirq_done_fn(rq);
+}
+
+#else
+
 static void __blk_mq_complete_request_remote(void *data)
 {
        struct request *rq = data;
@@ -353,6 +367,8 @@ static void __blk_mq_complete_request_re
        rq->q->softirq_done_fn(rq);
 }

+#endif
+
 static void blk_mq_ipi_complete_request(struct request *rq)
 {
        struct blk_mq_ctx *ctx = rq->mq_ctx;
@@ -369,10 +385,14 @@ static void blk_mq_ipi_complete_request(
                shared = cpus_share_cache(cpu, ctx->cpu);

        if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+               schedule_work_on(ctx->cpu, &rq->work);
+#else
                rq->csd.func = __blk_mq_complete_request_remote;
                rq->csd.info = rq;
                rq->csd.flags = 0;
                smp_call_function_single_async(ctx->cpu, &rq->csd);
+#endif
        } else {
                rq->q->softirq_done_fn(rq);
        }
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -202,6 +202,7 @@ static inline u16 blk_mq_unique_tag_to_t

 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
+void __blk_mq_complete_request_remote_work(struct work_struct *work);

 int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -101,6 +101,7 @@ struct request {
        struct list_head queuelist;
        union {
                struct call_single_data csd;
+               struct work_struct work;
                unsigned long fifo_time;
        };
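
The PREEMPT_RT_FULL branch above uses only mainline workqueue primitives;
stripped of the blk-mq details, the deferral pattern looks like this
(example_* names are invented):

#include <linux/workqueue.h>

struct example_req {
        struct work_struct work;
        /* ... request payload ... */
};

/* Runs in process context on the CPU picked at queue time. */
static void example_complete_fn(struct work_struct *work)
{
        struct example_req *req = container_of(work, struct example_req, work);

        /* May take sleeping locks here, unlike an IPI handler. */
        (void)req;
}

static void example_defer_completion(struct example_req *req, int cpu)
{
        INIT_WORK(&req->work, example_complete_fn);
        schedule_work_on(cpu, &req->work);      /* instead of an IPI */
}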
@@ -0,0 +1,125 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 18 Feb 2015 18:37:26 +0100
Subject: block/mq: drop per ctx cpu_lock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

While converting the get_cpu() to get_cpu_light() I added a cpu lock to
ensure the same code is not invoked twice on the same CPU. And now I run
into this:

| kernel BUG at kernel/locking/rtmutex.c:996!
| invalid opcode: 0000 [#1] PREEMPT SMP
| CPU0: 13 PID: 75 Comm: kworker/u258:0 Tainted: G I 3.18.7-rt1.5+ #12
| Workqueue: writeback bdi_writeback_workfn (flush-8:0)
| task: ffff88023742a620 ti: ffff88023743c000 task.ti: ffff88023743c000
| RIP: 0010:[<ffffffff81523cc0>]  [<ffffffff81523cc0>] rt_spin_lock_slowlock+0x280/0x2d0
| Call Trace:
|  [<ffffffff815254e7>] rt_spin_lock+0x27/0x60

taking the same lock again

|  [<ffffffff8127c771>] blk_mq_insert_requests+0x51/0x130
|  [<ffffffff8127d4a9>] blk_mq_flush_plug_list+0x129/0x140
|  [<ffffffff81272461>] blk_flush_plug_list+0xd1/0x250
|  [<ffffffff81522075>] schedule+0x75/0xa0
|  [<ffffffff8152474d>] do_nanosleep+0xdd/0x180
|  [<ffffffff810c8312>] __hrtimer_nanosleep+0xd2/0x1c0
|  [<ffffffff810c8456>] cpu_chill+0x56/0x80
|  [<ffffffff8107c13d>] try_to_grab_pending+0x1bd/0x390
|  [<ffffffff8107c431>] cancel_delayed_work+0x21/0x170
|  [<ffffffff81279a98>] blk_mq_stop_hw_queue+0x18/0x40
|  [<ffffffffa000ac6f>] scsi_queue_rq+0x7f/0x830 [scsi_mod]
|  [<ffffffff8127b0de>] __blk_mq_run_hw_queue+0x1ee/0x360
|  [<ffffffff8127b528>] blk_mq_map_request+0x108/0x190

take the lock ^^^

|  [<ffffffff8127c8d2>] blk_sq_make_request+0x82/0x350
|  [<ffffffff8126f6c0>] generic_make_request+0xd0/0x120
|  [<ffffffff8126f788>] submit_bio+0x78/0x190
|  [<ffffffff811bd537>] _submit_bh+0x117/0x180
|  [<ffffffff811bf528>] __block_write_full_page.constprop.38+0x138/0x3f0
|  [<ffffffff811bf880>] block_write_full_page+0xa0/0xe0
|  [<ffffffff811c02b3>] blkdev_writepage+0x13/0x20
|  [<ffffffff81127b25>] __writepage+0x15/0x40
|  [<ffffffff8112873b>] write_cache_pages+0x1fb/0x440
|  [<ffffffff811289be>] generic_writepages+0x3e/0x60
|  [<ffffffff8112a17c>] do_writepages+0x1c/0x30
|  [<ffffffff811b3603>] __writeback_single_inode+0x33/0x140
|  [<ffffffff811b462d>] writeback_sb_inodes+0x2bd/0x490
|  [<ffffffff811b4897>] __writeback_inodes_wb+0x97/0xd0
|  [<ffffffff811b4a9b>] wb_writeback+0x1cb/0x210
|  [<ffffffff811b505b>] bdi_writeback_workfn+0x25b/0x380
|  [<ffffffff8107b50b>] process_one_work+0x1bb/0x490
|  [<ffffffff8107c7ab>] worker_thread+0x6b/0x4f0
|  [<ffffffff81081863>] kthread+0xe3/0x100
|  [<ffffffff8152627c>] ret_from_fork+0x7c/0xb0

After looking at this for a while it seems that it is safe if blk_mq_ctx
is used multiple times; the lock in the struct protects the access.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-mq.c | 4 ----
 block/blk-mq.h | 8 --------
 2 files changed, 12 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1386,9 +1386,7 @@ static void blk_sq_make_request(struct r
                if (list_empty(&plug->mq_list))
                        trace_block_plug(q);
                else if (request_count >= BLK_MAX_REQUEST_COUNT) {
-                       spin_unlock(&data.ctx->cpu_lock);
                        blk_flush_plug_list(plug, false);
-                       spin_lock(&data.ctx->cpu_lock);
                        trace_block_plug(q);
                }
                list_add_tail(&rq->queuelist, &plug->mq_list);
@@ -1581,7 +1579,6 @@ static int blk_mq_hctx_cpu_offline(struc
                blk_mq_hctx_clear_pending(hctx, ctx);
        }
        spin_unlock(&ctx->lock);
-       __blk_mq_put_ctx(ctx);

        if (list_empty(&tmp))
                return NOTIFY_OK;
@@ -1775,7 +1772,6 @@ static void blk_mq_init_cpu_queues(struc
                memset(__ctx, 0, sizeof(*__ctx));
                __ctx->cpu = i;
                spin_lock_init(&__ctx->lock);
-               spin_lock_init(&__ctx->cpu_lock);
                INIT_LIST_HEAD(&__ctx->rq_list);
                __ctx->queue = q;

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -9,7 +9,6 @@ struct blk_mq_ctx {
                struct list_head rq_list;
        }  ____cacheline_aligned_in_smp;

-       spinlock_t cpu_lock;
        unsigned int cpu;
        unsigned int index_hw;

@@ -80,7 +79,6 @@ static inline struct blk_mq_ctx *__blk_m
        struct blk_mq_ctx *ctx;

        ctx = per_cpu_ptr(q->queue_ctx, cpu);
-       spin_lock(&ctx->cpu_lock);
        return ctx;
 }

@@ -95,14 +93,8 @@ static inline struct blk_mq_ctx *blk_mq_
        return __blk_mq_get_ctx(q, get_cpu_light());
 }

-static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
-{
-       spin_unlock(&ctx->cpu_lock);
-}
-
 static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 {
-       __blk_mq_put_ctx(ctx);
        put_cpu_light();
 }
@@ -0,0 +1,52 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: block/mq: do not invoke preempt_disable()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

preempt_disable() and get_cpu() don't play well together with the
sleeping locks that are taken later.
It seems to be enough to replace it with get_cpu_light() and
migrate_disable().

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-mq.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -364,7 +364,7 @@ static void blk_mq_ipi_complete_request(
                return;
        }

-       cpu = get_cpu();
+       cpu = get_cpu_light();
        if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
                shared = cpus_share_cache(cpu, ctx->cpu);

@@ -376,7 +376,7 @@ static void blk_mq_ipi_complete_request(
        } else {
                rq->q->softirq_done_fn(rq);
        }
-       put_cpu();
+       put_cpu_light();
 }

 void __blk_mq_complete_request(struct request *rq)
@@ -905,14 +905,14 @@ void blk_mq_run_hw_queue(struct blk_mq_h
                return;

        if (!async) {
-               int cpu = get_cpu();
+               int cpu = get_cpu_light();
                if (cpumask_test_cpu(cpu, hctx->cpumask)) {
                        __blk_mq_run_hw_queue(hctx);
-                       put_cpu();
+                       put_cpu_light();
                        return;
                }

-               put_cpu();
+               put_cpu_light();
        }

        kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
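
get_cpu_light()/put_cpu_light() are RT-series helpers: where get_cpu()
disables preemption outright, the light variants only disable migration, so
the caller keeps a stable CPU number but may still be preempted and may take
sleeping locks. Roughly (a sketch of the rt definition, not a verbatim copy):

/* Simplified model of the RT tree's get_cpu_light()/put_cpu_light(). */
#ifdef CONFIG_PREEMPT_RT_FULL
# define get_cpu_light()        ({ migrate_disable(); smp_processor_id(); })
# define put_cpu_light()        migrate_enable()
#else
# define get_cpu_light()        get_cpu()
# define put_cpu_light()        put_cpu()
#endif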
@@ -0,0 +1,90 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 9 Apr 2014 10:37:23 +0200
Subject: block: mq: use cpu_light()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

There is a might-sleep splat because get_cpu() disables preemption and
later we grab a lock. As a workaround for this we use get_cpu_light()
and an additional lock to prevent taking the same ctx.

There is a lock member in the ctx already, but some functions do ++ on
the member; this works with irqs off, but on RT we would need the extra
lock.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 block/blk-mq.c |  4 ++++
 block/blk-mq.h | 17 ++++++++++++++---
 2 files changed, 18 insertions(+), 3 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1366,7 +1366,9 @@ static void blk_sq_make_request(struct r
                if (list_empty(&plug->mq_list))
                        trace_block_plug(q);
                else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+                       spin_unlock(&data.ctx->cpu_lock);
                        blk_flush_plug_list(plug, false);
+                       spin_lock(&data.ctx->cpu_lock);
                        trace_block_plug(q);
                }
                list_add_tail(&rq->queuelist, &plug->mq_list);
@@ -1559,6 +1561,7 @@ static int blk_mq_hctx_cpu_offline(struc
                blk_mq_hctx_clear_pending(hctx, ctx);
        }
        spin_unlock(&ctx->lock);
+       __blk_mq_put_ctx(ctx);

        if (list_empty(&tmp))
                return NOTIFY_OK;
@@ -1752,6 +1755,7 @@ static void blk_mq_init_cpu_queues(struc
                memset(__ctx, 0, sizeof(*__ctx));
                __ctx->cpu = i;
                spin_lock_init(&__ctx->lock);
+               spin_lock_init(&__ctx->cpu_lock);
                INIT_LIST_HEAD(&__ctx->rq_list);
                __ctx->queue = q;

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -9,6 +9,7 @@ struct blk_mq_ctx {
                struct list_head rq_list;
        }  ____cacheline_aligned_in_smp;

+       spinlock_t cpu_lock;
        unsigned int cpu;
        unsigned int index_hw;

@@ -76,7 +77,11 @@ struct blk_align_bitmap {
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
                                           unsigned int cpu)
 {
-       return per_cpu_ptr(q->queue_ctx, cpu);
+       struct blk_mq_ctx *ctx;
+
+       ctx = per_cpu_ptr(q->queue_ctx, cpu);
+       spin_lock(&ctx->cpu_lock);
+       return ctx;
 }

 /*
@@ -87,12 +92,18 @@ static inline struct blk_m
  */
 static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
 {
-       return __blk_mq_get_ctx(q, get_cpu());
+       return __blk_mq_get_ctx(q, get_cpu_light());
+}
+
+static void __blk_mq_put_ctx(struct blk_mq_ctx *ctx)
+{
+       spin_unlock(&ctx->cpu_lock);
 }

 static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
 {
-       put_cpu();
+       __blk_mq_put_ctx(ctx);
+       put_cpu_light();
 }

 struct blk_mq_alloc_data {
97
debian/patches/features/all/rt/block-shorten-interrupt-disabled-regions.patch
vendored
Normal file

@@ -0,0 +1,97 @@
Subject: block: Shorten interrupt disabled regions
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 22 Jun 2011 19:47:02 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Moving the blk_sched_flush_plug() call out of the interrupt/preempt
disabled region in the scheduler allows us to replace
local_irq_save/restore(flags) by local_irq_disable/enable() in
blk_flush_plug().

Now instead of doing this we disable interrupts explicitly when we
lock the request_queue and reenable them when we drop the lock. That
allows interrupts to be handled when the plug list contains requests
for more than one queue.

Aside of that this change makes the scope of the irq disabled region
more obvious. The current code confused the hell out of me when
looking at:

 local_irq_save(flags);
   spin_lock(q->queue_lock);
   ...
   queue_unplugged(q...);
     scsi_request_fn();
       spin_unlock(q->queue_lock);
       spin_lock(shost->host_lock);
       spin_unlock_irq(shost->host_lock);

-------------------^^^ ????

       spin_lock_irq(q->queue_lock);
       spin_unlock(q->lock);
 local_irq_restore(flags);

Also add a comment to __blk_run_queue() documenting that
q->request_fn() can drop q->queue_lock and reenable interrupts, but
must return with q->queue_lock held and interrupts disabled.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de
---
 block/blk-core.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -3077,7 +3077,7 @@ static void queue_unplugged(struct reque
                blk_run_queue_async(q);
        else
                __blk_run_queue(q);
-       spin_unlock(q->queue_lock);
+       spin_unlock_irq(q->queue_lock);
 }

 static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule)
@@ -3125,7 +3125,6 @@ EXPORT_SYMBOL(blk_check_plugged);
 void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
        struct request_queue *q;
-       unsigned long flags;
        struct request *rq;
        LIST_HEAD(list);
        unsigned int depth;
@@ -3145,11 +3144,6 @@ void blk_flush_plug_list(struct blk_plug
        q = NULL;
        depth = 0;

-       /*
-        * Save and disable interrupts here, to avoid doing it for every
-        * queue lock we have to take.
-        */
-       local_irq_save(flags);
        while (!list_empty(&list)) {
                rq = list_entry_rq(list.next);
                list_del_init(&rq->queuelist);
@@ -3162,7 +3156,7 @@ void blk_flush_plug_list(struct blk_plug
                        queue_unplugged(q, depth, from_schedule);
                        q = rq->q;
                        depth = 0;
-                       spin_lock(q->queue_lock);
+                       spin_lock_irq(q->queue_lock);
                }

                /*
@@ -3189,8 +3183,6 @@ void blk_flush_plug_list(struct blk_plug
         */
        if (q)
                queue_unplugged(q, depth, from_schedule);
-
-       local_irq_restore(flags);
 }

 void blk_finish_plug(struct blk_plug *plug)
@@ -0,0 +1,46 @@
Subject: block: Use cpu_chill() for retry loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 20 Dec 2012 18:28:26 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Retry loops on RT might loop forever when the modifying side was
preempted. Steven also observed a live lock when there was a
concurrent priority boosting going on.

Use cpu_chill() instead of cpu_relax() to let the system
make progress.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 block/blk-ioc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/delay.h>

 #include "blk.h"

@@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_s
                        spin_unlock(q->queue_lock);
                } else {
                        spin_unlock_irqrestore(&ioc->lock, flags);
-                       cpu_relax();
+                       cpu_chill();
                        spin_lock_irqsave_nested(&ioc->lock, flags, 1);
                }
        }
@@ -187,7 +188,7 @@ void put_io_context_active(struct io_con
                        spin_unlock(icq->q->queue_lock);
                } else {
                        spin_unlock_irqrestore(&ioc->lock, flags);
-                       cpu_relax();
+                       cpu_chill();
                        goto retry;
                }
        }
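
cpu_chill() is declared via linux/delay.h in the rt tree, which is why the
hunk above adds that include. It behaves like cpu_relax() on a non-RT kernel
but sleeps briefly on PREEMPT_RT_FULL, so a preempted or priority-boosted
lock holder can run and the retry can make progress. The retry shape it is
meant for, with invented example names:

#include <linux/spinlock.h>
#include <linux/delay.h>        /* cpu_chill() on RT, per the hunk above */

static void example_take_both(spinlock_t *a, spinlock_t *b)
{
        /* Hypothetical two-lock acquisition with a polite retry. */
        for (;;) {
                spin_lock(a);
                if (spin_trylock(b))
                        break;          /* got both locks */
                spin_unlock(a);
                cpu_chill();            /* sleep a bit on RT, spin otherwise */
        }
        /* ... critical section ... */
        spin_unlock(b);
        spin_unlock(a);
}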
@@ -0,0 +1,37 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: bug: BUG_ON/WARN_ON variants dependent on RT/!RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Introduce RT/NON-RT WARN/BUG statements to avoid ifdefs in the code.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 include/asm-generic/bug.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -206,6 +206,20 @@ extern void warn_slowpath_null(const cha
 # define WARN_ON_SMP(x)                 ({0;})
 #endif

+#ifdef CONFIG_PREEMPT_RT_BASE
+# define BUG_ON_RT(c)                   BUG_ON(c)
+# define BUG_ON_NONRT(c)                do { } while (0)
+# define WARN_ON_RT(condition)          WARN_ON(condition)
+# define WARN_ON_NONRT(condition)       do { } while (0)
+# define WARN_ON_ONCE_NONRT(condition)  do { } while (0)
+#else
+# define BUG_ON_RT(c)                   do { } while (0)
+# define BUG_ON_NONRT(c)                BUG_ON(c)
+# define WARN_ON_RT(condition)          do { } while (0)
+# define WARN_ON_NONRT(condition)       WARN_ON(condition)
+# define WARN_ON_ONCE_NONRT(condition)  WARN_ON_ONCE(condition)
+#endif
+
 #endif /* __ASSEMBLY__ */

 #endif
65
debian/patches/features/all/rt/cgroups-scheduling-while-atomic-in-cgroup-code.patch
vendored
Normal file

@@ -0,0 +1,65 @@
From: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
Date: Sat, 21 Jun 2014 10:09:48 +0200
|
||||
Subject: memcontrol: Prevent scheduling while atomic in cgroup code
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
mm, memcg: make refill_stock() use get_cpu_light()
|
||||
|
||||
Nikita reported the following memcg scheduling while atomic bug:
|
||||
|
||||
Call Trace:
|
||||
[e22d5a90] [c0007ea8] show_stack+0x4c/0x168 (unreliable)
|
||||
[e22d5ad0] [c0618c04] __schedule_bug+0x94/0xb0
|
||||
[e22d5ae0] [c060b9ec] __schedule+0x530/0x550
|
||||
[e22d5bf0] [c060bacc] schedule+0x30/0xbc
|
||||
[e22d5c00] [c060ca24] rt_spin_lock_slowlock+0x180/0x27c
|
||||
[e22d5c70] [c00b39dc] res_counter_uncharge_until+0x40/0xc4
|
||||
[e22d5ca0] [c013ca88] drain_stock.isra.20+0x54/0x98
|
||||
[e22d5cc0] [c01402ac] __mem_cgroup_try_charge+0x2e8/0xbac
|
||||
[e22d5d70] [c01410d4] mem_cgroup_charge_common+0x3c/0x70
|
||||
[e22d5d90] [c0117284] __do_fault+0x38c/0x510
|
||||
[e22d5df0] [c011a5f4] handle_pte_fault+0x98/0x858
|
||||
[e22d5e50] [c060ed08] do_page_fault+0x42c/0x6fc
|
||||
[e22d5f40] [c000f5b4] handle_page_fault+0xc/0x80
|
||||
|
||||
What happens:
|
||||
|
||||
refill_stock()
|
||||
get_cpu_var()
|
||||
drain_stock()
|
||||
res_counter_uncharge()
|
||||
res_counter_uncharge_until()
|
||||
spin_lock() <== boom
|
||||
|
||||
Fix it by replacing get/put_cpu_var() with get/put_cpu_light().
|
||||
|
||||
|
||||
Reported-by: Nikita Yushchenko <nyushchenko@dev.rtsoft.ru>
|
||||
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
mm/memcontrol.c | 7 +++++--
|
||||
1 file changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -2127,14 +2127,17 @@ static void drain_local_stock(struct wor
|
||||
*/
|
||||
static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
|
||||
{
|
||||
- struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
|
||||
+ struct memcg_stock_pcp *stock;
|
||||
+ int cpu = get_cpu_light();
|
||||
+
|
||||
+ stock = &per_cpu(memcg_stock, cpu);
|
||||
|
||||
if (stock->cached != memcg) { /* reset if necessary */
|
||||
drain_stock(stock);
|
||||
stock->cached = memcg;
|
||||
}
|
||||
stock->nr_pages += nr_pages;
|
||||
- put_cpu_var(memcg_stock);
|
||||
+ put_cpu_light();
|
||||
}
|
||||
|
||||
/*
|
|
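The conversion pattern here is generic: get_cpu_var() disables preemption for the whole critical section, while the RT patch set's get_cpu_light() only pins the task to its CPU and leaves it preemptible, so sleeping locks may be taken inside. A hedged sketch of the same idiom (get_cpu_light()/put_cpu_light() are RT-patch primitives, not mainline API; my_stat is made up for illustration):

	#include <linux/percpu.h>

	struct my_stat { unsigned long count; };
	static DEFINE_PER_CPU(struct my_stat, my_stat);

	static void update_stat(unsigned long delta)
	{
		struct my_stat *s;
		int cpu = get_cpu_light();	/* pin to CPU, stay preemptible on RT */

		s = &per_cpu(my_stat, cpu);
		s->count += delta;		/* may take sleeping locks on RT */
		put_cpu_light();
	}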
@@ -0,0 +1,87 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 13 Feb 2015 15:52:24 +0100
Subject: cgroups: use simple wait in css_release()
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

To avoid:
|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:914
|in_atomic(): 1, irqs_disabled(): 0, pid: 92, name: rcuc/11
|2 locks held by rcuc/11/92:
| #0: (rcu_callback){......}, at: [<ffffffff810e037e>] rcu_cpu_kthread+0x3de/0x940
| #1: (rcu_read_lock_sched){......}, at: [<ffffffff81328390>] percpu_ref_call_confirm_rcu+0x0/0xd0
|Preemption disabled at:[<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
|CPU: 11 PID: 92 Comm: rcuc/11 Not tainted 3.18.7-rt0+ #1
| ffff8802398cdf80 ffff880235f0bc28 ffffffff815b3a12 0000000000000000
| 0000000000000000 ffff880235f0bc48 ffffffff8109aa16 0000000000000000
| ffff8802398cdf80 ffff880235f0bc78 ffffffff815b8dd4 000000000000df80
|Call Trace:
| [<ffffffff815b3a12>] dump_stack+0x4f/0x7c
| [<ffffffff8109aa16>] __might_sleep+0x116/0x190
| [<ffffffff815b8dd4>] rt_spin_lock+0x24/0x60
| [<ffffffff8108d2cd>] queue_work_on+0x6d/0x1d0
| [<ffffffff8110c881>] css_release+0x81/0x90
| [<ffffffff8132844e>] percpu_ref_call_confirm_rcu+0xbe/0xd0
| [<ffffffff813284e2>] percpu_ref_switch_to_atomic_rcu+0x82/0xc0
| [<ffffffff810e03e5>] rcu_cpu_kthread+0x445/0x940
| [<ffffffff81098a2d>] smpboot_thread_fn+0x18d/0x2d0
| [<ffffffff810948d8>] kthread+0xe8/0x100
| [<ffffffff815b9c3c>] ret_from_fork+0x7c/0xb0

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/cgroup.h | 2 ++
 kernel/cgroup.c | 9 +++++----
 2 files changed, 7 insertions(+), 4 deletions(-)

--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -22,6 +22,7 @@
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
 #include <linux/wait.h>
+#include <linux/work-simple.h>

 #ifdef CONFIG_CGROUPS

@@ -91,6 +92,7 @@ struct cgroup_subsys_state {
 /* percpu_ref killing and RCU release */
 struct rcu_head rcu_head;
 struct work_struct destroy_work;
+ struct swork_event destroy_swork;
 };

 /* bits in struct cgroup_subsys_state flags field */
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4421,10 +4421,10 @@ static void css_free_rcu_fn(struct rcu_h
 queue_work(cgroup_destroy_wq, &css->destroy_work);
 }

-static void css_release_work_fn(struct work_struct *work)
+static void css_release_work_fn(struct swork_event *sev)
 {
 struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(sev, struct cgroup_subsys_state, destroy_swork);
 struct cgroup_subsys *ss = css->ss;
 struct cgroup *cgrp = css->cgroup;

@@ -4463,8 +4463,8 @@ static void css_release(struct percpu_re
 struct cgroup_subsys_state *css =
 container_of(ref, struct cgroup_subsys_state, refcnt);

- INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_SWORK(&css->destroy_swork, css_release_work_fn);
+ swork_queue(&css->destroy_swork);
 }

 static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5068,6 +5068,7 @@ static int __init cgroup_wq_init(void)
 */
 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
 BUG_ON(!cgroup_destroy_wq);
+ BUG_ON(swork_get());

 /*
 * Used to destroy pidlists and separate to serve as flush domain.
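INIT_SWORK()/swork_queue() deliberately mirror the INIT_WORK()/queue_work() pattern, but may be called from contexts that must not take sleeping locks on RT. A sketch of the idiom, assuming the RT patch set's <linux/work-simple.h> interface used above (my_obj is hypothetical):

	#include <linux/work-simple.h>
	#include <linux/slab.h>

	struct my_obj {
		struct swork_event release_swork;
	};

	static void my_release_fn(struct swork_event *sev)
	{
		struct my_obj *obj = container_of(sev, struct my_obj, release_swork);
		kfree(obj);	/* runs in the swork thread, may sleep */
	}

	/* Safe to call from atomic/RCU-callback context on RT: */
	static void my_release(struct my_obj *obj)
	{
		INIT_SWORK(&obj->release_swork, my_release_fn);
		swork_queue(&obj->release_swork);
	}

As in the patch, a one-time swork_get() during init (checked with BUG_ON) is assumed to have set up the worker thread.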
161
debian/patches/features/all/rt/clocksource-tclib-allow-higher-clockrates.patch
vendored
Normal file
@@ -0,0 +1,161 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Mon, 8 Mar 2010 18:57:04 +0100
Subject: clocksource: TCLIB: Allow higher clock rates for clock events
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

As default the TCLIB uses the 32KiHz base clock rate for clock events.
Add a compile time selection to allow higher clock resolution.

(fixed up by Sami Pietikäinen <Sami.Pietikainen@wapice.com>)

Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/clocksource/tcb_clksrc.c | 37 ++++++++++++++++++++++---------------
 drivers/misc/Kconfig | 12 ++++++++++--
 2 files changed, 32 insertions(+), 17 deletions(-)

--- a/drivers/clocksource/tcb_clksrc.c
+++ b/drivers/clocksource/tcb_clksrc.c
@@ -23,8 +23,7 @@
 * this 32 bit free-running counter. the second channel is not used.
 *
 * - The third channel may be used to provide a 16-bit clockevent
- * source, used in either periodic or oneshot mode. This runs
- * at 32 KiHZ, and can handle delays of up to two seconds.
+ * source, used in either periodic or oneshot mode.
 *
 * A boot clocksource and clockevent source are also currently needed,
 * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
@@ -74,6 +73,7 @@ static struct clocksource clksrc = {
 struct tc_clkevt_device {
 struct clock_event_device clkevt;
 struct clk *clk;
+ u32 freq;
 void __iomem *regs;
 };

@@ -82,13 +82,6 @@ static struct tc_clkevt_device *to_tc_cl
 return container_of(clkevt, struct tc_clkevt_device, clkevt);
 }

-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
- * because using one of the divided clocks would usually mean the
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
- *
- * A divided clock could be good for high resolution timers, since
- * 30.5 usec resolution can seem "low".
- */
 static u32 timer_clock;

 static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
@@ -111,11 +104,12 @@ static void tc_mode(enum clock_event_mod
 case CLOCK_EVT_MODE_PERIODIC:
 clk_enable(tcd->clk);

- /* slow clock, count up to RC, then irq and restart */
+ /* count up to RC, then irq and restart */
 __raw_writel(timer_clock
 | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
 regs + ATMEL_TC_REG(2, CMR));
- __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
+ __raw_writel((tcd->freq + HZ / 2) / HZ,
+ tcaddr + ATMEL_TC_REG(2, RC));

 /* Enable clock and interrupts on RC compare */
 __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -128,7 +122,7 @@ static void tc_mode(enum clock_event_mod
 case CLOCK_EVT_MODE_ONESHOT:
 clk_enable(tcd->clk);

- /* slow clock, count up to RC, then irq and stop */
+ /* count up to RC, then irq and stop */
 __raw_writel(timer_clock | ATMEL_TC_CPCSTOP
 | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
 regs + ATMEL_TC_REG(2, CMR));
@@ -157,8 +151,12 @@ static struct tc_clkevt_device clkevt =
 .name = "tc_clkevt",
 .features = CLOCK_EVT_FEAT_PERIODIC
 | CLOCK_EVT_FEAT_ONESHOT,
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
 /* Should be lower than at91rm9200's system timer */
 .rating = 125,
+#else
+ .rating = 200,
+#endif
 .set_next_event = tc_next_event,
 .set_mode = tc_mode,
 },
@@ -178,8 +176,9 @@ static irqreturn_t ch2_irq(int irq, void
 return IRQ_NONE;
 }

-static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
 {
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
 int ret;
 struct clk *t2_clk = tc->clk[2];
 int irq = tc->irq[2];
@@ -193,7 +192,11 @@ static int __init setup_clkevents(struct
 clkevt.regs = tc->regs;
 clkevt.clk = t2_clk;

- timer_clock = clk32k_divisor_idx;
+ timer_clock = divisor_idx;
+ if (!divisor)
+ clkevt.freq = 32768;
+ else
+ clkevt.freq = clk_get_rate(t2_clk) / divisor;

 clkevt.clkevt.cpumask = cpumask_of(0);

@@ -203,7 +206,7 @@ static int __init setup_clkevents(struct
 return ret;
 }

- clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff);
+ clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff);

 return ret;
 }
@@ -340,7 +343,11 @@ static int __init tcb_clksrc_init(void)
 goto err_disable_t1;

 /* channel 2: periodic and oneshot timer support */
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
 ret = setup_clkevents(tc, clk32k_divisor_idx);
+#else
+ ret = setup_clkevents(tc, best_divisor_idx);
+#endif
 if (ret)
 goto err_unregister_clksrc;

--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -69,8 +69,7 @@ config ATMEL_TCB_CLKSRC
 are combined to make a single 32-bit timer.

 When GENERIC_CLOCKEVENTS is defined, the third timer channel
- may be used as a clock event device supporting oneshot mode
- (delays of up to two seconds) based on the 32 KiHz clock.
+ may be used as a clock event device supporting oneshot mode.

 config ATMEL_TCB_CLKSRC_BLOCK
 int
@@ -84,6 +83,15 @@ config ATMEL_TCB_CLKSRC_BLOCK
 TC can be used for other purposes, such as PWM generation and
 interval timing.

+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ bool "TC Block use 32 KiHz clock"
+ depends on ATMEL_TCB_CLKSRC
+ default y
+ help
+ Select this to use 32 KiHz base clock rate as TC block clock
+ source for clock events.
+
+
 config DUMMY_IRQ
 tristate "Dummy IRQ handler"
 default n
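A worked example of the RC reload value computed above, (freq + HZ/2) / HZ: with the slow clock (freq = 32768 Hz) and, say, HZ = 250, the register gets (32768 + 125) / 250 = 131 counts per tick, each count being 1/32768 s, i.e. roughly 30.5 µs of resolution. With a divided master clock of, for example, 2.4576 MHz (an illustrative figure, not from the patch), the same formula yields 9830 counts per tick at about 0.4 µs per count, which is the higher resolution the Kconfig option trades against the NO_HZ-friendly slow clock.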
@@ -0,0 +1,225 @@
Subject: completion: Use simple wait queues
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jan 2013 11:23:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Completions have no long lasting callbacks and therefore do not need
the complex waitqueue variant. Use simple waitqueues which reduces the
contention on the waitqueue lock.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/wireless/orinoco/orinoco_usb.c | 2 -
 drivers/usb/gadget/function/f_fs.c | 2 -
 drivers/usb/gadget/legacy/inode.c | 4 +--
 include/linux/completion.h | 9 +++-----
 include/linux/uprobes.h | 1
 kernel/sched/completion.c | 32 ++++++++++++++---------------
 kernel/sched/core.c | 10 +++++++--
 7 files changed, 33 insertions(+), 27 deletions(-)

--- a/drivers/net/wireless/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/orinoco/orinoco_usb.c
@@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ez
 while (!ctx->done.done && msecs--)
 udelay(1000);
 } else {
- wait_event_interruptible(ctx->done.wait,
+ swait_event_interruptible(ctx->done.wait,
 ctx->done.done);
 }
 break;
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1403,7 +1403,7 @@ static void ffs_data_put(struct ffs_data
 pr_info("%s(): freeing\n", __func__);
 ffs_data_clear(ffs);
 BUG_ON(waitqueue_active(&ffs->ev.waitq) ||
- waitqueue_active(&ffs->ep0req_completion.wait));
+ swaitqueue_active(&ffs->ep0req_completion.wait));
 kfree(ffs->dev_name);
 kfree(ffs);
 }
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf
 spin_unlock_irq (&epdata->dev->lock);

 if (likely (value == 0)) {
- value = wait_event_interruptible (done.wait, done.done);
+ value = swait_event_interruptible (done.wait, done.done);
 if (value != 0) {
 spin_lock_irq (&epdata->dev->lock);
 if (likely (epdata->ep != NULL)) {
@@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf
 usb_ep_dequeue (epdata->ep, epdata->req);
 spin_unlock_irq (&epdata->dev->lock);

- wait_event (done.wait, done.done);
+ swait_event (done.wait, done.done);
 if (epdata->status == -ECONNRESET)
 epdata->status = -EINTR;
 } else {
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -7,8 +7,7 @@
 * Atomic wait-for-completion handler data structures.
 * See kernel/sched/completion.c for details.
 */
-
-#include <linux/wait.h>
+#include <linux/wait-simple.h>

 /*
 * struct completion - structure used to maintain state for a "completion"
@@ -24,11 +23,11 @@
 */
 struct completion {
 unsigned int done;
- wait_queue_head_t wait;
+ struct swait_head wait;
 };

 #define COMPLETION_INITIALIZER(work) \
- { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+ { 0, SWAIT_HEAD_INITIALIZER((work).wait) }

 #define COMPLETION_INITIALIZER_ONSTACK(work) \
 ({ init_completion(&work); work; })
@@ -73,7 +72,7 @@ struct completion {
 static inline void init_completion(struct completion *x)
 {
 x->done = 0;
- init_waitqueue_head(&x->wait);
+ init_swait_head(&x->wait);
 }

 /**
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -27,6 +27,7 @@
 #include <linux/errno.h>
 #include <linux/rbtree.h>
 #include <linux/types.h>
+#include <linux/wait.h>

 struct vm_area_struct;
 struct mm_struct;
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -30,10 +30,10 @@ void complete(struct completion *x)
 {
 unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
 x->done++;
- __wake_up_locked(&x->wait, TASK_NORMAL, 1);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swait_wake_locked(&x->wait, TASK_NORMAL, 1);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete);

@@ -50,10 +50,10 @@ void complete_all(struct completion *x)
 {
 unsigned long flags;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
 x->done += UINT_MAX/2;
- __wake_up_locked(&x->wait, TASK_NORMAL, 0);
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ __swait_wake_locked(&x->wait, TASK_NORMAL, 0);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
 }
 EXPORT_SYMBOL(complete_all);

@@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x,
 long (*action)(long), long timeout, int state)
 {
 if (!x->done) {
- DECLARE_WAITQUEUE(wait, current);
+ DEFINE_SWAITER(wait);

- __add_wait_queue_tail_exclusive(&x->wait, &wait);
+ swait_prepare_locked(&x->wait, &wait);
 do {
 if (signal_pending_state(state, current)) {
 timeout = -ERESTARTSYS;
 break;
 }
 __set_current_state(state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
 timeout = action(timeout);
- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
 } while (!x->done && timeout);
- __remove_wait_queue(&x->wait, &wait);
+ swait_finish_locked(&x->wait, &wait);
 if (!x->done)
 return timeout;
 }
@@ -89,9 +89,9 @@ static inline long __sched
 {
 might_sleep();

- spin_lock_irq(&x->wait.lock);
+ raw_spin_lock_irq(&x->wait.lock);
 timeout = do_wait_for_common(x, action, timeout, state);
- spin_unlock_irq(&x->wait.lock);
+ raw_spin_unlock_irq(&x->wait.lock);
 return timeout;
 }

@@ -277,12 +277,12 @@ bool try_wait_for_completion(struct comp
 if (!READ_ONCE(x->done))
 return 0;

- spin_lock_irqsave(&x->wait.lock, flags);
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
 if (!x->done)
 ret = 0;
 else
 x->done--;
- spin_unlock_irqrestore(&x->wait.lock, flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
 return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -311,7 +311,7 @@ bool completion_done(struct completion *
 * after it's acquired the lock.
 */
 smp_rmb();
- spin_unlock_wait(&x->wait.lock);
+ raw_spin_unlock_wait(&x->wait.lock);
 return true;
 }
 EXPORT_SYMBOL(completion_done);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2802,7 +2802,10 @@ void migrate_disable(void)
 }

 #ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
 #endif

 if (p->migrate_disable) {
@@ -2832,7 +2835,10 @@ void migrate_enable(void)
 }

 #ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
+ if (unlikely(p->migrate_disable_atomic)) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
 #endif
 WARN_ON_ONCE(p->migrate_disable <= 0);
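Note that the completion API itself does not change here; only the internal wait-queue type and its lock become the "simple"/raw variants. Callers keep the standard kernel usage (shown for context, with hypothetical function names):

	#include <linux/completion.h>

	static DECLARE_COMPLETION(setup_done);

	static int waiter_thread(void *unused)
	{
		/* After this patch the sleep happens on a swait queue. */
		wait_for_completion(&setup_done);
		return 0;
	}

	static void finish_setup(void)
	{
		complete(&setup_done);	/* wakes one waiter, raw lock held */
	}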
@@ -0,0 +1,24 @@
Subject: sched: Use the proper LOCK_OFFSET for cond_resched()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 22:51:33 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

RT does not increment preempt count when a 'sleeping' spinlock is
locked. Update PREEMPT_LOCK_OFFSET for that case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2926,7 +2926,7 @@ extern int _cond_resched(void);

 extern int __cond_resched_lock(spinlock_t *lock);

-#ifdef CONFIG_PREEMPT_COUNT
+#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
 #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
 #else
 #define PREEMPT_LOCK_OFFSET 0
@@ -0,0 +1,53 @@
Subject: sched: Take RT softirq semantics into account in cond_resched()
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jul 2011 09:56:44 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The softirq semantics work different on -RT. There is no SOFTIRQ_MASK in
the preemption counter which leads to the BUG_ON() statement in
__cond_resched_softirq(). As for -RT it is enough to perform a "normal"
schedule.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 4 ++++
 kernel/sched/core.c | 2 ++
 2 files changed, 6 insertions(+)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2937,12 +2937,16 @@ extern int __cond_resched_lock(spinlock_
 __cond_resched_lock(lock); \
 })

+#ifndef CONFIG_PREEMPT_RT_FULL
 extern int __cond_resched_softirq(void);

 #define cond_resched_softirq() ({ \
 ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
 __cond_resched_softirq(); \
 })
+#else
+# define cond_resched_softirq() cond_resched()
+#endif

 static inline void cond_resched_rcu(void)
 {
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4479,6 +4479,7 @@ int __cond_resched_lock(spinlock_t *lock
 }
 EXPORT_SYMBOL(__cond_resched_lock);

+#ifndef CONFIG_PREEMPT_RT_FULL
 int __sched __cond_resched_softirq(void)
 {
 BUG_ON(!in_softirq());
@@ -4492,6 +4493,7 @@ int __sched __cond_resched_softirq(void)
 return 0;
 }
 EXPORT_SYMBOL(__cond_resched_softirq);
+#endif

 /**
 * yield - yield the current processor to other threads.
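For context, a hedged sketch of the call pattern the #else branch has to keep working. Callers invoke cond_resched_softirq() inside long-running bottom-half-disabled loops; on RT it now degrades to a plain cond_resched() (the loop and process_one_entry() below are hypothetical, not from this patch):

	static bool process_one_entry(void);	/* hypothetical worker step */

	static void drain_queue(void)
	{
		local_bh_disable();
		while (process_one_entry())
			cond_resched_softirq();	/* plain cond_resched() on -RT */
		local_bh_enable();
	}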
56
debian/patches/features/all/rt/cpu-hotplug-Document-why-PREEMPT_RT-uses-a-spinlock.patch
vendored
Normal file
@@ -0,0 +1,56 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 5 Dec 2013 09:16:52 -0500
Subject: cpu hotplug: Document why PREEMPT_RT uses a spinlock
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The patch:

 cpu: Make hotplug.lock a "sleeping" spinlock on RT

 Tasks can block on hotplug.lock in pin_current_cpu(), but their
 state might be != RUNNING. So the mutex wakeup will set the state
 unconditionally to RUNNING. That might cause spurious unexpected
 wakeups. We could provide a state preserving mutex_lock() function,
 but this is semantically backwards. So instead we convert the
 hotplug.lock() to a spinlock for RT, which has the state preserving
 semantics already.

Fixed a bug where the hotplug lock on PREEMPT_RT can be called after a
task set its state to TASK_UNINTERRUPTIBLE and before it called
schedule. If the hotplug_lock used a mutex, and there was contention,
the current task's state would be turned to TASK_RUNNABLE and the
schedule call will not sleep. This caused unexpected results.

Although the patch had a description of the change, the code had no
comments about it. This causes confusion to those that review the code,
and as PREEMPT_RT is held in a quilt queue and not git, it's not as easy
to see why a change was made. Even if it was in git, the code should
still have a comment for something as subtle as this.

Document the rationale for using a spinlock on PREEMPT_RT in the hotplug
lock code.

Reported-by: Nicholas Mc Guire <der.herr@hofr.at>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/cpu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -109,6 +109,14 @@ struct hotplug_pcp {
 int grab_lock;
 struct completion synced;
 #ifdef CONFIG_PREEMPT_RT_FULL
+ /*
+ * Note, on PREEMPT_RT, the hotplug lock must save the state of
+ * the task, otherwise the mutex will cause the task to fail
+ * to sleep when required. (Because it's called from migrate_disable())
+ *
+ * The spinlock_t on PREEMPT_RT is a mutex that saves the task's
+ * state.
+ */
 spinlock_t lock;
 #else
 struct mutex mutex;
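The race being documented is easiest to see spelled out in code. A sketch of the problematic sequence, illustrative only (some_lock is hypothetical; on RT, spinlock_t's lock/unlock path saves and restores the task state, which is why it is used instead):

	static void pinned_section_wait(void)
	{
		set_current_state(TASK_UNINTERRUPTIBLE);
		/* Wrong with a sleeping mutex: if it is contended, the
		 * owner's unlock wakes us and sets us back to TASK_RUNNING,
		 * clobbering the state set above ... */
		mutex_lock(&some_lock);
		mutex_unlock(&some_lock);
		/* ... so this schedule() returns immediately instead of
		 * sleeping until the expected wakeup. */
		schedule();
	}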
131
debian/patches/features/all/rt/cpu-rt-make-hotplug-lock-a-sleeping-spinlock-on-rt.patch
vendored
Normal file
@@ -0,0 +1,131 @@
Subject: cpu: Make hotplug.lock a "sleeping" spinlock on RT
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 02 Mar 2012 10:36:57 -0500
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Tasks can block on hotplug.lock in pin_current_cpu(), but their state
might be != RUNNING. So the mutex wakeup will set the state
unconditionally to RUNNING. That might cause spurious unexpected
wakeups. We could provide a state preserving mutex_lock() function,
but this is semantically backwards. So instead we convert the
hotplug.lock() to a spinlock for RT, which has the state preserving
semantics already.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Carsten Emde <C.Emde@osadl.org>
Cc: John Kacur <jkacur@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clark Williams <clark.williams@gmail.com>

Link: http://lkml.kernel.org/r/1330702617.25686.265.camel@gandalf.stny.rr.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c | 38 +++++++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 9 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -59,10 +59,16 @@ static int cpu_hotplug_disabled;

 static struct {
 struct task_struct *active_writer;
+
 /* wait queue to wake up the active_writer */
 wait_queue_head_t wq;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ /* Makes the lock keep the task's state */
+ spinlock_t lock;
+#else
 /* verifies that no writer will get active while readers are active */
 struct mutex lock;
+#endif
 /*
 * Also blocks the new readers during
 * an ongoing cpu hotplug operation.
@@ -75,12 +81,26 @@ static struct {
 } cpu_hotplug = {
 .active_writer = NULL,
 .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
+#ifdef CONFIG_PREEMPT_RT_FULL
+ .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
+#else
 .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 .dep_map = {.name = "cpu_hotplug.lock" },
 #endif
 };

+#ifdef CONFIG_PREEMPT_RT_FULL
+# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
+# define hotplug_trylock() rt_spin_trylock(&cpu_hotplug.lock)
+# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
+#else
+# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
+# define hotplug_trylock() mutex_trylock(&cpu_hotplug.lock)
+# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
+#endif
+
 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
 #define cpuhp_lock_acquire_tryread() \
@@ -117,8 +137,8 @@ void pin_current_cpu(void)
 return;
 }
 preempt_enable();
- mutex_lock(&cpu_hotplug.lock);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_lock();
+ hotplug_unlock();
 preempt_disable();
 goto retry;
 }
@@ -191,9 +211,9 @@ void get_online_cpus(void)
 if (cpu_hotplug.active_writer == current)
 return;
 cpuhp_lock_acquire_read();
- mutex_lock(&cpu_hotplug.lock);
+ hotplug_lock();
 atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);

@@ -201,11 +221,11 @@ bool try_get_online_cpus(void)
 {
 if (cpu_hotplug.active_writer == current)
 return true;
- if (!mutex_trylock(&cpu_hotplug.lock))
+ if (!hotplug_trylock())
 return false;
 cpuhp_lock_acquire_tryread();
 atomic_inc(&cpu_hotplug.refcount);
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
 return true;
 }
 EXPORT_SYMBOL_GPL(try_get_online_cpus);
@@ -259,11 +279,11 @@ void cpu_hotplug_begin(void)
 cpuhp_lock_acquire();

 for (;;) {
- mutex_lock(&cpu_hotplug.lock);
+ hotplug_lock();
 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
 if (likely(!atomic_read(&cpu_hotplug.refcount)))
 break;
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
 schedule();
 }
 finish_wait(&cpu_hotplug.wq, &wait);
@@ -272,7 +292,7 @@ void cpu_hotplug_begin(void)
 void cpu_hotplug_done(void)
 {
 cpu_hotplug.active_writer = NULL;
- mutex_unlock(&cpu_hotplug.lock);
+ hotplug_unlock();
 cpuhp_lock_release();
 }
@@ -0,0 +1,562 @@
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 16 Jul 2012 08:07:43 +0000
Subject: cpu/rt: Rework cpu down for PREEMPT_RT
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Bringing a CPU down is a pain with the PREEMPT_RT kernel because
tasks can be preempted in many more places than in non-RT. In
order to handle per_cpu variables, tasks may be pinned to a CPU
for a while, and even sleep. But these tasks need to be off the CPU
if that CPU is going down.

Several synchronization methods have been tried, but when stressed
they failed. This is a new approach.

A sync_tsk thread is still created and tasks may still block on a
lock when the CPU is going down, but how that works is a bit different.
When cpu_down() starts, it will create the sync_tsk and wait on it
to inform that current tasks that are pinned on the CPU are no longer
pinned. But new tasks that are about to be pinned will still be allowed
to do so at this time.

Then the notifiers are called. Several notifiers will bring down tasks
that will enter these locations. Some of these tasks will take locks
of other tasks that are on the CPU. If we don't let those other tasks
continue, but make them block until CPU down is done, the tasks that
the notifiers are waiting on will never complete as they are waiting
for the locks held by the tasks that are blocked.

Thus we still let the task pin the CPU until the notifiers are done.
After the notifiers run, we then make new tasks entering the pinned
CPU sections grab a mutex and wait. This mutex is now a per CPU mutex
in the hotplug_pcp descriptor.

To help things along, a new function in the scheduler code is created
called migrate_me(). This function will try to migrate the current task
off the CPU this is going down if possible. When the sync_tsk is created,
all tasks will then try to migrate off the CPU going down. There are
several cases that this won't work, but it helps in most cases.

After the notifiers are called and if a task can't migrate off but enters
the pin CPU sections, it will be forced to wait on the hotplug_pcp mutex
until the CPU down is complete. Then the scheduler will force the migration
anyway.

Also, I found that THREAD_BOUND needs to also be accounted for in the
pinned CPU, and the migrate_disable no longer treats them special.
This helps fix issues with ksoftirqd and workqueue that unbind on CPU down.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 include/linux/sched.h | 7 +
 kernel/cpu.c | 244 ++++++++++++++++++++++++++++++++++++++++----------
 kernel/sched/core.c | 82 ++++++++++++++++
 3 files changed, 285 insertions(+), 48 deletions(-)

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2217,6 +2217,10 @@ extern void do_set_cpus_allowed(struct t

 extern int set_cpus_allowed_ptr(struct task_struct *p,
 const struct cpumask *new_mask);
+int migrate_me(void);
+void tell_sched_cpu_down_begin(int cpu);
+void tell_sched_cpu_down_done(int cpu);
+
 #else
 static inline void do_set_cpus_allowed(struct task_struct *p,
 const struct cpumask *new_mask)
@@ -2229,6 +2233,9 @@ static inline int set_cpus_allowed_ptr(s
 return -EINVAL;
 return 0;
 }
+static inline int migrate_me(void) { return 0; }
+static inline void tell_sched_cpu_down_begin(int cpu) { }
+static inline void tell_sched_cpu_down_done(int cpu) { }
 #endif

 #ifdef CONFIG_NO_HZ_COMMON
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -59,16 +59,10 @@ static int cpu_hotplug_disabled;

 static struct {
 struct task_struct *active_writer;
-
 /* wait queue to wake up the active_writer */
 wait_queue_head_t wq;
-#ifdef CONFIG_PREEMPT_RT_FULL
- /* Makes the lock keep the task's state */
- spinlock_t lock;
-#else
 /* verifies that no writer will get active while readers are active */
 struct mutex lock;
-#endif
 /*
 * Also blocks the new readers during
 * an ongoing cpu hotplug operation.
@@ -80,27 +74,13 @@
 #endif
 } cpu_hotplug = {
 .active_writer = NULL,
- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
-#ifdef CONFIG_PREEMPT_RT_FULL
- .lock = __SPIN_LOCK_UNLOCKED(cpu_hotplug.lock),
-#else
 .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-#endif
+ .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 .dep_map = {.name = "cpu_hotplug.lock" },
 #endif
 };

-#ifdef CONFIG_PREEMPT_RT_FULL
-# define hotplug_lock() rt_spin_lock(&cpu_hotplug.lock)
-# define hotplug_trylock() rt_spin_trylock(&cpu_hotplug.lock)
-# define hotplug_unlock() rt_spin_unlock(&cpu_hotplug.lock)
-#else
-# define hotplug_lock() mutex_lock(&cpu_hotplug.lock)
-# define hotplug_trylock() mutex_trylock(&cpu_hotplug.lock)
-# define hotplug_unlock() mutex_unlock(&cpu_hotplug.lock)
-#endif
-
 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
 #define cpuhp_lock_acquire_tryread() \
@@ -108,12 +88,42 @@
 #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
 #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)

+/**
+ * hotplug_pcp - per cpu hotplug descriptor
+ * @unplug: set when pin_current_cpu() needs to sync tasks
+ * @sync_tsk: the task that waits for tasks to finish pinned sections
+ * @refcount: counter of tasks in pinned sections
+ * @grab_lock: set when the tasks entering pinned sections should wait
+ * @synced: notifier for @sync_tsk to tell cpu_down it's finished
+ * @mutex: the mutex to make tasks wait (used when @grab_lock is true)
+ * @mutex_init: zero if the mutex hasn't been initialized yet.
+ *
+ * Although @unplug and @sync_tsk may point to the same task, the @unplug
+ * is used as a flag and still exists after @sync_tsk has exited and
+ * @sync_tsk set to NULL.
+ */
 struct hotplug_pcp {
 struct task_struct *unplug;
+ struct task_struct *sync_tsk;
 int refcount;
+ int grab_lock;
 struct completion synced;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spinlock_t lock;
+#else
+ struct mutex mutex;
+#endif
+ int mutex_init;
 };

+#ifdef CONFIG_PREEMPT_RT_FULL
+# define hotplug_lock(hp) rt_spin_lock(&(hp)->lock)
+# define hotplug_unlock(hp) rt_spin_unlock(&(hp)->lock)
+#else
+# define hotplug_lock(hp) mutex_lock(&(hp)->mutex)
+# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex)
+#endif
+
 static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);

 /**
@@ -127,18 +137,39 @@ static DEFINE_PER_CPU(struct hotplug_pcp
 void pin_current_cpu(void)
 {
 struct hotplug_pcp *hp;
+ int force = 0;

 retry:
 hp = this_cpu_ptr(&hotplug_pcp);

- if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ if (!hp->unplug || hp->refcount || force || preempt_count() > 1 ||
 hp->unplug == current) {
 hp->refcount++;
 return;
 }
- preempt_enable();
- hotplug_lock();
- hotplug_unlock();
+ if (hp->grab_lock) {
+ preempt_enable();
+ hotplug_lock(hp);
+ hotplug_unlock(hp);
+ } else {
+ preempt_enable();
+ /*
+ * Try to push this task off of this CPU.
+ */
+ if (!migrate_me()) {
+ preempt_disable();
+ hp = this_cpu_ptr(&hotplug_pcp);
+ if (!hp->grab_lock) {
+ /*
+ * Just let it continue it's already pinned
+ * or about to sleep.
+ */
+ force = 1;
+ goto retry;
+ }
+ preempt_enable();
+ }
+ }
 preempt_disable();
 goto retry;
 }
@@ -159,26 +190,84 @@ void unpin_current_cpu(void)
 wake_up_process(hp->unplug);
 }

-/*
- * FIXME: Is this really correct under all circumstances ?
- */
+static void wait_for_pinned_cpus(struct hotplug_pcp *hp)
+{
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (hp->refcount) {
+ schedule_preempt_disabled();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+}
+
 static int sync_unplug_thread(void *data)
 {
 struct hotplug_pcp *hp = data;

 preempt_disable();
 hp->unplug = current;
+ wait_for_pinned_cpus(hp);
+
+ /*
+ * This thread will synchronize the cpu_down() with threads
+ * that have pinned the CPU. When the pinned CPU count reaches
+ * zero, we inform the cpu_down code to continue to the next step.
+ */
 set_current_state(TASK_UNINTERRUPTIBLE);
- while (hp->refcount) {
- schedule_preempt_disabled();
+ preempt_enable();
+ complete(&hp->synced);
+
+ /*
+ * If all succeeds, the next step will need tasks to wait till
+ * the CPU is offline before continuing. To do this, the grab_lock
+ * is set and tasks going into pin_current_cpu() will block on the
+ * mutex. But we still need to wait for those that are already in
+ * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop()
+ * will kick this thread out.
+ */
+ while (!hp->grab_lock && !kthread_should_stop()) {
+ schedule();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+
+ /* Make sure grab_lock is seen before we see a stale completion */
+ smp_mb();
+
+ /*
+ * Now just before cpu_down() enters stop machine, we need to make
+ * sure all tasks that are in pinned CPU sections are out, and new
+ * tasks will now grab the lock, keeping them from entering pinned
+ * CPU sections.
+ */
+ if (!kthread_should_stop()) {
+ preempt_disable();
+ wait_for_pinned_cpus(hp);
+ preempt_enable();
+ complete(&hp->synced);
+ }
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (!kthread_should_stop()) {
+ schedule();
 set_current_state(TASK_UNINTERRUPTIBLE);
 }
 set_current_state(TASK_RUNNING);
- preempt_enable();
- complete(&hp->synced);
+
+ /*
+ * Force this thread off this CPU as it's going down and
+ * we don't want any more work on this CPU.
+ */
+ current->flags &= ~PF_NO_SETAFFINITY;
+ do_set_cpus_allowed(current, cpu_present_mask);
+ migrate_me();
 return 0;
 }

+static void __cpu_unplug_sync(struct hotplug_pcp *hp)
+{
+ wake_up_process(hp->sync_tsk);
+ wait_for_completion(&hp->synced);
+}
+
 /*
 * Start the sync_unplug_thread on the target cpu and wait for it to
 * complete.
@@ -186,23 +275,83 @@ static int sync_unplug_thread(void *data
 static int cpu_unplug_begin(unsigned int cpu)
 {
 struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
- struct task_struct *tsk;
+ int err;
+
+ /* Protected by cpu_hotplug.lock */
+ if (!hp->mutex_init) {
+#ifdef CONFIG_PREEMPT_RT_FULL
+ spin_lock_init(&hp->lock);
+#else
+ mutex_init(&hp->mutex);
+#endif
+ hp->mutex_init = 1;
+ }
+
+ /* Inform the scheduler to migrate tasks off this CPU */
+ tell_sched_cpu_down_begin(cpu);

 init_completion(&hp->synced);
- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
- if (IS_ERR(tsk))
- return (PTR_ERR(tsk));
- kthread_bind(tsk, cpu);
- wake_up_process(tsk);
- wait_for_completion(&hp->synced);
+
+ hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
+ if (IS_ERR(hp->sync_tsk)) {
+ err = PTR_ERR(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+ return err;
+ }
+ kthread_bind(hp->sync_tsk, cpu);
+
+ /*
+ * Wait for tasks to get out of the pinned sections,
+ * it's still OK if new tasks enter. Some CPU notifiers will
+ * wait for tasks that are going to enter these sections and
+ * we must not have them block.
+ */
+ __cpu_unplug_sync(hp);
+
 return 0;
 }

+static void cpu_unplug_sync(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ init_completion(&hp->synced);
+ /* The completion needs to be initialzied before setting grab_lock */
+ smp_wmb();
+
+ /* Grab the mutex before setting grab_lock */
+ hotplug_lock(hp);
+ hp->grab_lock = 1;
+
+ /*
+ * The CPU notifiers have been completed.
+ * Wait for tasks to get out of pinned CPU sections and have new
+ * tasks block until the CPU is completely down.
+ */
+ __cpu_unplug_sync(hp);
+
+ /* All done with the sync thread */
+ kthread_stop(hp->sync_tsk);
+ hp->sync_tsk = NULL;
+}
+
 static void cpu_unplug_done(unsigned int cpu)
 {
 struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);

 hp->unplug = NULL;
+ /* Let all tasks know cpu unplug is finished before cleaning up */
+ smp_wmb();
+
+ if (hp->sync_tsk)
+ kthread_stop(hp->sync_tsk);
+
+ if (hp->grab_lock) {
+ hotplug_unlock(hp);
+ /* protected by cpu_hotplug.lock */
+ hp->grab_lock = 0;
+ }
+ tell_sched_cpu_down_done(cpu);
 }

 void get_online_cpus(void)
@@ -211,9 +360,9 @@ void get_online_cpus(void)
 if (cpu_hotplug.active_writer == current)
 return;
 cpuhp_lock_acquire_read();
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
 atomic_inc(&cpu_hotplug.refcount);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);

@@ -221,11 +370,11 @@ bool try_get_online_cpus(void)
 {
 if (cpu_hotplug.active_writer == current)
 return true;
- if (!hotplug_trylock())
+ if (!mutex_trylock(&cpu_hotplug.lock))
 return false;
 cpuhp_lock_acquire_tryread();
 atomic_inc(&cpu_hotplug.refcount);
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
 return true;
 }
 EXPORT_SYMBOL_GPL(try_get_online_cpus);
@@ -279,11 +428,11 @@ void cpu_hotplug_begin(void)
 cpuhp_lock_acquire();

 for (;;) {
- hotplug_lock();
+ mutex_lock(&cpu_hotplug.lock);
 prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
 if (likely(!atomic_read(&cpu_hotplug.refcount)))
 break;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
 schedule();
 }
 finish_wait(&cpu_hotplug.wq, &wait);
@@ -292,7 +441,7 @@ void cpu_hotplug_begin(void)
 void cpu_hotplug_done(void)
 {
 cpu_hotplug.active_writer = NULL;
- hotplug_unlock();
+ mutex_unlock(&cpu_hotplug.lock);
 cpuhp_lock_release();
 }

@@ -527,6 +676,9 @@ static int __ref _cpu_down(unsigned int

 smpboot_park_threads(cpu);

+ /* Notifiers are done. Don't let any more tasks pin this CPU. */
+ cpu_unplug_sync(cpu);
+
 /*
 * So now all preempt/rcu users must observe !cpu_active().
 */
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2754,7 +2754,7 @@ void migrate_disable(void)
 {
 struct task_struct *p = current;

- if (in_atomic() || p->flags & PF_NO_SETAFFINITY) {
+ if (in_atomic()) {
 #ifdef CONFIG_SCHED_DEBUG
 p->migrate_disable_atomic++;
 #endif
@@ -2787,7 +2787,7 @@ void migrate_enable(void)
 unsigned long flags;
 struct rq *rq;

- if (in_atomic() || p->flags & PF_NO_SETAFFINITY) {
+ if (in_atomic()) {
 #ifdef CONFIG_SCHED_DEBUG
 p->migrate_disable_atomic--;
 #endif
@@ -4960,6 +4960,84 @@ void do_set_cpus_allowed(struct task_str
 cpumask_copy(&p->cpus_allowed, new_mask);
 }

+static DEFINE_PER_CPU(struct cpumask, sched_cpumasks);
+static DEFINE_MUTEX(sched_down_mutex);
+static cpumask_t sched_down_cpumask;
+
+void tell_sched_cpu_down_begin(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_set_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+void tell_sched_cpu_down_done(int cpu)
+{
+ mutex_lock(&sched_down_mutex);
+ cpumask_clear_cpu(cpu, &sched_down_cpumask);
+ mutex_unlock(&sched_down_mutex);
+}
+
+/**
+ * migrate_me - try to move the current task off this cpu
+ *
+ * Used by the pin_current_cpu() code to try to get tasks
+ * to move off the current CPU as it is going down.
+ * It will only move the task if the task isn't pinned to
+ * the CPU (with migrate_disable, affinity or NO_SETAFFINITY)
+ * and the task has to be in a RUNNING state. Otherwise the
+ * movement of the task will wake it up (change its state
+ * to running) when the task did not expect it.
+ *
+ * Returns 1 if it succeeded in moving the current task
+ * 0 otherwise.
+ */
+int migrate_me(void)
+{
+ struct task_struct *p = current;
+ struct migration_arg arg;
+ struct cpumask *cpumask;
+ struct cpumask *mask;
+ unsigned long flags;
+ unsigned int dest_cpu;
+ struct rq *rq;
+
+ /*
+ * We can not migrate tasks bounded to a CPU or tasks not
+ * running. The movement of the task will wake it up.
+ */
+ if (p->flags & PF_NO_SETAFFINITY || p->state)
+ return 0;
+
+ mutex_lock(&sched_down_mutex);
+ rq = task_rq_lock(p, &flags);
+
+ cpumask = this_cpu_ptr(&sched_cpumasks);
+ mask = &p->cpus_allowed;
+
+ cpumask_andnot(cpumask, mask, &sched_down_cpumask);
+
+ if (!cpumask_weight(cpumask)) {
+ /* It's only on this CPU? */
+ task_rq_unlock(rq, p, &flags);
+ mutex_unlock(&sched_down_mutex);
+ return 0;
+ }
+
+ dest_cpu = cpumask_any_and(cpu_active_mask, cpumask);
+
+ arg.task = p;
+ arg.dest_cpu = dest_cpu;
+
+ task_rq_unlock(rq, p, &flags);
+
+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+ tlb_migrate_finish(p->mm);
+ mutex_unlock(&sched_down_mutex);
+
+ return 1;
+}
+
 /*
 * This is how migration works:
 *
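Because the commit message describes several interleaved phases, here is a hedged summary of the handshake as a commented outline (the names are real identifiers from the patch above; the sequencing is restated from the message, not additional code):

	/*
	 * cpu_unplug_begin(cpu)
	 *   tell_sched_cpu_down_begin(cpu)  -- migrate_me() now avoids this CPU
	 *   kthread_create(sync_unplug_thread) + kthread_bind(cpu)
	 *   __cpu_unplug_sync(hp)           -- wait for current pinned tasks to
	 *                                      leave; new pinners still allowed
	 * ... CPU notifiers run; tasks may still pin the CPU ...
	 * cpu_unplug_sync(cpu)
	 *   hotplug_lock(hp); hp->grab_lock = 1
	 *   __cpu_unplug_sync(hp)           -- new pinners now block on the
	 *                                      per-CPU lock until down completes
	 * ... stop machine takes the CPU offline ...
	 * cpu_unplug_done(cpu)
	 *   hotplug_unlock(hp); tell_sched_cpu_down_done(cpu)
	 */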
107
debian/patches/features/all/rt/cpu_chill-Add-a-UNINTERRUPTIBLE-hrtimer_nanosleep.patch
vendored
Normal file
@@ -0,0 +1,107 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 4 Mar 2014 12:28:32 -0500
Subject: cpu_chill: Add a UNINTERRUPTIBLE hrtimer_nanosleep
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We hit another bug that was caused by switching cpu_chill() from
msleep() to hrtimer_nanosleep().

This time it is a livelock. The problem is that hrtimer_nanosleep()
calls schedule with the state == TASK_INTERRUPTIBLE. But this means
that if a signal is pending, the scheduler won't schedule, and will
simply change the current task state back to TASK_RUNNING. This
nullifies the whole point of cpu_chill() in the first place. That is,
if a task is spinning on a try_lock() and it preempted the owner of the
lock, if it has a signal pending, it will never give up the CPU to let
the owner of the lock run.

I made a static function __hrtimer_nanosleep() that takes a fifth
parameter "state", which determines the task state that the
nanosleep() will be in. The normal hrtimer_nanosleep() will act the
same, but cpu_chill() will call the __hrtimer_nanosleep() directly with
the TASK_UNINTERRUPTIBLE state.

cpu_chill() only cares that the first sleep happens, and does not care
about the state of the restart schedule (in hrtimer_nanosleep_restart).


Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/time/hrtimer.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1746,12 +1746,13 @@ void hrtimer_init_sleeper(struct hrtimer
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);

-static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
+static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
+ unsigned long state)
 {
 hrtimer_init_sleeper(t, current);

 do {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(state);
 hrtimer_start_expires(&t->timer, mode);
 if (!hrtimer_active(&t->timer))
 t->task = NULL;
@@ -1795,7 +1796,8 @@ long __sched hrtimer_nanosleep_restart(s
 HRTIMER_MODE_ABS);
 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);

- if (do_nanosleep(&t, HRTIMER_MODE_ABS))
+ /* cpu_chill() does not care about restart state. */
+ if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
 goto out;

 rmtp = restart->nanosleep.rmtp;
@@ -1812,8 +1814,10 @@ long __sched hrtimer_nanosleep_restart(s
 return ret;
 }

-long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
- const enum hrtimer_mode mode, const clockid_t clockid)
+static long
+__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+ const enum hrtimer_mode mode, const clockid_t clockid,
+ unsigned long state)
 {
 struct restart_block *restart;
 struct hrtimer_sleeper t;
@@ -1826,7 +1830,7 @@ long hrtimer_nanosleep(struct timespec *

 hrtimer_init_on_stack(&t.timer, clockid, mode);
 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
- if (do_nanosleep(&t, mode))
+ if (do_nanosleep(&t, mode, state))
 goto out;

 /* Absolute timers do not update the rmtp value and restart: */
@@ -1853,6 +1857,12 @@ long hrtimer_nanosleep(struct timespec *
 return ret;
 }

+long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
+ const enum hrtimer_mode mode, const clockid_t clockid)
+{
+ return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
+}
+
 SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
 struct timespec __user *, rmtp)
 {
@@ -1879,7 +1889,8 @@ void cpu_chill(void)
 unsigned int freeze_flag = current->flags & PF_NOFREEZE;

 current->flags |= PF_NOFREEZE;
- hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
+ TASK_UNINTERRUPTIBLE);
 if (!freeze_flag)
 current->flags &= ~PF_NOFREEZE;
 }
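cpu_chill()'s canonical use is a trylock retry loop that would otherwise livelock on RT when the lock owner has been preempted; with the change above the back-off sleep can no longer be cut short by a pending signal. A sketch of such a caller (assuming the RT patch set's cpu_chill(); my_dev and its lock are hypothetical):

	#include <linux/delay.h>	/* cpu_chill() is declared here in the RT patches */
	#include <linux/spinlock.h>

	static void access_dev(struct my_dev *dev)
	{
		/* Busy-spinning here could starve a preempted lock owner on
		 * RT, so back off with a real, uninterruptible sleep. */
		while (!spin_trylock(&dev->lock))
			cpu_chill();
		/* ... critical section ... */
		spin_unlock(&dev->lock);
	}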
@@ -0,0 +1,53 @@
From: Tiejun Chen <tiejun.chen@windriver.com>
Subject: cpu_down: move migrate_enable() back
Date: Thu, 7 Nov 2013 10:06:07 +0800
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Commit 08c1ab68, "hotplug-use-migrate-disable.patch", intends to
use migrate_enable()/migrate_disable() to replace that combination
of preempt_enable() and preempt_disable(), but actually in
!CONFIG_PREEMPT_RT_FULL case, migrate_enable()/migrate_disable()
are still equal to preempt_enable()/preempt_disable(). So that
followed cpu_hotplug_begin()/cpu_unplug_begin(cpu) would go schedule()
to trigger schedule_debug() like this:

_cpu_down()
   |
   + migrate_disable() = preempt_disable()
   |
   + cpu_hotplug_begin() or cpu_unplug_begin(cpu)
   |
   + schedule()
   |
   + __schedule()
   |
   + preempt_disable();
   |
   + __schedule_bug() is true!

So we should move migrate_enable() as the original scheme.


Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 kernel/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -668,6 +668,7 @@ static int __ref _cpu_down(unsigned int
 err = -EBUSY;
 goto restore_cpus;
 }
+ migrate_enable();

 cpu_hotplug_begin();
 err = cpu_unplug_begin(cpu);
@@ -744,7 +745,6 @@ static int __ref _cpu_down(unsigned int
 out_release:
 cpu_unplug_done(cpu);
 out_cancel:
- migrate_enable();
 cpu_hotplug_done();
 if (!err)
 cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
@@ -0,0 +1,196 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 21 Jul 2015 15:28:49 +0200
Subject: cpufreq: Remove cpufreq_rwsem
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

cpufreq_rwsem was introduced in commit 6eed9404ab3c4 ("cpufreq: Use
rwsem for protecting critical sections") in order to replace
try_module_get() on the cpu-freq driver. That try_module_get() worked
well until the refcount was so heavily used that module removal became
more or less impossible.

Though when looking at the various (undocumented) protection
mechanisms in that code, the randomly sprinkled around cpufreq_rwsem
locking sites are superfluous.

The policy, which is acquired in cpufreq_cpu_get() and released in
cpufreq_cpu_put() is sufficiently protected already.

cpufreq_cpu_get(cpu)
  /* Protects against concurrent driver removal */
  read_lock_irqsave(&cpufreq_driver_lock, flags);
  policy = per_cpu(cpufreq_cpu_data, cpu);
  kobject_get(&policy->kobj);
  read_unlock_irqrestore(&cpufreq_driver_lock, flags);

The reference on the policy serializes versus module unload already:

cpufreq_unregister_driver()
  subsys_interface_unregister()
    __cpufreq_remove_dev_finish()
      per_cpu(cpufreq_cpu_data) = NULL;
      cpufreq_policy_put_kobj()

If there is a reference held on the policy, i.e. obtained prior to the
unregister call, then cpufreq_policy_put_kobj() will wait until that
reference is dropped. So once subsys_interface_unregister() returns
there is no policy pointer in flight and no new reference can be
obtained. So that rwsem protection is useless.

The other usage of cpufreq_rwsem in show()/store() of the sysfs
interface is redundant as well because sysfs already does the proper
kobject_get()/put() pairs.

That leaves CPU hotplug versus module removal. The current
down_write() around the write_lock() in cpufreq_unregister_driver() is
silly at best as it protects actually nothing.

The trivial solution to this is to prevent hotplug across
cpufreq_unregister_driver completely.

[upstream: rafael/linux-pm 454d3a2500a4eb33be85dde3bfba9e5f6b5efadc]
[fixes: "cpufreq_stat_notifier_trans: No policy found" since v4.0-rt]
Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/cpufreq/cpufreq.c | 35 +++--------------------------------
 1 file changed, 3 insertions(+), 32 deletions(-)

--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -64,12 +64,6 @@ static inline bool has_target(void)
 return cpufreq_driver->target_index || cpufreq_driver->target;
 }

-/*
- * rwsem to guarantee that cpufreq driver module doesn't unload during critical
- * sections
- */
-static DECLARE_RWSEM(cpufreq_rwsem);
-
 /* internal prototypes */
 static int __cpufreq_governor(struct cpufreq_policy *policy,
 unsigned int event);
@@ -215,9 +209,6 @@ struct cpufreq_policy *cpufreq_cpu_get(u
 if (cpu >= nr_cpu_ids)
 return NULL;

- if (!down_read_trylock(&cpufreq_rwsem))
- return NULL;
-
 /* get the cpufreq driver */
 read_lock_irqsave(&cpufreq_driver_lock, flags);

@@ -230,9 +221,6 @@ struct cpufreq_policy *cpufreq_cpu_get(u

 read_unlock_irqrestore(&cpufreq_driver_lock, flags);

- if (!policy)
- up_read(&cpufreq_rwsem);
|
||||
-
|
||||
return policy;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
|
||||
@@ -240,7 +228,6 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
|
||||
void cpufreq_cpu_put(struct cpufreq_policy *policy)
|
||||
{
|
||||
kobject_put(&policy->kobj);
|
||||
- up_read(&cpufreq_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
|
||||
|
||||
@@ -765,9 +752,6 @@ static ssize_t show(struct kobject *kobj
|
||||
struct freq_attr *fattr = to_attr(attr);
|
||||
ssize_t ret;
|
||||
|
||||
- if (!down_read_trylock(&cpufreq_rwsem))
|
||||
- return -EINVAL;
|
||||
-
|
||||
down_read(&policy->rwsem);
|
||||
|
||||
if (fattr->show)
|
||||
@@ -776,7 +760,6 @@ static ssize_t show(struct kobject *kobj
|
||||
ret = -EIO;
|
||||
|
||||
up_read(&policy->rwsem);
|
||||
- up_read(&cpufreq_rwsem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -793,9 +776,6 @@ static ssize_t store(struct kobject *kob
|
||||
if (!cpu_online(policy->cpu))
|
||||
goto unlock;
|
||||
|
||||
- if (!down_read_trylock(&cpufreq_rwsem))
|
||||
- goto unlock;
|
||||
-
|
||||
down_write(&policy->rwsem);
|
||||
|
||||
if (fattr->store)
|
||||
@@ -804,8 +784,6 @@ static ssize_t store(struct kobject *kob
|
||||
ret = -EIO;
|
||||
|
||||
up_write(&policy->rwsem);
|
||||
-
|
||||
- up_read(&cpufreq_rwsem);
|
||||
unlock:
|
||||
put_online_cpus();
|
||||
|
||||
@@ -1117,16 +1095,12 @@ static int __cpufreq_add_dev(struct devi
|
||||
if (unlikely(policy))
|
||||
return 0;
|
||||
|
||||
- if (!down_read_trylock(&cpufreq_rwsem))
|
||||
- return 0;
|
||||
-
|
||||
/* Check if this cpu was hot-unplugged earlier and has siblings */
|
||||
read_lock_irqsave(&cpufreq_driver_lock, flags);
|
||||
for_each_policy(policy) {
|
||||
if (cpumask_test_cpu(cpu, policy->related_cpus)) {
|
||||
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
|
||||
ret = cpufreq_add_policy_cpu(policy, cpu, dev);
|
||||
- up_read(&cpufreq_rwsem);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@@ -1269,8 +1243,6 @@ static int __cpufreq_add_dev(struct devi
|
||||
|
||||
kobject_uevent(&policy->kobj, KOBJ_ADD);
|
||||
|
||||
- up_read(&cpufreq_rwsem);
|
||||
-
|
||||
/* Callback for handling stuff after policy is ready */
|
||||
if (cpufreq_driver->ready)
|
||||
cpufreq_driver->ready(policy);
|
||||
@@ -1304,8 +1276,6 @@ static int __cpufreq_add_dev(struct devi
|
||||
cpufreq_policy_free(policy);
|
||||
|
||||
nomem_out:
|
||||
- up_read(&cpufreq_rwsem);
|
||||
-
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2499,19 +2469,20 @@ int cpufreq_unregister_driver(struct cpu
|
||||
|
||||
pr_debug("unregistering driver %s\n", driver->name);
|
||||
|
||||
+ /* Protect against concurrent cpu hotplug */
|
||||
+ get_online_cpus();
|
||||
subsys_interface_unregister(&cpufreq_interface);
|
||||
if (cpufreq_boost_supported())
|
||||
cpufreq_sysfs_remove_file(&boost.attr);
|
||||
|
||||
unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
|
||||
|
||||
- down_write(&cpufreq_rwsem);
|
||||
write_lock_irqsave(&cpufreq_driver_lock, flags);
|
||||
|
||||
cpufreq_driver = NULL;
|
||||
|
||||
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
|
||||
- up_write(&cpufreq_rwsem);
|
||||
+ put_online_cpus();
|
||||
|
||||
return 0;
|
||||
}
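The lifetime argument in the message above boils down to one pattern: teardown waits for the last reference, so a successful get() can never race with unregister, and the extra rwsem buys nothing. A hypothetical userspace stand-in for the kobject_get()/kobject_put()/cpufreq_policy_put_kobj() scheme (compile with -pthread):

#include <pthread.h>
#include <stdio.h>

struct policy {
	pthread_mutex_t lock;
	pthread_cond_t	released;
	int		refs;
	int		dead;	/* set by unregister; blocks new get()s */
};

static int policy_get(struct policy *p)
{
	int ok;

	pthread_mutex_lock(&p->lock);
	ok = !p->dead;			/* per_cpu pointer already cleared? */
	if (ok)
		p->refs++;		/* kobject_get() */
	pthread_mutex_unlock(&p->lock);
	return ok;
}

static void policy_put(struct policy *p)
{
	pthread_mutex_lock(&p->lock);
	if (--p->refs == 0)		/* kobject_put() */
		pthread_cond_signal(&p->released);
	pthread_mutex_unlock(&p->lock);
}

static void policy_unregister(struct policy *p)
{
	pthread_mutex_lock(&p->lock);
	p->dead = 1;			/* per_cpu(cpufreq_cpu_data) = NULL */
	while (p->refs > 0)		/* cpufreq_policy_put_kobj() wait */
		pthread_cond_wait(&p->released, &p->lock);
	pthread_mutex_unlock(&p->lock);
}

int main(void)
{
	struct policy p = { PTHREAD_MUTEX_INITIALIZER,
			    PTHREAD_COND_INITIALIZER, 0, 0 };

	if (policy_get(&p))
		policy_put(&p);
	policy_unregister(&p);
	printf("no reference in flight after unregister\n");
	return 0;
}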
debian/patches/features/all/rt/cpufreq-drop-K8-s-driver-from-beeing-selected.patch (new file)
@@ -0,0 +1,33 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 9 Apr 2015 15:23:01 +0200
Subject: cpufreq: drop K8's driver from beeing selected
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Ralf posted a picture of a backtrace from

| powernowk8_target_fn() -> transition_frequency_fidvid() and then at the
| end:
| 932         policy = cpufreq_cpu_get(smp_processor_id());
| 933         cpufreq_cpu_put(policy);

crashing the system on -RT. I assumed that policy was a NULL pointer but
that was ruled out. Since Ralf can't do any more investigation on this
and I have no machine with this CPU, I simply switch the driver off.

Reported-by: Ralf Mardorf <ralf.mardorf@alice-dsl.net>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/cpufreq/Kconfig.x86 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI
 
 config X86_POWERNOW_K8
 	tristate "AMD Opteron/Athlon64 PowerNow!"
-	depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ
+	depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE
 	help
 	  This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors.
 	  Support for K10 and newer processors is now in acpi-cpufreq.
@@ -0,0 +1,35 @@
Subject: cpumask: Disable CONFIG_CPUMASK_OFFSTACK for RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 14 Dec 2011 01:03:49 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

We can't deal with the cpumask allocations which happen in atomic
context (see arch/x86/kernel/apic/io_apic.c) on RT right now.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig | 2 +-
 lib/Kconfig      | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -841,7 +841,7 @@ config IOMMU_HELPER
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL
-	select CPUMASK_OFFSTACK
+	select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
 	---help---
 	  Enable maximum number of CPUS and NUMA Nodes for this architecture.
 	  If unsure, say N.
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -391,6 +391,7 @@ config CHECK_SIGNATURE
 
 config CPUMASK_OFFSTACK
 	bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+	depends on !PREEMPT_RT_FULL
 	help
 	  Use dynamic allocation for cpumask_var_t, instead of putting
 	  them on the stack. This is a bit more expensive, but avoids
debian/patches/features/all/rt/crypto-Reduce-preempt-disabled-regions-more-algos.patch (new file)
@@ -0,0 +1,242 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 21 Feb 2014 17:24:04 +0100
Subject: crypto: Reduce preempt disabled regions, more algos
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Don Estabrook reported
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()
| kernel: WARNING: CPU: 2 PID: 858 at kernel/sched/core.c:2462 migrate_enable+0x17b/0x200()
| kernel: WARNING: CPU: 3 PID: 865 at kernel/sched/core.c:2428 migrate_disable+0xed/0x100()

and his backtrace showed some crypto functions which looked fine.

The problem is the following sequence:

glue_xts_crypt_128bit()
{
	blkcipher_walk_virt();		/* normal migrate_disable() */

	glue_fpu_begin();		/* get atomic */

	while (nbytes) {
		__glue_xts_crypt_128bit();
		blkcipher_walk_done();	/* with nbytes = 0, migrate_enable()
					 * while we are atomic */
	};
	glue_fpu_end()			/* no longer atomic */
}

and this is why the counter gets out of sync and the warning is printed.
The other problem is that we are non-preemptible between
glue_fpu_begin() and glue_fpu_end() and the latency grows. To fix this,
I shorten the FPU off region and ensure blkcipher_walk_done() is called
with preemption enabled. This might hurt the performance because we now
enable/disable the FPU state more often but we gain lower latency and
the bug is gone.

Reported-by: Don Estabrook <don.estabrook@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/crypto/cast5_avx_glue.c | 21 +++++++++------------
 arch/x86/crypto/glue_helper.c    | 31 +++++++++++++++----------------
 2 files changed, 24 insertions(+), 28 deletions(-)

--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fp
 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
 		     bool enc)
 {
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = CAST5_BLOCK_SIZE;
 	unsigned int nbytes;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_de
 		u8 *wsrc = walk->src.virt.addr;
 		u8 *wdst = walk->dst.virt.addr;
 
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);
 
 		/* Process multi-block batch */
 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_de
 		} while (nbytes >= bsize);
 
 done:
+		cast5_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, walk, nbytes);
 	}
-
-	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
@@ -228,7 +227,7 @@ static unsigned int __cbc_decrypt(struct
 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct blkcipher_walk walk;
 	int err;
 
@@ -237,12 +236,11 @@ static int cbc_decrypt(struct blkcipher_
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	while ((nbytes = walk.nbytes)) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);
 		nbytes = __cbc_decrypt(desc, &walk);
+		cast5_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
-
-	cast5_fpu_end(fpu_enabled);
 	return err;
 }
 
@@ -312,7 +310,7 @@ static unsigned int __ctr_crypt(struct b
 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		     struct scatterlist *src, unsigned int nbytes)
 {
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct blkcipher_walk walk;
 	int err;
 
@@ -321,13 +319,12 @@ static int ctr_crypt(struct blkcipher_de
 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
 
 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
-		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		fpu_enabled = cast5_fpu_begin(false, nbytes);
 		nbytes = __ctr_crypt(desc, &walk);
+		cast5_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 
-	cast5_fpu_end(fpu_enabled);
-
 	if (walk.nbytes) {
 		ctr_crypt_final(desc, &walk);
 		err = blkcipher_walk_done(desc, &walk, 0);
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const
 	void *ctx = crypto_blkcipher_ctx(desc->tfm);
 	const unsigned int bsize = 128 / 8;
 	unsigned int nbytes, i, func_bytes;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	int err;
 
 	err = blkcipher_walk_virt(desc, walk);
@@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const
 		u8 *wdst = walk->dst.virt.addr;
 
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);
 
 		for (i = 0; i < gctx->num_funcs; i++) {
 			func_bytes = bsize * gctx->funcs[i].num_blocks;
@@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const
 		}
 
 done:
+		glue_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, walk, nbytes);
 	}
 
-	glue_fpu_end(fpu_enabled);
 	return err;
 }
 
@@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct
 			    struct scatterlist *src, unsigned int nbytes)
 {
 	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct blkcipher_walk walk;
 	int err;
 
@@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct
 
 	while ((nbytes = walk.nbytes)) {
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);
 		nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+		glue_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 
-	glue_fpu_end(fpu_enabled);
 	return err;
 }
 EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
@@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct c
 			  struct scatterlist *src, unsigned int nbytes)
 {
 	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct blkcipher_walk walk;
 	int err;
 
@@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct c
 
 	while ((nbytes = walk.nbytes) >= bsize) {
 		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-					     desc, fpu_enabled, nbytes);
+					     desc, false, nbytes);
 		nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+		glue_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 	}
 
-	glue_fpu_end(fpu_enabled);
-
 	if (walk.nbytes) {
 		glue_ctr_crypt_final_128bit(
 			gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
@@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct c
 			  void *tweak_ctx, void *crypt_ctx)
 {
 	const unsigned int bsize = 128 / 8;
-	bool fpu_enabled = false;
+	bool fpu_enabled;
 	struct blkcipher_walk walk;
 	int err;
 
@@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct c
 
 	/* set minimum length to bsize, for tweak_fn */
 	fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
-				     desc, fpu_enabled,
+				     desc, false,
 				     nbytes < bsize ? bsize : nbytes);
-
 	/* calculate first value of T */
 	tweak_fn(tweak_ctx, walk.iv, walk.iv);
+	glue_fpu_end(fpu_enabled);
 
 	while (nbytes) {
+		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+					     desc, false, nbytes);
 		nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
 
+		glue_fpu_end(fpu_enabled);
 		err = blkcipher_walk_done(desc, &walk, nbytes);
 		nbytes = walk.nbytes;
 	}
-
-	glue_fpu_end(fpu_enabled);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
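The shape of the fix is the same in every hunk: open and close the atomic (FPU-owned) region inside the loop, so the possibly-sleeping blkcipher_walk_done() always runs outside it. A compilable sketch of that restructuring, with stubs modelling the constraint rather than the kernel API:

#include <assert.h>

static int atomic_region;	/* models "FPU held, preemption off" */

static void fpu_begin(void) { atomic_region = 1; }
static void fpu_end(void)   { atomic_region = 0; }

static int walk_done(int nbytes)	/* may sleep: must not be atomic */
{
	assert(!atomic_region);		/* the migrate_enable() warning */
	return nbytes > 64 ? nbytes - 64 : 0;
}

int main(void)
{
	int nbytes = 256;

	while (nbytes) {
		fpu_begin();
		/* ... process one batch of cipher blocks with the FPU ... */
		fpu_end();		/* close the region before sleeping */
		nbytes = walk_done(nbytes);
	}
	return 0;
}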
@@ -0,0 +1,26 @@
Subject: debugobjects: Make RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:41:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Avoid filling the pool / allocating memory with irqs off.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 lib/debugobjects.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -309,7 +309,10 @@ static void
 	struct debug_obj *obj;
 	unsigned long flags;
 
-	fill_pool();
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (preempt_count() == 0 && !irqs_disabled())
+#endif
+		fill_pool();
 
 	db = get_bucket((unsigned long) addr);
@@ -0,0 +1,27 @@
Subject: dm: Make rt aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Nov 2011 23:06:09 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use the BUG_ON_NONRT variant for the irqs_disabled() checks. RT has
interrupts legitimately enabled here as we can't deadlock against the
irq thread due to the "sleeping spinlocks" conversion.

Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/md/dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2143,7 +2143,7 @@ static void dm_request_fn(struct request
 		/* Establish tio->ti before queuing work (map_tio_request) */
 		tio->ti = ti;
 		queue_kthread_work(&md->kworker, &tio->work);
-		BUG_ON(!irqs_disabled());
+		BUG_ON_NONRT(!irqs_disabled());
 	}
 
 	goto out;
@@ -0,0 +1,26 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:24 -0500
Subject: drivers/net: Use disable_irq_nosync() in 8139too
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Use disable_irq_nosync() instead of disable_irq() as this might be
called in atomic context with netpoll.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/ethernet/realtek/8139too.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(stru
 	struct rtl8139_private *tp = netdev_priv(dev);
 	const int irq = tp->pci_dev->irq;
 
-	disable_irq(irq);
+	disable_irq_nosync(irq);
 	rtl8139_interrupt(irq, dev);
 	enable_irq(irq);
 }
@@ -0,0 +1,127 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 20 Jun 2009 11:36:54 +0200
Subject: drivers/net: fix livelock issues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Preempt-RT runs into a livelock issue with the NETDEV_TX_LOCKED micro
optimization. The reason is that the softirq thread is rescheduling
itself on that return value. Depending on priorities it starts to
monopolize the CPU and livelock on UP systems.

Remove it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c      | 6 +-----
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c      | 3 +--
 drivers/net/ethernet/chelsio/cxgb/sge.c              | 3 +--
 drivers/net/ethernet/neterion/s2io.c                 | 7 +------
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 6 ++----
 drivers/net/ethernet/tehuti/tehuti.c                 | 9 ++-------
 drivers/net/rionet.c                                 | 6 +-----
 7 files changed, 9 insertions(+), 31 deletions(-)

--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2213,11 +2213,7 @@ static netdev_tx_t atl1c_xmit_frame(stru
 	}
 
 	tpd_req = atl1c_cal_tpd_req(skb);
-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
-		if (netif_msg_pktdata(adapter))
-			dev_info(&adapter->pdev->dev, "tx locked\n");
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&adapter->tx_lock, flags);
 
 	if (atl1c_tpd_avail(adapter, type) < tpd_req) {
 		/* no enough descriptor, just stop queue */
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(stru
 		return NETDEV_TX_OK;
 	}
 	tpd_req = atl1e_cal_tdp_req(skb);
-	if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
-		return NETDEV_TX_LOCKED;
+	spin_lock_irqsave(&adapter->tx_lock, flags);
 
 	if (atl1e_tpd_avail(adapter) < tpd_req) {
 		/* no enough descriptor, just stop queue */
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb
 	struct cmdQ *q = &sge->cmdQ[qid];
 	unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
 
-	if (!spin_trylock(&q->lock))
-		return NETDEV_TX_LOCKED;
+	spin_lock(&q->lock);
 
 	reclaim_completed_tx(sge, q);
 
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_b
 			[skb->priority & (MAX_TX_FIFOS - 1)];
 	fifo = &mac_control->fifos[queue];
 
-	if (do_spin_lock)
-		spin_lock_irqsave(&fifo->tx_lock, flags);
-	else {
-		if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
-			return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&fifo->tx_lock, flags);
 
 	if (sp->config.multiq) {
 		if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_
 	struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring;
 	unsigned long flags;
 
-	if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
-		/* Collision - tell upper layer to requeue */
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&tx_ring->tx_lock, flags);
+
 	if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
 		netif_stop_queue(netdev);
 		spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struc
 	unsigned long flags;
 
 	ENTER;
-	local_irq_save(flags);
-	if (!spin_trylock(&priv->tx_lock)) {
-		local_irq_restore(flags);
-		DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
-		    BDX_DRV_NAME, ndev->name);
-		return NETDEV_TX_LOCKED;
-	}
+
+	spin_lock_irqsave(&priv->tx_lock, flags);
 
 	/* build tx descriptor */
 	BDX_ASSERT(f->m.wptr >= f->m.memsz);	/* started with valid wptr */
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_b
 	unsigned long flags;
 	int add_num = 1;
 
-	local_irq_save(flags);
-	if (!spin_trylock(&rnet->tx_lock)) {
-		local_irq_restore(flags);
-		return NETDEV_TX_LOCKED;
-	}
+	spin_lock_irqsave(&rnet->tx_lock, flags);
 
 	if (is_multicast_ether_addr(eth->h_dest))
 		add_num = nets[rnet->mport->id].nact;
@@ -0,0 +1,49 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 3 Jul 2009 08:30:00 -0500
Subject: drivers/net: vortex fix locking issues
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Argh, cut and paste wasn't enough...

Use this patch instead. It needs an irq disable. But, believe it or not,
on SMP this is actually better. If the irq is shared (as it is in Mark's
case), we don't stop the irq of other devices from being handled on
another CPU (unfortunately for Mark, he pinned all interrupts to one CPU).

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

 drivers/net/ethernet/3com/3c59x.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Signed-off-by: Ingo Molnar <mingo@elte.hu>

--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -842,9 +842,9 @@ static void poll_vortex(struct net_devic
 {
 	struct vortex_private *vp = netdev_priv(dev);
 	unsigned long flags;
-	local_irq_save(flags);
+	local_irq_save_nort(flags);
 	(vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
-	local_irq_restore(flags);
+	local_irq_restore_nort(flags);
 }
 #endif
 
@@ -1916,12 +1916,12 @@ static void vortex_tx_timeout(struct net
 	 * Block interrupts because vortex_interrupt does a bare spin_lock()
 	 */
 	unsigned long flags;
-	local_irq_save(flags);
+	local_irq_save_nort(flags);
 	if (vp->full_bus_master_tx)
 		boomerang_interrupt(dev->irq, dev);
 	else
 		vortex_interrupt(dev->irq, dev);
-	local_irq_restore(flags);
+	local_irq_restore_nort(flags);
 }
 }
debian/patches/features/all/rt/drivers-random-reduce-preempt-disabled-region.patch (new file)
@@ -0,0 +1,33 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:30 -0500
Subject: drivers: random: Reduce preempt disabled region
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

No need to keep preemption disabled across the whole function.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/char/random.c | 3 ---
 1 file changed, 3 deletions(-)

--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -776,8 +776,6 @@ static void add_timer_randomness(struct
 	} sample;
 	long delta, delta2, delta3;
 
-	preempt_disable();
-
 	sample.jiffies = jiffies;
 	sample.cycles = random_get_entropy();
 	sample.num = num;
@@ -818,7 +816,6 @@ static void add_timer_randomness(struct
 		 */
 		credit_entropy_bits(r, min_t(int, fls(delta>>1), 11));
 	}
-	preempt_enable();
 }
 
 void add_input_randomness(unsigned int type, unsigned int code,
@@ -0,0 +1,43 @@
Subject: tty/serial/omap: Make the locking RT aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 28 Jul 2011 13:32:57 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is a sleeping lock and local_irq_save() is not the
optimisation we are looking for. Redo it to make it work on -RT and
non-RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/tty/serial/omap-serial.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -1282,13 +1282,10 @@ serial_omap_console_write(struct console
 
 	pm_runtime_get_sync(up->dev);
 
-	local_irq_save(flags);
-	if (up->port.sysrq)
-		locked = 0;
-	else if (oops_in_progress)
-		locked = spin_trylock(&up->port.lock);
+	if (up->port.sysrq || oops_in_progress)
+		locked = spin_trylock_irqsave(&up->port.lock, flags);
 	else
-		spin_lock(&up->port.lock);
+		spin_lock_irqsave(&up->port.lock, flags);
 
 	/*
 	 * First save the IER then disable the interrupts
@@ -1317,8 +1314,7 @@ serial_omap_console_write(struct console
 	pm_runtime_mark_last_busy(up->dev);
 	pm_runtime_put_autosuspend(up->dev);
 	if (locked)
-		spin_unlock(&up->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
 static int __init
@@ -0,0 +1,48 @@
Subject: tty/serial/pl011: Make the locking work on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 08 Jan 2013 21:36:51 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The lock is a sleeping lock and local_irq_save() is not the optimisation
we are looking for. Redo it to make it work on -RT and non-RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/tty/serial/amba-pl011.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -2000,13 +2000,19 @@ pl011_console_write(struct console *co,
 
 	clk_enable(uap->clk);
 
-	local_irq_save(flags);
+	/*
+	 * local_irq_save(flags);
+	 *
+	 * This local_irq_save() is nonsense. If we come in via sysrq
+	 * handling then interrupts are already disabled. Aside of
+	 * that the port.sysrq check is racy on SMP regardless.
+	 */
 	if (uap->port.sysrq)
 		locked = 0;
 	else if (oops_in_progress)
-		locked = spin_trylock(&uap->port.lock);
+		locked = spin_trylock_irqsave(&uap->port.lock, flags);
 	else
-		spin_lock(&uap->port.lock);
+		spin_lock_irqsave(&uap->port.lock, flags);
 
 	/*
 	 * First save the CR then disable the interrupts
@@ -2028,8 +2034,7 @@ pl011_console_write(struct console *co,
 	writew(old_cr, uap->port.membase + UART011_CR);
 
 	if (locked)
-		spin_unlock(&uap->port.lock);
-	local_irq_restore(flags);
+		spin_unlock_irqrestore(&uap->port.lock, flags);
 
 	clk_disable(uap->clk);
 }
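Both serial-console fixes converge on the same locking shape: acquire the lock and the interrupt state in one primitive, and only downgrade to a trylock when an oops is in progress, so a dying CPU that may already hold the lock can still print. A sketch with stubbed primitives; spin_lock_irqsave() and friends here are single-threaded models, not the kernel functions:

#include <stdbool.h>
#include <stdio.h>

static bool lock_held;
static bool oops_in_progress;

static bool spin_trylock_irqsave(unsigned long *flags)
{
	*flags = 0;			/* model: saved interrupt state */
	if (lock_held)
		return false;		/* don't block a dying CPU */
	lock_held = true;
	return true;
}

static void spin_lock_irqsave(unsigned long *flags)
{
	*flags = 0;
	lock_held = true;		/* model: would sleep/spin until free */
}

static void spin_unlock_irqrestore(unsigned long flags)
{
	(void)flags;
	lock_held = false;
}

static void console_write(const char *s)
{
	unsigned long flags = 0;
	bool locked;

	if (oops_in_progress) {
		locked = spin_trylock_irqsave(&flags);
	} else {
		spin_lock_irqsave(&flags);
		locked = true;
	}

	printf("%s", s);		/* the actual UART output */

	if (locked)
		spin_unlock_irqrestore(flags);
}

int main(void)
{
	console_write("hello\n");
	return 0;
}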
debian/patches/features/all/rt/drm-i915-drop-trace_i915_gem_ring_dispatch-onrt.patch (new file)
@@ -0,0 +1,59 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 25 Apr 2013 18:12:52 +0200
Subject: drm/i915: drop trace_i915_gem_ring_dispatch on rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

This tracepoint is responsible for:

|[<814cc358>] __schedule_bug+0x4d/0x59
|[<814d24cc>] __schedule+0x88c/0x930
|[<814d3b90>] ? _raw_spin_unlock_irqrestore+0x40/0x50
|[<814d3b95>] ? _raw_spin_unlock_irqrestore+0x45/0x50
|[<810b57b5>] ? task_blocks_on_rt_mutex+0x1f5/0x250
|[<814d27d9>] schedule+0x29/0x70
|[<814d3423>] rt_spin_lock_slowlock+0x15b/0x278
|[<814d3786>] rt_spin_lock+0x26/0x30
|[<a00dced9>] gen6_gt_force_wake_get+0x29/0x60 [i915]
|[<a00e183f>] gen6_ring_get_irq+0x5f/0x100 [i915]
|[<a00b2a33>] ftrace_raw_event_i915_gem_ring_dispatch+0xe3/0x100 [i915]
|[<a00ac1b3>] i915_gem_do_execbuffer.isra.13+0xbd3/0x1430 [i915]
|[<810f8943>] ? trace_buffer_unlock_commit+0x43/0x60
|[<8113e8d2>] ? ftrace_raw_event_kmem_alloc+0xd2/0x180
|[<8101d063>] ? native_sched_clock+0x13/0x80
|[<a00acf29>] i915_gem_execbuffer2+0x99/0x280 [i915]
|[<a00114a3>] drm_ioctl+0x4c3/0x570 [drm]
|[<8101d0d9>] ? sched_clock+0x9/0x10
|[<a00ace90>] ? i915_gem_execbuffer+0x480/0x480 [i915]
|[<810f1c18>] ? rb_commit+0x68/0xa0
|[<810f1c6c>] ? ring_buffer_unlock_commit+0x1c/0xa0
|[<81197467>] do_vfs_ioctl+0x97/0x540
|[<81021318>] ? ftrace_raw_event_sys_enter+0xd8/0x130
|[<811979a1>] sys_ioctl+0x91/0xb0
|[<814db931>] tracesys+0xe1/0xe6

Chris Wilson does not like to move i915_trace_irq_get() out of the macro:

|No. This enables the IRQ, as well as making a number of
|very expensively serialised read, unconditionally.

so it is gone now on RT.

Reported-by: Joakim Hernberg <jbh@alchemy.lu>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1339,7 +1339,9 @@ i915_gem_ringbuffer_submission(struct dr
 		return ret;
 	}
 
+#ifndef CONFIG_PREEMPT_RT_BASE
 	trace_i915_gem_ring_dispatch(intel_ring_get_request(ring), dispatch_flags);
+#endif
 
 	i915_gem_execbuffer_move_to_active(vmas, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
@@ -0,0 +1,31 @@
Subject: fs/epoll: Do not disable preemption on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 16:35:35 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

ep_call_nested() takes a sleeping lock so we can't disable preemption.
The light version is enough since ep_call_nested() doesn't mind being
invoked twice on the same CPU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/eventpoll.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -505,12 +505,12 @@ static int ep_poll_wakeup_proc(void *pri
  */
 static void ep_poll_safewake(wait_queue_head_t *wq)
 {
-	int this_cpu = get_cpu();
+	int this_cpu = get_cpu_light();
 
 	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
 		       ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
 
-	put_cpu();
+	put_cpu_light();
 }
 
 static void ep_remove_wait_queue(struct eppoll_entry *pwq)
@@ -0,0 +1,102 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Jul 2015 14:26:34 +0200
Subject: x86: Do not disable preemption in int3 on 32bit
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Preemption must be disabled before enabling interrupts in do_trap
on x86_64 because the stack in use for int3 and debug is a per CPU
stack set by the IST. But 32bit does not have an IST and the stack
still belongs to the current task and there is no problem in scheduling
out the task.

Keep preemption enabled on X86_32 when enabling interrupts for
do_trap().

The name of the function is changed from preempt_conditional_sti/cli()
to conditional_sti/cli_ist(), to annotate that this function is used
when the stack is on the IST.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/traps.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -88,9 +88,21 @@ static inline void conditional_sti(struc
 		local_irq_enable();
 }
 
-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
 {
+#ifdef CONFIG_X86_64
+	/*
+	 * X86_64 uses a per CPU stack on the IST for certain traps
+	 * like int3. The task can not be preempted when using one
+	 * of these stacks, thus preemption must be disabled, otherwise
+	 * the stack can be corrupted if the task is scheduled out,
+	 * and another task comes in and uses this stack.
+	 *
+	 * On x86_32 the task keeps its own stack and it is OK if the
+	 * task schedules out.
+	 */
 	preempt_count_inc();
+#endif
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_enable();
 }
@@ -101,11 +113,13 @@ static inline void conditional_cli(struc
 		local_irq_disable();
 }
 
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
 		local_irq_disable();
+#ifdef CONFIG_X86_64
 	preempt_count_dec();
+#endif
 }
 
 enum ctx_state ist_enter(struct pt_regs *regs)
@@ -536,9 +550,9 @@ dotraplinkage void notrace do_int3(struc
 	 * as we may switch to the interrupt stack.
 	 */
 	debug_stack_usage_inc();
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
 	debug_stack_usage_dec();
 exit:
 	ist_exit(regs, prev_state);
@@ -668,12 +682,12 @@ dotraplinkage void do_debug(struct pt_re
 	debug_stack_usage_inc();
 
 	/* It's safe to allow irq's after DR6 has been saved */
-	preempt_conditional_sti(regs);
+	conditional_sti_ist(regs);
 
 	if (v8086_mode(regs)) {
 		handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
 				 X86_TRAP_DB);
-		preempt_conditional_cli(regs);
+		conditional_cli_ist(regs);
 		debug_stack_usage_dec();
 		goto exit;
 	}
@@ -693,7 +707,7 @@ dotraplinkage void do_debug(struct pt_re
 	si_code = get_si_code(tsk->thread.debugreg6);
 	if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
 		send_sigtrap(tsk, regs, error_code, si_code);
-	preempt_conditional_cli(regs);
+	conditional_cli_ist(regs);
 	debug_stack_usage_dec();
 
 exit:
@@ -0,0 +1,107 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 16 Feb 2015 18:49:10 +0100
Subject: fs/aio: simple simple work
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

|BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
|in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
|2 locks held by rcuos/2/26:
| #0:  (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
| #1:  (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
|Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
|Call Trace:
| [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
| [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
| [<ffffffff81589304>] rt_spin_lock+0x24/0x70
| [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
| [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
| [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
| [<ffffffff8106e046>] kthread+0xd6/0xf0
| [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0

Replace this with the preempt_disable() friendly swork.

Reported-By: Mike Galbraith <umgwanakikbuti@gmail.com>
Suggested-by: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/aio.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,6 +40,7 @@
 #include <linux/ramfs.h>
 #include <linux/percpu-refcount.h>
 #include <linux/mount.h>
+#include <linux/work-simple.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -115,7 +116,7 @@ struct kioctx {
 	struct page		**ring_pages;
 	long			nr_pages;
 
-	struct work_struct	free_work;
+	struct swork_event	free_work;
 
 	/*
	 * signals when all in-flight requests are done
@@ -253,6 +254,7 @@ static int __init aio_setup(void)
 		.mount		= aio_mount,
 		.kill_sb	= kill_anon_super,
 	};
+	BUG_ON(swork_get());
 	aio_mnt = kern_mount(&aio_fs);
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
@@ -559,9 +561,9 @@ static int kiocb_cancel(struct aio_kiocb
 	return cancel(&kiocb->common);
 }
 
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx(struct swork_event *sev)
 {
-	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+	struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
 
 	pr_debug("freeing %p\n", ctx);
 
@@ -580,8 +582,8 @@ static void free_ioctx_reqs(struct percp
 	if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count))
 		complete(&ctx->rq_wait->comp);
 
-	INIT_WORK(&ctx->free_work, free_ioctx);
-	schedule_work(&ctx->free_work);
+	INIT_SWORK(&ctx->free_work, free_ioctx);
+	swork_queue(&ctx->free_work);
 }
 
 /*
@@ -589,9 +591,9 @@ static void free_ioctx_reqs(struct percp
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
 * now it's safe to cancel any that need to be.
 */
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_work(struct swork_event *sev)
 {
-	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+	struct kioctx *ctx = container_of(sev, struct kioctx, free_work);
 	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
@@ -610,6 +612,14 @@ static void free_ioctx_users(struct perc
 	percpu_ref_put(&ctx->reqs);
 }
 
+static void free_ioctx_users(struct percpu_ref *ref)
+{
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+	INIT_SWORK(&ctx->free_work, free_ioctx_users_work);
+	swork_queue(&ctx->free_work);
+}
+
 static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
 {
 	unsigned i, new_nr;
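The general move in this patch: the cleanup runs in a context that may not take sleeping locks on RT, so it is packaged as a work item and handed to a thread that may. A minimal pthread work queue as a stand-in for the RT kernel's swork (compile with -pthread; the real worker loops forever, one item is enough here):

#include <pthread.h>
#include <stdio.h>

struct swork_event {
	void (*func)(struct swork_event *);
	struct swork_event *next;
};

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_cond = PTHREAD_COND_INITIALIZER;
static struct swork_event *q_head;

/* Cheap enough to call from an "atomic" context: list push + signal. */
static void swork_queue(struct swork_event *ev)
{
	pthread_mutex_lock(&q_lock);
	ev->next = q_head;
	q_head = ev;
	pthread_cond_signal(&q_cond);
	pthread_mutex_unlock(&q_lock);
}

static void *worker(void *arg)
{
	struct swork_event *ev;

	(void)arg;
	pthread_mutex_lock(&q_lock);
	while (!q_head)
		pthread_cond_wait(&q_cond, &q_lock);
	ev = q_head;
	q_head = ev->next;
	pthread_mutex_unlock(&q_lock);
	ev->func(ev);	/* may sleep: runs in thread context */
	return NULL;
}

static void free_ioctx_users_work(struct swork_event *ev)
{
	(void)ev;
	printf("cleanup ran in worker context\n");
}

int main(void)
{
	static struct swork_event ev = { free_ioctx_users_work, NULL };
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	swork_queue(&ev);	/* the percpu_ref callback would do this */
	pthread_join(t, NULL);
	return 0;
}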
@@ -0,0 +1,23 @@
Subject: block: Turn off warning which is bogus on RT
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Jun 2011 17:05:09 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

On -RT the context is always with IRQs enabled. Ignore this warning on -RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -194,7 +194,7 @@ EXPORT_SYMBOL(blk_delay_queue);
  **/
 void blk_start_queue(struct request_queue *q)
 {
-	WARN_ON(!irqs_disabled());
+	WARN_ON_NONRT(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
 	__blk_run_queue(q);
debian/patches/features/all/rt/fs-dcache-use-cpu-chill-in-trylock-loops.patch (new file)
@@ -0,0 +1,86 @@
Subject: fs: dcache: Use cpu_chill() in trylock loops
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 07 Mar 2012 21:00:34 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Retry loops on RT might loop forever when the modifying side was
preempted. Use cpu_chill() instead of cpu_relax() to let the system
make progress.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/autofs4/autofs_i.h | 1 +
 fs/autofs4/expire.c   | 2 +-
 fs/dcache.c           | 5 +++--
 fs/namespace.c        | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/delay.h>
 #include <asm/current.h>
 #include <asm/uaccess.h>
 
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -150,7 +150,7 @@ static struct dentry *get_next_positive_
 		parent = p->d_parent;
 		if (!spin_trylock(&parent->d_lock)) {
 			spin_unlock(&p->d_lock);
-			cpu_relax();
+			cpu_chill();
 			goto relock;
 		}
 		spin_unlock(&p->d_lock);
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/fsnotify.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
@@ -589,7 +590,7 @@ static struct dentry *dentry_kill(struct
 
 failed:
 	spin_unlock(&dentry->d_lock);
-	cpu_relax();
+	cpu_chill();
 	return dentry; /* try again with same dentry */
 }
 
@@ -2392,7 +2393,7 @@ void d_delete(struct dentry * dentry)
 	if (dentry->d_lockref.count == 1) {
 		if (!spin_trylock(&inode->i_lock)) {
 			spin_unlock(&dentry->d_lock);
-			cpu_relax();
+			cpu_chill();
 			goto again;
 		}
 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,6 +14,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/namei.h>
+#include <linux/delay.h>
 #include <linux/security.h>
 #include <linux/idr.h>
 #include <linux/init.h>		/* init_rootfs */
@@ -355,7 +356,7 @@ int __mnt_want_write(struct vfsmount *m)
 	smp_mb();
 	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
 		preempt_enable();
-		cpu_relax();
+		cpu_chill();
 		preempt_disable();
 	}
 	/*
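The loop being tuned follows a classic shape: hold the inner lock, try the outer one, and on failure drop everything and back off before retrying. On RT the back-off must actually sleep, because the lock holder may be a preempted lower-priority task that needs the CPU to make progress. A runnable sketch of the pattern; nanosleep() stands in for cpu_chill() (compile with -pthread):

#include <pthread.h>
#include <time.h>
#include <stdio.h>

static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;

static void chill(void)	/* sleep, don't spin: let the holder run */
{
	struct timespec ts = { 0, 1000000 };
	nanosleep(&ts, NULL);
}

static void lock_both(void)
{
	for (;;) {
		pthread_mutex_lock(&inner);
		if (pthread_mutex_trylock(&outer) == 0)
			return;			/* got both, no deadlock */
		pthread_mutex_unlock(&inner);	/* drop and retry */
		chill();			/* was cpu_relax(): busy wait */
	}
}

int main(void)
{
	lock_both();
	printf("acquired inner+outer\n");
	pthread_mutex_unlock(&outer);
	pthread_mutex_unlock(&inner);
	return 0;
}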
debian/patches/features/all/rt/fs-jbd-pull-plug-when-waiting-for-space.patch (new file)
@@ -0,0 +1,30 @@
From: Mike Galbraith <mgalbraith@suse.de>
Date: Wed, 11 Jul 2012 22:05:20 +0000
Subject: fs, jbd: pull your plug when waiting for space
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

With an -rt kernel, and a heavy sync IO load, tasks can jam
up on journal locks without unplugging, which can lead to
terminal IO starvation. Unplug and schedule when waiting for space.

Signed-off-by: Mike Galbraith <mgalbraith@suse.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Theodore Tso <tytso@mit.edu>
Link: http://lkml.kernel.org/r/1341812414.7370.73.camel@marge.simpson.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/jbd/checkpoint.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -129,6 +129,8 @@ void __log_wait_for_space(journal_t *jou
 		if (journal->j_flags & JFS_ABORT)
 			return;
 		spin_unlock(&journal->j_state_lock);
+		if (current->plug)
+			io_schedule();
 		mutex_lock(&journal->j_checkpoint_mutex);
 
 		/*
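The rule this patch (and its jbd2 twin below) encodes: flush privately batched I/O before sleeping on a lock, or the requests still sitting in your plug may be exactly what the lock holder is waiting on. A hypothetical batcher modelling the block plug; flush_plug() stands in for the unplug that io_schedule() performs:

#include <stdio.h>

#define PLUG_MAX 8

static int plug[PLUG_MAX];	/* requests batched, not yet submitted */
static int plugged;

static void submit(int req) { printf("submit %d\n", req); }

static void queue_plugged(int req)
{
	plug[plugged++] = req;	/* held back for batching */
}

static void flush_plug(void)
{
	for (int i = 0; i < plugged; i++)
		submit(plug[i]);	/* make the I/O visible to others */
	plugged = 0;
}

static void mutex_lock_checkpoint(void)
{
	/* would block here until journal space is freed by that I/O */
}

int main(void)
{
	queue_plugged(1);
	queue_plugged(2);
	flush_plug();			/* the added io_schedule() step */
	mutex_lock_checkpoint();	/* now safe to sleep */
	return 0;
}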
@@ -0,0 +1,101 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 10:11:25 +0100
Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

bit_spin_locks break under RT.

Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

--
 include/linux/buffer_head.h | 10 ++++++++++
 include/linux/jbd_common.h  | 24 ++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -77,6 +77,11 @@ struct buffer_head {
 	atomic_t b_count;		/* users using this buffer_head */
 #ifdef CONFIG_PREEMPT_RT_BASE
 	spinlock_t b_uptodate_lock;
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+	spinlock_t b_state_lock;
+	spinlock_t b_journal_head_lock;
+#endif
 #endif
 };
 
@@ -108,6 +113,11 @@ static inline void buffer_head_init_lock
 {
 #ifdef CONFIG_PREEMPT_RT_BASE
 	spin_lock_init(&bh->b_uptodate_lock);
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+    defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+	spin_lock_init(&bh->b_state_lock);
+	spin_lock_init(&bh->b_journal_head_lock);
+#endif
 #endif
 }
 
--- a/include/linux/jbd_common.h
+++ b/include/linux/jbd_common.h
@@ -15,32 +15,56 @@ static inline struct journal_head *bh2jh
 
 static inline void jbd_lock_bh_state(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	bit_spin_lock(BH_State, &bh->b_state);
+#else
+	spin_lock(&bh->b_state_lock);
+#endif
 }
 
 static inline int jbd_trylock_bh_state(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	return bit_spin_trylock(BH_State, &bh->b_state);
+#else
+	return spin_trylock(&bh->b_state_lock);
+#endif
 }
 
 static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	return bit_spin_is_locked(BH_State, &bh->b_state);
+#else
+	return spin_is_locked(&bh->b_state_lock);
+#endif
 }
 
 static inline void jbd_unlock_bh_state(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	bit_spin_unlock(BH_State, &bh->b_state);
+#else
+	spin_unlock(&bh->b_state_lock);
+#endif
 }
 
 static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	bit_spin_lock(BH_JournalHead, &bh->b_state);
+#else
+	spin_lock(&bh->b_journal_head_lock);
+#endif
 }
 
 static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 {
+#ifndef CONFIG_PREEMPT_RT_BASE
 	bit_spin_unlock(BH_JournalHead, &bh->b_state);
+#else
+	spin_unlock(&bh->b_journal_head_lock);
+#endif
 }
 
 #endif
debian/patches/features/all/rt/fs-jbd2-pull-your-plug-when-waiting-for-space.patch (new file)
@@ -0,0 +1,32 @@
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 17 Feb 2014 17:30:03 +0100
Subject: fs: jbd2: pull your plug when waiting for space
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Two cps in parallel managed to stall the ext4 fs. It seems that
journal code is either waiting for locks or sleeping waiting for
something to happen. This seems similar to what Mike observed on ext3,
here is his description:

|With an -rt kernel, and a heavy sync IO load, tasks can jam
|up on journal locks without unplugging, which can lead to
|terminal IO starvation. Unplug and schedule when waiting
|for space.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 fs/jbd2/checkpoint.c | 2 ++
 1 file changed, 2 insertions(+)

--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t
 	nblocks = jbd2_space_needed(journal);
 	while (jbd2_log_space_left(journal) < nblocks) {
 		write_unlock(&journal->j_state_lock);
+		if (current->plug)
+			io_schedule();
 		mutex_lock(&journal->j_checkpoint_mutex);
 
 		/*
@@ -0,0 +1,31 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 19 Jul 2009 08:44:27 -0500
Subject: fs: namespace preemption fix
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

On RT we cannot loop with preemption disabled here as
mnt_make_readonly() might have been preempted. We can safely enable
preemption while waiting for MNT_WRITE_HOLD to be cleared. Safe on !RT
as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/namespace.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -353,8 +353,11 @@ int __mnt_want_write(struct vfsmount *m)
 	 * incremented count after it has set MNT_WRITE_HOLD.
 	 */
 	smp_mb();
-	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD)
+	while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
+		preempt_enable();
 		cpu_relax();
+		preempt_disable();
+	}
 	/*
 	 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
 	 * be set to match its requirements. So we must not load that until
|
|
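The loop keeps spinning on MNT_WRITE_HOLD but now opens a preemption window around each cpu_relax(), so the holder can run and clear the flag. A minimal userspace sketch of the same shape, with sched_yield() standing in for the preempt_enable()/preempt_disable() pair:

#include <sched.h>
#include <stdatomic.h>

#define MNT_WRITE_HOLD 1

static void wait_for_hold_clear(atomic_int *flags)
{
	while (atomic_load(flags) & MNT_WRITE_HOLD)
		sched_yield();	/* give the flag holder a chance to run */
}

int main(void)
{
	atomic_int flags = 0;	/* holder has already cleared the bit */
	wait_for_hold_clear(&flags);
	return 0;
}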
@@ -0,0 +1,60 @@
|
|||
From: Mike Galbraith <efault@gmx.de>
|
||||
Date: Fri, 3 Jul 2009 08:44:12 -0500
|
||||
Subject: fs: ntfs: disable interrupt only on !RT
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote:
|
||||
> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
|
||||
>
|
||||
> > > [10138.175796] [<c0105de3>] show_trace+0x12/0x14
|
||||
> > > [10138.180291] [<c0105dfb>] dump_stack+0x16/0x18
|
||||
> > > [10138.184769] [<c011609f>] native_smp_call_function_mask+0x138/0x13d
|
||||
> > > [10138.191117] [<c0117606>] smp_call_function+0x1e/0x24
|
||||
> > > [10138.196210] [<c012f85c>] on_each_cpu+0x25/0x50
|
||||
> > > [10138.200807] [<c0115c74>] flush_tlb_all+0x1e/0x20
|
||||
> > > [10138.205553] [<c016caaf>] kmap_high+0x1b6/0x417
|
||||
> > > [10138.210118] [<c011ec88>] kmap+0x4d/0x4f
|
||||
> > > [10138.214102] [<c026a9d8>] ntfs_end_buffer_async_read+0x228/0x2f9
|
||||
> > > [10138.220163] [<c01a0e9e>] end_bio_bh_io_sync+0x26/0x3f
|
||||
> > > [10138.225352] [<c01a2b09>] bio_endio+0x42/0x6d
|
||||
> > > [10138.229769] [<c02c2a08>] __end_that_request_first+0x115/0x4ac
|
||||
> > > [10138.235682] [<c02c2da7>] end_that_request_chunk+0x8/0xa
|
||||
> > > [10138.241052] [<c0365943>] ide_end_request+0x55/0x10a
|
||||
> > > [10138.246058] [<c036dae3>] ide_dma_intr+0x6f/0xac
|
||||
> > > [10138.250727] [<c0366d83>] ide_intr+0x93/0x1e0
|
||||
> > > [10138.255125] [<c015afb4>] handle_IRQ_event+0x5c/0xc9
|
||||
> >
|
||||
> > Looks like ntfs is kmap()ing from interrupt context. Should be using
|
||||
> > kmap_atomic instead, I think.
|
||||
>
|
||||
> it's not atomic interrupt context but irq thread context - and -rt
|
||||
> remaps kmap_atomic() to kmap() internally.
|
||||
|
||||
Hm. Looking at the change to mm/bounce.c, perhaps I should do this
|
||||
instead?
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
fs/ntfs/aops.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
--- a/fs/ntfs/aops.c
|
||||
+++ b/fs/ntfs/aops.c
|
||||
@@ -143,13 +143,13 @@ static void ntfs_end_buffer_async_read(s
|
||||
recs = PAGE_CACHE_SIZE / rec_size;
|
||||
/* Should have been verified before we got here... */
|
||||
BUG_ON(!recs);
|
||||
- local_irq_save(flags);
|
||||
+ local_irq_save_nort(flags);
|
||||
kaddr = kmap_atomic(page);
|
||||
for (i = 0; i < recs; i++)
|
||||
post_read_mst_fixup((NTFS_RECORD*)(kaddr +
|
||||
i * rec_size), rec_size);
|
||||
kunmap_atomic(kaddr);
|
||||
- local_irq_restore(flags);
|
||||
+ local_irq_restore_nort(flags);
|
||||
flush_dcache_page(page);
|
||||
if (likely(page_uptodate && !PageError(page)))
|
||||
SetPageUptodate(page);
|
|
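local_irq_save_nort()/local_irq_restore_nort() are -rt helpers: on !RT they behave like the plain IRQ-disabling variants, on RT they degrade to near no-ops because the code already runs in preemptible irq-thread context. A rough sketch of the shape (kernel context assumed; the exact -rt definitions may differ):

#ifndef CONFIG_PREEMPT_RT_BASE
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#else
/* RT sketch: no hard IRQ disabling; flags kept only for the API shape */
# define local_irq_save_nort(flags)	do { (flags) = 0; } while (0)
# define local_irq_restore_nort(flags)	do { (void)(flags); } while (0)
#endif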
@@ -0,0 +1,162 @@
|
|||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Fri, 18 Mar 2011 09:18:52 +0100
|
||||
Subject: buffer_head: Replace bh_uptodate_lock for -rt
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Wrap the bit_spin_lock calls into a separate inline and add the RT
|
||||
replacements with a real spinlock.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
fs/buffer.c | 21 +++++++--------------
|
||||
fs/ntfs/aops.c | 10 +++-------
|
||||
include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
|
||||
3 files changed, 44 insertions(+), 21 deletions(-)
|
||||
|
||||
--- a/fs/buffer.c
|
||||
+++ b/fs/buffer.c
|
||||
@@ -301,8 +301,7 @@ static void end_buffer_async_read(struct
|
||||
* decide that the page is now completely done.
|
||||
*/
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -315,8 +314,7 @@ static void end_buffer_async_read(struct
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
|
||||
/*
|
||||
* If none of the buffers had errors and they are all
|
||||
@@ -328,9 +326,7 @@ static void end_buffer_async_read(struct
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
|
||||
clear_buffer_async_write(bh);
|
||||
unlock_buffer(bh);
|
||||
@@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffe
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
}
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
end_page_writeback(page);
|
||||
return;
|
||||
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(end_buffer_async_write);
|
||||
|
||||
@@ -3325,6 +3317,7 @@ struct buffer_head *alloc_buffer_head(gf
|
||||
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
|
||||
if (ret) {
|
||||
INIT_LIST_HEAD(&ret->b_assoc_buffers);
|
||||
+ buffer_head_init_locks(ret);
|
||||
preempt_disable();
|
||||
__this_cpu_inc(bh_accounting.nr);
|
||||
recalc_bh_state();
|
||||
--- a/fs/ntfs/aops.c
|
||||
+++ b/fs/ntfs/aops.c
|
||||
@@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
"0x%llx.", (unsigned long long)bh->b_blocknr);
|
||||
}
|
||||
first = page_buffers(page);
|
||||
- local_irq_save(flags);
|
||||
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
|
||||
+ flags = bh_uptodate_lock_irqsave(first);
|
||||
clear_buffer_async_read(bh);
|
||||
unlock_buffer(bh);
|
||||
tmp = bh;
|
||||
@@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
}
|
||||
tmp = tmp->b_this_page;
|
||||
} while (tmp != bh);
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
/*
|
||||
* If none of the buffers had errors then we can set the page uptodate,
|
||||
* but we first have to perform the post read mst fixups, if the
|
||||
@@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(s
|
||||
unlock_page(page);
|
||||
return;
|
||||
still_busy:
|
||||
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
|
||||
- local_irq_restore(flags);
|
||||
- return;
|
||||
+ bh_uptodate_unlock_irqrestore(first, flags);
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/include/linux/buffer_head.h
|
||||
+++ b/include/linux/buffer_head.h
|
||||
@@ -75,8 +75,42 @@ struct buffer_head {
|
||||
struct address_space *b_assoc_map; /* mapping this buffer is
|
||||
associated with */
|
||||
atomic_t b_count; /* users using this buffer_head */
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spinlock_t b_uptodate_lock;
|
||||
+#endif
|
||||
};
|
||||
|
||||
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
|
||||
+{
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ local_irq_save(flags);
|
||||
+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+#else
|
||||
+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+ return flags;
|
||||
+}
|
||||
+
|
||||
+static inline void
|
||||
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
|
||||
+{
|
||||
+#ifndef CONFIG_PREEMPT_RT_BASE
|
||||
+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
|
||||
+ local_irq_restore(flags);
|
||||
+#else
|
||||
+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+static inline void buffer_head_init_locks(struct buffer_head *bh)
|
||||
+{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ spin_lock_init(&bh->b_uptodate_lock);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
|
||||
* and buffer_foo() functions.
|
|
@@ -0,0 +1,74 @@
|
|||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 17 Jul 2011 21:56:42 +0200
|
||||
Subject: trace: Add migrate-disabled counter to tracing output
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/ftrace_event.h | 2 ++
|
||||
kernel/trace/trace.c | 9 ++++++---
|
||||
kernel/trace/trace_events.c | 2 ++
|
||||
kernel/trace/trace_output.c | 5 +++++
|
||||
4 files changed, 15 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/ftrace_event.h
|
||||
+++ b/include/linux/ftrace_event.h
|
||||
@@ -66,6 +66,8 @@ struct trace_entry {
|
||||
unsigned char flags;
|
||||
unsigned char preempt_count;
|
||||
int pid;
|
||||
+ unsigned short migrate_disable;
|
||||
+ unsigned short padding;
|
||||
};
|
||||
|
||||
#define FTRACE_MAX_EVENT \
|
||||
--- a/kernel/trace/trace.c
|
||||
+++ b/kernel/trace/trace.c
|
||||
@@ -1641,6 +1641,8 @@ tracing_generic_entry_update(struct trac
|
||||
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
|
||||
(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
|
||||
(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
|
||||
+
|
||||
+ entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
|
||||
|
||||
@@ -2563,9 +2565,10 @@ static void print_lat_help_header(struct
|
||||
"# | / _----=> need-resched \n"
|
||||
"# || / _---=> hardirq/softirq \n"
|
||||
"# ||| / _--=> preempt-depth \n"
|
||||
- "# |||| / delay \n"
|
||||
- "# cmd pid ||||| time | caller \n"
|
||||
- "# \\ / ||||| \\ | / \n");
|
||||
+ "# |||| / _--=> migrate-disable\n"
|
||||
+ "# ||||| / delay \n"
|
||||
+ "# cmd pid |||||| time | caller \n"
|
||||
+ "# \\ / ||||| \\ | / \n");
|
||||
}
|
||||
|
||||
static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
|
||||
--- a/kernel/trace/trace_events.c
|
||||
+++ b/kernel/trace/trace_events.c
|
||||
@@ -162,6 +162,8 @@ static int trace_define_common_fields(vo
|
||||
__common_field(unsigned char, flags);
|
||||
__common_field(unsigned char, preempt_count);
|
||||
__common_field(int, pid);
|
||||
+ __common_field(unsigned short, migrate_disable);
|
||||
+ __common_field(unsigned short, padding);
|
||||
|
||||
return ret;
|
||||
}
|
||||
--- a/kernel/trace/trace_output.c
|
||||
+++ b/kernel/trace/trace_output.c
|
||||
@@ -472,6 +472,11 @@ int trace_print_lat_fmt(struct trace_seq
|
||||
else
|
||||
trace_seq_putc(s, '.');
|
||||
|
||||
+ if (entry->migrate_disable)
|
||||
+ trace_seq_printf(s, "%x", entry->migrate_disable);
|
||||
+ else
|
||||
+ trace_seq_putc(s, '.');
|
||||
+
|
||||
return !trace_seq_has_overflowed(s);
|
||||
}
|
||||
|
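The trace_output.c hunk adds one latency-format column with a simple rule: print the migrate-disable depth as a hex digit, or '.' when it is zero. That formatting rule in isolation:

#include <stdio.h>

static void print_migrate_disable(unsigned short md)
{
	if (md)
		printf("%x", md);
	else
		putchar('.');
}

int main(void)
{
	print_migrate_disable(0);	/* prints "." */
	print_migrate_disable(2);	/* prints "2" */
	putchar('\n');
	return 0;
}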
224
debian/patches/features/all/rt/futex-avoid-double-wake-up-in-PI-futex-wait-wake-on-.patch
vendored
Normal file
|
@@ -0,0 +1,224 @@
|
|||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 18 Feb 2015 20:17:31 +0100
|
||||
Subject: futex: avoid double wake up in PI futex wait / wake on -RT
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
The boosted priority is reverted after the unlock but before the
|
||||
futex_hash_bucket (hb) has been accessed. The result is that we boost the
|
||||
task, deboost the task, boost again for the hb lock, deboost again.
|
||||
A sched trace of this scenario looks like the following:
|
||||
|
||||
| med_prio-93 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
|
||||
| med_prio-93 sched_switch: prev_comm=med_prio prev_pid=93 prev_prio=29 prev_state=R ==> next_comm=high_prio next_pid=92 next_prio=9
|
||||
|high_prio-92 sched_pi_setprio: comm=low_prio pid=91 oldprio=120 newprio=9
|
||||
|high_prio-92 sched_switch: prev_comm=high_prio prev_pid=92 prev_prio=9 prev_state=S ==> next_comm=low_prio next_pid=91 next_prio=9
|
||||
| low_prio-91 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
|
||||
| low_prio-91 sched_pi_setprio: comm=low_prio pid=91 oldprio=9 newprio=120
|
||||
| low_prio-91 sched_switch: prev_comm=low_prio prev_pid=91 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=92 next_prio=9
|
||||
|high_prio-92 sched_pi_setprio: comm=low_prio pid=91 oldprio=120 newprio=9
|
||||
|high_prio-92 sched_switch: prev_comm=high_prio prev_pid=92 prev_prio=9 prev_state=D ==> next_comm=low_prio next_pid=91 next_prio=9
|
||||
| low_prio-91 sched_wakeup: comm=high_prio pid=92 prio=9 success=1 target_cpu=000
|
||||
| low_prio-91 sched_pi_setprio: comm=low_prio pid=91 oldprio=9 newprio=120
|
||||
| low_prio-91 sched_switch: prev_comm=low_prio prev_pid=91 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=92 next_prio=9
|
||||
|
||||
We see four sched_pi_setprio() invocations but ideally two would be enough.
|
||||
The patch avoids the double wakeup by issuing the wake up once the hb lock is
|
||||
released. The same test case:
|
||||
|
||||
| med_prio-21 sched_wakeup: comm=high_prio pid=20 prio=9 success=1 target_cpu=000
|
||||
| med_prio-21 sched_switch: prev_comm=med_prio prev_pid=21 prev_prio=29 prev_state=R ==> next_comm=high_prio next_pid=20 next_prio=9
|
||||
|high_prio-20 sched_pi_setprio: comm=low_prio pid=19 oldprio=120 newprio=9
|
||||
|high_prio-20 sched_switch: prev_comm=high_prio prev_pid=20 prev_prio=9 prev_state=S ==> next_comm=low_prio next_pid=19 next_prio=9
|
||||
| low_prio-19 sched_wakeup: comm=high_prio pid=20 prio=9 success=1 target_cpu=000
|
||||
| low_prio-19 sched_pi_setprio: comm=low_prio pid=19 oldprio=9 newprio=120
|
||||
| low_prio-19 sched_switch: prev_comm=low_prio prev_pid=19 prev_prio=120 prev_state=R+ ==> next_comm=high_prio next_pid=20 next_prio=9
|
||||
|
||||
only two sched_pi_setprio() invocations as one would expect and see
|
||||
without -RT.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/futex.c | 32 +++++++++++++++++++++++++++++---
|
||||
kernel/locking/rtmutex.c | 40 +++++++++++++++++++++++++++++-----------
|
||||
kernel/locking/rtmutex_common.h | 4 ++++
|
||||
3 files changed, 62 insertions(+), 14 deletions(-)
|
||||
|
||||
--- a/kernel/futex.c
|
||||
+++ b/kernel/futex.c
|
||||
@@ -1117,11 +1117,13 @@ static void mark_wake_futex(struct wake_
|
||||
q->lock_ptr = NULL;
|
||||
}
|
||||
|
||||
-static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
|
||||
+static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
|
||||
+ struct futex_hash_bucket *hb)
|
||||
{
|
||||
struct task_struct *new_owner;
|
||||
struct futex_pi_state *pi_state = this->pi_state;
|
||||
u32 uninitialized_var(curval), newval;
|
||||
+ bool deboost;
|
||||
int ret = 0;
|
||||
|
||||
if (!pi_state)
|
||||
@@ -1173,7 +1175,17 @@ static int wake_futex_pi(u32 __user *uad
|
||||
raw_spin_unlock_irq(&new_owner->pi_lock);
|
||||
|
||||
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
|
||||
- rt_mutex_unlock(&pi_state->pi_mutex);
|
||||
+
|
||||
+ deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex);
|
||||
+
|
||||
+ /*
|
||||
+ * We deboost after dropping hb->lock. That prevents a double
|
||||
+ * wakeup on RT.
|
||||
+ */
|
||||
+ spin_unlock(&hb->lock);
|
||||
+
|
||||
+ if (deboost)
|
||||
+ rt_mutex_adjust_prio(current);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2413,13 +2425,26 @@ static int futex_unlock_pi(u32 __user *u
|
||||
*/
|
||||
match = futex_top_waiter(hb, &key);
|
||||
if (match) {
|
||||
- ret = wake_futex_pi(uaddr, uval, match);
|
||||
+ ret = wake_futex_pi(uaddr, uval, match, hb);
|
||||
+
|
||||
+ /*
|
||||
+ * In case of success wake_futex_pi dropped the hash
|
||||
+ * bucket lock.
|
||||
+ */
|
||||
+ if (!ret)
|
||||
+ goto out_putkey;
|
||||
+
|
||||
/*
|
||||
* The atomic access to the futex value generated a
|
||||
* pagefault, so retry the user-access and the wakeup:
|
||||
*/
|
||||
if (ret == -EFAULT)
|
||||
goto pi_faulted;
|
||||
+
|
||||
+ /*
|
||||
+ * wake_futex_pi has detected invalid state. Tell user
|
||||
+ * space.
|
||||
+ */
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
@@ -2440,6 +2465,7 @@ static int futex_unlock_pi(u32 __user *u
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&hb->lock);
|
||||
+out_putkey:
|
||||
put_futex_key(&key);
|
||||
return ret;
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -300,7 +300,7 @@ static void __rt_mutex_adjust_prio(struc
|
||||
* of task. We do not use the spin_xx_mutex() variants here as we are
|
||||
* outside of the debug path.)
|
||||
*/
|
||||
-static void rt_mutex_adjust_prio(struct task_struct *task)
|
||||
+void rt_mutex_adjust_prio(struct task_struct *task)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
@@ -957,8 +957,9 @@ static int task_blocks_on_rt_mutex(struc
|
||||
/*
|
||||
* Wake up the next waiter on the lock.
|
||||
*
|
||||
- * Remove the top waiter from the current tasks pi waiter list and
|
||||
- * wake it up.
|
||||
+ * Remove the top waiter from the current tasks pi waiter list,
|
||||
+ * wake it up and return whether the current task needs to undo
|
||||
+ * a potential priority boosting.
|
||||
*
|
||||
* Called with lock->wait_lock held.
|
||||
*/
|
||||
@@ -1255,7 +1256,7 @@ static inline int rt_mutex_slowtrylock(s
|
||||
/*
|
||||
* Slow path to release a rt-mutex:
|
||||
*/
|
||||
-static void __sched
|
||||
+static bool __sched
|
||||
rt_mutex_slowunlock(struct rt_mutex *lock)
|
||||
{
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
@@ -1298,7 +1299,7 @@ rt_mutex_slowunlock(struct rt_mutex *loc
|
||||
while (!rt_mutex_has_waiters(lock)) {
|
||||
/* Drops lock->wait_lock ! */
|
||||
if (unlock_rt_mutex_safe(lock) == true)
|
||||
- return;
|
||||
+ return false;
|
||||
/* Relock the rtmutex and try again */
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
}
|
||||
@@ -1311,8 +1312,7 @@ rt_mutex_slowunlock(struct rt_mutex *loc
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
- /* Undo pi boosting if necessary: */
|
||||
- rt_mutex_adjust_prio(current);
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1363,12 +1363,14 @@ rt_mutex_fasttrylock(struct rt_mutex *lo
|
||||
|
||||
static inline void
|
||||
rt_mutex_fastunlock(struct rt_mutex *lock,
|
||||
- void (*slowfn)(struct rt_mutex *lock))
|
||||
+ bool (*slowfn)(struct rt_mutex *lock))
|
||||
{
|
||||
- if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
|
||||
+ if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
|
||||
rt_mutex_deadlock_account_unlock(current);
|
||||
- else
|
||||
- slowfn(lock);
|
||||
+ } else if (slowfn(lock)) {
|
||||
+ /* Undo pi boosting if necessary: */
|
||||
+ rt_mutex_adjust_prio(current);
|
||||
+ }
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1463,6 +1465,22 @@ void __sched rt_mutex_unlock(struct rt_m
|
||||
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
|
||||
|
||||
/**
|
||||
+ * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
|
||||
+ * @lock: the rt_mutex to be unlocked
|
||||
+ *
|
||||
+ * Returns: true/false indicating whether priority adjustment is
|
||||
+ * required or not.
|
||||
+ */
|
||||
+bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
|
||||
+{
|
||||
+ if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
|
||||
+ rt_mutex_deadlock_account_unlock(current);
|
||||
+ return false;
|
||||
+ }
|
||||
+ return rt_mutex_slowunlock(lock);
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
* rt_mutex_destroy - mark a mutex unusable
|
||||
* @lock: the mutex to be destroyed
|
||||
*
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -132,6 +132,10 @@ extern int rt_mutex_finish_proxy_lock(st
|
||||
struct rt_mutex_waiter *waiter);
|
||||
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
|
||||
|
||||
+extern bool rt_mutex_futex_unlock(struct rt_mutex *lock);
|
||||
+
|
||||
+extern void rt_mutex_adjust_prio(struct task_struct *task);
|
||||
+
|
||||
#ifdef CONFIG_DEBUG_RT_MUTEXES
|
||||
# include "rtmutex-debug.h"
|
||||
#else
|
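The heart of the change: rt_mutex_futex_unlock() only reports that a deboost is pending, and wake_futex_pi() performs it after hb->lock is dropped, so priority is adjusted once instead of twice. A self-contained toy model of that control flow (all names invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct toy_lock { int held; };

/* Unlock the PI lock; report whether we are still boosted. */
static bool pi_unlock(struct toy_lock *l)
{
	l->held = 0;
	return true;		/* pretend a waiter had boosted us */
}

static void hb_unlock(struct toy_lock *l) { l->held = 0; }
static void deboost_self(void) { puts("deboost exactly once"); }

int main(void)
{
	struct toy_lock pi = { 1 }, hb = { 1 };
	bool deboost = pi_unlock(&pi);	/* may leave us boosted */
	hb_unlock(&hb);			/* drop the hash bucket lock first */
	if (deboost)
		deboost_self();		/* ... then undo the boost */
	return 0;
}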
|
@@ -0,0 +1,114 @@
|
|||
From: Steven Rostedt <rostedt@goodmis.org>
|
||||
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
||||
Subject: futex: Fix bug on when a requeued RT task times out
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Requeue with timeout causes a bug with PREEMPT_RT_FULL.
|
||||
|
||||
The bug comes from a timed out condition.
|
||||
|
||||
|
||||
TASK 1 TASK 2
|
||||
------ ------
|
||||
futex_wait_requeue_pi()
|
||||
futex_wait_queue_me()
|
||||
<timed out>
|
||||
|
||||
double_lock_hb();
|
||||
|
||||
raw_spin_lock(pi_lock);
|
||||
if (current->pi_blocked_on) {
|
||||
} else {
|
||||
current->pi_blocked_on = PI_WAKE_INPROGRESS;
|
||||
run_spin_unlock(pi_lock);
|
||||
spin_lock(hb->lock); <-- blocked!
|
||||
|
||||
|
||||
plist_for_each_entry_safe(this) {
|
||||
rt_mutex_start_proxy_lock();
|
||||
task_blocks_on_rt_mutex();
|
||||
BUG_ON(task->pi_blocked_on)!!!!
|
||||
|
||||
The BUG_ON() actually has a check for PI_WAKE_INPROGRESS, but the
|
||||
problem is that, after TASK 1 sets PI_WAKE_INPROGRESS, it then tries to
|
||||
grab the hb->lock, which it fails to do. As the hb->lock is a mutex,
|
||||
it will block and set the "pi_blocked_on" to the hb->lock.
|
||||
|
||||
When TASK 2 goes to requeue it, the check for PI_WAKE_INPROGRESS fails
|
||||
because the task1's pi_blocked_on is no longer set to that, but instead,
|
||||
set to the hb->lock.
|
||||
|
||||
The fix:
|
||||
|
||||
When calling rt_mutex_start_proxy_lock() a check is made to see
|
||||
if the proxy tasks pi_blocked_on is set. If so, exit out early.
|
||||
Otherwise set it to a new flag PI_REQUEUE_INPROGRESS, which notifies
|
||||
the proxy task that it is being requeued, and will handle things
|
||||
appropriately.
|
||||
|
||||
|
||||
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
kernel/locking/rtmutex.c | 32 +++++++++++++++++++++++++++++++-
|
||||
kernel/locking/rtmutex_common.h | 1 +
|
||||
2 files changed, 32 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/locking/rtmutex.c
|
||||
+++ b/kernel/locking/rtmutex.c
|
||||
@@ -71,7 +71,8 @@ static void fixup_rt_mutex_waiters(struc
|
||||
|
||||
static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
|
||||
{
|
||||
- return waiter && waiter != PI_WAKEUP_INPROGRESS;
|
||||
+ return waiter && waiter != PI_WAKEUP_INPROGRESS &&
|
||||
+ waiter != PI_REQUEUE_INPROGRESS;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1603,6 +1604,35 @@ int rt_mutex_start_proxy_lock(struct rt_
|
||||
return 1;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ /*
|
||||
+ * In PREEMPT_RT there's an added race.
|
||||
+ * If the task, that we are about to requeue, times out,
|
||||
+ * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue
|
||||
+ * to skip this task. But right after the task sets
|
||||
+ * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then
|
||||
+ * block on the spin_lock(&hb->lock), which in RT is an rtmutex.
|
||||
+ * This will replace the PI_WAKEUP_INPROGRESS with the actual
|
||||
+ * lock that it blocks on. We *must not* place this task
|
||||
+ * on this proxy lock in that case.
|
||||
+ *
|
||||
+ * To prevent this race, we first take the task's pi_lock
|
||||
+ * and check if it has updated its pi_blocked_on. If it has,
|
||||
+ * we assume that it woke up and we return -EAGAIN.
|
||||
+ * Otherwise, we set the task's pi_blocked_on to
|
||||
+ * PI_REQUEUE_INPROGRESS, so that if the task is waking up
|
||||
+ * it will know that we are in the process of requeuing it.
|
||||
+ */
|
||||
+ raw_spin_lock_irq(&task->pi_lock);
|
||||
+ if (task->pi_blocked_on) {
|
||||
+ raw_spin_unlock_irq(&task->pi_lock);
|
||||
+ raw_spin_unlock(&lock->wait_lock);
|
||||
+ return -EAGAIN;
|
||||
+ }
|
||||
+ task->pi_blocked_on = PI_REQUEUE_INPROGRESS;
|
||||
+ raw_spin_unlock_irq(&task->pi_lock);
|
||||
+#endif
|
||||
+
|
||||
/* We enforce deadlock detection for futexes */
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, task,
|
||||
RT_MUTEX_FULL_CHAINWALK);
|
||||
--- a/kernel/locking/rtmutex_common.h
|
||||
+++ b/kernel/locking/rtmutex_common.h
|
||||
@@ -120,6 +120,7 @@ enum rtmutex_chainwalk {
|
||||
* PI-futex support (proxy locking functions, etc.):
|
||||
*/
|
||||
#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1)
|
||||
+#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)
|
||||
|
||||
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
|
||||
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
|
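PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS are sentinel pointers: tiny non-NULL constants that no real waiter object can ever alias, letting pi_blocked_on encode transient states without extra fields. The trick in miniature:

#include <stdio.h>

struct waiter;	/* opaque: only the pointer values matter here */

#define PI_WAKEUP_INPROGRESS	((struct waiter *)1)
#define PI_REQUEUE_INPROGRESS	((struct waiter *)2)

static int rt_mutex_real_waiter(struct waiter *w)
{
	return w && w != PI_WAKEUP_INPROGRESS && w != PI_REQUEUE_INPROGRESS;
}

int main(void)
{
	printf("%d %d %d\n",
	       rt_mutex_real_waiter(NULL),			/* 0 */
	       rt_mutex_real_waiter(PI_WAKEUP_INPROGRESS),	/* 0 */
	       rt_mutex_real_waiter((struct waiter *)0x1000));	/* 1 */
	return 0;
}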
|
@@ -0,0 +1,38 @@
|
|||
From: Ingo Molnar <mingo@elte.hu>
|
||||
Date: Fri, 3 Jul 2009 08:29:57 -0500
|
||||
Subject: genirq: Disable irqpoll on -rt
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Creates long latencies for no value
|
||||
|
||||
Signed-off-by: Ingo Molnar <mingo@elte.hu>
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
|
||||
---
|
||||
kernel/irq/spurious.c | 8 ++++++++
|
||||
1 file changed, 8 insertions(+)
|
||||
|
||||
--- a/kernel/irq/spurious.c
|
||||
+++ b/kernel/irq/spurious.c
|
||||
@@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
|
||||
|
||||
static int __init irqfixup_setup(char *str)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
|
||||
+ return 1;
|
||||
+#endif
|
||||
irqfixup = 1;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
|
||||
printk(KERN_WARNING "This may impact system performance.\n");
|
||||
@@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644);
|
||||
|
||||
static int __init irqpoll_setup(char *str)
|
||||
{
|
||||
+#ifdef CONFIG_PREEMPT_RT_BASE
|
||||
+ pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n");
|
||||
+ return 1;
|
||||
+#endif
|
||||
irqfixup = 2;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
|
||||
"enabled\n");
|
145
debian/patches/features/all/rt/genirq-do-not-invoke-the-affinity-callback-via-a-wor.patch
vendored
Normal file
|
@@ -0,0 +1,145 @@
|
|||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Wed, 21 Aug 2013 17:48:46 +0200
|
||||
Subject: genirq: Do not invoke the affinity callback via a workqueue on RT
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Joe Korty reported that __irq_set_affinity_locked() schedules a
|
||||
workqueue while holding a rawlock which results in a might_sleep()
|
||||
warning.
|
||||
This patch moves the invocation into process context so that we only
|
||||
wakeup() a process while holding the lock.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 1
|
||||
kernel/irq/manage.c | 79 ++++++++++++++++++++++++++++++++++++++++++++--
|
||||
2 files changed, 77 insertions(+), 3 deletions(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -215,6 +215,7 @@ struct irq_affinity_notify {
|
||||
unsigned int irq;
|
||||
struct kref kref;
|
||||
struct work_struct work;
|
||||
+ struct list_head list;
|
||||
void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
|
||||
void (*release)(struct kref *ref);
|
||||
};
|
||||
--- a/kernel/irq/manage.c
|
||||
+++ b/kernel/irq/manage.c
|
||||
@@ -181,6 +181,62 @@ static inline void
|
||||
irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+static void _irq_affinity_notify(struct irq_affinity_notify *notify);
|
||||
+static struct task_struct *set_affinity_helper;
|
||||
+static LIST_HEAD(affinity_list);
|
||||
+static DEFINE_RAW_SPINLOCK(affinity_list_lock);
|
||||
+
|
||||
+static int set_affinity_thread(void *unused)
|
||||
+{
|
||||
+ while (1) {
|
||||
+ struct irq_affinity_notify *notify;
|
||||
+ int empty;
|
||||
+
|
||||
+ set_current_state(TASK_INTERRUPTIBLE);
|
||||
+
|
||||
+ raw_spin_lock_irq(&affinity_list_lock);
|
||||
+ empty = list_empty(&affinity_list);
|
||||
+ raw_spin_unlock_irq(&affinity_list_lock);
|
||||
+
|
||||
+ if (empty)
|
||||
+ schedule();
|
||||
+ if (kthread_should_stop())
|
||||
+ break;
|
||||
+ set_current_state(TASK_RUNNING);
|
||||
+try_next:
|
||||
+ notify = NULL;
|
||||
+
|
||||
+ raw_spin_lock_irq(&affinity_list_lock);
|
||||
+ if (!list_empty(&affinity_list)) {
|
||||
+ notify = list_first_entry(&affinity_list,
|
||||
+ struct irq_affinity_notify, list);
|
||||
+ list_del_init(¬ify->list);
|
||||
+ }
|
||||
+ raw_spin_unlock_irq(&affinity_list_lock);
|
||||
+
|
||||
+ if (!notify)
|
||||
+ continue;
|
||||
+ _irq_affinity_notify(notify);
|
||||
+ goto try_next;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static void init_helper_thread(void)
|
||||
+{
|
||||
+ if (set_affinity_helper)
|
||||
+ return;
|
||||
+ set_affinity_helper = kthread_run(set_affinity_thread, NULL,
|
||||
+ "affinity-cb");
|
||||
+ WARN_ON(IS_ERR(set_affinity_helper));
|
||||
+}
|
||||
+#else
|
||||
+
|
||||
+static inline void init_helper_thread(void) { }
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
|
||||
bool force)
|
||||
{
|
||||
@@ -220,7 +276,17 @@ int irq_set_affinity_locked(struct irq_d
|
||||
|
||||
if (desc->affinity_notify) {
|
||||
kref_get(&desc->affinity_notify->kref);
|
||||
+
|
||||
+#ifdef CONFIG_PREEMPT_RT_FULL
|
||||
+ raw_spin_lock(&affinity_list_lock);
|
||||
+ if (list_empty(&desc->affinity_notify->list))
|
||||
+ list_add_tail(&affinity_list,
|
||||
+ &desc->affinity_notify->list);
|
||||
+ raw_spin_unlock(&affinity_list_lock);
|
||||
+ wake_up_process(set_affinity_helper);
|
||||
+#else
|
||||
schedule_work(&desc->affinity_notify->work);
|
||||
+#endif
|
||||
}
|
||||
irqd_set(data, IRQD_AFFINITY_SET);
|
||||
|
||||
@@ -258,10 +324,8 @@ int irq_set_affinity_hint(unsigned int i
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
|
||||
|
||||
-static void irq_affinity_notify(struct work_struct *work)
|
||||
+static void _irq_affinity_notify(struct irq_affinity_notify *notify)
|
||||
{
|
||||
- struct irq_affinity_notify *notify =
|
||||
- container_of(work, struct irq_affinity_notify, work);
|
||||
struct irq_desc *desc = irq_to_desc(notify->irq);
|
||||
cpumask_var_t cpumask;
|
||||
unsigned long flags;
|
||||
@@ -283,6 +347,13 @@ static void irq_affinity_notify(struct w
|
||||
kref_put(¬ify->kref, notify->release);
|
||||
}
|
||||
|
||||
+static void irq_affinity_notify(struct work_struct *work)
|
||||
+{
|
||||
+ struct irq_affinity_notify *notify =
|
||||
+ container_of(work, struct irq_affinity_notify, work);
|
||||
+ _irq_affinity_notify(notify);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* irq_set_affinity_notifier - control notification of IRQ affinity changes
|
||||
* @irq: Interrupt for which to enable/disable notification
|
||||
@@ -312,6 +383,8 @@ irq_set_affinity_notifier(unsigned int i
|
||||
notify->irq = irq;
|
||||
kref_init(¬ify->kref);
|
||||
INIT_WORK(¬ify->work, irq_affinity_notify);
|
||||
+ INIT_LIST_HEAD(¬ify->list);
|
||||
+ init_helper_thread();
|
||||
}
|
||||
|
||||
raw_spin_lock_irqsave(&desc->lock, flags);
|
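The pattern here: a caller that holds a raw (non-sleeping) lock only enqueues the notification and wakes a dedicated helper thread, which runs the possibly-sleeping callback in process context. A userspace sketch with pthreads (a condition variable replaces the kthread wake/sleep protocol; names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t list_cond = PTHREAD_COND_INITIALIZER;
static int pending;	/* stand-in for the affinity_list */

static void *affinity_helper(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&list_lock);
	while (!pending)
		pthread_cond_wait(&list_cond, &list_lock);
	pending--;
	pthread_mutex_unlock(&list_lock);
	puts("notify callback runs in process context");	/* may sleep */
	return NULL;
}

static void queue_affinity_notify(void)	/* cheap producer side */
{
	pthread_mutex_lock(&list_lock);
	pending++;
	pthread_mutex_unlock(&list_lock);
	pthread_cond_signal(&list_cond);
}

int main(void)
{
	pthread_t helper;
	pthread_create(&helper, NULL, affinity_helper, NULL);
	queue_affinity_notify();
	pthread_join(helper, NULL);
	return 0;
}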
|
@@ -0,0 +1,49 @@
|
|||
Subject: genirq: Force interrupt thread on RT
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Sun, 03 Apr 2011 11:57:29 +0200
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
Force threaded irqs and optimize the code (force_irqthreads) with regard
|
||||
to this.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/interrupt.h | 6 +++++-
|
||||
kernel/irq/manage.c | 2 ++
|
||||
2 files changed, 7 insertions(+), 1 deletion(-)
|
||||
|
||||
--- a/include/linux/interrupt.h
|
||||
+++ b/include/linux/interrupt.h
|
||||
@@ -377,9 +377,13 @@ extern int irq_set_irqchip_state(unsigne
|
||||
bool state);
|
||||
|
||||
#ifdef CONFIG_IRQ_FORCED_THREADING
|
||||
+# ifndef CONFIG_PREEMPT_RT_BASE
|
||||
extern bool force_irqthreads;
|
||||
+# else
|
||||
+# define force_irqthreads (true)
|
||||
+# endif
|
||||
#else
|
||||
-#define force_irqthreads (0)
|
||||
+#define force_irqthreads (false)
|
||||
#endif
|
||||
|
||||
#ifndef __ARCH_SET_SOFTIRQ_PENDING
|
||||
--- a/kernel/irq/manage.c
|
||||
+++ b/kernel/irq/manage.c
|
||||
@@ -22,6 +22,7 @@
|
||||
#include "internals.h"
|
||||
|
||||
#ifdef CONFIG_IRQ_FORCED_THREADING
|
||||
+# ifndef CONFIG_PREEMPT_RT_BASE
|
||||
__read_mostly bool force_irqthreads;
|
||||
|
||||
static int __init setup_forced_irqthreads(char *arg)
|
||||
@@ -30,6 +31,7 @@ static int __init setup_forced_irqthread
|
||||
return 0;
|
||||
}
|
||||
early_param("threadirqs", setup_forced_irqthreads);
|
||||
+# endif
|
||||
#endif
|
||||
|
||||
static void __synchronize_hardirq(struct irq_desc *desc)
|
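Making force_irqthreads the compile-time constant true on RT lets the compiler drop every non-threaded branch, and the "threadirqs" boot parameter parsing is compiled out with it. The effect in miniature (the config macro is a stand-in for the Kconfig symbol):

#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_PREEMPT_RT_BASE
# define force_irqthreads (true)	/* constant: dead branches fold away */
#else
static bool force_irqthreads;		/* runtime flag, set by "threadirqs" */
#endif

int main(void)
{
	if (force_irqthreads)
		puts("handlers run in irq threads");
	else
		puts("handlers may run in hard irq context");
	return 0;
}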
|
@@ -0,0 +1,317 @@
|
|||
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
Date: Thu, 12 Feb 2015 16:01:13 +0100
|
||||
Subject: gpio: omap: use raw locks for locking
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
This patch converts gpio_bank.lock from a spin_lock into a
|
||||
raw_spin_lock. The call path to access this lock is always under a
|
||||
raw_spin_lock, for instance
|
||||
- __setup_irq() holds &desc->lock with irq off
|
||||
+ __irq_set_trigger()
|
||||
+ omap_gpio_irq_type()
|
||||
|
||||
- handle_level_irq() (runs with irqs off therefore raw locks)
|
||||
+ mask_ack_irq()
|
||||
+ omap_gpio_mask_irq()
|
||||
|
||||
This fixes the obvious backtrace on -RT. However the locking vs context
|
||||
is still not correct, and this is not limited to -RT:
|
||||
- omap_gpio_irq_type() is called with IRQ off and has a conditional
|
||||
call to pm_runtime_get_sync() which may sleep. Either it may happen or
|
||||
it may not happen but pm_runtime_get_sync() should not be called with
|
||||
irqs off.
|
||||
|
||||
- omap_gpio_debounce() is holding the lock with IRQs off.
|
||||
+ omap2_set_gpio_debounce()
|
||||
+ clk_prepare_enable()
|
||||
+ clk_prepare() this one might sleep.
|
||||
The number of users of gpiod_set_debounce() / gpio_set_debounce()
|
||||
looks low, but this is still not good.
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
drivers/gpio/gpio-omap.c | 78 +++++++++++++++++++++++------------------------
|
||||
1 file changed, 39 insertions(+), 39 deletions(-)
|
||||
|
||||
--- a/drivers/gpio/gpio-omap.c
|
||||
+++ b/drivers/gpio/gpio-omap.c
|
||||
@@ -57,7 +57,7 @@ struct gpio_bank {
|
||||
u32 saved_datain;
|
||||
u32 level_mask;
|
||||
u32 toggle_mask;
|
||||
- spinlock_t lock;
|
||||
+ raw_spinlock_t lock;
|
||||
struct gpio_chip chip;
|
||||
struct clk *dbck;
|
||||
u32 mod_usage;
|
||||
@@ -498,14 +498,14 @@ static int omap_gpio_irq_type(struct irq
|
||||
(type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH)))
|
||||
return -EINVAL;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
retval = omap_set_gpio_triggering(bank, offset, type);
|
||||
omap_gpio_init_irq(bank, offset);
|
||||
if (!omap_gpio_is_input(bank, offset)) {
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return -EINVAL;
|
||||
}
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH))
|
||||
__irq_set_handler_locked(d->irq, handle_level_irq);
|
||||
@@ -626,14 +626,14 @@ static int omap_set_gpio_wakeup(struct g
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
if (enable)
|
||||
bank->context.wake_en |= gpio_bit;
|
||||
else
|
||||
bank->context.wake_en &= ~gpio_bit;
|
||||
|
||||
writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -668,7 +668,7 @@ static int omap_gpio_request(struct gpio
|
||||
if (!BANK_USED(bank))
|
||||
pm_runtime_get_sync(bank->dev);
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
/* Set trigger to none. You need to enable the desired trigger with
|
||||
* request_irq() or set_irq_type(). Only do this if the IRQ line has
|
||||
* not already been requested.
|
||||
@@ -678,7 +678,7 @@ static int omap_gpio_request(struct gpio
|
||||
omap_enable_gpio_module(bank, offset);
|
||||
}
|
||||
bank->mod_usage |= BIT(offset);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -688,11 +688,11 @@ static void omap_gpio_free(struct gpio_c
|
||||
struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip);
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
bank->mod_usage &= ~(BIT(offset));
|
||||
omap_disable_gpio_module(bank, offset);
|
||||
omap_reset_gpio(bank, offset);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
/*
|
||||
* If this is the last gpio to be freed in the bank,
|
||||
@@ -794,9 +794,9 @@ static unsigned int omap_gpio_irq_startu
|
||||
if (!BANK_USED(bank))
|
||||
pm_runtime_get_sync(bank->dev);
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
omap_gpio_init_irq(bank, offset);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
omap_gpio_unmask_irq(d);
|
||||
|
||||
return 0;
|
||||
@@ -808,11 +808,11 @@ static void omap_gpio_irq_shutdown(struc
|
||||
unsigned long flags;
|
||||
unsigned offset = d->hwirq;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
bank->irq_usage &= ~(BIT(offset));
|
||||
omap_disable_gpio_module(bank, offset);
|
||||
omap_reset_gpio(bank, offset);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
/*
|
||||
* If this is the last IRQ to be freed in the bank,
|
||||
@@ -836,10 +836,10 @@ static void omap_gpio_mask_irq(struct ir
|
||||
unsigned offset = d->hwirq;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
omap_set_gpio_irqenable(bank, offset, 0);
|
||||
omap_set_gpio_triggering(bank, offset, IRQ_TYPE_NONE);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
}
|
||||
|
||||
static void omap_gpio_unmask_irq(struct irq_data *d)
|
||||
@@ -849,7 +849,7 @@ static void omap_gpio_unmask_irq(struct
|
||||
u32 trigger = irqd_get_trigger_type(d);
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
if (trigger)
|
||||
omap_set_gpio_triggering(bank, offset, trigger);
|
||||
|
||||
@@ -861,7 +861,7 @@ static void omap_gpio_unmask_irq(struct
|
||||
}
|
||||
|
||||
omap_set_gpio_irqenable(bank, offset, 1);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------*/
|
||||
@@ -874,9 +874,9 @@ static int omap_mpuio_suspend_noirq(stru
|
||||
OMAP_MPUIO_GPIO_MASKIT / bank->stride;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -889,9 +889,9 @@ static int omap_mpuio_resume_noirq(struc
|
||||
OMAP_MPUIO_GPIO_MASKIT / bank->stride;
|
||||
unsigned long flags;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
writel_relaxed(bank->context.wake_en, mask_reg);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -937,9 +937,9 @@ static int omap_gpio_get_direction(struc
|
||||
|
||||
bank = container_of(chip, struct gpio_bank, chip);
|
||||
reg = bank->base + bank->regs->direction;
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
dir = !!(readl_relaxed(reg) & BIT(offset));
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return dir;
|
||||
}
|
||||
|
||||
@@ -949,9 +949,9 @@ static int omap_gpio_input(struct gpio_c
|
||||
unsigned long flags;
|
||||
|
||||
bank = container_of(chip, struct gpio_bank, chip);
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
omap_set_gpio_direction(bank, offset, 1);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -973,10 +973,10 @@ static int omap_gpio_output(struct gpio_
|
||||
unsigned long flags;
|
||||
|
||||
bank = container_of(chip, struct gpio_bank, chip);
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
bank->set_dataout(bank, offset, value);
|
||||
omap_set_gpio_direction(bank, offset, 0);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -988,9 +988,9 @@ static int omap_gpio_debounce(struct gpi
|
||||
|
||||
bank = container_of(chip, struct gpio_bank, chip);
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
omap2_set_gpio_debounce(bank, offset, debounce);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1001,9 +1001,9 @@ static void omap_gpio_set(struct gpio_ch
|
||||
unsigned long flags;
|
||||
|
||||
bank = container_of(chip, struct gpio_bank, chip);
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
bank->set_dataout(bank, offset, value);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------*/
|
||||
@@ -1199,7 +1199,7 @@ static int omap_gpio_probe(struct platfo
|
||||
else
|
||||
bank->set_dataout = omap_set_gpio_dataout_mask;
|
||||
|
||||
- spin_lock_init(&bank->lock);
|
||||
+ raw_spin_lock_init(&bank->lock);
|
||||
|
||||
/* Static mapping, never released */
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
||||
@@ -1246,7 +1246,7 @@ static int omap_gpio_runtime_suspend(str
|
||||
unsigned long flags;
|
||||
u32 wake_low, wake_hi;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
|
||||
/*
|
||||
* Only edges can generate a wakeup event to the PRCM.
|
||||
@@ -1299,7 +1299,7 @@ static int omap_gpio_runtime_suspend(str
|
||||
bank->get_context_loss_count(bank->dev);
|
||||
|
||||
omap_gpio_dbck_disable(bank);
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1314,7 +1314,7 @@ static int omap_gpio_runtime_resume(stru
|
||||
unsigned long flags;
|
||||
int c;
|
||||
|
||||
- spin_lock_irqsave(&bank->lock, flags);
|
||||
+ raw_spin_lock_irqsave(&bank->lock, flags);
|
||||
|
||||
/*
|
||||
* On the first resume during the probe, the context has not
|
||||
@@ -1350,14 +1350,14 @@ static int omap_gpio_runtime_resume(stru
|
||||
if (c != bank->context_loss_count) {
|
||||
omap_gpio_restore_context(bank);
|
||||
} else {
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!bank->workaround_enabled) {
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1412,7 +1412,7 @@ static int omap_gpio_runtime_resume(stru
|
||||
}
|
||||
|
||||
bank->workaround_enabled = false;
|
||||
- spin_unlock_irqrestore(&bank->lock, flags);
|
||||
+ raw_spin_unlock_irqrestore(&bank->lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
47
debian/patches/features/all/rt/hotplug-Use-set_cpus_allowed_ptr-in-sync_unplug_thre.patch
vendored
Normal file
|
@@ -0,0 +1,47 @@
|
|||
From: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
Date: Tue, 24 Mar 2015 08:14:49 +0100
|
||||
Subject: hotplug: Use set_cpus_allowed_ptr() in sync_unplug_thread()
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
do_set_cpus_allowed() is not safe vs ->sched_class change.
|
||||
|
||||
crash> bt
|
||||
PID: 11676 TASK: ffff88026f979da0 CPU: 22 COMMAND: "sync_unplug/22"
|
||||
#0 [ffff880274d25bc8] machine_kexec at ffffffff8103b41c
|
||||
#1 [ffff880274d25c18] crash_kexec at ffffffff810d881a
|
||||
#2 [ffff880274d25cd8] oops_end at ffffffff81525818
|
||||
#3 [ffff880274d25cf8] do_invalid_op at ffffffff81003096
|
||||
#4 [ffff880274d25d90] invalid_op at ffffffff8152d3de
|
||||
[exception RIP: set_cpus_allowed_rt+18]
|
||||
RIP: ffffffff8109e012 RSP: ffff880274d25e48 RFLAGS: 00010202
|
||||
RAX: ffffffff8109e000 RBX: ffff88026f979da0 RCX: ffff8802770cb6e8
|
||||
RDX: 0000000000000000 RSI: ffffffff81add700 RDI: ffff88026f979da0
|
||||
RBP: ffff880274d25e78 R8: ffffffff816112e0 R9: 0000000000000001
|
||||
R10: 0000000000000001 R11: 0000000000011940 R12: ffff88026f979da0
|
||||
R13: ffff8802770cb6d0 R14: ffff880274d25fd8 R15: 0000000000000000
|
||||
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
|
||||
#5 [ffff880274d25e60] do_set_cpus_allowed at ffffffff8108e65f
|
||||
#6 [ffff880274d25e80] sync_unplug_thread at ffffffff81058c08
|
||||
#7 [ffff880274d25ed8] kthread at ffffffff8107cad6
|
||||
#8 [ffff880274d25f50] ret_from_fork at ffffffff8152bbbc
|
||||
crash> task_struct ffff88026f979da0 | grep class
|
||||
sched_class = 0xffffffff816111e0 <fair_sched_class+64>,
|
||||
|
||||
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
|
||||
|
||||
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
|
||||
---
|
||||
kernel/cpu.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
--- a/kernel/cpu.c
|
||||
+++ b/kernel/cpu.c
|
||||
@@ -267,7 +267,7 @@ static int sync_unplug_thread(void *data
|
||||
* we don't want any more work on this CPU.
|
||||
*/
|
||||
current->flags &= ~PF_NO_SETAFFINITY;
|
||||
- do_set_cpus_allowed(current, cpu_present_mask);
|
||||
+ set_cpus_allowed_ptr(current, cpu_present_mask);
|
||||
migrate_me();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,205 @@
|
|||
Subject: hotplug: Lightweight get online cpus
|
||||
From: Thomas Gleixner <tglx@linutronix.de>
|
||||
Date: Wed, 15 Jun 2011 12:36:06 +0200
|
||||
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz
|
||||
|
||||
get_online_cpus() is a heavyweight function which involves a global
|
||||
mutex. migrate_disable() wants a simpler construct which prevents only
|
||||
a CPU from going down while a task is in a migrate-disabled section.
|
||||
|
||||
Implement a per cpu lockless mechanism, which serializes only in the
|
||||
real unplug case on a global mutex. That serialization affects only
|
||||
tasks on the cpu which should be brought down.
|
||||
|
||||
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
||||
---
|
||||
include/linux/cpu.h | 7 +--
|
||||
kernel/cpu.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 122 insertions(+), 4 deletions(-)
|
||||
|
||||
--- a/include/linux/cpu.h
|
||||
+++ b/include/linux/cpu.h
|
||||
@@ -221,9 +221,6 @@ static inline void smpboot_thread_init(v
|
||||
#endif /* CONFIG_SMP */
|
||||
extern struct bus_type cpu_subsys;
|
||||
|
||||
-static inline void pin_current_cpu(void) { }
|
||||
-static inline void unpin_current_cpu(void) { }
|
||||
-
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/* Stop CPUs going up and down. */
|
||||
|
||||
@@ -234,6 +231,8 @@ extern bool try_get_online_cpus(void);
|
||||
extern void put_online_cpus(void);
|
||||
extern void cpu_hotplug_disable(void);
|
||||
extern void cpu_hotplug_enable(void);
|
||||
+extern void pin_current_cpu(void);
|
||||
+extern void unpin_current_cpu(void);
|
||||
#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
|
||||
#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri)
|
||||
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
|
||||
@@ -252,6 +251,8 @@ static inline void cpu_hotplug_done(void
|
||||
#define put_online_cpus() do { } while (0)
|
||||
#define cpu_hotplug_disable() do { } while (0)
|
||||
#define cpu_hotplug_enable() do { } while (0)
|
||||
+static inline void pin_current_cpu(void) { }
|
||||
+static inline void unpin_current_cpu(void) { }
|
||||
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
||||
#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
|
||||
/* These aren't inline functions due to a GCC bug. */
|
||||
--- a/kernel/cpu.c
|
||||
+++ b/kernel/cpu.c
|
||||
@@ -88,6 +88,100 @@ static struct {
|
||||
#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
|
||||
#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
|
||||
|
||||
+struct hotplug_pcp {
|
||||
+ struct task_struct *unplug;
|
||||
+ int refcount;
|
||||
+ struct completion synced;
|
||||
+};
|
||||
+
|
||||
+static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
|
||||
+
|
||||
+/**
|
||||
+ * pin_current_cpu - Prevent the current cpu from being unplugged
|
||||
+ *
|
||||
+ * Lightweight version of get_online_cpus() to prevent cpu from being
|
||||
+ * unplugged when code runs in a migration disabled region.
|
||||
+ *
|
||||
+ * Must be called with preemption disabled (preempt_count = 1)!
|
||||
+ */
|
||||
+void pin_current_cpu(void)
|
||||
+{
|
||||
+ struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
|
||||
+
|
||||
+retry:
|
||||
+ if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
|
||||
+ hp->unplug == current) {
|
||||
+ hp->refcount++;
|
||||
+ return;
|
||||
+ }
|
||||
+ preempt_enable();
|
||||
+ mutex_lock(&cpu_hotplug.lock);
|
||||
+ mutex_unlock(&cpu_hotplug.lock);
|
||||
+ preempt_disable();
|
||||
+ goto retry;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * unpin_current_cpu - Allow unplug of current cpu
|
||||
+ *
|
+ * Must be called with preemption or interrupts disabled!
+ */
+void unpin_current_cpu(void)
+{
+	struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp);
+
+	WARN_ON(hp->refcount <= 0);
+
+	/* This is safe. sync_unplug_thread is pinned to this cpu */
+	if (!--hp->refcount && hp->unplug && hp->unplug != current)
+		wake_up_process(hp->unplug);
+}
+
+/*
+ * FIXME: Is this really correct under all circumstances ?
+ */
+static int sync_unplug_thread(void *data)
+{
+	struct hotplug_pcp *hp = data;
+
+	preempt_disable();
+	hp->unplug = current;
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	while (hp->refcount) {
+		schedule_preempt_disabled();
+		set_current_state(TASK_UNINTERRUPTIBLE);
+	}
+	set_current_state(TASK_RUNNING);
+	preempt_enable();
+	complete(&hp->synced);
+	return 0;
+}
+
+/*
+ * Start the sync_unplug_thread on the target cpu and wait for it to
+ * complete.
+ */
+static int cpu_unplug_begin(unsigned int cpu)
+{
+	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+	struct task_struct *tsk;
+
+	init_completion(&hp->synced);
+	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+	if (IS_ERR(tsk))
+		return (PTR_ERR(tsk));
+	kthread_bind(tsk, cpu);
+	wake_up_process(tsk);
+	wait_for_completion(&hp->synced);
+	return 0;
+}
+
+static void cpu_unplug_done(unsigned int cpu)
+{
+	struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+	hp->unplug = NULL;
+}
 
 void get_online_cpus(void)
 {
@@ -349,13 +443,14 @@ static int __ref take_cpu_down(void *_pa
 
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
-	int err, nr_calls = 0;
+	int mycpu, err, nr_calls = 0;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
 		.mod = mod,
 		.hcpu = hcpu,
 	};
+	cpumask_var_t cpumask;
 
 	if (num_online_cpus() == 1)
 		return -EBUSY;
@@ -363,7 +458,27 @@ static int __ref _cpu_down(unsigned int
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
+	/* Move the downtaker off the unplug cpu */
+	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
+	set_cpus_allowed_ptr(current, cpumask);
+	free_cpumask_var(cpumask);
+	preempt_disable();
+	mycpu = smp_processor_id();
+	if (mycpu == cpu) {
+		printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
+		preempt_enable();
+		return -EBUSY;
+	}
+	preempt_enable();
+
 	cpu_hotplug_begin();
+	err = cpu_unplug_begin(cpu);
+	if (err) {
+		printk("cpu_unplug_begin(%d) failed\n", cpu);
+		goto out_cancel;
+	}
 
 	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
 	if (err) {
@@ -427,6 +542,8 @@ static int __ref _cpu_down(unsigned int
 	check_for_tasks(cpu);
 
 out_release:
+	cpu_unplug_done(cpu);
+out_cancel:
 	cpu_hotplug_done();
 	if (!err)
 		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
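For orientation, here is a minimal sketch of how the refcount handshake above is meant to be used from the reader side. pin_current_cpu() is the assumed counterpart that increments hp->refcount (its body lies outside this excerpt), and the function below is hypothetical:

	/*
	 * Illustration only. While hp->refcount > 0, sync_unplug_thread() keeps
	 * looping in schedule_preempt_disabled() and _cpu_down() cannot complete,
	 * so the pinned section is safe against its CPU going away under it.
	 */
	static void example_pinned_section(void)
	{
		pin_current_cpu();		/* assumed: hp->refcount++ */
		/* ... work that must finish before this CPU is unplugged ... */
		unpin_current_cpu();		/* refcount--; wakes hp->unplug at zero */
	}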
25
debian/patches/features/all/rt/hotplug-sync_unplug-no-27-5cn-27-in-task-name.patch
vendored
Normal file
@@ -0,0 +1,25 @@
Subject: hotplug: sync_unplug: No "\n" in task name
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Sun, 16 Oct 2011 18:56:43 +0800
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Otherwise the output will look a little odd.

Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -167,7 +167,7 @@ static int cpu_unplug_begin(unsigned int
 	struct task_struct *tsk;
 
 	init_completion(&hp->synced);
-	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+	tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
 	if (IS_ERR(tsk))
 		return (PTR_ERR(tsk));
 	kthread_bind(tsk, cpu);
@@ -0,0 +1,40 @@
Subject: hotplug: Use migrate disable on unplug
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 19:35:29 +0200
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Migration needs to be disabled across the unplug handling to make
sure that the unplug thread is off the unplugged cpu.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/cpu.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -466,14 +466,13 @@ static int __ref _cpu_down(unsigned int
 	cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
 	set_cpus_allowed_ptr(current, cpumask);
 	free_cpumask_var(cpumask);
-	preempt_disable();
+	migrate_disable();
 	mycpu = smp_processor_id();
 	if (mycpu == cpu) {
 		printk(KERN_ERR "Yuck! Still on unplug CPU!\n");
-		preempt_enable();
+		migrate_enable();
 		return -EBUSY;
 	}
-	preempt_enable();
 
 	cpu_hotplug_begin();
 	err = cpu_unplug_begin(cpu);
@@ -546,6 +545,8 @@ static int __ref _cpu_down(unsigned int
 out_release:
 	cpu_unplug_done(cpu);
 out_cancel:
+	migrate_enable();
 	cpu_hotplug_done();
 	if (!err)
 		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
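A note on the substitution above, with a minimal sketch (illustration only, not part of the diff): on PREEMPT_RT, migrate_disable() pins the task to its current CPU while leaving it fully preemptible, so the pinned section may still take sleeping rt_mutex-based locks; preempt_disable() would make that illegal.

	static void example_cpu_local_section(void)
	{
		migrate_disable();	/* smp_processor_id() is now stable */
		/* ... may sleep, may take spinlock_t (rt_mutex) locks ... */
		migrate_enable();	/* the task may migrate again */
	}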
118
debian/patches/features/all/rt/hrtimer-Move-schedule_work-call-to-helper-thread.patch
vendored
Normal file
@@ -0,0 +1,118 @@
From: Yang Shi <yang.shi@windriver.com>
Date: Mon, 16 Sep 2013 14:09:19 -0700
Subject: hrtimer: Move schedule_work call to helper thread
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

When running the ltp leapsec_timer test, the following call trace is caught:

BUG: sleeping function called from invalid context at kernel/rtmutex.c:659
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
Preemption disabled at:[<ffffffff810857f3>] cpu_startup_entry+0x133/0x310

CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.10.10-rt3 #2
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 08/18/2010
 ffffffff81c2f800 ffff880076843e40 ffffffff8169918d ffff880076843e58
 ffffffff8106db31 ffff88007684b4a0 ffff880076843e70 ffffffff8169d9c0
 ffff88007684b4a0 ffff880076843eb0 ffffffff81059da1 0000001876851200
Call Trace:
 <IRQ>  [<ffffffff8169918d>] dump_stack+0x19/0x1b
 [<ffffffff8106db31>] __might_sleep+0xf1/0x170
 [<ffffffff8169d9c0>] rt_spin_lock+0x20/0x50
 [<ffffffff81059da1>] queue_work_on+0x61/0x100
 [<ffffffff81065aa1>] clock_was_set_delayed+0x21/0x30
 [<ffffffff810883be>] do_timer+0x40e/0x660
 [<ffffffff8108f487>] tick_do_update_jiffies64+0xf7/0x140
 [<ffffffff8108fe42>] tick_check_idle+0x92/0xc0
 [<ffffffff81044327>] irq_enter+0x57/0x70
 [<ffffffff816a040e>] smp_apic_timer_interrupt+0x3e/0x9b
 [<ffffffff8169f80a>] apic_timer_interrupt+0x6a/0x70
 <EOI>  [<ffffffff8155ea1c>] ? cpuidle_enter_state+0x4c/0xc0
 [<ffffffff8155eb68>] cpuidle_idle_call+0xd8/0x2d0
 [<ffffffff8100b59e>] arch_cpu_idle+0xe/0x30
 [<ffffffff8108585e>] cpu_startup_entry+0x19e/0x310
 [<ffffffff8168efa2>] start_secondary+0x1ad/0x1b0

clock_was_set_delayed() is called from the hard IRQ handler (timer interrupt),
and it calls schedule_work().

Under PREEMPT_RT_FULL, schedule_work() takes spinlocks which can sleep, so it
is not safe to call schedule_work() in interrupt context.

Reference upstream commit b68d61c705ef02384c0538b8d9374545097899ca
(rt,ntp: Move call to schedule_delayed_work() to helper thread)
from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git, which
makes a similar change.

Add a helper thread which does the call to schedule_work() and wake that
thread up instead of calling schedule_work() directly.

Signed-off-by: Yang Shi <yang.shi@windriver.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/time/hrtimer.c |   40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -48,6 +48,7 @@
 #include <linux/sched/rt.h>
 #include <linux/sched/deadline.h>
 #include <linux/timer.h>
+#include <linux/kthread.h>
 #include <linux/freezer.h>
 
 #include <asm/uaccess.h>
@@ -713,6 +714,44 @@ static void clock_was_set_work(struct wo
 
 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT can not call schedule_work from real interrupt context.
+ * Need to make a thread to do the real work.
+ */
+static struct task_struct *clock_set_delay_thread;
+static bool do_clock_set_delay;
+
+static int run_clock_set_delay(void *ignore)
+{
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (do_clock_set_delay) {
+			do_clock_set_delay = false;
+			schedule_work(&hrtimer_work);
+		}
+		schedule();
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+void clock_was_set_delayed(void)
+{
+	do_clock_set_delay = true;
+	/* Make visible before waking up process */
+	smp_wmb();
+	wake_up_process(clock_set_delay_thread);
+}
+
+static __init int create_clock_set_delay_thread(void)
+{
+	clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd");
+	BUG_ON(!clock_set_delay_thread);
+	return 0;
+}
+early_initcall(create_clock_set_delay_thread);
+#else /* PREEMPT_RT_FULL */
 /*
  * Called from timekeeping and resume code to reprogramm the hrtimer
  * interrupt device on all cpus.
@@ -721,6 +760,7 @@ void clock_was_set_delayed(void)
 {
 	schedule_work(&hrtimer_work);
 }
+#endif
 
 #else
 
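The shape of this fix generalizes beyond clock_was_set_delayed(): hard-IRQ context only sets a flag and wakes a dedicated kthread, and everything that may take sleeping locks runs in the kthread. A condensed sketch of the pairing, with hypothetical names and the same ordering as the patch:

	static struct task_struct *helper;	/* started with kthread_run() at init */
	static bool pending;

	static void kick_helper(void)		/* hard IRQ side: flag + wakeup only */
	{
		pending = true;
		smp_wmb();			/* publish flag before the wakeup */
		wake_up_process(helper);	/* safe from hard IRQ context */
	}

	static int helper_fn(void *unused)	/* thread side: may sleep */
	{
		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (pending) {
				pending = false;
				/* ... schedule_work() or other sleeping-lock work ... */
			}
			schedule();
		}
		return 0;
	}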
463
debian/patches/features/all/rt/hrtimer-fixup-hrtimer-callback-changes-for-preempt-r.patch
vendored
Normal file
@@ -0,0 +1,463 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:31 -0500
Subject: hrtimer: Fixup hrtimer callback changes for preempt-rt
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

In preempt-rt we cannot call callbacks which take sleeping locks
from timer interrupt context.

Bring back the softirq split for now, until we have fixed the signal
delivery problem for real.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

---
 include/linux/hrtimer.h  |    3 
 kernel/sched/core.c      |    1 
 kernel/sched/rt.c        |    1 
 kernel/time/hrtimer.c    |  219 +++++++++++++++++++++++++++++++++++++++++------
 kernel/time/tick-sched.c |    1 
 kernel/watchdog.c        |    1 
 6 files changed, 200 insertions(+), 26 deletions(-)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -111,6 +111,8 @@ struct hrtimer {
 	enum hrtimer_restart (*function)(struct hrtimer *);
 	struct hrtimer_clock_base *base;
 	unsigned long state;
+	struct list_head cb_entry;
+	int irqsafe;
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
 	ktime_t praecox;
 #endif
@@ -150,6 +152,7 @@ struct hrtimer_clock_base {
 	int index;
 	clockid_t clockid;
 	struct timerqueue_head active;
+	struct list_head expired;
 	ktime_t resolution;
 	ktime_t (*get_time)(void);
 	ktime_t softirq_time;
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -461,6 +461,7 @@ static void init_rq_hrtick(struct rq *rq
 
 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rq->hrtick_timer.function = hrtick;
+	rq->hrtick_timer.irqsafe = 1;
 }
 #else	/* CONFIG_SCHED_HRTICK */
 static inline void hrtick_clear(struct rq *rq)
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -44,6 +44,7 @@ void init_rt_bandwidth(struct rt_bandwid
 
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rt_b->rt_period_timer.irqsafe = 1;
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
 }
 
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -577,8 +577,7 @@ static int hrtimer_reprogram(struct hrti
 	 * When the callback is running, we do not reprogram the clock event
 	 * device. The timer callback is either running on a different CPU or
 	 * the callback is executed in the hrtimer_interrupt context. The
-	 * reprogramming is handled either by the softirq, which called the
-	 * callback or at the end of the hrtimer_interrupt.
+	 * reprogramming is handled at the end of the hrtimer_interrupt.
 	 */
 	if (hrtimer_callback_running(timer))
 		return 0;
@@ -622,6 +621,9 @@ static int hrtimer_reprogram(struct hrti
 	return res;
 }
 
+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
+static int hrtimer_rt_defer(struct hrtimer *timer);
+
 /*
  * Initialize the high resolution related parts of cpu_base
  */
@@ -631,6 +633,21 @@ static inline void hrtimer_init_hres(str
 	base->hres_active = 0;
 }
 
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
+{
+	if (!hrtimer_reprogram(timer, base))
+		return 0;
+	if (!wakeup)
+		return -ETIME;
+#ifdef CONFIG_PREEMPT_RT_BASE
+	if (!hrtimer_rt_defer(timer))
+		return -ETIME;
+#endif
+	return 1;
+}
+
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
 	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
@@ -712,6 +729,13 @@ static inline int hrtimer_is_hres_enable
 static inline int hrtimer_switch_to_hres(void) { return 0; }
 static inline void
 hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { }
+static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
+					    struct hrtimer_clock_base *base,
+					    int wakeup)
+{
+	return 0;
+}
+
 static inline int hrtimer_reprogram(struct hrtimer *timer,
 				    struct hrtimer_clock_base *base)
 {
@@ -719,7 +743,6 @@ static inline int hrtimer_reprogram(stru
 }
 static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
 static inline void retrigger_next_event(void *arg) { }
-
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
 /*
@@ -854,9 +877,9 @@ void hrtimer_wait_for_timer(const struct
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	if (base && base->cpu_base && !hrtimer_hres_active())
+	if (base && base->cpu_base && !timer->irqsafe)
 		wait_event(base->cpu_base->wait,
-			   !(timer->state & HRTIMER_STATE_CALLBACK));
+			   !(timer->state & HRTIMER_STATE_CALLBACK));
 }
 
 #else
@@ -906,6 +929,11 @@ static void __remove_hrtimer(struct hrti
 	if (!(timer->state & HRTIMER_STATE_ENQUEUED))
 		goto out;
 
+	if (unlikely(!list_empty(&timer->cb_entry))) {
+		list_del_init(&timer->cb_entry);
+		goto out;
+	}
+
 	next_timer = timerqueue_getnext(&base->active);
 	timerqueue_del(&base->active, &timer->node);
 	if (&timer->node == next_timer) {
@@ -1016,15 +1044,26 @@ int __hrtimer_start_range_ns(struct hrti
 		 * on dynticks target.
 		 */
 		wake_up_nohz_cpu(new_base->cpu_base->cpu);
-	} else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases) &&
-			hrtimer_reprogram(timer, new_base)) {
+	} else if (new_base->cpu_base == this_cpu_ptr(&hrtimer_bases)) {
+
+		ret = hrtimer_enqueue_reprogram(timer, new_base, wakeup);
+		if (ret < 0) {
+			/*
+			 * In case we failed to reprogram the timer (mostly
+			 * because our current timer is already elapsed),
+			 * remove it again and report a failure. This avoids
+			 * stale base->first entries.
+			 */
+			debug_deactivate(timer);
+			__remove_hrtimer(timer, new_base,
+					timer->state & HRTIMER_STATE_CALLBACK, 0);
+		} else if (ret > 0) {
 		/*
 		 * Only allow reprogramming if the new base is on this CPU.
 		 * (it might still be on another CPU if the timer was pending)
 		 *
 		 * XXX send_remote_softirq() ?
 		 */
-		if (wakeup) {
 			/*
 			 * We need to drop cpu_base->lock to avoid a
 			 * lock ordering issue vs. rq->lock.
@@ -1032,9 +1071,7 @@ int __hrtimer_start_range_ns(struct hrti
 			raw_spin_unlock(&new_base->cpu_base->lock);
 			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 			local_irq_restore(flags);
-			return ret;
-		} else {
-			__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+			return 0;
 		}
 	}
 
@@ -1189,6 +1226,7 @@ static void __hrtimer_init(struct hrtime
 
 	base = hrtimer_clockid_to_base(clock_id);
 	timer->base = &cpu_base->clock_base[base];
+	INIT_LIST_HEAD(&timer->cb_entry);
 	timerqueue_init(&timer->node);
 
 #ifdef CONFIG_TIMER_STATS
@@ -1272,10 +1310,128 @@ static void __run_hrtimer(struct hrtimer
 	timer->state &= ~HRTIMER_STATE_CALLBACK;
 }
 
-#ifdef CONFIG_HIGH_RES_TIMERS
-
 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
+				 struct hrtimer_clock_base *base)
+{
+	/*
+	 * Note, we clear the callback flag before we requeue the
+	 * timer otherwise we trigger the callback_running() check
+	 * in hrtimer_reprogram().
+	 */
+	timer->state &= ~HRTIMER_STATE_CALLBACK;
+
+	if (restart != HRTIMER_NORESTART) {
+		BUG_ON(hrtimer_active(timer));
+		/*
+		 * Enqueue the timer, if it's the leftmost timer then
+		 * we need to reprogram it.
+		 */
+		if (!enqueue_hrtimer(timer, base))
+			return;
+
+#ifndef CONFIG_HIGH_RES_TIMERS
+	}
+#else
+		if (base->cpu_base->hres_active &&
+		    hrtimer_reprogram(timer, base))
+			goto requeue;
+
+	} else if (hrtimer_active(timer)) {
+		/*
+		 * If the timer was rearmed on another CPU, reprogram
+		 * the event device.
+		 */
+		if (&timer->node == base->active.next &&
+		    base->cpu_base->hres_active &&
+		    hrtimer_reprogram(timer, base))
+			goto requeue;
+	}
+	return;
+
+requeue:
+	/*
+	 * Timer is expired. Thus move it from tree to pending list
+	 * again.
+	 */
+	__remove_hrtimer(timer, base, timer->state, 0);
+	list_add_tail(&timer->cb_entry, &base->expired);
+#endif
+}
+
+/*
+ * The changes in mainline which removed the callback modes from
+ * hrtimer are not yet working with -rt. The non wakeup_process()
+ * based callbacks which involve sleeping locks need to be treated
+ * separately.
+ */
+static void hrtimer_rt_run_pending(void)
+{
+	enum hrtimer_restart (*fn)(struct hrtimer *);
+	struct hrtimer_cpu_base *cpu_base;
+	struct hrtimer_clock_base *base;
+	struct hrtimer *timer;
+	int index, restart;
+
+	local_irq_disable();
+	cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
+
+	raw_spin_lock(&cpu_base->lock);
+
+	for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
+		base = &cpu_base->clock_base[index];
+
+		while (!list_empty(&base->expired)) {
+			timer = list_first_entry(&base->expired,
+						 struct hrtimer, cb_entry);
+
+			/*
+			 * Same as the above __run_hrtimer function
+			 * just that we run with interrupts enabled.
+			 */
+			debug_hrtimer_deactivate(timer);
+			__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+			timer_stats_account_hrtimer(timer);
+			fn = timer->function;
+
+			raw_spin_unlock_irq(&cpu_base->lock);
+			restart = fn(timer);
+			raw_spin_lock_irq(&cpu_base->lock);
+
+			hrtimer_rt_reprogram(restart, timer, base);
+		}
+	}
+
+	raw_spin_unlock_irq(&cpu_base->lock);
+
+	wake_up_timer_waiters(cpu_base);
+}
+
+static int hrtimer_rt_defer(struct hrtimer *timer)
+{
+	if (timer->irqsafe)
+		return 0;
+
+	__remove_hrtimer(timer, timer->base, timer->state, 0);
+	list_add_tail(&timer->cb_entry, &timer->base->expired);
+	return 1;
+}
+
+#else
+
+static inline void hrtimer_rt_run_pending(void)
+{
+	hrtimer_peek_ahead_timers();
+}
+
+static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
+
+#endif
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1284,7 +1440,7 @@ void hrtimer_interrupt(struct clock_even
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	ktime_t expires_next, now, entry_time, delta;
-	int i, retries = 0;
+	int i, retries = 0, raise = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
@@ -1343,7 +1499,10 @@ void hrtimer_interrupt(struct clock_even
 			if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer))
 				break;
 
-			__run_hrtimer(timer, &basenow);
+			if (!hrtimer_rt_defer(timer))
+				__run_hrtimer(timer, &basenow);
+			else
+				raise = 1;
 		}
 	}
 	/* Reevaluate the clock bases for the next expiry */
@@ -1360,6 +1519,10 @@ void hrtimer_interrupt(struct clock_even
 	if (expires_next.tv64 == KTIME_MAX ||
 	    !tick_program_event(expires_next, 0)) {
 		cpu_base->hang_detected = 0;
+
+		if (raise)
+			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
 		return;
 	}
 
@@ -1439,18 +1602,18 @@ void hrtimer_peek_ahead_timers(void)
 	__hrtimer_peek_ahead_timers();
 	local_irq_restore(flags);
 }
-
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
-	hrtimer_peek_ahead_timers();
-}
-
 #else /* CONFIG_HIGH_RES_TIMERS */
 
 static inline void __hrtimer_peek_ahead_timers(void) { }
 
 #endif	/* !CONFIG_HIGH_RES_TIMERS */
 
+
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+	hrtimer_rt_run_pending();
+}
+
 /*
  * Called from timer softirq every jiffy, expire hrtimers:
  *
@@ -1483,7 +1646,7 @@ void hrtimer_run_queues(void)
 	struct timerqueue_node *node;
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	int index, gettime = 1;
+	int index, gettime = 1, raise = 0;
 
 	if (hrtimer_hres_active())
 		return;
@@ -1508,12 +1671,16 @@ void hrtimer_run_queues(void)
 			    hrtimer_get_expires_tv64(timer))
 				break;
 
-			__run_hrtimer(timer, &base->softirq_time);
+			if (!hrtimer_rt_defer(timer))
+				__run_hrtimer(timer, &base->softirq_time);
+			else
+				raise = 1;
 		}
 		raw_spin_unlock(&cpu_base->lock);
 	}
 
-	wake_up_timer_waiters(cpu_base);
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 /*
@@ -1535,6 +1702,7 @@ static enum hrtimer_restart hrtimer_wake
 void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
 {
 	sl->timer.function = hrtimer_wakeup;
+	sl->timer.irqsafe = 1;
 	sl->task = task;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
@@ -1671,6 +1839,7 @@ static void init_hrtimers_cpu(int cpu)
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
+		INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
 	}
 
 	cpu_base->cpu = cpu;
@@ -1783,9 +1952,7 @@ void __init hrtimers_init(void)
 	hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
 			  (void *)(long)smp_processor_id());
 	register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
 	open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
 }
 
 /**
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1159,6 +1159,7 @@ void tick_setup_sched_timer(void)
 	 * Emulate tick processing via per-CPU hrtimers:
 	 */
 	hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ts->sched_timer.irqsafe = 1;
 	ts->sched_timer.function = tick_sched_timer;
 
 	/* Get the next period (per cpu) */
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -454,6 +454,7 @@ static void watchdog_enable(unsigned int
 	/* kick off the timer for the hardlockup detector */
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
+	hrtimer->irqsafe = 1;
 
 	/* Enable the perf event */
 	watchdog_nmi_enable(cpu);
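In sum, after this patch every hrtimer callback is pushed to HRTIMER_SOFTIRQ unless it opts in through the new irqsafe flag, as the hunks above do for the scheduler tick, sleeper and watchdog timers. A minimal sketch of the opt-in (illustration only):

	static enum hrtimer_restart my_irqsafe_cb(struct hrtimer *t)
	{
		/* runs from hrtimer_interrupt(): must not take sleeping locks */
		return HRTIMER_NORESTART;
	}

	static void my_timer_setup(struct hrtimer *t)
	{
		hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		t->function = my_irqsafe_cb;
		t->irqsafe = 1;	/* bypass hrtimer_rt_defer(), run in hard IRQ */
	}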
38
debian/patches/features/all/rt/hrtimer-raise-softirq-if-hrtimer-irq-stalled.patch
vendored
Normal file
@@ -0,0 +1,38 @@
Subject: hrtimer: Raise softirq if hrtimer irq stalled
From: Watanabe <shunsuke.watanabe@tel.com>
Date: Sun, 28 Oct 2012 11:13:44 +0100
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

When the hrtimer stall detection hits, the softirq is not raised.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 kernel/time/hrtimer.c |    9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1519,11 +1519,7 @@ void hrtimer_interrupt(struct clock_even
 	if (expires_next.tv64 == KTIME_MAX ||
 	    !tick_program_event(expires_next, 0)) {
 		cpu_base->hang_detected = 0;
-
-		if (raise)
-			raise_softirq_irqoff(HRTIMER_SOFTIRQ);
-
-		return;
+		goto out;
 	}
 
 /*
@@ -1567,6 +1563,9 @@ void hrtimer_interrupt(struct clock_even
 	tick_program_event(expires_next, 1);
 	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
 		    ktime_to_ns(delta));
+out:
+	if (raise)
+		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
 }
 
 /*
@@ -0,0 +1,196 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:34 -0500
Subject: hrtimers: Prepare full preemption
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

Make cancellation of a running callback in softirq context safe
against preemption.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 include/linux/hrtimer.h    |   10 ++++++++++
 kernel/time/hrtimer.c      |   33 ++++++++++++++++++++++++++++++++-
 kernel/time/itimer.c       |    1 +
 kernel/time/posix-timers.c |   33 +++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -197,6 +197,9 @@ struct hrtimer_cpu_base {
 	unsigned long			nr_hangs;
 	ktime_t				max_hang_time;
 #endif
+#ifdef CONFIG_PREEMPT_RT_BASE
+	wait_queue_head_t		wait;
+#endif
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 };
 
@@ -384,6 +387,13 @@ static inline int hrtimer_restart(struct
 	return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
+/* Softirq preemption could deadlock timer removal */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
+#else
+# define hrtimer_wait_for_timer(timer)	do { cpu_relax(); } while (0)
+#endif
+
 /* Query timers: */
 extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
 extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -837,6 +837,32 @@ u64 hrtimer_forward(struct hrtimer *time
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define wake_up_timer_waiters(b)	wake_up(&(b)->wait)
+
+/**
+ * hrtimer_wait_for_timer - Wait for a running timer
+ *
+ * @timer:	timer to wait for
+ *
+ * The function waits in case the timer's callback function is
+ * currently executed on the waitqueue of the timer base. The
+ * waitqueue is woken up after the timer callback function has
+ * finished execution.
+ */
+void hrtimer_wait_for_timer(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (base && base->cpu_base && !hrtimer_hres_active())
+		wait_event(base->cpu_base->wait,
+			   !(timer->state & HRTIMER_STATE_CALLBACK));
+}
+
+#else
+# define wake_up_timer_waiters(b)	do { } while (0)
+#endif
+
 /*
  * enqueue_hrtimer - internal function to (re)start a timer
  *
@@ -1099,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer
 
 		if (ret >= 0)
 			return ret;
-		cpu_relax();
+		hrtimer_wait_for_timer(timer);
 	}
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1486,6 +1512,8 @@ void hrtimer_run_queues(void)
 		}
 		raw_spin_unlock(&cpu_base->lock);
 	}
+
+	wake_up_timer_waiters(cpu_base);
 }
 
 /*
@@ -1647,6 +1675,9 @@ static void init_hrtimers_cpu(int cpu)
 
 	cpu_base->cpu = cpu;
 	hrtimer_init_hres(cpu_base);
+#ifdef CONFIG_PREEMPT_RT_BASE
+	init_waitqueue_head(&cpu_base->wait);
+#endif
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -213,6 +213,7 @@ int do_setitimer(int which, struct itime
 		/* We are sharing ->siglock with it_real_fn() */
 		if (hrtimer_try_to_cancel(timer) < 0) {
 			spin_unlock_irq(&tsk->sighand->siglock);
+			hrtimer_wait_for_timer(&tsk->signal->real_timer);
 			goto again;
 		}
 		expires = timeval_to_ktime(value->it_value);
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -821,6 +821,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
 	return overrun;
 }
 
+/*
+ * Protected by RCU!
+ */
+static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
+{
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (kc->timer_set == common_timer_set)
+		hrtimer_wait_for_timer(&timr->it.real.timer);
+	else
+		/* FIXME: Whacky hack for posix-cpu-timers */
+		schedule_timeout(1);
+#endif
+}
+
 /* Set a POSIX.1b interval timer. */
 /* timr->it_lock is taken. */
 static int
@@ -898,6 +912,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t,
 	if (!timr)
 		return -EINVAL;
 
+	rcu_read_lock();
 	kc = clockid_to_kclock(timr->it_clock);
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
@@ -906,9 +921,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t,
 
 	unlock_timer(timr, flag);
 	if (error == TIMER_RETRY) {
+		timer_wait_for_callback(kc, timr);
 		rtn = NULL;	// We already got the old time...
+		rcu_read_unlock();
 		goto retry;
 	}
+	rcu_read_unlock();
 
 	if (old_setting && !error &&
 	    copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
@@ -946,10 +964,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, t
 	if (!timer)
 		return -EINVAL;
 
+	rcu_read_lock();
 	if (timer_delete_hook(timer) == TIMER_RETRY) {
 		unlock_timer(timer, flags);
+		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+					timer);
+		rcu_read_unlock();
 		goto retry_delete;
 	}
+	rcu_read_unlock();
 
 	spin_lock(&current->sighand->siglock);
 	list_del(&timer->list);
@@ -975,8 +998,18 @@ static void itimer_delete(struct k_itime
 retry_delete:
 	spin_lock_irqsave(&timer->it_lock, flags);
 
+	/* On RT we can race with a deletion */
+	if (!timer->it_signal) {
+		unlock_timer(timer, flags);
+		return;
+	}
+
 	if (timer_delete_hook(timer) == TIMER_RETRY) {
+		rcu_read_lock();
 		unlock_timer(timer, flags);
+		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+					timer);
+		rcu_read_unlock();
 		goto retry_delete;
 	}
 	list_del(&timer->list);
26
debian/patches/features/all/rt/hwlat-detector-Don-t-ignore-threshold-module-paramet.patch
vendored
Normal file
@@ -0,0 +1,26 @@
From: Mike Galbraith <bitbucket@online.de>
Date: Fri, 30 Aug 2013 07:57:25 +0200
Subject: hwlat-detector: Don't ignore threshold module parameter
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

If the user specified a threshold at module load time, use it.

Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Mike Galbraith <bitbucket@online.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/misc/hwlat_detector.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -414,7 +414,7 @@ static int init_stats(void)
 		goto out;
 
 	__reset_stats();
-	data.threshold = DEFAULT_LAT_THRESHOLD;	    /* threshold us */
+	data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */
 	data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */
 	data.sample_width = DEFAULT_SAMPLE_WIDTH;   /* width us */
 
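The "threshold ?: DEFAULT_LAT_THRESHOLD" form is GNU C's conditional operator with an omitted middle operand: it evaluates to threshold when threshold is non-zero and to the default otherwise, i.e. it is shorthand for:

	data.threshold = threshold ? threshold : DEFAULT_LAT_THRESHOLD;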
126
debian/patches/features/all/rt/hwlat-detector-Update-hwlat_detector-to-add-outer-lo.patch
vendored
Normal file
@@ -0,0 +1,126 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 19 Aug 2013 17:33:25 -0400
Subject: hwlat-detector: Update hwlat_detector to add outer loop detection
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

The hwlat_detector reads two timestamps in a row, then reports any
gap between those calls. The problem is, it misses everything between
the second reading of the time stamp and the first reading of the time stamp
in the next loop. That's where most of the time is spent, which means
chances are likely that it will miss all hardware latencies. This
defeats the purpose.

By also testing the first time stamp against the previous loop's second
time stamp (the outer loop), we are more likely to find a latency.

Setting the threshold to 1, here's what the report now looks like:

1347415723.0232202770	0	2
1347415725.0234202822	0	2
1347415727.0236202875	0	2
1347415729.0238202928	0	2
1347415731.0240202980	0	2
1347415734.0243203061	0	2
1347415736.0245203113	0	2
1347415738.0247203166	2	0
1347415740.0249203219	0	3
1347415742.0251203272	0	3
1347415743.0252203299	0	3
1347415745.0254203351	0	2
1347415747.0256203404	0	2
1347415749.0258203457	0	2
1347415751.0260203510	0	2
1347415754.0263203589	0	2
1347415756.0265203642	0	2
1347415758.0267203695	0	2
1347415760.0269203748	0	2
1347415762.0271203801	0	2
1347415764.0273203853	2	0

There's some hardware latency that takes 2 microseconds to run.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/misc/hwlat_detector.c |   32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -143,6 +143,7 @@ static void detector_exit(void);
 struct sample {
 	u64		seqnum;		/* unique sequence */
 	u64		duration;	/* ktime delta */
+	u64		outer_duration;	/* ktime delta (outer loop) */
 	struct timespec	timestamp;	/* wall time */
 	unsigned long   lost;
 };
@@ -219,11 +220,13 @@ static struct sample *buffer_get_sample(
 */
 static int get_sample(void *unused)
 {
-	ktime_t start, t1, t2;
+	ktime_t start, t1, t2, last_t2;
 	s64 diff, total = 0;
 	u64 sample = 0;
+	u64 outer_sample = 0;
 	int ret = 1;
 
+	last_t2.tv64 = 0;
 	start = ktime_get(); /* start timestamp */
 
 	do {
@@ -231,7 +234,22 @@ static int get_sample(void *unused)
 		t1 = ktime_get();	/* we'll look for a discontinuity */
 		t2 = ktime_get();
 
+		if (last_t2.tv64) {
+			/* Check the delta from outer loop (t2 to next t1) */
+			diff = ktime_to_us(ktime_sub(t1, last_t2));
+			/* This shouldn't happen */
+			if (diff < 0) {
+				pr_err(BANNER "time running backwards\n");
+				goto out;
+			}
+			if (diff > outer_sample)
+				outer_sample = diff;
+		}
+		last_t2 = t2;
+
 		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
+
+		/* This checks the inner loop (t1 to t2) */
 		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
 
 		/* This shouldn't happen */
@@ -246,12 +264,13 @@ static int get_sample(void *unused)
 	} while (total <= data.sample_width);
 
 	/* If we exceed the threshold value, we have found a hardware latency */
-	if (sample > data.threshold) {
+	if (sample > data.threshold || outer_sample > data.threshold) {
 		struct sample s;
 
 		data.count++;
 		s.seqnum = data.count;
 		s.duration = sample;
+		s.outer_duration = outer_sample;
 		s.timestamp = CURRENT_TIME;
 		__buffer_add_sample(&s);
 
@@ -738,10 +757,11 @@ static ssize_t debug_sample_fread(struct
 		}
 	}
 
-	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
-		      sample->timestamp.tv_sec,
-		      sample->timestamp.tv_nsec,
-		      sample->duration);
+	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
+		      sample->timestamp.tv_sec,
+		      sample->timestamp.tv_nsec,
+		      sample->duration,
+		      sample->outer_duration);
 
 
 	/* handling partial reads is more trouble than it's worth */
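To make the inner/outer distinction concrete, each loop iteration now measures two windows; a hardware latency (an SMI, say) that lands between iterations shows up only in the outer one, which the old code never examined. A schematic paraphrase of the hunk above:

	t1 = ktime_get();
	t2 = ktime_get();
	inner = ktime_to_us(ktime_sub(t2, t1));       /* within this iteration */
	outer = ktime_to_us(ktime_sub(t1, last_t2));  /* since the previous
	                                                 iteration's second read */
	last_t2 = t2;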
184
debian/patches/features/all/rt/hwlat-detector-Use-thread-instead-of-stop-machine.patch
vendored
Normal file
@@ -0,0 +1,184 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Mon, 19 Aug 2013 17:33:27 -0400
Subject: hwlat-detector: Use thread instead of stop machine
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.1/patches-4.1.3-rt3.tar.xz

There's no reason to use stop machine to search for hardware latency.
Simply disabling interrupts while running the loop will do enough to
check if something comes in that wasn't disabled by interrupts being
off, which is exactly what stop machine does.

Instead of using stop machine, just have the thread disable interrupts
while it checks for hardware latency.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 drivers/misc/hwlat_detector.c |   60 ++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 34 deletions(-)

--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ring_buffer.h>
-#include <linux/stop_machine.h>
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/kthread.h>
@@ -107,7 +106,6 @@ struct data;	/* Global state */
 /* Sampling functions */
 static int __buffer_add_sample(struct sample *sample);
 static struct sample *buffer_get_sample(struct sample *sample);
-static int get_sample(void *unused);
 
 /* Threading and state */
 static int kthread_fn(void *unused);
@@ -149,7 +147,7 @@ struct sample {
 	unsigned long   lost;
 };
 
-/* keep the global state somewhere. Mostly used under stop_machine. */
+/* keep the global state somewhere. */
 static struct data {
 
 	struct mutex lock;		/* protect changes */
@@ -172,7 +170,7 @@ static struct data {
  * @sample: The new latency sample value
 *
 * This receives a new latency sample and records it in a global ring buffer.
- * No additional locking is used in this case - suited for stop_machine use.
+ * No additional locking is used in this case.
 */
 static int __buffer_add_sample(struct sample *sample)
 {
@@ -229,18 +227,18 @@ static struct sample *buffer_get_sample(
 #endif
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
- * @unused: This is not used but is a part of the stop_machine API
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
- * hardware-induced latency. Called under stop_machine, with data.lock held.
+ * hardware-induced latency. Called with interrupts disabled and with
+ * data.lock held.
 */
-static int get_sample(void *unused)
+static int get_sample(void)
 {
 	time_type start, t1, t2, last_t2;
 	s64 diff, total = 0;
 	u64 sample = 0;
 	u64 outer_sample = 0;
-	int ret = 1;
+	int ret = -1;
 
 	init_time(last_t2, 0);
 	start = time_get(); /* start timestamp */
@@ -279,10 +277,14 @@ static int get_sample(void *unused)
 
 	} while (total <= data.sample_width);
 
+	ret = 0;
+
 	/* If we exceed the threshold value, we have found a hardware latency */
 	if (sample > data.threshold || outer_sample > data.threshold) {
 		struct sample s;
 
+		ret = 1;
+
 		data.count++;
 		s.seqnum = data.count;
 		s.duration = sample;
@@ -295,7 +297,6 @@ static int get_sample(void *unused)
 			data.max_sample = sample;
 	}
 
-	ret = 0;
 out:
 	return ret;
 }
@@ -305,32 +306,30 @@ static int get_sample(void *unused)
  * @unused: A required part of the kthread API.
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
- * use stop_machine, whith does (intentionally) introduce latency since we
+ * disable interrupts, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus pre-empting).
 * Obviously this should never be used in production environments.
 *
- * stop_machine will schedule us typically only on CPU0 which is fine for
- * almost every real-world hardware latency situation - but we might later
- * generalize this if we find there are any actualy systems with alternate
- * SMI delivery or other non CPU0 hardware latencies.
+ * Currently this runs on whichever CPU it was scheduled on, but most
+ * real-world hardware latency situations occur across several CPUs,
+ * but we might later generalize this if we find there are any actual
+ * systems with alternate SMI delivery or other hardware latencies.
 */
 static int kthread_fn(void *unused)
 {
-	int err = 0;
-	u64 interval = 0;
+	int ret;
+	u64 interval;
 
 	while (!kthread_should_stop()) {
 
 		mutex_lock(&data.lock);
 
-		err = stop_machine(get_sample, unused, 0);
-		if (err) {
-			/* Houston, we have a problem */
-			mutex_unlock(&data.lock);
-			goto err_out;
-		}
+		local_irq_disable();
+		ret = get_sample();
+		local_irq_enable();
 
-		wake_up(&data.wq); /* wake up reader(s) */
+		if (ret > 0)
+			wake_up(&data.wq); /* wake up reader(s) */
 
 		interval = data.sample_window - data.sample_width;
 		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
@@ -338,15 +337,10 @@ static int kthread_fn(void *unused)
 		mutex_unlock(&data.lock);
 
 		if (msleep_interruptible(interval))
-			goto out;
+			break;
 	}
-	goto out;
-err_out:
-	pr_err(BANNER "could not call stop_machine, disabling\n");
-	enabled = 0;
-out:
-	return err;
 
+	return 0;
 }
 
 /**
@@ -442,8 +436,7 @@ static int init_stats(void)
 * This function provides a generic read implementation for the global state
 * "data" structure debugfs filesystem entries. It would be nice to use
 * simple_attr_read directly, but we need to make sure that the data.lock
- * spinlock is held during the actual read (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual read.
 */
 static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos, const u64 *entry)
@@ -478,8 +471,7 @@ static ssize_t simple_data_read(struct f
 * This function provides a generic write implementation for the global state
 * "data" structure debugfs filesystem entries. It would be nice to use
 * simple_attr_write directly, but we need to make sure that the data.lock
- * spinlock is held during the actual write (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual write.
 */
 static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
				 size_t cnt, loff_t *ppos, u64 *entry)
Some files were not shown because too many files have changed in this diff.