[amd64] bump -rt to 3.2-rc2-rt3

From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>

svn path=/dists/trunk/linux-2.6/; revision=18299
This commit is contained in:
Ben Hutchings 2011-11-20 00:01:55 +00:00
parent 9d96b925c8
commit 7547fa690b
245 changed files with 27682 additions and 19710 deletions

2
debian/changelog vendored
View File

@ -5,7 +5,7 @@ linux-2.6 (3.2~rc2-1~experimental.1) UNRELEASED; urgency=low
* aufs: Update to aufs3.x-rcN-20111114
[ Uwe Kleine-König ]
* [amd64] reenable rt featureset with 3.2-rc1-52e4c2a05-rt2
* [amd64] reenable rt featureset with 3.2-rc2-rt3
-- Ben Hutchings <ben@decadent.org.uk> Mon, 14 Nov 2011 15:21:10 +0000

View File

@ -0,0 +1,27 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 21 Jul 2009 22:54:51 +0200
Subject: acpi: Do not disable interrupts on PREEMPT_RT
Use the local_irq_*_nort() variants.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/acpi.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/arch/x86/include/asm/acpi.h
===================================================================
--- linux-3.2.orig/arch/x86/include/asm/acpi.h
+++ linux-3.2/arch/x86/include/asm/acpi.h
@@ -51,8 +51,8 @@
#define ACPI_ASM_MACROS
#define BREAKPOINT3
-#define ACPI_DISABLE_IRQS() local_irq_disable()
-#define ACPI_ENABLE_IRQS() local_irq_enable()
+#define ACPI_DISABLE_IRQS() local_irq_disable_nort()
+#define ACPI_ENABLE_IRQS() local_irq_enable_nort()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
int __acpi_acquire_global_lock(unsigned int *lock);

View File

@ -0,0 +1,332 @@
Subject: mm: Fixup all fault handlers to check current->pagefault_disable
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 17 Mar 2011 11:32:28 +0100
Necessary for decoupling pagefault disable from preempt count.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/alpha/mm/fault.c | 2 +-
arch/arm/mm/fault.c | 2 +-
arch/avr32/mm/fault.c | 3 ++-
arch/cris/mm/fault.c | 2 +-
arch/frv/mm/fault.c | 2 +-
arch/ia64/mm/fault.c | 2 +-
arch/m32r/mm/fault.c | 2 +-
arch/m68k/mm/fault.c | 2 +-
arch/microblaze/mm/fault.c | 2 +-
arch/mips/mm/fault.c | 2 +-
arch/mn10300/mm/fault.c | 2 +-
arch/parisc/mm/fault.c | 2 +-
arch/powerpc/mm/fault.c | 2 +-
arch/s390/mm/fault.c | 6 ++++--
arch/score/mm/fault.c | 2 +-
arch/sh/mm/fault_32.c | 2 +-
arch/sparc/mm/fault_32.c | 4 ++--
arch/sparc/mm/fault_64.c | 2 +-
arch/tile/mm/fault.c | 2 +-
arch/um/kernel/trap.c | 2 +-
arch/x86/mm/fault.c | 2 +-
arch/xtensa/mm/fault.c | 2 +-
22 files changed, 27 insertions(+), 24 deletions(-)
Index: linux-3.2/arch/alpha/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/alpha/mm/fault.c
+++ linux-3.2/arch/alpha/mm/fault.c
@@ -107,7 +107,7 @@ do_page_fault(unsigned long address, uns
/* If we're in an interrupt context, or have no user context,
we must not take the fault. */
- if (!mm || in_atomic())
+ if (!mm || in_atomic() || current->pagefault_disabled)
goto no_context;
#ifdef CONFIG_ALPHA_LARGE_VMALLOC
Index: linux-3.2/arch/arm/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/arm/mm/fault.c
+++ linux-3.2/arch/arm/mm/fault.c
@@ -294,7 +294,7 @@ do_page_fault(unsigned long addr, unsign
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
/*
Index: linux-3.2/arch/avr32/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/avr32/mm/fault.c
+++ linux-3.2/arch/avr32/mm/fault.c
@@ -81,7 +81,8 @@ asmlinkage void do_page_fault(unsigned l
* If we're in an interrupt or have no user context, we must
* not take the fault...
*/
- if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM))
+ if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM) ||
+ current->pagefault_disabled)
goto no_context;
local_irq_enable();
Index: linux-3.2/arch/cris/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/cris/mm/fault.c
+++ linux-3.2/arch/cris/mm/fault.c
@@ -111,7 +111,7 @@ do_page_fault(unsigned long address, str
* user context, we must not take the fault.
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/frv/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/frv/mm/fault.c
+++ linux-3.2/arch/frv/mm/fault.c
@@ -79,7 +79,7 @@ asmlinkage void do_page_fault(int datamm
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/ia64/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/ia64/mm/fault.c
+++ linux-3.2/arch/ia64/mm/fault.c
@@ -89,7 +89,7 @@ ia64_do_page_fault (unsigned long addres
/*
* If we're in an interrupt or have no user context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
#ifdef CONFIG_VIRTUAL_MEM_MAP
Index: linux-3.2/arch/m32r/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/m32r/mm/fault.c
+++ linux-3.2/arch/m32r/mm/fault.c
@@ -115,7 +115,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user context or are running in an
* atomic region then we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto bad_area_nosemaphore;
/* When running in the kernel we expect faults to occur only to
Index: linux-3.2/arch/m68k/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/m68k/mm/fault.c
+++ linux-3.2/arch/m68k/mm/fault.c
@@ -85,7 +85,7 @@ int do_page_fault(struct pt_regs *regs,
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/microblaze/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/microblaze/mm/fault.c
+++ linux-3.2/arch/microblaze/mm/fault.c
@@ -107,7 +107,7 @@ void do_page_fault(struct pt_regs *regs,
if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11)
is_write = 0;
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
if (kernel_mode(regs))
goto bad_area_nosemaphore;
Index: linux-3.2/arch/mips/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/mips/mm/fault.c
+++ linux-3.2/arch/mips/mm/fault.c
@@ -88,7 +88,7 @@ asmlinkage void __kprobes do_page_fault(
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto bad_area_nosemaphore;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/mn10300/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/mn10300/mm/fault.c
+++ linux-3.2/arch/mn10300/mm/fault.c
@@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/parisc/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/parisc/mm/fault.c
+++ linux-3.2/arch/parisc/mm/fault.c
@@ -176,7 +176,7 @@ void do_page_fault(struct pt_regs *regs,
unsigned long acc_type;
int fault;
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/powerpc/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/powerpc/mm/fault.c
+++ linux-3.2/arch/powerpc/mm/fault.c
@@ -162,7 +162,7 @@ int __kprobes do_page_fault(struct pt_re
}
#endif
- if (in_atomic() || mm == NULL) {
+ if (in_atomic() || mm == NULL || current->pagefault_disabled) {
if (!user_mode(regs))
return SIGSEGV;
/* in_atomic() in user mode is really bad,
Index: linux-3.2/arch/s390/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/s390/mm/fault.c
+++ linux-3.2/arch/s390/mm/fault.c
@@ -295,7 +295,8 @@ static inline int do_exception(struct pt
* user context.
*/
fault = VM_FAULT_BADCONTEXT;
- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
+ tsk->pagefault_disabled))
goto out;
address = trans_exc_code & __FAIL_ADDR_MASK;
@@ -426,7 +427,8 @@ void __kprobes do_asce_exception(struct
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm))
+ if (unlikely(!user_space_fault(trans_exc_code) || in_atomic() || !mm ||
+ current->pagefault_disabled))
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/score/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/score/mm/fault.c
+++ linux-3.2/arch/score/mm/fault.c
@@ -72,7 +72,7 @@ asmlinkage void do_page_fault(struct pt_
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto bad_area_nosemaphore;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/sh/mm/fault_32.c
===================================================================
--- linux-3.2.orig/arch/sh/mm/fault_32.c
+++ linux-3.2/arch/sh/mm/fault_32.c
@@ -166,7 +166,7 @@ asmlinkage void __kprobes do_page_fault(
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto no_context;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/sparc/mm/fault_32.c
===================================================================
--- linux-3.2.orig/arch/sparc/mm/fault_32.c
+++ linux-3.2/arch/sparc/mm/fault_32.c
@@ -247,8 +247,8 @@ asmlinkage void do_sparc_fault(struct pt
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
- goto no_context;
+ if (in_atomic() || !mm || current->pagefault_disabled)
+ goto no_context;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
Index: linux-3.2/arch/sparc/mm/fault_64.c
===================================================================
--- linux-3.2.orig/arch/sparc/mm/fault_64.c
+++ linux-3.2/arch/sparc/mm/fault_64.c
@@ -322,7 +322,7 @@ asmlinkage void __kprobes do_sparc64_fau
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm)
+ if (in_atomic() || !mm || current->pagefault_enabled)
goto intr_or_no_mm;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
Index: linux-3.2/arch/tile/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/tile/mm/fault.c
+++ linux-3.2/arch/tile/mm/fault.c
@@ -346,7 +346,7 @@ static int handle_page_fault(struct pt_r
* If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault.
*/
- if (in_atomic() || !mm) {
+ if (in_atomic() || !mm || current->pagefault_disabled) {
vma = NULL; /* happy compiler */
goto bad_area_nosemaphore;
}
Index: linux-3.2/arch/um/kernel/trap.c
===================================================================
--- linux-3.2.orig/arch/um/kernel/trap.c
+++ linux-3.2/arch/um/kernel/trap.c
@@ -37,7 +37,7 @@ int handle_page_fault(unsigned long addr
* If the fault was during atomic operation, don't take the fault, just
* fail.
*/
- if (in_atomic())
+ if (in_atomic() || !mm || current->pagefault_disabled)
goto out_nosemaphore;
down_read(&mm->mmap_sem);
Index: linux-3.2/arch/x86/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/x86/mm/fault.c
+++ linux-3.2/arch/x86/mm/fault.c
@@ -1084,7 +1084,7 @@ do_page_fault(struct pt_regs *regs, unsi
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
Index: linux-3.2/arch/xtensa/mm/fault.c
===================================================================
--- linux-3.2.orig/arch/xtensa/mm/fault.c
+++ linux-3.2/arch/xtensa/mm/fault.c
@@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs)
/* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (in_atomic() || !mm) {
+ if (in_atomic() || !mm || current->pagefault_disabled) {
bad_page_fault(regs, address, SIGSEGV);
return;
}

View File

@ -0,0 +1,24 @@
Subject: arm: Allow forced irq threading
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Jul 2011 13:15:20 +0200
All timer interrupts and the perf interrupt are marked NO_THREAD, so
its safe to allow forced interrupt threading.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/Kconfig | 1 +
1 file changed, 1 insertion(+)
Index: linux-3.2/arch/arm/Kconfig
===================================================================
--- linux-3.2.orig/arch/arm/Kconfig
+++ linux-3.2/arch/arm/Kconfig
@@ -29,6 +29,7 @@ config ARM
select HAVE_GENERIC_HARDIRQS
select HAVE_SPARSE_IRQ
select GENERIC_IRQ_SHOW
+ select IRQ_FORCED_THREADING
select CPU_PM if (SUSPEND || CPU_IDLE)
help
The ARM series is a line of low-power-consumption RISC chip designs

View File

@ -0,0 +1,67 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Sat, 6 Mar 2010 17:47:10 +0100
Subject: ARM: AT91: PIT: Remove irq handler when clock event is unused
Setup and remove the interrupt handler in clock event mode selection.
This avoids calling the (shared) interrupt handler when the device is
not used.
Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/mach-at91/at91rm9200_time.c | 2 ++
arch/arm/mach-at91/at91sam926x_time.c | 6 +++++-
2 files changed, 7 insertions(+), 1 deletion(-)
Index: linux-3.2/arch/arm/mach-at91/at91rm9200_time.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-at91/at91rm9200_time.c
+++ linux-3.2/arch/arm/mach-at91/at91rm9200_time.c
@@ -114,6 +114,7 @@ clkevt32k_mode(enum clock_event_mode mod
last_crtr = read_CRTR();
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ setup_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
/* PIT for periodic irqs; fixed rate of 1/HZ */
irqmask = AT91_ST_PITS;
at91_sys_write(AT91_ST_PIMR, LATCH);
@@ -127,6 +128,7 @@ clkevt32k_mode(enum clock_event_mode mod
break;
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
+ remove_irq(AT91_ID_SYS, &at91rm9200_timer_irq);
case CLOCK_EVT_MODE_RESUME:
irqmask = 0;
break;
Index: linux-3.2/arch/arm/mach-at91/at91sam926x_time.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-at91/at91sam926x_time.c
+++ linux-3.2/arch/arm/mach-at91/at91sam926x_time.c
@@ -54,7 +54,7 @@ static struct clocksource pit_clk = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-
+static struct irqaction at91sam926x_pit_irq;
/*
* Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16)
*/
@@ -63,6 +63,9 @@ pit_clkevt_mode(enum clock_event_mode mo
{
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
+ /* Set up irq handler */
+ setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
+
/* update clocksource counter */
pit_cnt += pit_cycle * PIT_PICNT(at91_sys_read(AT91_PIT_PIVR));
at91_sys_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN
@@ -75,6 +78,7 @@ pit_clkevt_mode(enum clock_event_mode mo
case CLOCK_EVT_MODE_UNUSED:
/* disable irq, leaving the clocksource active */
at91_sys_write(AT91_PIT_MR, (pit_cycle - 1) | AT91_PIT_PITEN);
+ remove_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
break;
case CLOCK_EVT_MODE_RESUME:
break;

View File

@ -0,0 +1,34 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 1 May 2010 18:29:35 +0200
Subject: ARM: at91: tclib: Default to tclib timer for RT
RT is not too happy about the shared timer interrupt in AT91
devices. Default to tclib timer for RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/misc/Kconfig | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
Index: linux-3.2/drivers/misc/Kconfig
===================================================================
--- linux-3.2.orig/drivers/misc/Kconfig
+++ linux-3.2/drivers/misc/Kconfig
@@ -82,6 +82,7 @@ config AB8500_PWM
config ATMEL_TCLIB
bool "Atmel AT32/AT91 Timer/Counter Library"
depends on (AVR32 || ARCH_AT91)
+ default y if PREEMPT_RT_FULL
help
Select this if you want a library to allocate the Timer/Counter
blocks found on many Atmel processors. This facilitates using
@@ -114,7 +115,7 @@ config ATMEL_TCB_CLKSRC_BLOCK
config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
bool "TC Block use 32 KiHz clock"
depends on ATMEL_TCB_CLKSRC
- default y
+ default y if !PREEMPT_RT_FULL
help
Select this to use 32 KiHz base clock rate as TC block clock
source for clock events.

View File

@ -0,0 +1,291 @@
Subject: preempt-rt: Convert arm boot_lock to raw
From: Frank Rowand <frank.rowand@am.sony.com>
Date: Mon, 19 Sep 2011 14:51:14 -0700
The arm boot_lock is used by the secondary processor startup code. The locking
task is the idle thread, which has idle->sched_class == &idle_sched_class.
idle_sched_class->enqueue_task == NULL, so if the idle task blocks on the
lock, the attempt to wake it when the lock becomes available will fail:
try_to_wake_up()
...
activate_task()
enqueue_task()
p->sched_class->enqueue_task(rq, p, flags)
Fix by converting boot_lock to a raw spin lock.
Signed-off-by: Frank Rowand <frank.rowand@am.sony.com>
Link: http://lkml.kernel.org/r/4E77B952.3010606@am.sony.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/mach-exynos/platsmp.c | 12 ++++++------
arch/arm/mach-msm/platsmp.c | 10 +++++-----
arch/arm/mach-omap2/omap-smp.c | 10 +++++-----
arch/arm/mach-tegra/platsmp.c | 10 +++++-----
arch/arm/mach-ux500/platsmp.c | 10 +++++-----
arch/arm/plat-versatile/platsmp.c | 10 +++++-----
6 files changed, 31 insertions(+), 31 deletions(-)
Index: linux-3.2/arch/arm/mach-exynos/platsmp.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-exynos/platsmp.c
+++ linux-3.2/arch/arm/mach-exynos/platsmp.c
@@ -63,7 +63,7 @@ static void __iomem *scu_base_addr(void)
return (void __iomem *)(S5P_VA_SCU);
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void __cpuinit exynos4_gic_secondary_init(void)
{
@@ -108,8 +108,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -120,7 +120,7 @@ int __cpuinit boot_secondary(unsigned in
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -149,7 +149,7 @@ int __cpuinit boot_secondary(unsigned in
if (timeout == 0) {
printk(KERN_ERR "cpu1 power enable failed");
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return -ETIMEDOUT;
}
}
@@ -177,7 +177,7 @@ int __cpuinit boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
Index: linux-3.2/arch/arm/mach-msm/platsmp.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-msm/platsmp.c
+++ linux-3.2/arch/arm/mach-msm/platsmp.c
@@ -39,7 +39,7 @@ extern void msm_secondary_startup(void);
*/
volatile int pen_release = -1;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static inline int get_core_count(void)
{
@@ -69,8 +69,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
static __cpuinit void prepare_cold_cpu(unsigned int cpu)
@@ -107,7 +107,7 @@ int __cpuinit boot_secondary(unsigned in
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -141,7 +141,7 @@ int __cpuinit boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
Index: linux-3.2/arch/arm/mach-omap2/omap-smp.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-omap2/omap-smp.c
+++ linux-3.2/arch/arm/mach-omap2/omap-smp.c
@@ -29,7 +29,7 @@
/* SCU base address */
static void __iomem *scu_base;
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
@@ -43,8 +43,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -53,7 +53,7 @@ int __cpuinit boot_secondary(unsigned in
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* Update the AuxCoreBoot0 with boot state for secondary core.
@@ -70,7 +70,7 @@ int __cpuinit boot_secondary(unsigned in
* Now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return 0;
}
Index: linux-3.2/arch/arm/mach-tegra/platsmp.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-tegra/platsmp.c
+++ linux-3.2/arch/arm/mach-tegra/platsmp.c
@@ -28,7 +28,7 @@
extern void tegra_secondary_startup(void);
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
#define EVP_CPU_RESET_VECTOR \
@@ -50,8 +50,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -65,7 +65,7 @@ int __cpuinit boot_secondary(unsigned in
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/* set the reset vector to point to the secondary_startup routine */
@@ -101,7 +101,7 @@ int __cpuinit boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return 0;
}
Index: linux-3.2/arch/arm/mach-ux500/platsmp.c
===================================================================
--- linux-3.2.orig/arch/arm/mach-ux500/platsmp.c
+++ linux-3.2/arch/arm/mach-ux500/platsmp.c
@@ -57,7 +57,7 @@ static void __iomem *scu_base_addr(void)
return NULL;
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
@@ -77,8 +77,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -89,7 +89,7 @@ int __cpuinit boot_secondary(unsigned in
* set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
@@ -110,7 +110,7 @@ int __cpuinit boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
Index: linux-3.2/arch/arm/plat-versatile/platsmp.c
===================================================================
--- linux-3.2.orig/arch/arm/plat-versatile/platsmp.c
+++ linux-3.2/arch/arm/plat-versatile/platsmp.c
@@ -37,7 +37,7 @@ static void __cpuinit write_pen_release(
outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
}
-static DEFINE_SPINLOCK(boot_lock);
+static DEFINE_RAW_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
@@ -57,8 +57,8 @@ void __cpuinit platform_secondary_init(u
/*
* Synchronise with the boot thread.
*/
- spin_lock(&boot_lock);
- spin_unlock(&boot_lock);
+ raw_spin_lock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
@@ -69,7 +69,7 @@ int __cpuinit boot_secondary(unsigned in
* Set synchronisation state between this boot processor
* and the secondary one
*/
- spin_lock(&boot_lock);
+ raw_spin_lock(&boot_lock);
/*
* This is really belt and braces; we hold unintended secondary
@@ -99,7 +99,7 @@ int __cpuinit boot_secondary(unsigned in
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
- spin_unlock(&boot_lock);
+ raw_spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}

View File

@ -0,0 +1,22 @@
Subject: arm-disable-highmem-on-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 Jul 2011 17:09:28 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/arch/arm/Kconfig
===================================================================
--- linux-3.2.orig/arch/arm/Kconfig
+++ linux-3.2/arch/arm/Kconfig
@@ -1655,7 +1655,7 @@ config HAVE_ARCH_PFN_VALID
config HIGHMEM
bool "High Memory Support"
- depends on MMU
+ depends on MMU && !PREEMPT_RT_FULL
help
The address space of ARM processors is only 4 Gigabytes large
and it has to accommodate user address space, kernel address

View File

@ -0,0 +1,23 @@
Subject: arm-enable-interrupts-in-signal-code.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Jul 2011 16:27:13 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/kernel/signal.c | 3 +++
1 file changed, 3 insertions(+)
Index: linux-3.2/arch/arm/kernel/signal.c
===================================================================
--- linux-3.2.orig/arch/arm/kernel/signal.c
+++ linux-3.2/arch/arm/kernel/signal.c
@@ -673,6 +673,9 @@ static void do_signal(struct pt_regs *re
if (!user_mode(regs))
return;
+ local_irq_enable();
+ preempt_check_resched();
+
/*
* If we were from a system call, check for system call restarting...
*/

View File

@ -0,0 +1,25 @@
Subject: arm: Mark pmu interupt IRQF_NO_THREAD
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 16 Mar 2011 14:45:31 +0100
PMU interrupt must not be threaded. Remove IRQF_DISABLED while at it
as we run all handlers with interrupts disabled anyway.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/kernel/perf_event.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/arch/arm/kernel/perf_event.c
===================================================================
--- linux-3.2.orig/arch/arm/kernel/perf_event.c
+++ linux-3.2/arch/arm/kernel/perf_event.c
@@ -432,7 +432,7 @@ armpmu_reserve_hardware(struct arm_pmu *
}
err = request_irq(irq, handle_irq,
- IRQF_DISABLED | IRQF_NOBALANCING,
+ IRQF_NOBALANCING | IRQF_NO_THREAD,
"arm-pmu", armpmu);
if (err) {
pr_err("unable to request IRQ%d for ARM PMU counters\n",

View File

@ -0,0 +1,66 @@
From: Steven Rostedt <srostedt@redhat.com>
Date: Fri, 3 Jul 2009 08:44:29 -0500
Subject: ata: Do not disable interrupts in ide code for preempt-rt
Use the local_irq_*_nort variants.
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/ata/libata-sff.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
Index: linux-3.2/drivers/ata/libata-sff.c
===================================================================
--- linux-3.2.orig/drivers/ata/libata-sff.c
+++ linux-3.2/drivers/ata/libata-sff.c
@@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(str
unsigned long flags;
unsigned int consumed;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
consumed = ata_sff_data_xfer32(dev, buf, buflen, rw);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
return consumed;
}
@@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_qu
unsigned long flags;
/* FIXME: use a bounce buffer */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
buf = kmap_atomic(page, KM_IRQ0);
/* do the actual data transfer */
@@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_qu
do_write);
kunmap_atomic(buf, KM_IRQ0);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
} else {
buf = page_address(page);
ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size,
@@ -864,7 +864,7 @@ next_sg:
unsigned long flags;
/* FIXME: use bounce buffer */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
buf = kmap_atomic(page, KM_IRQ0);
/* do the actual data transfer */
@@ -872,7 +872,7 @@ next_sg:
count, rw);
kunmap_atomic(buf, KM_IRQ0);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
} else {
buf = page_address(page);
consumed = ap->ops->sff_data_xfer(dev, buf + offset,

View File

@ -0,0 +1,117 @@
Subject: block: Shorten interrupt disabled regions
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 22 Jun 2011 19:47:02 +0200
Moving the blk_sched_flush_plug() call out of the interrupt/preempt
disabled region in the scheduler allows us to replace
local_irq_save/restore(flags) by local_irq_disable/enable() in
blk_flush_plug().
Now instead of doing this we disable interrupts explicitely when we
lock the request_queue and reenable them when we drop the lock. That
allows interrupts to be handled when the plug list contains requests
for more than one queue.
Aside of that this change makes the scope of the irq disabled region
more obvious. The current code confused the hell out of me when
looking at:
local_irq_save(flags);
spin_lock(q->queue_lock);
...
queue_unplugged(q...);
scsi_request_fn();
spin_unlock(q->queue_lock);
spin_lock(shost->host_lock);
spin_unlock_irq(shost->host_lock);
-------------------^^^ ????
spin_lock_irq(q->queue_lock);
spin_unlock(q->lock);
local_irq_restore(flags);
Also add a comment to __blk_run_queue() documenting that
q->request_fn() can drop q->queue_lock and reenable interrupts, but
must return with q->queue_lock held and interrupts disabled.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110622174919.025446432@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
block/blk-core.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
Index: linux-3.2/block/blk-core.c
===================================================================
--- linux-3.2.orig/block/blk-core.c
+++ linux-3.2/block/blk-core.c
@@ -300,7 +300,11 @@ void __blk_run_queue(struct request_queu
{
if (unlikely(blk_queue_stopped(q)))
return;
-
+ /*
+ * q->request_fn() can drop q->queue_lock and reenable
+ * interrupts, but must return with q->queue_lock held and
+ * interrupts disabled.
+ */
q->request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);
@@ -2742,11 +2746,11 @@ static void queue_unplugged(struct reque
* this lock).
*/
if (from_schedule) {
- spin_unlock(q->queue_lock);
+ spin_unlock_irq(q->queue_lock);
blk_run_queue_async(q);
} else {
__blk_run_queue(q);
- spin_unlock(q->queue_lock);
+ spin_unlock_irq(q->queue_lock);
}
}
@@ -2772,7 +2776,6 @@ static void flush_plug_callbacks(struct
void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request_queue *q;
- unsigned long flags;
struct request *rq;
LIST_HEAD(list);
unsigned int depth;
@@ -2793,11 +2796,6 @@ void blk_flush_plug_list(struct blk_plug
q = NULL;
depth = 0;
- /*
- * Save and disable interrupts here, to avoid doing it for every
- * queue lock we have to take.
- */
- local_irq_save(flags);
while (!list_empty(&list)) {
rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
@@ -2810,7 +2808,7 @@ void blk_flush_plug_list(struct blk_plug
queue_unplugged(q, depth, from_schedule);
q = rq->q;
depth = 0;
- spin_lock(q->queue_lock);
+ spin_lock_irq(q->queue_lock);
}
/*
* rq is already accounted, so use raw insert
@@ -2828,8 +2826,6 @@ void blk_flush_plug_list(struct blk_plug
*/
if (q)
queue_unplugged(q, depth, from_schedule);
-
- local_irq_restore(flags);
}
void blk_finish_plug(struct blk_plug *plug)

View File

@ -0,0 +1,45 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:58 -0500
Subject: bug: BUG_ON/WARN_ON variants dependend on RT/!RT
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/asm-generic/bug.h | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
Index: linux-3.2/include/asm-generic/bug.h
===================================================================
--- linux-3.2.orig/include/asm-generic/bug.h
+++ linux-3.2/include/asm-generic/bug.h
@@ -3,6 +3,10 @@
#include <linux/compiler.h>
+#ifndef __ASSEMBLY__
+extern void __WARN_ON(const char *func, const char *file, const int line);
+#endif /* __ASSEMBLY__ */
+
#ifdef CONFIG_BUG
#ifdef CONFIG_GENERIC_BUG
@@ -202,4 +206,18 @@ extern void warn_slowpath_null(const cha
# define WARN_ON_SMP(x) ({0;})
#endif
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define BUG_ON_RT(c) BUG_ON(c)
+# define BUG_ON_NONRT(c) do { } while (0)
+# define WARN_ON_RT(condition) WARN_ON(condition)
+# define WARN_ON_NONRT(condition) do { } while (0)
+# define WARN_ON_ONCE_NONRT(condition) do { } while (0)
+#else
+# define BUG_ON_RT(c) do { } while (0)
+# define BUG_ON_NONRT(c) BUG_ON(c)
+# define WARN_ON_RT(condition) do { } while (0)
+# define WARN_ON_NONRT(condition) WARN_ON(condition)
+# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition)
+#endif
+
#endif

View File

@ -0,0 +1,163 @@
From: Benedikt Spranger <b.spranger@linutronix.de>
Date: Mon, 8 Mar 2010 18:57:04 +0100
Subject: clocksource: TCLIB: Allow higher clock rates for clock events
As default the TCLIB uses the 32KiHz base clock rate for clock events.
Add a compile time selection to allow higher clock resulution.
Signed-off-by: Benedikt Spranger <b.spranger@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/clocksource/tcb_clksrc.c | 44 +++++++++++++++++++++++----------------
drivers/misc/Kconfig | 11 +++++++--
2 files changed, 35 insertions(+), 20 deletions(-)
Index: linux-3.2/drivers/clocksource/tcb_clksrc.c
===================================================================
--- linux-3.2.orig/drivers/clocksource/tcb_clksrc.c
+++ linux-3.2/drivers/clocksource/tcb_clksrc.c
@@ -21,8 +21,7 @@
* resolution better than 200 nsec).
*
* - The third channel may be used to provide a 16-bit clockevent
- * source, used in either periodic or oneshot mode. This runs
- * at 32 KiHZ, and can handle delays of up to two seconds.
+ * source, used in either periodic or oneshot mode.
*
* A boot clocksource and clockevent source are also currently needed,
* unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so
@@ -68,6 +67,7 @@ static struct clocksource clksrc = {
struct tc_clkevt_device {
struct clock_event_device clkevt;
struct clk *clk;
+ u32 freq;
void __iomem *regs;
};
@@ -76,13 +76,6 @@ static struct tc_clkevt_device *to_tc_cl
return container_of(clkevt, struct tc_clkevt_device, clkevt);
}
-/* For now, we always use the 32K clock ... this optimizes for NO_HZ,
- * because using one of the divided clocks would usually mean the
- * tick rate can never be less than several dozen Hz (vs 0.5 Hz).
- *
- * A divided clock could be good for high resolution timers, since
- * 30.5 usec resolution can seem "low".
- */
static u32 timer_clock;
static void tc_mode(enum clock_event_mode m, struct clock_event_device *d)
@@ -105,11 +98,12 @@ static void tc_mode(enum clock_event_mod
case CLOCK_EVT_MODE_PERIODIC:
clk_enable(tcd->clk);
- /* slow clock, count up to RC, then irq and restart */
+ /* count up to RC, then irq and restart */
__raw_writel(timer_clock
| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
regs + ATMEL_TC_REG(2, CMR));
- __raw_writel((32768 + HZ/2) / HZ, tcaddr + ATMEL_TC_REG(2, RC));
+ __raw_writel((tcd->freq + HZ/2)/HZ,
+ tcaddr + ATMEL_TC_REG(2, RC));
/* Enable clock and interrupts on RC compare */
__raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER));
@@ -122,7 +116,7 @@ static void tc_mode(enum clock_event_mod
case CLOCK_EVT_MODE_ONESHOT:
clk_enable(tcd->clk);
- /* slow clock, count up to RC, then irq and stop */
+ /* count up to RC, then irq and stop */
__raw_writel(timer_clock | ATMEL_TC_CPCSTOP
| ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO,
regs + ATMEL_TC_REG(2, CMR));
@@ -152,8 +146,12 @@ static struct tc_clkevt_device clkevt =
.features = CLOCK_EVT_FEAT_PERIODIC
| CLOCK_EVT_FEAT_ONESHOT,
.shift = 32,
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
/* Should be lower than at91rm9200's system timer */
.rating = 125,
+#else
+ .rating = 200,
+#endif
.set_next_event = tc_next_event,
.set_mode = tc_mode,
},
@@ -179,8 +177,9 @@ static struct irqaction tc_irqaction = {
.handler = ch2_irq,
};
-static void __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx)
+static void __init setup_clkevents(struct atmel_tc *tc, int divisor_idx)
{
+ unsigned divisor = atmel_tc_divisors[divisor_idx];
struct clk *t2_clk = tc->clk[2];
int irq = tc->irq[2];
@@ -188,11 +187,17 @@ static void __init setup_clkevents(struc
clkevt.clk = t2_clk;
tc_irqaction.dev_id = &clkevt;
- timer_clock = clk32k_divisor_idx;
+ timer_clock = divisor_idx;
- clkevt.clkevt.mult = div_sc(32768, NSEC_PER_SEC, clkevt.clkevt.shift);
- clkevt.clkevt.max_delta_ns
- = clockevent_delta2ns(0xffff, &clkevt.clkevt);
+ if (!divisor)
+ clkevt.freq = 32768;
+ else
+ clkevt.freq = clk_get_rate(t2_clk)/divisor;
+
+ clkevt.clkevt.mult = div_sc(clkevt.freq, NSEC_PER_SEC,
+ clkevt.clkevt.shift);
+ clkevt.clkevt.max_delta_ns =
+ clockevent_delta2ns(0xffff, &clkevt.clkevt);
clkevt.clkevt.min_delta_ns = clockevent_delta2ns(1, &clkevt.clkevt) + 1;
clkevt.clkevt.cpumask = cpumask_of(0);
@@ -295,8 +300,11 @@ static int __init tcb_clksrc_init(void)
clocksource_register(&clksrc);
/* channel 2: periodic and oneshot timer support */
+#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
setup_clkevents(tc, clk32k_divisor_idx);
-
+#else
+ setup_clkevents(tc, best_divisor_idx);
+#endif
return 0;
}
arch_initcall(tcb_clksrc_init);
Index: linux-3.2/drivers/misc/Kconfig
===================================================================
--- linux-3.2.orig/drivers/misc/Kconfig
+++ linux-3.2/drivers/misc/Kconfig
@@ -97,8 +97,7 @@ config ATMEL_TCB_CLKSRC
are combined to make a single 32-bit timer.
When GENERIC_CLOCKEVENTS is defined, the third timer channel
- may be used as a clock event device supporting oneshot mode
- (delays of up to two seconds) based on the 32 KiHz clock.
+ may be used as a clock event device supporting oneshot mode.
config ATMEL_TCB_CLKSRC_BLOCK
int
@@ -112,6 +111,14 @@ config ATMEL_TCB_CLKSRC_BLOCK
TC can be used for other purposes, such as PWM generation and
interval timing.
+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK
+ bool "TC Block use 32 KiHz clock"
+ depends on ATMEL_TCB_CLKSRC
+ default y
+ help
+ Select this to use 32 KiHz base clock rate as TC block clock
+ source for clock events.
+
config IBM_ASM
tristate "Device driver for IBM RSA service processor"
depends on X86 && PCI && INPUT && EXPERIMENTAL

View File

@ -0,0 +1,22 @@
Subject: cond-resched-lock-rt-tweak.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 22:51:33 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/include/linux/sched.h
===================================================================
--- linux-3.2.orig/include/linux/sched.h
+++ linux-3.2/include/linux/sched.h
@@ -2596,7 +2596,7 @@ extern int _cond_resched(void);
extern int __cond_resched_lock(spinlock_t *lock);
-#ifdef CONFIG_PREEMPT_COUNT
+#if defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT_FULL)
#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
#else
#define PREEMPT_LOCK_OFFSET 0

View File

@ -0,0 +1,51 @@
Subject: cond-resched-softirq-fix.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 14 Jul 2011 09:56:44 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 4 ++++
kernel/sched.c | 2 ++
2 files changed, 6 insertions(+)
Index: linux-3.2/include/linux/sched.h
===================================================================
--- linux-3.2.orig/include/linux/sched.h
+++ linux-3.2/include/linux/sched.h
@@ -2599,12 +2599,16 @@ extern int __cond_resched_lock(spinlock_
__cond_resched_lock(lock); \
})
+#ifndef CONFIG_PREEMPT_RT_FULL
extern int __cond_resched_softirq(void);
#define cond_resched_softirq() ({ \
__might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
+#else
+# define cond_resched_softirq() cond_resched()
+#endif
/*
* Does a critical section need to be broken due to another
Index: linux-3.2/kernel/sched.c
===================================================================
--- linux-3.2.orig/kernel/sched.c
+++ linux-3.2/kernel/sched.c
@@ -5813,6 +5813,7 @@ int __cond_resched_lock(spinlock_t *lock
}
EXPORT_SYMBOL(__cond_resched_lock);
+#ifndef CONFIG_PREEMPT_RT_FULL
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
@@ -5826,6 +5827,7 @@ int __sched __cond_resched_softirq(void)
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
+#endif
/**
* yield - yield the current processor to other threads.

View File

@ -0,0 +1,84 @@
Subject: console-make-rt-friendly.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 22:43:07 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/printk.c | 26 +++++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
Index: linux-3.2/kernel/printk.c
===================================================================
--- linux-3.2.orig/kernel/printk.c
+++ linux-3.2/kernel/printk.c
@@ -504,6 +504,7 @@ static void __call_console_drivers(unsig
{
struct console *con;
+ migrate_disable();
for_each_console(con) {
if (exclusive_console && con != exclusive_console)
continue;
@@ -512,6 +513,7 @@ static void __call_console_drivers(unsig
(con->flags & CON_ANYTIME)))
con->write(con, &LOG_BUF(start), end - start);
}
+ migrate_enable();
}
#ifdef CONFIG_EARLY_PRINTK
@@ -827,12 +829,18 @@ static inline int can_use_console(unsign
* interrupts disabled. It should return with 'lockbuf_lock'
* released but interrupts still disabled.
*/
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(unsigned int cpu, unsigned long flags)
__releases(&logbuf_lock)
{
int retval = 0, wake = 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+ int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
+ !preempt_count();
+#else
+ int lock = 1;
+#endif
- if (console_trylock()) {
+ if (lock && console_trylock()) {
retval = 1;
/*
@@ -1010,8 +1018,15 @@ asmlinkage int vprintk(const char *fmt,
* will release 'logbuf_lock' regardless of whether it
* actually gets the semaphore or not.
*/
- if (console_trylock_for_printk(this_cpu))
+ if (console_trylock_for_printk(this_cpu, flags)) {
+#ifndef CONFIG_PREEMPT_RT_FULL
console_unlock();
+#else
+ raw_local_irq_restore(flags);
+ console_unlock();
+ raw_local_irq_save(flags);
+#endif
+ }
lockdep_on();
out_restore_irqs:
@@ -1321,11 +1336,16 @@ again:
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
+#ifndef CONFIG_PREEMPT_RT_FULL
raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
start_critical_timings();
local_irq_restore(flags);
+#else
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+ call_console_drivers(_con_start, _log_end);
+#endif
}
console_locked = 0;

View File

@ -0,0 +1,28 @@
Subject: cpu-rt-variants.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 17 Jun 2011 15:42:38 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/smp.h | 8 ++++++++
1 file changed, 8 insertions(+)
Index: linux-3.2/include/linux/smp.h
===================================================================
--- linux-3.2.orig/include/linux/smp.h
+++ linux-3.2/include/linux/smp.h
@@ -173,6 +173,14 @@ smp_call_function_any(const struct cpuma
#define get_cpu() ({ preempt_disable(); smp_processor_id(); })
#define put_cpu() preempt_enable()
+#ifndef CONFIG_PREEMPT_RT_FULL
+# define get_cpu_light() get_cpu()
+# define put_cpu_light() put_cpu()
+#else
+# define get_cpu_light() ({ migrate_disable(); smp_processor_id(); })
+# define put_cpu_light() migrate_enable()
+#endif
+
/*
* Callback to arch code if there's nosmp or maxcpus=0 on the
* boot command line:

View File

@ -0,0 +1,36 @@
Subject: debugobjects-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:41:35 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/debugobjects.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
Index: linux-3.2/lib/debugobjects.c
===================================================================
--- linux-3.2.orig/lib/debugobjects.c
+++ linux-3.2/lib/debugobjects.c
@@ -306,7 +306,10 @@ __debug_object_init(void *addr, struct d
struct debug_obj *obj;
unsigned long flags;
- fill_pool();
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (preempt_count() == 0 && !irqs_disabled())
+#endif
+ fill_pool();
db = get_bucket((unsigned long) addr);
@@ -1015,9 +1018,9 @@ static int __init debug_objects_replace_
}
}
+ local_irq_enable();
printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt,
obj_pool_used);
- local_irq_enable();
return 0;
free:
hlist_for_each_entry_safe(obj, node, tmp, &objects, node) {

View File

@ -0,0 +1,36 @@
Subject: dm: Make rt aware
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 14 Nov 2011 23:06:09 +0100
Use the BUG_ON_NORT variant for the irq_disabled() checks. RT has
interrupts legitimately enabled here as we cant deadlock against the
irq thread due to the "sleeping spinlocks" conversion.
Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
Cc: stable-rt@vger.kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/md/dm.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/drivers/md/dm.c
===================================================================
--- linux-3.2.orig/drivers/md/dm.c
+++ linux-3.2/drivers/md/dm.c
@@ -1648,14 +1648,14 @@ static void dm_request_fn(struct request
if (map_request(ti, clone, md))
goto requeued;
- BUG_ON(!irqs_disabled());
+ BUG_ON_NONRT(!irqs_disabled());
spin_lock(q->queue_lock);
}
goto out;
requeued:
- BUG_ON(!irqs_disabled());
+ BUG_ON_NONRT(!irqs_disabled());
spin_lock(q->queue_lock);
delay_and_out:

View File

@ -0,0 +1,27 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:24 -0500
Subject: drivers/net: Use disable_irq_nosync() in 8139too
Use disable_irq_nosync() instead of disable_irq() as this might be
called in atomic context with netpoll.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/ethernet/realtek/8139too.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/drivers/net/ethernet/realtek/8139too.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/realtek/8139too.c
+++ linux-3.2/drivers/net/ethernet/realtek/8139too.c
@@ -2174,7 +2174,7 @@ static irqreturn_t rtl8139_interrupt (in
*/
static void rtl8139_poll_controller(struct net_device *dev)
{
- disable_irq(dev->irq);
+ disable_irq_nosync(dev->irq);
rtl8139_interrupt(dev->irq, dev);
enable_irq(dev->irq);
}

View File

@ -0,0 +1,54 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 17 Nov 2009 12:02:43 +0100
Subject: drivers: net: at91_ether: Make mdio protection -rt safe
Neither the phy interrupt nor the timer callback which updates the
link status in absense of a phy interrupt are taking lp->lock which
serializes the MDIO access. This works on mainline as at91 is an UP
machine. On preempt-rt the timer callback can run even in the
spin_lock_irq(&lp->lock) protected code pathes because spin_lock_irq
is neither disabling interrupts nor disabling preemption.
Fix this by adding proper locking to at91ether_phy_interrupt() and
at91_check_ether() which serializes the access on -rt.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/ethernet/cadence/at91_ether.c | 5 +++++
1 file changed, 5 insertions(+)
Index: linux-3.2/drivers/net/ethernet/cadence/at91_ether.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/cadence/at91_ether.c
+++ linux-3.2/drivers/net/ethernet/cadence/at91_ether.c
@@ -200,7 +200,9 @@ static irqreturn_t at91ether_phy_interru
struct net_device *dev = (struct net_device *) dev_id;
struct at91_private *lp = netdev_priv(dev);
unsigned int phy;
+ unsigned long flags;
+ spin_lock_irqsave(&lp->lock, flags);
/*
* This hander is triggered on both edges, but the PHY chips expect
* level-triggering. We therefore have to check if the PHY actually has
@@ -242,6 +244,7 @@ static irqreturn_t at91ether_phy_interru
done:
disable_mdi();
+ spin_unlock_irqrestore(&lp->lock, flags);
return IRQ_HANDLED;
}
@@ -398,9 +401,11 @@ static void at91ether_check_link(unsigne
struct net_device *dev = (struct net_device *) dev_id;
struct at91_private *lp = netdev_priv(dev);
+ spin_lock_irq(&lp->lock);
enable_mdi();
update_linkspeed(dev, 1);
disable_mdi();
+ spin_unlock_irq(&lp->lock);
mod_timer(&lp->check_timer, jiffies + LINK_POLL_INTERVAL);
}

View File

@ -0,0 +1,53 @@
From: Darren Hart <dvhltc@us.ibm.com>
Date: Tue, 18 May 2010 14:33:07 -0700
Subject: drivers: net: ehea: Make rx irq handler non-threaded (IRQF_NO_THREAD)
The underlying hardware is edge triggered but presented by XICS as level
triggered. The edge triggered interrupts are not reissued after masking. This
is not a problem in mainline which does not mask the interrupt (relying on the
EOI mechanism instead). The threaded interrupts in PREEMPT_RT do mask the
interrupt, and can lose interrupts that occurred while masked, resulting in a
hung ethernet interface.
The receive handler simply calls napi_schedule(), as such, there is no
significant additional overhead in making this non-threaded, since we either
wakeup the threaded irq handler to call napi_schedule(), or just call
napi_schedule() directly to wakeup the softirqs. As the receive handler is
lockless, there is no need to convert any of the ehea spinlock_t's to
raw_spinlock_t's.
Without this patch, a simple scp file copy loop would fail quickly (usually
seconds). We have over two hours of sustained scp activity with the patch
applied.
Credit goes to Will Schmidt for lots of instrumentation and tracing which
clarified the scenario and to Thomas Gleixner for the incredibly simple
solution.
Signed-off-by: Darren Hart <dvhltc@us.ibm.com>
Acked-by: Will Schmidt <will_schmidt@vnet.ibm.com>
Cc: Jan-Bernd Themann <themann@de.ibm.com>
Cc: Nivedita Singhvi <niv@us.ibm.com>
Cc: Brian King <bjking1@us.ibm.com>
Cc: Michael Ellerman <ellerman@au1.ibm.com>
Cc: Doug Maxey <doug.maxey@us.ibm.com>
LKML-Reference: <4BF30793.5070300@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/ethernet/ibm/ehea/ehea_main.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/drivers/net/ethernet/ibm/ehea/ehea_main.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ linux-3.2/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -1303,7 +1303,7 @@ static int ehea_reg_interrupts(struct ne
"%s-queue%d", dev->name, i);
ret = ibmebus_request_irq(pr->eq->attr.ist1,
ehea_recv_irq_handler,
- IRQF_DISABLED, pr->int_send_name,
+ IRQF_NO_THREAD, pr->int_send_name,
pr);
if (ret) {
netdev_err(dev, "failed registering irq for ehea_queue port_res_nr:%d, ist=%X\n",

View File

@ -0,0 +1,142 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 20 Jun 2009 11:36:54 +0200
Subject: drivers/net: fix livelock issues
Preempt-RT runs into a live lock issue with the NETDEV_TX_LOCKED micro
optimization. The reason is that the softirq thread is rescheduling
itself on that return value. Depending on priorities it starts to
monoplize the CPU and livelock on UP systems.
Remove it.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 7 ++-----
drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 3 +--
drivers/net/ethernet/chelsio/cxgb/sge.c | 3 +--
drivers/net/ethernet/neterion/s2io.c | 7 +------
drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 7 +++----
drivers/net/ethernet/tehuti/tehuti.c | 9 ++-------
drivers/net/rionet.c | 6 +-----
7 files changed, 11 insertions(+), 31 deletions(-)
Index: linux-3.2/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ linux-3.2/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2236,11 +2236,8 @@ static netdev_tx_t atl1c_xmit_frame(stru
}
tpd_req = atl1c_cal_tpd_req(skb);
- if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) {
- if (netif_msg_pktdata(adapter))
- dev_info(&adapter->pdev->dev, "tx locked\n");
- return NETDEV_TX_LOCKED;
- }
+ spin_lock_irqsave(&adapter->tx_lock, flags);
+
if (skb->mark == 0x01)
type = atl1c_trans_high;
else
Index: linux-3.2/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ linux-3.2/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1819,8 +1819,7 @@ static netdev_tx_t atl1e_xmit_frame(stru
return NETDEV_TX_OK;
}
tpd_req = atl1e_cal_tdp_req(skb);
- if (!spin_trylock_irqsave(&adapter->tx_lock, flags))
- return NETDEV_TX_LOCKED;
+ spin_lock_irqsave(&adapter->tx_lock, flags);
if (atl1e_tpd_avail(adapter) < tpd_req) {
/* no enough descriptor, just stop queue */
Index: linux-3.2/drivers/net/ethernet/chelsio/cxgb/sge.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ linux-3.2/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1678,8 +1678,7 @@ static int t1_sge_tx(struct sk_buff *skb
struct cmdQ *q = &sge->cmdQ[qid];
unsigned int credits, pidx, genbit, count, use_sched_skb = 0;
- if (!spin_trylock(&q->lock))
- return NETDEV_TX_LOCKED;
+ spin_lock(&q->lock);
reclaim_completed_tx(sge, q);
Index: linux-3.2/drivers/net/ethernet/neterion/s2io.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/neterion/s2io.c
+++ linux-3.2/drivers/net/ethernet/neterion/s2io.c
@@ -4090,12 +4090,7 @@ static netdev_tx_t s2io_xmit(struct sk_b
[skb->priority & (MAX_TX_FIFOS - 1)];
fifo = &mac_control->fifos[queue];
- if (do_spin_lock)
- spin_lock_irqsave(&fifo->tx_lock, flags);
- else {
- if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags)))
- return NETDEV_TX_LOCKED;
- }
+ spin_lock_irqsave(&fifo->tx_lock, flags);
if (sp->config.multiq) {
if (__netif_subqueue_stopped(dev, fifo->fifo_no)) {
Index: linux-3.2/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ linux-3.2/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -1931,10 +1931,9 @@ static int pch_gbe_xmit_frame(struct sk_
adapter->stats.tx_length_errors++;
return NETDEV_TX_OK;
}
- if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) {
- /* Collision - tell upper layer to requeue */
- return NETDEV_TX_LOCKED;
- }
+
+ spin_lock_irqsave(&tx_ring->tx_lock, flags);
+
if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) {
netif_stop_queue(netdev);
spin_unlock_irqrestore(&tx_ring->tx_lock, flags);
Index: linux-3.2/drivers/net/ethernet/tehuti/tehuti.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/tehuti/tehuti.c
+++ linux-3.2/drivers/net/ethernet/tehuti/tehuti.c
@@ -1605,13 +1605,8 @@ static netdev_tx_t bdx_tx_transmit(struc
unsigned long flags;
ENTER;
- local_irq_save(flags);
- if (!spin_trylock(&priv->tx_lock)) {
- local_irq_restore(flags);
- DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n",
- BDX_DRV_NAME, ndev->name);
- return NETDEV_TX_LOCKED;
- }
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
/* build tx descriptor */
BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */
Index: linux-3.2/drivers/net/rionet.c
===================================================================
--- linux-3.2.orig/drivers/net/rionet.c
+++ linux-3.2/drivers/net/rionet.c
@@ -176,11 +176,7 @@ static int rionet_start_xmit(struct sk_b
u16 destid;
unsigned long flags;
- local_irq_save(flags);
- if (!spin_trylock(&rnet->tx_lock)) {
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
+ spin_lock_irqsave(&rnet->tx_lock, flags);
if ((rnet->tx_cnt + 1) > RIONET_TX_RING_SIZE) {
netif_stop_queue(ndev);

View File

@ -0,0 +1,57 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 1 Apr 2010 20:20:57 +0200
Subject: drivers: net: gianfar: Make RT aware
The adjust_link() disables interrupts before taking the queue
locks. On RT those locks are converted to "sleeping" locks and
therefor the local_irq_save/restore must be converted to
local_irq_save/restore_nort.
Reported-by: Xianghua Xiao <xiaoxianghua@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Xianghua Xiao <xiaoxianghua@gmail.com>
---
drivers/net/ethernet/freescale/gianfar.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: linux-3.2/drivers/net/ethernet/freescale/gianfar.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/freescale/gianfar.c
+++ linux-3.2/drivers/net/ethernet/freescale/gianfar.c
@@ -1671,7 +1671,7 @@ void stop_gfar(struct net_device *dev)
/* Lock it down */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
lock_tx_qs(priv);
lock_rx_qs(priv);
@@ -1679,7 +1679,7 @@ void stop_gfar(struct net_device *dev)
unlock_rx_qs(priv);
unlock_tx_qs(priv);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
/* Free the IRQs */
if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
@@ -2949,7 +2949,7 @@ static void adjust_link(struct net_devic
struct phy_device *phydev = priv->phydev;
int new_state = 0;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
lock_tx_qs(priv);
if (phydev->link) {
@@ -3016,7 +3016,7 @@ static void adjust_link(struct net_devic
if (new_state && netif_msg_link(priv))
phy_print_status(phydev);
unlock_tx_qs(priv);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
/* Update the hash table based on the current list of multicast

View File

@ -0,0 +1,25 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:18 -0500
Subject: drivers/net: tulip_remove_one needs to call pci_disable_device()
Otherwise the device is not completely shut down.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/net/ethernet/dec/tulip/tulip_core.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-3.2/drivers/net/ethernet/dec/tulip/tulip_core.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ linux-3.2/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1949,6 +1949,7 @@ static void __devexit tulip_remove_one (
pci_iounmap(pdev, tp->base_addr);
free_netdev (dev);
pci_release_regions (pdev);
+ pci_disable_device (pdev);
pci_set_drvdata (pdev, NULL);
/* pci_power_off (pdev, -1); */

View File

@ -0,0 +1,50 @@
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 3 Jul 2009 08:30:00 -0500
Subject: drivers/net: vortex fix locking issues
Argh, cut and paste wasn't enough...
Use this patch instead. It needs an irq disable. But, believe it or not,
on SMP this is actually better. If the irq is shared (as it is in Mark's
case), we don't stop the irq of other devices from being handled on
another CPU (unfortunately for Mark, he pinned all interrupts to one CPU).
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
drivers/net/ethernet/3com/3c59x.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Index: linux-3.2/drivers/net/ethernet/3com/3c59x.c
===================================================================
--- linux-3.2.orig/drivers/net/ethernet/3com/3c59x.c
+++ linux-3.2/drivers/net/ethernet/3com/3c59x.c
@@ -843,9 +843,9 @@ static void poll_vortex(struct net_devic
{
struct vortex_private *vp = netdev_priv(dev);
unsigned long flags;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
(vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
#endif
@@ -1921,12 +1921,12 @@ static void vortex_tx_timeout(struct net
* Block interrupts because vortex_interrupt does a bare spin_lock()
*/
unsigned long flags;
- local_irq_save(flags);
+ local_irq_save_nort(flags);
if (vp->full_bus_master_tx)
boomerang_interrupt(dev->irq, dev);
else
vortex_interrupt(dev->irq, dev);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
}

View File

@ -0,0 +1,40 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:30 -0500
Subject: drivers: random: Reduce preempt disabled region
No need to keep preemption disabled across the whole function.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/char/random.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
Index: linux-3.2/drivers/char/random.c
===================================================================
--- linux-3.2.orig/drivers/char/random.c
+++ linux-3.2/drivers/char/random.c
@@ -633,8 +633,11 @@ static void add_timer_randomness(struct
preempt_disable();
/* if over the trickle threshold, use only 1 in 4096 samples */
if (input_pool.entropy_count > trickle_thresh &&
- ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff))
- goto out;
+ ((__this_cpu_inc_return(trickle_count) - 1) & 0xfff)) {
+ preempt_enable();
+ return;
+ }
+ preempt_enable();
sample.jiffies = jiffies;
sample.cycles = get_cycles();
@@ -676,8 +679,6 @@ static void add_timer_randomness(struct
credit_entropy_bits(&input_pool,
min_t(int, fls(delta>>1), 11));
}
-out:
- preempt_enable();
}
void add_input_randomness(unsigned int type, unsigned int code,

View File

@ -0,0 +1,49 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:01 -0500
Subject: serial: 8250: Call flush_to_ldisc when the irq is threaded
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
drivers/tty/serial/8250.c | 2 ++
drivers/tty/tty_buffer.c | 4 ++++
2 files changed, 6 insertions(+)
Index: linux-3.2/drivers/tty/serial/8250.c
===================================================================
--- linux-3.2.orig/drivers/tty/serial/8250.c
+++ linux-3.2/drivers/tty/serial/8250.c
@@ -1631,12 +1631,14 @@ static irqreturn_t serial8250_interrupt(
l = l->next;
+#ifndef CONFIG_PREEMPT_RT_FULL
if (l == i->head && pass_counter++ > PASS_LIMIT) {
/* If we hit this, we're dead. */
printk_ratelimited(KERN_ERR
"serial8250: too much work for irq%d\n", irq);
break;
}
+#endif
} while (l != end);
spin_unlock(&i->lock);
Index: linux-3.2/drivers/tty/tty_buffer.c
===================================================================
--- linux-3.2.orig/drivers/tty/tty_buffer.c
+++ linux-3.2/drivers/tty/tty_buffer.c
@@ -493,10 +493,14 @@ void tty_flip_buffer_push(struct tty_str
tty->buf.tail->commit = tty->buf.tail->used;
spin_unlock_irqrestore(&tty->buf.lock, flags);
+#ifndef CONFIG_PREEMPT_RT_FULL
if (tty->low_latency)
flush_to_ldisc(&tty->buf.work);
else
schedule_work(&tty->buf.work);
+#else
+ flush_to_ldisc(&tty->buf.work);
+#endif
}
EXPORT_SYMBOL(tty_flip_buffer_push);

View File

@ -0,0 +1,44 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:01 -0500
Subject: serial: 8250: Clean up the locking for -rt
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/tty/serial/8250.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
Index: linux-3.2/drivers/tty/serial/8250.c
===================================================================
--- linux-3.2.orig/drivers/tty/serial/8250.c
+++ linux-3.2/drivers/tty/serial/8250.c
@@ -2846,14 +2846,10 @@ serial8250_console_write(struct console
touch_nmi_watchdog();
- local_irq_save(flags);
- if (up->port.sysrq) {
- /* serial8250_handle_port() already took the lock */
- locked = 0;
- } else if (oops_in_progress) {
- locked = spin_trylock(&up->port.lock);
- } else
- spin_lock(&up->port.lock);
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
+ else
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -2885,8 +2881,7 @@ serial8250_console_write(struct console
check_modem_status(up);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init serial8250_console_setup(struct console *co, char *options)

View File

@ -0,0 +1,39 @@
Subject: drivers-tty-fix-omap-lock-crap.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 28 Jul 2011 13:32:57 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/tty/serial/omap-serial.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
Index: linux-3.2/drivers/tty/serial/omap-serial.c
===================================================================
--- linux-3.2.orig/drivers/tty/serial/omap-serial.c
+++ linux-3.2/drivers/tty/serial/omap-serial.c
@@ -946,13 +946,12 @@ serial_omap_console_write(struct console
unsigned int ier;
int locked = 1;
- local_irq_save(flags);
if (up->port.sysrq)
locked = 0;
else if (oops_in_progress)
- locked = spin_trylock(&up->port.lock);
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
else
- spin_lock(&up->port.lock);
+ spin_lock_irqsave(&up->port.lock, flags);
/*
* First save the IER then disable the interrupts
@@ -979,8 +978,7 @@ serial_omap_console_write(struct console
check_modem_status(up);
if (locked)
- spin_unlock(&up->port.lock);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&up->port.lock, flags);
}
static int __init

View File

@ -0,0 +1,32 @@
Subject: drm-more-moronic-crap.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 23:56:44 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/gpu/drm/drm_irq.c | 4 ----
1 file changed, 4 deletions(-)
Index: linux-3.2/drivers/gpu/drm/drm_irq.c
===================================================================
--- linux-3.2.orig/drivers/gpu/drm/drm_irq.c
+++ linux-3.2/drivers/gpu/drm/drm_irq.c
@@ -110,10 +110,7 @@ static void vblank_disable_and_save(stru
/* Prevent vblank irq processing while disabling vblank irqs,
* so no updates of timestamps or count can happen after we've
* disabled. Needed to prevent races in case of delayed irq's.
- * Disable preemption, so vblank_time_lock is held as short as
- * possible, even under a kernel with PREEMPT_RT patches.
*/
- preempt_disable();
spin_lock_irqsave(&dev->vblank_time_lock, irqflags);
dev->driver->disable_vblank(dev, crtc);
@@ -164,7 +161,6 @@ static void vblank_disable_and_save(stru
clear_vblank_timestamps(dev, crtc);
spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
- preempt_enable();
}
static void vblank_disable_fn(unsigned long arg)

View File

@ -0,0 +1,32 @@
Subject: drm-sigh.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Jul 2011 11:36:15 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/gpu/drm/drm_irq.c | 5 -----
1 file changed, 5 deletions(-)
Index: linux-3.2/drivers/gpu/drm/drm_irq.c
===================================================================
--- linux-3.2.orig/drivers/gpu/drm/drm_irq.c
+++ linux-3.2/drivers/gpu/drm/drm_irq.c
@@ -889,10 +889,6 @@ int drm_vblank_get(struct drm_device *de
spin_lock_irqsave(&dev->vbl_lock, irqflags);
/* Going from 0->1 means we have to enable interrupts again */
if (atomic_add_return(1, &dev->vblank_refcount[crtc]) == 1) {
- /* Disable preemption while holding vblank_time_lock. Do
- * it explicitely to guard against PREEMPT_RT kernel.
- */
- preempt_disable();
spin_lock_irqsave(&dev->vblank_time_lock, irqflags2);
if (!dev->vblank_enabled[crtc]) {
/* Enable vblank irqs under vblank_time_lock protection.
@@ -912,7 +908,6 @@ int drm_vblank_get(struct drm_device *de
}
}
spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags2);
- preempt_enable();
} else {
if (!dev->vblank_enabled[crtc]) {
atomic_dec(&dev->vblank_refcount[crtc]);

View File

@ -0,0 +1,495 @@
Subject: early-printk-consolidate.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 23 Jul 2011 11:04:08 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/arm/kernel/early_printk.c | 17 +++--------------
arch/blackfin/kernel/early_printk.c | 2 --
arch/microblaze/kernel/early_printk.c | 26 ++++----------------------
arch/mips/kernel/early_printk.c | 10 ++++------
arch/powerpc/kernel/udbg.c | 6 ++----
arch/sh/kernel/sh_bios.c | 2 --
arch/sparc/kernel/setup_32.c | 1 +
arch/sparc/kernel/setup_64.c | 8 +++++++-
arch/tile/kernel/early_printk.c | 26 ++++----------------------
arch/um/kernel/early_printk.c | 8 +++++---
arch/unicore32/kernel/early_printk.c | 12 ++++--------
arch/x86/kernel/early_printk.c | 21 ++-------------------
include/linux/console.h | 1 +
include/linux/printk.h | 5 +++++
kernel/printk.c | 27 ++++++++++++++++++++-------
15 files changed, 62 insertions(+), 110 deletions(-)
Index: linux-3.2/arch/arm/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/arm/kernel/early_printk.c
+++ linux-3.2/arch/arm/kernel/early_printk.c
@@ -29,28 +29,17 @@ static void early_console_write(struct c
early_write(s, n);
}
-static struct console early_console = {
+static struct console early_console_dev = {
.name = "earlycon",
.write = early_console_write,
.flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
-asmlinkage void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_write(buf, n);
- va_end(ap);
-}
-
static int __init setup_early_printk(char *buf)
{
- register_console(&early_console);
+ early_console = &early_console_dev;
+ register_console(&early_console_dev);
return 0;
}
Index: linux-3.2/arch/blackfin/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/blackfin/kernel/early_printk.c
+++ linux-3.2/arch/blackfin/kernel/early_printk.c
@@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_
extern struct console *bfin_jc_early_init(void);
#endif
-static struct console *early_console;
-
/* Default console */
#define DEFAULT_PORT 0
#define DEFAULT_CFLAG CS8|B57600
Index: linux-3.2/arch/microblaze/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/microblaze/kernel/early_printk.c
+++ linux-3.2/arch/microblaze/kernel/early_printk.c
@@ -21,7 +21,6 @@
#include <asm/setup.h>
#include <asm/prom.h>
-static u32 early_console_initialized;
static u32 base_addr;
#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
@@ -109,27 +108,11 @@ static struct console early_serial_uart1
};
#endif /* CONFIG_SERIAL_8250_CONSOLE */
-static struct console *early_console;
-
-void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- if (early_console_initialized) {
- va_start(ap, fmt);
- n = vscnprintf(buf, 512, fmt, ap);
- early_console->write(early_console, buf, n);
- va_end(ap);
- }
-}
-
int __init setup_early_printk(char *opt)
{
int version = 0;
- if (early_console_initialized)
+ if (early_console)
return 1;
base_addr = of_early_console(&version);
@@ -159,7 +142,6 @@ int __init setup_early_printk(char *opt)
}
register_console(early_console);
- early_console_initialized = 1;
return 0;
}
return 1;
@@ -169,7 +151,7 @@ int __init setup_early_printk(char *opt)
* only for early console because of performance degression */
void __init remap_early_printk(void)
{
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
printk(KERN_INFO "early_printk_console remaping from 0x%x to ",
base_addr);
@@ -179,9 +161,9 @@ void __init remap_early_printk(void)
void __init disable_early_printk(void)
{
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
printk(KERN_WARNING "disabling early console\n");
unregister_console(early_console);
- early_console_initialized = 0;
+ early_console = NULL;
}
Index: linux-3.2/arch/mips/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/mips/kernel/early_printk.c
+++ linux-3.2/arch/mips/kernel/early_printk.c
@@ -25,20 +25,18 @@ early_console_write(struct console *con,
}
}
-static struct console early_console __initdata = {
+static struct console early_console_prom = {
.name = "early",
.write = early_console_write,
.flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1
};
-static int early_console_initialized __initdata;
-
void __init setup_early_printk(void)
{
- if (early_console_initialized)
+ if (early_console)
return;
- early_console_initialized = 1;
+ early_console = &early_console_prom;
- register_console(&early_console);
+ register_console(&early_console_prom);
}
Index: linux-3.2/arch/powerpc/kernel/udbg.c
===================================================================
--- linux-3.2.orig/arch/powerpc/kernel/udbg.c
+++ linux-3.2/arch/powerpc/kernel/udbg.c
@@ -182,15 +182,13 @@ static struct console udbg_console = {
.index = 0,
};
-static int early_console_initialized;
-
/*
* Called by setup_system after ppc_md->probe and ppc_md->early_init.
* Call it again after setting udbg_putc in ppc_md->setup_arch.
*/
void __init register_early_udbg_console(void)
{
- if (early_console_initialized)
+ if (early_console)
return;
if (!udbg_putc)
@@ -200,7 +198,7 @@ void __init register_early_udbg_console(
printk(KERN_INFO "early console immortal !\n");
udbg_console.flags &= ~CON_BOOT;
}
- early_console_initialized = 1;
+ early_console = &udbg_console;
register_console(&udbg_console);
}
Index: linux-3.2/arch/sh/kernel/sh_bios.c
===================================================================
--- linux-3.2.orig/arch/sh/kernel/sh_bios.c
+++ linux-3.2/arch/sh/kernel/sh_bios.c
@@ -144,8 +144,6 @@ static struct console bios_console = {
.index = -1,
};
-static struct console *early_console;
-
static int __init setup_early_printk(char *buf)
{
int keep_early = 0;
Index: linux-3.2/arch/sparc/kernel/setup_32.c
===================================================================
--- linux-3.2.orig/arch/sparc/kernel/setup_32.c
+++ linux-3.2/arch/sparc/kernel/setup_32.c
@@ -221,6 +221,7 @@ void __init setup_arch(char **cmdline_p)
boot_flags_init(*cmdline_p);
+ early_console = &prom_early_console;
register_console(&prom_early_console);
/* Set sparc_cpu_model */
Index: linux-3.2/arch/sparc/kernel/setup_64.c
===================================================================
--- linux-3.2.orig/arch/sparc/kernel/setup_64.c
+++ linux-3.2/arch/sparc/kernel/setup_64.c
@@ -477,6 +477,12 @@ static void __init init_sparc64_elf_hwca
popc_patch();
}
+static inline void register_prom_console(void)
+{
+ early_console = &prom_early_console;
+ register_console(&prom_early_console);
+}
+
void __init setup_arch(char **cmdline_p)
{
/* Initialize PROM console and command line. */
@@ -488,7 +494,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EARLYFB
if (btext_find_display())
#endif
- register_console(&prom_early_console);
+ register_prom_console();
if (tlb_type == hypervisor)
printk("ARCH: SUN4V\n");
Index: linux-3.2/arch/tile/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/tile/kernel/early_printk.c
+++ linux-3.2/arch/tile/kernel/early_printk.c
@@ -32,25 +32,8 @@ static struct console early_hv_console =
};
/* Direct interface for emergencies */
-static struct console *early_console = &early_hv_console;
-static int early_console_initialized;
static int early_console_complete;
-static void early_vprintk(const char *fmt, va_list ap)
-{
- char buf[512];
- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
-}
-
-void early_printk(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- early_vprintk(fmt, ap);
- va_end(ap);
-}
-
void early_panic(const char *fmt, ...)
{
va_list ap;
@@ -68,14 +51,13 @@ static int __initdata keep_early;
static int __init setup_early_printk(char *str)
{
- if (early_console_initialized)
+ if (early_console)
return 1;
if (str != NULL && strncmp(str, "keep", 4) == 0)
keep_early = 1;
early_console = &early_hv_console;
- early_console_initialized = 1;
register_console(early_console);
return 0;
@@ -84,12 +66,12 @@ static int __init setup_early_printk(cha
void __init disable_early_printk(void)
{
early_console_complete = 1;
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
if (!keep_early) {
early_printk("disabling early console\n");
unregister_console(early_console);
- early_console_initialized = 0;
+ early_console = NULL;
} else {
early_printk("keeping early console\n");
}
@@ -97,7 +79,7 @@ void __init disable_early_printk(void)
void warn_early_printk(void)
{
- if (early_console_complete || early_console_initialized)
+ if (early_console_complete || early_console)
return;
early_printk("\
Machine shutting down before console output is fully initialized.\n\
Index: linux-3.2/arch/um/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/um/kernel/early_printk.c
+++ linux-3.2/arch/um/kernel/early_printk.c
@@ -16,7 +16,7 @@ static void early_console_write(struct c
um_early_printk(s, n);
}
-static struct console early_console = {
+static struct console early_console_dev = {
.name = "earlycon",
.write = early_console_write,
.flags = CON_BOOT,
@@ -25,8 +25,10 @@ static struct console early_console = {
static int __init setup_early_printk(char *buf)
{
- register_console(&early_console);
-
+ if (!early_console) {
+ early_console = &early_console_dev;
+ register_console(&early_console_dev);
+ }
return 0;
}
Index: linux-3.2/arch/unicore32/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/unicore32/kernel/early_printk.c
+++ linux-3.2/arch/unicore32/kernel/early_printk.c
@@ -33,21 +33,17 @@ static struct console early_ocd_console
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_ocd_console;
-
-static int __initdata keep_early;
-
static int __init setup_early_printk(char *buf)
{
- if (!buf)
+ int keep_early;
+
+ if (!buf || early_console)
return 0;
if (strstr(buf, "keep"))
keep_early = 1;
- if (!strncmp(buf, "ocd", 3))
- early_console = &early_ocd_console;
+ early_console = &early_ocd_console;
if (keep_early)
early_console->flags &= ~CON_BOOT;
Index: linux-3.2/arch/x86/kernel/early_printk.c
===================================================================
--- linux-3.2.orig/arch/x86/kernel/early_printk.c
+++ linux-3.2/arch/x86/kernel/early_printk.c
@@ -169,25 +169,9 @@ static struct console early_serial_conso
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_vga_console;
-static int __initdata early_console_initialized;
-
-asmlinkage void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
- va_end(ap);
-}
-
static inline void early_console_register(struct console *con, int keep_early)
{
- if (early_console->index != -1) {
+ if (con->index != -1) {
printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
con->name);
return;
@@ -207,9 +191,8 @@ static int __init setup_early_printk(cha
if (!buf)
return 0;
- if (early_console_initialized)
+ if (early_console)
return 0;
- early_console_initialized = 1;
keep = (strstr(buf, "keep") != NULL);
Index: linux-3.2/include/linux/console.h
===================================================================
--- linux-3.2.orig/include/linux/console.h
+++ linux-3.2/include/linux/console.h
@@ -133,6 +133,7 @@ struct console {
for (con = console_drivers; con != NULL; con = con->next)
extern int console_set_on_cmdline;
+extern struct console *early_console;
extern int add_preferred_console(char *name, int idx, char *options);
extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
Index: linux-3.2/include/linux/printk.h
===================================================================
--- linux-3.2.orig/include/linux/printk.h
+++ linux-3.2/include/linux/printk.h
@@ -88,8 +88,13 @@ int no_printk(const char *fmt, ...)
return 0;
}
+#ifdef CONFIG_EARLY_PRINTK
extern asmlinkage __printf(1, 2)
void early_printk(const char *fmt, ...);
+#else
+static inline __printf(1, 2) __cold
+void early_printk(const char *s, ...) { }
+#endif
extern int printk_needs_cpu(int cpu);
extern void printk_tick(void);
Index: linux-3.2/kernel/printk.c
===================================================================
--- linux-3.2.orig/kernel/printk.c
+++ linux-3.2/kernel/printk.c
@@ -44,13 +44,6 @@
#include <asm/uaccess.h>
-/*
- * Architectures can override it:
- */
-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
-{
-}
-
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
/* printk's without a loglevel use this.. */
@@ -521,6 +514,26 @@ static void __call_console_drivers(unsig
}
}
+#ifdef CONFIG_EARLY_PRINTK
+struct console *early_console;
+
+static void early_vprintk(const char *fmt, va_list ap)
+{
+ char buf[512];
+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+ if (early_console)
+ early_console->write(early_console, buf, n);
+}
+
+asmlinkage void early_printk(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ early_vprintk(fmt, ap);
+ va_end(ap);
+}
+#endif
+
static int __read_mostly ignore_loglevel;
static int __init ignore_loglevel_setup(char *str)

View File

@ -0,0 +1,28 @@
Subject: epoll.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 16:35:35 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/eventpoll.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/fs/eventpoll.c
===================================================================
--- linux-3.2.orig/fs/eventpoll.c
+++ linux-3.2/fs/eventpoll.c
@@ -438,12 +438,12 @@ static int ep_poll_wakeup_proc(void *pri
*/
static void ep_poll_safewake(wait_queue_head_t *wq)
{
- int this_cpu = get_cpu();
+ int this_cpu = get_cpu_light();
ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu);
- put_cpu();
+ put_cpu_light();
}
/*

View File

@ -0,0 +1,24 @@
Subject: filemap-fix-up.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 17 Jun 2011 18:56:24 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Wrecked-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-m6yuzd6ul717hlnl2gj6p3ou@git.kernel.org
---
mm/filemap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/mm/filemap.c
===================================================================
--- linux-3.2.orig/mm/filemap.c
+++ linux-3.2/mm/filemap.c
@@ -2061,7 +2061,7 @@ size_t iov_iter_copy_from_user_atomic(st
char *kaddr;
size_t copied;
- BUG_ON(!in_atomic());
+ BUG_ON(!pagefault_disabled());
kaddr = kmap_atomic(page, KM_USER0);
if (likely(i->nr_segs == 1)) {
int left;

View File

@ -0,0 +1,44 @@
Subject: fs-block-rt-support.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 14 Jun 2011 17:05:09 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
block/blk-core.c | 2 +-
fs/file.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
Index: linux-3.2/block/blk-core.c
===================================================================
--- linux-3.2.orig/block/blk-core.c
+++ linux-3.2/block/blk-core.c
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(blk_delay_queue);
**/
void blk_start_queue(struct request_queue *q)
{
- WARN_ON(!irqs_disabled());
+ WARN_ON_NONRT(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
__blk_run_queue(q);
Index: linux-3.2/fs/file.c
===================================================================
--- linux-3.2.orig/fs/file.c
+++ linux-3.2/fs/file.c
@@ -105,14 +105,14 @@ void free_fdtable_rcu(struct rcu_head *r
kfree(fdt->open_fds);
kfree(fdt);
} else {
- fddef = &get_cpu_var(fdtable_defer_list);
+ fddef = &per_cpu(fdtable_defer_list, get_cpu_light());
spin_lock(&fddef->lock);
fdt->next = fddef->next;
fddef->next = fdt;
/* vmallocs are handled from the workqueue context */
schedule_work(&fddef->wq);
spin_unlock(&fddef->lock);
- put_cpu_var(fdtable_defer_list);
+ put_cpu_light();
}
}

View File

@ -0,0 +1,104 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 10:11:25 +0100
Subject: fs: jbd/jbd2: Make state lock and journal head lock rt safe
bit_spin_locks break under RT.
Based on a previous patch from Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
--
include/linux/buffer_head.h | 10 ++++++++++
include/linux/jbd_common.h | 24 ++++++++++++++++++++++++
2 files changed, 34 insertions(+)
Index: linux-3.2/include/linux/buffer_head.h
===================================================================
--- linux-3.2.orig/include/linux/buffer_head.h
+++ linux-3.2/include/linux/buffer_head.h
@@ -74,6 +74,11 @@ struct buffer_head {
atomic_t b_count; /* users using this buffer_head */
#ifdef CONFIG_PREEMPT_RT_BASE
spinlock_t b_uptodate_lock;
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+ spinlock_t b_state_lock;
+ spinlock_t b_journal_head_lock;
+#endif
#endif
};
@@ -105,6 +110,11 @@ static inline void buffer_head_init_lock
{
#ifdef CONFIG_PREEMPT_RT_BASE
spin_lock_init(&bh->b_uptodate_lock);
+#if defined(CONFIG_JBD) || defined(CONFIG_JBD_MODULE) || \
+ defined(CONFIG_JBD2) || defined(CONFIG_JBD2_MODULE)
+ spin_lock_init(&bh->b_state_lock);
+ spin_lock_init(&bh->b_journal_head_lock);
+#endif
#endif
}
Index: linux-3.2/include/linux/jbd_common.h
===================================================================
--- linux-3.2.orig/include/linux/jbd_common.h
+++ linux-3.2/include/linux/jbd_common.h
@@ -37,32 +37,56 @@ static inline struct journal_head *bh2jh
static inline void jbd_lock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(BH_State, &bh->b_state);
+#else
+ spin_lock(&bh->b_state_lock);
+#endif
}
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
return bit_spin_trylock(BH_State, &bh->b_state);
+#else
+ return spin_trylock(&bh->b_state_lock);
+#endif
}
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
return bit_spin_is_locked(BH_State, &bh->b_state);
+#else
+ return spin_is_locked(&bh->b_state_lock);
+#endif
}
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(BH_State, &bh->b_state);
+#else
+ spin_unlock(&bh->b_state_lock);
+#endif
}
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(BH_JournalHead, &bh->b_state);
+#else
+ spin_lock(&bh->b_journal_head_lock);
+#endif
}
static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(BH_JournalHead, &bh->b_state);
+#else
+ spin_unlock(&bh->b_journal_head_lock);
+#endif
}
#endif

View File

@ -0,0 +1,45 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 19 Jul 2009 08:44:27 -0500
Subject: fs: namespace preemption fix
On RT we cannot loop with preemption disabled here as
mnt_make_readonly() might have been preempted. We can safely enable
preemption while waiting for MNT_WRITE_HOLD to be cleared. Safe on !RT
as well.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/namespace.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
Index: linux-3.2/fs/namespace.c
===================================================================
--- linux-3.2.orig/fs/namespace.c
+++ linux-3.2/fs/namespace.c
@@ -341,8 +341,14 @@ int mnt_want_write(struct vfsmount *mnt)
* incremented count after it has set MNT_WRITE_HOLD.
*/
smp_mb();
- while (mnt->mnt_flags & MNT_WRITE_HOLD)
+ /*
+ * No need to keep preemption disabled accross the spin loop.
+ */
+ while (mnt->mnt_flags & MNT_WRITE_HOLD) {
+ preempt_enable();
cpu_relax();
+ preempt_disable();
+ }
/*
* After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
* be set to match its requirements. So we must not load that until
@@ -352,9 +358,7 @@ int mnt_want_write(struct vfsmount *mnt)
if (__mnt_is_readonly(mnt)) {
mnt_dec_writers(mnt);
ret = -EROFS;
- goto out;
}
-out:
preempt_enable();
return ret;
}

View File

@ -0,0 +1,61 @@
From: Mike Galbraith <efault@gmx.de>
Date: Fri, 3 Jul 2009 08:44:12 -0500
Subject: fs: ntfs: disable interrupt only on !RT
On Sat, 2007-10-27 at 11:44 +0200, Ingo Molnar wrote:
> * Nick Piggin <nickpiggin@yahoo.com.au> wrote:
>
> > > [10138.175796] [<c0105de3>] show_trace+0x12/0x14
> > > [10138.180291] [<c0105dfb>] dump_stack+0x16/0x18
> > > [10138.184769] [<c011609f>] native_smp_call_function_mask+0x138/0x13d
> > > [10138.191117] [<c0117606>] smp_call_function+0x1e/0x24
> > > [10138.196210] [<c012f85c>] on_each_cpu+0x25/0x50
> > > [10138.200807] [<c0115c74>] flush_tlb_all+0x1e/0x20
> > > [10138.205553] [<c016caaf>] kmap_high+0x1b6/0x417
> > > [10138.210118] [<c011ec88>] kmap+0x4d/0x4f
> > > [10138.214102] [<c026a9d8>] ntfs_end_buffer_async_read+0x228/0x2f9
> > > [10138.220163] [<c01a0e9e>] end_bio_bh_io_sync+0x26/0x3f
> > > [10138.225352] [<c01a2b09>] bio_endio+0x42/0x6d
> > > [10138.229769] [<c02c2a08>] __end_that_request_first+0x115/0x4ac
> > > [10138.235682] [<c02c2da7>] end_that_request_chunk+0x8/0xa
> > > [10138.241052] [<c0365943>] ide_end_request+0x55/0x10a
> > > [10138.246058] [<c036dae3>] ide_dma_intr+0x6f/0xac
> > > [10138.250727] [<c0366d83>] ide_intr+0x93/0x1e0
> > > [10138.255125] [<c015afb4>] handle_IRQ_event+0x5c/0xc9
> >
> > Looks like ntfs is kmap()ing from interrupt context. Should be using
> > kmap_atomic instead, I think.
>
> it's not atomic interrupt context but irq thread context - and -rt
> remaps kmap_atomic() to kmap() internally.
Hm. Looking at the change to mm/bounce.c, perhaps I should do this
instead?
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/ntfs/aops.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/fs/ntfs/aops.c
===================================================================
--- linux-3.2.orig/fs/ntfs/aops.c
+++ linux-3.2/fs/ntfs/aops.c
@@ -144,13 +144,13 @@ static void ntfs_end_buffer_async_read(s
recs = PAGE_CACHE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
- local_irq_save(flags);
+ local_irq_save_nort(flags);
kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
for (i = 0; i < recs; i++)
post_read_mst_fixup((NTFS_RECORD*)(kaddr +
i * rec_size), rec_size);
kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
flush_dcache_page(page);
if (likely(page_uptodate && !PageError(page)))
SetPageUptodate(page);

View File

@ -0,0 +1,167 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Mar 2011 09:18:52 +0100
Subject: buffer_head: Replace bh_uptodate_lock for -rt
Wrap the bit_spin_lock calls into a separate inline and add the RT
replacements with a real spinlock.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/buffer.c | 21 +++++++--------------
fs/ntfs/aops.c | 10 +++-------
include/linux/buffer_head.h | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 44 insertions(+), 21 deletions(-)
Index: linux-3.2/fs/buffer.c
===================================================================
--- linux-3.2.orig/fs/buffer.c
+++ linux-3.2/fs/buffer.c
@@ -331,8 +331,7 @@ static void end_buffer_async_read(struct
* decide that the page is now completely done.
*/
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -345,8 +344,7 @@ static void end_buffer_async_read(struct
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
/*
* If none of the buffers had errors and they are all
@@ -358,9 +356,7 @@ static void end_buffer_async_read(struct
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
/*
@@ -394,8 +390,7 @@ void end_buffer_async_write(struct buffe
}
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_write(bh);
unlock_buffer(bh);
@@ -407,15 +402,12 @@ void end_buffer_async_write(struct buffe
}
tmp = tmp->b_this_page;
}
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
end_page_writeback(page);
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
EXPORT_SYMBOL(end_buffer_async_write);
@@ -3223,6 +3215,7 @@ struct buffer_head *alloc_buffer_head(gf
struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
+ buffer_head_init_locks(ret);
preempt_disable();
__this_cpu_inc(bh_accounting.nr);
recalc_bh_state();
Index: linux-3.2/fs/ntfs/aops.c
===================================================================
--- linux-3.2.orig/fs/ntfs/aops.c
+++ linux-3.2/fs/ntfs/aops.c
@@ -108,8 +108,7 @@ static void ntfs_end_buffer_async_read(s
"0x%llx.", (unsigned long long)bh->b_blocknr);
}
first = page_buffers(page);
- local_irq_save(flags);
- bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
+ flags = bh_uptodate_lock_irqsave(first);
clear_buffer_async_read(bh);
unlock_buffer(bh);
tmp = bh;
@@ -124,8 +123,7 @@ static void ntfs_end_buffer_async_read(s
}
tmp = tmp->b_this_page;
} while (tmp != bh);
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
+ bh_uptodate_unlock_irqrestore(first, flags);
/*
* If none of the buffers had errors then we can set the page uptodate,
* but we first have to perform the post read mst fixups, if the
@@ -160,9 +158,7 @@ static void ntfs_end_buffer_async_read(s
unlock_page(page);
return;
still_busy:
- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
- local_irq_restore(flags);
- return;
+ bh_uptodate_unlock_irqrestore(first, flags);
}
/**
Index: linux-3.2/include/linux/buffer_head.h
===================================================================
--- linux-3.2.orig/include/linux/buffer_head.h
+++ linux-3.2/include/linux/buffer_head.h
@@ -72,8 +72,42 @@ struct buffer_head {
struct address_space *b_assoc_map; /* mapping this buffer is
associated with */
atomic_t b_count; /* users using this buffer_head */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t b_uptodate_lock;
+#endif
};
+static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh)
+{
+ unsigned long flags;
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &bh->b_state);
+#else
+ spin_lock_irqsave(&bh->b_uptodate_lock, flags);
+#endif
+ return flags;
+}
+
+static inline void
+bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags)
+{
+#ifndef CONFIG_PREEMPT_RT_BASE
+ bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state);
+ local_irq_restore(flags);
+#else
+ spin_unlock_irqrestore(&bh->b_uptodate_lock, flags);
+#endif
+}
+
+static inline void buffer_head_init_locks(struct buffer_head *bh)
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spin_lock_init(&bh->b_uptodate_lock);
+#endif
+}
+
/*
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
* and buffer_foo() functions.

View File

@ -0,0 +1,92 @@
Subject: ftrace-crap.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 09 Sep 2011 16:55:53 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/trace/trace.c | 26 ++++++++++++++++++++++++--
kernel/trace/trace.h | 1 -
2 files changed, 24 insertions(+), 3 deletions(-)
Index: linux-3.2/kernel/trace/trace.c
===================================================================
--- linux-3.2.orig/kernel/trace/trace.c
+++ linux-3.2/kernel/trace/trace.c
@@ -359,11 +359,13 @@ static DECLARE_DELAYED_WORK(wakeup_work,
*/
void trace_wake_up(void)
{
+#ifndef CONFIG_PREEMPT_RT_FULL
const unsigned long delay = msecs_to_jiffies(2);
if (trace_flags & TRACE_ITER_BLOCK)
return;
schedule_delayed_work(&wakeup_work, delay);
+#endif
}
static int __init set_buf_size(char *str)
@@ -719,6 +721,12 @@ update_max_tr_single(struct trace_array
}
#endif /* CONFIG_TRACER_MAX_TRACE */
+#ifndef CONFIG_PREEMPT_RT_FULL
+static void default_wait_pipe(struct trace_iterator *iter);
+#else
+#define default_wait_pipe poll_wait_pipe
+#endif
+
/**
* register_tracer - register a tracer with the ftrace system.
* @type - the plugin for the tracer
@@ -3192,6 +3200,7 @@ static int tracing_release_pipe(struct i
return 0;
}
+#ifndef CONFIG_PREEMPT_RT_FULL
static unsigned int
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
@@ -3213,8 +3222,7 @@ tracing_poll_pipe(struct file *filp, pol
}
}
-
-void default_wait_pipe(struct trace_iterator *iter)
+static void default_wait_pipe(struct trace_iterator *iter)
{
DEFINE_WAIT(wait);
@@ -3225,6 +3233,20 @@ void default_wait_pipe(struct trace_iter
finish_wait(&trace_wait, &wait);
}
+#else
+static unsigned int
+tracing_poll_pipe(struct file *filp, poll_table *poll_table)
+{
+ struct trace_iterator *iter = filp->private_data;
+
+ if ((trace_flags & TRACE_ITER_BLOCK) || !trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+ poll_wait_pipe(iter);
+ if (!trace_empty(iter))
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+#endif
/*
* This is a make-shift waitqueue.
Index: linux-3.2/kernel/trace/trace.h
===================================================================
--- linux-3.2.orig/kernel/trace/trace.h
+++ linux-3.2/kernel/trace/trace.h
@@ -345,7 +345,6 @@ void trace_init_global_iter(struct trace
void tracing_iter_reset(struct trace_iterator *iter, int cpu);
-void default_wait_pipe(struct trace_iterator *iter);
void poll_wait_pipe(struct trace_iterator *iter);
void ftrace(struct trace_array *tr,

View File

@ -0,0 +1,81 @@
Subject: ftrace-migrate-disable-tracing.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:56:42 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/ftrace_event.h | 3 ++-
kernel/trace/trace.c | 9 ++++++---
kernel/trace/trace_events.c | 1 +
kernel/trace/trace_output.c | 5 +++++
4 files changed, 14 insertions(+), 4 deletions(-)
Index: linux-3.2/include/linux/ftrace_event.h
===================================================================
--- linux-3.2.orig/include/linux/ftrace_event.h
+++ linux-3.2/include/linux/ftrace_event.h
@@ -49,7 +49,8 @@ struct trace_entry {
unsigned char flags;
unsigned char preempt_count;
int pid;
- int padding;
+ unsigned short migrate_disable;
+ unsigned short padding;
};
#define FTRACE_MAX_EVENT \
Index: linux-3.2/kernel/trace/trace.c
===================================================================
--- linux-3.2.orig/kernel/trace/trace.c
+++ linux-3.2/kernel/trace/trace.c
@@ -1123,6 +1123,8 @@ tracing_generic_entry_update(struct trac
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
+
+ entry->migrate_disable = (tsk) ? tsk->migrate_disable & 0xFF : 0;
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
@@ -1850,9 +1852,10 @@ static void print_lat_help_header(struct
seq_puts(m, "# | / _----=> need-resched \n");
seq_puts(m, "# || / _---=> hardirq/softirq \n");
seq_puts(m, "# ||| / _--=> preempt-depth \n");
- seq_puts(m, "# |||| / delay \n");
- seq_puts(m, "# cmd pid ||||| time | caller \n");
- seq_puts(m, "# \\ / ||||| \\ | / \n");
+ seq_puts(m, "# |||| / _--=> migrate-disable\n");
+ seq_puts(m, "# ||||| / delay \n");
+ seq_puts(m, "# cmd pid |||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_func_help_header(struct seq_file *m)
Index: linux-3.2/kernel/trace/trace_events.c
===================================================================
--- linux-3.2.orig/kernel/trace/trace_events.c
+++ linux-3.2/kernel/trace/trace_events.c
@@ -116,6 +116,7 @@ static int trace_define_common_fields(vo
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
+ __common_field(unsigned short, migrate_disable);
__common_field(int, padding);
return ret;
Index: linux-3.2/kernel/trace/trace_output.c
===================================================================
--- linux-3.2.orig/kernel/trace/trace_output.c
+++ linux-3.2/kernel/trace/trace_output.c
@@ -591,6 +591,11 @@ int trace_print_lat_fmt(struct trace_seq
else
ret = trace_seq_putc(s, '.');
+ if (entry->migrate_disable)
+ ret = trace_seq_printf(s, "%x", entry->migrate_disable);
+ else
+ ret = trace_seq_putc(s, '.');
+
return ret;
}

View File

@ -1,20 +0,0 @@
#! /bin/sh
set -e
version="$1"
if [ -z "$version" ]; then
echo >&2 "Usage: $0 <version>"
exit 2
fi
name="patch-$version.patch"
dir="debian/patches/features/all/rt"
basever="$(expr "$version" : '\([3-9].[0-9]*\)')"
wget -O "$dir/$name.bz2" "http://www.kernel.org/pub/linux/kernel/projects/rt/$basever/patch-$version.patch.bz2"
wget -O "$dir/$name.sign" "http://www.kernel.org/pub/linux/kernel/projects/rt/$basever/patch-$version.patch.sign"
bzcat "$dir/$name.bz2" | gpg --verify "$dir/$name.sign" -
bzcat "$dir/$name.bz2" | filterdiff -x 'linux-*/localversion-rt' > "$dir/$name"

View File

@ -0,0 +1,49 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:30 -0500
Subject: generic: Use raw local irq variant for generic cmpxchg
No point in tracing those.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/asm-generic/cmpxchg-local.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: linux-3.2/include/asm-generic/cmpxchg-local.h
===================================================================
--- linux-3.2.orig/include/asm-generic/cmpxchg-local.h
+++ linux-3.2/include/asm-generic/cmpxchg-local.h
@@ -21,7 +21,7 @@ static inline unsigned long __cmpxchg_lo
if (size == 8 && sizeof(unsigned long) != 8)
wrong_size_cmpxchg(ptr);
- local_irq_save(flags);
+ raw_local_irq_save(flags);
switch (size) {
case 1: prev = *(u8 *)ptr;
if (prev == old)
@@ -42,7 +42,7 @@ static inline unsigned long __cmpxchg_lo
default:
wrong_size_cmpxchg(ptr);
}
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return prev;
}
@@ -55,11 +55,11 @@ static inline u64 __cmpxchg64_local_gene
u64 prev;
unsigned long flags;
- local_irq_save(flags);
+ raw_local_irq_save(flags);
prev = *(u64 *)ptr;
if (prev == old)
*(u64 *)ptr = new;
- local_irq_restore(flags);
+ raw_local_irq_restore(flags);
return prev;
}

View File

@ -0,0 +1,41 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:57 -0500
Subject: genirq: disable irqpoll on -rt
Creates long latencies for no value
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/irq/spurious.c | 10 ++++++++++
1 file changed, 10 insertions(+)
Index: linux-3.2/kernel/irq/spurious.c
===================================================================
--- linux-3.2.orig/kernel/irq/spurious.c
+++ linux-3.2/kernel/irq/spurious.c
@@ -339,6 +339,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir
static int __init irqfixup_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ printk(KERN_WARNING "irqfixup boot option not supported "
+ "w/ CONFIG_PREEMPT_RT\n");
+ return 1;
+#endif
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
@@ -351,6 +356,11 @@ module_param(irqfixup, int, 0644);
static int __init irqpoll_setup(char *str)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ printk(KERN_WARNING "irqpoll boot option not supported "
+ "w/ CONFIG_PREEMPT_RT\n");
+ return 1;
+#endif
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");

View File

@ -0,0 +1,29 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 21 Jul 2009 16:07:37 +0200
Subject: genirq: Disable random call on preempt-rt
The random call introduces high latencies and is almost
unused. Disable it for -rt.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/irq/handle.c | 3 +++
1 file changed, 3 insertions(+)
Index: linux-3.2/kernel/irq/handle.c
===================================================================
--- linux-3.2.orig/kernel/irq/handle.c
+++ linux-3.2/kernel/irq/handle.c
@@ -156,8 +156,11 @@ handle_irq_event_percpu(struct irq_desc
action = action->next;
} while (action);
+#ifndef CONFIG_PREEMPT_RT_FULL
+ /* FIXME: Can we unbreak that ? */
if (random & IRQF_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
+#endif
if (!noirqdebug)
note_interrupt(irq, desc, retval);

View File

@ -0,0 +1,50 @@
Subject: genirq-force-threading.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 03 Apr 2011 11:57:29 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 8 ++++++--
kernel/irq/manage.c | 2 ++
2 files changed, 8 insertions(+), 2 deletions(-)
Index: linux-3.2/include/linux/interrupt.h
===================================================================
--- linux-3.2.orig/include/linux/interrupt.h
+++ linux-3.2/include/linux/interrupt.h
@@ -396,9 +396,13 @@ static inline int disable_irq_wake(unsig
#ifdef CONFIG_IRQ_FORCED_THREADING
-extern bool force_irqthreads;
+# ifndef CONFIG_PREEMPT_RT_BASE
+ extern bool force_irqthreads;
+# else
+# define force_irqthreads (true)
+# endif
#else
-#define force_irqthreads (0)
+#define force_irqthreads (false)
#endif
#ifndef __ARCH_SET_SOFTIRQ_PENDING
Index: linux-3.2/kernel/irq/manage.c
===================================================================
--- linux-3.2.orig/kernel/irq/manage.c
+++ linux-3.2/kernel/irq/manage.c
@@ -18,6 +18,7 @@
#include "internals.h"
#ifdef CONFIG_IRQ_FORCED_THREADING
+# ifndef CONFIG_PREEMPT_RT_BASE
__read_mostly bool force_irqthreads;
static int __init setup_forced_irqthreads(char *arg)
@@ -26,6 +27,7 @@ static int __init setup_forced_irqthread
return 0;
}
early_param("threadirqs", setup_forced_irqthreads);
+# endif
#endif
/**

View File

@ -0,0 +1,22 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 18 Mar 2011 10:22:04 +0100
Subject: genirq: Disable DEBUG_SHIRQ for rt
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/Kconfig.debug | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/lib/Kconfig.debug
===================================================================
--- linux-3.2.orig/lib/Kconfig.debug
+++ linux-3.2/lib/Kconfig.debug
@@ -151,7 +151,7 @@ config DEBUG_KERNEL
config DEBUG_SHIRQ
bool "Debug shared IRQ handlers"
- depends on DEBUG_KERNEL && GENERIC_HARDIRQS
+ depends on DEBUG_KERNEL && GENERIC_HARDIRQS && !PREEMPT_RT_BASE
help
Enable this to generate a spurious interrupt as soon as a shared
interrupt handler is registered, and just before one is deregistered.

View File

@ -0,0 +1,37 @@
Subject: hardirq.h: Define softirq_count() as OUL to kill build warning
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Thu, 13 Oct 2011 17:19:09 +0800
kernel/lockdep.c: In function print_bad_irq_dependency:
kernel/lockdep.c:1476:3: warning: format %lu expects type long unsigned int, but argument 7 has type unsigned int
kernel/lockdep.c: In function print_usage_bug:
kernel/lockdep.c:2193:3: warning: format %lu expects type long unsigned int, but argument 7 has type unsigned int
kernel/lockdep.i show this:
printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
curr->hardirq_context, ((current_thread_info()->preempt_count) & (((1UL << (10))-1) << ((0 + 8) + 8))) >> ((0 + 8) + 8),
curr->softirq_context, (0U) >> (0 + 8),
curr->hardirqs_enabled,
curr->softirqs_enabled);
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/20111013091909.GA32739@zhy
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hardirq.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/include/linux/hardirq.h
===================================================================
--- linux-3.2.orig/include/linux/hardirq.h
+++ linux-3.2/include/linux/hardirq.h
@@ -84,7 +84,7 @@
# define softirq_count() (preempt_count() & SOFTIRQ_MASK)
# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
#else
-# define softirq_count() (0U)
+# define softirq_count() (0UL)
extern int in_serving_softirq(void);
#endif

View File

@ -0,0 +1,61 @@
Subject: hotplug: Call cpu_unplug_begin() before DOWN_PREPARE
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Sun, 16 Oct 2011 18:56:44 +0800
cpu_unplug_begin() should be called before CPU_DOWN_PREPARE, because
at CPU_DOWN_PREPARE cpu_active is cleared and sched_domain is
rebuilt. Otherwise the 'sync_unplug' thread will be running on the cpu
on which it's created and not bound on the cpu which is about to go
down.
I found that by an incorrect warning on smp_processor_id() called by
sync_unplug/1, and trace shows below:
(echo 1 > /sys/device/system/cpu/cpu1/online)
bash-1664 [000] 83.136620: _cpu_down: Bind sync_unplug to cpu 1
bash-1664 [000] 83.136623: sched_wait_task: comm=sync_unplug/1 pid=1724 prio=120
bash-1664 [000] 83.136624: _cpu_down: Wake sync_unplug
bash-1664 [000] 83.136629: sched_wakeup: comm=sync_unplug/1 pid=1724 prio=120 success=1 target_cpu=000
Wants to be folded back....
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/1318762607-2261-3-git-send-email-yong.zhang0@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
Index: linux-3.2/kernel/cpu.c
===================================================================
--- linux-3.2.orig/kernel/cpu.c
+++ linux-3.2/kernel/cpu.c
@@ -338,22 +338,20 @@ static int __ref _cpu_down(unsigned int
return -EBUSY;
}
- err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
+ cpu_hotplug_begin();
+ err = cpu_unplug_begin(cpu);
if (err) {
- nr_calls--;
- __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
- printk("%s: attempt to take down CPU %u failed\n",
- __func__, cpu);
+ printk("cpu_unplug_begin(%d) failed\n", cpu);
goto out_cancel;
}
- cpu_hotplug_begin();
- err = cpu_unplug_begin(cpu);
+ err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
- printk("cpu_unplug_begin(%d) failed\n", cpu);
- goto out_cancel;
+ printk("%s: attempt to take down CPU %u failed\n",
+ __func__, cpu);
+ goto out_release;
}
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));

View File

@ -0,0 +1,212 @@
Subject: hotplug: Lightweight get online cpus
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Jun 2011 12:36:06 +0200
get_online_cpus() is a heavy weight function which involves a global
mutex. migrate_disable() wants a simpler construct which prevents only
a CPU from going doing while a task is in a migrate disabled section.
Implement a per cpu lockless mechanism, which serializes only in the
real unplug case on a global mutex. That serialization affects only
tasks on the cpu which should be brought down.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/cpu.h | 4 +
kernel/cpu.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 128 insertions(+), 3 deletions(-)
Index: linux-3.2/include/linux/cpu.h
===================================================================
--- linux-3.2.orig/include/linux/cpu.h
+++ linux-3.2/include/linux/cpu.h
@@ -167,6 +167,8 @@ extern struct sysdev_class cpu_sysdev_cl
extern void get_online_cpus(void);
extern void put_online_cpus(void);
+extern void pin_current_cpu(void);
+extern void unpin_current_cpu(void);
#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
@@ -189,6 +191,8 @@ static inline void cpu_hotplug_driver_un
#define get_online_cpus() do { } while (0)
#define put_online_cpus() do { } while (0)
+static inline void pin_current_cpu(void) { }
+static inline void unpin_current_cpu(void) { }
#define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0)
/* These aren't inline functions due to a GCC bug. */
#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
Index: linux-3.2/kernel/cpu.c
===================================================================
--- linux-3.2.orig/kernel/cpu.c
+++ linux-3.2/kernel/cpu.c
@@ -58,6 +58,102 @@ static struct {
.refcount = 0,
};
+struct hotplug_pcp {
+ struct task_struct *unplug;
+ int refcount;
+ struct completion synced;
+};
+
+static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp);
+
+/**
+ * pin_current_cpu - Prevent the current cpu from being unplugged
+ *
+ * Lightweight version of get_online_cpus() to prevent cpu from being
+ * unplugged when code runs in a migration disabled region.
+ *
+ * Must be called with preemption disabled (preempt_count = 1)!
+ */
+void pin_current_cpu(void)
+{
+ struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
+
+retry:
+ if (!hp->unplug || hp->refcount || preempt_count() > 1 ||
+ hp->unplug == current || (current->flags & PF_STOMPER)) {
+ hp->refcount++;
+ return;
+ }
+ preempt_enable();
+ mutex_lock(&cpu_hotplug.lock);
+ mutex_unlock(&cpu_hotplug.lock);
+ preempt_disable();
+ goto retry;
+}
+
+/**
+ * unpin_current_cpu - Allow unplug of current cpu
+ *
+ * Must be called with preemption or interrupts disabled!
+ */
+void unpin_current_cpu(void)
+{
+ struct hotplug_pcp *hp = &__get_cpu_var(hotplug_pcp);
+
+ WARN_ON(hp->refcount <= 0);
+
+ /* This is safe. sync_unplug_thread is pinned to this cpu */
+ if (!--hp->refcount && hp->unplug && hp->unplug != current &&
+ !(current->flags & PF_STOMPER))
+ wake_up_process(hp->unplug);
+}
+
+/*
+ * FIXME: Is this really correct under all circumstances ?
+ */
+static int sync_unplug_thread(void *data)
+{
+ struct hotplug_pcp *hp = data;
+
+ preempt_disable();
+ hp->unplug = current;
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ while (hp->refcount) {
+ schedule_preempt_disabled();
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ }
+ set_current_state(TASK_RUNNING);
+ preempt_enable();
+ complete(&hp->synced);
+ return 0;
+}
+
+/*
+ * Start the sync_unplug_thread on the target cpu and wait for it to
+ * complete.
+ */
+static int cpu_unplug_begin(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+ struct task_struct *tsk;
+
+ init_completion(&hp->synced);
+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+ if (IS_ERR(tsk))
+ return (PTR_ERR(tsk));
+ kthread_bind(tsk, cpu);
+ wake_up_process(tsk);
+ wait_for_completion(&hp->synced);
+ return 0;
+}
+
+static void cpu_unplug_done(unsigned int cpu)
+{
+ struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu);
+
+ hp->unplug = NULL;
+}
+
void get_online_cpus(void)
{
might_sleep();
@@ -211,13 +307,14 @@ static int __ref take_cpu_down(void *_pa
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
- int err, nr_calls = 0;
+ int mycpu, err, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.mod = mod,
.hcpu = hcpu,
};
+ cpumask_var_t cpumask;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -225,7 +322,20 @@ static int __ref _cpu_down(unsigned int
if (!cpu_online(cpu))
return -EINVAL;
- cpu_hotplug_begin();
+ /* Move the downtaker off the unplug cpu */
+ if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
+ set_cpus_allowed_ptr(current, cpumask);
+ free_cpumask_var(cpumask);
+ preempt_disable();
+ mycpu = smp_processor_id();
+ if (mycpu == cpu) {
+ printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
+ preempt_enable();
+ return -EBUSY;
+ }
+ preempt_enable();
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
@@ -233,7 +343,16 @@ static int __ref _cpu_down(unsigned int
__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
__func__, cpu);
- goto out_release;
+ goto out_cancel;
+ }
+
+ cpu_hotplug_begin();
+ err = cpu_unplug_begin(cpu);
+ if (err) {
+ nr_calls--;
+ __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
+ printk("cpu_unplug_begin(%d) failed\n", cpu);
+ goto out_cancel;
}
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
@@ -264,6 +383,8 @@ static int __ref _cpu_down(unsigned int
check_for_tasks(cpu);
out_release:
+ cpu_unplug_done(cpu);
+out_cancel:
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);

View File

@ -0,0 +1,26 @@
Subject: hotplug: sync_unplug: No "\n" in task name
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Sun, 16 Oct 2011 18:56:43 +0800
Otherwise the output will look a little odd.
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/1318762607-2261-2-git-send-email-yong.zhang0@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/kernel/cpu.c
===================================================================
--- linux-3.2.orig/kernel/cpu.c
+++ linux-3.2/kernel/cpu.c
@@ -138,7 +138,7 @@ static int cpu_unplug_begin(unsigned int
struct task_struct *tsk;
init_completion(&hp->synced);
- tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d\n", cpu);
+ tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu);
if (IS_ERR(tsk))
return (PTR_ERR(tsk));
kthread_bind(tsk, cpu);

View File

@ -0,0 +1,38 @@
Subject: hotplug-use-migrate-disable.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 19:35:29 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/cpu.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: linux-3.2/kernel/cpu.c
===================================================================
--- linux-3.2.orig/kernel/cpu.c
+++ linux-3.2/kernel/cpu.c
@@ -330,14 +330,13 @@ static int __ref _cpu_down(unsigned int
cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
set_cpus_allowed_ptr(current, cpumask);
free_cpumask_var(cpumask);
- preempt_disable();
+ migrate_disable();
mycpu = smp_processor_id();
if (mycpu == cpu) {
printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- preempt_enable();
+ migrate_enable();
return -EBUSY;
}
- preempt_enable();
err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
if (err) {
@@ -387,6 +386,7 @@ static int __ref _cpu_down(unsigned int
out_release:
cpu_unplug_done(cpu);
out_cancel:
+ migrate_enable();
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);

View File

@ -0,0 +1,70 @@
Subject: hrtimer: Add missing debug_activate() aid [Was: Re: [ANNOUNCE] 3.0.6-rt17]
From: Yong Zhang <yong.zhang0@gmail.com>
Date: Thu, 13 Oct 2011 15:52:30 +0800
On Fri, Oct 07, 2011 at 10:25:25AM -0700, Fernando Lopez-Lezcano wrote:
> On 10/06/2011 06:15 PM, Thomas Gleixner wrote:
> >Dear RT Folks,
> >
> >I'm pleased to announce the 3.0.6-rt17 release.
>
> Hi and thanks again. So far this one is not hanging which is very
> good news. But I still see the hrtimer_fixup_activate warnings I
> reported for rt16...
Hi Fernando,
I think below patch will smooth your concern?
Thanks,
Yong
---
From: Yong Zhang <yong.zhang0@gmail.com>
Subject: [PATCH -rt] hrtimer: Add missing debug_activate() aid
It will fix below warning, which is also reported by Fernando:
[ 7.616090] ------------[ cut here ]------------
[ 7.616093] WARNING: at kernel/hrtimer.c:391 hrtimer_fixup_activate+0x27/0x50()
[ 7.616094] Hardware name: OptiPlex 755
[ 7.616096] Modules linked in:
[ 7.616099] Pid: 0, comm: kworker/0:0 Tainted: G W 3.0.6-rt17-00284-g9d73a61 #15
[ 7.616100] Call Trace:
[ 7.616103] [<c014d9a2>] warn_slowpath_common+0x72/0xa0
[ 7.616106] [<c0175417>] ? hrtimer_fixup_activate+0x27/0x50
[ 7.616109] [<c0175417>] ? hrtimer_fixup_activate+0x27/0x50
[ 7.616112] [<c014d9f2>] warn_slowpath_null+0x22/0x30
[ 7.616115] [<c0175417>] hrtimer_fixup_activate+0x27/0x50
[ 7.616118] [<c03b3ab0>] debug_object_activate+0x100/0x130
[ 7.616121] [<c0176b96>] ? hrtimer_start_range_ns+0x26/0x30
[ 7.616123] [<c0175a59>] enqueue_hrtimer+0x19/0x100
[ 7.616126] [<c0176b96>] ? hrtimer_start_range_ns+0x26/0x30
[ 7.616129] [<c0176744>] __hrtimer_start_range_ns+0x144/0x540
[ 7.616132] [<c072705a>] ? _raw_spin_unlock_irqrestore+0x3a/0x80
[ 7.616136] [<c0176b96>] hrtimer_start_range_ns+0x26/0x30
[ 7.616139] [<c01852b5>] tick_nohz_restart_sched_tick+0x185/0x1b0
[ 7.616142] [<c0101878>] cpu_idle+0x98/0xc0
[ 7.616146] [<c071fcd8>] start_secondary+0x1d3/0x1da
[ 7.616148] ---[ end trace 0000000000000003 ]---
Reported-by: Fernando Lopez-Lezcano <nando@ccrma.stanford.edu>
Signed-off-by: Yong Zhang <yong.zhang0@gmail.com>
Link: http://lkml.kernel.org/r/20111013075230.GA2740@zhy
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/hrtimer.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-3.2/kernel/hrtimer.c
===================================================================
--- linux-3.2.orig/kernel/hrtimer.c
+++ linux-3.2/kernel/hrtimer.c
@@ -1040,6 +1040,7 @@ int __hrtimer_start_range_ns(struct hrti
* remove it again and report a failure. This avoids
* stale base->first entries.
*/
+ debug_deactivate(timer);
__remove_hrtimer(timer, new_base,
timer->state & HRTIMER_STATE_CALLBACK, 0);
}

View File

@ -0,0 +1,42 @@
Subject: hrtimer-fix-reprogram-madness.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 14 Sep 2011 14:48:43 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/hrtimer.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
Index: linux-3.2/kernel/hrtimer.c
===================================================================
--- linux-3.2.orig/kernel/hrtimer.c
+++ linux-3.2/kernel/hrtimer.c
@@ -1315,7 +1315,11 @@ static void hrtimer_rt_reprogram(int res
if (!enqueue_hrtimer(timer, base))
return;
- if (hrtimer_reprogram(timer, base))
+#ifndef CONFIG_HIGH_RES_TIMERS
+ }
+#else
+ if (base->cpu_base->hres_active &&
+ hrtimer_reprogram(timer, base))
goto requeue;
} else if (hrtimer_active(timer)) {
@@ -1324,6 +1328,7 @@ static void hrtimer_rt_reprogram(int res
* the event device.
*/
if (&timer->node == base->active.next &&
+ base->cpu_base->hres_active &&
hrtimer_reprogram(timer, base))
goto requeue;
}
@@ -1336,6 +1341,7 @@ requeue:
*/
__remove_hrtimer(timer, base, timer->state, 0);
list_add_tail(&timer->cb_entry, &base->expired);
+#endif
}
/*

View File

@ -0,0 +1,414 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:31 -0500
Subject: hrtimer: fixup hrtimer callback changes for preempt-rt
In preempt-rt we can not call the callbacks which take sleeping locks
from the timer interrupt context.
Bring back the softirq split for now, until we fixed the signal
delivery problem for real.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/hrtimer.h | 3
kernel/hrtimer.c | 190 ++++++++++++++++++++++++++++++++++++++++++-----
kernel/sched.c | 2
kernel/time/tick-sched.c | 1
kernel/watchdog.c | 1
5 files changed, 179 insertions(+), 18 deletions(-)
Index: linux-3.2/include/linux/hrtimer.h
===================================================================
--- linux-3.2.orig/include/linux/hrtimer.h
+++ linux-3.2/include/linux/hrtimer.h
@@ -111,6 +111,8 @@ struct hrtimer {
enum hrtimer_restart (*function)(struct hrtimer *);
struct hrtimer_clock_base *base;
unsigned long state;
+ struct list_head cb_entry;
+ int irqsafe;
#ifdef CONFIG_TIMER_STATS
int start_pid;
void *start_site;
@@ -147,6 +149,7 @@ struct hrtimer_clock_base {
int index;
clockid_t clockid;
struct timerqueue_head active;
+ struct list_head expired;
ktime_t resolution;
ktime_t (*get_time)(void);
ktime_t softirq_time;
Index: linux-3.2/kernel/hrtimer.c
===================================================================
--- linux-3.2.orig/kernel/hrtimer.c
+++ linux-3.2/kernel/hrtimer.c
@@ -589,8 +589,7 @@ static int hrtimer_reprogram(struct hrti
* When the callback is running, we do not reprogram the clock event
* device. The timer callback is either running on a different CPU or
* the callback is executed in the hrtimer_interrupt context. The
- * reprogramming is handled either by the softirq, which called the
- * callback or at the end of the hrtimer_interrupt.
+ * reprogramming is handled at the end of the hrtimer_interrupt.
*/
if (hrtimer_callback_running(timer))
return 0;
@@ -625,6 +624,9 @@ static int hrtimer_reprogram(struct hrti
return res;
}
+static void __run_hrtimer(struct hrtimer *timer, ktime_t *now);
+static int hrtimer_rt_defer(struct hrtimer *timer);
+
/*
* Initialize the high resolution related parts of cpu_base
*/
@@ -644,7 +646,29 @@ static inline int hrtimer_enqueue_reprog
struct hrtimer_clock_base *base,
int wakeup)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+again:
+ if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
+ /*
+ * Move softirq based timers away from the rbtree in
+ * case it expired already. Otherwise we would have a
+ * stale base->first entry until the softirq runs.
+ */
+ if (!hrtimer_rt_defer(timer)) {
+ ktime_t now = ktime_get();
+
+ __run_hrtimer(timer, &now);
+ /*
+ * __run_hrtimer might have requeued timer and
+ * it could be base->first again.
+ */
+ if (&timer->node == base->active.next)
+ goto again;
+ return 1;
+ }
+#else
if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
+#endif
if (wakeup) {
raw_spin_unlock(&base->cpu_base->lock);
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
@@ -733,6 +757,11 @@ static inline int hrtimer_enqueue_reprog
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void retrigger_next_event(void *arg) { }
+static inline int hrtimer_reprogram(struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ return 0;
+}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -864,9 +893,9 @@ void hrtimer_wait_for_timer(const struct
{
struct hrtimer_clock_base *base = timer->base;
- if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
+ if (base && base->cpu_base && !timer->irqsafe)
wait_event(base->cpu_base->wait,
- !(timer->state & HRTIMER_STATE_CALLBACK));
+ !(timer->state & HRTIMER_STATE_CALLBACK));
}
#else
@@ -915,6 +944,11 @@ static void __remove_hrtimer(struct hrti
if (!(timer->state & HRTIMER_STATE_ENQUEUED))
goto out;
+ if (unlikely(!list_empty(&timer->cb_entry))) {
+ list_del_init(&timer->cb_entry);
+ goto out;
+ }
+
if (&timer->node == timerqueue_getnext(&base->active)) {
#ifdef CONFIG_HIGH_RES_TIMERS
/* Reprogram the clock event device. if enabled */
@@ -1176,6 +1210,7 @@ static void __hrtimer_init(struct hrtime
base = hrtimer_clockid_to_base(clock_id);
timer->base = &cpu_base->clock_base[base];
+ INIT_LIST_HEAD(&timer->cb_entry);
timerqueue_init(&timer->node);
#ifdef CONFIG_TIMER_STATS
@@ -1259,10 +1294,118 @@ static void __run_hrtimer(struct hrtimer
timer->state &= ~HRTIMER_STATE_CALLBACK;
}
-#ifdef CONFIG_HIGH_RES_TIMERS
-
static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer);
+#ifdef CONFIG_PREEMPT_RT_BASE
+static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer,
+ struct hrtimer_clock_base *base)
+{
+ /*
+ * Note, we clear the callback flag before we requeue the
+ * timer otherwise we trigger the callback_running() check
+ * in hrtimer_reprogram().
+ */
+ timer->state &= ~HRTIMER_STATE_CALLBACK;
+
+ if (restart != HRTIMER_NORESTART) {
+ BUG_ON(hrtimer_active(timer));
+ /*
+ * Enqueue the timer, if it's the leftmost timer then
+ * we need to reprogram it.
+ */
+ if (!enqueue_hrtimer(timer, base))
+ return;
+
+ if (hrtimer_reprogram(timer, base))
+ goto requeue;
+
+ } else if (hrtimer_active(timer)) {
+ /*
+ * If the timer was rearmed on another CPU, reprogram
+ * the event device.
+ */
+ if (&timer->node == base->active.next &&
+ hrtimer_reprogram(timer, base))
+ goto requeue;
+ }
+ return;
+
+requeue:
+ /*
+ * Timer is expired. Thus move it from tree to pending list
+ * again.
+ */
+ __remove_hrtimer(timer, base, timer->state, 0);
+ list_add_tail(&timer->cb_entry, &base->expired);
+}
+
+/*
+ * The changes in mainline which removed the callback modes from
+ * hrtimer are not yet working with -rt. The non wakeup_process()
+ * based callbacks which involve sleeping locks need to be treated
+ * seperately.
+ */
+static void hrtimer_rt_run_pending(void)
+{
+ enum hrtimer_restart (*fn)(struct hrtimer *);
+ struct hrtimer_cpu_base *cpu_base;
+ struct hrtimer_clock_base *base;
+ struct hrtimer *timer;
+ int index, restart;
+
+ local_irq_disable();
+ cpu_base = &per_cpu(hrtimer_bases, smp_processor_id());
+
+ raw_spin_lock(&cpu_base->lock);
+
+ for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) {
+ base = &cpu_base->clock_base[index];
+
+ while (!list_empty(&base->expired)) {
+ timer = list_first_entry(&base->expired,
+ struct hrtimer, cb_entry);
+
+ /*
+ * Same as the above __run_hrtimer function
+ * just we run with interrupts enabled.
+ */
+ debug_hrtimer_deactivate(timer);
+ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+ timer_stats_account_hrtimer(timer);
+ fn = timer->function;
+
+ raw_spin_unlock_irq(&cpu_base->lock);
+ restart = fn(timer);
+ raw_spin_lock_irq(&cpu_base->lock);
+
+ hrtimer_rt_reprogram(restart, timer, base);
+ }
+ }
+
+ raw_spin_unlock_irq(&cpu_base->lock);
+
+ wake_up_timer_waiters(cpu_base);
+}
+
+static int hrtimer_rt_defer(struct hrtimer *timer)
+{
+ if (timer->irqsafe)
+ return 0;
+
+ __remove_hrtimer(timer, timer->base, timer->state, 0);
+ list_add_tail(&timer->cb_entry, &timer->base->expired);
+ return 1;
+}
+
+#else
+
+static inline void hrtimer_rt_run_pending(void) { }
+static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; }
+
+#endif
+
+#ifdef CONFIG_HIGH_RES_TIMERS
+
/*
* High resolution timer interrupt
* Called with interrupts disabled
@@ -1271,7 +1414,7 @@ void hrtimer_interrupt(struct clock_even
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
ktime_t expires_next, now, entry_time, delta;
- int i, retries = 0;
+ int i, retries = 0, raise = 0;
BUG_ON(!cpu_base->hres_active);
cpu_base->nr_events++;
@@ -1338,7 +1481,10 @@ retry:
break;
}
- __run_hrtimer(timer, &basenow);
+ if (!hrtimer_rt_defer(timer))
+ __run_hrtimer(timer, &basenow);
+ else
+ raise = 1;
}
}
@@ -1353,6 +1499,10 @@ retry:
if (expires_next.tv64 == KTIME_MAX ||
!tick_program_event(expires_next, 0)) {
cpu_base->hang_detected = 0;
+
+ if (raise)
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+
return;
}
@@ -1428,17 +1578,17 @@ void hrtimer_peek_ahead_timers(void)
local_irq_restore(flags);
}
-static void run_hrtimer_softirq(struct softirq_action *h)
-{
- hrtimer_peek_ahead_timers();
-}
-
#else /* CONFIG_HIGH_RES_TIMERS */
static inline void __hrtimer_peek_ahead_timers(void) { }
#endif /* !CONFIG_HIGH_RES_TIMERS */
+static void run_hrtimer_softirq(struct softirq_action *h)
+{
+ hrtimer_rt_run_pending();
+}
+
/*
* Called from timer softirq every jiffy, expire hrtimers:
*
@@ -1471,7 +1621,7 @@ void hrtimer_run_queues(void)
struct timerqueue_node *node;
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
struct hrtimer_clock_base *base;
- int index, gettime = 1;
+ int index, gettime = 1, raise = 0;
if (hrtimer_hres_active())
return;
@@ -1496,12 +1646,16 @@ void hrtimer_run_queues(void)
hrtimer_get_expires_tv64(timer))
break;
- __run_hrtimer(timer, &base->softirq_time);
+ if (!hrtimer_rt_defer(timer))
+ __run_hrtimer(timer, &base->softirq_time);
+ else
+ raise = 1;
}
raw_spin_unlock(&cpu_base->lock);
}
- wake_up_timer_waiters(cpu_base);
+ if (raise)
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
}
/*
@@ -1523,6 +1677,7 @@ static enum hrtimer_restart hrtimer_wake
void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
{
sl->timer.function = hrtimer_wakeup;
+ sl->timer.irqsafe = 1;
sl->task = task;
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
@@ -1661,6 +1816,7 @@ static void __cpuinit init_hrtimers_cpu(
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
cpu_base->clock_base[i].cpu_base = cpu_base;
timerqueue_init_head(&cpu_base->clock_base[i].active);
+ INIT_LIST_HEAD(&cpu_base->clock_base[i].expired);
}
hrtimer_init_hres(cpu_base);
@@ -1779,9 +1935,7 @@ void __init hrtimers_init(void)
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
-#ifdef CONFIG_HIGH_RES_TIMERS
open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
-#endif
}
/**
Index: linux-3.2/kernel/sched.c
===================================================================
--- linux-3.2.orig/kernel/sched.c
+++ linux-3.2/kernel/sched.c
@@ -189,6 +189,7 @@ void init_rt_bandwidth(struct rt_bandwid
hrtimer_init(&rt_b->rt_period_timer,
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rt_b->rt_period_timer.irqsafe = 1;
rt_b->rt_period_timer.function = sched_rt_period_timer;
}
@@ -1277,6 +1278,7 @@ static void init_rq_hrtick(struct rq *rq
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
+ rq->hrtick_timer.irqsafe = 1;
}
#else /* CONFIG_SCHED_HRTICK */
static inline void hrtick_clear(struct rq *rq)
Index: linux-3.2/kernel/time/tick-sched.c
===================================================================
--- linux-3.2.orig/kernel/time/tick-sched.c
+++ linux-3.2/kernel/time/tick-sched.c
@@ -798,6 +798,7 @@ void tick_setup_sched_timer(void)
* Emulate tick processing via per-CPU hrtimers:
*/
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ ts->sched_timer.irqsafe = 1;
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per cpu) */
Index: linux-3.2/kernel/watchdog.c
===================================================================
--- linux-3.2.orig/kernel/watchdog.c
+++ linux-3.2/kernel/watchdog.c
@@ -436,6 +436,7 @@ static void watchdog_prepare_cpu(int cpu
WARN_ON(per_cpu(softlockup_watchdog, cpu));
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
+ hrtimer->irqsafe = 1;
}
static int watchdog_enable(int cpu)

View File

@ -0,0 +1,203 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:34 -0500
Subject: hrtimers: prepare full preemption
Make cancellation of a running callback in softirq context safe
against preemption.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/hrtimer.h | 10 ++++++++++
kernel/hrtimer.c | 33 ++++++++++++++++++++++++++++++++-
kernel/itimer.c | 1 +
kernel/posix-timers.c | 33 +++++++++++++++++++++++++++++++++
4 files changed, 76 insertions(+), 1 deletion(-)
Index: linux-3.2/include/linux/hrtimer.h
===================================================================
--- linux-3.2.orig/include/linux/hrtimer.h
+++ linux-3.2/include/linux/hrtimer.h
@@ -187,6 +187,9 @@ struct hrtimer_cpu_base {
unsigned long nr_hangs;
ktime_t max_hang_time;
#endif
+#ifdef CONFIG_PREEMPT_RT_BASE
+ wait_queue_head_t wait;
+#endif
struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
};
@@ -374,6 +377,13 @@ static inline int hrtimer_restart(struct
return hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
+/* Softirq preemption could deadlock timer removal */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
+#else
+# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
+#endif
+
/* Query timers: */
extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer);
extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp);
Index: linux-3.2/kernel/hrtimer.c
===================================================================
--- linux-3.2.orig/kernel/hrtimer.c
+++ linux-3.2/kernel/hrtimer.c
@@ -847,6 +847,32 @@ u64 hrtimer_forward(struct hrtimer *time
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define wake_up_timer_waiters(b) wake_up(&(b)->wait)
+
+/**
+ * hrtimer_wait_for_timer - Wait for a running timer
+ *
+ * @timer: timer to wait for
+ *
+ * The function waits in case the timers callback function is
+ * currently executed on the waitqueue of the timer base. The
+ * waitqueue is woken up after the timer callback function has
+ * finished execution.
+ */
+void hrtimer_wait_for_timer(const struct hrtimer *timer)
+{
+ struct hrtimer_clock_base *base = timer->base;
+
+ if (base && base->cpu_base && !hrtimer_hres_active(base->cpu_base))
+ wait_event(base->cpu_base->wait,
+ !(timer->state & HRTIMER_STATE_CALLBACK));
+}
+
+#else
+# define wake_up_timer_waiters(b) do { } while (0)
+#endif
+
/*
* enqueue_hrtimer - internal function to (re)start a timer
*
@@ -1071,7 +1097,7 @@ int hrtimer_cancel(struct hrtimer *timer
if (ret >= 0)
return ret;
- cpu_relax();
+ hrtimer_wait_for_timer(timer);
}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1474,6 +1500,8 @@ void hrtimer_run_queues(void)
}
raw_spin_unlock(&cpu_base->lock);
}
+
+ wake_up_timer_waiters(cpu_base);
}
/*
@@ -1636,6 +1664,9 @@ static void __cpuinit init_hrtimers_cpu(
}
hrtimer_init_hres(cpu_base);
+#ifdef CONFIG_PREEMPT_RT_BASE
+ init_waitqueue_head(&cpu_base->wait);
+#endif
}
#ifdef CONFIG_HOTPLUG_CPU
Index: linux-3.2/kernel/itimer.c
===================================================================
--- linux-3.2.orig/kernel/itimer.c
+++ linux-3.2/kernel/itimer.c
@@ -214,6 +214,7 @@ again:
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
+ hrtimer_wait_for_timer(&tsk->signal->real_timer);
goto again;
}
expires = timeval_to_ktime(value->it_value);
Index: linux-3.2/kernel/posix-timers.c
===================================================================
--- linux-3.2.orig/kernel/posix-timers.c
+++ linux-3.2/kernel/posix-timers.c
@@ -766,6 +766,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_
return overrun;
}
+/*
+ * Protected by RCU!
+ */
+static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr)
+{
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (kc->timer_set == common_timer_set)
+ hrtimer_wait_for_timer(&timr->it.real.timer);
+ else
+ /* FIXME: Whacky hack for posix-cpu-timers */
+ schedule_timeout(1);
+#endif
+}
+
/* Set a POSIX.1b interval timer. */
/* timr->it_lock is taken. */
static int
@@ -843,6 +857,7 @@ retry:
if (!timr)
return -EINVAL;
+ rcu_read_lock();
kc = clockid_to_kclock(timr->it_clock);
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
@@ -851,9 +866,12 @@ retry:
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
+ timer_wait_for_callback(kc, timr);
rtn = NULL; // We already got the old time...
+ rcu_read_unlock();
goto retry;
}
+ rcu_read_unlock();
if (old_setting && !error &&
copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
@@ -891,10 +909,15 @@ retry_delete:
if (!timer)
return -EINVAL;
+ rcu_read_lock();
if (timer_delete_hook(timer) == TIMER_RETRY) {
unlock_timer(timer, flags);
+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+ timer);
+ rcu_read_unlock();
goto retry_delete;
}
+ rcu_read_unlock();
spin_lock(&current->sighand->siglock);
list_del(&timer->list);
@@ -920,8 +943,18 @@ static void itimer_delete(struct k_itime
retry_delete:
spin_lock_irqsave(&timer->it_lock, flags);
+ /* On RT we can race with a deletion */
+ if (!timer->it_signal) {
+ unlock_timer(timer, flags);
+ return;
+ }
+
if (timer_delete_hook(timer) == TIMER_RETRY) {
+ rcu_read_lock();
unlock_timer(timer, flags);
+ timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
+ timer);
+ rcu_read_unlock();
goto retry_delete;
}
list_del(&timer->list);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,183 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:16 -0500
Subject: ide: Do not disable interrupts for PREEMPT-RT
Use the local_irq_*_nort variants.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/ide/alim15x3.c | 4 ++--
drivers/ide/hpt366.c | 4 ++--
drivers/ide/ide-io-std.c | 8 ++++----
drivers/ide/ide-io.c | 2 +-
drivers/ide/ide-iops.c | 4 ++--
drivers/ide/ide-probe.c | 4 ++--
drivers/ide/ide-taskfile.c | 6 +++---
7 files changed, 16 insertions(+), 16 deletions(-)
Index: linux-3.2/drivers/ide/alim15x3.c
===================================================================
--- linux-3.2.orig/drivers/ide/alim15x3.c
+++ linux-3.2/drivers/ide/alim15x3.c
@@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct p
isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
- local_irq_save(flags);
+ local_irq_save_nort(flags);
if (m5229_revision < 0xC2) {
/*
@@ -325,7 +325,7 @@ out:
}
pci_dev_put(north);
pci_dev_put(isa_dev);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
return 0;
}
Index: linux-3.2/drivers/ide/hpt366.c
===================================================================
--- linux-3.2.orig/drivers/ide/hpt366.c
+++ linux-3.2/drivers/ide/hpt366.c
@@ -1241,7 +1241,7 @@ static int __devinit init_dma_hpt366(ide
dma_old = inb(base + 2);
- local_irq_save(flags);
+ local_irq_save_nort(flags);
dma_new = dma_old;
pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
@@ -1252,7 +1252,7 @@ static int __devinit init_dma_hpt366(ide
if (dma_new != dma_old)
outb(dma_new, base + 2);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n",
hwif->name, base, base + 7);
Index: linux-3.2/drivers/ide/ide-io-std.c
===================================================================
--- linux-3.2.orig/drivers/ide/ide-io-std.c
+++ linux-3.2/drivers/ide/ide-io-std.c
@@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive,
unsigned long uninitialized_var(flags);
if ((io_32bit & 2) && !mmio) {
- local_irq_save(flags);
+ local_irq_save_nort(flags);
ata_vlb_sync(io_ports->nsect_addr);
}
@@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive,
insl(data_addr, buf, words);
if ((io_32bit & 2) && !mmio)
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
if (((len + 1) & 3) < 2)
return;
@@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive,
unsigned long uninitialized_var(flags);
if ((io_32bit & 2) && !mmio) {
- local_irq_save(flags);
+ local_irq_save_nort(flags);
ata_vlb_sync(io_ports->nsect_addr);
}
@@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive,
outsl(data_addr, buf, words);
if ((io_32bit & 2) && !mmio)
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
if (((len + 1) & 3) < 2)
return;
Index: linux-3.2/drivers/ide/ide-io.c
===================================================================
--- linux-3.2.orig/drivers/ide/ide-io.c
+++ linux-3.2/drivers/ide/ide-io.c
@@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long dat
/* disable_irq_nosync ?? */
disable_irq(hwif->irq);
/* local CPU only, as if we were handling an interrupt */
- local_irq_disable();
+ local_irq_disable_nort();
if (hwif->polling) {
startstop = handler(drive);
} else if (drive_is_ready(drive)) {
Index: linux-3.2/drivers/ide/ide-iops.c
===================================================================
--- linux-3.2.orig/drivers/ide/ide-iops.c
+++ linux-3.2/drivers/ide/ide-iops.c
@@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive,
if ((stat & ATA_BUSY) == 0)
break;
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
*rstat = stat;
return -EBUSY;
}
}
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
}
/*
* Allow status to settle, then read it again.
Index: linux-3.2/drivers/ide/ide-probe.c
===================================================================
--- linux-3.2.orig/drivers/ide/ide-probe.c
+++ linux-3.2/drivers/ide/ide-probe.c
@@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *dri
int bswap = 1;
/* local CPU only; some systems need this */
- local_irq_save(flags);
+ local_irq_save_nort(flags);
/* read 512 bytes of id info */
hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
drive->dev_flags |= IDE_DFLAG_ID_READ;
#ifdef DEBUG
Index: linux-3.2/drivers/ide/ide-taskfile.c
===================================================================
--- linux-3.2.orig/drivers/ide/ide-taskfile.c
+++ linux-3.2/drivers/ide/ide-taskfile.c
@@ -251,7 +251,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
page_is_high = PageHighMem(page);
if (page_is_high)
- local_irq_save(flags);
+ local_irq_save_nort(flags);
buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset;
@@ -272,7 +272,7 @@ void ide_pio_bytes(ide_drive_t *drive, s
kunmap_atomic(buf, KM_BIO_SRC_IRQ);
if (page_is_high)
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
len -= nr_bytes;
}
@@ -415,7 +415,7 @@ static ide_startstop_t pre_task_out_intr
}
if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
- local_irq_disable();
+ local_irq_disable_nort();
ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);

View File

@ -0,0 +1,42 @@
From: Sven-Thorsten Dietrich <sdietrich@novell.com>
Date: Fri, 3 Jul 2009 08:30:35 -0500
Subject: infiniband: Mellanox IB driver patch use _nort() primitives
Fixes in_atomic stack-dump, when Mellanox module is loaded into the RT
Kernel.
Michael S. Tsirkin <mst@dev.mellanox.co.il> sayeth:
"Basically, if you just make spin_lock_irqsave (and spin_lock_irq) not disable
interrupts for non-raw spinlocks, I think all of infiniband will be fine without
changes."
Signed-off-by: Sven-Thorsten Dietrich <sven@thebigcorporation.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
===================================================================
--- linux-3.2.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ linux-3.2/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -801,7 +801,7 @@ void ipoib_mcast_restart_task(struct wor
ipoib_mcast_stop_thread(dev, 0);
- local_irq_save(flags);
+ local_irq_save_nort(flags);
netif_addr_lock(dev);
spin_lock(&priv->lock);
@@ -883,7 +883,7 @@ void ipoib_mcast_restart_task(struct wor
spin_unlock(&priv->lock);
netif_addr_unlock(dev);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
/* We have to cancel outside of the spinlock */
list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {

View File

@ -0,0 +1,46 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:16 -0500
Subject: input: gameport: Do not disable interrupts on PREEMPT_RT
Use the _nort() primitives.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/input/gameport/gameport.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
Index: linux-3.2/drivers/input/gameport/gameport.c
===================================================================
--- linux-3.2.orig/drivers/input/gameport/gameport.c
+++ linux-3.2/drivers/input/gameport/gameport.c
@@ -87,12 +87,12 @@ static int gameport_measure_speed(struct
tx = 1 << 30;
for(i = 0; i < 50; i++) {
- local_irq_save(flags);
+ local_irq_save_nort(flags);
GET_TIME(t1);
for (t = 0; t < 50; t++) gameport_read(gameport);
GET_TIME(t2);
GET_TIME(t3);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
udelay(i * 10);
if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t;
}
@@ -111,11 +111,11 @@ static int gameport_measure_speed(struct
tx = 1 << 30;
for(i = 0; i < 50; i++) {
- local_irq_save(flags);
+ local_irq_save_nort(flags);
rdtscl(t1);
for (t = 0; t < 50; t++) gameport_read(gameport);
rdtscl(t2);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
udelay(i * 10);
if (t2 - t1 < tx) tx = t2 - t1;
}

View File

@ -0,0 +1,89 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:12 -0500
Subject: ipc: Make the ipc code -rt aware
RT serializes the code with the (rt)spinlock but keeps preemption
enabled. Some parts of the code need to be atomic nevertheless.
Protect it with preempt_disable/enable_rt pairts.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
ipc/mqueue.c | 5 +++++
ipc/msg.c | 16 ++++++++++++++++
2 files changed, 21 insertions(+)
Index: linux-3.2/ipc/mqueue.c
===================================================================
--- linux-3.2.orig/ipc/mqueue.c
+++ linux-3.2/ipc/mqueue.c
@@ -820,12 +820,17 @@ static inline void pipelined_send(struct
struct msg_msg *message,
struct ext_wait_queue *receiver)
{
+ /*
+ * Keep them in one critical section for PREEMPT_RT:
+ */
+ preempt_disable_rt();
receiver->msg = message;
list_del(&receiver->list);
receiver->state = STATE_PENDING;
wake_up_process(receiver->task);
smp_wmb();
receiver->state = STATE_READY;
+ preempt_enable_rt();
}
/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
Index: linux-3.2/ipc/msg.c
===================================================================
--- linux-3.2.orig/ipc/msg.c
+++ linux-3.2/ipc/msg.c
@@ -259,12 +259,20 @@ static void expunge_all(struct msg_queue
while (tmp != &msq->q_receivers) {
struct msg_receiver *msr;
+ /*
+ * Make sure that the wakeup doesnt preempt
+ * this CPU prematurely. (on PREEMPT_RT)
+ */
+ preempt_disable_rt();
+
msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = ERR_PTR(res);
+
+ preempt_enable_rt();
}
}
@@ -611,6 +619,12 @@ static inline int pipelined_send(struct
!security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
msr->r_msgtype, msr->r_mode)) {
+ /*
+ * Make sure that the wakeup doesnt preempt
+ * this CPU prematurely. (on PREEMPT_RT)
+ */
+ preempt_disable_rt();
+
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
msr->r_msg = NULL;
@@ -624,9 +638,11 @@ static inline int pipelined_send(struct
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = msg;
+ preempt_enable_rt();
return 1;
}
+ preempt_enable_rt();
}
}
return 0;

View File

@ -0,0 +1,62 @@
Subject: ipc/mqueue: Add a critical section to avoid a deadlock
From: KOBAYASHI Yoshitake <yoshitake.kobayashi@toshiba.co.jp>
Date: Sat, 23 Jul 2011 11:57:36 +0900
(Repost for v3.0-rt1 and changed the distination addreses)
I have tested the following patch on v3.0-rt1 with PREEMPT_RT_FULL.
In POSIX message queue, if a sender process uses SCHED_FIFO and
has a higher priority than a receiver process, the sender will
be stuck at ipc/mqueue.c:452
452 while (ewp->state == STATE_PENDING)
453 cpu_relax();
Description of the problem
(receiver process)
1. receiver changes sender's state to STATE_PENDING (mqueue.c:846)
2. wake up sender process and "switch to sender" (mqueue.c:847)
Note: This context switch only happens in PREEMPT_RT_FULL kernel.
(sender process)
3. sender check the own state in above loop (mqueue.c:452-453)
*. receiver will never wake up and cannot change sender's state to
STATE_READY because sender has higher priority
Signed-off-by: Yoshitake Kobayashi <yoshitake.kobayashi@toshiba.co.jp>
Cc: viro@zeniv.linux.org.uk
Cc: dchinner@redhat.com
Cc: npiggin@kernel.dk
Cc: hch@lst.de
Cc: arnd@arndb.de
Link: http://lkml.kernel.org/r/4E2A38A0.1090601@toshiba.co.jp
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
ipc/mqueue.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
Index: linux-3.2/ipc/mqueue.c
===================================================================
--- linux-3.2.orig/ipc/mqueue.c
+++ linux-3.2/ipc/mqueue.c
@@ -844,15 +844,19 @@ static inline void pipelined_receive(str
wake_up_interruptible(&info->wait_q);
return;
}
+ /*
+ * Keep them in one critical section for PREEMPT_RT:
+ */
+ preempt_disable_rt();
msg_insert(sender->msg, info);
list_del(&sender->list);
sender->state = STATE_PENDING;
wake_up_process(sender->task);
smp_wmb();
sender->state = STATE_READY;
+ preempt_enable_rt();
}
-
-SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
+ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
size_t, msg_len, unsigned int, msg_prio,
const struct timespec __user *, u_abs_timeout)
{

View File

@ -0,0 +1,75 @@
Subject: ipc/sem: Rework semaphore wakeups
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Sep 2011 11:57:04 +0200
Subject: ipc/sem: Rework semaphore wakeups
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue Sep 13 15:09:40 CEST 2011
Current sysv sems have a weird ass wakeup scheme that involves keeping
preemption disabled over a potential O(n^2) loop and busy waiting on
that on other CPUs.
Kill this and simply wake the task directly from under the sem_lock.
This was discovered by a migrate_disable() debug feature that
disallows:
spin_lock();
preempt_disable();
spin_unlock()
preempt_enable();
Cc: Manfred Spraul <manfred@colorfullife.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Manfred Spraul <manfred@colorfullife.com>
Link: http://lkml.kernel.org/r/1315994224.5040.1.camel@twins
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
ipc/sem.c | 10 ++++++++++
1 file changed, 10 insertions(+)
Index: linux-3.2/ipc/sem.c
===================================================================
--- linux-3.2.orig/ipc/sem.c
+++ linux-3.2/ipc/sem.c
@@ -461,6 +461,13 @@ undo:
static void wake_up_sem_queue_prepare(struct list_head *pt,
struct sem_queue *q, int error)
{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ struct task_struct *p = q->sleeper;
+ get_task_struct(p);
+ q->status = error;
+ wake_up_process(p);
+ put_task_struct(p);
+#else
if (list_empty(pt)) {
/*
* Hold preempt off so that we don't get preempted and have the
@@ -472,6 +479,7 @@ static void wake_up_sem_queue_prepare(st
q->pid = error;
list_add_tail(&q->simple_list, pt);
+#endif
}
/**
@@ -485,6 +493,7 @@ static void wake_up_sem_queue_prepare(st
*/
static void wake_up_sem_queue_do(struct list_head *pt)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
struct sem_queue *q, *t;
int did_something;
@@ -497,6 +506,7 @@ static void wake_up_sem_queue_do(struct
}
if (did_something)
preempt_enable();
+#endif
}
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)

View File

@ -0,0 +1,22 @@
Subject: jump-label-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 13 Jul 2011 11:03:16 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/jump_label.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/include/linux/jump_label.h
===================================================================
--- linux-3.2.orig/include/linux/jump_label.h
+++ linux-3.2/include/linux/jump_label.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) && !defined(CONFIG_PREEMPT_BASE)
struct jump_label_key {
atomic_t enabled;

View File

@ -0,0 +1,50 @@
Subject: kconfig-disable-a-few-options-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 24 Jul 2011 12:11:43 +0200
Disable stuff which is known to have issues on RT
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/Kconfig | 1 +
drivers/net/Kconfig | 1 +
mm/Kconfig | 2 +-
3 files changed, 3 insertions(+), 1 deletion(-)
Index: linux-3.2/arch/Kconfig
===================================================================
--- linux-3.2.orig/arch/Kconfig
+++ linux-3.2/arch/Kconfig
@@ -6,6 +6,7 @@ config OPROFILE
tristate "OProfile system profiling"
depends on PROFILING
depends on HAVE_OPROFILE
+ depends on !PREEMPT_RT_FULL
select RING_BUFFER
select RING_BUFFER_ALLOW_SWAP
help
Index: linux-3.2/drivers/net/Kconfig
===================================================================
--- linux-3.2.orig/drivers/net/Kconfig
+++ linux-3.2/drivers/net/Kconfig
@@ -154,6 +154,7 @@ config MACVTAP
config NETCONSOLE
tristate "Network console logging support"
+ depends on !PREEMPT_RT_FULL
---help---
If you want to log kernel messages over the network, enable this.
See <file:Documentation/networking/netconsole.txt> for details.
Index: linux-3.2/mm/Kconfig
===================================================================
--- linux-3.2.orig/mm/Kconfig
+++ linux-3.2/mm/Kconfig
@@ -307,7 +307,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
config TRANSPARENT_HUGEPAGE
bool "Transparent Hugepage Support"
- depends on X86 && MMU
+ depends on X86 && MMU && !PREEMPT_RT_FULL
select COMPACTION
help
Transparent Hugepages allows the kernel to use huge pages and

View File

@ -0,0 +1,61 @@
Subject: kconfig-preempt-rt-full.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 29 Jun 2011 14:58:57 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
init/Makefile | 2 +-
kernel/Kconfig.preempt | 7 +++++++
scripts/mkcompile_h | 4 +++-
3 files changed, 11 insertions(+), 2 deletions(-)
Index: linux-3.2/init/Makefile
===================================================================
--- linux-3.2.orig/init/Makefile
+++ linux-3.2/init/Makefile
@@ -29,4 +29,4 @@ silent_chk_compile.h = :
include/generated/compile.h: FORCE
@$($(quiet)chk_compile.h)
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
+ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)"
Index: linux-3.2/kernel/Kconfig.preempt
===================================================================
--- linux-3.2.orig/kernel/Kconfig.preempt
+++ linux-3.2/kernel/Kconfig.preempt
@@ -66,6 +66,13 @@ config PREEMPT_RTB
enables changes which are preliminary for the full preemptiple
RT kernel.
+config PREEMPT_RT_FULL
+ bool "Fully Preemptible Kernel (RT)"
+ depends on IRQ_FORCED_THREADING
+ select PREEMPT_RT_BASE
+ help
+ All and everything
+
endchoice
config PREEMPT_COUNT
Index: linux-3.2/scripts/mkcompile_h
===================================================================
--- linux-3.2.orig/scripts/mkcompile_h
+++ linux-3.2/scripts/mkcompile_h
@@ -4,7 +4,8 @@ TARGET=$1
ARCH=$2
SMP=$3
PREEMPT=$4
-CC=$5
+RT=$5
+CC=$6
vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
CONFIG_FLAGS=""
if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
+if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
# Truncate to maximum length

View File

@ -0,0 +1,116 @@
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 28 Jul 2011 12:42:23 -0500
Subject: kgdb/serial: Short term workaround
On 07/27/2011 04:37 PM, Thomas Gleixner wrote:
> - KGDB (not yet disabled) is reportedly unusable on -rt right now due
> to missing hacks in the console locking which I dropped on purpose.
>
To work around this in the short term you can use this patch, in
addition to the clocksource watchdog patch that Thomas brewed up.
Comments are welcome of course. Ultimately the right solution is to
change separation between the console and the HW to have a polled mode
+ work queue so as not to introduce any kind of latency.
Thanks,
Jason.
---
drivers/tty/serial/8250.c | 13 +++++++++----
include/linux/kdb.h | 2 ++
kernel/debug/kdb/kdb_io.c | 6 ++----
3 files changed, 13 insertions(+), 8 deletions(-)
Index: linux-3.2/drivers/tty/serial/8250.c
===================================================================
--- linux-3.2.orig/drivers/tty/serial/8250.c
+++ linux-3.2/drivers/tty/serial/8250.c
@@ -38,6 +38,7 @@
#include <linux/nmi.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/kdb.h>
#include <asm/io.h>
#include <asm/irq.h>
@@ -2848,10 +2849,14 @@ serial8250_console_write(struct console
touch_nmi_watchdog();
- if (up->port.sysrq || oops_in_progress)
- locked = spin_trylock_irqsave(&up->port.lock, flags);
- else
- spin_lock_irqsave(&up->port.lock, flags);
+ if (unlikely(in_kdb_printk())) {
+ locked = 0;
+ } else {
+ if (up->port.sysrq || oops_in_progress)
+ locked = spin_trylock_irqsave(&up->port.lock, flags);
+ else
+ spin_lock_irqsave(&up->port.lock, flags);
+ }
/*
* First save the IER then disable the interrupts
Index: linux-3.2/include/linux/kdb.h
===================================================================
--- linux-3.2.orig/include/linux/kdb.h
+++ linux-3.2/include/linux/kdb.h
@@ -150,12 +150,14 @@ extern int kdb_register(char *, kdb_func
extern int kdb_register_repeat(char *, kdb_func_t, char *, char *,
short, kdb_repeat_t);
extern int kdb_unregister(char *);
+#define in_kdb_printk() (kdb_trap_printk)
#else /* ! CONFIG_KGDB_KDB */
#define kdb_printf(...)
#define kdb_init(x)
#define kdb_register(...)
#define kdb_register_repeat(...)
#define kdb_uregister(x)
+#define in_kdb_printk() (0)
#endif /* CONFIG_KGDB_KDB */
enum {
KDB_NOT_INITIALIZED,
Index: linux-3.2/kernel/debug/kdb/kdb_io.c
===================================================================
--- linux-3.2.orig/kernel/debug/kdb/kdb_io.c
+++ linux-3.2/kernel/debug/kdb/kdb_io.c
@@ -553,7 +553,6 @@ int vkdb_printf(const char *fmt, va_list
int diag;
int linecount;
int logging, saved_loglevel = 0;
- int saved_trap_printk;
int got_printf_lock = 0;
int retlen = 0;
int fnd, len;
@@ -564,8 +563,6 @@ int vkdb_printf(const char *fmt, va_list
unsigned long uninitialized_var(flags);
preempt_disable();
- saved_trap_printk = kdb_trap_printk;
- kdb_trap_printk = 0;
/* Serialize kdb_printf if multiple cpus try to write at once.
* But if any cpu goes recursive in kdb, just print the output,
@@ -821,7 +818,6 @@ kdb_print_out:
} else {
__release(kdb_printf_lock);
}
- kdb_trap_printk = saved_trap_printk;
preempt_enable();
return retlen;
}
@@ -831,9 +827,11 @@ int kdb_printf(const char *fmt, ...)
va_list ap;
int r;
+ kdb_trap_printk++;
va_start(ap, fmt);
r = vkdb_printf(fmt, ap);
va_end(ap);
+ kdb_trap_printk--;
return r;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,125 @@
Subject: lglocks-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 15 Jun 2011 11:02:21 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/lglock.h | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 100 insertions(+)
Index: linux-3.2/include/linux/lglock.h
===================================================================
--- linux-3.2.orig/include/linux/lglock.h
+++ linux-3.2/include/linux/lglock.h
@@ -70,6 +70,9 @@
extern void name##_global_lock_online(void); \
extern void name##_global_unlock_online(void); \
+
+#ifndef CONFIG_PREEMPT_RT_FULL
+
#define DEFINE_LGLOCK(name) \
\
DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
@@ -169,4 +172,101 @@
preempt_enable(); \
} \
EXPORT_SYMBOL(name##_global_unlock);
+
+#else /* !PREEMPT_RT_FULL */
+#define DEFINE_LGLOCK(name) \
+ \
+ DEFINE_PER_CPU(struct rt_mutex, name##_lock); \
+ DEFINE_LGLOCK_LOCKDEP(name); \
+ \
+ void name##_lock_init(void) { \
+ int i; \
+ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
+ for_each_possible_cpu(i) { \
+ struct rt_mutex *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ rt_mutex_init(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_lock_init); \
+ \
+ void name##_local_lock(void) { \
+ struct rt_mutex *lock; \
+ migrate_disable(); \
+ rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
+ lock = &__get_cpu_var(name##_lock); \
+ __rt_spin_lock(lock); \
+ } \
+ EXPORT_SYMBOL(name##_local_lock); \
+ \
+ void name##_local_unlock(void) { \
+ struct rt_mutex *lock; \
+ rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
+ lock = &__get_cpu_var(name##_lock); \
+ __rt_spin_unlock(lock); \
+ migrate_enable(); \
+ } \
+ EXPORT_SYMBOL(name##_local_unlock); \
+ \
+ void name##_local_lock_cpu(int cpu) { \
+ struct rt_mutex *lock; \
+ rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
+ lock = &per_cpu(name##_lock, cpu); \
+ __rt_spin_lock(lock); \
+ } \
+ EXPORT_SYMBOL(name##_local_lock_cpu); \
+ \
+ void name##_local_unlock_cpu(int cpu) { \
+ struct rt_mutex *lock; \
+ rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
+ lock = &per_cpu(name##_lock, cpu); \
+ __rt_spin_unlock(lock); \
+ } \
+ EXPORT_SYMBOL(name##_local_unlock_cpu); \
+ \
+ void name##_global_lock_online(void) { \
+ int i; \
+ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
+ for_each_online_cpu(i) { \
+ struct rt_mutex *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ __rt_spin_lock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_lock_online); \
+ \
+ void name##_global_unlock_online(void) { \
+ int i; \
+ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
+ for_each_online_cpu(i) { \
+ struct rt_mutex *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ __rt_spin_unlock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_unlock_online); \
+ \
+ void name##_global_lock(void) { \
+ int i; \
+ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
+ for_each_possible_cpu(i) { \
+ struct rt_mutex *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ __rt_spin_lock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_lock); \
+ \
+ void name##_global_unlock(void) { \
+ int i; \
+ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
+ for_each_possible_cpu(i) { \
+ struct rt_mutex *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ __rt_spin_unlock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_unlock);
+#endif /* PRREMPT_RT_FULL */
+
#endif

View File

@ -0,0 +1,31 @@
Subject: list-add-list-last-entry.patch
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 21 Jun 2011 11:22:36 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/list.h | 11 +++++++++++
1 file changed, 11 insertions(+)
Index: linux-3.2/include/linux/list.h
===================================================================
--- linux-3.2.orig/include/linux/list.h
+++ linux-3.2/include/linux/list.h
@@ -362,6 +362,17 @@ static inline void list_splice_tail_init
list_entry((ptr)->next, type, member)
/**
+ * list_last_entry - get the last element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+/**
* list_for_each - iterate over a list
* @pos: the &struct list_head to use as a loop cursor.
* @head: the head for your list.

View File

@ -0,0 +1,56 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 21 Jul 2009 22:34:14 +0200
Subject: rt: local_irq_* variants depending on RT/!RT
Add local_irq_*_(no)rt variant which are mainly used to break
interrupt disabled sections on PREEMPT_RT or to explicitely disable
interrupts on PREEMPT_RT.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/interrupt.h | 2 +-
include/linux/irqflags.h | 19 +++++++++++++++++++
2 files changed, 20 insertions(+), 1 deletion(-)
Index: linux-3.2/include/linux/interrupt.h
===================================================================
--- linux-3.2.orig/include/linux/interrupt.h
+++ linux-3.2/include/linux/interrupt.h
@@ -219,7 +219,7 @@ extern void devm_free_irq(struct device
#ifdef CONFIG_LOCKDEP
# define local_irq_enable_in_hardirq() do { } while (0)
#else
-# define local_irq_enable_in_hardirq() local_irq_enable()
+# define local_irq_enable_in_hardirq() local_irq_enable_nort()
#endif
extern void disable_irq_nosync(unsigned int irq);
Index: linux-3.2/include/linux/irqflags.h
===================================================================
--- linux-3.2.orig/include/linux/irqflags.h
+++ linux-3.2/include/linux/irqflags.h
@@ -147,4 +147,23 @@
#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+/*
+ * local_irq* variants depending on RT/!RT
+ */
+#ifdef CONFIG_PREEMPT_RT_FULL
+# define local_irq_disable_nort() do { } while (0)
+# define local_irq_enable_nort() do { } while (0)
+# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0)
+# define local_irq_restore_nort(flags) do { (void)(flags); } while (0)
+# define local_irq_disable_rt() local_irq_disable()
+# define local_irq_enable_rt() local_irq_enable()
+#else
+# define local_irq_disable_nort() local_irq_disable()
+# define local_irq_enable_nort() local_irq_enable()
+# define local_irq_save_nort(flags) local_irq_save(flags)
+# define local_irq_restore_nort(flags) local_irq_restore(flags)
+# define local_irq_disable_rt() do { } while (0)
+# define local_irq_enable_rt() do { } while (0)
+#endif
+
#endif

View File

@ -0,0 +1,25 @@
Subject: local-var.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 24 Jun 2011 18:40:37 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/percpu.h | 5 +++++
1 file changed, 5 insertions(+)
Index: linux-3.2/include/linux/percpu.h
===================================================================
--- linux-3.2.orig/include/linux/percpu.h
+++ linux-3.2/include/linux/percpu.h
@@ -48,6 +48,11 @@
preempt_enable(); \
} while (0)
+#define get_local_var(var) get_cpu_var(var)
+#define put_local_var(var) put_cpu_var(var)
+#define get_local_ptr(var) get_cpu_ptr(var)
+#define put_local_ptr(var) put_cpu_ptr(var)
+
/* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)

View File

@ -0,0 +1,48 @@
Subject: local-vars-migrate-disable.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 28 Jun 2011 20:42:16 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/percpu.h | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
Index: linux-3.2/include/linux/percpu.h
===================================================================
--- linux-3.2.orig/include/linux/percpu.h
+++ linux-3.2/include/linux/percpu.h
@@ -48,10 +48,30 @@
preempt_enable(); \
} while (0)
-#define get_local_var(var) get_cpu_var(var)
-#define put_local_var(var) put_cpu_var(var)
-#define get_local_ptr(var) get_cpu_ptr(var)
-#define put_local_ptr(var) put_cpu_ptr(var)
+#ifndef CONFIG_PREEMPT_RT_FULL
+# define get_local_var(var) get_cpu_var(var)
+# define put_local_var(var) put_cpu_var(var)
+# define get_local_ptr(var) get_cpu_ptr(var)
+# define put_local_ptr(var) put_cpu_ptr(var)
+#else
+# define get_local_var(var) (*({ \
+ migrate_disable(); \
+ &__get_cpu_var(var); }))
+
+# define put_local_var(var) do { \
+ (void)&(var); \
+ migrate_enable(); \
+} while (0)
+
+# define get_local_ptr(var) ({ \
+ migrate_disable(); \
+ this_cpu_ptr(var); })
+
+# define put_local_ptr(var) do { \
+ (void)(var); \
+ migrate_enable(); \
+} while (0)
+#endif
/* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)

View File

@ -0,0 +1,17 @@
Subject: localversion.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 08 Jul 2011 20:25:16 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-8vdw4bfcsds27cvox6rpb334@git.kernel.org
---
localversion-rt | 1 +
1 file changed, 1 insertion(+)
Index: linux-3.2/localversion-rt
===================================================================
--- /dev/null
+++ linux-3.2/localversion-rt
@@ -0,0 +1 @@
+-rt3

View File

@ -0,0 +1,60 @@
Subject: lockdep-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 18:51:23 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/irqflags.h | 10 +++++++---
kernel/lockdep.c | 2 ++
2 files changed, 9 insertions(+), 3 deletions(-)
Index: linux-3.2/include/linux/irqflags.h
===================================================================
--- linux-3.2.orig/include/linux/irqflags.h
+++ linux-3.2/include/linux/irqflags.h
@@ -25,8 +25,6 @@
# define trace_softirqs_enabled(p) ((p)->softirqs_enabled)
# define trace_hardirq_enter() do { current->hardirq_context++; } while (0)
# define trace_hardirq_exit() do { current->hardirq_context--; } while (0)
-# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
-# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1,
#else
# define trace_hardirqs_on() do { } while (0)
@@ -39,9 +37,15 @@
# define trace_softirqs_enabled(p) 0
# define trace_hardirq_enter() do { } while (0)
# define trace_hardirq_exit() do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+#endif
+
+#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL)
+# define lockdep_softirq_enter() do { current->softirq_context++; } while (0)
+# define lockdep_softirq_exit() do { current->softirq_context--; } while (0)
+#else
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
-# define INIT_TRACE_IRQFLAGS
#endif
#if defined(CONFIG_IRQSOFF_TRACER) || \
Index: linux-3.2/kernel/lockdep.c
===================================================================
--- linux-3.2.orig/kernel/lockdep.c
+++ linux-3.2/kernel/lockdep.c
@@ -3488,6 +3488,7 @@ static void check_flags(unsigned long fl
}
}
+#ifndef CONFIG_PREEMPT_RT_FULL
/*
* We dont accurately track softirq state in e.g.
* hardirq contexts (such as on 4KSTACKS), so only
@@ -3502,6 +3503,7 @@ static void check_flags(unsigned long fl
DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
}
}
+#endif
if (!debug_locks)
print_irqtrace_events(current);

View File

@ -0,0 +1,96 @@
Subject: locking-various-init-fixes.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 21:25:03 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
drivers/char/random.c | 6 +++---
drivers/usb/gadget/ci13xxx_udc.c | 2 +-
fs/file.c | 2 +-
include/linux/idr.h | 2 +-
kernel/cred.c | 2 +-
5 files changed, 7 insertions(+), 7 deletions(-)
Index: linux-3.2/drivers/char/random.c
===================================================================
--- linux-3.2.orig/drivers/char/random.c
+++ linux-3.2/drivers/char/random.c
@@ -433,7 +433,7 @@ static struct entropy_store input_pool =
.poolinfo = &poolinfo_table[0],
.name = "input",
.limit = 1,
- .lock = __SPIN_LOCK_UNLOCKED(&input_pool.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock),
.pool = input_pool_data
};
@@ -442,7 +442,7 @@ static struct entropy_store blocking_poo
.name = "blocking",
.limit = 1,
.pull = &input_pool,
- .lock = __SPIN_LOCK_UNLOCKED(&blocking_pool.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock),
.pool = blocking_pool_data
};
@@ -450,7 +450,7 @@ static struct entropy_store nonblocking_
.poolinfo = &poolinfo_table[1],
.name = "nonblocking",
.pull = &input_pool,
- .lock = __SPIN_LOCK_UNLOCKED(&nonblocking_pool.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(nonblocking_pool.lock),
.pool = nonblocking_pool_data
};
Index: linux-3.2/drivers/usb/gadget/ci13xxx_udc.c
===================================================================
--- linux-3.2.orig/drivers/usb/gadget/ci13xxx_udc.c
+++ linux-3.2/drivers/usb/gadget/ci13xxx_udc.c
@@ -816,7 +816,7 @@ static struct {
} dbg_data = {
.idx = 0,
.tty = 0,
- .lck = __RW_LOCK_UNLOCKED(lck)
+ .lck = __RW_LOCK_UNLOCKED(dbg_data.lck)
};
/**
Index: linux-3.2/fs/file.c
===================================================================
--- linux-3.2.orig/fs/file.c
+++ linux-3.2/fs/file.c
@@ -422,7 +422,7 @@ struct files_struct init_files = {
.close_on_exec = (fd_set *)&init_files.close_on_exec_init,
.open_fds = (fd_set *)&init_files.open_fds_init,
},
- .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
+ .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
};
/*
Index: linux-3.2/include/linux/idr.h
===================================================================
--- linux-3.2.orig/include/linux/idr.h
+++ linux-3.2/include/linux/idr.h
@@ -136,7 +136,7 @@ struct ida {
struct ida_bitmap *free_bitmap;
};
-#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, }
+#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
#define DEFINE_IDA(name) struct ida name = IDA_INIT(name)
int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
Index: linux-3.2/kernel/cred.c
===================================================================
--- linux-3.2.orig/kernel/cred.c
+++ linux-3.2/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
- .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(init_tgcred.lock),
};
#endif

View File

@ -0,0 +1,65 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 6 Apr 2010 16:51:31 +0200
Subject: md: raid5: Make raid5_percpu handling RT aware
__raid_run_ops() disables preemption with get_cpu() around the access
to the raid5_percpu variables. That causes scheduling while atomic
spews on RT.
Serialize the access to the percpu data with a lock and keep the code
preemptible.
Reported-by: Udo van den Heuvel <udovdh@xs4all.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Udo van den Heuvel <udovdh@xs4all.nl>
---
drivers/md/raid5.c | 7 +++++--
drivers/md/raid5.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)
Index: linux-3.2/drivers/md/raid5.c
===================================================================
--- linux-3.2.orig/drivers/md/raid5.c
+++ linux-3.2/drivers/md/raid5.c
@@ -1245,8 +1245,9 @@ static void __raid_run_ops(struct stripe
struct raid5_percpu *percpu;
unsigned long cpu;
- cpu = get_cpu();
+ cpu = get_cpu_light();
percpu = per_cpu_ptr(conf->percpu, cpu);
+ spin_lock(&percpu->lock);
if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) {
ops_run_biofill(sh);
overlap_clear++;
@@ -1298,7 +1299,8 @@ static void __raid_run_ops(struct stripe
if (test_and_clear_bit(R5_Overlap, &dev->flags))
wake_up(&sh->raid_conf->wait_for_overlap);
}
- put_cpu();
+ spin_unlock(&percpu->lock);
+ put_cpu_light();
}
#ifdef CONFIG_MULTICORE_RAID456
@@ -4531,6 +4533,7 @@ static int raid5_alloc_percpu(struct r5c
break;
}
per_cpu_ptr(conf->percpu, cpu)->scribble = scribble;
+ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock);
}
#ifdef CONFIG_HOTPLUG_CPU
conf->cpu_notify.notifier_call = raid456_cpu_notify;
Index: linux-3.2/drivers/md/raid5.h
===================================================================
--- linux-3.2.orig/drivers/md/raid5.h
+++ linux-3.2/drivers/md/raid5.h
@@ -405,6 +405,7 @@ struct r5conf {
int recovery_disabled;
/* per cpu variables */
struct raid5_percpu {
+ spinlock_t lock; /* Protection for -RT */
struct page *spare_page; /* Used when checking P/Q in raid6 */
void *scribble; /* space for constructing buffer
* lists and performing address

View File

@ -0,0 +1,29 @@
Subject: migrate-disable-rt-variant.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 17 Jul 2011 19:48:20 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/preempt.h | 4 ++++
1 file changed, 4 insertions(+)
Index: linux-3.2/include/linux/preempt.h
===================================================================
--- linux-3.2.orig/include/linux/preempt.h
+++ linux-3.2/include/linux/preempt.h
@@ -121,11 +121,15 @@ extern void migrate_enable(void);
# define preempt_enable_rt() preempt_enable()
# define preempt_disable_nort() do { } while (0)
# define preempt_enable_nort() do { } while (0)
+# define migrate_disable_rt() migrate_disable()
+# define migrate_enable_rt() migrate_enable()
#else
# define preempt_disable_rt() do { } while (0)
# define preempt_enable_rt() do { } while (0)
# define preempt_disable_nort() preempt_disable()
# define preempt_enable_nort() preempt_enable()
+# define migrate_disable_rt() do { } while (0)
+# define migrate_enable_rt() do { } while (0)
#endif
#ifdef CONFIG_PREEMPT_NOTIFIERS

View File

@ -0,0 +1,22 @@
Subject: mips-disable-highmem-on-rt.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 Jul 2011 17:10:12 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/mips/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/arch/mips/Kconfig
===================================================================
--- linux-3.2.orig/arch/mips/Kconfig
+++ linux-3.2/arch/mips/Kconfig
@@ -2040,7 +2040,7 @@ config CPU_R4400_WORKAROUNDS
#
config HIGHMEM
bool "High Memory Support"
- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM
+ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL
config CPU_SUPPORTS_HIGHMEM
bool

View File

@ -0,0 +1,23 @@
Subject: mips-enable-interrupts-in-signal.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 18 Jul 2011 21:32:10 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/mips/kernel/signal.c | 3 +++
1 file changed, 3 insertions(+)
Index: linux-3.2/arch/mips/kernel/signal.c
===================================================================
--- linux-3.2.orig/arch/mips/kernel/signal.c
+++ linux-3.2/arch/mips/kernel/signal.c
@@ -604,6 +604,9 @@ static void do_signal(struct pt_regs *re
if (!user_mode(regs))
return;
+ local_irq_enable();
+ preempt_check_resched();
+
if (test_thread_flag(TIF_RESTORE_SIGMASK))
oldset = &current->saved_sigmask;
else

View File

@ -0,0 +1,38 @@
Subject: MIPS: Octeon: Mark SMP-IPI interrupt as IRQF_NO_THREAD
From: Venkat Subbiah <venkat.subbiah@caviumnetworks.com>
Date: Mon, 3 Oct 2011 13:31:10 -0700
From: Venkat Subbiah <venkat.subbiah@cavium.com>
This is to exclude it from force threading to allow RT patch set to work.
And while on this line
* Remove IRQF_DISABLED as as this flag is NOOP
* Add IRQF_PERCPU as this is a per cpu interrupt.
Signed-off-by: Venkat Subbiah <venkat.subbiah@cavium.com>
Acked-by: David Daney <david.daney@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: ralf@linux-mips.org
Link: http://lkml.kernel.org/r/1317673870-10671-1-git-send-email-venkat.subbiah@caviumnetworks.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/mips/cavium-octeon/smp.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
Index: linux-3.2/arch/mips/cavium-octeon/smp.c
===================================================================
--- linux-3.2.orig/arch/mips/cavium-octeon/smp.c
+++ linux-3.2/arch/mips/cavium-octeon/smp.c
@@ -207,8 +207,9 @@ void octeon_prepare_cpus(unsigned int ma
* the other bits alone.
*/
cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffff);
- if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_DISABLED,
- "SMP-IPI", mailbox_interrupt)) {
+ if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt,
+ IRQF_PERCPU | IRQF_NO_THREAD, "SMP-IPI",
+ mailbox_interrupt)) {
panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
}
}

View File

@ -0,0 +1,31 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:44:03 -0500
Subject: mm: Allow only slab on RT
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
init/Kconfig | 2 ++
1 file changed, 2 insertions(+)
Index: linux-3.2/init/Kconfig
===================================================================
--- linux-3.2.orig/init/Kconfig
+++ linux-3.2/init/Kconfig
@@ -1239,6 +1239,7 @@ config SLAB
config SLUB
bool "SLUB (Unqueued Allocator)"
+ depends on !PREEMPT_RT_FULL
help
SLUB is a slab allocator that minimizes cache line usage
instead of managing queues of cached objects (SLAB approach).
@@ -1250,6 +1251,7 @@ config SLUB
config SLOB
depends on EXPERT
bool "SLOB (Simple Allocator)"
+ depends on !PREEMPT_RT_FULL
help
SLOB replaces the stock allocator with a drastically simpler
allocator. SLOB is generally more space efficient but

View File

@ -0,0 +1,95 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Aug 2009 09:56:42 +0200
Subject: mm: Replace cgroup_page bit spinlock
Bit spinlocks are not working on RT. Replace them.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/page_cgroup.h | 28 ++++++++++++++++++++++++++++
mm/page_cgroup.c | 1 +
2 files changed, 29 insertions(+)
Index: linux-3.2/include/linux/page_cgroup.h
===================================================================
--- linux-3.2.orig/include/linux/page_cgroup.h
+++ linux-3.2/include/linux/page_cgroup.h
@@ -30,6 +30,10 @@ enum {
*/
struct page_cgroup {
unsigned long flags;
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spinlock_t pcg_lock;
+ spinlock_t pcm_lock;
+#endif
struct mem_cgroup *mem_cgroup;
struct list_head lru; /* per cgroup LRU list */
};
@@ -96,30 +100,54 @@ static inline void lock_page_cgroup(stru
* Don't take this lock in IRQ context.
* This lock is for pc->mem_cgroup, USED, CACHE, MIGRATION
*/
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_lock(PCG_LOCK, &pc->flags);
+#else
+ spin_lock(&pc->pcg_lock);
+#endif
}
static inline void unlock_page_cgroup(struct page_cgroup *pc)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(PCG_LOCK, &pc->flags);
+#else
+ spin_unlock(&pc->pcg_lock);
+#endif
}
static inline void move_lock_page_cgroup(struct page_cgroup *pc,
unsigned long *flags)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
/*
* We know updates to pc->flags of page cache's stats are from both of
* usual context or IRQ context. Disable IRQ to avoid deadlock.
*/
local_irq_save(*flags);
bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
+#else
+ spin_lock_irqsave(&pc->pcm_lock, *flags);
+#endif
}
static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
unsigned long *flags)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
local_irq_restore(*flags);
+#else
+ spin_unlock_irqrestore(&pc->pcm_lock, *flags);
+#endif
+}
+
+static inline void page_cgroup_lock_init(struct page_cgroup *pc)
+{
+#ifdef CONFIG_PREEMPT_RT_BASE
+ spin_lock_init(&pc->pcg_lock);
+ spin_lock_init(&pc->pcm_lock);
+#endif
}
#ifdef CONFIG_SPARSEMEM
Index: linux-3.2/mm/page_cgroup.c
===================================================================
--- linux-3.2.orig/mm/page_cgroup.c
+++ linux-3.2/mm/page_cgroup.c
@@ -17,6 +17,7 @@ static void __meminit init_page_cgroup(s
set_page_cgroup_array_id(pc, id);
pc->mem_cgroup = NULL;
INIT_LIST_HEAD(&pc->lru);
+ page_cgroup_lock_init(pc);
}
static unsigned long total_usage;

View File

@ -0,0 +1,124 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:51 -0500
Subject: mm: convert swap to percpu locked
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/swap.c | 33 +++++++++++++++++++++------------
1 file changed, 21 insertions(+), 12 deletions(-)
Index: linux-3.2/mm/swap.c
===================================================================
--- linux-3.2.orig/mm/swap.c
+++ linux-3.2/mm/swap.c
@@ -31,6 +31,7 @@
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
+#include <linux/locallock.h>
#include "internal.h"
@@ -41,6 +42,9 @@ static DEFINE_PER_CPU(struct pagevec[NR_
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
+static DEFINE_LOCAL_IRQ_LOCK(swap_lock);
+
/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
@@ -267,11 +271,11 @@ void rotate_reclaimable_page(struct page
unsigned long flags;
page_cache_get(page);
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pvec = &__get_cpu_var(lru_rotate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
}
@@ -327,12 +331,13 @@ static void activate_page_drain(int cpu)
void activate_page(struct page *page)
{
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
- struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+ struct pagevec *pvec = &get_locked_var(swap_lock,
+ activate_page_pvecs);
page_cache_get(page);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
- put_cpu_var(activate_page_pvecs);
+ put_locked_var(swap_lock, activate_page_pvecs);
}
}
@@ -373,12 +378,12 @@ EXPORT_SYMBOL(mark_page_accessed);
void __lru_cache_add(struct page *page, enum lru_list lru)
{
- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
+ struct pagevec *pvec = &get_locked_var(swap_lock, lru_add_pvecs)[lru];
page_cache_get(page);
if (!pagevec_add(pvec, page))
____pagevec_lru_add(pvec, lru);
- put_cpu_var(lru_add_pvecs);
+ put_locked_var(swap_lock, lru_add_pvecs);
}
EXPORT_SYMBOL(__lru_cache_add);
@@ -512,9 +517,9 @@ static void drain_cpu_pagevecs(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
- local_irq_save(flags);
+ local_lock_irqsave(rotate_lock, flags);
pagevec_move_tail(pvec);
- local_irq_restore(flags);
+ local_unlock_irqrestore(rotate_lock, flags);
}
pvec = &per_cpu(lru_deactivate_pvecs, cpu);
@@ -542,18 +547,19 @@ void deactivate_page(struct page *page)
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+ struct pagevec *pvec = &get_locked_var(swap_lock,
+ lru_deactivate_pvecs);
if (!pagevec_add(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_cpu_var(lru_deactivate_pvecs);
+ put_locked_var(swap_lock, lru_deactivate_pvecs);
}
}
void lru_add_drain(void)
{
- drain_cpu_pagevecs(get_cpu());
- put_cpu();
+ drain_cpu_pagevecs(local_lock_cpu(swap_lock));
+ local_unlock_cpu(swap_lock);
}
static void lru_add_drain_per_cpu(struct work_struct *dummy)
@@ -783,6 +789,9 @@ void __init swap_setup(void)
{
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
+ local_irq_lock_init(rotate_lock);
+ local_irq_lock_init(swap_lock);
+
#ifdef CONFIG_SWAP
bdi_init(swapper_space.backing_dev_info);
#endif

View File

@ -0,0 +1,88 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:13 -0500
Subject: [PATCH] mm: make vmstat -rt aware
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/vmstat.h | 4 ++++
mm/vmstat.c | 6 ++++++
2 files changed, 10 insertions(+)
Index: linux-3.2/include/linux/vmstat.h
===================================================================
--- linux-3.2.orig/include/linux/vmstat.h
+++ linux-3.2/include/linux/vmstat.h
@@ -29,7 +29,9 @@ DECLARE_PER_CPU(struct vm_event_state, v
static inline void __count_vm_event(enum vm_event_item item)
{
+ preempt_disable_rt();
__this_cpu_inc(vm_event_states.event[item]);
+ preempt_enable_rt();
}
static inline void count_vm_event(enum vm_event_item item)
@@ -39,7 +41,9 @@ static inline void count_vm_event(enum v
static inline void __count_vm_events(enum vm_event_item item, long delta)
{
+ preempt_disable_rt();
__this_cpu_add(vm_event_states.event[item], delta);
+ preempt_enable_rt();
}
static inline void count_vm_events(enum vm_event_item item, long delta)
Index: linux-3.2/mm/vmstat.c
===================================================================
--- linux-3.2.orig/mm/vmstat.c
+++ linux-3.2/mm/vmstat.c
@@ -216,6 +216,7 @@ void __mod_zone_page_state(struct zone *
long x;
long t;
+ preempt_disable_rt();
x = delta + __this_cpu_read(*p);
t = __this_cpu_read(pcp->stat_threshold);
@@ -225,6 +226,7 @@ void __mod_zone_page_state(struct zone *
x = 0;
}
__this_cpu_write(*p, x);
+ preempt_enable_rt();
}
EXPORT_SYMBOL(__mod_zone_page_state);
@@ -257,6 +259,7 @@ void __inc_zone_state(struct zone *zone,
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_inc_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v > t)) {
@@ -265,6 +268,7 @@ void __inc_zone_state(struct zone *zone,
zone_page_state_add(v + overstep, zone, item);
__this_cpu_write(*p, -overstep);
}
+ preempt_enable_rt();
}
void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -279,6 +283,7 @@ void __dec_zone_state(struct zone *zone,
s8 __percpu *p = pcp->vm_stat_diff + item;
s8 v, t;
+ preempt_disable_rt();
v = __this_cpu_dec_return(*p);
t = __this_cpu_read(pcp->stat_threshold);
if (unlikely(v < - t)) {
@@ -287,6 +292,7 @@ void __dec_zone_state(struct zone *zone,
zone_page_state_add(v - overstep, zone, item);
__this_cpu_write(*p, overstep);
}
+ preempt_enable_rt();
}
void __dec_zone_page_state(struct page *page, enum zone_stat_item item)

View File

@ -0,0 +1,124 @@
Subject: mm: memcg: shorten preempt-disabled section around event checks
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 17 Nov 2011 07:49:25 +0100
Only the ratelimit checks themselves have to run with preemption
disabled, the resulting actions - checking for usage thresholds,
updating the soft limit tree - can and should run with preemption
enabled.
Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Tested-by: Luis Henriques <henrix@camandro.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/memcontrol.c | 73 ++++++++++++++++++++++++++------------------------------
1 file changed, 35 insertions(+), 38 deletions(-)
Thomas, HTH and it is probably interesting for upstream as well.
Unfortunately, I'm in the middle of moving right now, so this is
untested except for compiling.
Index: linux-3.2/mm/memcontrol.c
===================================================================
--- linux-3.2.orig/mm/memcontrol.c
+++ linux-3.2/mm/memcontrol.c
@@ -683,37 +683,32 @@ static unsigned long mem_cgroup_nr_lru_p
return total;
}
-static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
+static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
+ enum mem_cgroup_events_target target)
{
unsigned long val, next;
val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
next = __this_cpu_read(memcg->stat->targets[target]);
/* from time_after() in jiffies.h */
- return ((long)next - (long)val < 0);
-}
-
-static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
-{
- unsigned long val, next;
-
- val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-
- switch (target) {
- case MEM_CGROUP_TARGET_THRESH:
- next = val + THRESHOLDS_EVENTS_TARGET;
- break;
- case MEM_CGROUP_TARGET_SOFTLIMIT:
- next = val + SOFTLIMIT_EVENTS_TARGET;
- break;
- case MEM_CGROUP_TARGET_NUMAINFO:
- next = val + NUMAINFO_EVENTS_TARGET;
- break;
- default:
- return;
+ if ((long)next - (long)val < 0) {
+ switch (target) {
+ case MEM_CGROUP_TARGET_THRESH:
+ next = val + THRESHOLDS_EVENTS_TARGET;
+ break;
+ case MEM_CGROUP_TARGET_SOFTLIMIT:
+ next = val + SOFTLIMIT_EVENTS_TARGET;
+ break;
+ case MEM_CGROUP_TARGET_NUMAINFO:
+ next = val + NUMAINFO_EVENTS_TARGET;
+ break;
+ default:
+ break;
+ }
+ __this_cpu_write(memcg->stat->targets[target], next);
+ return true;
}
-
- __this_cpu_write(memcg->stat->targets[target], next);
+ return false;
}
/*
@@ -724,25 +719,27 @@ static void memcg_check_events(struct me
{
preempt_disable();
/* threshold event is triggered in finer grain than soft limit */
- if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
+ if (unlikely(mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_THRESH))) {
+ bool do_softlimit, do_numainfo;
+
+ do_softlimit = mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_SOFTLIMIT);
+#if MAX_NUMNODES > 1
+ do_numainfo = mem_cgroup_event_ratelimit(memcg,
+ MEM_CGROUP_TARGET_NUMAINFO);
+#endif
+ preempt_enable();
+
mem_cgroup_threshold(memcg);
- __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
- if (unlikely(__memcg_event_check(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT))) {
+ if (unlikely(do_softlimit))
mem_cgroup_update_tree(memcg, page);
- __mem_cgroup_target_update(memcg,
- MEM_CGROUP_TARGET_SOFTLIMIT);
- }
#if MAX_NUMNODES > 1
- if (unlikely(__memcg_event_check(memcg,
- MEM_CGROUP_TARGET_NUMAINFO))) {
+ if (unlikely(do_numainfo))
atomic_inc(&memcg->numainfo_events);
- __mem_cgroup_target_update(memcg,
- MEM_CGROUP_TARGET_NUMAINFO);
- }
#endif
- }
- preempt_enable();
+ } else
+ preempt_enable();
}
static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)

View File

@ -0,0 +1,24 @@
Subject: mm-page-alloc-fix.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Jul 2011 16:47:49 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/page_alloc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/mm/page_alloc.c
===================================================================
--- linux-3.2.orig/mm/page_alloc.c
+++ linux-3.2/mm/page_alloc.c
@@ -1960,8 +1960,8 @@ __alloc_pages_direct_compact(gfp_t gfp_m
if (*did_some_progress != COMPACT_SKIPPED) {
/* Page migration frees to the PCP lists but we want merging */
- drain_pages(get_cpu());
- put_cpu();
+ drain_pages(get_cpu_light());
+ put_cpu_light();
page = get_page_from_freelist(gfp_mask, nodemask,
order, zonelist, high_zoneidx,

View File

@ -0,0 +1,22 @@
Subject: mm-page-alloc-use-list-last-entry.patch
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 21 Jun 2011 11:24:35 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/page_alloc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
Index: linux-3.2/mm/page_alloc.c
===================================================================
--- linux-3.2.orig/mm/page_alloc.c
+++ linux-3.2/mm/page_alloc.c
@@ -625,7 +625,7 @@ static void free_pcppages_bulk(struct zo
batch_free = to_free;
do {
- page = list_entry(list->prev, struct page, lru);
+ page = list_last_entry(list, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */

View File

@ -0,0 +1,192 @@
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:37 -0500
Subject: mm: page_alloc reduce lock sections further
Split out the pages which are to be freed into a separate list and
call free_pages_bulk() outside of the percpu page allocator locks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/page_alloc.c | 77 ++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 58 insertions(+), 19 deletions(-)
Index: linux-3.2/mm/page_alloc.c
===================================================================
--- linux-3.2.orig/mm/page_alloc.c
+++ linux-3.2/mm/page_alloc.c
@@ -594,7 +594,7 @@ static inline int free_pages_check(struc
}
/*
- * Frees a number of pages from the PCP lists
+ * Frees a number of pages which have been collected from the pcp lists.
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -605,16 +605,42 @@ static inline int free_pages_check(struc
* pinned" detection logic.
*/
static void free_pcppages_bulk(struct zone *zone, int count,
- struct per_cpu_pages *pcp)
+ struct list_head *list)
{
- int migratetype = 0;
- int batch_free = 0;
int to_free = count;
+ unsigned long flags;
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
+ while (!list_empty(list)) {
+ struct page *page = list_first_entry(list, struct page, lru);
+
+ /* must delete as __free_one_page list manipulates */
+ list_del(&page->lru);
+ /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
+ __free_one_page(page, zone, 0, page_private(page));
+ trace_mm_page_pcpu_drain(page, 0, page_private(page));
+ to_free--;
+ }
+ WARN_ON(to_free != 0);
+ __mod_zone_page_state(zone, NR_FREE_PAGES, count);
+ spin_unlock_irqrestore(&zone->lock, flags);
+}
+
+/*
+ * Moves a number of pages from the PCP lists to free list which
+ * is freed outside of the locked region.
+ *
+ * Assumes all pages on list are in same zone, and of same order.
+ * count is the number of pages to free.
+ */
+static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src,
+ struct list_head *dst)
+{
+ int migratetype = 0, batch_free = 0;
+
while (to_free) {
struct page *page;
struct list_head *list;
@@ -630,7 +656,7 @@ static void free_pcppages_bulk(struct zo
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
- list = &pcp->lists[migratetype];
+ list = &src->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
@@ -639,27 +665,24 @@ static void free_pcppages_bulk(struct zo
do {
page = list_last_entry(list, struct page, lru);
- /* must delete as __free_one_page list manipulates */
list_del(&page->lru);
- /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
- __free_one_page(page, zone, 0, page_private(page));
- trace_mm_page_pcpu_drain(page, 0, page_private(page));
+ list_add(&page->lru, dst);
} while (--to_free && --batch_free && !list_empty(list));
}
- __mod_zone_page_state(zone, NR_FREE_PAGES, count);
- spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone, struct page *page, int order,
int migratetype)
{
- spin_lock(&zone->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
zone->all_unreclaimable = 0;
zone->pages_scanned = 0;
__free_one_page(page, zone, order, migratetype);
__mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}
static bool free_pages_prepare(struct page *page, unsigned int order)
@@ -1078,6 +1101,7 @@ static int rmqueue_bulk(struct zone *zon
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
+ LIST_HEAD(dst);
int to_drain;
local_lock_irqsave(pa_lock, flags);
@@ -1085,9 +1109,10 @@ void drain_zone_pages(struct zone *zone,
to_drain = pcp->batch;
else
to_drain = pcp->count;
- free_pcppages_bulk(zone, to_drain, pcp);
+ isolate_pcp_pages(to_drain, pcp, &dst);
pcp->count -= to_drain;
local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, to_drain, &dst);
}
#endif
@@ -1106,16 +1131,21 @@ static void drain_pages(unsigned int cpu
for_each_populated_zone(zone) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
+ int count;
cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- if (pcp->count) {
- free_pcppages_bulk(zone, pcp->count, pcp);
+ count = pcp->count;
+ if (count) {
+ isolate_pcp_pages(count, pcp, &dst);
pcp->count = 0;
}
cpu_unlock_irqrestore(cpu, flags);
+ if (count)
+ free_pcppages_bulk(zone, count, &dst);
}
}
@@ -1222,8 +1252,15 @@ void free_hot_cold_page(struct page *pag
list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
- free_pcppages_bulk(zone, pcp->batch, pcp);
+ LIST_HEAD(dst);
+ int count;
+
+ isolate_pcp_pages(pcp->batch, pcp, &dst);
pcp->count -= pcp->batch;
+ count = pcp->batch;
+ local_unlock_irqrestore(pa_lock, flags);
+ free_pcppages_bulk(zone, count, &dst);
+ return;
}
out:
@@ -3696,12 +3733,14 @@ static int __zone_pcp_update(void *data)
for_each_possible_cpu(cpu) {
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
+ LIST_HEAD(dst);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
cpu_lock_irqsave(cpu, flags);
- free_pcppages_bulk(zone, pcp->count, pcp);
+ isolate_pcp_pages(pcp->count, pcp, &dst);
+ free_pcppages_bulk(zone, pcp->count, &dst);
setup_pageset(pset, batch);
cpu_unlock_irqrestore(cpu, flags);
}

View File

@ -0,0 +1,196 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:37 -0500
Subject: mm: page_alloc: rt-friendly per-cpu pages
rt-friendly per-cpu pages: convert the irqs-off per-cpu locking
method into a preemptible, explicit-per-cpu-locks method.
Contains fixes from:
Peter Zijlstra <a.p.zijlstra@chello.nl>
Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/page_alloc.c | 55 +++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 39 insertions(+), 16 deletions(-)
Index: linux-3.2/mm/page_alloc.c
===================================================================
--- linux-3.2.orig/mm/page_alloc.c
+++ linux-3.2/mm/page_alloc.c
@@ -57,6 +57,7 @@
#include <linux/ftrace_event.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
+#include <linux/locallock.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -222,6 +223,18 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+static DEFINE_LOCAL_IRQ_LOCK(pa_lock);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+# define cpu_lock_irqsave(cpu, flags) \
+ spin_lock_irqsave(&per_cpu(pa_lock, cpu).lock, flags)
+# define cpu_unlock_irqrestore(cpu, flags) \
+ spin_unlock_irqrestore(&per_cpu(pa_lock, cpu).lock, flags)
+#else
+# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags)
+# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags)
+#endif
+
int page_group_by_mobility_disabled __read_mostly;
static void set_pageblock_migratetype(struct page *page, int migratetype)
@@ -683,13 +696,13 @@ static void __free_pages_ok(struct page
if (!free_pages_prepare(page, order))
return;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order,
get_pageblock_migratetype(page));
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1067,14 +1080,14 @@ void drain_zone_pages(struct zone *zone,
unsigned long flags;
int to_drain;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (pcp->count >= pcp->batch)
to_drain = pcp->batch;
else
to_drain = pcp->count;
free_pcppages_bulk(zone, to_drain, pcp);
pcp->count -= to_drain;
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
#endif
@@ -1094,7 +1107,7 @@ static void drain_pages(unsigned int cpu
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
@@ -1102,7 +1115,7 @@ static void drain_pages(unsigned int cpu
free_pcppages_bulk(zone, pcp->count, pcp);
pcp->count = 0;
}
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
}
}
@@ -1119,7 +1132,14 @@ void drain_local_pages(void *arg)
*/
void drain_all_pages(void)
{
+#ifndef CONFIG_PREEMPT_RT_BASE
on_each_cpu(drain_local_pages, NULL, 1);
+#else
+ int i;
+
+ for_each_online_cpu(i)
+ drain_pages(i);
+#endif
}
#ifdef CONFIG_HIBERNATION
@@ -1175,7 +1195,7 @@ void free_hot_cold_page(struct page *pag
migratetype = get_pageblock_migratetype(page);
set_page_private(page, migratetype);
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
if (unlikely(wasMlocked))
free_page_mlock(page);
__count_vm_event(PGFREE);
@@ -1207,7 +1227,7 @@ void free_hot_cold_page(struct page *pag
}
out:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
}
/*
@@ -1302,7 +1322,7 @@ again:
struct per_cpu_pages *pcp;
struct list_head *list;
- local_irq_save(flags);
+ local_lock_irqsave(pa_lock, flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
if (list_empty(list)) {
@@ -1334,17 +1354,19 @@ again:
*/
WARN_ON_ONCE(order > 1);
}
- spin_lock_irqsave(&zone->lock, flags);
+ local_spin_lock_irqsave(pa_lock, &zone->lock, flags);
page = __rmqueue(zone, order, migratetype);
- spin_unlock(&zone->lock);
- if (!page)
+ if (!page) {
+ spin_unlock(&zone->lock);
goto failed;
+ }
__mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+ spin_unlock(&zone->lock);
}
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
VM_BUG_ON(bad_range(zone, page));
if (prep_new_page(page, order, gfp_flags))
@@ -1352,7 +1374,7 @@ again:
return page;
failed:
- local_irq_restore(flags);
+ local_unlock_irqrestore(pa_lock, flags);
return NULL;
}
@@ -3678,10 +3700,10 @@ static int __zone_pcp_update(void *data)
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- local_irq_save(flags);
+ cpu_lock_irqsave(cpu, flags);
free_pcppages_bulk(zone, pcp->count, pcp);
setup_pageset(pset, batch);
- local_irq_restore(flags);
+ cpu_unlock_irqrestore(cpu, flags);
}
return 0;
}
@@ -5047,6 +5069,7 @@ static int page_alloc_cpu_notify(struct
void __init page_alloc_init(void)
{
hotcpu_notifier(page_alloc_cpu_notify, 0);
+ local_irq_lock_init(pa_lock);
}
/*

View File

@ -0,0 +1,126 @@
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:30:37 -0500
Subject: mm: Prepare decoupling the page fault disabling logic
Add a pagefault_disabled variable to task_struct to allow decoupling
the pagefault-disabled logic from the preempt count.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/sched.h | 1 +
include/linux/uaccess.h | 33 +++------------------------------
kernel/fork.c | 1 +
mm/memory.c | 29 +++++++++++++++++++++++++++++
4 files changed, 34 insertions(+), 30 deletions(-)
Index: linux-3.2/include/linux/sched.h
===================================================================
--- linux-3.2.orig/include/linux/sched.h
+++ linux-3.2/include/linux/sched.h
@@ -1431,6 +1431,7 @@ struct task_struct {
/* mutex deadlock detection */
struct mutex_waiter *blocked_on;
#endif
+ int pagefault_disabled;
#ifdef CONFIG_TRACE_IRQFLAGS
unsigned int irq_events;
unsigned long hardirq_enable_ip;
Index: linux-3.2/include/linux/uaccess.h
===================================================================
--- linux-3.2.orig/include/linux/uaccess.h
+++ linux-3.2/include/linux/uaccess.h
@@ -6,37 +6,10 @@
/*
* These routines enable/disable the pagefault handler in that
- * it will not take any locks and go straight to the fixup table.
- *
- * They have great resemblance to the preempt_disable/enable calls
- * and in fact they are identical; this is because currently there is
- * no other way to make the pagefault handlers do this. So we do
- * disable preemption but we don't necessarily care about that.
+ * it will not take any MM locks and go straight to the fixup table.
*/
-static inline void pagefault_disable(void)
-{
- inc_preempt_count();
- /*
- * make sure to have issued the store before a pagefault
- * can hit.
- */
- barrier();
-}
-
-static inline void pagefault_enable(void)
-{
- /*
- * make sure to issue those last loads/stores before enabling
- * the pagefault handler again.
- */
- barrier();
- dec_preempt_count();
- /*
- * make sure we do..
- */
- barrier();
- preempt_check_resched();
-}
+extern void pagefault_disable(void);
+extern void pagefault_enable(void);
#ifndef ARCH_HAS_NOCACHE_UACCESS
Index: linux-3.2/kernel/fork.c
===================================================================
--- linux-3.2.orig/kernel/fork.c
+++ linux-3.2/kernel/fork.c
@@ -1198,6 +1198,7 @@ static struct task_struct *copy_process(
p->hardirq_context = 0;
p->softirq_context = 0;
#endif
+ p->pagefault_disabled = 0;
#ifdef CONFIG_LOCKDEP
p->lockdep_depth = 0; /* no locks held yet */
p->curr_chain_key = 0;
Index: linux-3.2/mm/memory.c
===================================================================
--- linux-3.2.orig/mm/memory.c
+++ linux-3.2/mm/memory.c
@@ -3436,6 +3436,35 @@ unlock:
return 0;
}
+void pagefault_disable(void)
+{
+ inc_preempt_count();
+ current->pagefault_disabled++;
+ /*
+ * make sure to have issued the store before a pagefault
+ * can hit.
+ */
+ barrier();
+}
+EXPORT_SYMBOL_GPL(pagefault_disable);
+
+void pagefault_enable(void)
+{
+ /*
+ * make sure to issue those last loads/stores before enabling
+ * the pagefault handler again.
+ */
+ barrier();
+ current->pagefault_disabled--;
+ dec_preempt_count();
+ /*
+ * make sure we do..
+ */
+ barrier();
+ preempt_check_resched();
+}
+EXPORT_SYMBOL_GPL(pagefault_enable);
+
/*
* By the time we get here, we already hold the mm semaphore
*/

View File

@ -0,0 +1,47 @@
Subject: mm-protect-activate-switch-mm.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 04 Jul 2011 09:48:40 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
fs/exec.c | 2 ++
mm/mmu_context.c | 2 ++
2 files changed, 4 insertions(+)
Index: linux-3.2/fs/exec.c
===================================================================
--- linux-3.2.orig/fs/exec.c
+++ linux-3.2/fs/exec.c
@@ -837,10 +837,12 @@ static int exec_mmap(struct mm_struct *m
}
}
task_lock(tsk);
+ local_irq_disable_rt();
active_mm = tsk->active_mm;
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
+ local_irq_enable_rt();
task_unlock(tsk);
arch_pick_mmap_layout(mm);
if (old_mm) {
Index: linux-3.2/mm/mmu_context.c
===================================================================
--- linux-3.2.orig/mm/mmu_context.c
+++ linux-3.2/mm/mmu_context.c
@@ -26,6 +26,7 @@ void use_mm(struct mm_struct *mm)
struct task_struct *tsk = current;
task_lock(tsk);
+ local_irq_disable_rt();
active_mm = tsk->active_mm;
if (active_mm != mm) {
atomic_inc(&mm->mm_count);
@@ -33,6 +34,7 @@ void use_mm(struct mm_struct *mm)
}
tsk->mm = mm;
switch_mm(active_mm, mm, tsk);
+ local_irq_enable_rt();
task_unlock(tsk);
if (active_mm != mm)

View File

@ -0,0 +1,36 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 25 Jul 2009 22:06:27 +0200
Subject: mm: Remove preempt count from pagefault disable/enable
Now that all users are cleaned up, we can remove the preemption count.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/memory.c | 7 -------
1 file changed, 7 deletions(-)
Index: linux-3.2/mm/memory.c
===================================================================
--- linux-3.2.orig/mm/memory.c
+++ linux-3.2/mm/memory.c
@@ -3439,7 +3439,6 @@ unlock:
#ifdef CONFIG_PREEMPT_RT_FULL
void pagefault_disable(void)
{
- inc_preempt_count();
current->pagefault_disabled++;
/*
* make sure to have issued the store before a pagefault
@@ -3457,12 +3456,6 @@ void pagefault_enable(void)
*/
barrier();
current->pagefault_disabled--;
- dec_preempt_count();
- /*
- * make sure we do..
- */
- barrier();
- preempt_check_resched();
}
EXPORT_SYMBOL_GPL(pagefault_enable);
#endif

View File

@ -0,0 +1,40 @@
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 3 Jul 2009 08:44:34 -0500
Subject: mm: scatterlist dont disable irqs on RT
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
lib/scatterlist.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
Index: linux-3.2/lib/scatterlist.c
===================================================================
--- linux-3.2.orig/lib/scatterlist.c
+++ linux-3.2/lib/scatterlist.c
@@ -423,7 +423,7 @@ void sg_miter_stop(struct sg_mapping_ite
flush_kernel_dcache_page(miter->page);
if (miter->__flags & SG_MITER_ATOMIC) {
- WARN_ON(!irqs_disabled());
+ WARN_ON_NONRT(!irqs_disabled());
kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
} else
kunmap(miter->page);
@@ -463,7 +463,7 @@ static size_t sg_copy_buffer(struct scat
sg_miter_start(&miter, sgl, nents, sg_flags);
- local_irq_save(flags);
+ local_irq_save_nort(flags);
while (sg_miter_next(&miter) && offset < buflen) {
unsigned int len;
@@ -480,7 +480,7 @@ static size_t sg_copy_buffer(struct scat
sg_miter_stop(&miter);
- local_irq_restore(flags);
+ local_irq_restore_nort(flags);
return offset;
}

View File

@ -0,0 +1,147 @@
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 3 Jul 2009 08:44:54 -0500
Subject: mm: shrink the page frame to !-rt size
He below is a boot-tested hack to shrink the page frame size back to
normal.
Should be a net win since there should be many less PTE-pages than
page-frames.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
include/linux/mm.h | 46 +++++++++++++++++++++++++++++++++++++++-------
include/linux/mm_types.h | 6 +++++-
mm/memory.c | 32 ++++++++++++++++++++++++++++++++
3 files changed, 76 insertions(+), 8 deletions(-)
Index: linux-3.2/include/linux/mm.h
===================================================================
--- linux-3.2.orig/include/linux/mm.h
+++ linux-3.2/include/linux/mm.h
@@ -1194,27 +1194,59 @@ static inline pmd_t *pmd_alloc(struct mm
* overflow into the next struct page (as it might with DEBUG_SPINLOCK).
* When freeing, reset page->mapping so free_pages_check won't complain.
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
+
#define __pte_lockptr(page) &((page)->ptl)
-#define pte_lock_init(_page) do { \
- spin_lock_init(__pte_lockptr(_page)); \
-} while (0)
+
+static inline struct page *pte_lock_init(struct page *page)
+{
+ spin_lock_init(__pte_lockptr(page));
+ return page;
+}
+
#define pte_lock_deinit(page) ((page)->mapping = NULL)
+
+#else /* !PREEMPT_RT_FULL */
+
+/*
+ * On PREEMPT_RT_FULL the spinlock_t's are too large to embed in the
+ * page frame, hence it only has a pointer and we need to dynamically
+ * allocate the lock when we allocate PTE-pages.
+ *
+ * This is an overall win, since only a small fraction of the pages
+ * will be PTE pages under normal circumstances.
+ */
+
+#define __pte_lockptr(page) ((page)->ptl)
+
+extern struct page *pte_lock_init(struct page *page);
+extern void pte_lock_deinit(struct page *page);
+
+#endif /* PREEMPT_RT_FULL */
+
#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
#else /* !USE_SPLIT_PTLOCKS */
/*
* We use mm->page_table_lock to guard all pagetable pages of the mm.
*/
-#define pte_lock_init(page) do {} while (0)
+static inline struct page *pte_lock_init(struct page *page) { return page; }
#define pte_lock_deinit(page) do {} while (0)
#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
#endif /* USE_SPLIT_PTLOCKS */
-static inline void pgtable_page_ctor(struct page *page)
+static inline struct page *__pgtable_page_ctor(struct page *page)
{
- pte_lock_init(page);
- inc_zone_page_state(page, NR_PAGETABLE);
+ page = pte_lock_init(page);
+ if (page)
+ inc_zone_page_state(page, NR_PAGETABLE);
+ return page;
}
+#define pgtable_page_ctor(page) \
+do { \
+ page = __pgtable_page_ctor(page); \
+} while (0)
+
static inline void pgtable_page_dtor(struct page *page)
{
pte_lock_deinit(page);
Index: linux-3.2/include/linux/mm_types.h
===================================================================
--- linux-3.2.orig/include/linux/mm_types.h
+++ linux-3.2/include/linux/mm_types.h
@@ -118,7 +118,11 @@ struct page {
* system if PG_buddy is set.
*/
#if USE_SPLIT_PTLOCKS
- spinlock_t ptl;
+# ifndef CONFIG_PREEMPT_RT_FULL
+ spinlock_t ptl;
+# else
+ spinlock_t *ptl;
+# endif
#endif
struct kmem_cache *slab; /* SLUB: Pointer to slab */
struct page *first_page; /* Compound tail pages */
Index: linux-3.2/mm/memory.c
===================================================================
--- linux-3.2.orig/mm/memory.c
+++ linux-3.2/mm/memory.c
@@ -4008,3 +4008,35 @@ void copy_user_huge_page(struct page *ds
}
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+
+#if defined(CONFIG_PREEMPT_RT_FULL) && (USE_SPLIT_PTLOCKS > 0)
+/*
+ * Heinous hack, relies on the caller doing something like:
+ *
+ * pte = alloc_pages(PGALLOC_GFP, 0);
+ * if (pte)
+ * pgtable_page_ctor(pte);
+ * return pte;
+ *
+ * This ensures we release the page and return NULL when the
+ * lock allocation fails.
+ */
+struct page *pte_lock_init(struct page *page)
+{
+ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
+ if (page->ptl) {
+ spin_lock_init(__pte_lockptr(page));
+ } else {
+ __free_page(page);
+ page = NULL;
+ }
+ return page;
+}
+
+void pte_lock_deinit(struct page *page)
+{
+ kfree(page->ptl);
+ page->mapping = NULL;
+}
+
+#endif

View File

@ -0,0 +1,251 @@
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: mm: More lock breaks in slab.c
Handle __free_pages outside of the locked regions. This reduces the
lock contention on the percpu slab locks in -rt significantly.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/slab.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 68 insertions(+), 22 deletions(-)
Index: linux-3.2/mm/slab.c
===================================================================
--- linux-3.2.orig/mm/slab.c
+++ linux-3.2/mm/slab.c
@@ -719,6 +719,7 @@ static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_PER_CPU(struct list_head, slab_free_list);
static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
#ifndef CONFIG_PREEMPT_RT_BASE
@@ -734,14 +735,39 @@ slab_on_each_cpu(void (*func)(void *arg,
{
unsigned int i;
- for_each_online_cpu(i) {
- spin_lock_irq(&per_cpu(slab_lock, i).lock);
+ for_each_online_cpu(i)
func(arg, i);
- spin_unlock_irq(&per_cpu(slab_lock, i).lock);
- }
}
#endif
+static void free_delayed(struct list_head *h)
+{
+ while(!list_empty(h)) {
+ struct page *page = list_first_entry(h, struct page, lru);
+
+ list_del(&page->lru);
+ __free_pages(page, page->index);
+ }
+}
+
+static void unlock_l3_and_free_delayed(spinlock_t *list_lock)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_spin_unlock_irq(slab_lock, list_lock);
+ free_delayed(&tmp);
+}
+
+static void unlock_slab_and_free_delayed(unsigned long flags)
+{
+ LIST_HEAD(tmp);
+
+ list_splice_init(&__get_cpu_var(slab_free_list), &tmp);
+ local_unlock_irqrestore(slab_lock, flags);
+ free_delayed(&tmp);
+}
+
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
@@ -1226,7 +1252,7 @@ static void __cpuinit cpuup_canceled(lon
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
goto free_array_cache;
}
@@ -1240,7 +1266,7 @@ static void __cpuinit cpuup_canceled(lon
alien = l3->alien;
l3->alien = NULL;
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
kfree(shared);
if (alien) {
@@ -1521,6 +1547,8 @@ void __init kmem_cache_init(void)
use_alien_caches = 0;
local_irq_lock_init(slab_lock);
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(&per_cpu(slab_free_list, i));
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
@@ -1798,12 +1826,14 @@ static void *kmem_getpages(struct kmem_c
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, bool delayed)
{
unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
+ struct page *page, *basepage = virt_to_page(addr);
const unsigned long nr_freed = i;
+ page = basepage;
+
kmemcheck_free_shadow(page, cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1819,7 +1849,13 @@ static void kmem_freepages(struct kmem_c
}
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+
+ if (!delayed) {
+ free_pages((unsigned long)addr, cachep->gfporder);
+ } else {
+ basepage->index = cachep->gfporder;
+ list_add(&basepage->lru, &__get_cpu_var(slab_free_list));
+ }
}
static void kmem_rcu_free(struct rcu_head *head)
@@ -1827,7 +1863,7 @@ static void kmem_rcu_free(struct rcu_hea
struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
struct kmem_cache *cachep = slab_rcu->cachep;
- kmem_freepages(cachep, slab_rcu->addr);
+ kmem_freepages(cachep, slab_rcu->addr, false);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slab_rcu);
}
@@ -2046,7 +2082,8 @@ static void slab_destroy_debugcheck(stru
* Before calling the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp,
+ bool delayed)
{
void *addr = slabp->s_mem - slabp->colouroff;
@@ -2059,7 +2096,7 @@ static void slab_destroy(struct kmem_cac
slab_rcu->addr = addr;
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
+ kmem_freepages(cachep, addr, delayed);
if (OFF_SLAB(cachep))
kmem_cache_free(cachep->slabp_cache, slabp);
}
@@ -2579,9 +2616,15 @@ static void do_drain(void *arg)
__do_drain(arg, smp_processor_id());
}
#else
-static void do_drain(void *arg, int this_cpu)
+static void do_drain(void *arg, int cpu)
{
- __do_drain(arg, this_cpu);
+ LIST_HEAD(tmp);
+
+ spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
+ __do_drain(arg, cpu);
+ list_splice_init(&per_cpu(slab_free_list, cpu), &tmp);
+ spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
+ free_delayed(&tmp);
}
#endif
@@ -2639,7 +2682,7 @@ static int drain_freelist(struct kmem_ca
*/
l3->free_objects -= cache->num;
local_spin_unlock_irq(slab_lock, &l3->list_lock);
- slab_destroy(cache, slabp);
+ slab_destroy(cache, slabp, false);
nr_freed++;
}
out:
@@ -2974,7 +3017,7 @@ static int cache_grow(struct kmem_cache
spin_unlock(&l3->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, objp, false);
failed:
if (local_flags & __GFP_WAIT)
local_lock_irq(slab_lock);
@@ -3624,7 +3667,7 @@ static void free_block(struct kmem_cache
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, slabp, true);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
@@ -3892,7 +3935,7 @@ void kmem_cache_free(struct kmem_cache *
debug_check_no_obj_freed(objp, obj_size(cachep));
local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, __builtin_return_address(0));
- local_unlock_irqrestore(slab_lock, flags);
+ unlock_slab_and_free_delayed(flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -3922,7 +3965,7 @@ void kfree(const void *objp)
debug_check_no_obj_freed(objp, obj_size(c));
local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, __builtin_return_address(0));
- local_unlock_irqrestore(slab_lock, flags);
+ unlock_slab_and_free_delayed(flags);
}
EXPORT_SYMBOL(kfree);
@@ -3978,7 +4021,8 @@ static int alloc_kmemlist(struct kmem_ca
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- local_spin_unlock_irq(slab_lock, &l3->list_lock);
+ unlock_l3_and_free_delayed(&l3->list_lock);
+
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -4044,7 +4088,9 @@ static void do_ccupdate_local(void *info
#else
static void do_ccupdate_local(void *info, int cpu)
{
+ spin_lock_irq(&per_cpu(slab_lock, cpu).lock);
__do_ccupdate_local(info, cpu);
+ spin_unlock_irq(&per_cpu(slab_lock, cpu).lock);
}
#endif
@@ -4086,8 +4132,8 @@ static int do_tune_cpucache(struct kmem_
local_spin_lock_irq(slab_lock,
&cachep->nodelists[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- local_spin_unlock_irq(slab_lock,
- &cachep->nodelists[cpu_to_mem(i)]->list_lock);
+
+ unlock_l3_and_free_delayed(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);

View File

@ -0,0 +1,38 @@
Subject: mm-slab-move-debug-out.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 20 Jun 2011 10:42:04 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/slab.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-3.2/mm/slab.c
===================================================================
--- linux-3.2.orig/mm/slab.c
+++ linux-3.2/mm/slab.c
@@ -3844,10 +3844,10 @@ void kmem_cache_free(struct kmem_cache *
{
unsigned long flags;
- local_irq_save(flags);
debug_check_no_locks_freed(objp, obj_size(cachep));
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, obj_size(cachep));
+ local_irq_save(flags);
__cache_free(cachep, objp, __builtin_return_address(0));
local_irq_restore(flags);
@@ -3873,11 +3873,11 @@ void kfree(const void *objp)
if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
- local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
debug_check_no_obj_freed(objp, obj_size(c));
+ local_irq_save(flags);
__cache_free(c, (void *)objp, __builtin_return_address(0));
local_irq_restore(flags);
}

View File

@ -0,0 +1,450 @@
Subject: mm-slab-wrap-functions.patch
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 18 Jun 2011 19:44:43 +0200
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
mm/slab.c | 152 ++++++++++++++++++++++++++++++++++++++++++--------------------
1 file changed, 104 insertions(+), 48 deletions(-)
Index: linux-3.2/mm/slab.c
===================================================================
--- linux-3.2.orig/mm/slab.c
+++ linux-3.2/mm/slab.c
@@ -116,6 +116,7 @@
#include <linux/kmemcheck.h>
#include <linux/memory.h>
#include <linux/prefetch.h>
+#include <linux/locallock.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -718,12 +719,40 @@ static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
+static DEFINE_LOCAL_IRQ_LOCK(slab_lock);
+
+#ifndef CONFIG_PREEMPT_RT_BASE
+# define slab_on_each_cpu(func, cp) on_each_cpu(func, cp, 1)
+#else
+/*
+ * execute func() for all CPUs. On PREEMPT_RT we dont actually have
+ * to run on the remote CPUs - we only have to take their CPU-locks.
+ * (This is a rare operation, so cacheline bouncing is not an issue.)
+ */
+static void
+slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
+{
+ unsigned int i;
+
+ for_each_online_cpu(i) {
+ spin_lock_irq(&per_cpu(slab_lock, i).lock);
+ func(arg, i);
+ spin_unlock_irq(&per_cpu(slab_lock, i).lock);
+ }
+}
+#endif
static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
{
return cachep->array[smp_processor_id()];
}
+static inline struct array_cache *cpu_cache_get_on_cpu(struct kmem_cache *cachep,
+ int cpu)
+{
+ return cachep->array[cpu];
+}
+
static inline struct kmem_cache *__find_general_cachep(size_t size,
gfp_t gfpflags)
{
@@ -1061,9 +1090,10 @@ static void reap_alien(struct kmem_cache
if (l3->alien) {
struct array_cache *ac = l3->alien[node];
- if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
+ if (ac && ac->avail &&
+ local_spin_trylock_irq(slab_lock, &ac->lock)) {
__drain_alien_cache(cachep, ac, node);
- spin_unlock_irq(&ac->lock);
+ local_spin_unlock_irq(slab_lock, &ac->lock);
}
}
}
@@ -1078,9 +1108,9 @@ static void drain_alien_cache(struct kme
for_each_online_node(i) {
ac = alien[i];
if (ac) {
- spin_lock_irqsave(&ac->lock, flags);
+ local_spin_lock_irqsave(slab_lock, &ac->lock, flags);
__drain_alien_cache(cachep, ac, i);
- spin_unlock_irqrestore(&ac->lock, flags);
+ local_spin_unlock_irqrestore(slab_lock, &ac->lock, flags);
}
}
}
@@ -1159,11 +1189,11 @@ static int init_cache_nodelists_node(int
cachep->nodelists[node] = l3;
}
- spin_lock_irq(&cachep->nodelists[node]->list_lock);
+ local_spin_lock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+ local_spin_unlock_irq(slab_lock, &cachep->nodelists[node]->list_lock);
}
return 0;
}
@@ -1188,7 +1218,7 @@ static void __cpuinit cpuup_canceled(lon
if (!l3)
goto free_array_cache;
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
/* Free limit for this kmem_list3 */
l3->free_limit -= cachep->batchcount;
@@ -1196,7 +1226,7 @@ static void __cpuinit cpuup_canceled(lon
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
goto free_array_cache;
}
@@ -1210,7 +1240,7 @@ static void __cpuinit cpuup_canceled(lon
alien = l3->alien;
l3->alien = NULL;
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
if (alien) {
@@ -1284,7 +1314,7 @@ static int __cpuinit cpuup_prepare(long
l3 = cachep->nodelists[node];
BUG_ON(!l3);
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (!l3->shared) {
/*
* We are serialised from CPU_DEAD or
@@ -1299,7 +1329,7 @@ static int __cpuinit cpuup_prepare(long
alien = NULL;
}
#endif
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
free_alien_cache(alien);
if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1490,6 +1520,8 @@ void __init kmem_cache_init(void)
if (num_possible_nodes() == 1)
use_alien_caches = 0;
+ local_irq_lock_init(slab_lock);
+
for (i = 0; i < NUM_INIT_LISTS; i++) {
kmem_list3_init(&initkmem_list3[i]);
if (i < MAX_NUMNODES)
@@ -2493,7 +2525,7 @@ EXPORT_SYMBOL(kmem_cache_create);
#if DEBUG
static void check_irq_off(void)
{
- BUG_ON(!irqs_disabled());
+ BUG_ON_NONRT(!irqs_disabled());
}
static void check_irq_on(void)
@@ -2528,13 +2560,12 @@ static void drain_array(struct kmem_cach
struct array_cache *ac,
int force, int node);
-static void do_drain(void *arg)
+static void __do_drain(void *arg, unsigned int cpu)
{
struct kmem_cache *cachep = arg;
struct array_cache *ac;
- int node = numa_mem_id();
+ int node = cpu_to_mem(cpu);
- check_irq_off();
ac = cpu_cache_get(cachep);
spin_lock(&cachep->nodelists[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
@@ -2542,12 +2573,24 @@ static void do_drain(void *arg)
ac->avail = 0;
}
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_drain(void *arg)
+{
+ __do_drain(arg, smp_processor_id());
+}
+#else
+static void do_drain(void *arg, int this_cpu)
+{
+ __do_drain(arg, this_cpu);
+}
+#endif
+
static void drain_cpu_caches(struct kmem_cache *cachep)
{
struct kmem_list3 *l3;
int node;
- on_each_cpu(do_drain, cachep, 1);
+ slab_on_each_cpu(do_drain, cachep);
check_irq_on();
for_each_online_node(node) {
l3 = cachep->nodelists[node];
@@ -2578,10 +2621,10 @@ static int drain_freelist(struct kmem_ca
nr_freed = 0;
while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
p = l3->slabs_free.prev;
if (p == &l3->slabs_free) {
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
goto out;
}
@@ -2595,7 +2638,7 @@ static int drain_freelist(struct kmem_ca
* to the cache.
*/
l3->free_objects -= cache->num;
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
slab_destroy(cache, slabp);
nr_freed++;
}
@@ -2890,7 +2933,7 @@ static int cache_grow(struct kmem_cache
offset *= cachep->colour_off;
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
/*
* The test for missing atomic flag is performed here, rather than
@@ -2920,7 +2963,7 @@ static int cache_grow(struct kmem_cache
cache_init_objs(cachep, slabp);
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
check_irq_off();
spin_lock(&l3->list_lock);
@@ -2934,7 +2977,7 @@ opps1:
kmem_freepages(cachep, objp);
failed:
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
return 0;
}
@@ -3326,11 +3369,11 @@ retry:
* set and go into memory reserves if necessary.
*/
if (local_flags & __GFP_WAIT)
- local_irq_enable();
+ local_unlock_irq(slab_lock);
kmem_flagcheck(cache, flags);
obj = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
- local_irq_disable();
+ local_lock_irq(slab_lock);
if (obj) {
/*
* Insert into the appropriate per node queues
@@ -3446,7 +3489,7 @@ __cache_alloc_node(struct kmem_cache *ca
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
@@ -3471,7 +3514,7 @@ __cache_alloc_node(struct kmem_cache *ca
/* ___cache_alloc_node can fall back to other nodes */
ptr = ____cache_alloc_node(cachep, flags, nodeid);
out:
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
flags);
@@ -3531,9 +3574,9 @@ __cache_alloc(struct kmem_cache *cachep,
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
+ local_lock_irqsave(slab_lock, save_flags);
objp = __do_cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
+ local_unlock_irqrestore(slab_lock, save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
flags);
@@ -3847,9 +3890,9 @@ void kmem_cache_free(struct kmem_cache *
debug_check_no_locks_freed(objp, obj_size(cachep));
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(objp, obj_size(cachep));
- local_irq_save(flags);
+ local_lock_irqsave(slab_lock, flags);
__cache_free(cachep, objp, __builtin_return_address(0));
- local_irq_restore(flags);
+ local_unlock_irqrestore(slab_lock, flags);
trace_kmem_cache_free(_RET_IP_, objp);
}
@@ -3877,9 +3920,9 @@ void kfree(const void *objp)
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
debug_check_no_obj_freed(objp, obj_size(c));
- local_irq_save(flags);
+ local_lock_irqsave(slab_lock, flags);
__cache_free(c, (void *)objp, __builtin_return_address(0));
- local_irq_restore(flags);
+ local_unlock_irqrestore(slab_lock, flags);
}
EXPORT_SYMBOL(kfree);
@@ -3922,7 +3965,7 @@ static int alloc_kmemlist(struct kmem_ca
if (l3) {
struct array_cache *shared = l3->shared;
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (shared)
free_block(cachep, shared->entry,
@@ -3935,7 +3978,7 @@ static int alloc_kmemlist(struct kmem_ca
}
l3->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
kfree(shared);
free_alien_cache(new_alien);
continue;
@@ -3982,17 +4025,28 @@ struct ccupdate_struct {
struct array_cache *new[0];
};
-static void do_ccupdate_local(void *info)
+static void __do_ccupdate_local(void *info, int cpu)
{
struct ccupdate_struct *new = info;
struct array_cache *old;
- check_irq_off();
- old = cpu_cache_get(new->cachep);
+ old = cpu_cache_get_on_cpu(new->cachep, cpu);
+
+ new->cachep->array[cpu] = new->new[cpu];
+ new->new[cpu] = old;
+}
- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
- new->new[smp_processor_id()] = old;
+#ifndef CONFIG_PREEMPT_RT_BASE
+static void do_ccupdate_local(void *info)
+{
+ __do_ccupdate_local(info, smp_processor_id());
}
+#else
+static void do_ccupdate_local(void *info, int cpu)
+{
+ __do_ccupdate_local(info, cpu);
+}
+#endif
/* Always called with the cache_chain_mutex held */
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
@@ -4018,7 +4072,7 @@ static int do_tune_cpucache(struct kmem_
}
new->cachep = cachep;
- on_each_cpu(do_ccupdate_local, (void *)new, 1);
+ slab_on_each_cpu(do_ccupdate_local, (void *)new);
check_irq_on();
cachep->batchcount = batchcount;
@@ -4029,9 +4083,11 @@ static int do_tune_cpucache(struct kmem_
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
- spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ local_spin_lock_irq(slab_lock,
+ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ local_spin_unlock_irq(slab_lock,
+ &cachep->nodelists[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);
@@ -4107,7 +4163,7 @@ static void drain_array(struct kmem_cach
if (ac->touched && !force) {
ac->touched = 0;
} else {
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
@@ -4117,7 +4173,7 @@ static void drain_array(struct kmem_cach
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
}
@@ -4256,7 +4312,7 @@ static int s_show(struct seq_file *m, vo
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list) {
if (slabp->inuse != cachep->num && !error)
@@ -4281,7 +4337,7 @@ static int s_show(struct seq_file *m, vo
if (l3->shared)
shared_avail += l3->shared->avail;
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
@@ -4510,13 +4566,13 @@ static int leaks_show(struct seq_file *m
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ local_spin_lock_irq(slab_lock, &l3->list_lock);
list_for_each_entry(slabp, &l3->slabs_full, list)
handle_slab(n, cachep, slabp);
list_for_each_entry(slabp, &l3->slabs_partial, list)
handle_slab(n, cachep, slabp);
- spin_unlock_irq(&l3->list_lock);
+ local_spin_unlock_irq(slab_lock, &l3->list_lock);
}
name = cachep->name;
if (n[0] == n[1]) {

Some files were not shown because too many files have changed in this diff Show More