diff --git a/debian/changelog b/debian/changelog
index 7de7b900b..2569426fa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -33,6 +33,9 @@ linux-2.6 (3.0.0-1) UNRELEASED; urgency=low
   [ Bastian Blank ]
   * [xen] Allow autoloading of backend drivers.
 
+  [ Uwe Kleine-König ]
+  * [amd64] Add rt featureset with 3.0-rt2 patch set
+
  -- maximilian attems Tue, 05 Jul 2011 14:25:29 +0200
 
 linux-2.6 (3.0.0~rc6-1~experimental.1) experimental; urgency=low
diff --git a/debian/config/amd64/defines b/debian/config/amd64/defines
index 48b1aea56..e69547196 100644
--- a/debian/config/amd64/defines
+++ b/debian/config/amd64/defines
@@ -1,4 +1,6 @@
 [base]
+featuresets:
+ rt
 flavours:
  amd64
 kernel-arch: x86
diff --git a/debian/config/amd64/rt/defines b/debian/config/amd64/rt/defines
new file mode 100644
index 000000000..090dc41bf
--- /dev/null
+++ b/debian/config/amd64/rt/defines
@@ -0,0 +1,3 @@
+[base]
+flavours:
+ amd64
diff --git a/debian/config/defines b/debian/config/defines
index d6fa20417..32b08d442 100644
--- a/debian/config/defines
+++ b/debian/config/defines
@@ -22,6 +22,7 @@ arches:
 compiler: gcc-4.5
 featuresets:
  none
+ rt
 
 [description]
 part-long-up: This kernel is not suitable for SMP (multi-processor,
@@ -47,3 +48,5 @@ elilo: elilo (>= 3.12-3.1~)
 lilo: lilo (>= 22.8-8.2~)
 s390-tools: s390-tools (>= 1.8.3-2~)
+
+[featureset-rt]
+enabled: true
diff --git a/debian/config/featureset-rt/config b/debian/config/featureset-rt/config
new file mode 100644
index 000000000..8136f167e
--- /dev/null
+++ b/debian/config/featureset-rt/config
@@ -0,0 +1,5 @@
+# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_PREEMPT_RT_FULL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_MISSED_TIMER_OFFSETS_HIST=y
+CONFIG_WAKEUP_LATENCY_HIST=y
diff --git a/debian/config/featureset-rt/defines b/debian/config/featureset-rt/defines
new file mode 100644
index 000000000..44e2f2b03
--- /dev/null
+++ b/debian/config/featureset-rt/defines
@@ -0,0 +1,7 @@
+[abi]
+ignore-changes: *
+
+[description]
+part-long-rt: This kernel includes the PREEMPT_RT realtime patch set.
+part-short-rt: PREEMPT_RT
+parts: rt
diff --git a/debian/patches/features/all/rt/gen-patch b/debian/patches/features/all/rt/gen-patch
new file mode 100644
index 000000000..b43a96749
--- /dev/null
+++ b/debian/patches/features/all/rt/gen-patch
@@ -0,0 +1,17 @@
+#! /bin/sh
+
+set -e
+
+version="$1"
+
+if [ -z "$version" ]; then
+ echo >&2 "Usage: $0 <version>"
+ exit 2
+fi
+
+name="patch-$version.patch"
+dir="debian/patches/features/all/rt"
+wget -O "$dir/$name.bz2" "http://www.kernel.org/pub/linux/kernel/projects/rt/patch-$version.patch.bz2"
+wget -O "$dir/$name.bz2.sign" "http://www.kernel.org/pub/linux/kernel/projects/rt/patch-$version.patch.bz2.sign"
+( cd "$dir" && gpg --verify "$name.bz2.sign" )
+bzcat "$dir/$name.bz2" | filterdiff -x linux-2.6/localversion-rt > "$dir/$name"
diff --git a/debian/patches/features/all/rt/patch-3.0-rt2.patch b/debian/patches/features/all/rt/patch-3.0-rt2.patch
new file mode 100644
index 000000000..8bb2f97e7
--- /dev/null
+++ b/debian/patches/features/all/rt/patch-3.0-rt2.patch
@@ -0,0 +1,23493 @@
+Index: linux-2.6/drivers/rtc/interface.c
+===================================================================
+--- linux-2.6.orig/drivers/rtc/interface.c
++++ linux-2.6/drivers/rtc/interface.c
+@@ -636,6 +636,29 @@ void rtc_irq_unregister(struct rtc_devic
+ }
+ EXPORT_SYMBOL_GPL(rtc_irq_unregister);
+
++static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
++{
++ /*
++ * We unconditionally cancel the timer here, because otherwise
++ * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
++ * when we manage to start the timer before the callback
++ * returns HRTIMER_RESTART.
++ *
++ * We cannot use hrtimer_cancel() here as a running callback
++ * could be blocked on rtc->irq_task_lock and hrtimer_cancel()
++ * would spin forever.
++ */
++ if (hrtimer_try_to_cancel(&rtc->pie_timer) < 0)
++ return -1;
++
++ if (enabled) {
++ ktime_t period = ktime_set(0, NSEC_PER_SEC / rtc->irq_freq);
++
++ hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL);
++ }
++ return 0;
++}
++
+ /**
+ * rtc_irq_set_state - enable/disable 2^N Hz periodic IRQs
+ * @rtc: the rtc device
+@@ -651,21 +674,21 @@ int rtc_irq_set_state(struct rtc_device
+ int err = 0;
+ unsigned long flags;
+
++retry:
+ spin_lock_irqsave(&rtc->irq_task_lock, flags);
+ if (rtc->irq_task != NULL && task == NULL)
+ err = -EBUSY;
+ if (rtc->irq_task != task)
+ err = -EACCES;
+-
+- if (enabled) {
+- ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+- hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL);
+- } else {
+- hrtimer_cancel(&rtc->pie_timer);
++ if (!err) {
++ if (rtc_update_hrtimer(rtc, enabled) < 0) {
++ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
++ cpu_relax();
++ goto retry;
++ }
++ rtc->pie_enabled = enabled;
+ }
+- rtc->pie_enabled = enabled;
+ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+-
+ return err;
+ }
+ EXPORT_SYMBOL_GPL(rtc_irq_set_state);
+@@ -685,22 +708,20 @@ int rtc_irq_set_freq(struct rtc_device *
+ int err = 0;
+ unsigned long flags;
+
+- if (freq <= 0)
++ if (freq <= 0 || freq > 5000)
+ return -EINVAL;
+-
++retry:
+ spin_lock_irqsave(&rtc->irq_task_lock, flags);
+ if (rtc->irq_task != NULL && task == NULL)
+ err = -EBUSY;
+ if (rtc->irq_task != task)
+ err = -EACCES;
+- if (err == 0) {
++ if (!err) {
+ rtc->irq_freq = freq;
+- if (rtc->pie_enabled) {
+- ktime_t period;
+- hrtimer_cancel(&rtc->pie_timer);
+- period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq);
+- hrtimer_start(&rtc->pie_timer, period,
+- HRTIMER_MODE_REL);
++ if (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) {
++ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
++ cpu_relax();
++ goto retry;
+ }
+ }
+ spin_unlock_irqrestore(&rtc->irq_task_lock, flags);
+Index: linux-2.6/kernel/trace/ftrace.c
+=================================================================== +--- linux-2.6.orig/kernel/trace/ftrace.c ++++ linux-2.6/kernel/trace/ftrace.c +@@ -1182,8 +1182,14 @@ alloc_and_copy_ftrace_hash(int size_bits + return NULL; + } + ++static void ++ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash); ++static void ++ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash); ++ + static int +-ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src) ++ftrace_hash_move(struct ftrace_ops *ops, int enable, ++ struct ftrace_hash **dst, struct ftrace_hash *src) + { + struct ftrace_func_entry *entry; + struct hlist_node *tp, *tn; +@@ -1193,9 +1199,16 @@ ftrace_hash_move(struct ftrace_hash **ds + unsigned long key; + int size = src->count; + int bits = 0; ++ int ret; + int i; + + /* ++ * Remove the current set, update the hash and add ++ * them back. ++ */ ++ ftrace_hash_rec_disable(ops, enable); ++ ++ /* + * If the new source is empty, just free dst and assign it + * the empty_hash. + */ +@@ -1215,9 +1228,10 @@ ftrace_hash_move(struct ftrace_hash **ds + if (bits > FTRACE_HASH_MAX_BITS) + bits = FTRACE_HASH_MAX_BITS; + ++ ret = -ENOMEM; + new_hash = alloc_ftrace_hash(bits); + if (!new_hash) +- return -ENOMEM; ++ goto out; + + size = 1 << src->size_bits; + for (i = 0; i < size; i++) { +@@ -1236,7 +1250,16 @@ ftrace_hash_move(struct ftrace_hash **ds + rcu_assign_pointer(*dst, new_hash); + free_ftrace_hash_rcu(old_hash); + +- return 0; ++ ret = 0; ++ out: ++ /* ++ * Enable regardless of ret: ++ * On success, we enable the new hash. ++ * On failure, we re-enable the original hash. ++ */ ++ ftrace_hash_rec_enable(ops, enable); ++ ++ return ret; + } + + /* +@@ -2857,7 +2880,7 @@ ftrace_set_regex(struct ftrace_ops *ops, + ftrace_match_records(hash, buf, len); + + mutex_lock(&ftrace_lock); +- ret = ftrace_hash_move(orig_hash, hash); ++ ret = ftrace_hash_move(ops, enable, orig_hash, hash); + mutex_unlock(&ftrace_lock); + + mutex_unlock(&ftrace_regex_lock); +@@ -3040,18 +3063,12 @@ ftrace_regex_release(struct inode *inode + orig_hash = &iter->ops->notrace_hash; + + mutex_lock(&ftrace_lock); +- /* +- * Remove the current set, update the hash and add +- * them back. 
+- */ +- ftrace_hash_rec_disable(iter->ops, filter_hash); +- ret = ftrace_hash_move(orig_hash, iter->hash); +- if (!ret) { +- ftrace_hash_rec_enable(iter->ops, filter_hash); +- if (iter->ops->flags & FTRACE_OPS_FL_ENABLED +- && ftrace_enabled) +- ftrace_run_update_code(FTRACE_ENABLE_CALLS); +- } ++ ret = ftrace_hash_move(iter->ops, filter_hash, ++ orig_hash, iter->hash); ++ if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED) ++ && ftrace_enabled) ++ ftrace_run_update_code(FTRACE_ENABLE_CALLS); ++ + mutex_unlock(&ftrace_lock); + } + free_ftrace_hash(iter->hash); +Index: linux-2.6/drivers/block/floppy.c +=================================================================== +--- linux-2.6.orig/drivers/block/floppy.c ++++ linux-2.6/drivers/block/floppy.c +@@ -4250,7 +4250,7 @@ static int __init floppy_init(void) + use_virtual_dma = can_use_virtual_dma & 1; + fdc_state[0].address = FDC1; + if (fdc_state[0].address == -1) { +- del_timer(&fd_timeout); ++ del_timer_sync(&fd_timeout); + err = -ENODEV; + goto out_unreg_region; + } +@@ -4261,7 +4261,7 @@ static int __init floppy_init(void) + fdc = 0; /* reset fdc in case of unexpected interrupt */ + err = floppy_grab_irq_and_dma(); + if (err) { +- del_timer(&fd_timeout); ++ del_timer_sync(&fd_timeout); + err = -EBUSY; + goto out_unreg_region; + } +@@ -4318,7 +4318,7 @@ static int __init floppy_init(void) + user_reset_fdc(-1, FD_RESET_ALWAYS, false); + } + fdc = 0; +- del_timer(&fd_timeout); ++ del_timer_sync(&fd_timeout); + current_drive = 0; + initialized = true; + if (have_no_fdc) { +@@ -4368,7 +4368,7 @@ out_unreg_blkdev: + unregister_blkdev(FLOPPY_MAJOR, "fd"); + out_put_disk: + while (dr--) { +- del_timer(&motor_off_timer[dr]); ++ del_timer_sync(&motor_off_timer[dr]); + if (disks[dr]->queue) + blk_cleanup_queue(disks[dr]->queue); + put_disk(disks[dr]); +Index: linux-2.6/drivers/gpu/drm/drm_irq.c +=================================================================== +--- linux-2.6.orig/drivers/gpu/drm/drm_irq.c ++++ linux-2.6/drivers/gpu/drm/drm_irq.c +@@ -109,10 +109,7 @@ static void vblank_disable_and_save(stru + /* Prevent vblank irq processing while disabling vblank irqs, + * so no updates of timestamps or count can happen after we've + * disabled. Needed to prevent races in case of delayed irq's. +- * Disable preemption, so vblank_time_lock is held as short as +- * possible, even under a kernel with PREEMPT_RT patches. + */ +- preempt_disable(); + spin_lock_irqsave(&dev->vblank_time_lock, irqflags); + + dev->driver->disable_vblank(dev, crtc); +@@ -163,7 +160,6 @@ static void vblank_disable_and_save(stru + clear_vblank_timestamps(dev, crtc); + + spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags); +- preempt_enable(); + } + + static void vblank_disable_fn(unsigned long arg) +@@ -875,10 +871,6 @@ int drm_vblank_get(struct drm_device *de + spin_lock_irqsave(&dev->vbl_lock, irqflags); + /* Going from 0->1 means we have to enable interrupts again */ + if (atomic_add_return(1, &dev->vblank_refcount[crtc]) == 1) { +- /* Disable preemption while holding vblank_time_lock. Do +- * it explicitely to guard against PREEMPT_RT kernel. +- */ +- preempt_disable(); + spin_lock_irqsave(&dev->vblank_time_lock, irqflags2); + if (!dev->vblank_enabled[crtc]) { + /* Enable vblank irqs under vblank_time_lock protection. 
+@@ -898,7 +890,6 @@ int drm_vblank_get(struct drm_device *de + } + } + spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags2); +- preempt_enable(); + } else { + if (!dev->vblank_enabled[crtc]) { + atomic_dec(&dev->vblank_refcount[crtc]); +Index: linux-2.6/arch/x86/kernel/kprobes.c +=================================================================== +--- linux-2.6.orig/arch/x86/kernel/kprobes.c ++++ linux-2.6/arch/x86/kernel/kprobes.c +@@ -475,7 +475,6 @@ static void __kprobes setup_singlestep(s + * stepping. + */ + regs->ip = (unsigned long)p->ainsn.insn; +- preempt_enable_no_resched(); + return; + } + #endif +Index: linux-2.6/drivers/ide/ide_platform.c +=================================================================== +--- linux-2.6.orig/drivers/ide/ide_platform.c ++++ linux-2.6/drivers/ide/ide_platform.c +@@ -95,7 +95,7 @@ static int __devinit plat_ide_probe(stru + plat_ide_setup_ports(&hw, base, alt_base, pdata, res_irq->start); + hw.dev = &pdev->dev; + +- d.irq_flags = res_irq->flags; ++ d.irq_flags = 0; + if (mmio) + d.host_flags |= IDE_HFLAG_MMIO; + +Index: linux-2.6/kernel/sched.c +=================================================================== +--- linux-2.6.orig/kernel/sched.c ++++ linux-2.6/kernel/sched.c +@@ -185,6 +185,7 @@ void init_rt_bandwidth(struct rt_bandwid + + hrtimer_init(&rt_b->rt_period_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rt_b->rt_period_timer.irqsafe = 1; + rt_b->rt_period_timer.function = sched_rt_period_timer; + } + +@@ -800,7 +801,11 @@ late_initcall(sched_init_debug); + * Number of tasks to iterate in a single balance run. + * Limited because this is done with IRQs disabled. + */ ++#ifndef CONFIG_PREEMPT_RT_FULL + const_debug unsigned int sysctl_sched_nr_migrate = 32; ++#else ++const_debug unsigned int sysctl_sched_nr_migrate = 8; ++#endif + + /* + * period over which we average the RT time consumption, measured +@@ -1136,6 +1141,7 @@ static void init_rq_hrtick(struct rq *rq + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rq->hrtick_timer.function = hrtick; ++ rq->hrtick_timer.irqsafe = 1; + } + #else /* CONFIG_SCHED_HRTICK */ + static inline void hrtick_clear(struct rq *rq) +@@ -2378,11 +2384,11 @@ static int select_fallback_rq(int cpu, s + + /* Look for allowed, online CPU in same node. */ + for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) +- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) ++ if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + return dest_cpu; + + /* Any allowed, online CPU? 
*/ +- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); ++ dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask); + if (dest_cpu < nr_cpu_ids) + return dest_cpu; + +@@ -2419,7 +2425,7 @@ int select_task_rq(struct task_struct *p + * [ this allows ->select_task() to simply return task_cpu(p) and + * not worry about this generic constraint ] + */ +- if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || ++ if (unlikely(!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)) || + !cpu_online(cpu))) + cpu = select_fallback_rq(task_cpu(p), p); + +@@ -2477,10 +2483,6 @@ static void ttwu_activate(struct rq *rq, + { + activate_task(rq, p, en_flags); + p->on_rq = 1; +- +- /* if a worker is waking up, notify workqueue */ +- if (p->flags & PF_WQ_WORKER) +- wq_worker_waking_up(p, cpu_of(rq)); + } + + /* +@@ -2678,8 +2680,25 @@ try_to_wake_up(struct task_struct *p, un + + smp_wmb(); + raw_spin_lock_irqsave(&p->pi_lock, flags); +- if (!(p->state & state)) ++ if (!(p->state & state)) { ++ /* ++ * The task might be running due to a spinlock sleeper ++ * wakeup. Check the saved state and set it to running ++ * if the wakeup condition is true. ++ */ ++ if (!(wake_flags & WF_LOCK_SLEEPER)) { ++ if (p->saved_state & state) ++ p->saved_state = TASK_RUNNING; ++ } + goto out; ++ } ++ ++ /* ++ * If this is a regular wakeup, then we can unconditionally ++ * clear the saved state of a "lock sleeper". ++ */ ++ if (!(wake_flags & WF_LOCK_SLEEPER)) ++ p->saved_state = TASK_RUNNING; + + success = 1; /* we're going to change ->state */ + cpu = task_cpu(p); +@@ -2735,40 +2754,6 @@ out: + } + + /** +- * try_to_wake_up_local - try to wake up a local task with rq lock held +- * @p: the thread to be awakened +- * +- * Put @p on the run-queue if it's not already there. The caller must +- * ensure that this_rq() is locked, @p is bound to this_rq() and not +- * the current task. +- */ +-static void try_to_wake_up_local(struct task_struct *p) +-{ +- struct rq *rq = task_rq(p); +- +- BUG_ON(rq != this_rq()); +- BUG_ON(p == current); +- lockdep_assert_held(&rq->lock); +- +- if (!raw_spin_trylock(&p->pi_lock)) { +- raw_spin_unlock(&rq->lock); +- raw_spin_lock(&p->pi_lock); +- raw_spin_lock(&rq->lock); +- } +- +- if (!(p->state & TASK_NORMAL)) +- goto out; +- +- if (!p->on_rq) +- ttwu_activate(rq, p, ENQUEUE_WAKEUP); +- +- ttwu_do_wakeup(rq, p, 0); +- ttwu_stat(p, smp_processor_id(), 0); +-out: +- raw_spin_unlock(&p->pi_lock); +-} +- +-/** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. + * +@@ -2785,6 +2770,18 @@ int wake_up_process(struct task_struct * + } + EXPORT_SYMBOL(wake_up_process); + ++/** ++ * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" ++ * @p: The process to be woken up. ++ * ++ * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate ++ * the nature of the wakeup. 
++ */ ++int wake_up_lock_sleeper(struct task_struct *p) ++{ ++ return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); ++} ++ + int wake_up_state(struct task_struct *p, unsigned int state) + { + return try_to_wake_up(p, state, 0); +@@ -2825,7 +2822,7 @@ static void __sched_fork(struct task_str + void sched_fork(struct task_struct *p) + { + unsigned long flags; +- int cpu = get_cpu(); ++ int cpu; + + __sched_fork(p); + /* +@@ -2865,6 +2862,7 @@ void sched_fork(struct task_struct *p) + if (!rt_prio(p->prio)) + p->sched_class = &fair_sched_class; + ++ cpu = get_cpu(); + if (p->sched_class->task_fork) + p->sched_class->task_fork(p); + +@@ -2876,8 +2874,9 @@ void sched_fork(struct task_struct *p) + * Silence PROVE_RCU. + */ + raw_spin_lock_irqsave(&p->pi_lock, flags); +- set_task_cpu(p, cpu); ++ set_task_cpu(p, smp_processor_id()); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ put_cpu(); + + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) + if (likely(sched_info_on())) +@@ -2893,8 +2892,6 @@ void sched_fork(struct task_struct *p) + #ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + #endif +- +- put_cpu(); + } + + /* +@@ -3060,8 +3057,12 @@ static void finish_task_switch(struct rq + finish_lock_switch(rq, prev); + + fire_sched_in_preempt_notifiers(current); ++ /* ++ * We use mmdrop_delayed() here so we don't have to do the ++ * full __mmdrop() when we are the last user. ++ */ + if (mm) +- mmdrop(mm); ++ mmdrop_delayed(mm); + if (unlikely(prev_state == TASK_DEAD)) { + /* + * Remove function-return probe instances associated with this +@@ -4242,9 +4243,9 @@ pick_next_task(struct rq *rq) + } + + /* +- * schedule() is the main scheduler function. ++ * __schedule() is the main scheduler function. + */ +-asmlinkage void __sched schedule(void) ++static void __sched __schedule(void) + { + struct task_struct *prev, *next; + unsigned long *switch_count; +@@ -4272,29 +4273,6 @@ need_resched: + } else { + deactivate_task(rq, prev, DEQUEUE_SLEEP); + prev->on_rq = 0; +- +- /* +- * If a worker went to sleep, notify and ask workqueue +- * whether it wants to wake up a task to maintain +- * concurrency. +- */ +- if (prev->flags & PF_WQ_WORKER) { +- struct task_struct *to_wakeup; +- +- to_wakeup = wq_worker_sleeping(prev, cpu); +- if (to_wakeup) +- try_to_wake_up_local(to_wakeup); +- } +- +- /* +- * If we are going to sleep and we have plugged IO +- * queued, make sure to submit it to avoid deadlocks. +- */ +- if (blk_needs_flush_plug(prev)) { +- raw_spin_unlock(&rq->lock); +- blk_schedule_flush_plug(prev); +- raw_spin_lock(&rq->lock); +- } + } + switch_count = &prev->nvcsw; + } +@@ -4328,12 +4306,62 @@ need_resched: + + post_schedule(rq); + +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + if (need_resched()) + goto need_resched; + } ++ ++static inline void sched_submit_work(struct task_struct *tsk) ++{ ++ if (!tsk->state || tsk->pi_blocked_on) ++ return; ++ ++ /* ++ * If a worker went to sleep, notify and ask workqueue whether ++ * it wants to wake up a task to maintain concurrency. ++ */ ++ if (tsk->flags & PF_WQ_WORKER) ++ wq_worker_sleeping(tsk); ++ ++ /* ++ * If we are going to sleep and we have plugged IO queued, ++ * make sure to submit it to avoid deadlocks. 
++ */ ++ if (blk_needs_flush_plug(tsk)) ++ blk_schedule_flush_plug(tsk); ++} ++ ++static inline void sched_update_worker(struct task_struct *tsk) ++{ ++ if (tsk->pi_blocked_on) ++ return; ++ ++ if (tsk->flags & PF_WQ_WORKER) ++ wq_worker_running(tsk); ++} ++ ++asmlinkage void schedule(void) ++{ ++ struct task_struct *tsk = current; ++ ++ sched_submit_work(tsk); ++ __schedule(); ++ sched_update_worker(tsk); ++} + EXPORT_SYMBOL(schedule); + ++/** ++ * schedule_preempt_disabled - called with preemption disabled ++ * ++ * Returns with preemption disabled. Note: preempt_count must be 1 ++ */ ++void __sched schedule_preempt_disabled(void) ++{ ++ __preempt_enable_no_resched(); ++ schedule(); ++ preempt_disable(); ++} ++ + #ifdef CONFIG_MUTEX_SPIN_ON_OWNER + + static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +@@ -4405,7 +4433,7 @@ asmlinkage void __sched notrace preempt_ + + do { + add_preempt_count_notrace(PREEMPT_ACTIVE); +- schedule(); ++ __schedule(); + sub_preempt_count_notrace(PREEMPT_ACTIVE); + + /* +@@ -4433,7 +4461,7 @@ asmlinkage void __sched preempt_schedule + do { + add_preempt_count(PREEMPT_ACTIVE); + local_irq_enable(); +- schedule(); ++ __schedule(); + local_irq_disable(); + sub_preempt_count(PREEMPT_ACTIVE); + +@@ -4827,10 +4855,8 @@ long __sched sleep_on_timeout(wait_queue + } + EXPORT_SYMBOL(sleep_on_timeout); + +-#ifdef CONFIG_RT_MUTEXES +- + /* +- * rt_mutex_setprio - set the current priority of a task ++ * task_setprio - set the current priority of a task + * @p: task + * @prio: prio value (kernel-internal form) + * +@@ -4839,7 +4865,7 @@ EXPORT_SYMBOL(sleep_on_timeout); + * + * Used by the rt_mutex code to implement priority inheritance logic. + */ +-void rt_mutex_setprio(struct task_struct *p, int prio) ++void task_setprio(struct task_struct *p, int prio) + { + int oldprio, on_rq, running; + struct rq *rq; +@@ -4849,6 +4875,24 @@ void rt_mutex_setprio(struct task_struct + + rq = __task_rq_lock(p); + ++ /* ++ * Idle task boosting is a nono in general. There is one ++ * exception, when PREEMPT_RT and NOHZ is active: ++ * ++ * The idle task calls get_next_timer_interrupt() and holds ++ * the timer wheel base->lock on the CPU and another CPU wants ++ * to access the timer (probably to cancel it). We can safely ++ * ignore the boosting request, as the idle CPU runs this code ++ * with interrupts disabled and will complete the lock ++ * protected section without being interrupted. So there is no ++ * real need to boost. ++ */ ++ if (unlikely(p == rq->idle)) { ++ WARN_ON(p != rq->curr); ++ WARN_ON(p->pi_blocked_on); ++ goto out_unlock; ++ } ++ + trace_sched_pi_setprio(p, prio); + oldprio = p->prio; + prev_class = p->sched_class; +@@ -4872,11 +4916,10 @@ void rt_mutex_setprio(struct task_struct + enqueue_task(rq, p, oldprio < prio ? 
ENQUEUE_HEAD : 0); + + check_class_changed(rq, p, prev_class, oldprio); ++out_unlock: + __task_rq_unlock(rq); + } + +-#endif +- + void set_user_nice(struct task_struct *p, long nice) + { + int old_prio, delta, on_rq; +@@ -5543,7 +5586,7 @@ SYSCALL_DEFINE0(sched_yield) + __release(rq->lock); + spin_release(&rq->lock.dep_map, 1, _THIS_IP_); + do_raw_spin_unlock(&rq->lock); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + + schedule(); + +@@ -5557,9 +5600,17 @@ static inline int should_resched(void) + + static void __cond_resched(void) + { +- add_preempt_count(PREEMPT_ACTIVE); +- schedule(); +- sub_preempt_count(PREEMPT_ACTIVE); ++ do { ++ add_preempt_count(PREEMPT_ACTIVE); ++ __schedule(); ++ sub_preempt_count(PREEMPT_ACTIVE); ++ /* ++ * Check again in case we missed a preemption ++ * opportunity between schedule and now. ++ */ ++ barrier(); ++ ++ } while (need_resched()); + } + + int __sched _cond_resched(void) +@@ -5600,6 +5651,7 @@ int __cond_resched_lock(spinlock_t *lock + } + EXPORT_SYMBOL(__cond_resched_lock); + ++#ifndef CONFIG_PREEMPT_RT_FULL + int __sched __cond_resched_softirq(void) + { + BUG_ON(!in_softirq()); +@@ -5613,6 +5665,7 @@ int __sched __cond_resched_softirq(void) + return 0; + } + EXPORT_SYMBOL(__cond_resched_softirq); ++#endif + + /** + * yield - yield the current processor to other threads. +@@ -5859,7 +5912,7 @@ void show_state_filter(unsigned long sta + printk(KERN_INFO + " task PC stack pid father\n"); + #endif +- read_lock(&tasklist_lock); ++ rcu_read_lock(); + do_each_thread(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow +@@ -5875,7 +5928,7 @@ void show_state_filter(unsigned long sta + #ifdef CONFIG_SCHED_DEBUG + sysrq_sched_debug_show(); + #endif +- read_unlock(&tasklist_lock); ++ rcu_read_unlock(); + /* + * Only show locks if all tasks are dumped: + */ +@@ -5997,12 +6050,12 @@ static inline void sched_init_granularit + #ifdef CONFIG_SMP + void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + { +- if (p->sched_class && p->sched_class->set_cpus_allowed) +- p->sched_class->set_cpus_allowed(p, new_mask); +- else { +- cpumask_copy(&p->cpus_allowed, new_mask); ++ if (!p->migrate_disable) { ++ if (p->sched_class && p->sched_class->set_cpus_allowed) ++ p->sched_class->set_cpus_allowed(p, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + } ++ cpumask_copy(&p->cpus_allowed, new_mask); + } + + /* +@@ -6053,7 +6106,7 @@ int set_cpus_allowed_ptr(struct task_str + do_set_cpus_allowed(p, new_mask); + + /* Can the task run on the task's current CPU? 
If so, we're done */ +- if (cpumask_test_cpu(task_cpu(p), new_mask)) ++ if (cpumask_test_cpu(task_cpu(p), new_mask) || p->migrate_disable) + goto out; + + dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); +@@ -6072,6 +6125,83 @@ out: + } + EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + ++void migrate_disable(void) ++{ ++ struct task_struct *p = current; ++ const struct cpumask *mask; ++ unsigned long flags; ++ struct rq *rq; ++ ++ preempt_disable(); ++ if (p->migrate_disable) { ++ p->migrate_disable++; ++ preempt_enable(); ++ return; ++ } ++ ++ pin_current_cpu(); ++ if (unlikely(!scheduler_running)) { ++ p->migrate_disable = 1; ++ preempt_enable(); ++ return; ++ } ++ rq = task_rq_lock(p, &flags); ++ p->migrate_disable = 1; ++ mask = tsk_cpus_allowed(p); ++ ++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask)); ++ ++ if (!cpumask_equal(&p->cpus_allowed, mask)) { ++ if (p->sched_class->set_cpus_allowed) ++ p->sched_class->set_cpus_allowed(p, mask); ++ p->rt.nr_cpus_allowed = cpumask_weight(mask); ++ } ++ task_rq_unlock(rq, p, &flags); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_disable); ++ ++void migrate_enable(void) ++{ ++ struct task_struct *p = current; ++ const struct cpumask *mask; ++ unsigned long flags; ++ struct rq *rq; ++ ++ WARN_ON_ONCE(p->migrate_disable <= 0); ++ ++ preempt_disable(); ++ if (p->migrate_disable > 1) { ++ p->migrate_disable--; ++ preempt_enable(); ++ return; ++ } ++ ++ if (unlikely(!scheduler_running)) { ++ p->migrate_disable = 0; ++ unpin_current_cpu(); ++ preempt_enable(); ++ return; ++ } ++ ++ rq = task_rq_lock(p, &flags); ++ p->migrate_disable = 0; ++ mask = tsk_cpus_allowed(p); ++ ++ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask)); ++ ++ if (!cpumask_equal(&p->cpus_allowed, mask)) { ++ if (p->sched_class->set_cpus_allowed) ++ p->sched_class->set_cpus_allowed(p, mask); ++ p->rt.nr_cpus_allowed = cpumask_weight(mask); ++ } ++ ++ task_rq_unlock(rq, p, &flags); ++ unpin_current_cpu(); ++ preempt_enable(); ++} ++EXPORT_SYMBOL_GPL(migrate_enable); ++ + /* + * Move (not current) task off this cpu, onto dest cpu. We're doing + * this because either it can't run here any more (set_cpus_allowed() +@@ -6100,7 +6230,7 @@ static int __migrate_task(struct task_st + if (task_cpu(p) != src_cpu) + goto done; + /* Affinity changed (again). */ +- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) ++ if (!cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) + goto fail; + + /* +@@ -6142,6 +6272,8 @@ static int migration_cpu_stop(void *data + + #ifdef CONFIG_HOTPLUG_CPU + ++static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); ++ + /* + * Ensures that the idle task is using init_mm right before its cpu goes + * offline. +@@ -6154,7 +6286,12 @@ void idle_task_exit(void) + + if (mm != &init_mm) + switch_mm(mm, &init_mm, current); +- mmdrop(mm); ++ ++ /* ++ * Defer the cleanup to an alive cpu. On RT we can neither ++ * call mmdrop() nor mmdrop_delayed() from here. 
++ */ ++ per_cpu(idle_last_mm, smp_processor_id()) = mm; + } + + /* +@@ -6472,6 +6609,12 @@ migration_call(struct notifier_block *nf + migrate_nr_uninterruptible(rq); + calc_global_load_remove(rq); + break; ++ case CPU_DEAD: ++ if (per_cpu(idle_last_mm, cpu)) { ++ mmdrop(per_cpu(idle_last_mm, cpu)); ++ per_cpu(idle_last_mm, cpu) = NULL; ++ } ++ break; + #endif + } + +@@ -8188,7 +8331,8 @@ void __init sched_init(void) + #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP + static inline int preempt_count_equals(int preempt_offset) + { +- int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); ++ int nested = (preempt_count() & ~PREEMPT_ACTIVE) + ++ sched_rcu_preempt_depth(); + + return (nested == preempt_offset); + } +Index: linux-2.6/block/blk-core.c +=================================================================== +--- linux-2.6.orig/block/blk-core.c ++++ linux-2.6/block/blk-core.c +@@ -236,7 +236,7 @@ EXPORT_SYMBOL(blk_delay_queue); + **/ + void blk_start_queue(struct request_queue *q) + { +- WARN_ON(!irqs_disabled()); ++ WARN_ON_NONRT(!irqs_disabled()); + + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + __blk_run_queue(q); +@@ -301,7 +301,11 @@ void __blk_run_queue(struct request_queu + { + if (unlikely(blk_queue_stopped(q))) + return; +- ++ /* ++ * q->request_fn() can drop q->queue_lock and reenable ++ * interrupts, but must return with q->queue_lock held and ++ * interrupts disabled. ++ */ + q->request_fn(q); + } + EXPORT_SYMBOL(__blk_run_queue); +@@ -2667,11 +2671,11 @@ static void queue_unplugged(struct reque + * this lock). + */ + if (from_schedule) { +- spin_unlock(q->queue_lock); ++ spin_unlock_irq(q->queue_lock); + blk_run_queue_async(q); + } else { + __blk_run_queue(q); +- spin_unlock(q->queue_lock); ++ spin_unlock_irq(q->queue_lock); + } + + } +@@ -2697,7 +2701,6 @@ static void flush_plug_callbacks(struct + void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) + { + struct request_queue *q; +- unsigned long flags; + struct request *rq; + LIST_HEAD(list); + unsigned int depth; +@@ -2718,11 +2721,6 @@ void blk_flush_plug_list(struct blk_plug + q = NULL; + depth = 0; + +- /* +- * Save and disable interrupts here, to avoid doing it for every +- * queue lock we have to take. 
+- */ +- local_irq_save(flags); + while (!list_empty(&list)) { + rq = list_entry_rq(list.next); + list_del_init(&rq->queuelist); +@@ -2735,7 +2733,7 @@ void blk_flush_plug_list(struct blk_plug + queue_unplugged(q, depth, from_schedule); + q = rq->q; + depth = 0; +- spin_lock(q->queue_lock); ++ spin_lock_irq(q->queue_lock); + } + /* + * rq is already accounted, so use raw insert +@@ -2753,8 +2751,6 @@ void blk_flush_plug_list(struct blk_plug + */ + if (q) + queue_unplugged(q, depth, from_schedule); +- +- local_irq_restore(flags); + } + + void blk_finish_plug(struct blk_plug *plug) +Index: linux-2.6/kernel/workqueue.c +=================================================================== +--- linux-2.6.orig/kernel/workqueue.c ++++ linux-2.6/kernel/workqueue.c +@@ -137,6 +137,7 @@ struct worker { + unsigned int flags; /* X: flags */ + int id; /* I: worker id */ + struct work_struct rebind_work; /* L: rebind worker to cpu */ ++ int sleeping; /* None */ + }; + + /* +@@ -657,66 +658,58 @@ static void wake_up_worker(struct global + } + + /** +- * wq_worker_waking_up - a worker is waking up +- * @task: task waking up +- * @cpu: CPU @task is waking up to ++ * wq_worker_running - a worker is running again ++ * @task: task returning from sleep + * +- * This function is called during try_to_wake_up() when a worker is +- * being awoken. +- * +- * CONTEXT: +- * spin_lock_irq(rq->lock) ++ * This function is called when a worker returns from schedule() + */ +-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) ++void wq_worker_running(struct task_struct *task) + { + struct worker *worker = kthread_data(task); + ++ if (!worker->sleeping) ++ return; + if (!(worker->flags & WORKER_NOT_RUNNING)) +- atomic_inc(get_gcwq_nr_running(cpu)); ++ atomic_inc(get_gcwq_nr_running(smp_processor_id())); ++ worker->sleeping = 0; + } + + /** + * wq_worker_sleeping - a worker is going to sleep + * @task: task going to sleep +- * @cpu: CPU in question, must be the current CPU number +- * +- * This function is called during schedule() when a busy worker is +- * going to sleep. Worker on the same cpu can be woken up by +- * returning pointer to its task. +- * +- * CONTEXT: +- * spin_lock_irq(rq->lock) + * +- * RETURNS: +- * Worker task on @cpu to wake up, %NULL if none. ++ * This function is called from schedule() when a busy worker is ++ * going to sleep. + */ +-struct task_struct *wq_worker_sleeping(struct task_struct *task, +- unsigned int cpu) ++void wq_worker_sleeping(struct task_struct *task) + { +- struct worker *worker = kthread_data(task), *to_wakeup = NULL; +- struct global_cwq *gcwq = get_gcwq(cpu); +- atomic_t *nr_running = get_gcwq_nr_running(cpu); ++ struct worker *worker = kthread_data(task); ++ struct global_cwq *gcwq; ++ int cpu; + + if (worker->flags & WORKER_NOT_RUNNING) +- return NULL; ++ return; ++ ++ if (WARN_ON_ONCE(worker->sleeping)) ++ return; + +- /* this can only happen on the local cpu */ +- BUG_ON(cpu != raw_smp_processor_id()); ++ worker->sleeping = 1; + ++ cpu = smp_processor_id(); ++ gcwq = get_gcwq(cpu); ++ spin_lock_irq(&gcwq->lock); + /* + * The counterpart of the following dec_and_test, implied mb, + * worklist not empty test sequence is in insert_work(). + * Please read comment there. +- * +- * NOT_RUNNING is clear. This means that trustee is not in +- * charge and we're running on the local cpu w/ rq lock held +- * and preemption disabled, which in turn means that none else +- * could be manipulating idle_list, so dereferencing idle_list +- * without gcwq lock is safe. 
+- */ +- if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) +- to_wakeup = first_worker(gcwq); +- return to_wakeup ? to_wakeup->task : NULL; ++ */ ++ if (atomic_dec_and_test(get_gcwq_nr_running(cpu)) && ++ !list_empty(&gcwq->worklist)) { ++ worker = first_worker(gcwq); ++ if (worker) ++ wake_up_process(worker->task); ++ } ++ spin_unlock_irq(&gcwq->lock); + } + + /** +@@ -1067,8 +1060,8 @@ int queue_work(struct workqueue_struct * + { + int ret; + +- ret = queue_work_on(get_cpu(), wq, work); +- put_cpu(); ++ ret = queue_work_on(get_cpu_light(), wq, work); ++ put_cpu_light(); + + return ret; + } +@@ -3484,6 +3477,25 @@ static int __devinit workqueue_cpu_callb + kthread_stop(new_trustee); + return NOTIFY_BAD; + } ++ break; ++ case CPU_POST_DEAD: ++ case CPU_UP_CANCELED: ++ case CPU_DOWN_FAILED: ++ case CPU_ONLINE: ++ break; ++ case CPU_DYING: ++ /* ++ * We access this lockless. We are on the dying CPU ++ * and called from stomp machine. ++ * ++ * Before this, the trustee and all workers except for ++ * the ones which are still executing works from ++ * before the last CPU down must be on the cpu. After ++ * this, they'll all be diasporas. ++ */ ++ gcwq->flags |= GCWQ_DISASSOCIATED; ++ default: ++ goto out; + } + + /* some are called w/ irq disabled, don't disturb irq status */ +@@ -3503,16 +3515,6 @@ static int __devinit workqueue_cpu_callb + gcwq->first_idle = new_worker; + break; + +- case CPU_DYING: +- /* +- * Before this, the trustee and all workers except for +- * the ones which are still executing works from +- * before the last CPU down must be on the cpu. After +- * this, they'll all be diasporas. +- */ +- gcwq->flags |= GCWQ_DISASSOCIATED; +- break; +- + case CPU_POST_DEAD: + gcwq->trustee_state = TRUSTEE_BUTCHER; + /* fall through */ +@@ -3546,6 +3548,7 @@ static int __devinit workqueue_cpu_callb + + spin_unlock_irqrestore(&gcwq->lock, flags); + ++out: + return notifier_from_errno(0); + } + +Index: linux-2.6/kernel/workqueue_sched.h +=================================================================== +--- linux-2.6.orig/kernel/workqueue_sched.h ++++ linux-2.6/kernel/workqueue_sched.h +@@ -4,6 +4,5 @@ + * Scheduler hooks for concurrency managed workqueue. Only to be + * included from sched.c and workqueue.c. 
+ */ +-void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); +-struct task_struct *wq_worker_sleeping(struct task_struct *task, +- unsigned int cpu); ++void wq_worker_running(struct task_struct *task); ++void wq_worker_sleeping(struct task_struct *task); +Index: linux-2.6/arch/mips/sibyte/sb1250/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/sibyte/sb1250/irq.c ++++ linux-2.6/arch/mips/sibyte/sb1250/irq.c +@@ -178,7 +178,7 @@ static void ack_sb1250_irq(struct irq_da + + static struct irq_chip sb1250_irq_type = { + .name = "SB1250-IMR", +- .irq_mask_ack = ack_sb1250_irq, ++ .irq_mask = ack_sb1250_irq, + .irq_unmask = enable_sb1250_irq, + #ifdef CONFIG_SMP + .irq_set_affinity = sb1250_set_affinity +Index: linux-2.6/arch/mips/kernel/ftrace.c +=================================================================== +--- linux-2.6.orig/arch/mips/kernel/ftrace.c ++++ linux-2.6/arch/mips/kernel/ftrace.c +@@ -19,6 +19,26 @@ + + #include + ++#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) ++#define MCOUNT_OFFSET_INSNS 5 ++#else ++#define MCOUNT_OFFSET_INSNS 4 ++#endif ++ ++/* ++ * Check if the address is in kernel space ++ * ++ * Clone core_kernel_text() from kernel/extable.c, but doesn't call ++ * init_kernel_text() for Ftrace doesn't trace functions in init sections. ++ */ ++static inline int in_kernel_space(unsigned long ip) ++{ ++ if (ip >= (unsigned long)_stext && ++ ip <= (unsigned long)_etext) ++ return 1; ++ return 0; ++} ++ + #ifdef CONFIG_DYNAMIC_FTRACE + + #define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */ +@@ -54,20 +74,6 @@ static inline void ftrace_dyn_arch_init_ + #endif + } + +-/* +- * Check if the address is in kernel space +- * +- * Clone core_kernel_text() from kernel/extable.c, but doesn't call +- * init_kernel_text() for Ftrace doesn't trace functions in init sections. 
+- */ +-static inline int in_kernel_space(unsigned long ip) +-{ +- if (ip >= (unsigned long)_stext && +- ip <= (unsigned long)_etext) +- return 1; +- return 0; +-} +- + static int ftrace_modify_code(unsigned long ip, unsigned int new_code) + { + int faulted; +@@ -112,11 +118,6 @@ static int ftrace_modify_code(unsigned l + * 1: offset = 4 instructions + */ + +-#if defined(KBUILD_MCOUNT_RA_ADDRESS) && defined(CONFIG_32BIT) +-#define MCOUNT_OFFSET_INSNS 5 +-#else +-#define MCOUNT_OFFSET_INSNS 4 +-#endif + #define INSN_B_1F (0x10000000 | MCOUNT_OFFSET_INSNS) + + int ftrace_make_nop(struct module *mod, +Index: linux-2.6/arch/mips/loongson/fuloong-2e/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/loongson/fuloong-2e/irq.c ++++ linux-2.6/arch/mips/loongson/fuloong-2e/irq.c +@@ -42,6 +42,7 @@ asmlinkage void mach_irq_dispatch(unsign + static struct irqaction cascade_irqaction = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + void __init mach_init_irq(void) +Index: linux-2.6/arch/mips/loongson/lemote-2f/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/loongson/lemote-2f/irq.c ++++ linux-2.6/arch/mips/loongson/lemote-2f/irq.c +@@ -96,12 +96,13 @@ static irqreturn_t ip6_action(int cpl, v + struct irqaction ip6_irqaction = { + .handler = ip6_action, + .name = "cascade", +- .flags = IRQF_SHARED, ++ .flags = IRQF_SHARED | IRQF_NO_THREAD, + }; + + struct irqaction cascade_irqaction = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + void __init mach_init_irq(void) +Index: linux-2.6/arch/mips/ar7/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/ar7/irq.c ++++ linux-2.6/arch/mips/ar7/irq.c +@@ -98,7 +98,8 @@ static struct irq_chip ar7_sec_irq_type + + static struct irqaction ar7_cascade_action = { + .handler = no_action, +- .name = "AR7 cascade interrupt" ++ .name = "AR7 cascade interrupt", ++ .flags = IRQF_NO_THREAD, + }; + + static void __init ar7_irq_init(int base) +Index: linux-2.6/arch/mips/bcm63xx/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/bcm63xx/irq.c ++++ linux-2.6/arch/mips/bcm63xx/irq.c +@@ -222,6 +222,7 @@ static struct irq_chip bcm63xx_external_ + static struct irqaction cpu_ip2_cascade_action = { + .handler = no_action, + .name = "cascade_ip2", ++ .flags = IRQF_NO_THREAD, + }; + + void __init arch_init_irq(void) +Index: linux-2.6/arch/mips/cobalt/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/cobalt/irq.c ++++ linux-2.6/arch/mips/cobalt/irq.c +@@ -48,6 +48,7 @@ asmlinkage void plat_irq_dispatch(void) + static struct irqaction cascade = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + void __init arch_init_irq(void) +Index: linux-2.6/arch/mips/dec/setup.c +=================================================================== +--- linux-2.6.orig/arch/mips/dec/setup.c ++++ linux-2.6/arch/mips/dec/setup.c +@@ -101,20 +101,24 @@ int cpu_fpu_mask = DEC_CPU_IRQ_MASK(DEC_ + static struct irqaction ioirq = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + static struct irqaction fpuirq = { + .handler = no_action, + .name = "fpu", ++ .flags = IRQF_NO_THREAD, + }; + + static struct irqaction busirq = { + .flags = IRQF_DISABLED, + .name = "bus error", ++ .flags = IRQF_NO_THREAD, + }; + + 
static struct irqaction haltirq = { + .handler = dec_intr_halt, + .name = "halt", ++ .flags = IRQF_NO_THREAD, + }; + + +Index: linux-2.6/arch/mips/emma/markeins/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/emma/markeins/irq.c ++++ linux-2.6/arch/mips/emma/markeins/irq.c +@@ -169,7 +169,7 @@ void emma2rh_gpio_irq_init(void) + + static struct irqaction irq_cascade = { + .handler = no_action, +- .flags = 0, ++ .flags = IRQF_NO_THREAD, + .name = "cascade", + .dev_id = NULL, + .next = NULL, +Index: linux-2.6/arch/mips/lasat/interrupt.c +=================================================================== +--- linux-2.6.orig/arch/mips/lasat/interrupt.c ++++ linux-2.6/arch/mips/lasat/interrupt.c +@@ -105,6 +105,7 @@ asmlinkage void plat_irq_dispatch(void) + static struct irqaction cascade = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + void __init arch_init_irq(void) +Index: linux-2.6/arch/mips/mti-malta/malta-int.c +=================================================================== +--- linux-2.6.orig/arch/mips/mti-malta/malta-int.c ++++ linux-2.6/arch/mips/mti-malta/malta-int.c +@@ -350,12 +350,14 @@ unsigned int plat_ipi_resched_int_xlate( + + static struct irqaction i8259irq = { + .handler = no_action, +- .name = "XT-PIC cascade" ++ .name = "XT-PIC cascade", ++ .flags = IRQF_NO_THREAD, + }; + + static struct irqaction corehi_irqaction = { + .handler = no_action, +- .name = "CoreHi" ++ .name = "CoreHi", ++ .flags = IRQF_NO_THREAD, + }; + + static msc_irqmap_t __initdata msc_irqmap[] = { +Index: linux-2.6/arch/mips/pmc-sierra/msp71xx/msp_irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/pmc-sierra/msp71xx/msp_irq.c ++++ linux-2.6/arch/mips/pmc-sierra/msp71xx/msp_irq.c +@@ -109,11 +109,13 @@ asmlinkage void plat_irq_dispatch(struct + static struct irqaction cic_cascade_msp = { + .handler = no_action, + .name = "MSP CIC cascade" ++ .flags = IRQF_NO_THREAD, + }; + + static struct irqaction per_cascade_msp = { + .handler = no_action, + .name = "MSP PER cascade" ++ .flags = IRQF_NO_THREAD, + }; + + void __init arch_init_irq(void) +Index: linux-2.6/arch/mips/pnx8550/common/int.c +=================================================================== +--- linux-2.6.orig/arch/mips/pnx8550/common/int.c ++++ linux-2.6/arch/mips/pnx8550/common/int.c +@@ -167,7 +167,7 @@ static struct irq_chip level_irq_type = + + static struct irqaction gic_action = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = IRQF_DISABLED | IRQF_NO_THREAD, + .name = "GIC", + }; + +Index: linux-2.6/arch/mips/sgi-ip22/ip22-int.c +=================================================================== +--- linux-2.6.orig/arch/mips/sgi-ip22/ip22-int.c ++++ linux-2.6/arch/mips/sgi-ip22/ip22-int.c +@@ -155,32 +155,32 @@ static void __irq_entry indy_buserror_ir + + static struct irqaction local0_cascade = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = IRQF_DISABLED | IRQF_NO_THREAD, + .name = "local0 cascade", + }; + + static struct irqaction local1_cascade = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = IRQF_DISABLED | IRQF_NO_THREAD, + .name = "local1 cascade", + }; + + static struct irqaction buserr = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = IRQF_DISABLED | IRQF_NO_THREAD, + .name = "Bus Error", + }; + + static struct irqaction map0_cascade = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = 
IRQF_DISABLED | IRQF_NO_THREAD, + .name = "mapable0 cascade", + }; + + #ifdef USE_LIO3_IRQ + static struct irqaction map1_cascade = { + .handler = no_action, +- .flags = IRQF_DISABLED, ++ .flags = IRQF_DISABLED | IRQF_NO_THREAD, + .name = "mapable1 cascade", + }; + #define SGI_INTERRUPTS SGINT_END +Index: linux-2.6/arch/mips/sni/rm200.c +=================================================================== +--- linux-2.6.orig/arch/mips/sni/rm200.c ++++ linux-2.6/arch/mips/sni/rm200.c +@@ -359,6 +359,7 @@ void sni_rm200_init_8259A(void) + static struct irqaction sni_rm200_irq2 = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + static struct resource sni_rm200_pic1_resource = { +Index: linux-2.6/arch/mips/vr41xx/common/irq.c +=================================================================== +--- linux-2.6.orig/arch/mips/vr41xx/common/irq.c ++++ linux-2.6/arch/mips/vr41xx/common/irq.c +@@ -34,6 +34,7 @@ static irq_cascade_t irq_cascade[NR_IRQS + static struct irqaction cascade_irqaction = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NO_THREAD, + }; + + int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int)) +Index: linux-2.6/arch/mips/Kconfig +=================================================================== +--- linux-2.6.orig/arch/mips/Kconfig ++++ linux-2.6/arch/mips/Kconfig +@@ -24,6 +24,7 @@ config MIPS + select GENERIC_IRQ_PROBE + select GENERIC_IRQ_SHOW + select HAVE_ARCH_JUMP_LABEL ++ select IRQ_FORCED_THREADING + + menu "Machine selection" + +@@ -2038,7 +2039,7 @@ config CPU_R4400_WORKAROUNDS + # + config HIGHMEM + bool "High Memory Support" +- depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM ++ depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !PREEMPT_RT_FULL + + config CPU_SUPPORTS_HIGHMEM + bool +Index: linux-2.6/arch/mips/kernel/traps.c +=================================================================== +--- linux-2.6.orig/arch/mips/kernel/traps.c ++++ linux-2.6/arch/mips/kernel/traps.c +@@ -364,7 +364,7 @@ static int regs_to_trapnr(struct pt_regs + return (regs->cp0_cause >> 2) & 0x1f; + } + +-static DEFINE_SPINLOCK(die_lock); ++static DEFINE_RAW_SPINLOCK(die_lock); + + void __noreturn die(const char *str, struct pt_regs *regs) + { +@@ -378,7 +378,7 @@ void __noreturn die(const char *str, str + sig = 0; + + console_verbose(); +- spin_lock_irq(&die_lock); ++ raw_spin_lock_irq(&die_lock); + bust_spinlocks(1); + #ifdef CONFIG_MIPS_MT_SMTC + mips_mt_regdump(dvpret); +@@ -387,7 +387,7 @@ void __noreturn die(const char *str, str + printk("%s[#%d]:\n", str, ++die_counter); + show_registers(regs); + add_taint(TAINT_DIE); +- spin_unlock_irq(&die_lock); ++ raw_spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); +Index: linux-2.6/arch/mips/kernel/signal.c +=================================================================== +--- linux-2.6.orig/arch/mips/kernel/signal.c ++++ linux-2.6/arch/mips/kernel/signal.c +@@ -603,6 +603,9 @@ static void do_signal(struct pt_regs *re + if (!user_mode(regs)) + return; + ++ local_irq_enable(); ++ preempt_check_resched(); ++ + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = ¤t->saved_sigmask; + else +Index: linux-2.6/kernel/watchdog.c +=================================================================== +--- linux-2.6.orig/kernel/watchdog.c ++++ linux-2.6/kernel/watchdog.c +@@ -208,6 +208,8 @@ static struct perf_event_attr wd_hw_attr + .disabled = 1, + }; + ++static DEFINE_RAW_SPINLOCK(watchdog_output_lock); ++ + 
/* Callback function for perf event subsystem */ + static void watchdog_overflow_callback(struct perf_event *event, int nmi, + struct perf_sample_data *data, +@@ -234,10 +236,19 @@ static void watchdog_overflow_callback(s + if (__this_cpu_read(hard_watchdog_warn) == true) + return; + +- if (hardlockup_panic) ++ /* ++ * If early-printk is enabled then make sure we do not ++ * lock up in printk() and kill console logging: ++ */ ++ printk_kill(); ++ ++ if (hardlockup_panic) { + panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu); +- else ++ } else { ++ raw_spin_lock(&watchdog_output_lock); + WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); ++ raw_spin_unlock(&watchdog_output_lock); ++ } + + __this_cpu_write(hard_watchdog_warn, true); + return; +@@ -320,7 +331,7 @@ static enum hrtimer_restart watchdog_tim + */ + static int watchdog(void *unused) + { +- static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; ++ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); + + sched_setscheduler(current, SCHED_FIFO, ¶m); +@@ -349,7 +360,8 @@ static int watchdog(void *unused) + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); +- ++ param.sched_priority = 0; ++ sched_setscheduler(current, SCHED_NORMAL, ¶m); + return 0; + } + +@@ -422,6 +434,7 @@ static void watchdog_prepare_cpu(int cpu + WARN_ON(per_cpu(softlockup_watchdog, cpu)); + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; ++ hrtimer->irqsafe = 1; + } + + static int watchdog_enable(int cpu) +Index: linux-2.6/kernel/rtmutex-debug.c +=================================================================== +--- linux-2.6.orig/kernel/rtmutex-debug.c ++++ linux-2.6/kernel/rtmutex-debug.c +@@ -29,61 +29,6 @@ + + #include "rtmutex_common.h" + +-# define TRACE_WARN_ON(x) WARN_ON(x) +-# define TRACE_BUG_ON(x) BUG_ON(x) +- +-# define TRACE_OFF() \ +-do { \ +- if (rt_trace_on) { \ +- rt_trace_on = 0; \ +- console_verbose(); \ +- if (raw_spin_is_locked(¤t->pi_lock)) \ +- raw_spin_unlock(¤t->pi_lock); \ +- } \ +-} while (0) +- +-# define TRACE_OFF_NOLOCK() \ +-do { \ +- if (rt_trace_on) { \ +- rt_trace_on = 0; \ +- console_verbose(); \ +- } \ +-} while (0) +- +-# define TRACE_BUG_LOCKED() \ +-do { \ +- TRACE_OFF(); \ +- BUG(); \ +-} while (0) +- +-# define TRACE_WARN_ON_LOCKED(c) \ +-do { \ +- if (unlikely(c)) { \ +- TRACE_OFF(); \ +- WARN_ON(1); \ +- } \ +-} while (0) +- +-# define TRACE_BUG_ON_LOCKED(c) \ +-do { \ +- if (unlikely(c)) \ +- TRACE_BUG_LOCKED(); \ +-} while (0) +- +-#ifdef CONFIG_SMP +-# define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c) +-#else +-# define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0) +-#endif +- +-/* +- * deadlock detection flag. 
We turn it off when we detect +- * the first problem because we dont want to recurse back +- * into the tracing code when doing error printk or +- * executing a BUG(): +- */ +-static int rt_trace_on = 1; +- + static void printk_task(struct task_struct *p) + { + if (p) +@@ -111,8 +56,8 @@ static void printk_lock(struct rt_mutex + + void rt_mutex_debug_task_free(struct task_struct *task) + { +- WARN_ON(!plist_head_empty(&task->pi_waiters)); +- WARN_ON(task->pi_blocked_on); ++ DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); ++ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); + } + + /* +@@ -125,7 +70,7 @@ void debug_rt_mutex_deadlock(int detect, + { + struct task_struct *task; + +- if (!rt_trace_on || detect || !act_waiter) ++ if (!debug_locks || detect || !act_waiter) + return; + + task = rt_mutex_owner(act_waiter->lock); +@@ -139,7 +84,7 @@ void debug_rt_mutex_print_deadlock(struc + { + struct task_struct *task; + +- if (!waiter->deadlock_lock || !rt_trace_on) ++ if (!waiter->deadlock_lock || !debug_locks) + return; + + rcu_read_lock(); +@@ -149,7 +94,8 @@ void debug_rt_mutex_print_deadlock(struc + return; + } + +- TRACE_OFF_NOLOCK(); ++ if (!debug_locks_off()) ++ return; + + printk("\n============================================\n"); + printk( "[ BUG: circular locking deadlock detected! ]\n"); +@@ -180,7 +126,6 @@ void debug_rt_mutex_print_deadlock(struc + + printk("[ turning off deadlock detection." + "Please report this trace. ]\n\n"); +- local_irq_disable(); + } + + void debug_rt_mutex_lock(struct rt_mutex *lock) +@@ -189,7 +134,7 @@ void debug_rt_mutex_lock(struct rt_mutex + + void debug_rt_mutex_unlock(struct rt_mutex *lock) + { +- TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); ++ DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); + } + + void +@@ -199,7 +144,7 @@ debug_rt_mutex_proxy_lock(struct rt_mute + + void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) + { +- TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock)); ++ DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); + } + + void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) +@@ -213,8 +158,8 @@ void debug_rt_mutex_init_waiter(struct r + void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) + { + put_pid(waiter->deadlock_task_pid); +- TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); +- TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); ++ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); ++ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); + memset(waiter, 0x22, sizeof(*waiter)); + } + +Index: linux-2.6/arch/arm/kernel/perf_event.c +=================================================================== +--- linux-2.6.orig/arch/arm/kernel/perf_event.c ++++ linux-2.6/arch/arm/kernel/perf_event.c +@@ -420,7 +420,7 @@ armpmu_reserve_hardware(void) + continue; + + err = request_irq(irq, handle_irq, +- IRQF_DISABLED | IRQF_NOBALANCING, ++ IRQF_DISABLED | IRQF_NOBALANCING | IRQF_NO_THREAD, + "armpmu", NULL); + if (err) { + pr_warning("unable to request IRQ%d for ARM perf " +Index: linux-2.6/arch/arm/Kconfig +=================================================================== +--- linux-2.6.orig/arch/arm/Kconfig ++++ linux-2.6/arch/arm/Kconfig +@@ -29,6 +29,7 @@ config ARM + select HAVE_GENERIC_HARDIRQS + select HAVE_SPARSE_IRQ + select GENERIC_IRQ_SHOW ++ select IRQ_FORCED_THREADING + help + The ARM series is a line of low-power-consumption RISC chip designs + licensed by ARM Ltd and targeted at embedded applications and +@@ -1510,7 +1511,7 @@ config HAVE_ARCH_PFN_VALID + + 
config HIGHMEM + bool "High Memory Support" +- depends on MMU ++ depends on MMU && !PREEMPT_RT_FULL + help + The address space of ARM processors is only 4 Gigabytes large + and it has to accommodate user address space, kernel address +Index: linux-2.6/arch/powerpc/platforms/85xx/mpc85xx_cds.c +=================================================================== +--- linux-2.6.orig/arch/powerpc/platforms/85xx/mpc85xx_cds.c ++++ linux-2.6/arch/powerpc/platforms/85xx/mpc85xx_cds.c +@@ -178,7 +178,7 @@ static irqreturn_t mpc85xx_8259_cascade_ + + static struct irqaction mpc85xxcds_8259_irqaction = { + .handler = mpc85xx_8259_cascade_action, +- .flags = IRQF_SHARED, ++ .flags = IRQF_SHARED | IRQF_NO_THREAD, + .name = "8259 cascade", + }; + #endif /* PPC_I8259 */ +Index: linux-2.6/arch/powerpc/Kconfig +=================================================================== +--- linux-2.6.orig/arch/powerpc/Kconfig ++++ linux-2.6/arch/powerpc/Kconfig +@@ -69,10 +69,11 @@ config LOCKDEP_SUPPORT + + config RWSEM_GENERIC_SPINLOCK + bool ++ default y if PREEMPT_RT_FULL + + config RWSEM_XCHGADD_ALGORITHM + bool +- default y ++ default y if !PREEMPT_RT_FULL + + config GENERIC_LOCKBREAK + bool +@@ -134,6 +135,7 @@ config PPC + select GENERIC_IRQ_SHOW_LEVEL + select HAVE_RCU_TABLE_FREE if SMP + select HAVE_SYSCALL_TRACEPOINTS ++ select IRQ_FORCED_THREADING + + config EARLY_PRINTK + bool +@@ -271,7 +273,7 @@ menu "Kernel options" + + config HIGHMEM + bool "High memory support" +- depends on PPC32 ++ depends on PPC32 && !PREEMPT_RT_FULL + + source kernel/time/Kconfig + source kernel/Kconfig.hz +Index: linux-2.6/include/linux/sched.h +=================================================================== +--- linux-2.6.orig/include/linux/sched.h ++++ linux-2.6/include/linux/sched.h +@@ -359,6 +359,7 @@ extern signed long schedule_timeout_inte + extern signed long schedule_timeout_killable(signed long timeout); + extern signed long schedule_timeout_uninterruptible(signed long timeout); + asmlinkage void schedule(void); ++extern void schedule_preempt_disabled(void); + extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner); + + struct nsproxy; +@@ -510,7 +511,7 @@ struct task_cputime { + struct thread_group_cputimer { + struct task_cputime cputime; + int running; +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + #include +@@ -1070,6 +1071,7 @@ struct sched_domain; + #define WF_SYNC 0x01 /* waker goes to sleep after wakup */ + #define WF_FORK 0x02 /* child wakeup after fork */ + #define WF_MIGRATED 0x04 /* internal use, task got migrated */ ++#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ + + #define ENQUEUE_WAKEUP 1 + #define ENQUEUE_HEAD 2 +@@ -1219,6 +1221,7 @@ enum perf_event_task_context { + + struct task_struct { + volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ ++ volatile long saved_state; /* saved state for "spinlock sleepers" */ + void *stack; + atomic_t usage; + unsigned int flags; /* per process flags, defined below */ +@@ -1255,6 +1258,7 @@ struct task_struct { + #endif + + unsigned int policy; ++ int migrate_disable; + cpumask_t cpus_allowed; + + #ifdef CONFIG_PREEMPT_RCU +@@ -1356,6 +1360,7 @@ struct task_struct { + + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; ++ struct task_struct *posix_timer_list; + + /* process credentials */ + const struct cred __rcu *real_cred; /* objective and real subjective task +@@ -1389,6 +1394,7 @@ struct task_struct { + /* signal handlers */ + struct signal_struct *signal; + struct 
sighand_struct *sighand; ++ struct sigqueue *sigqueue_cache; + + sigset_t blocked, real_blocked; + sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ +@@ -1432,6 +1438,7 @@ struct task_struct { + /* mutex deadlock detection */ + struct mutex_waiter *blocked_on; + #endif ++ int pagefault_disabled; + #ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + unsigned long hardirq_enable_ip; +@@ -1558,6 +1565,12 @@ struct task_struct { + unsigned long trace; + /* bitmask and counter of trace recursion */ + unsigned long trace_recursion; ++#ifdef CONFIG_WAKEUP_LATENCY_HIST ++ u64 preempt_timestamp_hist; ++#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST ++ unsigned long timer_offset; ++#endif ++#endif + #endif /* CONFIG_TRACING */ + #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ + struct memcg_batch_info { +@@ -1570,11 +1583,12 @@ struct task_struct { + #ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; + #endif ++#ifdef CONFIG_PREEMPT_RT_BASE ++ struct rcu_head put_rcu; ++ int softirq_nestcnt; ++#endif + }; + +-/* Future-safe accessor for struct task_struct's cpus_allowed. */ +-#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) +- + /* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH +@@ -1743,6 +1757,15 @@ extern struct pid *cad_pid; + extern void free_task(struct task_struct *tsk); + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + ++#ifdef CONFIG_PREEMPT_RT_BASE ++extern void __put_task_struct_cb(struct rcu_head *rhp); ++ ++static inline void put_task_struct(struct task_struct *t) ++{ ++ if (atomic_dec_and_test(&t->usage)) ++ call_rcu(&t->put_rcu, __put_task_struct_cb); ++} ++#else + extern void __put_task_struct(struct task_struct *t); + + static inline void put_task_struct(struct task_struct *t) +@@ -1750,6 +1773,7 @@ static inline void put_task_struct(struc + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); + } ++#endif + + extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); + extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); +@@ -1774,6 +1798,7 @@ extern void thread_group_times(struct ta + #define PF_FROZEN 0x00010000 /* frozen for system suspend */ + #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ + #define PF_KSWAPD 0x00040000 /* I am kswapd */ ++#define PF_STOMPER 0x00080000 /* I am a stomp machine thread */ + #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ + #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ + #define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ +@@ -2021,9 +2046,14 @@ static inline void sched_autogroup_fork( + static inline void sched_autogroup_exit(struct signal_struct *sig) { } + #endif + ++extern void task_setprio(struct task_struct *p, int prio); ++ + #ifdef CONFIG_RT_MUTEXES + extern int rt_mutex_getprio(struct task_struct *p); +-extern void rt_mutex_setprio(struct task_struct *p, int prio); ++static inline void rt_mutex_setprio(struct task_struct *p, int prio) ++{ ++ task_setprio(p, prio); ++} + extern void rt_mutex_adjust_pi(struct task_struct *p); + #else + static inline int rt_mutex_getprio(struct task_struct *p) +@@ -2110,6 +2140,7 @@ extern void xtime_update(unsigned long t + + extern int wake_up_state(struct task_struct *tsk, unsigned int state); + extern int wake_up_process(struct task_struct *tsk); ++extern int wake_up_lock_sleeper(struct task_struct * tsk); + 
extern void wake_up_new_task(struct task_struct *tsk); + #ifdef CONFIG_SMP + extern void kick_process(struct task_struct *tsk); +@@ -2199,12 +2230,24 @@ extern struct mm_struct * mm_alloc(void) + + /* mmdrop drops the mm and the page tables */ + extern void __mmdrop(struct mm_struct *); ++ + static inline void mmdrop(struct mm_struct * mm) + { + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); + } + ++#ifdef CONFIG_PREEMPT_RT_BASE ++extern void __mmdrop_delayed(struct rcu_head *rhp); ++static inline void mmdrop_delayed(struct mm_struct *mm) ++{ ++ if (atomic_dec_and_test(&mm->mm_count)) ++ call_rcu(&mm->delayed_drop, __mmdrop_delayed); ++} ++#else ++# define mmdrop_delayed(mm) mmdrop(mm) ++#endif ++ + /* mmput gets rid of the mappings and all user-space */ + extern void mmput(struct mm_struct *); + /* Grab a reference to a task's mm, if it is not already going away */ +@@ -2510,7 +2553,7 @@ extern int _cond_resched(void); + + extern int __cond_resched_lock(spinlock_t *lock); + +-#ifdef CONFIG_PREEMPT ++#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_RT_FULL) + #define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET + #else + #define PREEMPT_LOCK_OFFSET 0 +@@ -2521,12 +2564,16 @@ extern int __cond_resched_lock(spinlock_ + __cond_resched_lock(lock); \ + }) + ++#ifndef CONFIG_PREEMPT_RT_FULL + extern int __cond_resched_softirq(void); + + #define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ + __cond_resched_softirq(); \ + }) ++#else ++# define cond_resched_softirq() cond_resched() ++#endif + + /* + * Does a critical section need to be broken due to another +@@ -2550,7 +2597,7 @@ void thread_group_cputimer(struct task_s + + static inline void thread_group_cputime_init(struct signal_struct *sig) + { +- spin_lock_init(&sig->cputimer.lock); ++ raw_spin_lock_init(&sig->cputimer.lock); + } + + /* +@@ -2589,6 +2636,15 @@ static inline void set_task_cpu(struct t + + #endif /* CONFIG_SMP */ + ++/* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ ++static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) ++{ ++ if (p->migrate_disable) ++ return cpumask_of(task_cpu(p)); ++ ++ return &p->cpus_allowed; ++} ++ + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); + extern long sched_getaffinity(pid_t pid, struct cpumask *mask); + +Index: linux-2.6/arch/arm/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/arm/kernel/process.c ++++ linux-2.6/arch/arm/kernel/process.c +@@ -209,9 +209,7 @@ void cpu_idle(void) + } + leds_event(led_idle_end); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/avr32/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/avr32/kernel/process.c ++++ linux-2.6/arch/avr32/kernel/process.c +@@ -38,9 +38,7 @@ void cpu_idle(void) + while (!need_resched()) + cpu_idle_sleep(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/blackfin/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/blackfin/kernel/process.c ++++ linux-2.6/arch/blackfin/kernel/process.c +@@ -92,9 +92,7 @@ void cpu_idle(void) + while (!need_resched()) + idle(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/cris/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/cris/kernel/process.c ++++ linux-2.6/arch/cris/kernel/process.c +@@ -115,9 +115,7 @@ void cpu_idle (void) + idle = default_idle; + idle(); + } +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/frv/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/frv/kernel/process.c ++++ linux-2.6/arch/frv/kernel/process.c +@@ -92,9 +92,7 @@ void cpu_idle(void) + idle(); + } + +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/h8300/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/h8300/kernel/process.c ++++ linux-2.6/arch/h8300/kernel/process.c +@@ -81,9 +81,7 @@ void cpu_idle(void) + while (1) { + while (!need_resched()) + idle(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/ia64/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/ia64/kernel/process.c ++++ linux-2.6/arch/ia64/kernel/process.c +@@ -330,9 +330,7 @@ cpu_idle (void) + normal_xtp(); + #endif + } +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + check_pgt_cache(); + if (cpu_is_offline(cpu)) + play_dead(); +Index: linux-2.6/arch/m32r/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/m32r/kernel/process.c ++++ linux-2.6/arch/m32r/kernel/process.c +@@ -90,9 +90,7 @@ void cpu_idle (void) + + idle(); + } +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ 
schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/m68k/kernel/process_mm.c +=================================================================== +--- linux-2.6.orig/arch/m68k/kernel/process_mm.c ++++ linux-2.6/arch/m68k/kernel/process_mm.c +@@ -94,9 +94,7 @@ void cpu_idle(void) + while (1) { + while (!need_resched()) + idle(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/m68k/kernel/process_no.c +=================================================================== +--- linux-2.6.orig/arch/m68k/kernel/process_no.c ++++ linux-2.6/arch/m68k/kernel/process_no.c +@@ -73,9 +73,7 @@ void cpu_idle(void) + /* endless idle loop with no priority at all */ + while (1) { + idle(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/microblaze/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/microblaze/kernel/process.c ++++ linux-2.6/arch/microblaze/kernel/process.c +@@ -108,9 +108,7 @@ void cpu_idle(void) + idle(); + tick_nohz_restart_sched_tick(); + +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + check_pgt_cache(); + } + } +Index: linux-2.6/arch/mips/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/mips/kernel/process.c ++++ linux-2.6/arch/mips/kernel/process.c +@@ -78,9 +78,7 @@ void __noreturn cpu_idle(void) + play_dead(); + #endif + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/mn10300/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/mn10300/kernel/process.c ++++ linux-2.6/arch/mn10300/kernel/process.c +@@ -123,9 +123,7 @@ void cpu_idle(void) + idle(); + } + +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/parisc/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/parisc/kernel/process.c ++++ linux-2.6/arch/parisc/kernel/process.c +@@ -71,9 +71,7 @@ void cpu_idle(void) + while (1) { + while (!need_resched()) + barrier(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + check_pgt_cache(); + } + } +Index: linux-2.6/arch/powerpc/kernel/idle.c +=================================================================== +--- linux-2.6.orig/arch/powerpc/kernel/idle.c ++++ linux-2.6/arch/powerpc/kernel/idle.c +@@ -94,11 +94,11 @@ void cpu_idle(void) + HMT_medium(); + ppc64_runlatch_on(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- if (cpu_should_die()) ++ if (cpu_should_die()) { ++ __preempt_enable_no_resched(); + cpu_die(); +- schedule(); +- preempt_disable(); ++ } ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/powerpc/platforms/iseries/setup.c +=================================================================== +--- linux-2.6.orig/arch/powerpc/platforms/iseries/setup.c ++++ linux-2.6/arch/powerpc/platforms/iseries/setup.c +@@ -581,9 +581,7 @@ static void iseries_shared_idle(void) + if (hvlpevent_is_pending()) + process_iSeries_events(); + +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + 
} + +@@ -610,9 +608,7 @@ static void iseries_dedicated_idle(void) + + ppc64_runlatch_on(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/s390/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/s390/kernel/process.c ++++ linux-2.6/arch/s390/kernel/process.c +@@ -94,9 +94,7 @@ void cpu_idle(void) + while (!need_resched()) + default_idle(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/score/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/score/kernel/process.c ++++ linux-2.6/arch/score/kernel/process.c +@@ -53,9 +53,7 @@ void __noreturn cpu_idle(void) + while (!need_resched()) + barrier(); + +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/sh/kernel/idle.c +=================================================================== +--- linux-2.6.orig/arch/sh/kernel/idle.c ++++ linux-2.6/arch/sh/kernel/idle.c +@@ -110,9 +110,7 @@ void cpu_idle(void) + } + + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/sparc/kernel/process_32.c +=================================================================== +--- linux-2.6.orig/arch/sparc/kernel/process_32.c ++++ linux-2.6/arch/sparc/kernel/process_32.c +@@ -113,9 +113,7 @@ void cpu_idle(void) + while (!need_resched()) + cpu_relax(); + } +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + check_pgt_cache(); + } + } +@@ -138,9 +136,7 @@ void cpu_idle(void) + while (!need_resched()) + cpu_relax(); + } +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + check_pgt_cache(); + } + } +Index: linux-2.6/arch/sparc/kernel/process_64.c +=================================================================== +--- linux-2.6.orig/arch/sparc/kernel/process_64.c ++++ linux-2.6/arch/sparc/kernel/process_64.c +@@ -102,15 +102,13 @@ void cpu_idle(void) + + tick_nohz_restart_sched_tick(); + +- preempt_enable_no_resched(); +- + #ifdef CONFIG_HOTPLUG_CPU +- if (cpu_is_offline(cpu)) ++ if (cpu_is_offline(cpu)) { ++ __preempt_enable_no_resched(); + cpu_play_dead(); ++ } + #endif +- +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/tile/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/tile/kernel/process.c ++++ linux-2.6/arch/tile/kernel/process.c +@@ -106,9 +106,7 @@ void cpu_idle(void) + current_thread_info()->status |= TS_POLLING; + } + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/x86/kernel/process_32.c +=================================================================== +--- linux-2.6.orig/arch/x86/kernel/process_32.c ++++ linux-2.6/arch/x86/kernel/process_32.c +@@ -113,9 +113,7 @@ void cpu_idle(void) + start_critical_timings(); + } + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: 
linux-2.6/arch/x86/kernel/process_64.c +=================================================================== +--- linux-2.6.orig/arch/x86/kernel/process_64.c ++++ linux-2.6/arch/x86/kernel/process_64.c +@@ -146,9 +146,7 @@ void cpu_idle(void) + } + + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/arch/xtensa/kernel/process.c +=================================================================== +--- linux-2.6.orig/arch/xtensa/kernel/process.c ++++ linux-2.6/arch/xtensa/kernel/process.c +@@ -113,9 +113,7 @@ void cpu_idle(void) + while (1) { + while (!need_resched()) + platform_idle(); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + } + } + +Index: linux-2.6/init/main.c +=================================================================== +--- linux-2.6.orig/init/main.c ++++ linux-2.6/init/main.c +@@ -68,6 +68,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -367,9 +368,7 @@ static noinline void __init_refok rest_i + * at least once to get things moving: + */ + init_idle_bootup_task(current); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + + /* Call into cpu_idle with preempt disabled */ + cpu_idle(); +@@ -501,6 +500,7 @@ asmlinkage void __init start_kernel(void + parse_args("Booting kernel", static_command_line, __start___param, + __stop___param - __start___param, + &unknown_bootoption); ++ softirq_early_init(); + /* + * These use large bootmem allocations and must precede + * kmem_cache_init() +Index: linux-2.6/kernel/mutex.c +=================================================================== +--- linux-2.6.orig/kernel/mutex.c ++++ linux-2.6/kernel/mutex.c +@@ -240,9 +240,7 @@ __mutex_lock_common(struct mutex *lock, + + /* didn't get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); ++ schedule_preempt_disabled(); + spin_lock_mutex(&lock->wait_lock, flags); + } + +Index: linux-2.6/kernel/softirq.c +=================================================================== +--- linux-2.6.orig/kernel/softirq.c ++++ linux-2.6/kernel/softirq.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #define CREATE_TRACE_POINTS + #include +@@ -61,6 +62,67 @@ char *softirq_to_name[NR_SOFTIRQS] = { + "TASKLET", "SCHED", "HRTIMER", "RCU" + }; + ++#ifdef CONFIG_NO_HZ ++# ifdef CONFIG_PREEMPT_RT_FULL ++/* ++ * On preempt-rt a softirq might be blocked on a lock. There might be ++ * no other runnable task on this CPU because the lock owner runs on ++ * some other CPU. So we have to go into idle with the pending bit ++ * set. Therefor we need to check this otherwise we warn about false ++ * positives which confuses users and defeats the whole purpose of ++ * this test. ++ * ++ * This code is called with interrupts disabled. ++ */ ++void softirq_check_pending_idle(void) ++{ ++ static int rate_limit; ++ u32 warnpending = 0, pending = local_softirq_pending(); ++ ++ if (rate_limit >= 10) ++ return; ++ ++ if (pending) { ++ struct task_struct *tsk; ++ ++ tsk = __get_cpu_var(ksoftirqd); ++ /* ++ * The wakeup code in rtmutex.c wakes up the task ++ * _before_ it sets pi_blocked_on to NULL under ++ * tsk->pi_lock. So we need to check for both: state ++ * and pi_blocked_on. 
++ */ ++ raw_spin_lock(&tsk->pi_lock); ++ ++ if (!tsk->pi_blocked_on && !(tsk->state == TASK_RUNNING)) ++ warnpending = 1; ++ ++ raw_spin_unlock(&tsk->pi_lock); ++ } ++ ++ if (warnpending) { ++ printk(KERN_ERR "NOHZ: local_softirq_pending %02lx\n", ++ pending); ++ rate_limit++; ++ } ++} ++# else ++/* ++ * On !PREEMPT_RT we just printk rate limited: ++ */ ++void softirq_check_pending_idle(void) ++{ ++ static int rate_limit; ++ ++ if (rate_limit < 10) { ++ printk(KERN_ERR "NOHZ: local_softirq_pending %02lx\n", ++ local_softirq_pending()); ++ rate_limit++; ++ } ++} ++# endif ++#endif ++ + /* + * we cannot loop indefinitely here to avoid userspace starvation, + * but we also don't want to introduce a worst case 1/HZ latency +@@ -76,6 +138,35 @@ static void wakeup_softirqd(void) + wake_up_process(tsk); + } + ++static void handle_pending_softirqs(u32 pending, int cpu) ++{ ++ struct softirq_action *h = softirq_vec; ++ unsigned int prev_count = preempt_count(); ++ ++ local_irq_enable(); ++ for ( ; pending; h++, pending >>= 1) { ++ unsigned int vec_nr = h - softirq_vec; ++ ++ if (!(pending & 1)) ++ continue; ++ ++ kstat_incr_softirqs_this_cpu(vec_nr); ++ trace_softirq_entry(vec_nr); ++ h->action(h); ++ trace_softirq_exit(vec_nr); ++ if (unlikely(prev_count != preempt_count())) { ++ printk(KERN_ERR ++ "huh, entered softirq %u %s %p with preempt_count %08x exited with %08x?\n", ++ vec_nr, softirq_to_name[vec_nr], h->action, ++ prev_count, (unsigned int) preempt_count()); ++ preempt_count() = prev_count; ++ } ++ rcu_bh_qs(cpu); ++ } ++ local_irq_disable(); ++} ++ ++#ifndef CONFIG_PREEMPT_RT_FULL + /* + * preempt_count and SOFTIRQ_OFFSET usage: + * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving +@@ -206,7 +297,6 @@ EXPORT_SYMBOL(local_bh_enable_ip); + + asmlinkage void __do_softirq(void) + { +- struct softirq_action *h; + __u32 pending; + int max_restart = MAX_SOFTIRQ_RESTART; + int cpu; +@@ -215,7 +305,7 @@ asmlinkage void __do_softirq(void) + account_system_vtime(current); + + __local_bh_disable((unsigned long)__builtin_return_address(0), +- SOFTIRQ_OFFSET); ++ SOFTIRQ_OFFSET); + lockdep_softirq_enter(); + + cpu = smp_processor_id(); +@@ -223,36 +313,7 @@ restart: + /* Reset the pending bitmask before enabling irqs */ + set_softirq_pending(0); + +- local_irq_enable(); +- +- h = softirq_vec; +- +- do { +- if (pending & 1) { +- unsigned int vec_nr = h - softirq_vec; +- int prev_count = preempt_count(); +- +- kstat_incr_softirqs_this_cpu(vec_nr); +- +- trace_softirq_entry(vec_nr); +- h->action(h); +- trace_softirq_exit(vec_nr); +- if (unlikely(prev_count != preempt_count())) { +- printk(KERN_ERR "huh, entered softirq %u %s %p" +- "with preempt_count %08x," +- " exited with %08x?\n", vec_nr, +- softirq_to_name[vec_nr], h->action, +- prev_count, preempt_count()); +- preempt_count() = prev_count; +- } +- +- rcu_bh_qs(cpu); +- } +- h++; +- pending >>= 1; +- } while (pending); +- +- local_irq_disable(); ++ handle_pending_softirqs(pending, cpu); + + pending = local_softirq_pending(); + if (pending && --max_restart) +@@ -267,6 +328,26 @@ restart: + __local_bh_enable(SOFTIRQ_OFFSET); + } + ++/* ++ * Called with preemption disabled from run_ksoftirqd() ++ */ ++static int ksoftirqd_do_softirq(int cpu) ++{ ++ /* ++ * Preempt disable stops cpu going offline. ++ * If already offline, we'll be on wrong CPU: ++ * don't process. 
++ */ ++ if (cpu_is_offline(cpu)) ++ return -1; ++ ++ local_irq_disable(); ++ if (local_softirq_pending()) ++ __do_softirq(); ++ local_irq_enable(); ++ return 0; ++} ++ + #ifndef __ARCH_HAS_DO_SOFTIRQ + + asmlinkage void do_softirq(void) +@@ -289,6 +370,178 @@ asmlinkage void do_softirq(void) + + #endif + ++static inline void local_bh_disable_nort(void) { local_bh_disable(); } ++static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } ++static inline void ksoftirqd_set_sched_params(void) { } ++static inline void ksoftirqd_clr_sched_params(void) { } ++ ++#else /* !PREEMPT_RT_FULL */ ++ ++/* ++ * On RT we serialize softirq execution with a cpu local lock ++ */ ++static DEFINE_LOCAL_IRQ_LOCK(local_softirq_lock); ++static DEFINE_PER_CPU(struct task_struct *, local_softirq_runner); ++ ++static void __do_softirq(void); ++ ++void __init softirq_early_init(void) ++{ ++ local_irq_lock_init(local_softirq_lock); ++} ++ ++void local_bh_disable(void) ++{ ++ migrate_disable(); ++ current->softirq_nestcnt++; ++} ++EXPORT_SYMBOL(local_bh_disable); ++ ++void local_bh_enable(void) ++{ ++ if (WARN_ON(current->softirq_nestcnt == 0)) ++ return; ++ ++ if ((current->softirq_nestcnt == 1) && ++ local_softirq_pending() && ++ local_trylock(local_softirq_lock)) { ++ ++ local_irq_disable(); ++ if (local_softirq_pending()) ++ __do_softirq(); ++ local_unlock(local_softirq_lock); ++ local_irq_enable(); ++ WARN_ON(current->softirq_nestcnt != 1); ++ } ++ current->softirq_nestcnt--; ++ migrate_enable(); ++} ++EXPORT_SYMBOL(local_bh_enable); ++ ++void local_bh_enable_ip(unsigned long ip) ++{ ++ local_bh_enable(); ++} ++EXPORT_SYMBOL(local_bh_enable_ip); ++ ++/* For tracing */ ++int notrace __in_softirq(void) ++{ ++ if (__get_cpu_var(local_softirq_lock).owner == current) ++ return __get_cpu_var(local_softirq_lock).nestcnt; ++ return 0; ++} ++ ++int in_serving_softirq(void) ++{ ++ int res; ++ ++ preempt_disable(); ++ res = __get_cpu_var(local_softirq_runner) == current; ++ preempt_enable(); ++ return res; ++} ++ ++/* ++ * Called with bh and local interrupts disabled. For full RT cpu must ++ * be pinned. ++ */ ++static void __do_softirq(void) ++{ ++ u32 pending = local_softirq_pending(); ++ int cpu = smp_processor_id(); ++ ++ current->softirq_nestcnt++; ++ ++ /* Reset the pending bitmask before enabling irqs */ ++ set_softirq_pending(0); ++ ++ __get_cpu_var(local_softirq_runner) = current; ++ ++ lockdep_softirq_enter(); ++ ++ handle_pending_softirqs(pending, cpu); ++ ++ pending = local_softirq_pending(); ++ if (pending) ++ wakeup_softirqd(); ++ ++ lockdep_softirq_exit(); ++ __get_cpu_var(local_softirq_runner) = NULL; ++ ++ current->softirq_nestcnt--; ++} ++ ++static int __thread_do_softirq(int cpu) ++{ ++ /* ++ * Prevent the current cpu from going offline. ++ * pin_current_cpu() can reenable preemption and block on the ++ * hotplug mutex. When it returns, the current cpu is ++ * pinned. It might be the wrong one, but the offline check ++ * below catches that. ++ */ ++ pin_current_cpu(); ++ /* ++ * If called from ksoftirqd (cpu >= 0) we need to check ++ * whether we are on the wrong cpu due to cpu offlining. If ++ * called via thread_do_softirq() no action required. ++ */ ++ if (cpu >= 0 && cpu_is_offline(cpu)) { ++ unpin_current_cpu(); ++ return -1; ++ } ++ preempt_enable(); ++ local_lock(local_softirq_lock); ++ local_irq_disable(); ++ /* ++ * We cannot switch stacks on RT as we want to be able to ++ * schedule! 
++ */ ++ if (local_softirq_pending()) ++ __do_softirq(); ++ local_unlock(local_softirq_lock); ++ unpin_current_cpu(); ++ preempt_disable(); ++ local_irq_enable(); ++ return 0; ++} ++ ++/* ++ * Called from netif_rx_ni(). Preemption enabled. ++ */ ++void thread_do_softirq(void) ++{ ++ if (!in_serving_softirq()) { ++ preempt_disable(); ++ __thread_do_softirq(-1); ++ preempt_enable(); ++ } ++} ++ ++static int ksoftirqd_do_softirq(int cpu) ++{ ++ return __thread_do_softirq(cpu); ++} ++ ++static inline void local_bh_disable_nort(void) { } ++static inline void _local_bh_enable_nort(void) { } ++ ++static inline void ksoftirqd_set_sched_params(void) ++{ ++ struct sched_param param = { .sched_priority = 1 }; ++ ++ sched_setscheduler(current, SCHED_FIFO, ¶m); ++} ++ ++static inline void ksoftirqd_clr_sched_params(void) ++{ ++ struct sched_param param = { .sched_priority = 0 }; ++ ++ sched_setscheduler(current, SCHED_NORMAL, ¶m); ++} ++ ++#endif /* PREEMPT_RT_FULL */ + /* + * Enter an interrupt context. + */ +@@ -302,9 +555,9 @@ void irq_enter(void) + * Prevent raise_softirq from needlessly waking up ksoftirqd + * here, as softirq will be serviced on return from interrupt. + */ +- local_bh_disable(); ++ local_bh_disable_nort(); + tick_check_idle(cpu); +- _local_bh_enable(); ++ _local_bh_enable_nort(); + } + + __irq_enter(); +@@ -313,6 +566,7 @@ void irq_enter(void) + #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED + static inline void invoke_softirq(void) + { ++#ifndef CONFIG_PREEMPT_RT_FULL + if (!force_irqthreads) + __do_softirq(); + else { +@@ -321,10 +575,14 @@ static inline void invoke_softirq(void) + wakeup_softirqd(); + __local_bh_enable(SOFTIRQ_OFFSET); + } ++#else ++ wakeup_softirqd(); ++#endif + } + #else + static inline void invoke_softirq(void) + { ++#ifndef CONFIG_PREEMPT_RT_FULL + if (!force_irqthreads) + do_softirq(); + else { +@@ -333,6 +591,9 @@ static inline void invoke_softirq(void) + wakeup_softirqd(); + __local_bh_enable(SOFTIRQ_OFFSET); + } ++#else ++ wakeup_softirqd(); ++#endif + } + #endif + +@@ -353,7 +614,7 @@ void irq_exit(void) + if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) + tick_nohz_stop_sched_tick(0); + #endif +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + } + + /* +@@ -739,29 +1000,21 @@ void __init softirq_init(void) + + static int run_ksoftirqd(void * __bind_cpu) + { ++ ksoftirqd_set_sched_params(); ++ + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + preempt_disable(); +- if (!local_softirq_pending()) { +- preempt_enable_no_resched(); +- schedule(); +- preempt_disable(); +- } ++ if (!local_softirq_pending()) ++ schedule_preempt_disabled(); + + __set_current_state(TASK_RUNNING); + + while (local_softirq_pending()) { +- /* Preempt disable stops cpu going offline. 
+- If already offline, we'll be on wrong CPU: +- don't process */ +- if (cpu_is_offline((long)__bind_cpu)) ++ if (ksoftirqd_do_softirq((long) __bind_cpu)) + goto wait_to_die; +- local_irq_disable(); +- if (local_softirq_pending()) +- __do_softirq(); +- local_irq_enable(); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + cond_resched(); + preempt_disable(); + rcu_note_context_switch((long)__bind_cpu); +@@ -774,6 +1027,7 @@ static int run_ksoftirqd(void * __bind_c + + wait_to_die: + preempt_enable(); ++ ksoftirqd_clr_sched_params(); + /* Wait for kthread_stop */ + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { +Index: linux-2.6/include/linux/kprobes.h +=================================================================== +--- linux-2.6.orig/include/linux/kprobes.h ++++ linux-2.6/include/linux/kprobes.h +@@ -181,7 +181,7 @@ struct kretprobe { + int nmissed; + size_t data_size; + struct hlist_head free_instances; +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + struct kretprobe_instance { +Index: linux-2.6/kernel/kprobes.c +=================================================================== +--- linux-2.6.orig/kernel/kprobes.c ++++ linux-2.6/kernel/kprobes.c +@@ -78,10 +78,10 @@ static bool kprobes_all_disarmed; + static DEFINE_MUTEX(kprobe_mutex); + static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; + static struct { +- spinlock_t lock ____cacheline_aligned_in_smp; ++ raw_spinlock_t lock ____cacheline_aligned_in_smp; + } kretprobe_table_locks[KPROBE_TABLE_SIZE]; + +-static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) ++static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) + { + return &(kretprobe_table_locks[hash].lock); + } +@@ -1013,9 +1013,9 @@ void __kprobes recycle_rp_inst(struct kr + hlist_del(&ri->hlist); + INIT_HLIST_NODE(&ri->hlist); + if (likely(rp)) { +- spin_lock(&rp->lock); ++ raw_spin_lock(&rp->lock); + hlist_add_head(&ri->hlist, &rp->free_instances); +- spin_unlock(&rp->lock); ++ raw_spin_unlock(&rp->lock); + } else + /* Unregistering */ + hlist_add_head(&ri->hlist, head); +@@ -1026,19 +1026,19 @@ void __kprobes kretprobe_hash_lock(struc + __acquires(hlist_lock) + { + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); +- spinlock_t *hlist_lock; ++ raw_spinlock_t *hlist_lock; + + *head = &kretprobe_inst_table[hash]; + hlist_lock = kretprobe_table_lock_ptr(hash); +- spin_lock_irqsave(hlist_lock, *flags); ++ raw_spin_lock_irqsave(hlist_lock, *flags); + } + + static void __kprobes kretprobe_table_lock(unsigned long hash, + unsigned long *flags) + __acquires(hlist_lock) + { +- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); +- spin_lock_irqsave(hlist_lock, *flags); ++ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); ++ raw_spin_lock_irqsave(hlist_lock, *flags); + } + + void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, +@@ -1046,18 +1046,18 @@ void __kprobes kretprobe_hash_unlock(str + __releases(hlist_lock) + { + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); +- spinlock_t *hlist_lock; ++ raw_spinlock_t *hlist_lock; + + hlist_lock = kretprobe_table_lock_ptr(hash); +- spin_unlock_irqrestore(hlist_lock, *flags); ++ raw_spin_unlock_irqrestore(hlist_lock, *flags); + } + + static void __kprobes kretprobe_table_unlock(unsigned long hash, + unsigned long *flags) + __releases(hlist_lock) + { +- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); +- spin_unlock_irqrestore(hlist_lock, *flags); ++ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); ++ 
raw_spin_unlock_irqrestore(hlist_lock, *flags); + } + + /* +@@ -1650,12 +1650,12 @@ static int __kprobes pre_handler_kretpro + + /*TODO: consider to only swap the RA after the last pre_handler fired */ + hash = hash_ptr(current, KPROBE_HASH_BITS); +- spin_lock_irqsave(&rp->lock, flags); ++ raw_spin_lock_irqsave(&rp->lock, flags); + if (!hlist_empty(&rp->free_instances)) { + ri = hlist_entry(rp->free_instances.first, + struct kretprobe_instance, hlist); + hlist_del(&ri->hlist); +- spin_unlock_irqrestore(&rp->lock, flags); ++ raw_spin_unlock_irqrestore(&rp->lock, flags); + + ri->rp = rp; + ri->task = current; +@@ -1672,7 +1672,7 @@ static int __kprobes pre_handler_kretpro + kretprobe_table_unlock(hash, &flags); + } else { + rp->nmissed++; +- spin_unlock_irqrestore(&rp->lock, flags); ++ raw_spin_unlock_irqrestore(&rp->lock, flags); + } + return 0; + } +@@ -1708,7 +1708,7 @@ int __kprobes register_kretprobe(struct + rp->maxactive = num_possible_cpus(); + #endif + } +- spin_lock_init(&rp->lock); ++ raw_spin_lock_init(&rp->lock); + INIT_HLIST_HEAD(&rp->free_instances); + for (i = 0; i < rp->maxactive; i++) { + inst = kmalloc(sizeof(struct kretprobe_instance) + +@@ -1946,7 +1946,7 @@ static int __init init_kprobes(void) + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + INIT_HLIST_HEAD(&kprobe_table[i]); + INIT_HLIST_HEAD(&kretprobe_inst_table[i]); +- spin_lock_init(&(kretprobe_table_locks[i].lock)); ++ raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); + } + + /* +Index: linux-2.6/include/linux/percpu_counter.h +=================================================================== +--- linux-2.6.orig/include/linux/percpu_counter.h ++++ linux-2.6/include/linux/percpu_counter.h +@@ -16,7 +16,7 @@ + #ifdef CONFIG_SMP + + struct percpu_counter { +- spinlock_t lock; ++ raw_spinlock_t lock; + s64 count; + #ifdef CONFIG_HOTPLUG_CPU + struct list_head list; /* All percpu_counters are on a list */ +Index: linux-2.6/lib/percpu_counter.c +=================================================================== +--- linux-2.6.orig/lib/percpu_counter.c ++++ linux-2.6/lib/percpu_counter.c +@@ -59,13 +59,13 @@ void percpu_counter_set(struct percpu_co + { + int cpu; + +- spin_lock(&fbc->lock); ++ raw_spin_lock(&fbc->lock); + for_each_possible_cpu(cpu) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + *pcount = 0; + } + fbc->count = amount; +- spin_unlock(&fbc->lock); ++ raw_spin_unlock(&fbc->lock); + } + EXPORT_SYMBOL(percpu_counter_set); + +@@ -76,10 +76,10 @@ void __percpu_counter_add(struct percpu_ + preempt_disable(); + count = __this_cpu_read(*fbc->counters) + amount; + if (count >= batch || count <= -batch) { +- spin_lock(&fbc->lock); ++ raw_spin_lock(&fbc->lock); + fbc->count += count; + __this_cpu_write(*fbc->counters, 0); +- spin_unlock(&fbc->lock); ++ raw_spin_unlock(&fbc->lock); + } else { + __this_cpu_write(*fbc->counters, count); + } +@@ -96,13 +96,13 @@ s64 __percpu_counter_sum(struct percpu_c + s64 ret; + int cpu; + +- spin_lock(&fbc->lock); ++ raw_spin_lock(&fbc->lock); + ret = fbc->count; + for_each_online_cpu(cpu) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } +- spin_unlock(&fbc->lock); ++ raw_spin_unlock(&fbc->lock); + return ret; + } + EXPORT_SYMBOL(__percpu_counter_sum); +@@ -110,7 +110,7 @@ EXPORT_SYMBOL(__percpu_counter_sum); + int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, + struct lock_class_key *key) + { +- spin_lock_init(&fbc->lock); ++ raw_spin_lock_init(&fbc->lock); + lockdep_set_class(&fbc->lock, key); + fbc->count = amount; + fbc->counters = 
alloc_percpu(s32); +@@ -173,11 +173,11 @@ static int __cpuinit percpu_counter_hotc + s32 *pcount; + unsigned long flags; + +- spin_lock_irqsave(&fbc->lock, flags); ++ raw_spin_lock_irqsave(&fbc->lock, flags); + pcount = per_cpu_ptr(fbc->counters, cpu); + fbc->count += *pcount; + *pcount = 0; +- spin_unlock_irqrestore(&fbc->lock, flags); ++ raw_spin_unlock_irqrestore(&fbc->lock, flags); + } + mutex_unlock(&percpu_counters_lock); + #endif +Index: linux-2.6/kernel/cgroup.c +=================================================================== +--- linux-2.6.orig/kernel/cgroup.c ++++ linux-2.6/kernel/cgroup.c +@@ -263,7 +263,7 @@ list_for_each_entry(_root, &roots, root_ + /* the list of cgroups eligible for automatic release. Protected by + * release_list_lock */ + static LIST_HEAD(release_list); +-static DEFINE_SPINLOCK(release_list_lock); ++static DEFINE_RAW_SPINLOCK(release_list_lock); + static void cgroup_release_agent(struct work_struct *work); + static DECLARE_WORK(release_agent_work, cgroup_release_agent); + static void check_for_release(struct cgroup *cgrp); +@@ -4010,11 +4010,11 @@ again: + finish_wait(&cgroup_rmdir_waitq, &wait); + clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); + +- spin_lock(&release_list_lock); ++ raw_spin_lock(&release_list_lock); + set_bit(CGRP_REMOVED, &cgrp->flags); + if (!list_empty(&cgrp->release_list)) + list_del_init(&cgrp->release_list); +- spin_unlock(&release_list_lock); ++ raw_spin_unlock(&release_list_lock); + + cgroup_lock_hierarchy(cgrp->root); + /* delete this cgroup from parent->children */ +@@ -4667,13 +4667,13 @@ static void check_for_release(struct cgr + * already queued for a userspace notification, queue + * it now */ + int need_schedule_work = 0; +- spin_lock(&release_list_lock); ++ raw_spin_lock(&release_list_lock); + if (!cgroup_is_removed(cgrp) && + list_empty(&cgrp->release_list)) { + list_add(&cgrp->release_list, &release_list); + need_schedule_work = 1; + } +- spin_unlock(&release_list_lock); ++ raw_spin_unlock(&release_list_lock); + if (need_schedule_work) + schedule_work(&release_agent_work); + } +@@ -4725,7 +4725,7 @@ static void cgroup_release_agent(struct + { + BUG_ON(work != &release_agent_work); + mutex_lock(&cgroup_mutex); +- spin_lock(&release_list_lock); ++ raw_spin_lock(&release_list_lock); + while (!list_empty(&release_list)) { + char *argv[3], *envp[3]; + int i; +@@ -4734,7 +4734,7 @@ static void cgroup_release_agent(struct + struct cgroup, + release_list); + list_del_init(&cgrp->release_list); +- spin_unlock(&release_list_lock); ++ raw_spin_unlock(&release_list_lock); + pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!pathbuf) + goto continue_free; +@@ -4764,9 +4764,9 @@ static void cgroup_release_agent(struct + continue_free: + kfree(pathbuf); + kfree(agentbuf); +- spin_lock(&release_list_lock); ++ raw_spin_lock(&release_list_lock); + } +- spin_unlock(&release_list_lock); ++ raw_spin_unlock(&release_list_lock); + mutex_unlock(&cgroup_mutex); + } + +Index: linux-2.6/include/linux/proportions.h +=================================================================== +--- linux-2.6.orig/include/linux/proportions.h ++++ linux-2.6/include/linux/proportions.h +@@ -58,7 +58,7 @@ struct prop_local_percpu { + */ + int shift; + unsigned long period; +- spinlock_t lock; /* protect the snapshot state */ ++ raw_spinlock_t lock; /* protect the snapshot state */ + }; + + int prop_local_init_percpu(struct prop_local_percpu *pl); +@@ -106,11 +106,11 @@ struct prop_local_single { + */ + unsigned long period; + int shift; +- spinlock_t lock; /* 
protect the snapshot state */ ++ raw_spinlock_t lock; /* protect the snapshot state */ + }; + + #define INIT_PROP_LOCAL_SINGLE(name) \ +-{ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ ++{ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + } + + int prop_local_init_single(struct prop_local_single *pl); +Index: linux-2.6/lib/proportions.c +=================================================================== +--- linux-2.6.orig/lib/proportions.c ++++ linux-2.6/lib/proportions.c +@@ -190,7 +190,7 @@ prop_adjust_shift(int *pl_shift, unsigne + + int prop_local_init_percpu(struct prop_local_percpu *pl) + { +- spin_lock_init(&pl->lock); ++ raw_spin_lock_init(&pl->lock); + pl->shift = 0; + pl->period = 0; + return percpu_counter_init(&pl->events, 0); +@@ -226,7 +226,7 @@ void prop_norm_percpu(struct prop_global + if (pl->period == global_period) + return; + +- spin_lock_irqsave(&pl->lock, flags); ++ raw_spin_lock_irqsave(&pl->lock, flags); + prop_adjust_shift(&pl->shift, &pl->period, pg->shift); + + /* +@@ -247,7 +247,7 @@ void prop_norm_percpu(struct prop_global + percpu_counter_set(&pl->events, 0); + + pl->period = global_period; +- spin_unlock_irqrestore(&pl->lock, flags); ++ raw_spin_unlock_irqrestore(&pl->lock, flags); + } + + /* +@@ -324,7 +324,7 @@ void prop_fraction_percpu(struct prop_de + + int prop_local_init_single(struct prop_local_single *pl) + { +- spin_lock_init(&pl->lock); ++ raw_spin_lock_init(&pl->lock); + pl->shift = 0; + pl->period = 0; + pl->events = 0; +@@ -356,7 +356,7 @@ void prop_norm_single(struct prop_global + if (pl->period == global_period) + return; + +- spin_lock_irqsave(&pl->lock, flags); ++ raw_spin_lock_irqsave(&pl->lock, flags); + prop_adjust_shift(&pl->shift, &pl->period, pg->shift); + /* + * For each missed period, we half the local counter. 
+@@ -367,7 +367,7 @@ void prop_norm_single(struct prop_global + else + pl->events = 0; + pl->period = global_period; +- spin_unlock_irqrestore(&pl->lock, flags); ++ raw_spin_unlock_irqrestore(&pl->lock, flags); + } + + /* +Index: linux-2.6/kernel/trace/ring_buffer.c +=================================================================== +--- linux-2.6.orig/kernel/trace/ring_buffer.c ++++ linux-2.6/kernel/trace/ring_buffer.c +@@ -478,7 +478,7 @@ struct ring_buffer_per_cpu { + int cpu; + atomic_t record_disabled; + struct ring_buffer *buffer; +- spinlock_t reader_lock; /* serialize readers */ ++ raw_spinlock_t reader_lock; /* serialize readers */ + arch_spinlock_t lock; + struct lock_class_key lock_key; + struct list_head *pages; +@@ -1055,7 +1055,7 @@ rb_allocate_cpu_buffer(struct ring_buffe + + cpu_buffer->cpu = cpu; + cpu_buffer->buffer = buffer; +- spin_lock_init(&cpu_buffer->reader_lock); ++ raw_spin_lock_init(&cpu_buffer->reader_lock); + lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key); + cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + +@@ -1252,7 +1252,7 @@ rb_remove_pages(struct ring_buffer_per_c + struct list_head *p; + unsigned i; + +- spin_lock_irq(&cpu_buffer->reader_lock); ++ raw_spin_lock_irq(&cpu_buffer->reader_lock); + rb_head_page_deactivate(cpu_buffer); + + for (i = 0; i < nr_pages; i++) { +@@ -1270,7 +1270,7 @@ rb_remove_pages(struct ring_buffer_per_c + rb_check_pages(cpu_buffer); + + out: +- spin_unlock_irq(&cpu_buffer->reader_lock); ++ raw_spin_unlock_irq(&cpu_buffer->reader_lock); + } + + static void +@@ -1281,7 +1281,7 @@ rb_insert_pages(struct ring_buffer_per_c + struct list_head *p; + unsigned i; + +- spin_lock_irq(&cpu_buffer->reader_lock); ++ raw_spin_lock_irq(&cpu_buffer->reader_lock); + rb_head_page_deactivate(cpu_buffer); + + for (i = 0; i < nr_pages; i++) { +@@ -1296,7 +1296,7 @@ rb_insert_pages(struct ring_buffer_per_c + rb_check_pages(cpu_buffer); + + out: +- spin_unlock_irq(&cpu_buffer->reader_lock); ++ raw_spin_unlock_irq(&cpu_buffer->reader_lock); + } + + /** +@@ -2790,9 +2790,9 @@ void ring_buffer_iter_reset(struct ring_ + + cpu_buffer = iter->cpu_buffer; + +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + rb_iter_reset(iter); +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); + +@@ -3251,12 +3251,12 @@ ring_buffer_peek(struct ring_buffer *buf + again: + local_irq_save(flags); + if (dolock) +- spin_lock(&cpu_buffer->reader_lock); ++ raw_spin_lock(&cpu_buffer->reader_lock); + event = rb_buffer_peek(cpu_buffer, ts, lost_events); + if (event && event->type_len == RINGBUF_TYPE_PADDING) + rb_advance_reader(cpu_buffer); + if (dolock) +- spin_unlock(&cpu_buffer->reader_lock); ++ raw_spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); + + if (event && event->type_len == RINGBUF_TYPE_PADDING) +@@ -3281,9 +3281,9 @@ ring_buffer_iter_peek(struct ring_buffer + unsigned long flags; + + again: +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_iter_peek(iter, ts); +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + if (event && event->type_len == RINGBUF_TYPE_PADDING) + goto again; +@@ -3323,7 +3323,7 @@ ring_buffer_consume(struct ring_buffer * + cpu_buffer = buffer->buffers[cpu]; + 
local_irq_save(flags); + if (dolock) +- spin_lock(&cpu_buffer->reader_lock); ++ raw_spin_lock(&cpu_buffer->reader_lock); + + event = rb_buffer_peek(cpu_buffer, ts, lost_events); + if (event) { +@@ -3332,7 +3332,7 @@ ring_buffer_consume(struct ring_buffer * + } + + if (dolock) +- spin_unlock(&cpu_buffer->reader_lock); ++ raw_spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); + + out: +@@ -3424,11 +3424,11 @@ ring_buffer_read_start(struct ring_buffe + + cpu_buffer = iter->cpu_buffer; + +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + arch_spin_lock(&cpu_buffer->lock); + rb_iter_reset(iter); + arch_spin_unlock(&cpu_buffer->lock); +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + } + EXPORT_SYMBOL_GPL(ring_buffer_read_start); + +@@ -3463,7 +3463,7 @@ ring_buffer_read(struct ring_buffer_iter + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + unsigned long flags; + +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + again: + event = rb_iter_peek(iter, ts); + if (!event) +@@ -3474,7 +3474,7 @@ ring_buffer_read(struct ring_buffer_iter + + rb_advance_iter(iter); + out: +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return event; + } +@@ -3543,7 +3543,7 @@ void ring_buffer_reset_cpu(struct ring_b + + atomic_inc(&cpu_buffer->record_disabled); + +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing))) + goto out; +@@ -3555,7 +3555,7 @@ void ring_buffer_reset_cpu(struct ring_b + arch_spin_unlock(&cpu_buffer->lock); + + out: +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + atomic_dec(&cpu_buffer->record_disabled); + } +@@ -3593,10 +3593,10 @@ int ring_buffer_empty(struct ring_buffer + cpu_buffer = buffer->buffers[cpu]; + local_irq_save(flags); + if (dolock) +- spin_lock(&cpu_buffer->reader_lock); ++ raw_spin_lock(&cpu_buffer->reader_lock); + ret = rb_per_cpu_empty(cpu_buffer); + if (dolock) +- spin_unlock(&cpu_buffer->reader_lock); ++ raw_spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); + + if (!ret) +@@ -3627,10 +3627,10 @@ int ring_buffer_empty_cpu(struct ring_bu + cpu_buffer = buffer->buffers[cpu]; + local_irq_save(flags); + if (dolock) +- spin_lock(&cpu_buffer->reader_lock); ++ raw_spin_lock(&cpu_buffer->reader_lock); + ret = rb_per_cpu_empty(cpu_buffer); + if (dolock) +- spin_unlock(&cpu_buffer->reader_lock); ++ raw_spin_unlock(&cpu_buffer->reader_lock); + local_irq_restore(flags); + + return ret; +@@ -3826,7 +3826,7 @@ int ring_buffer_read_page(struct ring_bu + if (!bpage) + goto out; + +- spin_lock_irqsave(&cpu_buffer->reader_lock, flags); ++ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + reader = rb_get_reader_page(cpu_buffer); + if (!reader) +@@ -3949,7 +3949,7 @@ int ring_buffer_read_page(struct ring_bu + memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit); + + out_unlock: +- spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); ++ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + out: + return ret; +Index: linux-2.6/kernel/trace/trace.c +=================================================================== +--- 
linux-2.6.orig/kernel/trace/trace.c ++++ linux-2.6/kernel/trace/trace.c +@@ -341,7 +341,7 @@ unsigned long trace_flags = TRACE_ITER_P + TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE; + + static int trace_stop_count; +-static DEFINE_SPINLOCK(tracing_start_lock); ++static DEFINE_RAW_SPINLOCK(tracing_start_lock); + + /** + * trace_wake_up - wake up tasks waiting for trace input +@@ -958,7 +958,7 @@ void tracing_start(void) + if (tracing_disabled) + return; + +- spin_lock_irqsave(&tracing_start_lock, flags); ++ raw_spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) { + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ +@@ -983,7 +983,7 @@ void tracing_start(void) + + ftrace_start(); + out: +- spin_unlock_irqrestore(&tracing_start_lock, flags); ++ raw_spin_unlock_irqrestore(&tracing_start_lock, flags); + } + + /** +@@ -998,7 +998,7 @@ void tracing_stop(void) + unsigned long flags; + + ftrace_stop(); +- spin_lock_irqsave(&tracing_start_lock, flags); ++ raw_spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + +@@ -1016,7 +1016,7 @@ void tracing_stop(void) + arch_spin_unlock(&ftrace_max_lock); + + out: +- spin_unlock_irqrestore(&tracing_start_lock, flags); ++ raw_spin_unlock_irqrestore(&tracing_start_lock, flags); + } + + void trace_stop_cmdline_recording(void); +@@ -1120,6 +1120,8 @@ tracing_generic_entry_update(struct trac + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | + ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); ++ ++ entry->migrate_disable = (tsk) ? tsk->migrate_disable & 0xFF : 0; + } + EXPORT_SYMBOL_GPL(tracing_generic_entry_update); + +@@ -1757,9 +1759,10 @@ static void print_lat_help_header(struct + seq_puts(m, "# | / _----=> need-resched \n"); + seq_puts(m, "# || / _---=> hardirq/softirq \n"); + seq_puts(m, "# ||| / _--=> preempt-depth \n"); +- seq_puts(m, "# |||| / delay \n"); +- seq_puts(m, "# cmd pid ||||| time | caller \n"); +- seq_puts(m, "# \\ / ||||| \\ | / \n"); ++ seq_puts(m, "# |||| / _--=> migrate-disable\n"); ++ seq_puts(m, "# ||||| / delay \n"); ++ seq_puts(m, "# cmd pid |||||| time | caller \n"); ++ seq_puts(m, "# \\ / ||||| \\ | / \n"); + } + + static void print_func_help_header(struct seq_file *m) +Index: linux-2.6/kernel/trace/trace_irqsoff.c +=================================================================== +--- linux-2.6.orig/kernel/trace/trace_irqsoff.c ++++ linux-2.6/kernel/trace/trace_irqsoff.c +@@ -23,7 +23,7 @@ static int tracer_enabled __read_most + + static DEFINE_PER_CPU(int, tracing_cpu); + +-static DEFINE_SPINLOCK(max_trace_lock); ++static DEFINE_RAW_SPINLOCK(max_trace_lock); + + enum { + TRACER_IRQS_OFF = (1 << 1), +@@ -319,7 +319,7 @@ check_critical_timing(struct trace_array + if (!report_latency(delta)) + goto out; + +- spin_lock_irqsave(&max_trace_lock, flags); ++ raw_spin_lock_irqsave(&max_trace_lock, flags); + + /* check if we are still the max latency */ + if (!report_latency(delta)) +@@ -342,7 +342,7 @@ check_critical_timing(struct trace_array + max_sequence++; + + out_unlock: +- spin_unlock_irqrestore(&max_trace_lock, flags); ++ raw_spin_unlock_irqrestore(&max_trace_lock, flags); + + out: + data->critical_sequence = max_sequence; +Index: linux-2.6/include/linux/ratelimit.h +=================================================================== +--- linux-2.6.orig/include/linux/ratelimit.h ++++ linux-2.6/include/linux/ratelimit.h +@@ -8,7 +8,7 @@ + #define DEFAULT_RATELIMIT_BURST 10 + + struct 
ratelimit_state { +- spinlock_t lock; /* protect the state */ ++ raw_spinlock_t lock; /* protect the state */ + + int interval; + int burst; +@@ -20,7 +20,7 @@ struct ratelimit_state { + #define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = { \ +- .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } +@@ -28,7 +28,7 @@ struct ratelimit_state { + static inline void ratelimit_state_init(struct ratelimit_state *rs, + int interval, int burst) + { +- spin_lock_init(&rs->lock); ++ raw_spin_lock_init(&rs->lock); + rs->interval = interval; + rs->burst = burst; + rs->printed = 0; +Index: linux-2.6/kernel/printk.c +=================================================================== +--- linux-2.6.orig/kernel/printk.c ++++ linux-2.6/kernel/printk.c +@@ -44,13 +44,6 @@ + + #include + +-/* +- * Architectures can override it: +- */ +-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) +-{ +-} +- + #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) + + /* printk's without a loglevel use this.. */ +@@ -100,7 +93,7 @@ static int console_locked, console_suspe + * It is also used in interesting ways to provide interlocking in + * console_unlock();. + */ +-static DEFINE_SPINLOCK(logbuf_lock); ++static DEFINE_RAW_SPINLOCK(logbuf_lock); + + #define LOG_BUF_MASK (log_buf_len-1) + #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) +@@ -212,7 +205,7 @@ void __init setup_log_buf(int early) + return; + } + +- spin_lock_irqsave(&logbuf_lock, flags); ++ raw_spin_lock_irqsave(&logbuf_lock, flags); + log_buf_len = new_log_buf_len; + log_buf = new_log_buf; + new_log_buf_len = 0; +@@ -230,7 +223,7 @@ void __init setup_log_buf(int early) + log_start -= offset; + con_start -= offset; + log_end -= offset; +- spin_unlock_irqrestore(&logbuf_lock, flags); ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + pr_info("log_buf_len: %d\n", log_buf_len); + pr_info("early log buf free: %d(%d%%)\n", +@@ -363,18 +356,18 @@ int do_syslog(int type, char __user *buf + if (error) + goto out; + i = 0; +- spin_lock_irq(&logbuf_lock); ++ raw_spin_lock_irq(&logbuf_lock); + while (!error && (log_start != log_end) && i < len) { + c = LOG_BUF(log_start); + log_start++; +- spin_unlock_irq(&logbuf_lock); ++ raw_spin_unlock_irq(&logbuf_lock); + error = __put_user(c,buf); + buf++; + i++; + cond_resched(); +- spin_lock_irq(&logbuf_lock); ++ raw_spin_lock_irq(&logbuf_lock); + } +- spin_unlock_irq(&logbuf_lock); ++ raw_spin_unlock_irq(&logbuf_lock); + if (!error) + error = i; + break; +@@ -397,7 +390,7 @@ int do_syslog(int type, char __user *buf + count = len; + if (count > log_buf_len) + count = log_buf_len; +- spin_lock_irq(&logbuf_lock); ++ raw_spin_lock_irq(&logbuf_lock); + if (count > logged_chars) + count = logged_chars; + if (do_clear) +@@ -414,12 +407,12 @@ int do_syslog(int type, char __user *buf + if (j + log_buf_len < log_end) + break; + c = LOG_BUF(j); +- spin_unlock_irq(&logbuf_lock); ++ raw_spin_unlock_irq(&logbuf_lock); + error = __put_user(c,&buf[count-1-i]); + cond_resched(); +- spin_lock_irq(&logbuf_lock); ++ raw_spin_lock_irq(&logbuf_lock); + } +- spin_unlock_irq(&logbuf_lock); ++ raw_spin_unlock_irq(&logbuf_lock); + if (error) + break; + error = i; +@@ -509,6 +502,7 @@ static void __call_console_drivers(unsig + { + struct console *con; + ++ migrate_disable(); + for_each_console(con) { + if (exclusive_console && con != exclusive_console) + continue; +@@ -517,7 
+511,54 @@ static void __call_console_drivers(unsig + (con->flags & CON_ANYTIME))) + con->write(con, &LOG_BUF(start), end - start); + } ++ migrate_enable(); ++} ++ ++#ifdef CONFIG_EARLY_PRINTK ++struct console *early_console; ++ ++static void early_vprintk(const char *fmt, va_list ap) ++{ ++ char buf[512]; ++ int n = vscnprintf(buf, sizeof(buf), fmt, ap); ++ if (early_console) ++ early_console->write(early_console, buf, n); ++} ++ ++asmlinkage void early_printk(const char *fmt, ...) ++{ ++ va_list ap; ++ va_start(ap, fmt); ++ early_vprintk(fmt, ap); ++ va_end(ap); ++} ++ ++/* ++ * This is independent of any log levels - a global ++ * kill switch that turns off all of printk. ++ * ++ * Used by the NMI watchdog if early-printk is enabled. ++ */ ++static int __read_mostly printk_killswitch; ++ ++void printk_kill(void) ++{ ++ printk_killswitch = 1; ++} ++ ++static int forced_early_printk(const char *fmt, va_list ap) ++{ ++ if (!printk_killswitch) ++ return 0; ++ early_vprintk(fmt, ap); ++ return 1; + } ++#else ++static inline int forced_early_printk(const char *fmt, va_list ap) ++{ ++ return 0; ++} ++#endif + + static int __read_mostly ignore_loglevel; + +@@ -687,7 +728,7 @@ static void zap_locks(void) + oops_timestamp = jiffies; + + /* If a crash is occurring, make sure we can't deadlock */ +- spin_lock_init(&logbuf_lock); ++ raw_spin_lock_init(&logbuf_lock); + /* And make sure that we print immediately */ + sema_init(&console_sem, 1); + } +@@ -779,12 +820,18 @@ static inline int can_use_console(unsign + * interrupts disabled. It should return with 'lockbuf_lock' + * released but interrupts still disabled. + */ +-static int console_trylock_for_printk(unsigned int cpu) ++static int console_trylock_for_printk(unsigned int cpu, unsigned long flags) + __releases(&logbuf_lock) + { ++#ifdef CONFIG_PREEMPT_RT_FULL ++ int lock = !early_boot_irqs_disabled && !irqs_disabled_flags(flags) && ++ !preempt_count(); ++#else ++ int lock = 1; ++#endif + int retval = 0; + +- if (console_trylock()) { ++ if (lock && console_trylock()) { + retval = 1; + + /* +@@ -800,7 +847,7 @@ static int console_trylock_for_printk(un + } + } + printk_cpu = UINT_MAX; +- spin_unlock(&logbuf_lock); ++ raw_spin_unlock(&logbuf_lock); + return retval; + } + static const char recursion_bug_msg [] = +@@ -833,6 +880,13 @@ asmlinkage int vprintk(const char *fmt, + size_t plen; + char special; + ++ /* ++ * Fall back to early_printk if a debugging subsystem has ++ * killed printk output ++ */ ++ if (unlikely(forced_early_printk(fmt, args))) ++ return 1; ++ + boot_delay_msec(); + printk_delay(); + +@@ -860,7 +914,7 @@ asmlinkage int vprintk(const char *fmt, + } + + lockdep_off(); +- spin_lock(&logbuf_lock); ++ raw_spin_lock(&logbuf_lock); + printk_cpu = this_cpu; + + if (recursion_bug) { +@@ -953,8 +1007,15 @@ asmlinkage int vprintk(const char *fmt, + * will release 'logbuf_lock' regardless of whether it + * actually gets the semaphore or not. 
+ */ +- if (console_trylock_for_printk(this_cpu)) ++ if (console_trylock_for_printk(this_cpu, flags)) { ++#ifndef CONFIG_PREEMPT_RT_FULL ++ console_unlock(); ++#else ++ raw_local_irq_restore(flags); + console_unlock(); ++ raw_local_irq_save(flags); ++#endif ++ } + + lockdep_on(); + out_restore_irqs: +@@ -1252,18 +1313,23 @@ void console_unlock(void) + console_may_schedule = 0; + + for ( ; ; ) { +- spin_lock_irqsave(&logbuf_lock, flags); ++ raw_spin_lock_irqsave(&logbuf_lock, flags); + wake_klogd |= log_start - log_end; + if (con_start == log_end) + break; /* Nothing to print */ + _con_start = con_start; + _log_end = log_end; + con_start = log_end; /* Flush */ +- spin_unlock(&logbuf_lock); ++#ifndef CONFIG_PREEMPT_RT_FULL ++ raw_spin_unlock(&logbuf_lock); + stop_critical_timings(); /* don't trace print latency */ + call_console_drivers(_con_start, _log_end); + start_critical_timings(); + local_irq_restore(flags); ++#else ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); ++ call_console_drivers(_con_start, _log_end); ++#endif + } + console_locked = 0; + +@@ -1272,7 +1338,7 @@ void console_unlock(void) + exclusive_console = NULL; + + up(&console_sem); +- spin_unlock_irqrestore(&logbuf_lock, flags); ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (wake_klogd) + wake_up_klogd(); + } +@@ -1502,9 +1568,9 @@ void register_console(struct console *ne + * console_unlock(); will print out the buffered messages + * for us. + */ +- spin_lock_irqsave(&logbuf_lock, flags); ++ raw_spin_lock_irqsave(&logbuf_lock, flags); + con_start = log_start; +- spin_unlock_irqrestore(&logbuf_lock, flags); ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + /* + * We're about to replay the log buffer. Only do this to the + * just-registered console to avoid excessive message spam to +@@ -1711,10 +1777,10 @@ void kmsg_dump(enum kmsg_dump_reason rea + /* Theoretically, the log could move on after we do this, but + there's not a lot we can do about that. The new messages + will overwrite the start of what we dump. 
*/ +- spin_lock_irqsave(&logbuf_lock, flags); ++ raw_spin_lock_irqsave(&logbuf_lock, flags); + end = log_end & LOG_BUF_MASK; + chars = logged_chars; +- spin_unlock_irqrestore(&logbuf_lock, flags); ++ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + if (chars > end) { + s1 = log_buf + log_buf_len - chars + end; +Index: linux-2.6/lib/ratelimit.c +=================================================================== +--- linux-2.6.orig/lib/ratelimit.c ++++ linux-2.6/lib/ratelimit.c +@@ -39,7 +39,7 @@ int ___ratelimit(struct ratelimit_state + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ +- if (!spin_trylock_irqsave(&rs->lock, flags)) ++ if (!raw_spin_trylock_irqsave(&rs->lock, flags)) + return 0; + + if (!rs->begin) +@@ -60,7 +60,7 @@ int ___ratelimit(struct ratelimit_state + rs->missed++; + ret = 0; + } +- spin_unlock_irqrestore(&rs->lock, flags); ++ raw_spin_unlock_irqrestore(&rs->lock, flags); + + return ret; + } +Index: linux-2.6/include/linux/init_task.h +=================================================================== +--- linux-2.6.orig/include/linux/init_task.h ++++ linux-2.6/include/linux/init_task.h +@@ -42,7 +42,7 @@ extern struct fs_struct init_fs; + .cputimer = { \ + .cputime = INIT_CPUTIME, \ + .running = 0, \ +- .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + }, \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ +@@ -179,6 +179,7 @@ extern struct cred init_cred; + .fs_excl = ATOMIC_INIT(0), \ + .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .timer_slack_ns = 50000, /* 50 usec default slack */ \ ++ .posix_timer_list = NULL, \ + .pids = { \ + [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ + [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ +Index: linux-2.6/kernel/posix-cpu-timers.c +=================================================================== +--- linux-2.6.orig/kernel/posix-cpu-timers.c ++++ linux-2.6/kernel/posix-cpu-timers.c +@@ -274,7 +274,7 @@ void thread_group_cputimer(struct task_s + struct task_cputime sum; + unsigned long flags; + +- spin_lock_irqsave(&cputimer->lock, flags); ++ raw_spin_lock_irqsave(&cputimer->lock, flags); + if (!cputimer->running) { + cputimer->running = 1; + /* +@@ -287,7 +287,7 @@ void thread_group_cputimer(struct task_s + update_gt_cputime(&cputimer->cputime, &sum); + } + *times = cputimer->cputime; +- spin_unlock_irqrestore(&cputimer->lock, flags); ++ raw_spin_unlock_irqrestore(&cputimer->lock, flags); + } + + /* +@@ -699,7 +699,7 @@ static int posix_cpu_timer_set(struct k_ + /* + * Disarm any old timer after extracting its expiry time. + */ +- BUG_ON(!irqs_disabled()); ++ BUG_ON_NONRT(!irqs_disabled()); + + ret = 0; + old_incr = timer->it.cpu.incr; +@@ -997,9 +997,9 @@ static void stop_process_timers(struct s + struct thread_group_cputimer *cputimer = &sig->cputimer; + unsigned long flags; + +- spin_lock_irqsave(&cputimer->lock, flags); ++ raw_spin_lock_irqsave(&cputimer->lock, flags); + cputimer->running = 0; +- spin_unlock_irqrestore(&cputimer->lock, flags); ++ raw_spin_unlock_irqrestore(&cputimer->lock, flags); + } + + static u32 onecputick; +@@ -1221,7 +1221,7 @@ void posix_cpu_timer_schedule(struct k_i + /* + * Now re-arm for the new expiry time. 
+ */ +- BUG_ON(!irqs_disabled()); ++ BUG_ON_NONRT(!irqs_disabled()); + arm_timer(timer); + spin_unlock(&p->sighand->siglock); + +@@ -1289,9 +1289,9 @@ static inline int fastpath_timer_check(s + if (sig->cputimer.running) { + struct task_cputime group_sample; + +- spin_lock(&sig->cputimer.lock); ++ raw_spin_lock(&sig->cputimer.lock); + group_sample = sig->cputimer.cputime; +- spin_unlock(&sig->cputimer.lock); ++ raw_spin_unlock(&sig->cputimer.lock); + + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) + return 1; +@@ -1305,13 +1305,13 @@ static inline int fastpath_timer_check(s + * already updated our counts. We need to check if any timers fire now. + * Interrupts are disabled. + */ +-void run_posix_cpu_timers(struct task_struct *tsk) ++void __run_posix_cpu_timers(struct task_struct *tsk) + { + LIST_HEAD(firing); + struct k_itimer *timer, *next; + unsigned long flags; + +- BUG_ON(!irqs_disabled()); ++ BUG_ON_NONRT(!irqs_disabled()); + + /* + * The fast path checks that there are no expired thread or thread +@@ -1369,6 +1369,177 @@ void run_posix_cpu_timers(struct task_st + } + } + ++#include ++#include ++DEFINE_PER_CPU(struct task_struct *, posix_timer_task); ++DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); ++ ++static int posix_cpu_timers_thread(void *data) ++{ ++ int cpu = (long)data; ++ ++ BUG_ON(per_cpu(posix_timer_task,cpu) != current); ++ ++ while (!kthread_should_stop()) { ++ struct task_struct *tsk = NULL; ++ struct task_struct *next = NULL; ++ ++ if (cpu_is_offline(cpu)) ++ goto wait_to_die; ++ ++ /* grab task list */ ++ raw_local_irq_disable(); ++ tsk = per_cpu(posix_timer_tasklist, cpu); ++ per_cpu(posix_timer_tasklist, cpu) = NULL; ++ raw_local_irq_enable(); ++ ++ /* its possible the list is empty, just return */ ++ if (!tsk) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ __set_current_state(TASK_RUNNING); ++ continue; ++ } ++ ++ /* Process task list */ ++ while (1) { ++ /* save next */ ++ next = tsk->posix_timer_list; ++ ++ /* run the task timers, clear its ptr and ++ * unreference it ++ */ ++ __run_posix_cpu_timers(tsk); ++ tsk->posix_timer_list = NULL; ++ put_task_struct(tsk); ++ ++ /* check if this is the last on the list */ ++ if (next == tsk) ++ break; ++ tsk = next; ++ } ++ } ++ return 0; ++ ++wait_to_die: ++ /* Wait for kthread_stop */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ while (!kthread_should_stop()) { ++ schedule(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ __set_current_state(TASK_RUNNING); ++ return 0; ++} ++ ++static inline int __fastpath_timer_check(struct task_struct *tsk) ++{ ++ /* tsk == current, ensure it is safe to use ->signal/sighand */ ++ if (unlikely(tsk->exit_state)) ++ return 0; ++ ++ if (!task_cputime_zero(&tsk->cputime_expires)) ++ return 1; ++ ++ if (!task_cputime_zero(&tsk->signal->cputime_expires)) ++ return 1; ++ ++ return 0; ++} ++ ++void run_posix_cpu_timers(struct task_struct *tsk) ++{ ++ unsigned long cpu = smp_processor_id(); ++ struct task_struct *tasklist; ++ ++ BUG_ON(!irqs_disabled()); ++ if(!per_cpu(posix_timer_task, cpu)) ++ return; ++ /* get per-cpu references */ ++ tasklist = per_cpu(posix_timer_tasklist, cpu); ++ ++ /* check to see if we're already queued */ ++ if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { ++ get_task_struct(tsk); ++ if (tasklist) { ++ tsk->posix_timer_list = tasklist; ++ } else { ++ /* ++ * The list is terminated by a self-pointing ++ * task_struct ++ */ ++ tsk->posix_timer_list = tsk; ++ } ++ per_cpu(posix_timer_tasklist, cpu) = tsk; ++ ++ 
wake_up_process(per_cpu(posix_timer_task, cpu)); ++ } ++} ++ ++/* ++ * posix_cpu_thread_call - callback that gets triggered when a CPU is added. ++ * Here we can start up the necessary migration thread for the new CPU. ++ */ ++static int posix_cpu_thread_call(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ int cpu = (long)hcpu; ++ struct task_struct *p; ++ struct sched_param param; ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ p = kthread_create(posix_cpu_timers_thread, hcpu, ++ "posixcputmr/%d",cpu); ++ if (IS_ERR(p)) ++ return NOTIFY_BAD; ++ p->flags |= PF_NOFREEZE; ++ kthread_bind(p, cpu); ++ /* Must be high prio to avoid getting starved */ ++ param.sched_priority = MAX_RT_PRIO-1; ++ sched_setscheduler(p, SCHED_FIFO, ¶m); ++ per_cpu(posix_timer_task,cpu) = p; ++ break; ++ case CPU_ONLINE: ++ /* Strictly unneccessary, as first user will wake it. */ ++ wake_up_process(per_cpu(posix_timer_task,cpu)); ++ break; ++#ifdef CONFIG_HOTPLUG_CPU ++ case CPU_UP_CANCELED: ++ /* Unbind it from offline cpu so it can run. Fall thru. */ ++ kthread_bind(per_cpu(posix_timer_task,cpu), ++ any_online_cpu(cpu_online_map)); ++ kthread_stop(per_cpu(posix_timer_task,cpu)); ++ per_cpu(posix_timer_task,cpu) = NULL; ++ break; ++ case CPU_DEAD: ++ kthread_stop(per_cpu(posix_timer_task,cpu)); ++ per_cpu(posix_timer_task,cpu) = NULL; ++ break; ++#endif ++ } ++ return NOTIFY_OK; ++} ++ ++/* Register at highest priority so that task migration (migrate_all_tasks) ++ * happens before everything else. ++ */ ++static struct notifier_block __devinitdata posix_cpu_thread_notifier = { ++ .notifier_call = posix_cpu_thread_call, ++ .priority = 10 ++}; ++ ++static int __init posix_cpu_thread_init(void) ++{ ++ void *cpu = (void *)(long)smp_processor_id(); ++ /* Start one for boot CPU. */ ++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, cpu); ++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, cpu); ++ register_cpu_notifier(&posix_cpu_thread_notifier); ++ return 0; ++} ++early_initcall(posix_cpu_thread_init); ++ + /* + * Set one of the process-wide special case CPU timers or RLIMIT_CPU. + * The tsk->sighand->siglock must be held by the caller. 
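
(Illustrative aside, not part of the rt patch itself.) The hunk above stops running the posix CPU timer machinery directly from hard interrupt context: on PREEMPT_RT, run_posix_cpu_timers() only hooks the current task into a per-CPU list and wakes the high-priority posixcputmr/N kthread, which then calls __run_posix_cpu_timers() for each queued task in schedulable context. The list is intrusive (threaded through task_struct's posix_timer_list field) and is terminated by an entry that points to itself rather than by NULL, so a NULL pointer can double as the "not queued" marker. A minimal user-space sketch of that self-terminating list idiom — hypothetical names, no locking, and printf() standing in for the timer work — might look like this:

    #include <stdio.h>

    struct task {
        const char  *name;
        struct task *next;   /* NULL = not queued; last entry points to itself */
    };

    /* queue at the head; the first element self-terminates the list */
    static void queue_task(struct task **head, struct task *t)
    {
        if (t->next)                  /* already queued */
            return;
        t->next = *head ? *head : t;  /* self-pointer if the list was empty */
        *head = t;
    }

    /* drain: grab the list, then walk it until an entry points to itself */
    static void run_queued(struct task **head)
    {
        struct task *t = *head;

        *head = NULL;
        while (t) {
            struct task *next = t->next;

            t->next = NULL;           /* mark "not queued" again */
            printf("running timers for %s\n", t->name);
            if (next == t)            /* self-pointer: last element */
                break;
            t = next;
        }
    }

    int main(void)
    {
        struct task a = { "a", NULL }, b = { "b", NULL }, *head = NULL;

        queue_task(&head, &a);
        queue_task(&head, &b);
        run_queued(&head);
        return 0;
    }

Terminating with a self-pointer instead of NULL is what lets the single posix_timer_list field serve double duty as both the link and the "am I already queued?" test in the hunk above.
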
+@@ -1617,6 +1788,11 @@ static __init int init_posix_cpu_timers( + .timer_create = thread_cpu_timer_create, + }; + struct timespec ts; ++ unsigned long cpu; ++ ++ /* init the per-cpu posix_timer_tasklets */ ++ for_each_cpu_mask(cpu, cpu_possible_map) ++ per_cpu(posix_timer_tasklist, cpu) = NULL; + + posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process); + posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread); +Index: linux-2.6/kernel/sched_stats.h +=================================================================== +--- linux-2.6.orig/kernel/sched_stats.h ++++ linux-2.6/kernel/sched_stats.h +@@ -282,10 +282,10 @@ static inline void account_group_user_ti + if (!cputimer->running) + return; + +- spin_lock(&cputimer->lock); ++ raw_spin_lock(&cputimer->lock); + cputimer->cputime.utime = + cputime_add(cputimer->cputime.utime, cputime); +- spin_unlock(&cputimer->lock); ++ raw_spin_unlock(&cputimer->lock); + } + + /** +@@ -306,10 +306,10 @@ static inline void account_group_system_ + if (!cputimer->running) + return; + +- spin_lock(&cputimer->lock); ++ raw_spin_lock(&cputimer->lock); + cputimer->cputime.stime = + cputime_add(cputimer->cputime.stime, cputime); +- spin_unlock(&cputimer->lock); ++ raw_spin_unlock(&cputimer->lock); + } + + /** +@@ -330,7 +330,7 @@ static inline void account_group_exec_ru + if (!cputimer->running) + return; + +- spin_lock(&cputimer->lock); ++ raw_spin_lock(&cputimer->lock); + cputimer->cputime.sum_exec_runtime += ns; +- spin_unlock(&cputimer->lock); ++ raw_spin_unlock(&cputimer->lock); + } +Index: linux-2.6/include/linux/semaphore.h +=================================================================== +--- linux-2.6.orig/include/linux/semaphore.h ++++ linux-2.6/include/linux/semaphore.h +@@ -14,14 +14,14 @@ + + /* Please don't access any members of this structure directly */ + struct semaphore { +- spinlock_t lock; ++ raw_spinlock_t lock; + unsigned int count; + struct list_head wait_list; + }; + + #define __SEMAPHORE_INITIALIZER(name, n) \ + { \ +- .lock = __SPIN_LOCK_UNLOCKED((name).lock), \ ++ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ + .count = n, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + } +Index: linux-2.6/kernel/semaphore.c +=================================================================== +--- linux-2.6.orig/kernel/semaphore.c ++++ linux-2.6/kernel/semaphore.c +@@ -54,12 +54,12 @@ void down(struct semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + __down(sem); +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + } + EXPORT_SYMBOL(down); + +@@ -77,12 +77,12 @@ int down_interruptible(struct semaphore + unsigned long flags; + int result = 0; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_interruptible(sem); +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; + } +@@ -103,12 +103,12 @@ int down_killable(struct semaphore *sem) + unsigned long flags; + int result = 0; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_killable(sem); +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; + } +@@ -132,11 +132,11 @@ 
int down_trylock(struct semaphore *sem) + unsigned long flags; + int count; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + count = sem->count - 1; + if (likely(count >= 0)) + sem->count = count; +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + + return (count < 0); + } +@@ -157,12 +157,12 @@ int down_timeout(struct semaphore *sem, + unsigned long flags; + int result = 0; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(sem->count > 0)) + sem->count--; + else + result = __down_timeout(sem, jiffies); +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + + return result; + } +@@ -179,12 +179,12 @@ void up(struct semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->lock, flags); ++ raw_spin_lock_irqsave(&sem->lock, flags); + if (likely(list_empty(&sem->wait_list))) + sem->count++; + else + __up(sem); +- spin_unlock_irqrestore(&sem->lock, flags); ++ raw_spin_unlock_irqrestore(&sem->lock, flags); + } + EXPORT_SYMBOL(up); + +@@ -217,9 +217,9 @@ static inline int __sched __down_common( + if (timeout <= 0) + goto timed_out; + __set_task_state(task, state); +- spin_unlock_irq(&sem->lock); ++ raw_spin_unlock_irq(&sem->lock); + timeout = schedule_timeout(timeout); +- spin_lock_irq(&sem->lock); ++ raw_spin_lock_irq(&sem->lock); + if (waiter.up) + return 0; + } +Index: linux-2.6/include/linux/rwsem-spinlock.h +=================================================================== +--- linux-2.6.orig/include/linux/rwsem-spinlock.h ++++ linux-2.6/include/linux/rwsem-spinlock.h +@@ -20,26 +20,42 @@ + * - if activity is -1 then there is one active writer + * - if wait_list is not empty, then there are processes waiting for the semaphore + */ ++struct rw_anon_semaphore { ++ __s32 activity; ++ raw_spinlock_t wait_lock; ++ struct list_head wait_list; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++#ifndef CONFIG_PREEMPT_RT_FULL ++/* ++ * Non preempt-rt implementation of rw_semaphore. Same as above, but ++ * restricted vs. ownership. i.e. ownerless locked state and non owner ++ * release not allowed. 
++ */ + struct rw_semaphore { + __s32 activity; +- spinlock_t wait_lock; ++ raw_spinlock_t wait_lock; + struct list_head wait_list; + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif + }; ++#endif /* PREEMPT_RT_FULL */ + + #define RWSEM_UNLOCKED_VALUE 0x00000000 + +-extern void __down_read(struct rw_semaphore *sem); +-extern int __down_read_trylock(struct rw_semaphore *sem); +-extern void __down_write(struct rw_semaphore *sem); +-extern void __down_write_nested(struct rw_semaphore *sem, int subclass); +-extern int __down_write_trylock(struct rw_semaphore *sem); +-extern void __up_read(struct rw_semaphore *sem); +-extern void __up_write(struct rw_semaphore *sem); +-extern void __downgrade_write(struct rw_semaphore *sem); +-extern int rwsem_is_locked(struct rw_semaphore *sem); ++extern void __down_read(struct rw_anon_semaphore *sem); ++extern int __down_read_trylock(struct rw_anon_semaphore *sem); ++extern void __down_write(struct rw_anon_semaphore *sem); ++extern void __down_write_nested(struct rw_anon_semaphore *sem, int subclass); ++extern int __down_write_trylock(struct rw_anon_semaphore *sem); ++extern void __up_read(struct rw_anon_semaphore *sem); ++extern void __up_write(struct rw_anon_semaphore *sem); ++extern void __downgrade_write(struct rw_anon_semaphore *sem); ++extern int anon_rwsem_is_locked(struct rw_anon_semaphore *sem); + + #endif /* __KERNEL__ */ + #endif /* _LINUX_RWSEM_SPINLOCK_H */ +Index: linux-2.6/include/linux/rwsem.h +=================================================================== +--- linux-2.6.orig/include/linux/rwsem.h ++++ linux-2.6/include/linux/rwsem.h +@@ -17,37 +17,50 @@ + #include + #include + ++struct rw_anon_semaphore; + struct rw_semaphore; + + #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK + #include /* use a generic implementation */ +-#else ++#else /* RWSEM_GENERIC_SPINLOCK */ ++ + /* All arch specific implementations share the same struct */ +-struct rw_semaphore { ++struct rw_anon_semaphore { + long count; +- spinlock_t wait_lock; ++ raw_spinlock_t wait_lock; + struct list_head wait_list; + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif + }; + +-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); +-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); ++extern struct rw_anon_semaphore *rwsem_down_read_failed(struct rw_anon_semaphore *sem); ++extern struct rw_anon_semaphore *rwsem_down_write_failed(struct rw_anon_semaphore *sem); ++extern struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *); ++extern struct rw_anon_semaphore *rwsem_downgrade_wake(struct rw_anon_semaphore *sem); + + /* Include the arch specific part */ + #include + + /* In all implementations count != 0 means locked */ +-static inline int rwsem_is_locked(struct rw_semaphore *sem) ++static inline int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) + { + return sem->count != 0; + } + ++#ifndef CONFIG_PREEMPT_RT_FULL ++struct rw_semaphore { ++ long count; ++ raw_spinlock_t wait_lock; ++ struct list_head wait_list; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; + #endif + ++#endif /* !RWSEM_GENERIC_SPINLOCK */ ++ + /* Common initializer macros and functions */ + + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -56,57 +69,59 @@ static inline int rwsem_is_locked(struct + # define __RWSEM_DEP_MAP_INIT(lockname) + #endif + 
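
(Illustrative aside, not part of the rt patch itself.) The rwsem rework in this hunk follows a wrapper-by-cast pattern: the real semaphore code now operates on struct rw_anon_semaphore, while on non-RT configurations struct rw_semaphore is kept as a field-for-field identical copy so the existing down_read()/up_write() API survives as trivial inline wrappers that cast the pointer to the anon type (those wrappers appear further down in this hunk; under CONFIG_PREEMPT_RT_FULL a separate header supplies a different rw_semaphore instead). A compact user-space sketch of the pattern, using hypothetical counter_impl/counter names:

    #include <stdio.h>

    /* "anon" type: the one the real implementation works on */
    struct counter_impl {
        long value;
    };

    static void impl_add(struct counter_impl *c, long n)
    {
        c->value += n;
    }

    /* public type: must mirror struct counter_impl field for field */
    struct counter {
        long value;
    };

    /* legacy API kept as a thin wrapper that casts to the impl type */
    static inline void counter_add(struct counter *c, long n)
    {
        impl_add((struct counter_impl *)c, n);
    }

    int main(void)
    {
        struct counter c = { 0 };

        counter_add(&c, 3);
        printf("%ld\n", c.value);
        return 0;
    }

The cast is only sound while the two layouts stay identical, which is why the patch keeps both struct definitions side by side; the kernel also builds with -fno-strict-aliasing, so this kind of type pun is not broken by aliasing optimizations.
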
+-#define __RWSEM_INITIALIZER(name) \ +- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \ +- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } ++#define __RWSEM_ANON_INITIALIZER(name) \ ++ { RWSEM_UNLOCKED_VALUE, \ ++ __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ LIST_HEAD_INIT((name).wait_list) \ ++ __RWSEM_DEP_MAP_INIT(name) } + +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) ++#define DECLARE_ANON_RWSEM(name) \ ++ struct rw_anon_semaphore name = __RWSEM_INITIALIZER(name) + +-extern void __init_rwsem(struct rw_semaphore *sem, const char *name, +- struct lock_class_key *key); ++extern void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, ++ struct lock_class_key *key); + +-#define init_rwsem(sem) \ ++#define init_anon_rwsem(sem) \ + do { \ + static struct lock_class_key __key; \ + \ +- __init_rwsem((sem), #sem, &__key); \ ++ __init_anon_rwsem((sem), #sem, &__key); \ + } while (0) + + /* + * lock for reading + */ +-extern void down_read(struct rw_semaphore *sem); ++extern void anon_down_read(struct rw_anon_semaphore *sem); + + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-extern int down_read_trylock(struct rw_semaphore *sem); ++extern int anon_down_read_trylock(struct rw_anon_semaphore *sem); + + /* + * lock for writing + */ +-extern void down_write(struct rw_semaphore *sem); ++extern void anon_down_write(struct rw_anon_semaphore *sem); + + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-extern int down_write_trylock(struct rw_semaphore *sem); ++extern int anon_down_write_trylock(struct rw_anon_semaphore *sem); + + /* + * release a read lock + */ +-extern void up_read(struct rw_semaphore *sem); ++extern void anon_up_read(struct rw_anon_semaphore *sem); + + /* + * release a write lock + */ +-extern void up_write(struct rw_semaphore *sem); ++extern void anon_up_write(struct rw_anon_semaphore *sem); + + /* + * downgrade write lock to read lock + */ +-extern void downgrade_write(struct rw_semaphore *sem); ++extern void anon_downgrade_write(struct rw_anon_semaphore *sem); + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -122,21 +137,101 @@ extern void downgrade_write(struct rw_se + * lockdep_set_class() at lock initialization time. + * See Documentation/lockdep-design.txt for more details.) + */ +-extern void down_read_nested(struct rw_semaphore *sem, int subclass); +-extern void down_write_nested(struct rw_semaphore *sem, int subclass); ++extern void anon_down_read_nested(struct rw_anon_semaphore *sem, int subclass); ++extern void anon_down_write_nested(struct rw_anon_semaphore *sem, int subclass); + /* + * Take/release a lock when not the owner will release it. + * + * [ This API should be avoided as much as possible - the + * proper abstraction for this case is completions. 
] + */ +-extern void down_read_non_owner(struct rw_semaphore *sem); +-extern void up_read_non_owner(struct rw_semaphore *sem); ++extern void anon_down_read_non_owner(struct rw_anon_semaphore *sem); ++extern void anon_up_read_non_owner(struct rw_anon_semaphore *sem); + #else +-# define down_read_nested(sem, subclass) down_read(sem) +-# define down_write_nested(sem, subclass) down_write(sem) +-# define down_read_non_owner(sem) down_read(sem) +-# define up_read_non_owner(sem) up_read(sem) ++# define anon_down_read_nested(sem, subclass) anon_down_read(sem) ++# define anon_down_write_nested(sem, subclass) anon_down_write(sem) ++# define anon_down_read_non_owner(sem) anon_down_read(sem) ++# define anon_up_read_non_owner(sem) anon_up_read(sem) + #endif + ++#ifdef CONFIG_PREEMPT_RT_FULL ++#include ++#else /* PREEMPT_RT_FULL */ ++/* ++ * Non preempt-rt implementations ++ */ ++#define __RWSEM_INITIALIZER(name) \ ++ { RWSEM_UNLOCKED_VALUE, \ ++ __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ LIST_HEAD_INIT((name).wait_list) \ ++ __RWSEM_DEP_MAP_INIT(name) } ++ ++#define DECLARE_RWSEM(name) \ ++ struct rw_semaphore name = __RWSEM_INITIALIZER(name) ++ ++static inline void __init_rwsem(struct rw_semaphore *sem, const char *name, ++ struct lock_class_key *key) ++{ ++ __init_anon_rwsem((struct rw_anon_semaphore *)sem, name, key); ++} ++ ++#define init_rwsem(sem) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __init_rwsem((sem), #sem, &__key); \ ++} while (0) ++ ++static inline void down_read(struct rw_semaphore *sem) ++{ ++ anon_down_read((struct rw_anon_semaphore *)sem); ++} ++ ++static inline int down_read_trylock(struct rw_semaphore *sem) ++{ ++ return anon_down_read_trylock((struct rw_anon_semaphore *)sem); ++} ++ ++static inline void down_write(struct rw_semaphore *sem) ++{ ++ anon_down_write((struct rw_anon_semaphore *)sem); ++} ++ ++static inline int down_write_trylock(struct rw_semaphore *sem) ++{ ++ return anon_down_write_trylock((struct rw_anon_semaphore *)sem); ++} ++ ++static inline void up_read(struct rw_semaphore *sem) ++{ ++ anon_up_read((struct rw_anon_semaphore *)sem); ++} ++ ++static inline void up_write(struct rw_semaphore *sem) ++{ ++ anon_up_write((struct rw_anon_semaphore *)sem); ++} ++ ++static inline void downgrade_write(struct rw_semaphore *sem) ++{ ++ anon_downgrade_write((struct rw_anon_semaphore *)sem); ++} ++ ++static inline void down_read_nested(struct rw_semaphore *sem, int subclass) ++{ ++ return anon_down_read_nested((struct rw_anon_semaphore *)sem, subclass); ++} ++ ++static inline void down_write_nested(struct rw_semaphore *sem, int subclass) ++{ ++ anon_down_write_nested((struct rw_anon_semaphore *)sem, subclass); ++} ++ ++static inline int rwsem_is_locked(struct rw_semaphore *sem) ++{ ++ return anon_rwsem_is_locked((struct rw_anon_semaphore *)sem); ++} ++#endif /* !PREEMPT_RT_FULL */ ++ + #endif /* _LINUX_RWSEM_H */ ++ +Index: linux-2.6/lib/rwsem-spinlock.c +=================================================================== +--- linux-2.6.orig/lib/rwsem-spinlock.c ++++ linux-2.6/lib/rwsem-spinlock.c +@@ -17,24 +17,24 @@ struct rwsem_waiter { + #define RWSEM_WAITING_FOR_WRITE 0x00000002 + }; + +-int rwsem_is_locked(struct rw_semaphore *sem) ++int anon_rwsem_is_locked(struct rw_anon_semaphore *sem) + { + int ret = 1; + unsigned long flags; + +- if (spin_trylock_irqsave(&sem->wait_lock, flags)) { ++ if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { + ret = (sem->activity != 0); +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ 
raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + } + return ret; + } +-EXPORT_SYMBOL(rwsem_is_locked); ++EXPORT_SYMBOL(anon_rwsem_is_locked); + + /* + * initialise the semaphore + */ +-void __init_rwsem(struct rw_semaphore *sem, const char *name, +- struct lock_class_key *key) ++void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, ++ struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -44,10 +44,10 @@ void __init_rwsem(struct rw_semaphore *s + lockdep_init_map(&sem->dep_map, name, key, 0); + #endif + sem->activity = 0; +- spin_lock_init(&sem->wait_lock); ++ raw_spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); + } +-EXPORT_SYMBOL(__init_rwsem); ++EXPORT_SYMBOL(__init_anon_rwsem); + + /* + * handle the lock release when processes blocked on it that can now run +@@ -58,8 +58,8 @@ EXPORT_SYMBOL(__init_rwsem); + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if wakewrite is non-zero + */ +-static inline struct rw_semaphore * +-__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) ++static inline struct rw_anon_semaphore * ++__rwsem_do_wake(struct rw_anon_semaphore *sem, int wakewrite) + { + struct rwsem_waiter *waiter; + struct task_struct *tsk; +@@ -117,8 +117,8 @@ __rwsem_do_wake(struct rw_semaphore *sem + /* + * wake a single writer + */ +-static inline struct rw_semaphore * +-__rwsem_wake_one_writer(struct rw_semaphore *sem) ++static inline struct rw_anon_semaphore * ++__rwsem_wake_one_writer(struct rw_anon_semaphore *sem) + { + struct rwsem_waiter *waiter; + struct task_struct *tsk; +@@ -139,18 +139,18 @@ __rwsem_wake_one_writer(struct rw_semaph + /* + * get a read lock on the semaphore + */ +-void __sched __down_read(struct rw_semaphore *sem) ++void __sched __down_read(struct rw_anon_semaphore *sem) + { + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + goto out; + } + +@@ -165,7 +165,7 @@ void __sched __down_read(struct rw_semap + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + /* wait to be given the lock */ + for (;;) { +@@ -183,13 +183,13 @@ void __sched __down_read(struct rw_semap + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-int __down_read_trylock(struct rw_semaphore *sem) ++int __down_read_trylock(struct rw_anon_semaphore *sem) + { + unsigned long flags; + int ret = 0; + + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ +@@ -197,7 +197,7 @@ int __down_read_trylock(struct rw_semaph + ret = 1; + } + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; + } +@@ -206,18 +206,18 @@ int __down_read_trylock(struct rw_semaph + * get a write lock on the semaphore + * - we increment the waiting count anyway to indicate an exclusive lock + */ +-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) ++void __sched 
__down_write_nested(struct rw_anon_semaphore *sem, int subclass) + { + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity = -1; +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + goto out; + } + +@@ -232,7 +232,7 @@ void __sched __down_write_nested(struct + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + /* wait to be given the lock */ + for (;;) { +@@ -247,7 +247,7 @@ void __sched __down_write_nested(struct + ; + } + +-void __sched __down_write(struct rw_semaphore *sem) ++void __sched __down_write(struct rw_anon_semaphore *sem) + { + __down_write_nested(sem, 0); + } +@@ -255,12 +255,12 @@ void __sched __down_write(struct rw_sema + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-int __down_write_trylock(struct rw_semaphore *sem) ++int __down_write_trylock(struct rw_anon_semaphore *sem) + { + unsigned long flags; + int ret = 0; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ +@@ -268,7 +268,7 @@ int __down_write_trylock(struct rw_semap + ret = 1; + } + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; + } +@@ -276,48 +276,48 @@ int __down_write_trylock(struct rw_semap + /* + * release a read lock on the semaphore + */ +-void __up_read(struct rw_semaphore *sem) ++void __up_read(struct rw_anon_semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + sem = __rwsem_wake_one_writer(sem); + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + } + + /* + * release a write lock on the semaphore + */ +-void __up_write(struct rw_semaphore *sem) ++void __up_write(struct rw_anon_semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 0; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 1); + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + } + + /* + * downgrade a write lock into a read lock + * - just wake up any readers at the front of the queue + */ +-void __downgrade_write(struct rw_semaphore *sem) ++void __downgrade_write(struct rw_anon_semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 1; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 0); + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + } + +Index: linux-2.6/lib/rwsem.c +=================================================================== +--- linux-2.6.orig/lib/rwsem.c ++++ linux-2.6/lib/rwsem.c +@@ -11,8 +11,8 @@ + /* + * Initialize an rwsem: + */ +-void __init_rwsem(struct rw_semaphore *sem, const char *name, +- struct 
lock_class_key *key) ++void __init_anon_rwsem(struct rw_anon_semaphore *sem, const char *name, ++ struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -22,11 +22,11 @@ void __init_rwsem(struct rw_semaphore *s + lockdep_init_map(&sem->dep_map, name, key, 0); + #endif + sem->count = RWSEM_UNLOCKED_VALUE; +- spin_lock_init(&sem->wait_lock); ++ raw_spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); + } + +-EXPORT_SYMBOL(__init_rwsem); ++EXPORT_SYMBOL(__init_anon_rwsem); + + struct rwsem_waiter { + struct list_head list; +@@ -54,8 +54,8 @@ struct rwsem_waiter { + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if downgrading is false + */ +-static struct rw_semaphore * +-__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) ++static struct rw_anon_semaphore * ++__rwsem_do_wake(struct rw_anon_semaphore *sem, int wake_type) + { + struct rwsem_waiter *waiter; + struct task_struct *tsk; +@@ -169,8 +169,8 @@ __rwsem_do_wake(struct rw_semaphore *sem + /* + * wait for a lock to be granted + */ +-static struct rw_semaphore __sched * +-rwsem_down_failed_common(struct rw_semaphore *sem, ++static struct rw_anon_semaphore __sched * ++rwsem_down_failed_common(struct rw_anon_semaphore *sem, + unsigned int flags, signed long adjustment) + { + struct rwsem_waiter waiter; +@@ -180,7 +180,7 @@ rwsem_down_failed_common(struct rw_semap + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ +- spin_lock_irq(&sem->wait_lock); ++ raw_spin_lock_irq(&sem->wait_lock); + waiter.task = tsk; + waiter.flags = flags; + get_task_struct(tsk); +@@ -204,7 +204,7 @@ rwsem_down_failed_common(struct rw_semap + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + +- spin_unlock_irq(&sem->wait_lock); ++ raw_spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + for (;;) { +@@ -222,7 +222,8 @@ rwsem_down_failed_common(struct rw_semap + /* + * wait for the read lock to be granted + */ +-struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) ++struct rw_anon_semaphore __sched * ++rwsem_down_read_failed(struct rw_anon_semaphore *sem) + { + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, + -RWSEM_ACTIVE_READ_BIAS); +@@ -231,7 +232,8 @@ struct rw_semaphore __sched *rwsem_down_ + /* + * wait for the write lock to be granted + */ +-struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) ++struct rw_anon_semaphore __sched * ++rwsem_down_write_failed(struct rw_anon_semaphore *sem) + { + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, + -RWSEM_ACTIVE_WRITE_BIAS); +@@ -241,17 +243,17 @@ struct rw_semaphore __sched *rwsem_down_ + * handle waking up a waiter on the semaphore + * - up_read/up_write has decremented the active part of count if we come here + */ +-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) ++struct rw_anon_semaphore *rwsem_wake(struct rw_anon_semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; + } +@@ -261,17 +263,17 @@ struct rw_semaphore *rwsem_wake(struct r + * - caller incremented waiting part of count and discovered it still 
negative + * - just wake up any readers at the front of the queue + */ +-struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) ++struct rw_anon_semaphore *rwsem_downgrade_wake(struct rw_anon_semaphore *sem) + { + unsigned long flags; + +- spin_lock_irqsave(&sem->wait_lock, flags); ++ raw_spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + +- spin_unlock_irqrestore(&sem->wait_lock, flags); ++ raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; + } +Index: linux-2.6/kernel/time/timer_stats.c +=================================================================== +--- linux-2.6.orig/kernel/time/timer_stats.c ++++ linux-2.6/kernel/time/timer_stats.c +@@ -81,7 +81,7 @@ struct entry { + /* + * Spinlock protecting the tables - not taken during lookup: + */ +-static DEFINE_SPINLOCK(table_lock); ++static DEFINE_RAW_SPINLOCK(table_lock); + + /* + * Per-CPU lookup locks for fast hash lookup: +@@ -188,7 +188,7 @@ static struct entry *tstat_lookup(struct + prev = NULL; + curr = *head; + +- spin_lock(&table_lock); ++ raw_spin_lock(&table_lock); + /* + * Make sure we have not raced with another CPU: + */ +@@ -215,7 +215,7 @@ static struct entry *tstat_lookup(struct + *head = curr; + } + out_unlock: +- spin_unlock(&table_lock); ++ raw_spin_unlock(&table_lock); + + return curr; + } +Index: linux-2.6/kernel/latencytop.c +=================================================================== +--- linux-2.6.orig/kernel/latencytop.c ++++ linux-2.6/kernel/latencytop.c +@@ -58,7 +58,7 @@ + #include + #include + +-static DEFINE_SPINLOCK(latency_lock); ++static DEFINE_RAW_SPINLOCK(latency_lock); + + #define MAXLR 128 + static struct latency_record latency_record[MAXLR]; +@@ -72,19 +72,19 @@ void clear_all_latency_tracing(struct ta + if (!latencytop_enabled) + return; + +- spin_lock_irqsave(&latency_lock, flags); ++ raw_spin_lock_irqsave(&latency_lock, flags); + memset(&p->latency_record, 0, sizeof(p->latency_record)); + p->latency_record_count = 0; +- spin_unlock_irqrestore(&latency_lock, flags); ++ raw_spin_unlock_irqrestore(&latency_lock, flags); + } + + static void clear_global_latency_tracing(void) + { + unsigned long flags; + +- spin_lock_irqsave(&latency_lock, flags); ++ raw_spin_lock_irqsave(&latency_lock, flags); + memset(&latency_record, 0, sizeof(latency_record)); +- spin_unlock_irqrestore(&latency_lock, flags); ++ raw_spin_unlock_irqrestore(&latency_lock, flags); + } + + static void __sched +@@ -190,7 +190,7 @@ __account_scheduler_latency(struct task_ + lat.max = usecs; + store_stacktrace(tsk, &lat); + +- spin_lock_irqsave(&latency_lock, flags); ++ raw_spin_lock_irqsave(&latency_lock, flags); + + account_global_scheduler_latency(tsk, &lat); + +@@ -231,7 +231,7 @@ __account_scheduler_latency(struct task_ + memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); + + out_unlock: +- spin_unlock_irqrestore(&latency_lock, flags); ++ raw_spin_unlock_irqrestore(&latency_lock, flags); + } + + static int lstats_show(struct seq_file *m, void *v) +Index: linux-2.6/drivers/video/console/vgacon.c +=================================================================== +--- linux-2.6.orig/drivers/video/console/vgacon.c ++++ linux-2.6/drivers/video/console/vgacon.c +@@ -50,7 +50,7 @@ + #include